@@ -0,0 +1,770
1 | /** | |||
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |||
|
3 | * All rights reserved. | |||
|
4 | * | |||
|
5 | * This software may be modified and distributed under the terms | |||
|
6 | * of the BSD license. See the LICENSE file for details. | |||
|
7 | */ | |||
|
8 | ||||
|
9 | #include "python-zstandard.h" | |||
|
10 | ||||
|
11 | extern PyObject* ZstdError; | |||
|
12 | ||||
|
13 | PyDoc_STRVAR(BufferWithSegments__doc__, | |||
|
14 | "BufferWithSegments - A memory buffer holding known sub-segments.\n" | |||
|
15 | "\n" | |||
|
16 | "This type represents a contiguous chunk of memory containing N discrete\n" | |||
|
17 | "items within sub-segments of that memory.\n" | |||
|
18 | "\n" | |||
|
19 | "Segments within the buffer are stored as an array of\n" | |||
|
20 | "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n" | |||
|
21 | "integer using the host/native bit order representation.\n" | |||
|
22 | "\n" | |||
|
23 | "The type exists to facilitate operations against N>1 items without the\n" | |||
|
24 | "overhead of Python object creation and management.\n" | |||
|
25 | ); | |||
|
26 | ||||
|
27 | static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) { | |||
|
28 | /* Backing memory is either canonically owned by a Py_buffer or by us. */ | |||
|
29 | if (self->parent.buf) { | |||
|
30 | PyBuffer_Release(&self->parent); | |||
|
31 | } | |||
|
32 | else if (self->useFree) { | |||
|
33 | free(self->data); | |||
|
34 | } | |||
|
35 | else { | |||
|
36 | PyMem_Free(self->data); | |||
|
37 | } | |||
|
38 | ||||
|
39 | self->data = NULL; | |||
|
40 | ||||
|
41 | if (self->useFree) { | |||
|
42 | free(self->segments); | |||
|
43 | } | |||
|
44 | else { | |||
|
45 | PyMem_Free(self->segments); | |||
|
46 | } | |||
|
47 | ||||
|
48 | self->segments = NULL; | |||
|
49 | ||||
|
50 | PyObject_Del(self); | |||
|
51 | } | |||
|
52 | ||||
|
53 | static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) { | |||
|
54 | static char* kwlist[] = { | |||
|
55 | "data", | |||
|
56 | "segments", | |||
|
57 | NULL | |||
|
58 | }; | |||
|
59 | ||||
|
60 | Py_buffer segments; | |||
|
61 | Py_ssize_t segmentCount; | |||
|
62 | Py_ssize_t i; | |||
|
63 | ||||
|
64 | memset(&self->parent, 0, sizeof(self->parent)); | |||
|
65 | ||||
|
66 | #if PY_MAJOR_VERSION >= 3 | |||
|
67 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments", | |||
|
68 | #else | |||
|
69 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments", | |||
|
70 | #endif | |||
|
71 | kwlist, &self->parent, &segments)) { | |||
|
72 | return -1; | |||
|
73 | } | |||
|
74 | ||||
|
75 | if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) { | |||
|
76 | PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension"); | |||
|
77 | goto except; | |||
|
78 | } | |||
|
79 | ||||
|
80 | if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) { | |||
|
81 | PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension"); | |||
|
82 | goto except; | |||
|
83 | } | |||
|
84 | ||||
|
85 | if (segments.len % sizeof(BufferSegment)) { | |||
|
86 | PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %lu", | |||
|
87 | sizeof(BufferSegment)); | |||
|
88 | goto except; | |||
|
89 | } | |||
|
90 | ||||
|
91 | segmentCount = segments.len / sizeof(BufferSegment); | |||
|
92 | ||||
|
93 | /* Validate segments data, as blindly trusting it could lead to arbitrary | |||
|
94 | memory access. */ | |||
|
95 | for (i = 0; i < segmentCount; i++) { | |||
|
96 | BufferSegment* segment = &((BufferSegment*)(segments.buf))[i]; | |||
|
97 | ||||
|
98 | if (segment->offset + segment->length > (unsigned long long)self->parent.len) { | |||
|
99 | PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer"); | |||
|
100 | goto except; | |||
|
102 | } | |||
|
103 | } | |||
|
104 | ||||
|
105 | /* Make a copy of the segments data. It is cheap to do so and is a guard | |||
|
106 | against caller changing offsets, which has security implications. */ | |||
|
107 | self->segments = PyMem_Malloc(segments.len); | |||
|
108 | if (!self->segments) { | |||
|
109 | PyErr_NoMemory(); | |||
|
110 | goto except; | |||
|
111 | } | |||
|
112 | ||||
|
113 | memcpy(self->segments, segments.buf, segments.len); | |||
|
114 | PyBuffer_Release(&segments); | |||
|
115 | ||||
|
116 | self->data = self->parent.buf; | |||
|
117 | self->dataSize = self->parent.len; | |||
|
118 | self->segmentCount = segmentCount; | |||
|
119 | ||||
|
120 | return 0; | |||
|
121 | ||||
|
122 | except: | |||
|
123 | PyBuffer_Release(&self->parent); | |||
|
124 | PyBuffer_Release(&segments); | |||
|
125 | return -1; | |||
|
}
127 | ||||
|
128 | /** | |||
|
129 | * Construct a BufferWithSegments from existing memory and offsets. | |||
|
130 | * | |||
|
131 | * Ownership of the backing memory and BufferSegments will be transferred to | |||
|
132 | * the created object and freed when the BufferWithSegments is destroyed. | |||
|
133 | */ | |||
|
134 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, | |||
|
135 | BufferSegment* segments, Py_ssize_t segmentsSize) { | |||
|
136 | ZstdBufferWithSegments* result = NULL; | |||
|
137 | Py_ssize_t i; | |||
|
138 | ||||
|
139 | if (NULL == data) { | |||
|
140 | PyErr_SetString(PyExc_ValueError, "data is NULL"); | |||
|
141 | return NULL; | |||
|
142 | } | |||
|
143 | ||||
|
144 | if (NULL == segments) { | |||
|
145 | PyErr_SetString(PyExc_ValueError, "segments is NULL"); | |||
|
146 | return NULL; | |||
|
147 | } | |||
|
148 | ||||
|
149 | for (i = 0; i < segmentsSize; i++) { | |||
|
150 | BufferSegment* segment = &segments[i]; | |||
|
151 | ||||
|
152 | if (segment->offset + segment->length > dataSize) { | |||
|
153 | PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size"); | |||
|
154 | return NULL; | |||
|
155 | } | |||
|
156 | } | |||
|
157 | ||||
|
158 | result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType); | |||
|
159 | if (NULL == result) { | |||
|
160 | return NULL; | |||
|
161 | } | |||
|
162 | ||||
|
163 | result->useFree = 0; | |||
|
164 | ||||
|
165 | memset(&result->parent, 0, sizeof(result->parent)); | |||
|
166 | result->data = data; | |||
|
167 | result->dataSize = dataSize; | |||
|
168 | result->segments = segments; | |||
|
169 | result->segmentCount = segmentsSize; | |||
|
170 | ||||
|
171 | return result; | |||
|
172 | } | |||
|
173 | ||||
|
174 | static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) { | |||
|
175 | return self->segmentCount; | |||
|
176 | } | |||
|
177 | ||||
|
178 | static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) { | |||
|
179 | ZstdBufferSegment* result = NULL; | |||
|
180 | ||||
|
181 | if (i < 0) { | |||
|
182 | PyErr_SetString(PyExc_IndexError, "offset must be non-negative"); | |||
|
183 | return NULL; | |||
|
184 | } | |||
|
185 | ||||
|
186 | if (i >= self->segmentCount) { | |||
|
187 | PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount); | |||
|
188 | return NULL; | |||
|
189 | } | |||
|
190 | ||||
|
191 | result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL); | |||
|
192 | if (NULL == result) { | |||
|
193 | return NULL; | |||
|
194 | } | |||
|
195 | ||||
|
196 | result->parent = (PyObject*)self; | |||
|
197 | Py_INCREF(self); | |||
|
198 | ||||
|
199 | result->data = (char*)self->data + self->segments[i].offset; | |||
|
200 | result->dataSize = self->segments[i].length; | |||
|
201 | result->offset = self->segments[i].offset; | |||
|
202 | ||||
|
203 | return result; | |||
|
204 | } | |||
|
205 | ||||
|
206 | #if PY_MAJOR_VERSION >= 3 | |||
|
207 | static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) { | |||
|
208 | return PyBuffer_FillInfo(view, (PyObject*)self, self->data, self->dataSize, 1, flags); | |||
|
209 | } | |||
|
210 | #else | |||
|
211 | static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) { | |||
|
212 | if (segment != 0) { | |||
|
213 | PyErr_SetString(PyExc_ValueError, "segment number must be 0"); | |||
|
214 | return -1; | |||
|
215 | } | |||
|
216 | ||||
|
217 | *ptrptr = self->data; | |||
|
218 | return self->dataSize; | |||
|
219 | } | |||
|
220 | ||||
|
221 | static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) { | |||
|
222 | if (len) { | |||
|
223 | *len = 1; | |||
|
224 | } | |||
|
225 | ||||
|
226 | return 1; | |||
|
227 | } | |||
|
228 | #endif | |||
|
229 | ||||
|
230 | PyDoc_STRVAR(BufferWithSegments_tobytes__doc__, | |||
|
231 | "Obtain a bytes instance for this buffer.\n" | |||
|
232 | ); | |||
|
233 | ||||
|
234 | static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) { | |||
|
235 | return PyBytes_FromStringAndSize(self->data, self->dataSize); | |||
|
236 | } | |||
|
237 | ||||
|
238 | PyDoc_STRVAR(BufferWithSegments_segments__doc__, | |||
|
239 | "Obtain a BufferSegments describing segments in this sintance.\n" | |||
|
240 | ); | |||
|
241 | ||||
|
242 | static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) { | |||
|
243 | ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL); | |||
|
244 | if (NULL == result) { | |||
|
245 | return NULL; | |||
|
246 | } | |||
|
247 | ||||
|
248 | result->parent = (PyObject*)self; | |||
|
249 | Py_INCREF(self); | |||
|
250 | result->segments = self->segments; | |||
|
251 | result->segmentCount = self->segmentCount; | |||
|
252 | ||||
|
253 | return result; | |||
|
254 | } | |||
|
255 | ||||
|
256 | static PySequenceMethods BufferWithSegments_sq = { | |||
|
257 | (lenfunc)BufferWithSegments_length, /* sq_length */ | |||
|
258 | 0, /* sq_concat */ | |||
|
259 | 0, /* sq_repeat */ | |||
|
260 | (ssizeargfunc)BufferWithSegments_item, /* sq_item */ | |||
|
261 | 0, /* sq_ass_item */ | |||
|
262 | 0, /* sq_contains */ | |||
|
263 | 0, /* sq_inplace_concat */ | |||
|
264 | 0 /* sq_inplace_repeat */ | |||
|
265 | }; | |||
|
266 | ||||
|
267 | static PyBufferProcs BufferWithSegments_as_buffer = { | |||
|
268 | #if PY_MAJOR_VERSION >= 3 | |||
|
269 | (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */ | |||
|
270 | 0 /* bf_releasebuffer */ | |||
|
271 | #else | |||
|
272 | (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */ | |||
|
273 | 0, /* bf_getwritebuffer */ | |||
|
274 | (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */ | |||
|
275 | 0 /* bf_getcharbuffer */ | |||
|
276 | #endif | |||
|
277 | }; | |||
|
278 | ||||
|
279 | static PyMethodDef BufferWithSegments_methods[] = { | |||
|
280 | { "segments", (PyCFunction)BufferWithSegments_segments, | |||
|
281 | METH_NOARGS, BufferWithSegments_segments__doc__ }, | |||
|
282 | { "tobytes", (PyCFunction)BufferWithSegments_tobytes, | |||
|
283 | METH_NOARGS, BufferWithSegments_tobytes__doc__ }, | |||
|
284 | { NULL, NULL } | |||
|
285 | }; | |||
|
286 | ||||
|
287 | static PyMemberDef BufferWithSegments_members[] = { | |||
|
288 | { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize), | |||
|
289 | READONLY, "total size of the buffer in bytes" }, | |||
|
290 | { NULL } | |||
|
291 | }; | |||
|
292 | ||||
|
293 | PyTypeObject ZstdBufferWithSegmentsType = { | |||
|
294 | PyVarObject_HEAD_INIT(NULL, 0) | |||
|
295 | "zstd.BufferWithSegments", /* tp_name */ | |||
|
296 | sizeof(ZstdBufferWithSegments),/* tp_basicsize */ | |||
|
297 | 0, /* tp_itemsize */ | |||
|
298 | (destructor)BufferWithSegments_dealloc, /* tp_dealloc */ | |||
|
299 | 0, /* tp_print */ | |||
|
300 | 0, /* tp_getattr */ | |||
|
301 | 0, /* tp_setattr */ | |||
|
302 | 0, /* tp_compare */ | |||
|
303 | 0, /* tp_repr */ | |||
|
304 | 0, /* tp_as_number */ | |||
|
305 | &BufferWithSegments_sq, /* tp_as_sequence */ | |||
|
306 | 0, /* tp_as_mapping */ | |||
|
307 | 0, /* tp_hash */ | |||
|
308 | 0, /* tp_call */ | |||
|
309 | 0, /* tp_str */ | |||
|
310 | 0, /* tp_getattro */ | |||
|
311 | 0, /* tp_setattro */ | |||
|
312 | &BufferWithSegments_as_buffer, /* tp_as_buffer */ | |||
|
313 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |||
|
314 | BufferWithSegments__doc__, /* tp_doc */ | |||
|
315 | 0, /* tp_traverse */ | |||
|
316 | 0, /* tp_clear */ | |||
|
317 | 0, /* tp_richcompare */ | |||
|
318 | 0, /* tp_weaklistoffset */ | |||
|
319 | 0, /* tp_iter */ | |||
|
320 | 0, /* tp_iternext */ | |||
|
321 | BufferWithSegments_methods, /* tp_methods */ | |||
|
322 | BufferWithSegments_members, /* tp_members */ | |||
|
323 | 0, /* tp_getset */ | |||
|
324 | 0, /* tp_base */ | |||
|
325 | 0, /* tp_dict */ | |||
|
326 | 0, /* tp_descr_get */ | |||
|
327 | 0, /* tp_descr_set */ | |||
|
328 | 0, /* tp_dictoffset */ | |||
|
329 | (initproc)BufferWithSegments_init, /* tp_init */ | |||
|
330 | 0, /* tp_alloc */ | |||
|
331 | PyType_GenericNew, /* tp_new */ | |||
|
332 | }; | |||
|
333 | ||||
|
334 | PyDoc_STRVAR(BufferSegments__doc__, | |||
|
335 | "BufferSegments - Represents segments/offsets within a BufferWithSegments\n" | |||
|
336 | ); | |||
|
337 | ||||
|
338 | static void BufferSegments_dealloc(ZstdBufferSegments* self) { | |||
|
339 | Py_CLEAR(self->parent); | |||
|
340 | PyObject_Del(self); | |||
|
341 | } | |||
|
342 | ||||
|
343 | #if PY_MAJOR_VERSION >= 3 | |||
|
344 | static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) { | |||
|
345 | return PyBuffer_FillInfo(view, (PyObject*)self, | |||
|
346 | (void*)self->segments, self->segmentCount * sizeof(BufferSegment), | |||
|
347 | 1, flags); | |||
|
348 | } | |||
|
349 | #else | |||
|
350 | static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) { | |||
|
351 | if (segment != 0) { | |||
|
352 | PyErr_SetString(PyExc_ValueError, "segment number must be 0"); | |||
|
353 | return -1; | |||
|
354 | } | |||
|
355 | ||||
|
356 | *ptrptr = (void*)self->segments; | |||
|
357 | return self->segmentCount * sizeof(BufferSegment); | |||
|
358 | } | |||
|
359 | ||||
|
360 | static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) { | |||
|
361 | if (len) { | |||
|
362 | *len = 1; | |||
|
363 | } | |||
|
364 | ||||
|
365 | return 1; | |||
|
366 | } | |||
|
367 | #endif | |||
|
368 | ||||
|
369 | static PyBufferProcs BufferSegments_as_buffer = { | |||
|
370 | #if PY_MAJOR_VERSION >= 3 | |||
|
371 | (getbufferproc)BufferSegments_getbuffer, | |||
|
372 | 0 | |||
|
373 | #else | |||
|
374 | (readbufferproc)BufferSegments_getreadbuffer, | |||
|
375 | 0, | |||
|
376 | (segcountproc)BufferSegments_getsegcount, | |||
|
377 | 0 | |||
|
378 | #endif | |||
|
379 | }; | |||
|
380 | ||||
|
381 | PyTypeObject ZstdBufferSegmentsType = { | |||
|
382 | PyVarObject_HEAD_INIT(NULL, 0) | |||
|
383 | "zstd.BufferSegments", /* tp_name */ | |||
|
384 | sizeof(ZstdBufferSegments),/* tp_basicsize */ | |||
|
385 | 0, /* tp_itemsize */ | |||
|
386 | (destructor)BufferSegments_dealloc, /* tp_dealloc */ | |||
|
387 | 0, /* tp_print */ | |||
|
388 | 0, /* tp_getattr */ | |||
|
389 | 0, /* tp_setattr */ | |||
|
390 | 0, /* tp_compare */ | |||
|
391 | 0, /* tp_repr */ | |||
|
392 | 0, /* tp_as_number */ | |||
|
393 | 0, /* tp_as_sequence */ | |||
|
394 | 0, /* tp_as_mapping */ | |||
|
395 | 0, /* tp_hash */ | |||
|
396 | 0, /* tp_call */ | |||
|
397 | 0, /* tp_str */ | |||
|
398 | 0, /* tp_getattro */ | |||
|
399 | 0, /* tp_setattro */ | |||
|
400 | &BufferSegments_as_buffer, /* tp_as_buffer */ | |||
|
401 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |||
|
402 | BufferSegments__doc__, /* tp_doc */ | |||
|
403 | 0, /* tp_traverse */ | |||
|
404 | 0, /* tp_clear */ | |||
|
405 | 0, /* tp_richcompare */ | |||
|
406 | 0, /* tp_weaklistoffset */ | |||
|
407 | 0, /* tp_iter */ | |||
|
408 | 0, /* tp_iternext */ | |||
|
409 | 0, /* tp_methods */ | |||
|
410 | 0, /* tp_members */ | |||
|
411 | 0, /* tp_getset */ | |||
|
412 | 0, /* tp_base */ | |||
|
413 | 0, /* tp_dict */ | |||
|
414 | 0, /* tp_descr_get */ | |||
|
415 | 0, /* tp_descr_set */ | |||
|
416 | 0, /* tp_dictoffset */ | |||
|
417 | 0, /* tp_init */ | |||
|
418 | 0, /* tp_alloc */ | |||
|
419 | PyType_GenericNew, /* tp_new */ | |||
|
420 | }; | |||
|
421 | ||||
|
422 | PyDoc_STRVAR(BufferSegment__doc__, | |||
|
423 | "BufferSegment - Represents a segment within a BufferWithSegments\n" | |||
|
424 | ); | |||
|
425 | ||||
|
426 | static void BufferSegment_dealloc(ZstdBufferSegment* self) { | |||
|
427 | Py_CLEAR(self->parent); | |||
|
428 | PyObject_Del(self); | |||
|
429 | } | |||
|
430 | ||||
|
431 | static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) { | |||
|
432 | return self->dataSize; | |||
|
433 | } | |||
|
434 | ||||
|
435 | #if PY_MAJOR_VERSION >= 3 | |||
|
436 | static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) { | |||
|
437 | return PyBuffer_FillInfo(view, (PyObject*)self, | |||
|
438 | self->data, self->dataSize, 1, flags); | |||
|
439 | } | |||
|
440 | #else | |||
|
441 | static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) { | |||
|
442 | if (segment != 0) { | |||
|
443 | PyErr_SetString(PyExc_ValueError, "segment number must be 0"); | |||
|
444 | return -1; | |||
|
445 | } | |||
|
446 | ||||
|
447 | *ptrptr = self->data; | |||
|
448 | return self->dataSize; | |||
|
449 | } | |||
|
450 | ||||
|
451 | static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) { | |||
|
452 | if (len) { | |||
|
453 | *len = 1; | |||
|
454 | } | |||
|
455 | ||||
|
456 | return 1; | |||
|
457 | } | |||
|
458 | #endif | |||
|
459 | ||||
|
460 | PyDoc_STRVAR(BufferSegment_tobytes__doc__, | |||
|
461 | "Obtain a bytes instance for this segment.\n" | |||
|
462 | ); | |||
|
463 | ||||
|
464 | static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) { | |||
|
465 | return PyBytes_FromStringAndSize(self->data, self->dataSize); | |||
|
466 | } | |||
|
467 | ||||
|
468 | static PySequenceMethods BufferSegment_sq = { | |||
|
469 | (lenfunc)BufferSegment_length, /* sq_length */ | |||
|
470 | 0, /* sq_concat */ | |||
|
471 | 0, /* sq_repeat */ | |||
|
472 | 0, /* sq_item */ | |||
|
473 | 0, /* sq_ass_item */ | |||
|
474 | 0, /* sq_contains */ | |||
|
475 | 0, /* sq_inplace_concat */ | |||
|
476 | 0 /* sq_inplace_repeat */ | |||
|
477 | }; | |||
|
478 | ||||
|
479 | static PyBufferProcs BufferSegment_as_buffer = { | |||
|
480 | #if PY_MAJOR_VERSION >= 3 | |||
|
481 | (getbufferproc)BufferSegment_getbuffer, | |||
|
482 | 0 | |||
|
483 | #else | |||
|
484 | (readbufferproc)BufferSegment_getreadbuffer, | |||
|
485 | 0, | |||
|
486 | (segcountproc)BufferSegment_getsegcount, | |||
|
487 | 0 | |||
|
488 | #endif | |||
|
489 | }; | |||
|
490 | ||||
|
491 | static PyMethodDef BufferSegment_methods[] = { | |||
|
492 | { "tobytes", (PyCFunction)BufferSegment_tobytes, | |||
|
493 | METH_NOARGS, BufferSegment_tobytes__doc__ }, | |||
|
494 | { NULL, NULL } | |||
|
495 | }; | |||
|
496 | ||||
|
497 | static PyMemberDef BufferSegment_members[] = { | |||
|
498 | { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY, | |||
|
499 | "offset of segment within parent buffer" }, | |||
|
500 | { NULL } | |||
|
501 | }; | |||
|
502 | ||||
|
503 | PyTypeObject ZstdBufferSegmentType = { | |||
|
504 | PyVarObject_HEAD_INIT(NULL, 0) | |||
|
505 | "zstd.BufferSegment", /* tp_name */ | |||
|
506 | sizeof(ZstdBufferSegment),/* tp_basicsize */ | |||
|
507 | 0, /* tp_itemsize */ | |||
|
508 | (destructor)BufferSegment_dealloc, /* tp_dealloc */ | |||
|
509 | 0, /* tp_print */ | |||
|
510 | 0, /* tp_getattr */ | |||
|
511 | 0, /* tp_setattr */ | |||
|
512 | 0, /* tp_compare */ | |||
|
513 | 0, /* tp_repr */ | |||
|
514 | 0, /* tp_as_number */ | |||
|
515 | &BufferSegment_sq, /* tp_as_sequence */ | |||
|
516 | 0, /* tp_as_mapping */ | |||
|
517 | 0, /* tp_hash */ | |||
|
518 | 0, /* tp_call */ | |||
|
519 | 0, /* tp_str */ | |||
|
520 | 0, /* tp_getattro */ | |||
|
521 | 0, /* tp_setattro */ | |||
|
522 | &BufferSegment_as_buffer, /* tp_as_buffer */ | |||
|
523 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |||
|
524 | BufferSegment__doc__, /* tp_doc */ | |||
|
525 | 0, /* tp_traverse */ | |||
|
526 | 0, /* tp_clear */ | |||
|
527 | 0, /* tp_richcompare */ | |||
|
528 | 0, /* tp_weaklistoffset */ | |||
|
529 | 0, /* tp_iter */ | |||
|
530 | 0, /* tp_iternext */ | |||
|
531 | BufferSegment_methods, /* tp_methods */ | |||
|
532 | BufferSegment_members, /* tp_members */ | |||
|
533 | 0, /* tp_getset */ | |||
|
534 | 0, /* tp_base */ | |||
|
535 | 0, /* tp_dict */ | |||
|
536 | 0, /* tp_descr_get */ | |||
|
537 | 0, /* tp_descr_set */ | |||
|
538 | 0, /* tp_dictoffset */ | |||
|
539 | 0, /* tp_init */ | |||
|
540 | 0, /* tp_alloc */ | |||
|
541 | PyType_GenericNew, /* tp_new */ | |||
|
542 | }; | |||
|
543 | ||||
|
544 | PyDoc_STRVAR(BufferWithSegmentsCollection__doc__, | |||
|
545 | "Represents a collection of BufferWithSegments.\n" | |||
|
546 | ); | |||
|
547 | ||||
|
548 | static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) { | |||
|
549 | Py_ssize_t i; | |||
|
550 | ||||
|
551 | if (self->firstElements) { | |||
|
552 | PyMem_Free(self->firstElements); | |||
|
553 | self->firstElements = NULL; | |||
|
554 | } | |||
|
555 | ||||
|
556 | if (self->buffers) { | |||
|
557 | for (i = 0; i < self->bufferCount; i++) { | |||
|
558 | Py_CLEAR(self->buffers[i]); | |||
|
559 | } | |||
|
560 | ||||
|
561 | PyMem_Free(self->buffers); | |||
|
562 | self->buffers = NULL; | |||
|
563 | } | |||
|
564 | ||||
|
565 | PyObject_Del(self); | |||
|
566 | } | |||
|
567 | ||||
|
568 | static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) { | |||
|
569 | Py_ssize_t size; | |||
|
570 | Py_ssize_t i; | |||
|
571 | Py_ssize_t offset = 0; | |||
|
572 | ||||
|
573 | size = PyTuple_Size(args); | |||
|
574 | if (-1 == size) { | |||
|
575 | return -1; | |||
|
576 | } | |||
|
577 | ||||
|
578 | if (0 == size) { | |||
|
579 | PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument"); | |||
|
580 | return -1; | |||
|
581 | } | |||
|
582 | ||||
|
583 | for (i = 0; i < size; i++) { | |||
|
584 | PyObject* item = PyTuple_GET_ITEM(args, i); | |||
|
585 | if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) { | |||
|
586 | PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances"); | |||
|
587 | return -1; | |||
|
588 | } | |||
|
589 | ||||
|
590 | if (0 == ((ZstdBufferWithSegments*)item)->segmentCount || | |||
|
591 | 0 == ((ZstdBufferWithSegments*)item)->dataSize) { | |||
|
592 | PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty"); | |||
|
593 | return -1; | |||
|
594 | } | |||
|
595 | } | |||
|
596 | ||||
|
597 | self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*)); | |||
|
598 | if (NULL == self->buffers) { | |||
|
599 | PyErr_NoMemory(); | |||
|
600 | return -1; | |||
|
601 | } | |||
|
602 | ||||
|
603 | self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t)); | |||
|
604 | if (NULL == self->firstElements) { | |||
|
605 | PyMem_Free(self->buffers); | |||
|
606 | self->buffers = NULL; | |||
|
607 | PyErr_NoMemory(); | |||
|
608 | return -1; | |||
|
609 | } | |||
|
610 | ||||
|
611 | self->bufferCount = size; | |||
|
612 | ||||
|
613 | for (i = 0; i < size; i++) { | |||
|
614 | ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i); | |||
|
615 | ||||
|
616 | self->buffers[i] = item; | |||
|
617 | Py_INCREF(item); | |||
|
618 | ||||
|
619 | if (i > 0) { | |||
|
620 | self->firstElements[i - 1] = offset; | |||
|
621 | } | |||
|
622 | ||||
|
623 | offset += item->segmentCount; | |||
|
624 | } | |||
|
625 | ||||
|
626 | self->firstElements[size - 1] = offset; | |||
|
627 | ||||
|
628 | return 0; | |||
|
629 | } | |||
|
630 | ||||
|
631 | static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) { | |||
|
632 | Py_ssize_t i; | |||
|
633 | Py_ssize_t j; | |||
|
634 | unsigned long long size = 0; | |||
|
635 | ||||
|
636 | for (i = 0; i < self->bufferCount; i++) { | |||
|
637 | for (j = 0; j < self->buffers[i]->segmentCount; j++) { | |||
|
638 | size += self->buffers[i]->segments[j].length; | |||
|
639 | } | |||
|
640 | } | |||
|
641 | ||||
|
642 | return PyLong_FromUnsignedLongLong(size); | |||
|
643 | } | |||
|
644 | ||||
|
645 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) { | |||
|
646 | return self->firstElements[self->bufferCount - 1]; | |||
|
647 | } | |||
|
648 | ||||
|
649 | static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) { | |||
|
650 | Py_ssize_t bufferOffset; | |||
|
651 | ||||
|
652 | if (i < 0) { | |||
|
653 | PyErr_SetString(PyExc_IndexError, "offset must be non-negative"); | |||
|
654 | return NULL; | |||
|
655 | } | |||
|
656 | ||||
|
657 | if (i >= BufferWithSegmentsCollection_length(self)) { | |||
|
658 | PyErr_Format(PyExc_IndexError, "offset must be less than %zd", | |||
|
659 | BufferWithSegmentsCollection_length(self)); | |||
|
660 | return NULL; | |||
|
661 | } | |||
|
662 | ||||
|
663 | for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) { | |||
|
664 | Py_ssize_t offset = 0; | |||
|
665 | ||||
|
666 | if (i < self->firstElements[bufferOffset]) { | |||
|
667 | if (bufferOffset > 0) { | |||
|
668 | offset = self->firstElements[bufferOffset - 1]; | |||
|
669 | } | |||
|
670 | ||||
|
671 | return BufferWithSegments_item(self->buffers[bufferOffset], i - offset); | |||
|
672 | } | |||
|
673 | } | |||
|
674 | ||||
|
675 | PyErr_SetString(ZstdError, "error resolving segment; this should not happen"); | |||
|
676 | return NULL; | |||
|
677 | } | |||
|
678 | ||||
|
679 | static PySequenceMethods BufferWithSegmentsCollection_sq = { | |||
|
680 | (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */ | |||
|
681 | 0, /* sq_concat */ | |||
|
682 | 0, /* sq_repeat */ | |||
|
683 | (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */ | |||
|
684 | 0, /* sq_ass_item */ | |||
|
685 | 0, /* sq_contains */ | |||
|
686 | 0, /* sq_inplace_concat */ | |||
|
687 | 0 /* sq_inplace_repeat */ | |||
|
688 | }; | |||
|
689 | ||||
|
690 | static PyMethodDef BufferWithSegmentsCollection_methods[] = { | |||
|
691 | { "size", (PyCFunction)BufferWithSegmentsCollection_size, | |||
|
692 | METH_NOARGS, PyDoc_STR("total size in bytes of all segments") }, | |||
|
693 | { NULL, NULL } | |||
|
694 | }; | |||
|
695 | ||||
|
696 | PyTypeObject ZstdBufferWithSegmentsCollectionType = { | |||
|
697 | PyVarObject_HEAD_INIT(NULL, 0) | |||
|
698 | "zstd.BufferWithSegmentsCollection", /* tp_name */ | |||
|
699 | sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */ | |||
|
700 | 0, /* tp_itemsize */ | |||
|
701 | (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */ | |||
|
702 | 0, /* tp_print */ | |||
|
703 | 0, /* tp_getattr */ | |||
|
704 | 0, /* tp_setattr */ | |||
|
705 | 0, /* tp_compare */ | |||
|
706 | 0, /* tp_repr */ | |||
|
707 | 0, /* tp_as_number */ | |||
|
708 | &BufferWithSegmentsCollection_sq, /* tp_as_sequence */ | |||
|
709 | 0, /* tp_as_mapping */ | |||
|
710 | 0, /* tp_hash */ | |||
|
711 | 0, /* tp_call */ | |||
|
712 | 0, /* tp_str */ | |||
|
713 | 0, /* tp_getattro */ | |||
|
714 | 0, /* tp_setattro */ | |||
|
715 | 0, /* tp_as_buffer */ | |||
|
716 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |||
|
717 | BufferWithSegmentsCollection__doc__, /* tp_doc */ | |||
|
718 | 0, /* tp_traverse */ | |||
|
719 | 0, /* tp_clear */ | |||
|
720 | 0, /* tp_richcompare */ | |||
|
721 | 0, /* tp_weaklistoffset */ | |||
|
722 | /* TODO implement iterator for performance. */ | |||
|
723 | 0, /* tp_iter */ | |||
|
724 | 0, /* tp_iternext */ | |||
|
725 | BufferWithSegmentsCollection_methods, /* tp_methods */ | |||
|
726 | 0, /* tp_members */ | |||
|
727 | 0, /* tp_getset */ | |||
|
728 | 0, /* tp_base */ | |||
|
729 | 0, /* tp_dict */ | |||
|
730 | 0, /* tp_descr_get */ | |||
|
731 | 0, /* tp_descr_set */ | |||
|
732 | 0, /* tp_dictoffset */ | |||
|
733 | (initproc)BufferWithSegmentsCollection_init, /* tp_init */ | |||
|
734 | 0, /* tp_alloc */ | |||
|
735 | PyType_GenericNew, /* tp_new */ | |||
|
736 | }; | |||
|
737 | ||||
|
738 | void bufferutil_module_init(PyObject* mod) { | |||
|
739 | Py_TYPE(&ZstdBufferWithSegmentsType) = &PyType_Type; | |||
|
740 | if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) { | |||
|
741 | return; | |||
|
742 | } | |||
|
743 | ||||
|
744 | Py_INCREF(&ZstdBufferWithSegmentsType); | |||
|
745 | PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType); | |||
|
746 | ||||
|
747 | Py_TYPE(&ZstdBufferSegmentsType) = &PyType_Type; | |||
|
748 | if (PyType_Ready(&ZstdBufferSegmentsType) < 0) { | |||
|
749 | return; | |||
|
750 | } | |||
|
751 | ||||
|
752 | Py_INCREF(&ZstdBufferSegmentsType); | |||
|
753 | PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType); | |||
|
754 | ||||
|
755 | Py_TYPE(&ZstdBufferSegmentType) = &PyType_Type; | |||
|
756 | if (PyType_Ready(&ZstdBufferSegmentType) < 0) { | |||
|
757 | return; | |||
|
758 | } | |||
|
759 | ||||
|
760 | Py_INCREF(&ZstdBufferSegmentType); | |||
|
761 | PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType); | |||
|
762 | ||||
|
763 | Py_TYPE(&ZstdBufferWithSegmentsCollectionType) = &PyType_Type; | |||
|
764 | if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) { | |||
|
765 | return; | |||
|
766 | } | |||
|
767 | ||||
|
768 | Py_INCREF(&ZstdBufferWithSegmentsCollectionType); | |||
|
769 | PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType); | |||
|
770 | } |
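For orientation: the segments argument described in the BufferWithSegments docstring above is simply a packed array of ``(offset, length)`` pairs, each a native-order unsigned 64-bit integer. A minimal usage sketch follows, mirroring the tests later in this change; the sample data is illustrative only:

    import struct

    import zstd

    # Each segment is an (offset, length) pair of native-order unsigned 64-bit ints.
    segment = struct.Struct('=QQ')

    data = b'foofooxfooxy'
    segments = b''.join([segment.pack(0, 3),   # b'foo'
                         segment.pack(3, 4),   # b'foox'
                         segment.pack(7, 5)])  # b'fooxy'

    buf = zstd.BufferWithSegments(data, segments)

    assert len(buf) == 3       # number of segments
    assert buf.size == 12      # total size of the backing buffer in bytes
    assert buf[1].offset == 3
    assert buf[1].tobytes() == b'foox'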
@@ -0,0 +1,112

import struct

try:
    import unittest2 as unittest
except ImportError:
    import unittest

import zstd

ss = struct.Struct('=QQ')


class TestBufferWithSegments(unittest.TestCase):
    def test_arguments(self):
        with self.assertRaises(TypeError):
            zstd.BufferWithSegments()

        with self.assertRaises(TypeError):
            zstd.BufferWithSegments(b'foo')

        # Segments data should be a multiple of 16.
        with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
            zstd.BufferWithSegments(b'foo', b'\x00\x00')

    def test_invalid_offset(self):
        with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
            zstd.BufferWithSegments(b'foo', ss.pack(0, 4))

    def test_invalid_getitem(self):
        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))

        with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
            test = b[-10]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
            test = b[1]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
            test = b[2]

    def test_single(self):
        b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        self.assertEqual(len(b), 1)
        self.assertEqual(b.size, 3)
        self.assertEqual(b.tobytes(), b'foo')

        self.assertEqual(len(b[0]), 3)
        self.assertEqual(b[0].offset, 0)
        self.assertEqual(b[0].tobytes(), b'foo')

    def test_multiple(self):
        b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
                                                               ss.pack(3, 4),
                                                               ss.pack(7, 5)]))
        self.assertEqual(len(b), 3)
        self.assertEqual(b.size, 12)
        self.assertEqual(b.tobytes(), b'foofooxfooxy')

        self.assertEqual(b[0].tobytes(), b'foo')
        self.assertEqual(b[1].tobytes(), b'foox')
        self.assertEqual(b[2].tobytes(), b'fooxy')


class TestBufferWithSegmentsCollection(unittest.TestCase):
    def test_empty_constructor(self):
        with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
            zstd.BufferWithSegmentsCollection()

    def test_argument_validation(self):
        with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(None)

        with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
                                              None)

        with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
            zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))

    def test_length(self):
        b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
                                                          ss.pack(3, 3)]))

        c = zstd.BufferWithSegmentsCollection(b1)
        self.assertEqual(len(c), 1)
        self.assertEqual(c.size(), 3)

        c = zstd.BufferWithSegmentsCollection(b2)
        self.assertEqual(len(c), 2)
        self.assertEqual(c.size(), 6)

        c = zstd.BufferWithSegmentsCollection(b1, b2)
        self.assertEqual(len(c), 3)
        self.assertEqual(c.size(), 9)

    def test_getitem(self):
        b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
        b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
                                                          ss.pack(3, 3)]))

        c = zstd.BufferWithSegmentsCollection(b1, b2)

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
            c[3]

        with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
            c[4]

        self.assertEqual(c[0].tobytes(), b'foo')
        self.assertEqual(c[1].tobytes(), b'bar')
        self.assertEqual(c[2].tobytes(), b'baz')
@@ -0,0 +1,143
1 | import io | |||
|
2 | import os | |||
|
3 | ||||
|
4 | try: | |||
|
5 | import unittest2 as unittest | |||
|
6 | except ImportError: | |||
|
7 | import unittest | |||
|
8 | ||||
|
9 | try: | |||
|
10 | import hypothesis | |||
|
11 | import hypothesis.strategies as strategies | |||
|
12 | except ImportError: | |||
|
13 | raise unittest.SkipTest('hypothesis not available') | |||
|
14 | ||||
|
15 | import zstd | |||
|
16 | ||||
|
17 | from . common import ( | |||
|
18 | make_cffi, | |||
|
19 | random_input_data, | |||
|
20 | ) | |||
|
21 | ||||
|
22 | ||||
|
23 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
24 | @make_cffi | |||
|
25 | class TestCompressor_write_to_fuzzing(unittest.TestCase): | |||
|
26 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
27 | level=strategies.integers(min_value=1, max_value=5), | |||
|
28 | write_size=strategies.integers(min_value=1, max_value=1048576)) | |||
|
29 | def test_write_size_variance(self, original, level, write_size): | |||
|
30 | refctx = zstd.ZstdCompressor(level=level) | |||
|
31 | ref_frame = refctx.compress(original) | |||
|
32 | ||||
|
33 | cctx = zstd.ZstdCompressor(level=level) | |||
|
34 | b = io.BytesIO() | |||
|
35 | with cctx.write_to(b, size=len(original), write_size=write_size) as compressor: | |||
|
36 | compressor.write(original) | |||
|
37 | ||||
|
38 | self.assertEqual(b.getvalue(), ref_frame) | |||
|
39 | ||||
|
40 | ||||
|
41 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
42 | @make_cffi | |||
|
43 | class TestCompressor_copy_stream_fuzzing(unittest.TestCase): | |||
|
44 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
45 | level=strategies.integers(min_value=1, max_value=5), | |||
|
46 | read_size=strategies.integers(min_value=1, max_value=1048576), | |||
|
47 | write_size=strategies.integers(min_value=1, max_value=1048576)) | |||
|
48 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |||
|
49 | refctx = zstd.ZstdCompressor(level=level) | |||
|
50 | ref_frame = refctx.compress(original) | |||
|
51 | ||||
|
52 | cctx = zstd.ZstdCompressor(level=level) | |||
|
53 | source = io.BytesIO(original) | |||
|
54 | dest = io.BytesIO() | |||
|
55 | ||||
|
56 | cctx.copy_stream(source, dest, size=len(original), read_size=read_size, | |||
|
57 | write_size=write_size) | |||
|
58 | ||||
|
59 | self.assertEqual(dest.getvalue(), ref_frame) | |||
|
60 | ||||
|
61 | ||||
|
62 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
63 | @make_cffi | |||
|
64 | class TestCompressor_compressobj_fuzzing(unittest.TestCase): | |||
|
65 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
66 | level=strategies.integers(min_value=1, max_value=5), | |||
|
67 | chunk_sizes=strategies.streaming( | |||
|
68 | strategies.integers(min_value=1, max_value=4096))) | |||
|
69 | def test_random_input_sizes(self, original, level, chunk_sizes): | |||
|
70 | chunk_sizes = iter(chunk_sizes) | |||
|
71 | ||||
|
72 | refctx = zstd.ZstdCompressor(level=level) | |||
|
73 | ref_frame = refctx.compress(original) | |||
|
74 | ||||
|
75 | cctx = zstd.ZstdCompressor(level=level) | |||
|
76 | cobj = cctx.compressobj(size=len(original)) | |||
|
77 | ||||
|
78 | chunks = [] | |||
|
79 | i = 0 | |||
|
80 | while True: | |||
|
81 | chunk_size = next(chunk_sizes) | |||
|
82 | source = original[i:i + chunk_size] | |||
|
83 | if not source: | |||
|
84 | break | |||
|
85 | ||||
|
86 | chunks.append(cobj.compress(source)) | |||
|
87 | i += chunk_size | |||
|
88 | ||||
|
89 | chunks.append(cobj.flush()) | |||
|
90 | ||||
|
91 | self.assertEqual(b''.join(chunks), ref_frame) | |||
|
92 | ||||
|
93 | ||||
|
94 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
95 | @make_cffi | |||
|
96 | class TestCompressor_read_from_fuzzing(unittest.TestCase): | |||
|
97 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
98 | level=strategies.integers(min_value=1, max_value=5), | |||
|
99 | read_size=strategies.integers(min_value=1, max_value=4096), | |||
|
100 | write_size=strategies.integers(min_value=1, max_value=4096)) | |||
|
101 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |||
|
102 | refcctx = zstd.ZstdCompressor(level=level) | |||
|
103 | ref_frame = refcctx.compress(original) | |||
|
104 | ||||
|
105 | source = io.BytesIO(original) | |||
|
106 | ||||
|
107 | cctx = zstd.ZstdCompressor(level=level) | |||
|
108 | chunks = list(cctx.read_from(source, size=len(original), read_size=read_size, | |||
|
109 | write_size=write_size)) | |||
|
110 | ||||
|
111 | self.assertEqual(b''.join(chunks), ref_frame) | |||
|
112 | ||||
|
113 | ||||
|
114 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
115 | class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase): | |||
|
116 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), | |||
|
117 | min_size=1, max_size=1024), | |||
|
118 | threads=strategies.integers(min_value=1, max_value=8), | |||
|
119 | use_dict=strategies.booleans()) | |||
|
120 | def test_data_equivalence(self, original, threads, use_dict): | |||
|
121 | kwargs = {} | |||
|
122 | ||||
|
123 | # Use a content dictionary because it is cheap to create. | |||
|
124 | if use_dict: | |||
|
125 | kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) | |||
|
126 | ||||
|
127 | cctx = zstd.ZstdCompressor(level=1, | |||
|
128 | write_content_size=True, | |||
|
129 | write_checksum=True, | |||
|
130 | **kwargs) | |||
|
131 | ||||
|
132 | result = cctx.multi_compress_to_buffer(original, threads=-1) | |||
|
133 | ||||
|
134 | self.assertEqual(len(result), len(original)) | |||
|
135 | ||||
|
136 | # The frame produced via the batch APIs may not be bit identical to that | |||
|
137 | # produced by compress() because compression parameters are adjusted | |||
|
138 | # from the first input in batch mode. So the only thing we can do is | |||
|
139 | # verify the decompressed data matches the input. | |||
|
140 | dctx = zstd.ZstdDecompressor(**kwargs) | |||
|
141 | ||||
|
142 | for i, frame in enumerate(result): | |||
|
143 | self.assertEqual(dctx.decompress(frame), original[i]) |
@@ -0,0 +1,79
1 | import io | |||
|
2 | import os | |||
|
3 | ||||
|
4 | try: | |||
|
5 | import unittest2 as unittest | |||
|
6 | except ImportError: | |||
|
7 | import unittest | |||
|
8 | ||||
|
9 | try: | |||
|
10 | import hypothesis | |||
|
11 | import hypothesis.strategies as strategies | |||
|
12 | except ImportError: | |||
|
13 | raise unittest.SkipTest('hypothesis not available') | |||
|
14 | ||||
|
15 | import zstd | |||
|
16 | ||||
|
17 | from .common import ( | |||
|
18 | make_cffi, | |||
|
19 | ) | |||
|
20 | ||||
|
21 | ||||
|
22 | s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, | |||
|
23 | max_value=zstd.WINDOWLOG_MAX) | |||
|
24 | s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN, | |||
|
25 | max_value=zstd.CHAINLOG_MAX) | |||
|
26 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, | |||
|
27 | max_value=zstd.HASHLOG_MAX) | |||
|
28 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, | |||
|
29 | max_value=zstd.SEARCHLOG_MAX) | |||
|
30 | s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN, | |||
|
31 | max_value=zstd.SEARCHLENGTH_MAX) | |||
|
32 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, | |||
|
33 | max_value=zstd.TARGETLENGTH_MAX) | |||
|
34 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, | |||
|
35 | zstd.STRATEGY_DFAST, | |||
|
36 | zstd.STRATEGY_GREEDY, | |||
|
37 | zstd.STRATEGY_LAZY, | |||
|
38 | zstd.STRATEGY_LAZY2, | |||
|
39 | zstd.STRATEGY_BTLAZY2, | |||
|
40 | zstd.STRATEGY_BTOPT)) | |||
|
41 | ||||
|
42 | ||||
|
43 | @make_cffi | |||
|
44 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
45 | class TestCompressionParametersHypothesis(unittest.TestCase): | |||
|
46 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |||
|
47 | s_searchlength, s_targetlength, s_strategy) | |||
|
48 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, | |||
|
49 | searchlength, targetlength, strategy): | |||
|
50 | # ZSTD_checkCParams moves the goal posts on us from what's advertised | |||
|
51 | # in the constants. So move along with them. | |||
|
52 | if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): | |||
|
53 | searchlength += 1 | |||
|
54 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: | |||
|
55 | searchlength -= 1 | |||
|
56 | ||||
|
57 | p = zstd.CompressionParameters(windowlog, chainlog, hashlog, | |||
|
58 | searchlog, searchlength, | |||
|
59 | targetlength, strategy) | |||
|
60 | ||||
|
61 | cctx = zstd.ZstdCompressor(compression_params=p) | |||
|
62 | with cctx.write_to(io.BytesIO()): | |||
|
63 | pass | |||
|
64 | ||||
|
65 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |||
|
66 | s_searchlength, s_targetlength, s_strategy) | |||
|
67 | def test_estimate_compression_context_size(self, windowlog, chainlog, | |||
|
68 | hashlog, searchlog, | |||
|
69 | searchlength, targetlength, | |||
|
70 | strategy): | |||
|
71 | if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): | |||
|
72 | searchlength += 1 | |||
|
73 | elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST: | |||
|
74 | searchlength -= 1 | |||
|
75 | ||||
|
76 | p = zstd.CompressionParameters(windowlog, chainlog, hashlog, | |||
|
77 | searchlog, searchlength, | |||
|
78 | targetlength, strategy) | |||
|
79 | size = zstd.estimate_compression_context_size(p) |
@@ -0,0 +1,151
1 | import io | |||
|
2 | import os | |||
|
3 | ||||
|
4 | try: | |||
|
5 | import unittest2 as unittest | |||
|
6 | except ImportError: | |||
|
7 | import unittest | |||
|
8 | ||||
|
9 | try: | |||
|
10 | import hypothesis | |||
|
11 | import hypothesis.strategies as strategies | |||
|
12 | except ImportError: | |||
|
13 | raise unittest.SkipTest('hypothesis not available') | |||
|
14 | ||||
|
15 | import zstd | |||
|
16 | ||||
|
17 | from . common import ( | |||
|
18 | make_cffi, | |||
|
19 | random_input_data, | |||
|
20 | ) | |||
|
21 | ||||
|
22 | ||||
|
23 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
24 | @make_cffi | |||
|
25 | class TestDecompressor_write_to_fuzzing(unittest.TestCase): | |||
|
26 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
27 | level=strategies.integers(min_value=1, max_value=5), | |||
|
28 | write_size=strategies.integers(min_value=1, max_value=8192), | |||
|
29 | input_sizes=strategies.streaming( | |||
|
30 | strategies.integers(min_value=1, max_value=4096))) | |||
|
31 | def test_write_size_variance(self, original, level, write_size, input_sizes): | |||
|
32 | input_sizes = iter(input_sizes) | |||
|
33 | ||||
|
34 | cctx = zstd.ZstdCompressor(level=level) | |||
|
35 | frame = cctx.compress(original) | |||
|
36 | ||||
|
37 | dctx = zstd.ZstdDecompressor() | |||
|
38 | source = io.BytesIO(frame) | |||
|
39 | dest = io.BytesIO() | |||
|
40 | ||||
|
41 | with dctx.write_to(dest, write_size=write_size) as decompressor: | |||
|
42 | while True: | |||
|
43 | chunk = source.read(next(input_sizes)) | |||
|
44 | if not chunk: | |||
|
45 | break | |||
|
46 | ||||
|
47 | decompressor.write(chunk) | |||
|
48 | ||||
|
49 | self.assertEqual(dest.getvalue(), original) | |||
|
50 | ||||
|
51 | ||||
|
52 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
53 | @make_cffi | |||
|
54 | class TestDecompressor_copy_stream_fuzzing(unittest.TestCase): | |||
|
55 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
56 | level=strategies.integers(min_value=1, max_value=5), | |||
|
57 | read_size=strategies.integers(min_value=1, max_value=8192), | |||
|
58 | write_size=strategies.integers(min_value=1, max_value=8192)) | |||
|
59 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |||
|
60 | cctx = zstd.ZstdCompressor(level=level) | |||
|
61 | frame = cctx.compress(original) | |||
|
62 | ||||
|
63 | source = io.BytesIO(frame) | |||
|
64 | dest = io.BytesIO() | |||
|
65 | ||||
|
66 | dctx = zstd.ZstdDecompressor() | |||
|
67 | dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size) | |||
|
68 | ||||
|
69 | self.assertEqual(dest.getvalue(), original) | |||
|
70 | ||||
|
71 | ||||
|
72 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
73 | @make_cffi | |||
|
74 | class TestDecompressor_decompressobj_fuzzing(unittest.TestCase): | |||
|
75 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
76 | level=strategies.integers(min_value=1, max_value=5), | |||
|
77 | chunk_sizes=strategies.streaming( | |||
|
78 | strategies.integers(min_value=1, max_value=4096))) | |||
|
79 | def test_random_input_sizes(self, original, level, chunk_sizes): | |||
|
80 | chunk_sizes = iter(chunk_sizes) | |||
|
81 | ||||
|
82 | cctx = zstd.ZstdCompressor(level=level) | |||
|
83 | frame = cctx.compress(original) | |||
|
84 | ||||
|
85 | source = io.BytesIO(frame) | |||
|
86 | ||||
|
87 | dctx = zstd.ZstdDecompressor() | |||
|
88 | dobj = dctx.decompressobj() | |||
|
89 | ||||
|
90 | chunks = [] | |||
|
91 | while True: | |||
|
92 | chunk = source.read(next(chunk_sizes)) | |||
|
93 | if not chunk: | |||
|
94 | break | |||
|
95 | ||||
|
96 | chunks.append(dobj.decompress(chunk)) | |||
|
97 | ||||
|
98 | self.assertEqual(b''.join(chunks), original) | |||
|
99 | ||||
|
100 | ||||
|
101 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
102 | @make_cffi | |||
|
103 | class TestDecompressor_read_from_fuzzing(unittest.TestCase): | |||
|
104 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |||
|
105 | level=strategies.integers(min_value=1, max_value=5), | |||
|
106 | read_size=strategies.integers(min_value=1, max_value=4096), | |||
|
107 | write_size=strategies.integers(min_value=1, max_value=4096)) | |||
|
108 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |||
|
109 | cctx = zstd.ZstdCompressor(level=level) | |||
|
110 | frame = cctx.compress(original) | |||
|
111 | ||||
|
112 | source = io.BytesIO(frame) | |||
|
113 | ||||
|
114 | dctx = zstd.ZstdDecompressor() | |||
|
115 | chunks = list(dctx.read_from(source, read_size=read_size, write_size=write_size)) | |||
|
116 | ||||
|
117 | self.assertEqual(b''.join(chunks), original) | |||
|
118 | ||||
|
119 | ||||
|
120 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |||
|
121 | class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase): | |||
|
122 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), | |||
|
123 | min_size=1, max_size=1024), | |||
|
124 | threads=strategies.integers(min_value=1, max_value=8), | |||
|
125 | use_dict=strategies.booleans()) | |||
|
126 | def test_data_equivalence(self, original, threads, use_dict): | |||
|
127 | kwargs = {} | |||
|
128 | if use_dict: | |||
|
129 | kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0]) | |||
|
130 | ||||
|
131 | cctx = zstd.ZstdCompressor(level=1, | |||
|
132 | write_content_size=True, | |||
|
133 | write_checksum=True, | |||
|
134 | **kwargs) | |||
|
135 | ||||
|
136 | frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1) | |||
|
137 | ||||
|
138 | dctx = zstd.ZstdDecompressor(**kwargs) | |||
|
139 | ||||
|
140 | result = dctx.multi_decompress_to_buffer(frames_buffer) | |||
|
141 | ||||
|
142 | self.assertEqual(len(result), len(original)) | |||
|
143 | for i, frame in enumerate(result): | |||
|
144 | self.assertEqual(frame.tobytes(), original[i]) | |||
|
145 | ||||
|
146 | frames_list = [f.tobytes() for f in frames_buffer] | |||
|
147 | result = dctx.multi_decompress_to_buffer(frames_list) | |||
|
148 | ||||
|
149 | self.assertEqual(len(result), len(original)) | |||
|
150 | for i, frame in enumerate(result): | |||
|
151 | self.assertEqual(frame.tobytes(), original[i]) |
@@ -1,117 +1,145

Version History
===============

0.8.0 (released 2017-03-08)
---------------------------

* CompressionParameters now has an estimated_compression_context_size() method.
  zstd.estimate_compression_context_size() is now deprecated and slated for
  removal.
* Implemented a lot of fuzzing tests.
* CompressionParameters instances now perform extra validation by calling
  ZSTD_checkCParams() at construction time.
* multi_compress_to_buffer() API for compressing multiple inputs as a
  single operation, as efficiently as possible (see the sketch below).
* ZSTD_CStream instances are now used across multiple operations on
  ZstdCompressor instances, resulting in much better performance for
  APIs that do streaming.
* ZSTD_DStream instances are now used across multiple operations on
  ZstdDecompressor instances, resulting in much better performance for
  APIs that do streaming.
* train_dictionary() now releases the GIL.
* Support for training dictionaries using the COVER algorithm.
* multi_decompress_to_buffer() API for decompressing multiple frames as a
  single operation, as efficiently as possible.
* Support for multi-threaded compression.
* Disable deprecation warnings when compiling CFFI module.
* Fixed memory leak in train_dictionary().
* Removed DictParameters type.
* train_dictionary() now accepts keyword arguments instead of a
  DictParameters instance to control dictionary generation.

4 | 0.7.0 (released 2017-02-07) |
|
32 | 0.7.0 (released 2017-02-07) | |
5 | --------------------------- |
|
33 | --------------------------- | |
6 |
|
34 | |||
7 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. |
|
35 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. | |
8 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient |
|
36 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient | |
9 | decompression of *content-only dictionary chains*. |
|
37 | decompression of *content-only dictionary chains*. | |
10 | * CFFI module fully implemented; all tests run against both C extension and |
|
38 | * CFFI module fully implemented; all tests run against both C extension and | |
11 | CFFI implementation. |
|
39 | CFFI implementation. | |
12 | * Vendored version of zstd updated to 1.1.3. |
|
40 | * Vendored version of zstd updated to 1.1.3. | |
13 | * Use ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference() |
|
41 | * Use ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference() | |
14 | to avoid extra memory allocation of dict data. |
|
42 | to avoid extra memory allocation of dict data. | |
15 | * Add function names to error messages (by using ":name" in PyArg_Parse* |
|
43 | * Add function names to error messages (by using ":name" in PyArg_Parse* | |
16 | functions). |
|
44 | functions). | |
17 | * Reuse decompression context across operations. Previously, we created a |
|
45 | * Reuse decompression context across operations. Previously, we created a | |
18 | new ZSTD_DCtx for each decompress(). This was measured to slow down |
|
46 | new ZSTD_DCtx for each decompress(). This was measured to slow down | |
19 | decompression by 40-200MB/s. The API guarantees say ZstdDecompressor |
|
47 | decompression by 40-200MB/s. The API guarantees say ZstdDecompressor | |
20 | is not thread safe. So we reuse the ZSTD_DCtx across operations and make |
|
48 | is not thread safe. So we reuse the ZSTD_DCtx across operations and make | |
21 | things faster in the process. |
|
49 | things faster in the process. | |
22 | * ZstdCompressor.write_to()'s compress() and flush() methods now return number |
|
50 | * ZstdCompressor.write_to()'s compress() and flush() methods now return number | |
23 | of bytes written. |
|
51 | of bytes written. | |
24 | * ZstdDecompressor.write_to()'s write() method now returns the number of bytes |
|
52 | * ZstdDecompressor.write_to()'s write() method now returns the number of bytes | |
25 | written to the underlying output object. |
|
53 | written to the underlying output object. | |
26 | * CompressionParameters instances now expose their values as attributes. |
|
54 | * CompressionParameters instances now expose their values as attributes. | |
27 | * CompressionParameters instances no longer are subscriptable nor behave |
|
55 | * CompressionParameters instances no longer are subscriptable nor behave | |
28 | as tuples (backwards incompatible). Use attributes to obtain values. |
|
56 | as tuples (backwards incompatible). Use attributes to obtain values. | |
29 | * DictParameters instances now expose their values as attributes. |
|
57 | * DictParameters instances now expose their values as attributes. | |
30 |
|
58 | |||
31 | 0.6.0 (released 2017-01-14) |
|
59 | 0.6.0 (released 2017-01-14) | |
32 | --------------------------- |
|
60 | --------------------------- | |
33 |
|
61 | |||
34 | * Support for legacy zstd protocols (build time opt in feature). |
|
62 | * Support for legacy zstd protocols (build time opt in feature). | |
35 | * Automation improvements to test against Python 3.6, latest versions |
|
63 | * Automation improvements to test against Python 3.6, latest versions | |
36 | of Tox, more deterministic AppVeyor behavior. |
|
64 | of Tox, more deterministic AppVeyor behavior. | |
37 | * CFFI "parser" improved to use a compiler preprocessor instead of rewriting |
|
65 | * CFFI "parser" improved to use a compiler preprocessor instead of rewriting | |
38 | source code manually. |
|
66 | source code manually. | |
39 | * Vendored version of zstd updated to 1.1.2. |
|
67 | * Vendored version of zstd updated to 1.1.2. | |
40 | * Documentation improvements. |
|
68 | * Documentation improvements. | |
41 | * Introduce a bench.py script for performing (crude) benchmarks. |
|
69 | * Introduce a bench.py script for performing (crude) benchmarks. | |
42 | * ZSTD_CCtx instances are now reused across multiple compress() operations. |
|
70 | * ZSTD_CCtx instances are now reused across multiple compress() operations. | |
43 | * ZstdCompressor.write_to() now has a flush() method. |
|
71 | * ZstdCompressor.write_to() now has a flush() method. | |
44 | * ZstdCompressor.compressobj()'s flush() method now accepts an argument to |
|
72 | * ZstdCompressor.compressobj()'s flush() method now accepts an argument to | |
45 | flush a block (as opposed to ending the stream). |
|
73 | flush a block (as opposed to ending the stream). | |
46 | * Disallow compress(b'') when writing content sizes by default (issue #11). |
|
74 | * Disallow compress(b'') when writing content sizes by default (issue #11). | |
47 |
|
75 | |||
48 | 0.5.2 (released 2016-11-12) |
|
76 | 0.5.2 (released 2016-11-12) | |
49 | --------------------------- |
|
77 | --------------------------- | |
50 |
|
78 | |||
51 | * more packaging fixes for source distribution |
|
79 | * more packaging fixes for source distribution | |
52 |
|
80 | |||
53 | 0.5.1 (released 2016-11-12) |
|
81 | 0.5.1 (released 2016-11-12) | |
54 | --------------------------- |
|
82 | --------------------------- | |
55 |
|
83 | |||
56 | * setup_zstd.py is included in the source distribution |
|
84 | * setup_zstd.py is included in the source distribution | |
57 |
|
85 | |||
58 | 0.5.0 (released 2016-11-10) |
|
86 | 0.5.0 (released 2016-11-10) | |
59 | --------------------------- |
|
87 | --------------------------- | |
60 |
|
88 | |||
61 | * Vendored version of zstd updated to 1.1.1. |
|
89 | * Vendored version of zstd updated to 1.1.1. | |
62 | * Continuous integration for Python 3.6 and 3.7 |
|
90 | * Continuous integration for Python 3.6 and 3.7 | |
63 | * Continuous integration for Conda |
|
91 | * Continuous integration for Conda | |
64 | * Added compression and decompression APIs providing similar interfaces |
|
92 | * Added compression and decompression APIs providing similar interfaces | |
65 | to the standard library ``zlib`` and ``bz2`` modules. This allows |
|
93 | to the standard library ``zlib`` and ``bz2`` modules. This allows | |
66 | coding to a common interface. |
|
94 | coding to a common interface. | |
67 | * ``zstd.__version__`` is now defined. |
|
95 | * ``zstd.__version__`` is now defined. | |
68 | * ``read_from()`` on various APIs now accepts objects implementing the buffer |
|
96 | * ``read_from()`` on various APIs now accepts objects implementing the buffer | |
69 | protocol. |
|
97 | protocol. | |
70 | * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers |
|
98 | * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers | |
71 | to pass in an existing buffer with a header without having to create a |
|
99 | to pass in an existing buffer with a header without having to create a | |
72 | slice or a new object. |
|
100 | slice or a new object. | |
73 | * Implemented ``ZstdCompressionDict.as_bytes()``. |
|
101 | * Implemented ``ZstdCompressionDict.as_bytes()``. | |
74 | * Python's memory allocator is now used instead of ``malloc()``. |
|
102 | * Python's memory allocator is now used instead of ``malloc()``. | |
75 | * Low-level zstd data structures are reused in more instances, cutting down |
|
103 | * Low-level zstd data structures are reused in more instances, cutting down | |
76 | on overhead for certain operations. |
|
104 | on overhead for certain operations. | |
77 | * ``distutils`` boilerplate for obtaining an ``Extension`` instance |
|
105 | * ``distutils`` boilerplate for obtaining an ``Extension`` instance | |
78 | has now been refactored into a standalone ``setup_zstd.py`` file. This |
|
106 | has now been refactored into a standalone ``setup_zstd.py`` file. This | |
79 | allows other projects with ``setup.py`` files to reuse the |
|
107 | allows other projects with ``setup.py`` files to reuse the | |
80 | ``distutils`` code for this project without copying code. |
|
108 | ``distutils`` code for this project without copying code. | |
81 | * The monolithic ``zstd.c`` file has been split into a header file defining |
|
109 | * The monolithic ``zstd.c`` file has been split into a header file defining | |
82 | types and separate ``.c`` source files for the implementation. |
|
110 | types and separate ``.c`` source files for the implementation. | |
83 |
|
111 | |||
84 | History of the Project |
|
112 | History of the Project | |
85 | ====================== |
|
113 | ====================== | |
86 |
|
114 | |||
87 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a |
|
115 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a | |
88 | Python extension for use by the Mercurial project. A very hacky prototype |
|
116 | Python extension for use by the Mercurial project. A very hacky prototype | |
89 | is sent to the mercurial-devel list for RFC. |
|
117 | is sent to the mercurial-devel list for RFC. | |
90 |
|
118 | |||
91 | 2016-09-03 - Most functionality from Zstandard C API implemented. Source |
|
119 | 2016-09-03 - Most functionality from Zstandard C API implemented. Source | |
92 | code published on https://github.com/indygreg/python-zstandard. Travis-CI |
|
120 | code published on https://github.com/indygreg/python-zstandard. Travis-CI | |
93 | automation configured. 0.0.1 release on PyPI. |
|
121 | automation configured. 0.0.1 release on PyPI. | |
94 |
|
122 | |||
95 | 2016-09-05 - After the API was rounded out a bit and support for Python |
|
123 | 2016-09-05 - After the API was rounded out a bit and support for Python | |
96 | 2.6 and 2.7 was added, version 0.1 was released to PyPI. |
|
124 | 2.6 and 2.7 was added, version 0.1 was released to PyPI. | |
97 |
|
125 | |||
98 | 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2 |
|
126 | 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2 | |
99 | was released to PyPI. |
|
127 | was released to PyPI. | |
100 |
|
128 | |||
101 | 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor |
|
129 | 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor | |
102 | now accepts arguments controlling frame parameters. The source size can now |
|
130 | now accepts arguments controlling frame parameters. The source size can now | |
103 | be declared when performing streaming compression. ZstdDecompressor.decompress() |
|
131 | be declared when performing streaming compression. ZstdDecompressor.decompress() | |
104 | is implemented. Compression dictionaries are now cached when using the simple |
|
132 | is implemented. Compression dictionaries are now cached when using the simple | |
105 | compression and decompression APIs. Memory size APIs added. |
|
133 | compression and decompression APIs. Memory size APIs added. | |
106 | ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been |
|
134 | ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been | |
107 | implemented. This rounds out the major compression/decompression APIs planned |
|
135 | implemented. This rounds out the major compression/decompression APIs planned | |
108 | by the author. |
|
136 | by the author. | |
109 |
|
137 | |||
110 | 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully |
|
138 | 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully | |
111 | decoding a zstd frame (issue #2). |
|
139 | decoding a zstd frame (issue #2). | |
112 |
|
140 | |||
113 | 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and |
|
141 | 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and | |
114 | write buffer sizes, and a few bug fixes involving failure to read/write |
|
142 | write buffer sizes, and a few bug fixes involving failure to read/write | |
115 | all data when buffer sizes were too small to hold remaining data. |
|
143 | all data when buffer sizes were too small to hold remaining data. | |
116 |
|
144 | |||
117 | 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements. |
|
145 | 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements. |
@@ -1,943 +1,1393 | |||||
1 | ================ |
|
1 | ================ | |
2 | python-zstandard |
|
2 | python-zstandard | |
3 | ================ |
|
3 | ================ | |
4 |
|
4 | |||
5 | This project provides Python bindings for interfacing with the |
|
5 | This project provides Python bindings for interfacing with the | |
6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension |
|
6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension | |
7 | and CFFI interface are provided. |
|
7 | and CFFI interface are provided. | |
8 |
|
8 | |||
9 | The primary goal of the project is to provide a rich interface to the |
|
9 | The primary goal of the project is to provide a rich interface to the | |
10 | underlying C API through a Pythonic interface while not sacrificing |
|
10 | underlying C API through a Pythonic interface while not sacrificing | |
11 | performance. This means exposing most of the features and flexibility |
|
11 | performance. This means exposing most of the features and flexibility | |
12 | of the C API while not sacrificing usability or safety that Python provides. |
|
12 | of the C API while not sacrificing usability or safety that Python provides. | |
13 |
|
13 | |||
14 | The canonical home for this project is |
|
14 | The canonical home for this project is | |
15 | https://github.com/indygreg/python-zstandard. |
|
15 | https://github.com/indygreg/python-zstandard. | |
16 |
|
16 | |||
17 | | |ci-status| |win-ci-status| |
|
17 | | |ci-status| |win-ci-status| | |
18 |
|
18 | |||
19 | State of Project |
|
19 | State of Project | |
20 | ================ |
|
20 | ================ | |
21 |
|
21 | |||
22 | The project is officially in beta state. The author is reasonably satisfied |
|
22 | The project is officially in beta state. The author is reasonably satisfied | |
23 |
|
|
23 | that functionality works as advertised. **There will be some backwards | |
24 | may be some backwards incompatible changes before 1.0. Though the author |
|
24 | incompatible changes before 1.0, probably in the 0.9 release.** This may | |
25 | does not intend to make any major changes to the Python API. |
|
25 | involve renaming the main module from *zstd* to *zstandard* and renaming | |
|
26 | various types and methods. Pin the package version to prevent unwanted | |||
|
27 | breakage when this change occurs! | |||
26 |
|
28 | |||
27 | This project is vendored and distributed with Mercurial 4.1, where it is |
|
29 | This project is vendored and distributed with Mercurial 4.1, where it is | |
28 | used in a production capacity. |
|
30 | used in a production capacity. | |
29 |
|
31 | |||
30 | There is continuous integration for Python versions 2.6, 2.7, and 3.3+ |
|
32 | There is continuous integration for Python versions 2.6, 2.7, and 3.3+ | |
31 | on Linux x86_64 and Windows x86 and x86_64. The author is reasonably |
|
33 | on Linux x86_64 and Windows x86 and x86_64. The author is reasonably | |
32 | confident the extension is stable and works as advertised on these |
|
34 | confident the extension is stable and works as advertised on these | |
33 | platforms. |
|
35 | platforms. | |
34 |
|
36 | |||
|
37 | The CFFI bindings are mostly feature complete. Where a feature is implemented | |||
|
38 | in CFFI, unit tests run against both C extension and CFFI implementation to | |||
|
39 | ensure behavior parity. | |||
|
40 | ||||
35 | Expected Changes |
|
41 | Expected Changes | |
36 | ---------------- |
|
42 | ---------------- | |
37 |
|
43 | |||
38 | The author is reasonably confident in the current state of what's |
|
44 | The author is reasonably confident in the current state of what's | |
39 | implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types. |
|
45 | implemented on the ``ZstdCompressor`` and ``ZstdDecompressor`` types. | |
40 | Those APIs likely won't change significantly. Some low-level behavior |
|
46 | Those APIs likely won't change significantly. Some low-level behavior | |
41 | (such as naming and types expected by arguments) may change. |
|
47 | (such as naming and types expected by arguments) may change. | |
42 |
|
48 | |||
43 | There will likely be arguments added to control the input and output |
|
49 | There will likely be arguments added to control the input and output | |
44 | buffer sizes (currently, certain operations read and write in chunk |
|
50 | buffer sizes (currently, certain operations read and write in chunk | |
45 | sizes using zstd's preferred defaults). |
|
51 | sizes using zstd's preferred defaults). | |
46 |
|
52 | |||
47 | There should be an API that accepts an object that conforms to the buffer |
|
53 | There should be an API that accepts an object that conforms to the buffer | |
48 | interface and returns an iterator over compressed or decompressed output. |
|
54 | interface and returns an iterator over compressed or decompressed output. | |
49 |
|
55 | |||
|
56 | There should be an API that exposes an ``io.RawIOBase`` interface to | |||
|
57 | compressor and decompressor streams, like how ``gzip.GzipFile`` from | |||
|
58 | the standard library works (issue 13). | |||
|
59 | ||||
50 | The author is on the fence as to whether to support the extremely |
|
60 | The author is on the fence as to whether to support the extremely | |
51 | low level compression and decompression APIs. It could be useful to |
|
61 | low level compression and decompression APIs. It could be useful to | |
52 | support compression without the framing headers. But the author doesn't |
|
62 | support compression without the framing headers. But the author doesn't | |
53 | believe it is a high priority at this time. |
|
63 | believe it is a high priority at this time. | |
54 |
|
64 | |||
55 | The CFFI bindings are feature complete and all tests run against both |
|
65 | There will likely be a refactoring of the module names. Currently, | |
56 | the C extension and CFFI bindings to ensure behavior parity. |
|
66 | ``zstd`` is a C extension and ``zstd_cffi`` is the CFFI interface. | |
|
67 | This means that all code for the C extension must be implemented in | |||
|
68 | C. ``zstd`` may be converted to a Python module so code can be reused | |||
|
69 | between CFFI and C and so not all code in the C extension has to be C. | |||
57 |
|
70 | |||
58 | Requirements |
|
71 | Requirements | |
59 | ============ |
|
72 | ============ | |
60 |
|
73 | |||
61 | This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and |
|
74 | This extension is designed to run with Python 2.6, 2.7, 3.3, 3.4, 3.5, and | |
62 | 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is |
|
75 | 3.6 on common platforms (Linux, Windows, and OS X). Only x86_64 is | |
63 | currently well-tested as an architecture. |
|
76 | currently well-tested as an architecture. | |
64 |
|
77 | |||
65 | Installing |
|
78 | Installing | |
66 | ========== |
|
79 | ========== | |
67 |
|
80 | |||
68 | This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard. |
|
81 | This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard. | |
69 | So, to install this package:: |
|
82 | So, to install this package:: | |
70 |
|
83 | |||
71 | $ pip install zstandard |
|
84 | $ pip install zstandard | |
72 |
|
85 | |||
73 | Binary wheels are made available for some platforms. If you need to |
|
86 | Binary wheels are made available for some platforms. If you need to | |
74 | install from a source distribution, all you should need is a working C |
|
87 | install from a source distribution, all you should need is a working C | |
75 | compiler and the Python development headers/libraries. On many Linux |
|
88 | compiler and the Python development headers/libraries. On many Linux | |
76 | distributions, you can install a ``python-dev`` or ``python-devel`` |
|
89 | distributions, you can install a ``python-dev`` or ``python-devel`` | |
77 | package to provide these dependencies. |
|
90 | package to provide these dependencies. | |
78 |
|
91 | |||
79 | Packages are also uploaded to Anaconda Cloud at |
|
92 | Packages are also uploaded to Anaconda Cloud at | |
80 | https://anaconda.org/indygreg/zstandard. See that URL for how to install |
|
93 | https://anaconda.org/indygreg/zstandard. See that URL for how to install | |
81 | this package with ``conda``. |
|
94 | this package with ``conda``. | |
82 |
|
95 | |||
83 | Performance |
|
96 | Performance | |
84 | =========== |
|
97 | =========== | |
85 |
|
98 | |||
86 | Very crude and non-scientific benchmarking (most benchmarks fall in this |
|
99 | Very crude and non-scientific benchmarking (most benchmarks fall in this | |
87 | category because proper benchmarking is hard) shows that the Python bindings |
|
100 | category because proper benchmarking is hard) shows that the Python bindings | |
88 | perform within 10% of the native C implementation. |
|
101 | perform within 10% of the native C implementation. | |
89 |
|
102 | |||
90 | The following table compares the performance of compressing and decompressing |
|
103 | The following table compares the performance of compressing and decompressing | |
91 | a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values |
|
104 | a 1.1 GB tar file comprised of the files in a Firefox source checkout. Values | |
92 | obtained with the ``zstd`` program are on the left. The remaining columns detail |
|
105 | obtained with the ``zstd`` program are on the left. The remaining columns detail | |
93 | performance of various compression APIs in the Python bindings. |
|
106 | performance of various compression APIs in the Python bindings. | |
94 |
|
107 | |||
95 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
108 | +-------+-----------------+-----------------+-----------------+---------------+ | |
96 | | Level | Native | Simple | Stream In | Stream Out | |
|
109 | | Level | Native | Simple | Stream In | Stream Out | | |
97 | | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp | |
|
110 | | | Comp / Decomp | Comp / Decomp | Comp / Decomp | Comp | | |
98 | +=======+=================+=================+=================+===============+ |
|
111 | +=======+=================+=================+=================+===============+ | |
99 | | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s | |
|
112 | | 1 | 490 / 1338 MB/s | 458 / 1266 MB/s | 407 / 1156 MB/s | 405 MB/s | | |
100 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
113 | +-------+-----------------+-----------------+-----------------+---------------+ | |
101 | | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s | |
|
114 | | 2 | 412 / 1288 MB/s | 381 / 1203 MB/s | 345 / 1128 MB/s | 349 MB/s | | |
102 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
115 | +-------+-----------------+-----------------+-----------------+---------------+ | |
103 | | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s | |
|
116 | | 3 | 342 / 1312 MB/s | 319 / 1182 MB/s | 285 / 1165 MB/s | 287 MB/s | | |
104 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
117 | +-------+-----------------+-----------------+-----------------+---------------+ | |
105 | | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s | |
|
118 | | 11 | 64 / 1506 MB/s | 66 / 1436 MB/s | 56 / 1342 MB/s | 57 MB/s | | |
106 | +-------+-----------------+-----------------+-----------------+---------------+ |
|
119 | +-------+-----------------+-----------------+-----------------+---------------+ | |
107 |
|
120 | |||
108 | Again, these are very unscientific. But they show that Python is capable of |
|
121 | Again, these are very unscientific. But they show that Python is capable of | |
109 | compressing at several hundred MB/s and decompressing at over 1 GB/s. |
|
122 | compressing at several hundred MB/s and decompressing at over 1 GB/s. | |
110 |
|
123 | |||
111 | Comparison to Other Python Bindings |
|
124 | Comparison to Other Python Bindings | |
112 | =================================== |
|
125 | =================================== | |
113 |
|
126 | |||
114 | https://pypi.python.org/pypi/zstd is an alternate Python binding to |
|
127 | https://pypi.python.org/pypi/zstd is an alternate Python binding to | |
115 | Zstandard. At the time this was written, the latest release of that |
|
128 | Zstandard. At the time this was written, the latest release of that | |
116 | package (1.1.2) only exposed the simple APIs for compression and decompression. |
|
129 | package (1.1.2) only exposed the simple APIs for compression and decompression. | |
117 | This package exposes much more of the zstd API, including streaming and |
|
130 | This package exposes much more of the zstd API, including streaming and | |
118 | dictionary compression. This package also has CFFI support. |
|
131 | dictionary compression. This package also has CFFI support. | |
119 |
|
132 | |||
120 | Bundling of Zstandard Source Code |
|
133 | Bundling of Zstandard Source Code | |
121 | ================================= |
|
134 | ================================= | |
122 |
|
135 | |||
123 | The source repository for this project contains a vendored copy of the |
|
136 | The source repository for this project contains a vendored copy of the | |
124 | Zstandard source code. This is done for a few reasons. |
|
137 | Zstandard source code. This is done for a few reasons. | |
125 |
|
138 | |||
126 | First, Zstandard is relatively new and not yet widely available as a system |
|
139 | First, Zstandard is relatively new and not yet widely available as a system | |
127 | package. Providing a copy of the source code enables the Python C extension |
|
140 | package. Providing a copy of the source code enables the Python C extension | |
128 | to be compiled without requiring the user to obtain the Zstandard source code |
|
141 | to be compiled without requiring the user to obtain the Zstandard source code | |
129 | separately. |
|
142 | separately. | |
130 |
|
143 | |||
131 | Second, Zstandard has both a stable *public* API and an *experimental* API. |
|
144 | Second, Zstandard has both a stable *public* API and an *experimental* API. | |
132 | The *experimental* API is actually quite useful (contains functionality for |
|
145 | The *experimental* API is actually quite useful (contains functionality for | |
133 | training dictionaries for example), so it is something we wish to expose to |
|
146 | training dictionaries for example), so it is something we wish to expose to | |
134 | Python. However, the *experimental* API is only available via static linking. |
|
147 | Python. However, the *experimental* API is only available via static linking. | |
135 | Furthermore, the *experimental* API can change at any time. So, control over |
|
148 | Furthermore, the *experimental* API can change at any time. So, control over | |
136 | the exact version of the Zstandard library linked against is important to |
|
149 | the exact version of the Zstandard library linked against is important to | |
137 | ensure known behavior. |
|
150 | ensure known behavior. | |
138 |
|
151 | |||
139 | Instructions for Building and Testing |
|
152 | Instructions for Building and Testing | |
140 | ===================================== |
|
153 | ===================================== | |
141 |
|
154 | |||
142 | Once you have the source code, the extension can be built via setup.py:: |
|
155 | Once you have the source code, the extension can be built via setup.py:: | |
143 |
|
156 | |||
144 | $ python setup.py build_ext |
|
157 | $ python setup.py build_ext | |
145 |
|
158 | |||
146 | We recommend testing with ``nose``:: |
|
159 | We recommend testing with ``nose``:: | |
147 |
|
160 | |||
148 | $ nosetests |
|
161 | $ nosetests | |
149 |
|
162 | |||
150 | A Tox configuration is present to test against multiple Python versions:: |
|
163 | A Tox configuration is present to test against multiple Python versions:: | |
151 |
|
164 | |||
152 | $ tox |
|
165 | $ tox | |
153 |
|
166 | |||
154 | Tests use the ``hypothesis`` Python package to perform fuzzing. If you |
|
167 | Tests use the ``hypothesis`` Python package to perform fuzzing. If you | |
155 | don't have it, those tests won't run. |
|
168 | don't have it, those tests won't run. Since the fuzzing tests take longer | |
|
169 | to execute than normal tests, you'll need to opt in to running them by | |||
|
170 | setting the ``ZSTD_SLOW_TESTS`` environment variable. This is set | |||
|
171 | automatically when using ``tox``. | |||
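
For example, to run the full test suite including the fuzzing tests
(assuming a POSIX shell)::

    $ ZSTD_SLOW_TESTS=1 nosetests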
156 |
|
172 | |||
157 | There is also an experimental CFFI module. You need the ``cffi`` Python |
|
173 | The ``cffi`` Python package needs to be installed in order to build the CFFI | |
158 | package installed to build and test that. |
|
174 | bindings. If it isn't present, the CFFI bindings won't be built. | |
159 |
|
175 | |||
160 | To create a virtualenv with all development dependencies, do something |
|
176 | To create a virtualenv with all development dependencies, do something | |
161 | like the following:: |
|
177 | like the following:: | |
162 |
|
178 | |||
163 | # Python 2 |
|
179 | # Python 2 | |
164 | $ virtualenv venv |
|
180 | $ virtualenv venv | |
165 |
|
181 | |||
166 | # Python 3 |
|
182 | # Python 3 | |
167 | $ python3 -m venv venv |
|
183 | $ python3 -m venv venv | |
168 |
|
184 | |||
169 | $ source venv/bin/activate |
|
185 | $ source venv/bin/activate | |
170 | $ pip install cffi hypothesis nose tox |
|
186 | $ pip install cffi hypothesis nose tox | |
171 |
|
187 | |||
172 | API |
|
188 | API | |
173 | === |
|
189 | === | |
174 |
|
190 | |||
175 | The compiled C extension provides a ``zstd`` Python module. This module
|
191 | The compiled C extension provides a ``zstd`` Python module. The CFFI | |
176 | exposes the following interfaces. |
|
192 | bindings provide a ``zstd_cffi`` module. Both provide an identical API | |
|
193 | interface. The types, functions, and attributes exposed by these modules | |||
|
194 | are documented in the sections below. | |||
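
For example, code that wants to use whichever backend is available could
import the CFFI module and fall back to the C extension (a minimal sketch;
the fallback order shown here is an illustration, not a project
recommendation)::

    try:
        import zstd_cffi as zstd_backend
    except ImportError:
        import zstd as zstd_backend

    cctx = zstd_backend.ZstdCompressor()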
|
195 | ||||
|
196 | .. note:: | |||
|
197 | ||||
|
198 | The documentation in this section makes references to various zstd | |||
|
199 | concepts and functionality. The ``Concepts`` section below explains | |||
|
200 | these concepts in more detail. | |||
177 |
|
201 | |||
178 | ZstdCompressor |
|
202 | ZstdCompressor | |
179 | -------------- |
|
203 | -------------- | |
180 |
|
204 | |||
181 | The ``ZstdCompressor`` class provides an interface for performing |
|
205 | The ``ZstdCompressor`` class provides an interface for performing | |
182 | compression operations. |
|
206 | compression operations. | |
183 |
|
207 | |||
184 | Each instance is associated with parameters that control compression |
|
208 | Each instance is associated with parameters that control compression | |
185 | behavior. These come from the following named arguments (all optional): |
|
209 | behavior. These come from the following named arguments (all optional): | |
186 |
|
210 | |||
187 | level |
|
211 | level | |
188 | Integer compression level. Valid values are between 1 and 22. |
|
212 | Integer compression level. Valid values are between 1 and 22. | |
189 | dict_data |
|
213 | dict_data | |
190 | Compression dictionary to use. |
|
214 | Compression dictionary to use. | |
191 |
|
215 | |||
192 | Note: When using dictionary data and ``compress()`` is called multiple |
|
216 | Note: When using dictionary data and ``compress()`` is called multiple | |
193 | times, the ``CompressionParameters`` derived from an integer compression |
|
217 | times, the ``CompressionParameters`` derived from an integer compression | |
194 | ``level`` and the first compressed data's size will be reused for all |
|
218 | ``level`` and the first compressed data's size will be reused for all | |
195 | subsequent operations. This may not be desirable if source data size |
|
219 | subsequent operations. This may not be desirable if source data size | |
196 | varies significantly. |
|
220 | varies significantly. | |
197 | compression_params |
|
221 | compression_params | |
198 | A ``CompressionParameters`` instance (overrides the ``level`` value). |
|
222 | A ``CompressionParameters`` instance (overrides the ``level`` value). | |
199 | write_checksum |
|
223 | write_checksum | |
200 | Whether a 4 byte checksum should be written with the compressed data. |
|
224 | Whether a 4 byte checksum should be written with the compressed data. | |
201 | Defaults to False. If True, the decompressor can verify that decompressed |
|
225 | Defaults to False. If True, the decompressor can verify that decompressed | |
202 | data matches the original input data. |
|
226 | data matches the original input data. | |
203 | write_content_size |
|
227 | write_content_size | |
204 | Whether the size of the uncompressed data will be written into the |
|
228 | Whether the size of the uncompressed data will be written into the | |
205 | header of compressed data. Defaults to False. The data will only be |
|
229 | header of compressed data. Defaults to False. The data will only be | |
206 | written if the compressor knows the size of the input data. This is |
|
230 | written if the compressor knows the size of the input data. This is | |
207 | likely not true for streaming compression. |
|
231 | likely not true for streaming compression. | |
208 | write_dict_id |
|
232 | write_dict_id | |
209 | Whether to write the dictionary ID into the compressed data. |
|
233 | Whether to write the dictionary ID into the compressed data. | |
210 | Defaults to True. The dictionary ID is only written if a dictionary |
|
234 | Defaults to True. The dictionary ID is only written if a dictionary | |
211 | is being used. |
|
235 | is being used. | |
|
236 | threads | |||
|
237 | Enables and sets the number of threads to use for multi-threaded compression | |||
|
238 | operations. Defaults to 0, which means to use single-threaded compression. | |||
|
239 | Negative values will resolve to the number of logical CPUs in the system. | |||
|
240 | Read below for more info on multi-threaded compression. This argument only | |||
|
241 | controls thread count for operations that operate on individual pieces of | |||
|
242 | data. APIs that spawn multiple threads for working on multiple pieces of | |||
|
243 | data have their own ``threads`` argument. | |||
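
As an illustration, a compressor combining several of the arguments above
might be constructed as follows (the specific values are hypothetical and
chosen only for demonstration)::

    cctx = zstd.ZstdCompressor(level=12,
                               write_checksum=True,
                               write_content_size=True,
                               threads=-1)
    compressed = cctx.compress(b'data to compress')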
212 |
|
244 | |||
213 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` |
|
245 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` | |
214 | instances can be called from multiple Python threads simultaneously. In other |
|
246 | instances can be called from multiple Python threads simultaneously. In other | |
215 | words, assume instances are not thread safe unless stated otherwise. |
|
247 | words, assume instances are not thread safe unless stated otherwise. | |
216 |
|
248 | |||
217 | Simple API |
|
249 | Simple API | |
218 | ^^^^^^^^^^ |
|
250 | ^^^^^^^^^^ | |
219 |
|
251 | |||
220 | ``compress(data)`` compresses and returns data as a one-shot operation.:: |
|
252 | ``compress(data)`` compresses and returns data as a one-shot operation.:: | |
221 |
|
253 | |||
222 | cctx = zstd.ZstdCompressor() |
|
254 | cctx = zstd.ZstdCompressor() | |
223 | compressed = cctx.compress(b'data to compress') |
|
255 | compressed = cctx.compress(b'data to compress') | |
224 |
|
256 | |||
|
257 | The ``data`` argument can be any object that implements the *buffer protocol*. | |||
|
258 | ||||
225 | Unless ``compression_params`` or ``dict_data`` are passed to the |
|
259 | Unless ``compression_params`` or ``dict_data`` are passed to the | |
226 | ``ZstdCompressor``, each invocation of ``compress()`` will calculate the |
|
260 | ``ZstdCompressor``, each invocation of ``compress()`` will calculate the | |
227 | optimal compression parameters for the configured compression ``level`` and |
|
261 | optimal compression parameters for the configured compression ``level`` and | |
228 | input data size (some parameters are fine-tuned for small input sizes). |
|
262 | input data size (some parameters are fine-tuned for small input sizes). | |
229 |
|
263 | |||
230 | If a compression dictionary is being used, the compression parameters |
|
264 | If a compression dictionary is being used, the compression parameters | |
231 | determined from the first input's size will be reused for subsequent |
|
265 | determined from the first input's size will be reused for subsequent | |
232 | operations. |
|
266 | operations. | |
233 |
|
267 | |||
234 | There is currently a deficiency in zstd's C APIs that makes it difficult |
|
268 | There is currently a deficiency in zstd's C APIs that makes it difficult | |
235 | to round trip empty inputs when ``write_content_size=True``. Attempting |
|
269 | to round trip empty inputs when ``write_content_size=True``. Attempting | |
236 | this will raise a ``ValueError`` unless ``allow_empty=True`` is passed |
|
270 | this will raise a ``ValueError`` unless ``allow_empty=True`` is passed | |
237 | to ``compress()``. |
|
271 | to ``compress()``. | |
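
For instance, an empty input can still be compressed by opting in
explicitly (a small sketch based on the behavior described above)::

    cctx = zstd.ZstdCompressor(write_content_size=True)
    compressed = cctx.compress(b'', allow_empty=True)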
238 |
|
272 | |||
239 | Streaming Input API |
|
273 | Streaming Input API | |
240 | ^^^^^^^^^^^^^^^^^^^ |
|
274 | ^^^^^^^^^^^^^^^^^^^ | |
241 |
|
275 | |||
242 | ``write_to(fh)`` (which behaves as a context manager) allows you to *stream* |
|
276 | ``write_to(fh)`` (which behaves as a context manager) allows you to *stream* | |
243 | data into a compressor.:: |
|
277 | data into a compressor.:: | |
244 |
|
278 | |||
245 | cctx = zstd.ZstdCompressor(level=10) |
|
279 | cctx = zstd.ZstdCompressor(level=10) | |
246 | with cctx.write_to(fh) as compressor: |
|
280 | with cctx.write_to(fh) as compressor: | |
247 | compressor.write(b'chunk 0') |
|
281 | compressor.write(b'chunk 0') | |
248 | compressor.write(b'chunk 1') |
|
282 | compressor.write(b'chunk 1') | |
249 | ... |
|
283 | ... | |
250 |
|
284 | |||
251 | The argument to ``write_to()`` must have a ``write(data)`` method. As |
|
285 | The argument to ``write_to()`` must have a ``write(data)`` method. As | |
252 | compressed data is available, ``write()`` will be called with the compressed |
|
286 | compressed data is available, ``write()`` will be called with the compressed | |
253 | data as its argument. Many common Python types implement ``write()``, including |
|
287 | data as its argument. Many common Python types implement ``write()``, including | |
254 | open file handles and ``io.BytesIO``. |
|
288 | open file handles and ``io.BytesIO``. | |
255 |
|
289 | |||
256 | ``write_to()`` returns an object representing a streaming compressor instance. |
|
290 | ``write_to()`` returns an object representing a streaming compressor instance. | |
257 | It **must** be used as a context manager. That object's ``write(data)`` method |
|
291 | It **must** be used as a context manager. That object's ``write(data)`` method | |
258 | is used to feed data into the compressor. |
|
292 | is used to feed data into the compressor. | |
259 |
|
293 | |||
260 | A ``flush()`` method can be called to evict whatever data remains within the |
|
294 | A ``flush()`` method can be called to evict whatever data remains within the | |
261 | compressor's internal state into the output object. This may result in 0 or |
|
295 | compressor's internal state into the output object. This may result in 0 or | |
262 | more ``write()`` calls to the output object. |
|
296 | more ``write()`` calls to the output object. | |
263 |
|
297 | |||
264 | Both ``write()`` and ``flush()`` return the number of bytes written to the |
|
298 | Both ``write()`` and ``flush()`` return the number of bytes written to the | |
265 | object's ``write()``. In many cases, small inputs do not accumulate enough |
|
299 | object's ``write()``. In many cases, small inputs do not accumulate enough | |
266 | data to cause a write and ``write()`` will return ``0``. |
|
300 | data to cause a write and ``write()`` will return ``0``. | |
267 |
|
301 | |||
268 | If the size of the data being fed to this streaming compressor is known, |
|
302 | If the size of the data being fed to this streaming compressor is known, | |
269 | you can declare it before compression begins:: |
|
303 | you can declare it before compression begins:: | |
270 |
|
304 | |||
271 | cctx = zstd.ZstdCompressor() |
|
305 | cctx = zstd.ZstdCompressor() | |
272 | with cctx.write_to(fh, size=data_len) as compressor: |
|
306 | with cctx.write_to(fh, size=data_len) as compressor: | |
273 | compressor.write(chunk0) |
|
307 | compressor.write(chunk0) | |
274 | compressor.write(chunk1) |
|
308 | compressor.write(chunk1) | |
275 | ... |
|
309 | ... | |
276 |
|
310 | |||
277 | Declaring the size of the source data allows compression parameters to |
|
311 | Declaring the size of the source data allows compression parameters to | |
278 | be tuned. And if ``write_content_size`` is used, it also results in the |
|
312 | be tuned. And if ``write_content_size`` is used, it also results in the | |
279 | content size being written into the frame header of the output data. |
|
313 | content size being written into the frame header of the output data. | |
280 |
|
314 | |||
281 | The size of the chunks written to the destination can be specified:: |
|
315 | The size of the chunks written to the destination can be specified:: | |
282 |
|
316 | |||
283 | cctx = zstd.ZstdCompressor() |
|
317 | cctx = zstd.ZstdCompressor() | |
284 | with cctx.write_to(fh, write_size=32768) as compressor: |
|
318 | with cctx.write_to(fh, write_size=32768) as compressor: | |
285 | ... |
|
319 | ... | |
286 |
|
320 | |||
287 | To see how much memory is being used by the streaming compressor:: |
|
321 | To see how much memory is being used by the streaming compressor:: | |
288 |
|
322 | |||
289 | cctx = zstd.ZstdCompressor() |
|
323 | cctx = zstd.ZstdCompressor() | |
290 | with cctx.write_to(fh) as compressor: |
|
324 | with cctx.write_to(fh) as compressor: | |
291 | ... |
|
325 | ... | |
292 | byte_size = compressor.memory_size() |
|
326 | byte_size = compressor.memory_size() | |
293 |
|
327 | |||
294 | Streaming Output API |
|
328 | Streaming Output API | |
295 | ^^^^^^^^^^^^^^^^^^^^ |
|
329 | ^^^^^^^^^^^^^^^^^^^^ | |
296 |
|
330 | |||
297 | ``read_from(reader)`` provides a mechanism to stream data out of a compressor |
|
331 | ``read_from(reader)`` provides a mechanism to stream data out of a compressor | |
298 | as an iterator of data chunks.:: |
|
332 | as an iterator of data chunks.:: | |
299 |
|
333 | |||
300 | cctx = zstd.ZstdCompressor() |
|
334 | cctx = zstd.ZstdCompressor() | |
301 | for chunk in cctx.read_from(fh): |
|
335 | for chunk in cctx.read_from(fh): | |
302 | # Do something with emitted data. |
|
336 | # Do something with emitted data. | |
303 |
|
337 | |||
304 | ``read_from()`` accepts an object that has a ``read(size)`` method or conforms |
|
338 | ``read_from()`` accepts an object that has a ``read(size)`` method or conforms | |
305 | to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that |
|
339 | to the buffer protocol. (``bytes`` and ``memoryview`` are 2 common types that | |
306 | provide the buffer protocol.) |
|
340 | provide the buffer protocol.) | |
307 |
|
341 | |||
308 | Uncompressed data is fetched from the source either by calling ``read(size)`` |
|
342 | Uncompressed data is fetched from the source either by calling ``read(size)`` | |
309 | or by fetching a slice of data from the object directly (in the case where |
|
343 | or by fetching a slice of data from the object directly (in the case where | |
310 | the buffer protocol is being used). The returned iterator consists of chunks |
|
344 | the buffer protocol is being used). The returned iterator consists of chunks | |
311 | of compressed data. |
|
345 | of compressed data. | |
312 |
|
346 | |||
313 | If reading from the source via ``read()``, ``read()`` will be called until |
|
347 | If reading from the source via ``read()``, ``read()`` will be called until | |
314 | it raises or returns an empty bytes (``b''``). It is perfectly valid for |
|
348 | it raises or returns an empty bytes (``b''``). It is perfectly valid for | |
315 | the source to deliver fewer bytes than were requested by ``read(size)``. |
|
349 | the source to deliver fewer bytes than were requested by ``read(size)``. | |
316 |
|
350 | |||
317 | Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument |
|
351 | Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument | |
318 | declaring the size of the input stream:: |
|
352 | declaring the size of the input stream:: | |
319 |
|
353 | |||
320 | cctx = zstd.ZstdCompressor() |
|
354 | cctx = zstd.ZstdCompressor() | |
321 | for chunk in cctx.read_from(fh, size=some_int): |
|
355 | for chunk in cctx.read_from(fh, size=some_int): | |
322 | pass |
|
356 | pass | |
323 |
|
357 | |||
324 | You can also control the size that data is ``read()`` from the source and |
|
358 | You can also control the size that data is ``read()`` from the source and | |
325 | the ideal size of output chunks:: |
|
359 | the ideal size of output chunks:: | |
326 |
|
360 | |||
327 | cctx = zstd.ZstdCompressor() |
|
361 | cctx = zstd.ZstdCompressor() | |
328 | for chunk in cctx.read_from(fh, read_size=16384, write_size=8192): |
|
362 | for chunk in cctx.read_from(fh, read_size=16384, write_size=8192): | |
329 | pass |
|
363 | pass | |
330 |
|
364 | |||
331 | Unlike ``write_to()``, ``read_from()`` does not give direct control over the |
|
365 | Unlike ``write_to()``, ``read_from()`` does not give direct control over the | |
332 | sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever |
|
366 | sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever | |
333 | the object being read from delivers. These will often be of a uniform size. |
|
367 | the object being read from delivers. These will often be of a uniform size. | |
334 |
|
368 | |||
335 | Stream Copying API |
|
369 | Stream Copying API | |
336 | ^^^^^^^^^^^^^^^^^^ |
|
370 | ^^^^^^^^^^^^^^^^^^ | |
337 |
|
371 | |||
338 | ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while |
|
372 | ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while | |
339 | compressing it.:: |
|
373 | compressing it.:: | |
340 |
|
374 | |||
341 | cctx = zstd.ZstdCompressor() |
|
375 | cctx = zstd.ZstdCompressor() | |
342 | cctx.copy_stream(ifh, ofh) |
|
376 | cctx.copy_stream(ifh, ofh) | |
343 |
|
377 | |||
344 | For example, say you wish to compress a file:: |
|
378 | For example, say you wish to compress a file:: | |
345 |
|
379 | |||
346 | cctx = zstd.ZstdCompressor() |
|
380 | cctx = zstd.ZstdCompressor() | |
347 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
381 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: | |
348 | cctx.copy_stream(ifh, ofh) |
|
382 | cctx.copy_stream(ifh, ofh) | |
349 |
|
383 | |||
350 | It is also possible to declare the size of the source stream:: |
|
384 | It is also possible to declare the size of the source stream:: | |
351 |
|
385 | |||
352 | cctx = zstd.ZstdCompressor() |
|
386 | cctx = zstd.ZstdCompressor() | |
353 | cctx.copy_stream(ifh, ofh, size=len_of_input) |
|
387 | cctx.copy_stream(ifh, ofh, size=len_of_input) | |
354 |
|
388 | |||
355 | You can also specify the size of the chunks that are ``read()`` from and |
|
389 | You can also specify the size of the chunks that are ``read()`` from and | |
356 | ``write()`` to the streams:: |
|
390 | ``write()`` to the streams:: | |
357 |
|
391 | |||
358 | cctx = zstd.ZstdCompressor() |
|
392 | cctx = zstd.ZstdCompressor() | |
359 | cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384) |
|
393 | cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384) | |
360 |
|
394 | |||
361 | The stream copier returns a 2-tuple of bytes read and written:: |
|
395 | The stream copier returns a 2-tuple of bytes read and written:: | |
362 |
|
396 | |||
363 | cctx = zstd.ZstdCompressor() |
|
397 | cctx = zstd.ZstdCompressor() | |
364 | read_count, write_count = cctx.copy_stream(ifh, ofh) |
|
398 | read_count, write_count = cctx.copy_stream(ifh, ofh) | |
365 |
|
399 | |||
366 | Compressor API |
|
400 | Compressor API | |
367 | ^^^^^^^^^^^^^^ |
|
401 | ^^^^^^^^^^^^^^ | |
368 |
|
402 | |||
369 | ``compressobj()`` returns an object that exposes ``compress(data)`` and |
|
403 | ``compressobj()`` returns an object that exposes ``compress(data)`` and | |
370 | ``flush()`` methods. Each returns compressed data or an empty bytes. |
|
404 | ``flush()`` methods. Each returns compressed data or an empty bytes. | |
371 |
|
405 | |||
372 | The purpose of ``compressobj()`` is to provide an API-compatible interface |
|
406 | The purpose of ``compressobj()`` is to provide an API-compatible interface | |
373 | with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to |
|
407 | with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to | |
374 | swap in different compressor objects while using the same API. |
|
408 | swap in different compressor objects while using the same API. | |
375 |
|
409 | |||
376 | ``flush()`` accepts an optional argument indicating how to end the stream. |
|
410 | ``flush()`` accepts an optional argument indicating how to end the stream. | |
377 | ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream. |
|
411 | ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream. | |
378 | Once this type of flush is performed, ``compress()`` and ``flush()`` can |
|
412 | Once this type of flush is performed, ``compress()`` and ``flush()`` can | |
379 | no longer be called. This type of flush **must** be called to end the |
|
413 | no longer be called. This type of flush **must** be called to end the | |
380 | compression context. If not called, returned data may be incomplete. |
|
414 | compression context. If not called, returned data may be incomplete. | |
381 |
|
415 | |||
382 | A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a |
|
416 | A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a | |
383 | zstd block. Flushes of this type can be performed multiple times. The next |
|
417 | zstd block. Flushes of this type can be performed multiple times. The next | |
384 | call to ``compress()`` will begin a new zstd block. |
|
418 | call to ``compress()`` will begin a new zstd block. | |
385 |
|
419 | |||
386 | Here is how this API should be used:: |
|
420 | Here is how this API should be used:: | |
387 |
|
421 | |||
388 | cctx = zstd.ZstdCompressor() |
|
422 | cctx = zstd.ZstdCompressor() | |
389 | cobj = cctx.compressobj() |
|
423 | cobj = cctx.compressobj() | |
390 | data = cobj.compress(b'raw input 0') |
|
424 | data = cobj.compress(b'raw input 0') | |
391 | data = cobj.compress(b'raw input 1') |
|
425 | data = cobj.compress(b'raw input 1') | |
392 | data = cobj.flush() |
|
426 | data = cobj.flush() | |
393 |
|
427 | |||
394 | Or to flush blocks:: |
|
428 | Or to flush blocks:: | |
395 |
|
429 | |||
396 | cctx = zstd.ZstdCompressor() |
|
430 | cctx = zstd.ZstdCompressor() | |
397 | cobj = cctx.compressobj() |
|
431 | cobj = cctx.compressobj() | |
398 | data = cobj.compress(b'chunk in first block') |
|
432 | data = cobj.compress(b'chunk in first block') | |
399 | data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
433 | data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) | |
400 | data = cobj.compress(b'chunk in second block') |
|
434 | data = cobj.compress(b'chunk in second block') | |
401 | data = cobj.flush() |
|
435 | data = cobj.flush() | |
402 |
|
436 | |||
403 | For best performance results, keep input chunks under 256KB. This avoids |
|
437 | For best performance results, keep input chunks under 256KB. This avoids | |
404 | extra allocations for a large output object. |
|
438 | extra allocations for a large output object. | |
405 |
|
439 | |||
406 | It is possible to declare the input size of the data that will be fed into |
|
440 | It is possible to declare the input size of the data that will be fed into | |
407 | the compressor:: |
|
441 | the compressor:: | |
408 |
|
442 | |||
409 | cctx = zstd.ZstdCompressor() |
|
443 | cctx = zstd.ZstdCompressor() | |
410 | cobj = cctx.compressobj(size=6) |
|
444 | cobj = cctx.compressobj(size=6) | |
411 | data = cobj.compress(b'foobar') |
|
445 | data = cobj.compress(b'foobar') | |
412 | data = cobj.flush() |
|
446 | data = cobj.flush() | |
413 |
|
447 | |||
|
448 | Batch Compression API | |||
|
449 | ^^^^^^^^^^^^^^^^^^^^^ | |||
|
450 | ||||
|
451 | (Experimental. Not yet supported in CFFI bindings.) | |||
|
452 | ||||
|
453 | ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple | |||
|
454 | inputs as a single operation. | |||
|
455 | ||||
|
456 | Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a | |||
|
457 | ``BufferWithSegments``, or a list containing byte like objects. Each element of | |||
|
458 | the container will be compressed individually using the configured parameters | |||
|
459 | on the ``ZstdCompressor`` instance. | |||
|
460 | ||||
|
461 | The ``threads`` argument controls how many threads to use for compression. The | |||
|
462 | default is ``0`` which means to use a single thread. Negative values use the | |||
|
463 | number of logical CPUs in the machine. | |||
|
464 | ||||
|
465 | The function returns a ``BufferWithSegmentsCollection``. This type represents | |||
|
466 | N discrete memory allocations, each holding 1 or more compressed frames. | |||
|
467 | ||||
|
468 | Output data is written to shared memory buffers. This means that unlike | |||
|
469 | regular Python objects, a reference to *any* object within the collection | |||
|
470 | keeps the shared buffer and therefore memory backing it alive. This can have | |||
|
471 | undesirable effects on process memory usage. | |||
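
A minimal sketch of batch compression over a list of byte strings follows
(the inputs and thread count are hypothetical; how individual results are
accessed from the returned ``BufferWithSegmentsCollection`` is not shown
here)::

    cctx = zstd.ZstdCompressor()
    inputs = [b'chunk 0', b'chunk 1', b'chunk 2']

    # One call compresses every input; 2 worker threads is an arbitrary
    # choice for illustration.
    collection = cctx.multi_compress_to_buffer(inputs, threads=2)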
|
472 | ||||
|
473 | The API and behavior of this function is experimental and will likely change. | |||
|
474 | Known deficiencies include: | |||
|
475 | ||||
|
476 | * If asked to use multiple threads, it will always spawn that many threads, | |||
|
477 | even if the input is too small to use them. It should automatically lower | |||
|
478 | the thread count when the extra threads would just add overhead. | |||
|
479 | * The buffer allocation strategy is fixed. There is room to make it dynamic, | |||
|
480 | perhaps even to allow one output buffer per input, facilitating a variation | |||
|
481 | of the API to return a list without the adverse effects of shared memory | |||
|
482 | buffers. | |||
|
483 | ||||
414 | ZstdDecompressor |
|
484 | ZstdDecompressor | |
415 | ---------------- |
|
485 | ---------------- | |
416 |
|
486 | |||
417 | The ``ZstdDecompressor`` class provides an interface for performing |
|
487 | The ``ZstdDecompressor`` class provides an interface for performing | |
418 | decompression. |
|
488 | decompression. | |
419 |
|
489 | |||
420 | Each instance is associated with parameters that control decompression. These |
|
490 | Each instance is associated with parameters that control decompression. These | |
421 | come from the following named arguments (all optional): |
|
491 | come from the following named arguments (all optional): | |
422 |
|
492 | |||
423 | dict_data |
|
493 | dict_data | |
424 | Compression dictionary to use. |
|
494 | Compression dictionary to use. | |
425 |
|
495 | |||
426 | The interface of this class is very similar to ``ZstdCompressor`` (by design). |
|
496 | The interface of this class is very similar to ``ZstdCompressor`` (by design). | |
427 |
|
497 | |||
428 | Unless specified otherwise, assume that no two methods of ``ZstdDecompressor`` |
|
498 | Unless specified otherwise, assume that no two methods of ``ZstdDecompressor`` | |
429 | instances can be called from multiple Python threads simultaneously. In other |
|
499 | instances can be called from multiple Python threads simultaneously. In other | |
430 | words, assume instances are not thread safe unless stated otherwise. |
|
500 | words, assume instances are not thread safe unless stated otherwise. | |
431 |
|
501 | |||
432 | Simple API |
|
502 | Simple API | |
433 | ^^^^^^^^^^ |
|
503 | ^^^^^^^^^^ | |
434 |
|
504 | |||
435 | ``decompress(data)`` can be used to decompress an entire compressed zstd |
|
505 | ``decompress(data)`` can be used to decompress an entire compressed zstd | |
436 | frame in a single operation.:: |
|
506 | frame in a single operation.:: | |
437 |
|
507 | |||
438 | dctx = zstd.ZstdDecompressor() |
|
508 | dctx = zstd.ZstdDecompressor() | |
439 | decompressed = dctx.decompress(data) |
|
509 | decompressed = dctx.decompress(data) | |
440 |
|
510 | |||
441 | By default, ``decompress(data)`` will only work on data written with the content |
|
511 | By default, ``decompress(data)`` will only work on data written with the content | |
442 | size encoded in its header. This can be achieved by creating a |
|
512 | size encoded in its header. This can be achieved by creating a | |
443 | ``ZstdCompressor`` with ``write_content_size=True``. If compressed data without |
|
513 | ``ZstdCompressor`` with ``write_content_size=True``. If compressed data without | |
444 | an embedded content size is seen, ``zstd.ZstdError`` will be raised. |
|
514 | an embedded content size is seen, ``zstd.ZstdError`` will be raised. | |
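
A complete round trip under these defaults might look like this (a sketch
combining the compressor option described above)::

    cctx = zstd.ZstdCompressor(write_content_size=True)
    compressed = cctx.compress(b'data to compress')

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(compressed) == b'data to compress'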
445 |
|
515 | |||
446 | If the compressed data doesn't have its content size embedded within it, |
|
516 | If the compressed data doesn't have its content size embedded within it, | |
447 | decompression can be attempted by specifying the ``max_output_size`` |
|
517 | decompression can be attempted by specifying the ``max_output_size`` | |
448 | argument.:: |
|
518 | argument.:: | |
449 |
|
519 | |||
450 | dctx = zstd.ZstdDecompressor() |
|
520 | dctx = zstd.ZstdDecompressor() | |
451 | uncompressed = dctx.decompress(data, max_output_size=1048576) |
|
521 | uncompressed = dctx.decompress(data, max_output_size=1048576) | |
452 |
|
522 | |||
453 | Ideally, ``max_output_size`` will be identical to the decompressed output |
|
523 | Ideally, ``max_output_size`` will be identical to the decompressed output | |
454 | size. |
|
524 | size. | |
455 |
|
525 | |||
456 | If ``max_output_size`` is too small to hold the decompressed data, |
|
526 | If ``max_output_size`` is too small to hold the decompressed data, | |
457 | ``zstd.ZstdError`` will be raised. |
|
527 | ``zstd.ZstdError`` will be raised. | |
458 |
|
528 | |||
459 | If ``max_output_size`` is larger than the decompressed data, the allocated |
|
529 | If ``max_output_size`` is larger than the decompressed data, the allocated | |
460 | output buffer will be resized to only use the space required. |
|
530 | output buffer will be resized to only use the space required. | |
461 |
|
531 | |||
462 | Please note that an allocation of the requested ``max_output_size`` will be |
|
532 | Please note that an allocation of the requested ``max_output_size`` will be | |
463 | performed every time the method is called. Setting to a very large value could |
|
533 | performed every time the method is called. Setting to a very large value could | |
464 | result in a lot of work for the memory allocator and may result in |
|
534 | result in a lot of work for the memory allocator and may result in | |
465 | ``MemoryError`` being raised if the allocation fails. |
|
535 | ``MemoryError`` being raised if the allocation fails. | |
466 |
|
536 | |||
467 | If the exact size of decompressed data is unknown, it is **strongly** |
|
537 | If the exact size of decompressed data is unknown, it is **strongly** | |
468 | recommended to use a streaming API. |
|
538 | recommended to use a streaming API. | |
469 |
|
539 | |||
470 | Streaming Input API |
|
540 | Streaming Input API | |
471 | ^^^^^^^^^^^^^^^^^^^ |
|
541 | ^^^^^^^^^^^^^^^^^^^ | |
472 |
|
542 | |||
473 | ``write_to(fh)`` can be used to incrementally send compressed data to a |
|
543 | ``write_to(fh)`` can be used to incrementally send compressed data to a | |
474 | decompressor.:: |
|
544 | decompressor.:: | |
475 |
|
545 | |||
476 | dctx = zstd.ZstdDecompressor() |
|
546 | dctx = zstd.ZstdDecompressor() | |
477 | with dctx.write_to(fh) as decompressor: |
|
547 | with dctx.write_to(fh) as decompressor: | |
478 | decompressor.write(compressed_data) |
|
548 | decompressor.write(compressed_data) | |
479 |
|
549 | |||
480 | This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to |
|
550 | This behaves similarly to ``zstd.ZstdCompressor``: compressed data is written to | |
481 | the decompressor by calling ``write(data)`` and decompressed output is written |
|
551 | the decompressor by calling ``write(data)`` and decompressed output is written | |
482 | to the output object by calling its ``write(data)`` method. |
|
552 | to the output object by calling its ``write(data)`` method. | |
483 |
|
553 | |||
484 | Calls to ``write()`` will return the number of bytes written to the output |
|
554 | Calls to ``write()`` will return the number of bytes written to the output | |
485 | object. Not all inputs will result in bytes being written, so return values |
|
555 | object. Not all inputs will result in bytes being written, so return values | |
486 | of ``0`` are possible. |
|
556 | of ``0`` are possible. | |
487 |
|
557 | |||
488 | The size of the ``write()`` operations to the destination can be specified:: |
|
558 | The size of the ``write()`` operations to the destination can be specified:: | |
489 |
|
559 | |||
490 | dctx = zstd.ZstdDecompressor() |
|
560 | dctx = zstd.ZstdDecompressor() | |
491 | with dctx.write_to(fh, write_size=16384) as decompressor: |
|
561 | with dctx.write_to(fh, write_size=16384) as decompressor: | |
492 | pass |
|
562 | pass | |
493 |
|
563 | |||
494 | You can see how much memory is being used by the decompressor:: |
|
564 | You can see how much memory is being used by the decompressor:: | |
495 |
|
565 | |||
496 | dctx = zstd.ZstdDecompressor() |
|
566 | dctx = zstd.ZstdDecompressor() | |
497 | with dctx.write_to(fh) as decompressor: |
|
567 | with dctx.write_to(fh) as decompressor: | |
498 | byte_size = decompressor.memory_size() |
|
568 | byte_size = decompressor.memory_size() | |
499 |
|
569 | |||
500 | Streaming Output API |
|
570 | Streaming Output API | |
501 | ^^^^^^^^^^^^^^^^^^^^ |
|
571 | ^^^^^^^^^^^^^^^^^^^^ | |
502 |
|
572 | |||
503 | ``read_from(fh)`` provides a mechanism to stream decompressed data out of a |
|
573 | ``read_from(fh)`` provides a mechanism to stream decompressed data out of a | |
504 | compressed source as an iterator of data chunks.:: |
|
574 | compressed source as an iterator of data chunks.:: | |
505 |
|
575 | |||
506 | dctx = zstd.ZstdDecompressor() |
|
576 | dctx = zstd.ZstdDecompressor() | |
507 | for chunk in dctx.read_from(fh): |
|
577 | for chunk in dctx.read_from(fh): | |
508 | # Do something with original data. |
|
578 | # Do something with original data. | |
509 |
|
579 | |||
510 | ``read_from()`` accepts either a) an object with a ``read(size)`` method that will |
|
580 | ``read_from()`` accepts either a) an object with a ``read(size)`` method that will | |
511 | return compressed bytes, or b) an object conforming to the buffer protocol that |
|
581 | return compressed bytes, or b) an object conforming to the buffer protocol that | |
512 | can expose its data as a contiguous range of bytes. The ``bytes`` and |
|
582 | can expose its data as a contiguous range of bytes. The ``bytes`` and | |
513 | ``memoryview`` types expose this buffer protocol. |
|
583 | ``memoryview`` types expose this buffer protocol. | |
514 |
|
584 | |||
515 | ``read_from()`` returns an iterator whose elements are chunks of the |
|
585 | ``read_from()`` returns an iterator whose elements are chunks of the | |
516 | decompressed data. |
|
586 | decompressed data. | |
517 |
|
587 | |||
518 | The size of each ``read()`` requested from the source can be specified:: |
|
588 | The size of each ``read()`` requested from the source can be specified:: | |
519 |
|
589 | |||
520 | dctx = zstd.ZstdDecompressor() |
|
590 | dctx = zstd.ZstdDecompressor() | |
521 | for chunk in dctx.read_from(fh, read_size=16384): |
|
591 | for chunk in dctx.read_from(fh, read_size=16384): | |
522 | pass |
|
592 | pass | |
523 |
|
593 | |||
524 | It is also possible to skip leading bytes in the input data:: |
|
594 | It is also possible to skip leading bytes in the input data:: | |
525 |
|
595 | |||
526 | dctx = zstd.ZstdDecompressor() |
|
596 | dctx = zstd.ZstdDecompressor() | |
527 | for chunk in dctx.read_from(fh, skip_bytes=1): |
|
597 | for chunk in dctx.read_from(fh, skip_bytes=1): | |
528 | pass |
|
598 | pass | |
529 |
|
599 | |||
530 | Skipping leading bytes is useful if the source data contains extra |
|
600 | Skipping leading bytes is useful if the source data contains extra | |
531 | *header* data but you want to avoid the overhead of making a buffer copy |
|
601 | *header* data but you want to avoid the overhead of making a buffer copy | |
532 | or allocating a new ``memoryview`` object in order to decompress the data. |
|
602 | or allocating a new ``memoryview`` object in order to decompress the data. | |
533 |
|
603 | |||
534 | Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator |
|
604 | Similarly to ``ZstdCompressor.read_from()``, the consumer of the iterator | |
535 | controls when data is decompressed. If the iterator isn't consumed, |
|
605 | controls when data is decompressed. If the iterator isn't consumed, | |
536 | decompression is put on hold. |
|
606 | decompression is put on hold. | |
537 |
|
607 | |||
538 | When ``read_from()`` is passed an object conforming to the buffer protocol, |
|
608 | When ``read_from()`` is passed an object conforming to the buffer protocol, | |
539 | the behavior may seem similar to what occurs when the simple decompression |
|
609 | the behavior may seem similar to what occurs when the simple decompression | |
540 | API is used. However, this API works when the decompressed size is unknown. |
|
610 | API is used. However, this API works when the decompressed size is unknown. | |
541 | Furthermore, if feeding large inputs, the decompressor will work in chunks |
|
611 | Furthermore, if feeding large inputs, the decompressor will work in chunks | |
542 | instead of performing a single operation. |
|
612 | instead of performing a single operation. | |
543 |
|
613 | |||
544 | Stream Copying API |
|
614 | Stream Copying API | |
545 | ^^^^^^^^^^^^^^^^^^ |
|
615 | ^^^^^^^^^^^^^^^^^^ | |
546 |
|
616 | |||
547 | ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while |
|
617 | ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while | |
548 | performing decompression.:: |
|
618 | performing decompression.:: | |
549 |
|
619 | |||
550 | dctx = zstd.ZstdDecompressor() |
|
620 | dctx = zstd.ZstdDecompressor() | |
551 | dctx.copy_stream(ifh, ofh) |
|
621 | dctx.copy_stream(ifh, ofh) | |
552 |
|
622 | |||
553 | e.g. to decompress a file to another file:: |
|
623 | e.g. to decompress a file to another file:: | |
554 |
|
624 | |||
555 | dctx = zstd.ZstdDecompressor() |
|
625 | dctx = zstd.ZstdDecompressor() | |
556 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
626 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: | |
557 | dctx.copy_stream(ifh, ofh) |
|
627 | dctx.copy_stream(ifh, ofh) | |
558 |
|
628 | |||
559 | The sizes of the ``read()`` and ``write()`` operations performed on the streams |
|
629 | The sizes of the ``read()`` and ``write()`` operations performed on the streams | |
560 | can be specified:: |
|
630 | can be specified:: | |
561 |
|
631 | |||
562 | dctx = zstd.ZstdDecompressor() |
|
632 | dctx = zstd.ZstdDecompressor() | |
563 | dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384) |
|
633 | dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384) | |
564 |
|
634 | |||
565 | Decompressor API |
|
635 | Decompressor API | |
566 | ^^^^^^^^^^^^^^^^ |
|
636 | ^^^^^^^^^^^^^^^^ | |
567 |
|
637 | |||
568 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` |
|
638 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` | |
569 | method. Compressed data chunks are fed into ``decompress(data)`` and |
|
639 | method. Compressed data chunks are fed into ``decompress(data)`` and | |
570 | uncompressed output (or an empty bytes) is returned. Output from subsequent |
|
640 | uncompressed output (or an empty bytes) is returned. Output from subsequent | |
571 | calls needs to be concatenated to reassemble the full decompressed byte |
|
641 | calls needs to be concatenated to reassemble the full decompressed byte | |
572 | sequence. |
|
642 | sequence. | |
573 |
|
643 | |||
574 | The purpose of ``decompressobj()`` is to provide an API-compatible interface |
|
644 | The purpose of ``decompressobj()`` is to provide an API-compatible interface | |
575 | with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers |
|
645 | with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers | |
576 | to swap in different decompressor objects while using the same API. |
|
646 | to swap in different decompressor objects while using the same API. | |
577 |
|
647 | |||
578 | Each object is single use: once an input frame is decoded, ``decompress()`` |
|
648 | Each object is single use: once an input frame is decoded, ``decompress()`` | |
579 | can no longer be called. |
|
649 | can no longer be called. | |
580 |
|
650 | |||
581 | Here is how this API should be used:: |
|
651 | Here is how this API should be used:: | |
582 |
|
652 | |||
583 | dctx = zstd.ZstdDecompressor() |
|
653 | dctx = zstd.ZstdDecompressor() | |
584 | dobj = dctx.decompressobj() |
|
654 | dobj = dctx.decompressobj() | |
585 | data = dobj.decompress(compressed_chunk_0) |
|
655 | data = dobj.decompress(compressed_chunk_0) | |
586 | data = dobj.decompress(compressed_chunk_1) |
|
656 | data = dobj.decompress(compressed_chunk_1) | |
587 |
|
657 | |||
|
658 | Batch Decompression API | |||
|
659 | ^^^^^^^^^^^^^^^^^^^^^^^ | |||
|
660 | ||||
|
661 | (Experimental. Not yet supported in CFFI bindings.) | |||
|
662 | ||||
|
663 | ``multi_decompress_to_buffer()`` performs decompression of multiple | |||
|
664 | frames as a single operation and returns a ``BufferWithSegmentsCollection`` | |||
|
665 | containing decompressed data for all inputs. | |||
|
666 | ||||
|
667 | Compressed frames can be passed to the function as a ``BufferWithSegments``, | |||
|
668 | a ``BufferWithSegmentsCollection``, or as a list containing objects that | |||
|
669 | conform to the buffer protocol. For best performance, pass a | |||
|
670 | ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as | |||
|
671 | minimal input validation will be done for that type. If calling from | |||
|
672 | Python (as opposed to C), constructing one of these instances may add | |||
|
673 | overhead that cancels out the performance benefit of the reduced | |||
|
674 | validation relative to list inputs. | |||
|
675 | ||||
|
676 | The decompressed size of each frame must be discoverable. It can either be | |||
|
677 | embedded within the zstd frame (``write_content_size=True`` argument to | |||
|
678 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. | |||
|
679 | ||||
|
680 | The ``decompressed_sizes`` argument is an object conforming to the buffer | |||
|
681 | protocol which holds an array of 64-bit unsigned integers in the machine's | |||
|
682 | native format defining the decompressed sizes of each frame. If this argument | |||
|
683 | is passed, it avoids having to scan each frame for its decompressed size. | |||
|
684 | This frame scanning can add noticeable overhead in some scenarios. | |||
|
685 | ||||
|
686 | The ``threads`` argument controls the number of threads to use to perform | |||
|
687 | decompression operations. The default (``0``) or the value ``1`` means to | |||
|
688 | use a single thread. Negative values use the number of logical CPUs in the | |||
|
689 | machine. | |||
|
690 | ||||
|
691 | .. note:: | |||
|
692 | ||||
|
693 | It is possible to pass a ``mmap.mmap()`` instance into this function by | |||
|
694 | wrapping it with a ``BufferWithSegments`` instance (which will define the | |||
|
695 | offsets of frames within the memory mapped region). | |||
|
696 | ||||
|
697 | This function is logically equivalent to performing ``dctx.decompress()`` | |||
|
698 | on each input frame and returning the result. | |||
|
699 | ||||
|
700 | This function exists to perform decompression on multiple frames as fast | |||
|
701 | as possible by having as little overhead as possible. Since decompression is | |||
|
702 | performed as a single operation and since the decompressed output is stored in | |||
|
703 | a single buffer, extra memory allocations, Python objects, and Python function | |||
|
704 | calls are avoided. This is ideal for scenarios where callers need to access | |||
|
705 | decompressed data for multiple frames. | |||
|
706 | ||||
|
707 | Currently, the implementation always spawns multiple threads when requested, | |||
|
708 | even if the amount of work to do is small. In the future, it will be smarter | |||
|
709 | about avoiding threads and their associated overhead when the amount of | |||
|
710 | work to do is small. | |||
|
711 | ||||
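A minimal sketch of how this API might be used (the list-of-frames input and the
``threads`` argument are described above; the surrounding data handling and variable
names are illustrative only)::

   dctx = zstd.ZstdDecompressor()

   # ``frames`` is assumed to be a list of objects conforming to the buffer
   # protocol, each holding a complete zstd frame with its content size written.
   result = dctx.multi_decompress_to_buffer(frames, threads=-1)

   # The result is a ``BufferWithSegmentsCollection``; each segment holds the
   # decompressed data for the corresponding input frame.
   for i in range(len(result)):
       data = result[i].tobytes()
       # Do something with the decompressed data.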
588 | Content-Only Dictionary Chain Decompression |
|
712 | Content-Only Dictionary Chain Decompression | |
589 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
713 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
590 |
|
714 | |||
591 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of |
|
715 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of | |
592 | zstd frames produced using chained *content-only* dictionary compression. Such |
|
716 | zstd frames produced using chained *content-only* dictionary compression. Such | |
593 | a list of frames is produced by compressing discrete inputs where each |
|
717 | a list of frames is produced by compressing discrete inputs where each | |
594 | non-initial input is compressed with a *content-only* dictionary consisting |
|
718 | non-initial input is compressed with a *content-only* dictionary consisting | |
595 | of the content of the previous input. |
|
719 | of the content of the previous input. | |
596 |
|
720 | |||
597 | For example, say you have the following inputs:: |
|
721 | For example, say you have the following inputs:: | |
598 |
|
722 | |||
599 | inputs = [b'input 1', b'input 2', b'input 3'] |
|
723 | inputs = [b'input 1', b'input 2', b'input 3'] | |
600 |
|
724 | |||
601 | The zstd frame chain consists of: |
|
725 | The zstd frame chain consists of: | |
602 |
|
726 | |||
603 | 1. ``b'input 1'`` compressed in standalone/discrete mode |
|
727 | 1. ``b'input 1'`` compressed in standalone/discrete mode | |
604 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary |
|
728 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *content-only* dictionary | |
605 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary |
|
729 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *content-only* dictionary | |
606 |
|
730 | |||
607 | Each zstd frame **must** have the content size written. |
|
731 | Each zstd frame **must** have the content size written. | |
608 |
|
732 | |||
609 | The following Python code can be used to produce a *content-only dictionary |
|
733 | The following Python code can be used to produce a *content-only dictionary | |
610 | chain*:: |
|
734 | chain*:: | |
611 |
|
735 | |||
612 |
|
|
736 | def make_chain(inputs): | |
613 |
|
|
737 | frames = [] | |
614 |
|
738 | |||
615 |
|
|
739 | # First frame is compressed in standalone/discrete mode. | |
616 |
|
|
740 | zctx = zstd.ZstdCompressor(write_content_size=True) | |
617 |
|
|
741 | frames.append(zctx.compress(inputs[0])) | |
618 |
|
742 | |||
619 |
|
|
743 | # Subsequent frames use the previous fulltext as a content-only dictionary | |
620 |
|
|
744 | for i, raw in enumerate(inputs[1:]): | |
621 |
|
|
745 | dict_data = zstd.ZstdCompressionDict(inputs[i]) | |
622 |
|
|
746 | zctx = zstd.ZstdCompressor(write_content_size=True, dict_data=dict_data) | |
623 |
|
|
747 | frames.append(zctx.compress(raw)) | |
624 |
|
748 | |||
625 |
|
|
749 | return frames | |
626 |
|
750 | |||
627 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last |
|
751 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last | |
628 | element in the input chain. |
|
752 | element in the input chain. | |
629 |
|
753 | |||
630 | It is possible to implement *content-only dictionary chain* decompression |
|
754 | It is possible to implement *content-only dictionary chain* decompression | |
631 | on top of other Python APIs. However, this function will likely be significantly |
|
755 | on top of other Python APIs. However, this function will likely be significantly | |
632 | faster, especially for long input chains, as it avoids the overhead of |
|
756 | faster, especially for long input chains, as it avoids the overhead of | |
633 | instantiating and passing around intermediate objects between C and Python. |
|
757 | instantiating and passing around intermediate objects between C and Python. | |
634 |
|
758 | |||
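A short sketch pairing the ``make_chain()`` helper above with chain decompression
(variable names are illustrative)::

   frames = make_chain([b'input 1', b'input 2', b'input 3'])

   dctx = zstd.ZstdDecompressor()
   last = dctx.decompress_content_dict_chain(frames)
   # ``last`` holds the uncompressed data of the final input, i.e. b'input 3'.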
635 | Choosing an API |
|
759 | Multi-Threaded Compression | |
636 | --------------- |
|
760 | -------------------------- | |
637 |
|
||||
638 | Various forms of compression and decompression APIs are provided because each |
|
|||
639 | are suitable for different use cases. |
|
|||
640 |
|
761 | |||
641 | The simple/one-shot APIs are useful for small data, when the decompressed |
|
762 | ``ZstdCompressor`` accepts a ``threads`` argument that controls the number | |
642 | data size is known (either recorded in the zstd frame header via |
|
763 | of threads to use for compression. The way this works is that input is split | |
643 | ``write_content_size`` or known via an out-of-band mechanism, such as a file |
|
764 | into segments and each segment is fed into a worker pool for compression. Once | |
644 | size). |
|
765 | a segment is compressed, it is flushed/appended to the output. | |
645 |
|
766 | |||
646 | A limitation of the simple APIs is that input or output data must fit in memory. |
|
767 | The segment size for multi-threaded compression is chosen from the window size | |
647 | And unless using advanced tricks with Python *buffer objects*, both input and |
|
768 | of the compressor. This is derived from the ``window_log`` attribute of a | |
648 | output must fit in memory simultaneously. |
|
769 | ``CompressionParameters`` instance. By default, segment sizes are in the 1+MB | |
649 |
|
770 | range. | ||
650 | Another limitation is that compression or decompression is performed as a single |
|
|||
651 | operation. So if you feed large input, it could take a long time for the |
|
|||
652 | function to return. |
|
|||
653 |
|
771 | |||
654 | The streaming APIs do not have the limitations of the simple API. The cost to |
|
772 | If multi-threaded compression is requested and the input is smaller than the | |
655 | this is they are more complex to use than a single function call. |
|
773 | configured segment size, only a single compression thread will be used. If the | |
656 |
|
774 | input is smaller than the segment size multiplied by the thread pool size or | ||
657 | The streaming APIs put the caller in control of compression and decompression |
|
775 | if data cannot be delivered to the compressor fast enough, not all requested | |
658 | behavior by allowing them to directly control either the input or output side |
|
776 | compressor threads may be active simultaneously. | |
659 | of the operation. |
|
|||
660 |
|
||||
661 | With the streaming input APIs, the caller feeds data into the compressor or |
|
|||
662 | decompressor as they see fit. Output data will only be written after the caller |
|
|||
663 | has explicitly written data. |
|
|||
664 |
|
777 | |||
665 | With the streaming output APIs, the caller consumes output from the compressor |
|
778 | Compared to non-multi-threaded compression, multi-threaded compression has | |
666 | or decompressor as they see fit. The compressor or decompressor will only |
|
779 | higher per-operation overhead. This includes extra memory operations, | |
667 | consume data from the source when the caller is ready to receive it. |
|
780 | thread creation, lock acquisition, etc. | |
668 |
|
781 | |||
669 | One end of the streaming APIs involves a file-like object that must |
|
782 | Due to the nature of multi-threaded compression using *N* compression | |
670 | ``write()`` output data or ``read()`` input data. Depending on what the |
|
783 | *states*, the output from multi-threaded compression will likely be larger | |
671 | backing storage for these objects is, those operations may not complete quickly. |
|
784 | than non-multi-threaded compression. The difference is usually small. But | |
672 | For example, when streaming compressed data to a file, the ``write()`` into |
|
785 | there is a CPU/wall time versus size trade off that may warrant investigation. | |
673 | a streaming compressor could result in a ``write()`` to the filesystem, which |
|
786 | ||
674 | may take a long time to finish due to slow I/O on the filesystem. So, there |
|
787 | Output from multi-threaded compression does not require any special handling | |
675 | may be overhead in streaming APIs beyond the compression and decompression |
|
788 | on the decompression side. In other words, any zstd decompressor should be able | |
676 | operations. |
|
789 | to consume data produced with multi-threaded compression. | |
677 |
|
790 | |||
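A minimal sketch of requesting multi-threaded compression (the level and thread
count here are arbitrary; ``data`` is assumed to be a large ``bytes`` object)::

   cctx = zstd.ZstdCompressor(level=10, threads=4, write_content_size=True)
   compressed = cctx.compress(data)

   # The resulting frame needs no special handling: any decompressor can
   # consume output produced with multi-threaded compression.
   dctx = zstd.ZstdDecompressor()
   original = dctx.decompress(compressed)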
678 | Dictionary Creation and Management |
|
791 | Dictionary Creation and Management | |
679 | ---------------------------------- |
|
792 | ---------------------------------- | |
680 |
|
793 | |||
681 | Zstandard allows *dictionaries* to be used when compressing and |
|
794 | Compression dictionaries are represented as the ``ZstdCompressionDict`` type. | |
682 | decompressing data. The idea is that if you are compressing a lot of similar |
|
|||
683 | data, you can precompute common properties of that data (such as recurring |
|
|||
684 | byte sequences) to achieve better compression ratios. |
|
|||
685 |
|
||||
686 | In Python, compression dictionaries are represented as the |
|
|||
687 | ``ZstdCompressionDict`` type. |
|
|||
688 |
|
795 | |||
689 | Instances can be constructed from bytes:: |
|
796 | Instances can be constructed from bytes:: | |
690 |
|
797 | |||
691 | dict_data = zstd.ZstdCompressionDict(data) |
|
798 | dict_data = zstd.ZstdCompressionDict(data) | |
692 |
|
799 | |||
693 | It is possible to construct a dictionary from *any* data. Unless the |
|
800 | It is possible to construct a dictionary from *any* data. Unless the | |
694 | data begins with a magic header, the dictionary will be treated as |
|
801 | data begins with a magic header, the dictionary will be treated as | |
695 | *content-only*. *Content-only* dictionaries allow subsequent compression |
|
802 | *content-only*. *Content-only* dictionaries allow subsequent compression | |
696 | operations to reference raw data within the content. For one use of |
|
803 | operations to reference raw data within the content. For one use of | |
697 | *content-only* dictionaries, see |
|
804 | *content-only* dictionaries, see | |
698 | ``ZstdDecompressor.decompress_content_dict_chain()``. |
|
805 | ``ZstdDecompressor.decompress_content_dict_chain()``. | |
699 |
|
806 | |||
700 | More interestingly, instances can be created by *training* on sample data:: |
|
807 | More interestingly, instances can be created by *training* on sample data:: | |
701 |
|
808 | |||
702 | dict_data = zstd.train_dictionary(size, samples) |
|
809 | dict_data = zstd.train_dictionary(size, samples) | |
703 |
|
810 | |||
704 | This takes a list of bytes instances and creates and returns a |
|
811 | This takes a list of bytes instances and creates and returns a | |
705 | ``ZstdCompressionDict``. |
|
812 | ``ZstdCompressionDict``. | |
706 |
|
813 | |||
707 | You can see how many bytes are in the dictionary by calling ``len()``:: |
|
814 | You can see how many bytes are in the dictionary by calling ``len()``:: | |
708 |
|
815 | |||
709 | dict_data = zstd.train_dictionary(size, samples) |
|
816 | dict_data = zstd.train_dictionary(size, samples) | |
710 | dict_size = len(dict_data) # will not be larger than ``size`` |
|
817 | dict_size = len(dict_data) # will not be larger than ``size`` | |
711 |
|
818 | |||
712 | Once you have a dictionary, you can pass it to the objects performing |
|
819 | Once you have a dictionary, you can pass it to the objects performing | |
713 | compression and decompression:: |
|
820 | compression and decompression:: | |
714 |
|
821 | |||
715 | dict_data = zstd.train_dictionary(16384, samples) |
|
822 | dict_data = zstd.train_dictionary(16384, samples) | |
716 |
|
823 | |||
717 | cctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
824 | cctx = zstd.ZstdCompressor(dict_data=dict_data) | |
718 | for source_data in input_data: |
|
825 | for source_data in input_data: | |
719 | compressed = cctx.compress(source_data) |
|
826 | compressed = cctx.compress(source_data) | |
720 | # Do something with compressed data. |
|
827 | # Do something with compressed data. | |
721 |
|
828 | |||
722 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) |
|
829 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) | |
723 | for compressed_data in input_data: |
|
830 | for compressed_data in input_data: | |
724 | buffer = io.BytesIO() |
|
831 | buffer = io.BytesIO() | |
725 | with dctx.write_to(buffer) as decompressor: |
|
832 | with dctx.write_to(buffer) as decompressor: | |
726 | decompressor.write(compressed_data) |
|
833 | decompressor.write(compressed_data) | |
727 | # Do something with raw data in ``buffer``. |
|
834 | # Do something with raw data in ``buffer``. | |
728 |
|
835 | |||
729 | Dictionaries have unique integer IDs. You can retrieve this ID via:: |
|
836 | Dictionaries have unique integer IDs. You can retrieve this ID via:: | |
730 |
|
837 | |||
731 | dict_id = zstd.dictionary_id(dict_data) |
|
838 | dict_id = zstd.dictionary_id(dict_data) | |
732 |
|
839 | |||
733 | You can obtain the raw data in the dict (useful for persisting and constructing |
|
840 | You can obtain the raw data in the dict (useful for persisting and constructing | |
734 | a ``ZstdCompressionDict`` later) via ``as_bytes()``:: |
|
841 | a ``ZstdCompressionDict`` later) via ``as_bytes()``:: | |
735 |
|
842 | |||
736 | dict_data = zstd.train_dictionary(size, samples) |
|
843 | dict_data = zstd.train_dictionary(size, samples) | |
737 | raw_data = dict_data.as_bytes() |
|
844 | raw_data = dict_data.as_bytes() | |
738 |
|
845 | |||
|
846 | The following named arguments to ``train_dictionary`` can also be used | |||
|
847 | to further control dictionary generation. | |||
|
848 | ||||
|
849 | selectivity | |||
|
850 | Integer selectivity level. Default is 9. Larger values yield more data in | |||
|
851 | dictionary. | |||
|
852 | level | |||
|
853 | Integer compression level. Default is 6. | |||
|
854 | dict_id | |||
|
855 | Integer dictionary ID for the produced dictionary. Default is 0, which | |||
|
856 | means to use a random value. | |||
|
857 | notifications | |||
|
858 | Controls writing of informational messages to ``stderr``. ``0`` (the | |||
|
859 | default) means to write nothing. ``1`` writes errors. ``2`` writes | |||
|
860 | progression info. ``3`` writes more details. And ``4`` writes all info. | |||
|
861 | ||||
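For example, training might be tuned like this (a sketch; the argument values
are illustrative only)::

   dict_data = zstd.train_dictionary(16384, samples,
                                     selectivity=9,
                                     level=6,
                                     dict_id=0,
                                     notifications=1)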
|
862 | Cover Dictionaries | |||
|
863 | ^^^^^^^^^^^^^^^^^^ | |||
|
864 | ||||
|
865 | An alternate dictionary training mechanism named *cover* is also available. | |||
|
866 | More details about this training mechanism are available in the paper | |||
|
867 | *Effective Construction of Relative Lempel-Ziv Dictionaries* (authors: | |||
|
868 | Liao, Petri, Moffat, Wirth). | |||
|
869 | ||||
|
870 | To use this mechanism, use ``zstd.train_cover_dictionary()`` instead of | |||
|
871 | ``zstd.train_dictionary()``. The function behaves nearly the same except | |||
|
872 | its arguments are different and the returned dictionary will contain ``k`` | |||
|
873 | and ``d`` attributes reflecting the parameters to the cover algorithm. | |||
|
874 | ||||
|
875 | .. note:: | |||
|
876 | ||||
|
877 | The ``k`` and ``d`` attributes are only populated on dictionary | |||
|
878 | instances created by this function. If a ``ZstdCompressionDict`` is | |||
|
879 | constructed from raw bytes data, the ``k`` and ``d`` attributes will | |||
|
880 | be ``0``. | |||
|
881 | ||||
|
882 | The segment and dmer size parameters to the cover algorithm can either be | |||
|
883 | specified manually or you can ask ``train_cover_dictionary()`` to try | |||
|
884 | multiple values and pick the best one, where *best* means the smallest | |||
|
885 | compressed data size. | |||
|
886 | ||||
|
887 | In manual mode, the ``k`` and ``d`` arguments must be specified or a | |||
|
888 | ``ZstdError`` will be raised. | |||
|
889 | ||||
|
890 | In automatic mode (triggered by specifying ``optimize=True``), ``k`` | |||
|
891 | and ``d`` are optional. If a value isn't specified, then default values for | |||
|
892 | both are tested. The ``steps`` argument can control the number of steps | |||
|
893 | through ``k`` values. The ``level`` argument defines the compression level | |||
|
894 | that will be used when testing the compressed size. And ``threads`` can | |||
|
895 | specify the number of threads to use for concurrent operation. | |||
|
896 | ||||
|
897 | This function takes the following arguments: | |||
|
898 | ||||
|
899 | dict_size | |||
|
900 | Target size in bytes of the dictionary to generate. | |||
|
901 | samples | |||
|
902 | A list of bytes holding samples the dictionary will be trained from. | |||
|
903 | k | |||
|
904 | Parameter to cover algorithm defining the segment size. A reasonable range | |||
|
905 | is [16, 2048+]. | |||
|
906 | d | |||
|
907 | Parameter to cover algorithm defining the dmer size. A reasonable range is | |||
|
908 | [6, 16]. ``d`` must be less than or equal to ``k``. | |||
|
909 | dict_id | |||
|
910 | Integer dictionary ID for the produced dictionary. Default is 0, which uses | |||
|
911 | a random value. | |||
|
912 | optimize | |||
|
913 | When true, test dictionary generation with multiple parameters. | |||
|
914 | level | |||
|
915 | Integer target compression level when testing compression with | |||
|
916 | ``optimize=True``. Default is 1. | |||
|
917 | steps | |||
|
918 | Number of steps through ``k`` values to perform when ``optimize=True``. | |||
|
919 | Default is 32. | |||
|
920 | threads | |||
|
921 | Number of threads to use when ``optimize=True``. Default is 0, which means | |||
|
922 | to use a single thread. A negative value can be specified to use as many | |||
|
923 | threads as there are detected logical CPUs. | |||
|
924 | notifications | |||
|
925 | Controls writing of informational messages to ``stderr``. See the | |||
|
926 | documentation for ``train_dictionary()`` for more. | |||
|
927 | ||||
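A sketch of both modes (the size, ``k``, and ``d`` values are illustrative)::

   # Manual mode: k and d must be given explicitly.
   dict_data = zstd.train_cover_dictionary(16384, samples, k=64, d=8)

   # Automatic mode: try multiple parameters and keep the best ones.
   dict_data = zstd.train_cover_dictionary(16384, samples,
                                           optimize=True,
                                           level=1,
                                           steps=32,
                                           threads=-1)
   print(dict_data.k, dict_data.d)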
739 | Explicit Compression Parameters |
|
928 | Explicit Compression Parameters | |
740 | ------------------------------- |
|
929 | ------------------------------- | |
741 |
|
930 | |||
742 | Zstandard's integer compression levels along with the input size and dictionary |
|
931 | Zstandard's integer compression levels along with the input size and dictionary | |
743 | size are converted into a data structure defining multiple parameters to tune |
|
932 | size are converted into a data structure defining multiple parameters to tune | |
744 | behavior of the compression algorithm. It is possible to define this |
|
933 | behavior of the compression algorithm. It is possible to define this | |
745 | data structure explicitly to have lower-level control over compression behavior. |
|
934 | data structure explicitly to have lower-level control over compression behavior. | |
746 |
|
935 | |||
747 | The ``zstd.CompressionParameters`` type represents this data structure. |
|
936 | The ``zstd.CompressionParameters`` type represents this data structure. | |
748 | You can see how Zstandard converts compression levels to this data structure |
|
937 | You can see how Zstandard converts compression levels to this data structure | |
749 | by calling ``zstd.get_compression_parameters()``. e.g.:: |
|
938 | by calling ``zstd.get_compression_parameters()``. e.g.:: | |
750 |
|
939 | |||
751 | params = zstd.get_compression_parameters(5) |
|
940 | params = zstd.get_compression_parameters(5) | |
752 |
|
941 | |||
753 | This function also accepts the uncompressed data size and dictionary size |
|
942 | This function also accepts the uncompressed data size and dictionary size | |
754 | to adjust parameters:: |
|
943 | to adjust parameters:: | |
755 |
|
944 | |||
756 | params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data)) |
|
945 | params = zstd.get_compression_parameters(3, source_size=len(data), dict_size=len(dict_data)) | |
757 |
|
946 | |||
758 | You can also construct compression parameters from their low-level components:: |
|
947 | You can also construct compression parameters from their low-level components:: | |
759 |
|
948 | |||
760 | params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST) |
|
949 | params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST) | |
761 |
|
950 | |||
762 | You can then configure a compressor to use the custom parameters:: |
|
951 | You can then configure a compressor to use the custom parameters:: | |
763 |
|
952 | |||
764 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
953 | cctx = zstd.ZstdCompressor(compression_params=params) | |
765 |
|
954 | |||
766 | The members/attributes of ``CompressionParameters`` instances are as follows:: |
|
955 | The members/attributes of ``CompressionParameters`` instances are as follows:: | |
767 |
|
956 | |||
768 | * window_log |
|
957 | * window_log | |
769 | * chain_log |
|
958 | * chain_log | |
770 | * hash_log |
|
959 | * hash_log | |
771 | * search_log |
|
960 | * search_log | |
772 | * search_length |
|
961 | * search_length | |
773 | * target_length |
|
962 | * target_length | |
774 | * strategy |
|
963 | * strategy | |
775 |
|
964 | |||
776 | This is the order the arguments are passed to the constructor if not using |
|
965 | This is the order the arguments are passed to the constructor if not using | |
777 | named arguments. |
|
966 | named arguments. | |
778 |
|
967 | |||
779 | You'll need to read the Zstandard documentation for what these parameters |
|
968 | You'll need to read the Zstandard documentation for what these parameters | |
780 | do. |
|
969 | do. | |
781 |
|
970 | |||
782 | Frame Inspection |
|
971 | Frame Inspection | |
783 | ---------------- |
|
972 | ---------------- | |
784 |
|
973 | |||
785 | Data emitted from zstd compression is encapsulated in a *frame*. This frame |
|
974 | Data emitted from zstd compression is encapsulated in a *frame*. This frame | |
786 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing |
|
975 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing | |
787 | the frame in more detail. For more info, see |
|
976 | the frame in more detail. For more info, see | |
788 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. |
|
977 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. | |
789 |
|
978 | |||
790 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes |
|
979 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes | |
791 | instance and returns a ``FrameParameters`` object describing the frame. |
|
980 | instance and returns a ``FrameParameters`` object describing the frame. | |
792 |
|
981 | |||
793 | Depending on which fields are present in the frame and their values, the |
|
982 | Depending on which fields are present in the frame and their values, the | |
794 | length of the frame parameters varies. If insufficient bytes are passed |
|
983 | length of the frame parameters varies. If insufficient bytes are passed | |
795 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure |
|
984 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure | |
796 | frame parameters can be parsed, pass in at least 18 bytes. |
|
985 | frame parameters can be parsed, pass in at least 18 bytes. | |
797 |
|
986 | |||
798 | ``FrameParameters`` instances have the following attributes: |
|
987 | ``FrameParameters`` instances have the following attributes: | |
799 |
|
988 | |||
800 | content_size |
|
989 | content_size | |
801 | Integer size of original, uncompressed content. This will be ``0`` if the |
|
990 | Integer size of original, uncompressed content. This will be ``0`` if the | |
802 | original content size isn't written to the frame (controlled with the |
|
991 | original content size isn't written to the frame (controlled with the | |
803 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input |
|
992 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input | |
804 | content size was ``0``. |
|
993 | content size was ``0``. | |
805 |
|
994 | |||
806 | window_size |
|
995 | window_size | |
807 | Integer size of maximum back-reference distance in compressed data. |
|
996 | Integer size of maximum back-reference distance in compressed data. | |
808 |
|
997 | |||
809 | dict_id |
|
998 | dict_id | |
810 | Integer of dictionary ID used for compression. ``0`` if no dictionary |
|
999 | Integer of dictionary ID used for compression. ``0`` if no dictionary | |
811 | ID was used or if the dictionary ID was ``0``. |
|
1000 | ID was used or if the dictionary ID was ``0``. | |
812 |
|
1001 | |||
813 | has_checksum |
|
1002 | has_checksum | |
814 | Bool indicating whether a 4 byte content checksum is stored at the end |
|
1003 | Bool indicating whether a 4 byte content checksum is stored at the end | |
815 | of the frame. |
|
1004 | of the frame. | |
816 |
|
1005 | |||
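For example (a minimal sketch; ``frame_data`` is assumed to hold at least the
first 18 bytes of a zstd frame)::

   params = zstd.get_frame_parameters(frame_data[0:18])

   params.content_size   # 0 if the size was not written to the frame
   params.window_size    # maximum back-reference distance
   params.dict_id        # 0 if no dictionary ID was recorded
   params.has_checksum   # True if a 4 byte checksum trails the frame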
817 | Misc Functionality |
|
1006 | Misc Functionality | |
818 | ------------------ |
|
1007 | ------------------ | |
819 |
|
1008 | |||
820 | estimate_compression_context_size(CompressionParameters) |
|
1009 | estimate_compression_context_size(CompressionParameters) | |
821 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1010 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
822 |
|
1011 | |||
823 | Given a ``CompressionParameters`` struct, estimate the memory size required |
|
1012 | Given a ``CompressionParameters`` struct, estimate the memory size required | |
824 | to perform compression. |
|
1013 | to perform compression. | |
825 |
|
1014 | |||
826 | estimate_decompression_context_size() |
|
1015 | estimate_decompression_context_size() | |
827 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1016 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
828 |
|
1017 | |||
829 | Estimate the memory size requirements for a decompressor instance. |
|
1018 | Estimate the memory size requirements for a decompressor instance. | |
830 |
|
1019 | |||
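A short sketch of both estimation helpers (the compression level is
illustrative)::

   params = zstd.get_compression_parameters(3)
   compress_bytes = zstd.estimate_compression_context_size(params)

   decompress_bytes = zstd.estimate_decompression_context_size()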
831 | Constants |
|
1020 | Constants | |
832 | --------- |
|
1021 | --------- | |
833 |
|
1022 | |||
834 | The following module constants/attributes are exposed: |
|
1023 | The following module constants/attributes are exposed: | |
835 |
|
1024 | |||
836 | ZSTD_VERSION |
|
1025 | ZSTD_VERSION | |
837 | This module attribute exposes a 3-tuple of the Zstandard version. e.g. |
|
1026 | This module attribute exposes a 3-tuple of the Zstandard version. e.g. | |
838 | ``(1, 0, 0)`` |
|
1027 | ``(1, 0, 0)`` | |
839 | MAX_COMPRESSION_LEVEL |
|
1028 | MAX_COMPRESSION_LEVEL | |
840 | Integer max compression level accepted by compression functions |
|
1029 | Integer max compression level accepted by compression functions | |
841 | COMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1030 | COMPRESSION_RECOMMENDED_INPUT_SIZE | |
842 | Recommended chunk size to feed to compressor functions |
|
1031 | Recommended chunk size to feed to compressor functions | |
843 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1032 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
844 | Recommended chunk size for compression output |
|
1033 | Recommended chunk size for compression output | |
845 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1034 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
846 | Recommended chunk size to feed into decompressor functions |
|
1035 | Recommended chunk size to feed into decompressor functions | |
847 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1036 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
848 | Recommended chunk size for decompression output |
|
1037 | Recommended chunk size for decompression output | |
849 |
|
1038 | |||
850 | FRAME_HEADER |
|
1039 | FRAME_HEADER | |
851 | bytes containing header of the Zstandard frame |
|
1040 | bytes containing header of the Zstandard frame | |
852 | MAGIC_NUMBER |
|
1041 | MAGIC_NUMBER | |
853 | Frame header as an integer |
|
1042 | Frame header as an integer | |
854 |
|
1043 | |||
855 | WINDOWLOG_MIN |
|
1044 | WINDOWLOG_MIN | |
856 | Minimum value for compression parameter |
|
1045 | Minimum value for compression parameter | |
857 | WINDOWLOG_MAX |
|
1046 | WINDOWLOG_MAX | |
858 | Maximum value for compression parameter |
|
1047 | Maximum value for compression parameter | |
859 | CHAINLOG_MIN |
|
1048 | CHAINLOG_MIN | |
860 | Minimum value for compression parameter |
|
1049 | Minimum value for compression parameter | |
861 | CHAINLOG_MAX |
|
1050 | CHAINLOG_MAX | |
862 | Maximum value for compression parameter |
|
1051 | Maximum value for compression parameter | |
863 | HASHLOG_MIN |
|
1052 | HASHLOG_MIN | |
864 | Minimum value for compression parameter |
|
1053 | Minimum value for compression parameter | |
865 | HASHLOG_MAX |
|
1054 | HASHLOG_MAX | |
866 | Maximum value for compression parameter |
|
1055 | Maximum value for compression parameter | |
867 | SEARCHLOG_MIN |
|
1056 | SEARCHLOG_MIN | |
868 | Minimum value for compression parameter |
|
1057 | Minimum value for compression parameter | |
869 | SEARCHLOG_MAX |
|
1058 | SEARCHLOG_MAX | |
870 | Maximum value for compression parameter |
|
1059 | Maximum value for compression parameter | |
871 | SEARCHLENGTH_MIN |
|
1060 | SEARCHLENGTH_MIN | |
872 | Minimum value for compression parameter |
|
1061 | Minimum value for compression parameter | |
873 | SEARCHLENGTH_MAX |
|
1062 | SEARCHLENGTH_MAX | |
874 | Maximum value for compression parameter |
|
1063 | Maximum value for compression parameter | |
875 | TARGETLENGTH_MIN |
|
1064 | TARGETLENGTH_MIN | |
876 | Minimum value for compression parameter |
|
1065 | Minimum value for compression parameter | |
877 | TARGETLENGTH_MAX |
|
1066 | TARGETLENGTH_MAX | |
878 | Maximum value for compression parameter |
|
1067 | Maximum value for compression parameter | |
879 | STRATEGY_FAST |
|
1068 | STRATEGY_FAST | |
880 | Compression strategy |
|
1069 | Compression strategy | |
881 | STRATEGY_DFAST |
|
1070 | STRATEGY_DFAST | |
882 | Compression strategy |
|
1071 | Compression strategy | |
883 | STRATEGY_GREEDY |
|
1072 | STRATEGY_GREEDY | |
884 | Compression strategy |
|
1073 | Compression strategy | |
885 | STRATEGY_LAZY |
|
1074 | STRATEGY_LAZY | |
886 | Compression strategy |
|
1075 | Compression strategy | |
887 | STRATEGY_LAZY2 |
|
1076 | STRATEGY_LAZY2 | |
888 | Compression strategy |
|
1077 | Compression strategy | |
889 | STRATEGY_BTLAZY2 |
|
1078 | STRATEGY_BTLAZY2 | |
890 | Compression strategy |
|
1079 | Compression strategy | |
891 | STRATEGY_BTOPT |
|
1080 | STRATEGY_BTOPT | |
892 | Compression strategy |
|
1081 | Compression strategy | |
893 |
|
1082 | |||
894 | Performance Considerations |
|
1083 | Performance Considerations | |
895 | -------------------------- |
|
1084 | -------------------------- | |
896 |
|
1085 | |||
897 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a |
|
1086 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a | |
898 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` |
|
1087 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` | |
899 | or ``ZstdDecompressor`` instance for multiple operations is faster than |
|
1088 | or ``ZstdDecompressor`` instance for multiple operations is faster than | |
900 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each |
|
1089 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each | |
901 | operation. The differences are magnified as the size of data decreases. For |
|
1090 | operation. The differences are magnified as the size of data decreases. For | |
902 | example, the difference between *context* reuse and non-reuse for 100,000 |
|
1091 | example, the difference between *context* reuse and non-reuse for 100,000 | |
903 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) |
|
1092 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) | |
904 | whereas 10 1,000,000 byte inputs will be more similar in speed (because the |
|
1093 | whereas 10 1,000,000 byte inputs will be more similar in speed (because the | |
905 | time spent doing compression dwarfs time spent creating new *contexts*). |
|
1094 | time spent doing compression dwarfs time spent creating new *contexts*). | |
906 |
|
1095 | |||
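For example, the following sketch reuses a single compressor across many small
inputs instead of creating one per input (``inputs`` is assumed to be an
iterable of small ``bytes`` objects)::

   cctx = zstd.ZstdCompressor()
   compressed = [cctx.compress(chunk) for chunk in inputs]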
|
1096 | Buffer Types | |||
|
1097 | ------------ | |||
|
1098 | ||||
|
1099 | The API exposes a handful of custom types for interfacing with memory buffers. | |||
|
1100 | The primary goal of these types is to facilitate efficient multi-object | |||
|
1101 | operations. | |||
|
1102 | ||||
|
1103 | The essential idea is to have a single memory allocation provide backing | |||
|
1104 | storage for multiple logical objects. This has 2 main advantages: fewer | |||
|
1105 | allocations and optimal memory access patterns. This avoids having to allocate | |||
|
1106 | a Python object for each logical object and furthermore ensures that access of | |||
|
1107 | data for objects can be sequential (read: fast) in memory. | |||
|
1108 | ||||
|
1109 | BufferWithSegments | |||
|
1110 | ^^^^^^^^^^^^^^^^^^ | |||
|
1111 | ||||
|
1112 | The ``BufferWithSegments`` type represents a memory buffer containing N | |||
|
1113 | discrete items of known lengths (segments). It is essentially a fixed-size | |||
|
1114 | memory region and an array of 2-tuples of ``(offset, length)`` 64-bit | |||
|
1115 | unsigned native endian integers defining the byte offset and length of each | |||
|
1116 | segment within the buffer. | |||
|
1117 | ||||
|
1118 | Instances behave like containers. | |||
|
1119 | ||||
|
1120 | ``len()`` returns the number of segments within the instance. | |||
|
1121 | ||||
|
1122 | ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an | |||
|
1123 | individual segment within the backing buffer. That returned object references | |||
|
1124 | (not copies) memory. This means that iterating all objects doesn't copy | |||
|
1125 | data within the buffer. | |||
|
1126 | ||||
|
1127 | The ``.size`` attribute contains the total size in bytes of the backing | |||
|
1128 | buffer. | |||
|
1129 | ||||
|
1130 | Instances conform to the buffer protocol. So a reference to the backing bytes | |||
|
1131 | can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also | |||
|
1132 | be obtained via ``.tobytes()``. | |||
|
1133 | ||||
|
1134 | The ``.segments`` attribute exposes the array of ``(offset, length)`` for | |||
|
1135 | segments within the buffer. It is a ``BufferSegments`` type. | |||
|
1136 | ||||
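A minimal sketch (this assumes the type can be constructed directly from a data
buffer and a packed array of ``(offset, length)`` pairs as described above; the
sample values are illustrative)::

   import struct

   data = b'foobar'
   # Two segments packed as native 64-bit unsigned integers:
   # b'foo' at offset 0 and b'bar' at offset 3.
   segments = struct.pack('=QQQQ', 0, 3, 3, 3)

   buf = zstd.BufferWithSegments(data, segments)
   len(buf)            # 2 segments
   buf.size            # 6 bytes in the backing buffer
   buf[0].tobytes()    # b'foo' (a copy; indexing alone does not copy)
   buf.segments        # a BufferSegments instance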
|
1137 | BufferSegment | |||
|
1138 | ^^^^^^^^^^^^^ | |||
|
1139 | ||||
|
1140 | The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``. | |||
|
1141 | It is essentially a reference to N bytes within a ``BufferWithSegments``. | |||
|
1142 | ||||
|
1143 | ``len()`` returns the length of the segment in bytes. | |||
|
1144 | ||||
|
1145 | ``.offset`` contains the byte offset of this segment within its parent | |||
|
1146 | ``BufferWithSegments`` instance. | |||
|
1147 | ||||
|
1148 | The object conforms to the buffer protocol. ``.tobytes()`` can be called to | |||
|
1149 | obtain a ``bytes`` instance with a copy of the backing bytes. | |||
|
1150 | ||||
|
1151 | BufferSegments | |||
|
1152 | ^^^^^^^^^^^^^^ | |||
|
1153 | ||||
|
1154 | This type represents an array of ``(offset, length)`` integers defining segments | |||
|
1155 | within a ``BufferWithSegments``. | |||
|
1156 | ||||
|
1157 | The array members are 64-bit unsigned integers using host/native bit order. | |||
|
1158 | ||||
|
1159 | Instances conform to the buffer protocol. | |||
|
1160 | ||||
|
1161 | BufferWithSegmentsCollection | |||
|
1162 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |||
|
1163 | ||||
|
1164 | The ``BufferWithSegmentsCollection`` type represents a virtual spanning view | |||
|
1165 | of multiple ``BufferWithSegments`` instances. | |||
|
1166 | ||||
|
1167 | Instances are constructed from 1 or more ``BufferWithSegments`` instances. The | |||
|
1168 | resulting object behaves like an ordered sequence whose members are the | |||
|
1169 | segments within each ``BufferWithSegments``. | |||
|
1170 | ||||
|
1171 | ``len()`` returns the number of segments within all ``BufferWithSegments`` | |||
|
1172 | instances. | |||
|
1173 | ||||
|
1174 | ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at | |||
|
1175 | that offset as if all ``BufferWithSegments`` instances were a single | |||
|
1176 | entity. | |||
|
1177 | ||||
|
1178 | If the object is composed of 2 ``BufferWithSegments`` instances with the | |||
|
1179 | first having 2 segments and the second having 3 segments, then ``b[0]`` | |||
|
1180 | and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``, | |||
|
1181 | and ``b[4]`` access segments from the second. | |||
|
1182 | ||||
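A sketch of the spanning behavior (this assumes the constructor takes the
``BufferWithSegments`` instances as positional arguments; ``buffer_a`` and
``buffer_b`` are assumed to be existing ``BufferWithSegments`` objects)::

   collection = zstd.BufferWithSegmentsCollection(buffer_a, buffer_b)

   len(collection)                         # total segments across both buffers
   first = collection[0]                   # a segment from buffer_a
   last = collection[len(collection) - 1]  # a segment from buffer_b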
|
1183 | Choosing an API | |||
|
1184 | =============== | |||
|
1185 | ||||
|
1186 | There are multiple APIs for performing compression and decompression. This is | |||
|
1187 | because different applications have different needs and the library wants to | |||
|
1188 | facilitate optimal use in as many use cases as possible. | |||
|
1189 | ||||
|
1190 | From a high level, APIs are divided into *one-shot* and *streaming*. See | |||
|
1191 | the ``Concepts`` section for a description of how these are different at | |||
|
1192 | the C layer. | |||
|
1193 | ||||
|
1194 | The *one-shot* APIs are useful for small data, where the input or output | |||
|
1195 | size is known. (The size can come from a buffer length, a file size, or | |||
|
1196 | a size stored in the zstd frame header.) A limitation of the *one-shot* APIs is that | |||
|
1197 | input and output must fit in memory simultaneously. For, say, a 4 GB input, | |||
|
1198 | this is often not feasible. | |||
|
1199 | ||||
|
1200 | The *one-shot* APIs also perform all work as a single operation. So, if you | |||
|
1201 | feed it large input, it could take a long time for the function to return. | |||
|
1202 | ||||
|
1203 | The streaming APIs do not have the limitations of the simple API. But the | |||
|
1204 | price you pay for this flexibility is that they are more complex than a | |||
|
1205 | single function call. | |||
|
1206 | ||||
|
1207 | The streaming APIs put the caller in control of compression and decompression | |||
|
1208 | behavior by allowing them to directly control either the input or output side | |||
|
1209 | of the operation. | |||
|
1210 | ||||
|
1211 | With the *streaming input*, *compressor*, and *decompressor* APIs, the caller | |||
|
1212 | has full control over the input to the compression or decompression stream. | |||
|
1213 | They can directly choose when new data is operated on. | |||
|
1214 | ||||
|
1215 | With the *streaming output* APIs, the caller has full control over the output | |||
|
1216 | of the compression or decompression stream. It can choose when to receive | |||
|
1217 | new data. | |||
|
1218 | ||||
|
1219 | When using the *streaming* APIs that operate on file-like or stream objects, | |||
|
1220 | it is important to consider what happens in that object when I/O is requested. | |||
|
1221 | There is potential for long pauses as data is read or written from the | |||
|
1222 | underlying stream (say from interacting with a filesystem or network). This | |||
|
1223 | could add considerable overhead. | |||
|
1224 | ||||
|
1225 | Concepts | |||
|
1226 | ======== | |||
|
1227 | ||||
|
1228 | It is important to have a basic understanding of how Zstandard works in order | |||
|
1229 | to optimally use this library. In addition, there are some low-level Python | |||
|
1230 | concepts that are worth explaining to aid understanding. This section aims to | |||
|
1231 | provide that knowledge. | |||
|
1232 | ||||
|
1233 | Zstandard Frames and Compression Format | |||
|
1234 | --------------------------------------- | |||
|
1235 | ||||
|
1236 | Compressed zstandard data almost always exists within a container called a | |||
|
1237 | *frame*. (For the technically curious, see the | |||
|
1238 | `specification <https://github.com/facebook/zstd/blob/3bee41a70eaf343fbcae3637b3f6edbe52f35ed8/doc/zstd_compression_format.md>`_.) | |||
|
1239 | ||||
|
1240 | The frame contains a header and optional trailer. The header contains a | |||
|
1241 | magic number to self-identify as a zstd frame and a description of the | |||
|
1242 | compressed data that follows. | |||
|
1243 | ||||
|
1244 | Among other things, the frame *optionally* contains the size of the | |||
|
1245 | decompressed data the frame represents, a 32-bit checksum of the | |||
|
1246 | decompressed data (to facilitate verification during decompression), | |||
|
1247 | and the ID of the dictionary used to compress the data. | |||
|
1248 | ||||
|
1249 | Storing the original content size in the frame (``write_content_size=True`` | |||
|
1250 | to ``ZstdCompressor``) is important for performance in some scenarios. Having | |||
|
1251 | the decompressed size stored there (or storing it elsewhere) allows | |||
|
1252 | decompression to perform a single memory allocation that is exactly sized to | |||
|
1253 | the output. This is faster than continuously growing a memory buffer to hold | |||
|
1254 | output. | |||
|
1255 | ||||
|
1256 | Compression and Decompression Contexts | |||
|
1257 | -------------------------------------- | |||
|
1258 | ||||
|
1259 | In order to perform a compression or decompression operation with the zstd | |||
|
1260 | C API, you need what's called a *context*. A context essentially holds | |||
|
1261 | configuration and state for a compression or decompression operation. For | |||
|
1262 | example, a compression context holds the configured compression level. | |||
|
1263 | ||||
|
1264 | Contexts can be reused for multiple operations. Since creating and | |||
|
1265 | destroying contexts is not free, there are performance advantages to | |||
|
1266 | reusing contexts. | |||
|
1267 | ||||
|
1268 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types are essentially | |||
|
1269 | wrappers around these contexts in the zstd C API. | |||
|
1270 | ||||
|
1271 | One-shot And Streaming Operations | |||
|
1272 | --------------------------------- | |||
|
1273 | ||||
|
1274 | A compression or decompression operation can either be performed as a | |||
|
1275 | single *one-shot* operation or as a continuous *streaming* operation. | |||
|
1276 | ||||
|
1277 | In one-shot mode (the *simple* APIs provided by the Python interface), | |||
|
1278 | **all** input is handed to the compressor or decompressor as a single buffer | |||
|
1279 | and **all** output is returned as a single buffer. | |||
|
1280 | ||||
|
1281 | In streaming mode, input is delivered to the compressor or decompressor as | |||
|
1282 | a series of chunks via multiple function calls. Likewise, output is | |||
|
1283 | obtained in chunks as well. | |||
|
1284 | ||||
|
1285 | Streaming operations require an additional *stream* object to be created | |||
|
1286 | to track the operation. These are logical extensions of *context* | |||
|
1287 | instances. | |||
|
1288 | ||||
|
1289 | There are advantages and disadvantages to each mode of operation. There | |||
|
1290 | are scenarios where certain modes can't be used. See the | |||
|
1291 | ``Choosing an API`` section for more. | |||
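Sketched roughly (``write_to()`` returns the ``ZstdCompressionWriter`` context manager used for streaming output to a writer; the details here are illustrative)::

   import io
   import zstd

   cctx = zstd.ZstdCompressor()

   # One-shot: all input in, all output out, in a single call.
   frame = cctx.compress(b'data to compress')

   # Streaming: input is fed in chunks and compressed output is written
   # to the destination as it becomes available.
   destination = io.BytesIO()
   with cctx.write_to(destination) as compressor:
       for chunk in (b'chunk 0', b'chunk 1', b'chunk 2'):
           compressor.write(chunk)
   streamed = destination.getvalue()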
|
1292 | ||||
|
1293 | Dictionaries | |||
|
1294 | ------------ | |||
|
1295 | ||||
|
1296 | A compression *dictionary* is essentially data used to seed the compressor | |||
|
1297 | state so it can achieve better compression. The idea is that if you are | |||
|
1298 | compressing a lot of similar pieces of data (e.g. JSON documents or anything | |||
|
1299 | sharing similar structure), then you can find common patterns across multiple | |||
|
1300 | objects and then leverage those common patterns during compression and | |||
|
1301 | decompression operations to achieve better compression ratios. | |||
|
1302 | ||||
|
1303 | Dictionary compression is generally only useful for small inputs - data no | |||
|
1304 | larger than a few kilobytes. The upper bound on this range is highly dependent | |||
|
1305 | on the input data and the dictionary. | |||
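A minimal sketch of the dictionary workflow (the sample data is made up; ``train_dictionary()`` and the ``dict_data`` argument are described elsewhere in this document)::

   import zstd

   # Many small, structurally similar inputs - the case dictionaries help.
   samples = [('{"id": %d, "name": "user%d"}' % (i, i)).encode('utf-8')
              for i in range(1000)]

   # Train a small dictionary from the samples.
   dict_data = zstd.train_dictionary(1024, samples)

   # Use the same dictionary for compression and decompression.
   cctx = zstd.ZstdCompressor(dict_data=dict_data)
   dctx = zstd.ZstdDecompressor(dict_data=dict_data)
   frame = cctx.compress(samples[0])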
|
1306 | ||||
|
1307 | Python Buffer Protocol | |||
|
1308 | ---------------------- | |||
|
1309 | ||||
|
1310 | Many functions in the library operate on objects that implement Python's | |||
|
1311 | `buffer protocol <https://docs.python.org/3.6/c-api/buffer.html>`_. | |||
|
1312 | ||||
|
1313 | The *buffer protocol* is an internal implementation detail of a Python | |||
|
1314 | type that allows instances of that type (objects) to be exposed as a raw | |||
|
1315 | pointer (or buffer) in the C API. In other words, it allows objects to be | |||
|
1316 | exposed as an array of bytes. | |||
|
1317 | ||||
|
1318 | From the perspective of the C API, objects implementing the *buffer protocol* | |||
|
1319 | all look the same: they are just a pointer to a memory address of a defined | |||
|
1320 | length. This allows the C API to be largely type agnostic when accessing their | |||
|
1321 | data. It also allows custom types to be passed in without first converting them | |||
|
1322 | to a specific type. | |||
|
1323 | ||||
|
1324 | Many Python types implement the buffer protocol. These include ``bytes`` | |||
|
1325 | (``str`` on Python 2), ``bytearray``, ``array.array``, ``io.BytesIO``, | |||
|
1326 | ``mmap.mmap``, and ``memoryview``. | |||
|
1327 | ||||
|
1328 | ``python-zstandard`` APIs that accept objects conforming to the buffer | |||
|
1329 | protocol require that the buffer is *C contiguous* and has a single | |||
|
1330 | dimension (``ndim==1``). This is usually the case. An example of where it | |||
|
1331 | is not is a Numpy matrix type. | |||
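In practice this means objects like the following can be passed directly to e.g. ``ZstdCompressor.compress()`` without first converting them to ``bytes`` (a small sketch)::

   import array
   import zstd

   cctx = zstd.ZstdCompressor()

   # bytes, bytearray, memoryview and array.array all expose a C
   # contiguous, single dimension buffer, so all of these work.
   cctx.compress(b'raw bytes')
   cctx.compress(bytearray(b'mutable bytes'))
   cctx.compress(memoryview(b'a view over bytes'))
   cctx.compress(array.array('B', [0, 1, 2, 3]))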
|
1332 | ||||
|
1333 | Requiring Output Sizes for Non-Streaming Decompression APIs | |||
|
1334 | ----------------------------------------------------------- | |||
|
1335 | ||||
|
1336 | Non-streaming decompression APIs require that either the output size is | |||
|
1337 | explicitly defined (either in the zstd frame header or passed into the | |||
|
1338 | function) or that a max output size is specified. This restriction is for | |||
|
1339 | your safety. | |||
|
1340 | ||||
|
1341 | The *one-shot* decompression APIs store the decompressed result in a | |||
|
1342 | single buffer. This means that a buffer needs to be pre-allocated to hold | |||
|
1343 | the result. If the decompressed size is not known, then there is no universal | |||
|
1344 | good default size to use. Any default will fail or will be highly sub-optimal | |||
|
1345 | in some scenarios (it will either be too small or will put stress on the | |||
|
1346 | memory allocator to allocate an overly large block). | |||
|
1347 | ||||
|
1348 | A *helpful* API may retry decompression with buffers of increasing size. | |||
|
1349 | While useful, there are obvious performance disadvantages, namely redoing | |||
|
1350 | decompression N times until it works. In addition, there is a security | |||
|
1351 | concern. Say the input came from highly compressible data, like 1 GB of the | |||
|
1352 | same byte value. The output size could be several orders of magnitude larger than | |||
|
1353 | input size. An input of <100KB could decompress to >1GB. Without a bounds | |||
|
1354 | restriction on the decompressed size, certain inputs could exhaust all system | |||
|
1355 | memory. That's not good and is why the maximum output size is limited. | |||
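Expressed as a sketch (treating ``max_output_size`` as the keyword the one-shot ``decompress()`` uses for that bound)::

   import zstd

   dctx = zstd.ZstdDecompressor()

   # Frame written with write_content_size=True: the header tells the
   # decompressor exactly how much memory to allocate.
   frame = zstd.ZstdCompressor(write_content_size=True).compress(b'input data')
   dctx.decompress(frame)

   # Frame without an embedded content size: an explicit upper bound on
   # the output must be supplied, otherwise an error is raised.
   frame = zstd.ZstdCompressor().compress(b'input data')
   dctx.decompress(frame, max_output_size=1048576)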
|
1356 | ||||
907 | Note on Zstandard's *Experimental* API |
|
1357 | Note on Zstandard's *Experimental* API | |
908 | ====================================== |
|
1358 | ====================================== | |
909 |
|
1359 | |||
910 | Many of the Zstandard APIs used by this module are marked as *experimental* |
|
1360 | Many of the Zstandard APIs used by this module are marked as *experimental* | |
911 | within the Zstandard project. This includes a large number of useful |
|
1361 | within the Zstandard project. This includes a large number of useful | |
912 | features, such as compression and frame parameters and parts of dictionary |
|
1362 | features, such as compression and frame parameters and parts of dictionary | |
913 | compression. |
|
1363 | compression. | |
914 |
|
1364 | |||
915 | It is unclear how Zstandard's C API will evolve over time, especially with |
|
1365 | It is unclear how Zstandard's C API will evolve over time, especially with | |
916 | regards to this *experimental* functionality. We will try to maintain |
|
1366 | regards to this *experimental* functionality. We will try to maintain | |
917 | backwards compatibility at the Python API level. However, we cannot |
|
1367 | backwards compatibility at the Python API level. However, we cannot | |
918 | guarantee this for things not under our control. |
|
1368 | guarantee this for things not under our control. | |
919 |
|
1369 | |||
920 | Since a copy of the Zstandard source code is distributed with this |
|
1370 | Since a copy of the Zstandard source code is distributed with this | |
921 | module and since we compile against it, the behavior of a specific |
|
1371 | module and since we compile against it, the behavior of a specific | |
922 | version of this module should be constant for all of time. So if you |
|
1372 | version of this module should be constant for all of time. So if you | |
923 | pin the version of this module used in your projects (which is a Python |
|
1373 | pin the version of this module used in your projects (which is a Python | |
924 | best practice), you should be insulated from unwanted future changes.
|
1374 | best practice), you should be insulated from unwanted future changes. | |
925 |
|
1375 | |||
926 | Donate |
|
1376 | Donate | |
927 | ====== |
|
1377 | ====== | |
928 |
|
1378 | |||
929 | A lot of time has been invested into this project by the author. |
|
1379 | A lot of time has been invested into this project by the author. | |
930 |
|
1380 | |||
931 | If you find this project useful and would like to thank the author for |
|
1381 | If you find this project useful and would like to thank the author for | |
932 | their work, consider donating some money. Any amount is appreciated. |
|
1382 | their work, consider donating some money. Any amount is appreciated. | |
933 |
|
1383 | |||
934 | .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif |
|
1384 | .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif | |
935 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard¤cy_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted |
|
1385 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard¤cy_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted | |
936 | :alt: Donate via PayPal |
|
1386 | :alt: Donate via PayPal | |
937 |
|
1387 | |||
938 | .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master |
|
1388 | .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master | |
939 | :target: https://travis-ci.org/indygreg/python-zstandard |
|
1389 | :target: https://travis-ci.org/indygreg/python-zstandard | |
940 |
|
1390 | |||
941 | .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true |
|
1391 | .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true | |
942 | :target: https://ci.appveyor.com/project/indygreg/python-zstandard |
|
1392 | :target: https://ci.appveyor.com/project/indygreg/python-zstandard | |
943 | :alt: Windows build status |
|
1393 | :alt: Windows build status |
@@ -1,248 +1,392 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) { |
|
13 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) { | |
14 | static char *kwlist[] = { "dict_size", "samples", "parameters", NULL }; |
|
14 | static char* kwlist[] = { | |
|
15 | "dict_size", | |||
|
16 | "samples", | |||
|
17 | "selectivity", | |||
|
18 | "level", | |||
|
19 | "notifications", | |||
|
20 | "dict_id", | |||
|
21 | NULL | |||
|
22 | }; | |||
15 | size_t capacity; |
|
23 | size_t capacity; | |
16 | PyObject* samples; |
|
24 | PyObject* samples; | |
17 | Py_ssize_t samplesLen; |
|
25 | Py_ssize_t samplesLen; | |
18 | PyObject* parameters = NULL; |
|
26 | unsigned selectivity = 0; | |
|
27 | int level = 0; | |||
|
28 | unsigned notifications = 0; | |||
|
29 | unsigned dictID = 0; | |||
19 | ZDICT_params_t zparams; |
|
30 | ZDICT_params_t zparams; | |
20 | Py_ssize_t sampleIndex; |
|
31 | Py_ssize_t sampleIndex; | |
21 | Py_ssize_t sampleSize; |
|
32 | Py_ssize_t sampleSize; | |
22 | PyObject* sampleItem; |
|
33 | PyObject* sampleItem; | |
23 | size_t zresult; |
|
34 | size_t zresult; | |
24 | void* sampleBuffer; |
|
35 | void* sampleBuffer = NULL; | |
25 | void* sampleOffset; |
|
36 | void* sampleOffset; | |
26 | size_t samplesSize = 0; |
|
37 | size_t samplesSize = 0; | |
27 | size_t* sampleSizes; |
|
38 | size_t* sampleSizes = NULL; | |
28 | void* dict; |
|
39 | void* dict = NULL; | |
29 | ZstdCompressionDict* result; |
|
40 | ZstdCompressionDict* result = NULL; | |
30 |
|
41 | |||
31 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|O!:train_dictionary", |
|
42 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IiII:train_dictionary", | |
32 | kwlist, |
|
43 | kwlist, | |
33 | &capacity, |
|
44 | &capacity, | |
34 | &PyList_Type, &samples, |
|
45 | &PyList_Type, &samples, | |
35 | (PyObject*)&DictParametersType, ¶meters)) { |
|
46 | &selectivity, &level, ¬ifications, &dictID)) { | |
36 | return NULL; |
|
47 | return NULL; | |
37 | } |
|
48 | } | |
38 |
|
49 | |||
39 | /* Validate parameters first since it is easiest. */ |
|
50 | memset(&zparams, 0, sizeof(zparams)); | |
40 | zparams.selectivityLevel = 0; |
|
|||
41 | zparams.compressionLevel = 0; |
|
|||
42 | zparams.notificationLevel = 0; |
|
|||
43 | zparams.dictID = 0; |
|
|||
44 | zparams.reserved[0] = 0; |
|
|||
45 | zparams.reserved[1] = 0; |
|
|||
46 |
|
51 | |||
47 | if (parameters) { |
|
52 | zparams.selectivityLevel = selectivity; | |
48 | /* TODO validate data ranges */ |
|
53 | zparams.compressionLevel = level; | |
49 | zparams.selectivityLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 0)); |
|
54 | zparams.notificationLevel = notifications; | |
50 | zparams.compressionLevel = PyLong_AsLong(PyTuple_GetItem(parameters, 1)); |
|
55 | zparams.dictID = dictID; | |
51 | zparams.notificationLevel = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 2)); |
|
|||
52 | zparams.dictID = PyLong_AsUnsignedLong(PyTuple_GetItem(parameters, 3)); |
|
|||
53 | } |
|
|||
54 |
|
56 | |||
55 | /* Figure out the size of the raw samples */ |
|
57 | /* Figure out the size of the raw samples */ | |
56 | samplesLen = PyList_Size(samples); |
|
58 | samplesLen = PyList_Size(samples); | |
57 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { |
|
59 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { | |
58 | sampleItem = PyList_GetItem(samples, sampleIndex); |
|
60 | sampleItem = PyList_GetItem(samples, sampleIndex); | |
59 | if (!PyBytes_Check(sampleItem)) { |
|
61 | if (!PyBytes_Check(sampleItem)) { | |
60 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); |
|
62 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); | |
61 | return NULL; |
|
63 | return NULL; | |
62 | } |
|
64 | } | |
63 | samplesSize += PyBytes_GET_SIZE(sampleItem); |
|
65 | samplesSize += PyBytes_GET_SIZE(sampleItem); | |
64 | } |
|
66 | } | |
65 |
|
67 | |||
66 | /* Now that we know the total size of the raw samples, we can allocate |
|
68 | /* Now that we know the total size of the raw samples, we can allocate | |
67 | a buffer for the raw data */ |
|
69 | a buffer for the raw data */ | |
68 | sampleBuffer = PyMem_Malloc(samplesSize); |
|
70 | sampleBuffer = PyMem_Malloc(samplesSize); | |
69 | if (!sampleBuffer) { |
|
71 | if (!sampleBuffer) { | |
70 | PyErr_NoMemory(); |
|
72 | PyErr_NoMemory(); | |
71 | return NULL; |
|
73 | goto finally; | |
72 | } |
|
74 | } | |
73 | sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t)); |
|
75 | sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t)); | |
74 | if (!sampleSizes) { |
|
76 | if (!sampleSizes) { | |
75 | PyMem_Free(sampleBuffer); |
|
|||
76 | PyErr_NoMemory(); |
|
77 | PyErr_NoMemory(); | |
77 | return NULL; |
|
78 | goto finally; | |
78 | } |
|
79 | } | |
79 |
|
80 | |||
80 | sampleOffset = sampleBuffer; |
|
81 | sampleOffset = sampleBuffer; | |
81 | /* Now iterate again and assemble the samples in the buffer */ |
|
82 | /* Now iterate again and assemble the samples in the buffer */ | |
82 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { |
|
83 | for (sampleIndex = 0; sampleIndex < samplesLen; sampleIndex++) { | |
83 | sampleItem = PyList_GetItem(samples, sampleIndex); |
|
84 | sampleItem = PyList_GetItem(samples, sampleIndex); | |
84 | sampleSize = PyBytes_GET_SIZE(sampleItem); |
|
85 | sampleSize = PyBytes_GET_SIZE(sampleItem); | |
85 | sampleSizes[sampleIndex] = sampleSize; |
|
86 | sampleSizes[sampleIndex] = sampleSize; | |
86 | memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); |
|
87 | memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); | |
87 | sampleOffset = (char*)sampleOffset + sampleSize; |
|
88 | sampleOffset = (char*)sampleOffset + sampleSize; | |
88 | } |
|
89 | } | |
89 |
|
90 | |||
90 | dict = PyMem_Malloc(capacity); |
|
91 | dict = PyMem_Malloc(capacity); | |
91 | if (!dict) { |
|
92 | if (!dict) { | |
92 | PyMem_Free(sampleSizes); |
|
|||
93 | PyMem_Free(sampleBuffer); |
|
|||
94 | PyErr_NoMemory(); |
|
93 | PyErr_NoMemory(); | |
95 | return NULL; |
|
94 | goto finally; | |
96 | } |
|
95 | } | |
97 |
|
96 | |||
|
97 | /* TODO consider using dup2() to redirect zstd's stderr writing to a buffer */ | |||
|
98 | Py_BEGIN_ALLOW_THREADS | |||
98 | zresult = ZDICT_trainFromBuffer_advanced(dict, capacity, |
|
99 | zresult = ZDICT_trainFromBuffer_advanced(dict, capacity, | |
99 | sampleBuffer, sampleSizes, (unsigned int)samplesLen, |
|
100 | sampleBuffer, sampleSizes, (unsigned int)samplesLen, | |
100 | zparams); |
|
101 | zparams); | |
|
102 | Py_END_ALLOW_THREADS | |||
101 | if (ZDICT_isError(zresult)) { |
|
103 | if (ZDICT_isError(zresult)) { | |
102 | PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult)); |
|
104 | PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult)); | |
103 | PyMem_Free(dict); |
|
105 | PyMem_Free(dict); | |
104 | PyMem_Free(sampleSizes); |
|
106 | goto finally; | |
105 | PyMem_Free(sampleBuffer); |
|
|||
106 | return NULL; |
|
|||
107 | } |
|
107 | } | |
108 |
|
108 | |||
109 | result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType); |
|
109 | result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType); | |
110 | if (!result) { |
|
110 | if (!result) { | |
111 | return NULL; |
|
111 | goto finally; | |
112 | } |
|
112 | } | |
113 |
|
113 | |||
114 | result->dictData = dict; |
|
114 | result->dictData = dict; | |
115 | result->dictSize = zresult; |
|
115 | result->dictSize = zresult; | |
|
116 | result->d = 0; | |||
|
117 | result->k = 0; | |||
|
118 | ||||
|
119 | finally: | |||
|
120 | PyMem_Free(sampleBuffer); | |||
|
121 | PyMem_Free(sampleSizes); | |||
|
122 | ||||
116 | return result; |
|
123 | return result; | |
117 | } |
|
124 | } | |
118 |
|
125 | |||
|
126 | ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) { | |||
|
127 | static char* kwlist[] = { | |||
|
128 | "dict_size", | |||
|
129 | "samples", | |||
|
130 | "k", | |||
|
131 | "d", | |||
|
132 | "notifications", | |||
|
133 | "dict_id", | |||
|
134 | "level", | |||
|
135 | "optimize", | |||
|
136 | "steps", | |||
|
137 | "threads", | |||
|
138 | NULL | |||
|
139 | }; | |||
|
140 | ||||
|
141 | size_t capacity; | |||
|
142 | PyObject* samples; | |||
|
143 | unsigned k = 0; | |||
|
144 | unsigned d = 0; | |||
|
145 | unsigned notifications = 0; | |||
|
146 | unsigned dictID = 0; | |||
|
147 | int level = 0; | |||
|
148 | PyObject* optimize = NULL; | |||
|
149 | unsigned steps = 0; | |||
|
150 | int threads = 0; | |||
|
151 | COVER_params_t params; | |||
|
152 | Py_ssize_t samplesLen; | |||
|
153 | Py_ssize_t i; | |||
|
154 | size_t samplesSize = 0; | |||
|
155 | void* sampleBuffer = NULL; | |||
|
156 | size_t* sampleSizes = NULL; | |||
|
157 | void* sampleOffset; | |||
|
158 | Py_ssize_t sampleSize; | |||
|
159 | void* dict = NULL; | |||
|
160 | size_t zresult; | |||
|
161 | ZstdCompressionDict* result = NULL; | |||
|
162 | ||||
|
163 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiOIi:train_cover_dictionary", | |||
|
164 | kwlist, &capacity, &PyList_Type, &samples, | |||
|
165 | &k, &d, ¬ifications, &dictID, &level, &optimize, &steps, &threads)) { | |||
|
166 | return NULL; | |||
|
167 | } | |||
|
168 | ||||
|
169 | if (threads < 0) { | |||
|
170 | threads = cpu_count(); | |||
|
171 | } | |||
|
172 | ||||
|
173 | memset(¶ms, 0, sizeof(params)); | |||
|
174 | params.k = k; | |||
|
175 | params.d = d; | |||
|
176 | params.steps = steps; | |||
|
177 | params.nbThreads = threads; | |||
|
178 | params.notificationLevel = notifications; | |||
|
179 | params.dictID = dictID; | |||
|
180 | params.compressionLevel = level; | |||
|
181 | ||||
|
182 | /* Figure out total size of input samples. */ | |||
|
183 | samplesLen = PyList_Size(samples); | |||
|
184 | for (i = 0; i < samplesLen; i++) { | |||
|
185 | PyObject* sampleItem = PyList_GET_ITEM(samples, i); | |||
|
186 | ||||
|
187 | if (!PyBytes_Check(sampleItem)) { | |||
|
188 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); | |||
|
189 | return NULL; | |||
|
190 | } | |||
|
191 | samplesSize += PyBytes_GET_SIZE(sampleItem); | |||
|
192 | } | |||
|
193 | ||||
|
194 | sampleBuffer = PyMem_Malloc(samplesSize); | |||
|
195 | if (!sampleBuffer) { | |||
|
196 | PyErr_NoMemory(); | |||
|
197 | goto finally; | |||
|
198 | } | |||
|
199 | ||||
|
200 | sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t)); | |||
|
201 | if (!sampleSizes) { | |||
|
202 | PyErr_NoMemory(); | |||
|
203 | goto finally; | |||
|
204 | } | |||
|
205 | ||||
|
206 | sampleOffset = sampleBuffer; | |||
|
207 | for (i = 0; i < samplesLen; i++) { | |||
|
208 | PyObject* sampleItem = PyList_GET_ITEM(samples, i); | |||
|
209 | sampleSize = PyBytes_GET_SIZE(sampleItem); | |||
|
210 | sampleSizes[i] = sampleSize; | |||
|
211 | memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); | |||
|
212 | sampleOffset = (char*)sampleOffset + sampleSize; | |||
|
213 | } | |||
|
214 | ||||
|
215 | dict = PyMem_Malloc(capacity); | |||
|
216 | if (!dict) { | |||
|
217 | PyErr_NoMemory(); | |||
|
218 | goto finally; | |||
|
219 | } | |||
|
220 | ||||
|
221 | Py_BEGIN_ALLOW_THREADS | |||
|
222 | if (optimize && PyObject_IsTrue(optimize)) { | |||
|
223 | zresult = COVER_optimizeTrainFromBuffer(dict, capacity, | |||
|
224 | sampleBuffer, sampleSizes, (unsigned)samplesLen, ¶ms); | |||
|
225 | } | |||
|
226 | else { | |||
|
227 | zresult = COVER_trainFromBuffer(dict, capacity, | |||
|
228 | sampleBuffer, sampleSizes, (unsigned)samplesLen, params); | |||
|
229 | } | |||
|
230 | Py_END_ALLOW_THREADS | |||
|
231 | ||||
|
232 | if (ZDICT_isError(zresult)) { | |||
|
233 | PyMem_Free(dict); | |||
|
234 | PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult)); | |||
|
235 | goto finally; | |||
|
236 | } | |||
|
237 | ||||
|
238 | result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType); | |||
|
239 | if (!result) { | |||
|
240 | PyMem_Free(dict); | |||
|
241 | goto finally; | |||
|
242 | } | |||
|
243 | ||||
|
244 | result->dictData = dict; | |||
|
245 | result->dictSize = zresult; | |||
|
246 | result->d = params.d; | |||
|
247 | result->k = params.k; | |||
|
248 | ||||
|
249 | finally: | |||
|
250 | PyMem_Free(sampleBuffer); | |||
|
251 | PyMem_Free(sampleSizes); | |||
|
252 | ||||
|
253 | return result; | |||
|
254 | } | |||
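For orientation, a hedged sketch of how this entry point might be driven from Python, using the keyword names from the ``kwlist`` above (the exposed name ``zstd.train_cover_dictionary`` and the chosen values are assumptions)::

   import zstd

   samples = [('sample data %d ' % i).encode('ascii') * 64 for i in range(200)]

   # Explicit cover parameters...
   dict_data = zstd.train_cover_dictionary(16384, samples, k=64, d=8)

   # ...or ask the trainer to search for good k/d values itself, possibly
   # using multiple threads (threads=-1 maps to cpu_count() above).
   dict_data = zstd.train_cover_dictionary(
       16384, samples, optimize=True, steps=40, threads=-1)

   # The chosen parameters are exposed on the resulting dictionary
   # (see the k/d members added to ZstdCompressionDict below).
   print(dict_data.k, dict_data.d)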
119 |
|
255 | |||
120 | PyDoc_STRVAR(ZstdCompressionDict__doc__, |
|
256 | PyDoc_STRVAR(ZstdCompressionDict__doc__, | |
121 | "ZstdCompressionDict(data) - Represents a computed compression dictionary\n" |
|
257 | "ZstdCompressionDict(data) - Represents a computed compression dictionary\n" | |
122 | "\n" |
|
258 | "\n" | |
123 | "This type holds the results of a computed Zstandard compression dictionary.\n" |
|
259 | "This type holds the results of a computed Zstandard compression dictionary.\n" | |
124 | "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n" |
|
260 | "Instances are obtained by calling ``train_dictionary()`` or by passing bytes\n" | |
125 | "obtained from another source into the constructor.\n" |
|
261 | "obtained from another source into the constructor.\n" | |
126 | ); |
|
262 | ); | |
127 |
|
263 | |||
128 | static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) { |
|
264 | static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args) { | |
129 | const char* source; |
|
265 | const char* source; | |
130 | Py_ssize_t sourceSize; |
|
266 | Py_ssize_t sourceSize; | |
131 |
|
267 | |||
132 | self->dictData = NULL; |
|
268 | self->dictData = NULL; | |
133 | self->dictSize = 0; |
|
269 | self->dictSize = 0; | |
134 |
|
270 | |||
135 | #if PY_MAJOR_VERSION >= 3 |
|
271 | #if PY_MAJOR_VERSION >= 3 | |
136 | if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict", |
|
272 | if (!PyArg_ParseTuple(args, "y#:ZstdCompressionDict", | |
137 | #else |
|
273 | #else | |
138 | if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict", |
|
274 | if (!PyArg_ParseTuple(args, "s#:ZstdCompressionDict", | |
139 | #endif |
|
275 | #endif | |
140 | &source, &sourceSize)) { |
|
276 | &source, &sourceSize)) { | |
141 | return -1; |
|
277 | return -1; | |
142 | } |
|
278 | } | |
143 |
|
279 | |||
144 | self->dictData = PyMem_Malloc(sourceSize); |
|
280 | self->dictData = PyMem_Malloc(sourceSize); | |
145 | if (!self->dictData) { |
|
281 | if (!self->dictData) { | |
146 | PyErr_NoMemory(); |
|
282 | PyErr_NoMemory(); | |
147 | return -1; |
|
283 | return -1; | |
148 | } |
|
284 | } | |
149 |
|
285 | |||
150 | memcpy(self->dictData, source, sourceSize); |
|
286 | memcpy(self->dictData, source, sourceSize); | |
151 | self->dictSize = sourceSize; |
|
287 | self->dictSize = sourceSize; | |
152 |
|
288 | |||
153 | return 0; |
|
289 | return 0; | |
154 | } |
|
290 | } | |
155 |
|
291 | |||
156 | static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) { |
|
292 | static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) { | |
157 | if (self->dictData) { |
|
293 | if (self->dictData) { | |
158 | PyMem_Free(self->dictData); |
|
294 | PyMem_Free(self->dictData); | |
159 | self->dictData = NULL; |
|
295 | self->dictData = NULL; | |
160 | } |
|
296 | } | |
161 |
|
297 | |||
162 | PyObject_Del(self); |
|
298 | PyObject_Del(self); | |
163 | } |
|
299 | } | |
164 |
|
300 | |||
165 | static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) { |
|
301 | static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) { | |
166 | unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); |
|
302 | unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); | |
167 |
|
303 | |||
168 | return PyLong_FromLong(dictID); |
|
304 | return PyLong_FromLong(dictID); | |
169 | } |
|
305 | } | |
170 |
|
306 | |||
171 | static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) { |
|
307 | static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) { | |
172 | return PyBytes_FromStringAndSize(self->dictData, self->dictSize); |
|
308 | return PyBytes_FromStringAndSize(self->dictData, self->dictSize); | |
173 | } |
|
309 | } | |
174 |
|
310 | |||
175 | static PyMethodDef ZstdCompressionDict_methods[] = { |
|
311 | static PyMethodDef ZstdCompressionDict_methods[] = { | |
176 | { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS, |
|
312 | { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS, | |
177 | PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") }, |
|
313 | PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") }, | |
178 | { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, |
|
314 | { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, | |
179 | PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") }, |
|
315 | PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") }, | |
180 | { NULL, NULL } |
|
316 | { NULL, NULL } | |
181 | }; |
|
317 | }; | |
182 |
|
318 | |||
|
319 | static PyMemberDef ZstdCompressionDict_members[] = { | |||
|
320 | { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY, | |||
|
321 | "segment size" }, | |||
|
322 | { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY, | |||
|
323 | "dmer size" }, | |||
|
324 | { NULL } | |||
|
325 | }; | |||
|
326 | ||||
183 | static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) { |
|
327 | static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) { | |
184 | return self->dictSize; |
|
328 | return self->dictSize; | |
185 | } |
|
329 | } | |
186 |
|
330 | |||
187 | static PySequenceMethods ZstdCompressionDict_sq = { |
|
331 | static PySequenceMethods ZstdCompressionDict_sq = { | |
188 | (lenfunc)ZstdCompressionDict_length, /* sq_length */ |
|
332 | (lenfunc)ZstdCompressionDict_length, /* sq_length */ | |
189 | 0, /* sq_concat */ |
|
333 | 0, /* sq_concat */ | |
190 | 0, /* sq_repeat */ |
|
334 | 0, /* sq_repeat */ | |
191 | 0, /* sq_item */ |
|
335 | 0, /* sq_item */ | |
192 | 0, /* sq_ass_item */ |
|
336 | 0, /* sq_ass_item */ | |
193 | 0, /* sq_contains */ |
|
337 | 0, /* sq_contains */ | |
194 | 0, /* sq_inplace_concat */ |
|
338 | 0, /* sq_inplace_concat */ | |
195 | 0 /* sq_inplace_repeat */ |
|
339 | 0 /* sq_inplace_repeat */ | |
196 | }; |
|
340 | }; | |
197 |
|
341 | |||
198 | PyTypeObject ZstdCompressionDictType = { |
|
342 | PyTypeObject ZstdCompressionDictType = { | |
199 | PyVarObject_HEAD_INIT(NULL, 0) |
|
343 | PyVarObject_HEAD_INIT(NULL, 0) | |
200 | "zstd.ZstdCompressionDict", /* tp_name */ |
|
344 | "zstd.ZstdCompressionDict", /* tp_name */ | |
201 | sizeof(ZstdCompressionDict), /* tp_basicsize */ |
|
345 | sizeof(ZstdCompressionDict), /* tp_basicsize */ | |
202 | 0, /* tp_itemsize */ |
|
346 | 0, /* tp_itemsize */ | |
203 | (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */ |
|
347 | (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */ | |
204 | 0, /* tp_print */ |
|
348 | 0, /* tp_print */ | |
205 | 0, /* tp_getattr */ |
|
349 | 0, /* tp_getattr */ | |
206 | 0, /* tp_setattr */ |
|
350 | 0, /* tp_setattr */ | |
207 | 0, /* tp_compare */ |
|
351 | 0, /* tp_compare */ | |
208 | 0, /* tp_repr */ |
|
352 | 0, /* tp_repr */ | |
209 | 0, /* tp_as_number */ |
|
353 | 0, /* tp_as_number */ | |
210 | &ZstdCompressionDict_sq, /* tp_as_sequence */ |
|
354 | &ZstdCompressionDict_sq, /* tp_as_sequence */ | |
211 | 0, /* tp_as_mapping */ |
|
355 | 0, /* tp_as_mapping */ | |
212 | 0, /* tp_hash */ |
|
356 | 0, /* tp_hash */ | |
213 | 0, /* tp_call */ |
|
357 | 0, /* tp_call */ | |
214 | 0, /* tp_str */ |
|
358 | 0, /* tp_str */ | |
215 | 0, /* tp_getattro */ |
|
359 | 0, /* tp_getattro */ | |
216 | 0, /* tp_setattro */ |
|
360 | 0, /* tp_setattro */ | |
217 | 0, /* tp_as_buffer */ |
|
361 | 0, /* tp_as_buffer */ | |
218 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
362 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
219 | ZstdCompressionDict__doc__, /* tp_doc */ |
|
363 | ZstdCompressionDict__doc__, /* tp_doc */ | |
220 | 0, /* tp_traverse */ |
|
364 | 0, /* tp_traverse */ | |
221 | 0, /* tp_clear */ |
|
365 | 0, /* tp_clear */ | |
222 | 0, /* tp_richcompare */ |
|
366 | 0, /* tp_richcompare */ | |
223 | 0, /* tp_weaklistoffset */ |
|
367 | 0, /* tp_weaklistoffset */ | |
224 | 0, /* tp_iter */ |
|
368 | 0, /* tp_iter */ | |
225 | 0, /* tp_iternext */ |
|
369 | 0, /* tp_iternext */ | |
226 | ZstdCompressionDict_methods, /* tp_methods */ |
|
370 | ZstdCompressionDict_methods, /* tp_methods */ | |
227 | 0, /* tp_members */ |
|
371 | ZstdCompressionDict_members, /* tp_members */ | |
228 | 0, /* tp_getset */ |
|
372 | 0, /* tp_getset */ | |
229 | 0, /* tp_base */ |
|
373 | 0, /* tp_base */ | |
230 | 0, /* tp_dict */ |
|
374 | 0, /* tp_dict */ | |
231 | 0, /* tp_descr_get */ |
|
375 | 0, /* tp_descr_get */ | |
232 | 0, /* tp_descr_set */ |
|
376 | 0, /* tp_descr_set */ | |
233 | 0, /* tp_dictoffset */ |
|
377 | 0, /* tp_dictoffset */ | |
234 | (initproc)ZstdCompressionDict_init, /* tp_init */ |
|
378 | (initproc)ZstdCompressionDict_init, /* tp_init */ | |
235 | 0, /* tp_alloc */ |
|
379 | 0, /* tp_alloc */ | |
236 | PyType_GenericNew, /* tp_new */ |
|
380 | PyType_GenericNew, /* tp_new */ | |
237 | }; |
|
381 | }; | |
238 |
|
382 | |||
239 | void compressiondict_module_init(PyObject* mod) { |
|
383 | void compressiondict_module_init(PyObject* mod) { | |
240 | Py_TYPE(&ZstdCompressionDictType) = &PyType_Type; |
|
384 | Py_TYPE(&ZstdCompressionDictType) = &PyType_Type; | |
241 | if (PyType_Ready(&ZstdCompressionDictType) < 0) { |
|
385 | if (PyType_Ready(&ZstdCompressionDictType) < 0) { | |
242 | return; |
|
386 | return; | |
243 | } |
|
387 | } | |
244 |
|
388 | |||
245 | Py_INCREF((PyObject*)&ZstdCompressionDictType); |
|
389 | Py_INCREF((PyObject*)&ZstdCompressionDictType); | |
246 | PyModule_AddObject(mod, "ZstdCompressionDict", |
|
390 | PyModule_AddObject(mod, "ZstdCompressionDict", | |
247 | (PyObject*)&ZstdCompressionDictType); |
|
391 | (PyObject*)&ZstdCompressionDictType); | |
248 | } |
|
392 | } |
@@ -1,220 +1,253 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) { |
|
11 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams) { | |
12 | zparams->windowLog = params->windowLog; |
|
12 | zparams->windowLog = params->windowLog; | |
13 | zparams->chainLog = params->chainLog; |
|
13 | zparams->chainLog = params->chainLog; | |
14 | zparams->hashLog = params->hashLog; |
|
14 | zparams->hashLog = params->hashLog; | |
15 | zparams->searchLog = params->searchLog; |
|
15 | zparams->searchLog = params->searchLog; | |
16 | zparams->searchLength = params->searchLength; |
|
16 | zparams->searchLength = params->searchLength; | |
17 | zparams->targetLength = params->targetLength; |
|
17 | zparams->targetLength = params->targetLength; | |
18 | zparams->strategy = params->strategy; |
|
18 | zparams->strategy = params->strategy; | |
19 | } |
|
19 | } | |
20 |
|
20 | |||
21 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) { |
|
21 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args) { | |
22 | int compressionLevel; |
|
22 | int compressionLevel; | |
23 | unsigned PY_LONG_LONG sourceSize = 0; |
|
23 | unsigned PY_LONG_LONG sourceSize = 0; | |
24 | Py_ssize_t dictSize = 0; |
|
24 | Py_ssize_t dictSize = 0; | |
25 | ZSTD_compressionParameters params; |
|
25 | ZSTD_compressionParameters params; | |
26 | CompressionParametersObject* result; |
|
26 | CompressionParametersObject* result; | |
27 |
|
27 | |||
28 | if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters", |
|
28 | if (!PyArg_ParseTuple(args, "i|Kn:get_compression_parameters", | |
29 | &compressionLevel, &sourceSize, &dictSize)) { |
|
29 | &compressionLevel, &sourceSize, &dictSize)) { | |
30 | return NULL; |
|
30 | return NULL; | |
31 | } |
|
31 | } | |
32 |
|
32 | |||
33 | params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize); |
|
33 | params = ZSTD_getCParams(compressionLevel, sourceSize, dictSize); | |
34 |
|
34 | |||
35 | result = PyObject_New(CompressionParametersObject, &CompressionParametersType); |
|
35 | result = PyObject_New(CompressionParametersObject, &CompressionParametersType); | |
36 | if (!result) { |
|
36 | if (!result) { | |
37 | return NULL; |
|
37 | return NULL; | |
38 | } |
|
38 | } | |
39 |
|
39 | |||
40 | result->windowLog = params.windowLog; |
|
40 | result->windowLog = params.windowLog; | |
41 | result->chainLog = params.chainLog; |
|
41 | result->chainLog = params.chainLog; | |
42 | result->hashLog = params.hashLog; |
|
42 | result->hashLog = params.hashLog; | |
43 | result->searchLog = params.searchLog; |
|
43 | result->searchLog = params.searchLog; | |
44 | result->searchLength = params.searchLength; |
|
44 | result->searchLength = params.searchLength; | |
45 | result->targetLength = params.targetLength; |
|
45 | result->targetLength = params.targetLength; | |
46 | result->strategy = params.strategy; |
|
46 | result->strategy = params.strategy; | |
47 |
|
47 | |||
48 | return result; |
|
48 | return result; | |
49 | } |
|
49 | } | |
50 |
|
50 | |||
51 | static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) { |
|
51 | static int CompressionParameters_init(CompressionParametersObject* self, PyObject* args, PyObject* kwargs) { | |
52 | static char* kwlist[] = { |
|
52 | static char* kwlist[] = { | |
53 | "window_log", |
|
53 | "window_log", | |
54 | "chain_log", |
|
54 | "chain_log", | |
55 | "hash_log", |
|
55 | "hash_log", | |
56 | "search_log", |
|
56 | "search_log", | |
57 | "search_length", |
|
57 | "search_length", | |
58 | "target_length", |
|
58 | "target_length", | |
59 | "strategy", |
|
59 | "strategy", | |
60 | NULL |
|
60 | NULL | |
61 | }; |
|
61 | }; | |
62 |
|
62 | |||
63 | unsigned windowLog; |
|
63 | unsigned windowLog; | |
64 | unsigned chainLog; |
|
64 | unsigned chainLog; | |
65 | unsigned hashLog; |
|
65 | unsigned hashLog; | |
66 | unsigned searchLog; |
|
66 | unsigned searchLog; | |
67 | unsigned searchLength; |
|
67 | unsigned searchLength; | |
68 | unsigned targetLength; |
|
68 | unsigned targetLength; | |
69 | unsigned strategy; |
|
69 | unsigned strategy; | |
|
70 | ZSTD_compressionParameters params; | |||
|
71 | size_t zresult; | |||
70 |
|
72 | |||
71 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters", |
|
73 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "IIIIIII:CompressionParameters", | |
72 | kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength, |
|
74 | kwlist, &windowLog, &chainLog, &hashLog, &searchLog, &searchLength, | |
73 | &targetLength, &strategy)) { |
|
75 | &targetLength, &strategy)) { | |
74 | return -1; |
|
76 | return -1; | |
75 | } |
|
77 | } | |
76 |
|
78 | |||
77 | if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) { |
|
79 | if (windowLog < ZSTD_WINDOWLOG_MIN || windowLog > ZSTD_WINDOWLOG_MAX) { | |
78 | PyErr_SetString(PyExc_ValueError, "invalid window log value"); |
|
80 | PyErr_SetString(PyExc_ValueError, "invalid window log value"); | |
79 | return -1; |
|
81 | return -1; | |
80 | } |
|
82 | } | |
81 |
|
83 | |||
82 | if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) { |
|
84 | if (chainLog < ZSTD_CHAINLOG_MIN || chainLog > ZSTD_CHAINLOG_MAX) { | |
83 | PyErr_SetString(PyExc_ValueError, "invalid chain log value"); |
|
85 | PyErr_SetString(PyExc_ValueError, "invalid chain log value"); | |
84 | return -1; |
|
86 | return -1; | |
85 | } |
|
87 | } | |
86 |
|
88 | |||
87 | if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) { |
|
89 | if (hashLog < ZSTD_HASHLOG_MIN || hashLog > ZSTD_HASHLOG_MAX) { | |
88 | PyErr_SetString(PyExc_ValueError, "invalid hash log value"); |
|
90 | PyErr_SetString(PyExc_ValueError, "invalid hash log value"); | |
89 | return -1; |
|
91 | return -1; | |
90 | } |
|
92 | } | |
91 |
|
93 | |||
92 | if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) { |
|
94 | if (searchLog < ZSTD_SEARCHLOG_MIN || searchLog > ZSTD_SEARCHLOG_MAX) { | |
93 | PyErr_SetString(PyExc_ValueError, "invalid search log value"); |
|
95 | PyErr_SetString(PyExc_ValueError, "invalid search log value"); | |
94 | return -1; |
|
96 | return -1; | |
95 | } |
|
97 | } | |
96 |
|
98 | |||
97 | if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) { |
|
99 | if (searchLength < ZSTD_SEARCHLENGTH_MIN || searchLength > ZSTD_SEARCHLENGTH_MAX) { | |
98 | PyErr_SetString(PyExc_ValueError, "invalid search length value"); |
|
100 | PyErr_SetString(PyExc_ValueError, "invalid search length value"); | |
99 | return -1; |
|
101 | return -1; | |
100 | } |
|
102 | } | |
101 |
|
103 | |||
102 | if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) { |
|
104 | if (targetLength < ZSTD_TARGETLENGTH_MIN || targetLength > ZSTD_TARGETLENGTH_MAX) { | |
103 | PyErr_SetString(PyExc_ValueError, "invalid target length value"); |
|
105 | PyErr_SetString(PyExc_ValueError, "invalid target length value"); | |
104 | return -1; |
|
106 | return -1; | |
105 | } |
|
107 | } | |
106 |
|
108 | |||
107 | if (strategy < ZSTD_fast || strategy > ZSTD_btopt) { |
|
109 | if (strategy < ZSTD_fast || strategy > ZSTD_btopt) { | |
108 | PyErr_SetString(PyExc_ValueError, "invalid strategy value"); |
|
110 | PyErr_SetString(PyExc_ValueError, "invalid strategy value"); | |
109 | return -1; |
|
111 | return -1; | |
110 | } |
|
112 | } | |
111 |
|
113 | |||
112 | self->windowLog = windowLog; |
|
114 | self->windowLog = windowLog; | |
113 | self->chainLog = chainLog; |
|
115 | self->chainLog = chainLog; | |
114 | self->hashLog = hashLog; |
|
116 | self->hashLog = hashLog; | |
115 | self->searchLog = searchLog; |
|
117 | self->searchLog = searchLog; | |
116 | self->searchLength = searchLength; |
|
118 | self->searchLength = searchLength; | |
117 | self->targetLength = targetLength; |
|
119 | self->targetLength = targetLength; | |
118 | self->strategy = strategy; |
|
120 | self->strategy = strategy; | |
119 |
|
121 | |||
|
122 | ztopy_compression_parameters(self, ¶ms); | |||
|
123 | zresult = ZSTD_checkCParams(params); | |||
|
124 | ||||
|
125 | if (ZSTD_isError(zresult)) { | |||
|
126 | PyErr_Format(PyExc_ValueError, "invalid compression parameters: %s", | |||
|
127 | ZSTD_getErrorName(zresult)); | |||
|
128 | return -1; | |||
|
129 | } | |||
|
130 | ||||
120 | return 0; |
|
131 | return 0; | |
121 | } |
|
132 | } | |
122 |
|
133 | |||
|
134 | PyDoc_STRVAR(CompressionParameters_estimated_compression_context_size__doc__, | |||
|
135 | "Estimate the size in bytes of a compression context for compression parameters\n" | |||
|
136 | ); | |||
|
137 | ||||
|
138 | PyObject* CompressionParameters_estimated_compression_context_size(CompressionParametersObject* self) { | |||
|
139 | ZSTD_compressionParameters params; | |||
|
140 | ||||
|
141 | ztopy_compression_parameters(self, ¶ms); | |||
|
142 | ||||
|
143 | return PyLong_FromSize_t(ZSTD_estimateCCtxSize(params)); | |||
|
144 | } | |||
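As a rough sketch of what the added validation and method mean at the Python level (the constructor arguments follow the ``kwlist`` in ``CompressionParameters_init``; the exposed names are assumptions)::

   import zstd

   # Start from the parameters zstd recommends for a compression level...
   params = zstd.get_compression_parameters(3)

   # ...and rebuild them explicitly. With the ZSTD_checkCParams() call
   # added above, an out-of-range combination now raises ValueError here.
   custom = zstd.CompressionParameters(params.window_log,
                                       params.chain_log,
                                       params.hash_log,
                                       params.search_log,
                                       params.search_length,
                                       params.target_length,
                                       params.strategy)

   # New method: estimate the memory a compression context using these
   # parameters would require.
   print(custom.estimated_compression_context_size())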
|
145 | ||||
123 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) { |
|
146 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args) { | |
124 | CompressionParametersObject* params; |
|
147 | CompressionParametersObject* params; | |
125 | ZSTD_compressionParameters zparams; |
|
148 | ZSTD_compressionParameters zparams; | |
126 | PyObject* result; |
|
149 | PyObject* result; | |
127 |
|
150 | |||
128 | if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size", |
|
151 | if (!PyArg_ParseTuple(args, "O!:estimate_compression_context_size", | |
129 | &CompressionParametersType, ¶ms)) { |
|
152 | &CompressionParametersType, ¶ms)) { | |
130 | return NULL; |
|
153 | return NULL; | |
131 | } |
|
154 | } | |
132 |
|
155 | |||
133 | ztopy_compression_parameters(params, &zparams); |
|
156 | ztopy_compression_parameters(params, &zparams); | |
134 | result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams)); |
|
157 | result = PyLong_FromSize_t(ZSTD_estimateCCtxSize(zparams)); | |
135 | return result; |
|
158 | return result; | |
136 | } |
|
159 | } | |
137 |
|
160 | |||
138 | PyDoc_STRVAR(CompressionParameters__doc__, |
|
161 | PyDoc_STRVAR(CompressionParameters__doc__, | |
139 | "CompressionParameters: low-level control over zstd compression"); |
|
162 | "CompressionParameters: low-level control over zstd compression"); | |
140 |
|
163 | |||
141 | static void CompressionParameters_dealloc(PyObject* self) { |
|
164 | static void CompressionParameters_dealloc(PyObject* self) { | |
142 | PyObject_Del(self); |
|
165 | PyObject_Del(self); | |
143 | } |
|
166 | } | |
144 |
|
167 | |||
|
168 | static PyMethodDef CompressionParameters_methods[] = { | |||
|
169 | { | |||
|
170 | "estimated_compression_context_size", | |||
|
171 | (PyCFunction)CompressionParameters_estimated_compression_context_size, | |||
|
172 | METH_NOARGS, | |||
|
173 | CompressionParameters_estimated_compression_context_size__doc__ | |||
|
174 | }, | |||
|
175 | { NULL, NULL } | |||
|
176 | }; | |||
|
177 | ||||
145 | static PyMemberDef CompressionParameters_members[] = { |
|
178 | static PyMemberDef CompressionParameters_members[] = { | |
146 | { "window_log", T_UINT, |
|
179 | { "window_log", T_UINT, | |
147 | offsetof(CompressionParametersObject, windowLog), READONLY, |
|
180 | offsetof(CompressionParametersObject, windowLog), READONLY, | |
148 | "window log" }, |
|
181 | "window log" }, | |
149 | { "chain_log", T_UINT, |
|
182 | { "chain_log", T_UINT, | |
150 | offsetof(CompressionParametersObject, chainLog), READONLY, |
|
183 | offsetof(CompressionParametersObject, chainLog), READONLY, | |
151 | "chain log" }, |
|
184 | "chain log" }, | |
152 | { "hash_log", T_UINT, |
|
185 | { "hash_log", T_UINT, | |
153 | offsetof(CompressionParametersObject, hashLog), READONLY, |
|
186 | offsetof(CompressionParametersObject, hashLog), READONLY, | |
154 | "hash log" }, |
|
187 | "hash log" }, | |
155 | { "search_log", T_UINT, |
|
188 | { "search_log", T_UINT, | |
156 | offsetof(CompressionParametersObject, searchLog), READONLY, |
|
189 | offsetof(CompressionParametersObject, searchLog), READONLY, | |
157 | "search log" }, |
|
190 | "search log" }, | |
158 | { "search_length", T_UINT, |
|
191 | { "search_length", T_UINT, | |
159 | offsetof(CompressionParametersObject, searchLength), READONLY, |
|
192 | offsetof(CompressionParametersObject, searchLength), READONLY, | |
160 | "search length" }, |
|
193 | "search length" }, | |
161 | { "target_length", T_UINT, |
|
194 | { "target_length", T_UINT, | |
162 | offsetof(CompressionParametersObject, targetLength), READONLY, |
|
195 | offsetof(CompressionParametersObject, targetLength), READONLY, | |
163 | "target length" }, |
|
196 | "target length" }, | |
164 | { "strategy", T_INT, |
|
197 | { "strategy", T_INT, | |
165 | offsetof(CompressionParametersObject, strategy), READONLY, |
|
198 | offsetof(CompressionParametersObject, strategy), READONLY, | |
166 | "strategy" }, |
|
199 | "strategy" }, | |
167 | { NULL } |
|
200 | { NULL } | |
168 | }; |
|
201 | }; | |
169 |
|
202 | |||
170 | PyTypeObject CompressionParametersType = { |
|
203 | PyTypeObject CompressionParametersType = { | |
171 | PyVarObject_HEAD_INIT(NULL, 0) |
|
204 | PyVarObject_HEAD_INIT(NULL, 0) | |
172 | "CompressionParameters", /* tp_name */ |
|
205 | "CompressionParameters", /* tp_name */ | |
173 | sizeof(CompressionParametersObject), /* tp_basicsize */ |
|
206 | sizeof(CompressionParametersObject), /* tp_basicsize */ | |
174 | 0, /* tp_itemsize */ |
|
207 | 0, /* tp_itemsize */ | |
175 | (destructor)CompressionParameters_dealloc, /* tp_dealloc */ |
|
208 | (destructor)CompressionParameters_dealloc, /* tp_dealloc */ | |
176 | 0, /* tp_print */ |
|
209 | 0, /* tp_print */ | |
177 | 0, /* tp_getattr */ |
|
210 | 0, /* tp_getattr */ | |
178 | 0, /* tp_setattr */ |
|
211 | 0, /* tp_setattr */ | |
179 | 0, /* tp_compare */ |
|
212 | 0, /* tp_compare */ | |
180 | 0, /* tp_repr */ |
|
213 | 0, /* tp_repr */ | |
181 | 0, /* tp_as_number */ |
|
214 | 0, /* tp_as_number */ | |
182 | 0, /* tp_as_sequence */ |
|
215 | 0, /* tp_as_sequence */ | |
183 | 0, /* tp_as_mapping */ |
|
216 | 0, /* tp_as_mapping */ | |
184 | 0, /* tp_hash */ |
|
217 | 0, /* tp_hash */ | |
185 | 0, /* tp_call */ |
|
218 | 0, /* tp_call */ | |
186 | 0, /* tp_str */ |
|
219 | 0, /* tp_str */ | |
187 | 0, /* tp_getattro */ |
|
220 | 0, /* tp_getattro */ | |
188 | 0, /* tp_setattro */ |
|
221 | 0, /* tp_setattro */ | |
189 | 0, /* tp_as_buffer */ |
|
222 | 0, /* tp_as_buffer */ | |
190 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
223 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
191 | CompressionParameters__doc__, /* tp_doc */ |
|
224 | CompressionParameters__doc__, /* tp_doc */ | |
192 | 0, /* tp_traverse */ |
|
225 | 0, /* tp_traverse */ | |
193 | 0, /* tp_clear */ |
|
226 | 0, /* tp_clear */ | |
194 | 0, /* tp_richcompare */ |
|
227 | 0, /* tp_richcompare */ | |
195 | 0, /* tp_weaklistoffset */ |
|
228 | 0, /* tp_weaklistoffset */ | |
196 | 0, /* tp_iter */ |
|
229 | 0, /* tp_iter */ | |
197 | 0, /* tp_iternext */ |
|
230 | 0, /* tp_iternext */ | |
198 | 0, /* tp_methods */ |
|
231 | CompressionParameters_methods, /* tp_methods */ | |
199 | CompressionParameters_members, /* tp_members */ |
|
232 | CompressionParameters_members, /* tp_members */ | |
200 | 0, /* tp_getset */ |
|
233 | 0, /* tp_getset */ | |
201 | 0, /* tp_base */ |
|
234 | 0, /* tp_base */ | |
202 | 0, /* tp_dict */ |
|
235 | 0, /* tp_dict */ | |
203 | 0, /* tp_descr_get */ |
|
236 | 0, /* tp_descr_get */ | |
204 | 0, /* tp_descr_set */ |
|
237 | 0, /* tp_descr_set */ | |
205 | 0, /* tp_dictoffset */ |
|
238 | 0, /* tp_dictoffset */ | |
206 | (initproc)CompressionParameters_init, /* tp_init */ |
|
239 | (initproc)CompressionParameters_init, /* tp_init */ | |
207 | 0, /* tp_alloc */ |
|
240 | 0, /* tp_alloc */ | |
208 | PyType_GenericNew, /* tp_new */ |
|
241 | PyType_GenericNew, /* tp_new */ | |
209 | }; |
|
242 | }; | |
210 |
|
243 | |||
211 | void compressionparams_module_init(PyObject* mod) { |
|
244 | void compressionparams_module_init(PyObject* mod) { | |
212 | Py_TYPE(&CompressionParametersType) = &PyType_Type; |
|
245 | Py_TYPE(&CompressionParametersType) = &PyType_Type; | |
213 | if (PyType_Ready(&CompressionParametersType) < 0) { |
|
246 | if (PyType_Ready(&CompressionParametersType) < 0) { | |
214 | return; |
|
247 | return; | |
215 | } |
|
248 | } | |
216 |
|
249 | |||
217 |
Py_I |
|
250 | Py_INCREF(&CompressionParametersType); | |
218 | PyModule_AddObject(mod, "CompressionParameters", |
|
251 | PyModule_AddObject(mod, "CompressionParameters", | |
219 | (PyObject*)&CompressionParametersType); |
|
252 | (PyObject*)&CompressionParametersType); | |
220 | } |
|
253 | } |
@@ -1,290 +1,305 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | PyDoc_STRVAR(ZstdCompresssionWriter__doc__, |
|
13 | PyDoc_STRVAR(ZstdCompresssionWriter__doc__, | |
14 | """A context manager used for writing compressed output to a writer.\n" |
|
14 | """A context manager used for writing compressed output to a writer.\n" | |
15 | ); |
|
15 | ); | |
16 |
|
16 | |||
17 | static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) { |
|
17 | static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) { | |
18 | Py_XDECREF(self->compressor); |
|
18 | Py_XDECREF(self->compressor); | |
19 | Py_XDECREF(self->writer); |
|
19 | Py_XDECREF(self->writer); | |
20 |
|
20 | |||
21 | if (self->cstream) { |
|
|||
22 | ZSTD_freeCStream(self->cstream); |
|
|||
23 | self->cstream = NULL; |
|
|||
24 | } |
|
|||
25 |
|
||||
26 | PyObject_Del(self); |
|
21 | PyObject_Del(self); | |
27 | } |
|
22 | } | |
28 |
|
23 | |||
29 | static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) { |
|
24 | static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) { | |
30 | if (self->entered) { |
|
25 | if (self->entered) { | |
31 | PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); |
|
26 | PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); | |
32 | return NULL; |
|
27 | return NULL; | |
33 | } |
|
28 | } | |
34 |
|
29 | |||
35 | self->cstream = CStream_from_ZstdCompressor(self->compressor, self->sourceSize); |
|
30 | if (self->compressor->mtcctx) { | |
36 | if (!self->cstream) { |
|
31 | if (init_mtcstream(self->compressor, self->sourceSize)) { | |
37 | return NULL; |
|
32 | return NULL; | |
|
33 | } | |||
|
34 | } | |||
|
35 | else { | |||
|
36 | if (0 != init_cstream(self->compressor, self->sourceSize)) { | |||
|
37 | return NULL; | |||
|
38 | } | |||
38 | } |
|
39 | } | |
39 |
|
40 | |||
40 | self->entered = 1; |
|
41 | self->entered = 1; | |
41 |
|
42 | |||
42 | Py_INCREF(self); |
|
43 | Py_INCREF(self); | |
43 | return (PyObject*)self; |
|
44 | return (PyObject*)self; | |
44 | } |
|
45 | } | |
45 |
|
46 | |||
46 | static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) { |
|
47 | static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) { | |
47 | PyObject* exc_type; |
|
48 | PyObject* exc_type; | |
48 | PyObject* exc_value; |
|
49 | PyObject* exc_value; | |
49 | PyObject* exc_tb; |
|
50 | PyObject* exc_tb; | |
50 | size_t zresult; |
|
51 | size_t zresult; | |
51 |
|
52 | |||
52 | ZSTD_outBuffer output; |
|
53 | ZSTD_outBuffer output; | |
53 | PyObject* res; |
|
54 | PyObject* res; | |
54 |
|
55 | |||
55 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { |
|
56 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { | |
56 | return NULL; |
|
57 | return NULL; | |
57 | } |
|
58 | } | |
58 |
|
59 | |||
59 | self->entered = 0; |
|
60 | self->entered = 0; | |
60 |
|
61 | |||
61 | if (self->cstream && exc_type == Py_None && exc_value == Py_None && |
|
62 | if ((self->compressor->cstream || self->compressor->mtcctx) && exc_type == Py_None | |
62 | exc_tb == Py_None) { |
|
63 | && exc_value == Py_None && exc_tb == Py_None) { | |
63 |
|
64 | |||
64 | output.dst = PyMem_Malloc(self->outSize); |
|
65 | output.dst = PyMem_Malloc(self->outSize); | |
65 | if (!output.dst) { |
|
66 | if (!output.dst) { | |
66 | return PyErr_NoMemory(); |
|
67 | return PyErr_NoMemory(); | |
67 | } |
|
68 | } | |
68 | output.size = self->outSize; |
|
69 | output.size = self->outSize; | |
69 | output.pos = 0; |
|
70 | output.pos = 0; | |
70 |
|
71 | |||
71 | while (1) { |
|
72 | while (1) { | |
72 | zresult = ZSTD_endStream(self->cstream, &output); |
|
73 | if (self->compressor->mtcctx) { | |
|
74 | zresult = ZSTDMT_endStream(self->compressor->mtcctx, &output); | |||
|
75 | } | |||
|
76 | else { | |||
|
77 | zresult = ZSTD_endStream(self->compressor->cstream, &output); | |||
|
78 | } | |||
73 | if (ZSTD_isError(zresult)) { |
|
79 | if (ZSTD_isError(zresult)) { | |
74 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
80 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
75 | ZSTD_getErrorName(zresult)); |
|
81 | ZSTD_getErrorName(zresult)); | |
76 | PyMem_Free(output.dst); |
|
82 | PyMem_Free(output.dst); | |
77 | return NULL; |
|
83 | return NULL; | |
78 | } |
|
84 | } | |
79 |
|
85 | |||
80 | if (output.pos) { |
|
86 | if (output.pos) { | |
81 | #if PY_MAJOR_VERSION >= 3 |
|
87 | #if PY_MAJOR_VERSION >= 3 | |
82 | res = PyObject_CallMethod(self->writer, "write", "y#", |
|
88 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
83 | #else |
|
89 | #else | |
84 | res = PyObject_CallMethod(self->writer, "write", "s#", |
|
90 | res = PyObject_CallMethod(self->writer, "write", "s#", | |
85 | #endif |
|
91 | #endif | |
86 | output.dst, output.pos); |
|
92 | output.dst, output.pos); | |
87 | Py_XDECREF(res); |
|
93 | Py_XDECREF(res); | |
88 | } |
|
94 | } | |
89 |
|
95 | |||
90 | if (!zresult) { |
|
96 | if (!zresult) { | |
91 | break; |
|
97 | break; | |
92 | } |
|
98 | } | |
93 |
|
99 | |||
94 | output.pos = 0; |
|
100 | output.pos = 0; | |
95 | } |
|
101 | } | |
96 |
|
102 | |||
97 | PyMem_Free(output.dst); |
|
103 | PyMem_Free(output.dst); | |
98 | ZSTD_freeCStream(self->cstream); |
|
|||
99 | self->cstream = NULL; |
|
|||
100 | } |
|
104 | } | |
101 |
|
105 | |||
102 | Py_RETURN_FALSE; |
|
106 | Py_RETURN_FALSE; | |
103 | } |
|
107 | } | |
104 |
|
108 | |||
105 | static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) { |
|
109 | static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) { | |
106 | if (!self->cstream) { |
|
110 | if (!self->compressor->cstream) { | |
107 | PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; " |
|
111 | PyErr_SetString(ZstdError, "cannot determine size of an inactive compressor; " | |
108 | "call when a context manager is active"); |
|
112 | "call when a context manager is active"); | |
109 | return NULL; |
|
113 | return NULL; | |
110 | } |
|
114 | } | |
111 |
|
115 | |||
112 | return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->cstream)); |
|
116 | return PyLong_FromSize_t(ZSTD_sizeof_CStream(self->compressor->cstream)); | |
113 | } |
|
117 | } | |
114 |
|
118 | |||
115 | static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) { |
|
119 | static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args) { | |
116 | const char* source; |
|
120 | const char* source; | |
117 | Py_ssize_t sourceSize; |
|
121 | Py_ssize_t sourceSize; | |
118 | size_t zresult; |
|
122 | size_t zresult; | |
119 | ZSTD_inBuffer input; |
|
123 | ZSTD_inBuffer input; | |
120 | ZSTD_outBuffer output; |
|
124 | ZSTD_outBuffer output; | |
121 | PyObject* res; |
|
125 | PyObject* res; | |
122 | Py_ssize_t totalWrite = 0; |
|
126 | Py_ssize_t totalWrite = 0; | |
123 |
|
127 | |||
124 | #if PY_MAJOR_VERSION >= 3 |
|
128 | #if PY_MAJOR_VERSION >= 3 | |
125 | if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) { |
|
129 | if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) { | |
126 | #else |
|
130 | #else | |
127 | if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) { |
|
131 | if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) { | |
128 | #endif |
|
132 | #endif | |
129 | return NULL; |
|
133 | return NULL; | |
130 | } |
|
134 | } | |
131 |
|
135 | |||
132 | if (!self->entered) { |
|
136 | if (!self->entered) { | |
133 | PyErr_SetString(ZstdError, "compress must be called from an active context manager"); |
|
137 | PyErr_SetString(ZstdError, "compress must be called from an active context manager"); | |
134 | return NULL; |
|
138 | return NULL; | |
135 | } |
|
139 | } | |
136 |
|
140 | |||
137 | output.dst = PyMem_Malloc(self->outSize); |
|
141 | output.dst = PyMem_Malloc(self->outSize); | |
138 | if (!output.dst) { |
|
142 | if (!output.dst) { | |
139 | return PyErr_NoMemory(); |
|
143 | return PyErr_NoMemory(); | |
140 | } |
|
144 | } | |
141 | output.size = self->outSize; |
|
145 | output.size = self->outSize; | |
142 | output.pos = 0; |
|
146 | output.pos = 0; | |
143 |
|
147 | |||
144 | input.src = source; |
|
148 | input.src = source; | |
145 | input.size = sourceSize; |
|
149 | input.size = sourceSize; | |
146 | input.pos = 0; |
|
150 | input.pos = 0; | |
147 |
|
151 | |||
148 | while ((ssize_t)input.pos < sourceSize) { |
|
152 | while ((ssize_t)input.pos < sourceSize) { | |
149 | Py_BEGIN_ALLOW_THREADS |
|
153 | Py_BEGIN_ALLOW_THREADS | |
150 | zresult = ZSTD_compressStream(self->cstream, &output, &input); |
|
154 | if (self->compressor->mtcctx) { | |
|
155 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, | |||
|
156 | &output, &input); | |||
|
157 | } | |||
|
158 | else { | |||
|
159 | zresult = ZSTD_compressStream(self->compressor->cstream, &output, &input); | |||
|
160 | } | |||
151 | Py_END_ALLOW_THREADS |
|
161 | Py_END_ALLOW_THREADS | |
152 |
|
162 | |||
153 | if (ZSTD_isError(zresult)) { |
|
163 | if (ZSTD_isError(zresult)) { | |
154 | PyMem_Free(output.dst); |
|
164 | PyMem_Free(output.dst); | |
155 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
165 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
156 | return NULL; |
|
166 | return NULL; | |
157 | } |
|
167 | } | |
158 |
|
168 | |||
159 | /* Copy data from output buffer to writer. */ |
|
169 | /* Copy data from output buffer to writer. */ | |
160 | if (output.pos) { |
|
170 | if (output.pos) { | |
161 | #if PY_MAJOR_VERSION >= 3 |
|
171 | #if PY_MAJOR_VERSION >= 3 | |
162 | res = PyObject_CallMethod(self->writer, "write", "y#", |
|
172 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
163 | #else |
|
173 | #else | |
164 | res = PyObject_CallMethod(self->writer, "write", "s#", |
|
174 | res = PyObject_CallMethod(self->writer, "write", "s#", | |
165 | #endif |
|
175 | #endif | |
166 | output.dst, output.pos); |
|
176 | output.dst, output.pos); | |
167 | Py_XDECREF(res); |
|
177 | Py_XDECREF(res); | |
168 | totalWrite += output.pos; |
|
178 | totalWrite += output.pos; | |
169 | } |
|
179 | } | |
170 | output.pos = 0; |
|
180 | output.pos = 0; | |
171 | } |
|
181 | } | |
172 |
|
182 | |||
173 | PyMem_Free(output.dst); |
|
183 | PyMem_Free(output.dst); | |
174 |
|
184 | |||
175 | return PyLong_FromSsize_t(totalWrite); |
|
185 | return PyLong_FromSsize_t(totalWrite); | |
176 | } |
|
186 | } | |
177 |
|
187 | |||
178 | static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) { |
|
188 | static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) { | |
179 | size_t zresult; |
|
189 | size_t zresult; | |
180 | ZSTD_outBuffer output; |
|
190 | ZSTD_outBuffer output; | |
181 | PyObject* res; |
|
191 | PyObject* res; | |
182 | Py_ssize_t totalWrite = 0; |
|
192 | Py_ssize_t totalWrite = 0; | |
183 |
|
193 | |||
184 | if (!self->entered) { |
|
194 | if (!self->entered) { | |
185 | PyErr_SetString(ZstdError, "flush must be called from an active context manager"); |
|
195 | PyErr_SetString(ZstdError, "flush must be called from an active context manager"); | |
186 | return NULL; |
|
196 | return NULL; | |
187 | } |
|
197 | } | |
188 |
|
198 | |||
189 | output.dst = PyMem_Malloc(self->outSize); |
|
199 | output.dst = PyMem_Malloc(self->outSize); | |
190 | if (!output.dst) { |
|
200 | if (!output.dst) { | |
191 | return PyErr_NoMemory(); |
|
201 | return PyErr_NoMemory(); | |
192 | } |
|
202 | } | |
193 | output.size = self->outSize; |
|
203 | output.size = self->outSize; | |
194 | output.pos = 0; |
|
204 | output.pos = 0; | |
195 |
|
205 | |||
196 | while (1) { |
|
206 | while (1) { | |
197 | Py_BEGIN_ALLOW_THREADS |
|
207 | Py_BEGIN_ALLOW_THREADS | |
198 | zresult = ZSTD_flushStream(self->cstream, &output); |
|
208 | if (self->compressor->mtcctx) { | |
|
209 | zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &output); | |||
|
210 | } | |||
|
211 | else { | |||
|
212 | zresult = ZSTD_flushStream(self->compressor->cstream, &output); | |||
|
213 | } | |||
199 | Py_END_ALLOW_THREADS |
|
214 | Py_END_ALLOW_THREADS | |
200 |
|
215 | |||
201 | if (ZSTD_isError(zresult)) { |
|
216 | if (ZSTD_isError(zresult)) { | |
202 | PyMem_Free(output.dst); |
|
217 | PyMem_Free(output.dst); | |
203 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
218 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
204 | return NULL; |
|
219 | return NULL; | |
205 | } |
|
220 | } | |
206 |
|
221 | |||
207 | if (!output.pos) { |
|
222 | if (!output.pos) { | |
208 | break; |
|
223 | break; | |
209 | } |
|
224 | } | |
210 |
|
225 | |||
211 | /* Copy data from output buffer to writer. */ |
|
226 | /* Copy data from output buffer to writer. */ | |
212 | if (output.pos) { |
|
227 | if (output.pos) { | |
213 | #if PY_MAJOR_VERSION >= 3 |
|
228 | #if PY_MAJOR_VERSION >= 3 | |
214 | res = PyObject_CallMethod(self->writer, "write", "y#", |
|
229 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
215 | #else |
|
230 | #else | |
216 | res = PyObject_CallMethod(self->writer, "write", "s#", |
|
231 | res = PyObject_CallMethod(self->writer, "write", "s#", | |
217 | #endif |
|
232 | #endif | |
218 | output.dst, output.pos); |
|
233 | output.dst, output.pos); | |
219 | Py_XDECREF(res); |
|
234 | Py_XDECREF(res); | |
220 | totalWrite += output.pos; |
|
235 | totalWrite += output.pos; | |
221 | } |
|
236 | } | |
222 | output.pos = 0; |
|
237 | output.pos = 0; | |
223 | } |
|
238 | } | |
224 |
|
239 | |||
225 | PyMem_Free(output.dst); |
|
240 | PyMem_Free(output.dst); | |
226 |
|
241 | |||
227 | return PyLong_FromSsize_t(totalWrite); |
|
242 | return PyLong_FromSsize_t(totalWrite); | |
228 | } |
|
243 | } | |
229 |
|
244 | |||
230 | static PyMethodDef ZstdCompressionWriter_methods[] = { |
|
245 | static PyMethodDef ZstdCompressionWriter_methods[] = { | |
231 | { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS, |
|
246 | { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS, | |
232 | PyDoc_STR("Enter a compression context.") }, |
|
247 | PyDoc_STR("Enter a compression context.") }, | |
233 | { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS, |
|
248 | { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS, | |
234 | PyDoc_STR("Exit a compression context.") }, |
|
249 | PyDoc_STR("Exit a compression context.") }, | |
235 | { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS, |
|
250 | { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS, | |
236 | PyDoc_STR("Obtain the memory size of the underlying compressor") }, |
|
251 | PyDoc_STR("Obtain the memory size of the underlying compressor") }, | |
237 | { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS, |
|
252 | { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS, | |
238 | PyDoc_STR("Compress data") }, |
|
253 | PyDoc_STR("Compress data") }, | |
239 | { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS, |
|
254 | { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS, | |
240 | PyDoc_STR("Flush data and finish a zstd frame") }, |
|
255 | PyDoc_STR("Flush data and finish a zstd frame") }, | |
241 | { NULL, NULL } |
|
256 | { NULL, NULL } | |
242 | }; |
|
257 | }; | |
243 |
|
258 | |||
244 | PyTypeObject ZstdCompressionWriterType = { |
|
259 | PyTypeObject ZstdCompressionWriterType = { | |
245 | PyVarObject_HEAD_INIT(NULL, 0) |
|
260 | PyVarObject_HEAD_INIT(NULL, 0) | |
246 | "zstd.ZstdCompressionWriter", /* tp_name */ |
|
261 | "zstd.ZstdCompressionWriter", /* tp_name */ | |
247 | sizeof(ZstdCompressionWriter), /* tp_basicsize */ |
|
262 | sizeof(ZstdCompressionWriter), /* tp_basicsize */ | |
248 | 0, /* tp_itemsize */ |
|
263 | 0, /* tp_itemsize */ | |
249 | (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */ |
|
264 | (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */ | |
250 | 0, /* tp_print */ |
|
265 | 0, /* tp_print */ | |
251 | 0, /* tp_getattr */ |
|
266 | 0, /* tp_getattr */ | |
252 | 0, /* tp_setattr */ |
|
267 | 0, /* tp_setattr */ | |
253 | 0, /* tp_compare */ |
|
268 | 0, /* tp_compare */ | |
254 | 0, /* tp_repr */ |
|
269 | 0, /* tp_repr */ | |
255 | 0, /* tp_as_number */ |
|
270 | 0, /* tp_as_number */ | |
256 | 0, /* tp_as_sequence */ |
|
271 | 0, /* tp_as_sequence */ | |
257 | 0, /* tp_as_mapping */ |
|
272 | 0, /* tp_as_mapping */ | |
258 | 0, /* tp_hash */ |
|
273 | 0, /* tp_hash */ | |
259 | 0, /* tp_call */ |
|
274 | 0, /* tp_call */ | |
260 | 0, /* tp_str */ |
|
275 | 0, /* tp_str */ | |
261 | 0, /* tp_getattro */ |
|
276 | 0, /* tp_getattro */ | |
262 | 0, /* tp_setattro */ |
|
277 | 0, /* tp_setattro */ | |
263 | 0, /* tp_as_buffer */ |
|
278 | 0, /* tp_as_buffer */ | |
264 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
279 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
265 | ZstdCompresssionWriter__doc__, /* tp_doc */ |
|
280 | ZstdCompresssionWriter__doc__, /* tp_doc */ | |
266 | 0, /* tp_traverse */ |
|
281 | 0, /* tp_traverse */ | |
267 | 0, /* tp_clear */ |
|
282 | 0, /* tp_clear */ | |
268 | 0, /* tp_richcompare */ |
|
283 | 0, /* tp_richcompare */ | |
269 | 0, /* tp_weaklistoffset */ |
|
284 | 0, /* tp_weaklistoffset */ | |
270 | 0, /* tp_iter */ |
|
285 | 0, /* tp_iter */ | |
271 | 0, /* tp_iternext */ |
|
286 | 0, /* tp_iternext */ | |
272 | ZstdCompressionWriter_methods, /* tp_methods */ |
|
287 | ZstdCompressionWriter_methods, /* tp_methods */ | |
273 | 0, /* tp_members */ |
|
288 | 0, /* tp_members */ | |
274 | 0, /* tp_getset */ |
|
289 | 0, /* tp_getset */ | |
275 | 0, /* tp_base */ |
|
290 | 0, /* tp_base */ | |
276 | 0, /* tp_dict */ |
|
291 | 0, /* tp_dict */ | |
277 | 0, /* tp_descr_get */ |
|
292 | 0, /* tp_descr_get */ | |
278 | 0, /* tp_descr_set */ |
|
293 | 0, /* tp_descr_set */ | |
279 | 0, /* tp_dictoffset */ |
|
294 | 0, /* tp_dictoffset */ | |
280 | 0, /* tp_init */ |
|
295 | 0, /* tp_init */ | |
281 | 0, /* tp_alloc */ |
|
296 | 0, /* tp_alloc */ | |
282 | PyType_GenericNew, /* tp_new */ |
|
297 | PyType_GenericNew, /* tp_new */ | |
283 | }; |
|
298 | }; | |
284 |
|
299 | |||
285 | void compressionwriter_module_init(PyObject* mod) { |
|
300 | void compressionwriter_module_init(PyObject* mod) { | |
286 | Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type; |
|
301 | Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type; | |
287 | if (PyType_Ready(&ZstdCompressionWriterType) < 0) { |
|
302 | if (PyType_Ready(&ZstdCompressionWriterType) < 0) { | |
288 | return; |
|
303 | return; | |
289 | } |
|
304 | } | |
290 | } |
|
305 | } |
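
The hunks above switch the streaming writer from a per-writer ZSTD_CStream to the parent compressor's state, dispatching to the ZSTDMT_* multi-threaded APIs when the compressor was built with a thread pool, and to the single-threaded CStream otherwise. For orientation only, a rough Python-level usage sketch of the API this file backs, assuming the write_to() factory on ZstdCompressor and the new threads= constructor argument described in the docstrings later in this diff; the file name and sample data are placeholders:

    import zstandard as zstd

    # threads= enables multi-threaded compression; 2 worker threads is an arbitrary choice here.
    cctx = zstd.ZstdCompressor(level=3, threads=2)

    with open('output.zst', 'wb') as raw:
        # write_to() wraps a file-like object; compressed bytes are forwarded to raw.write().
        with cctx.write_to(raw) as compressor:
            compressor.write(b'first chunk of data')
            compressor.write(b'second chunk of data')
        # Leaving the inner context manager takes the __exit__ path above, which ends the zstd frame.
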
@@ -1,250 +1,258 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | PyDoc_STRVAR(ZstdCompressionObj__doc__, |
|
13 | PyDoc_STRVAR(ZstdCompressionObj__doc__, | |
14 | "Perform compression using a standard library compatible API.\n" |
|
14 | "Perform compression using a standard library compatible API.\n" | |
15 | ); |
|
15 | ); | |
16 |
|
16 | |||
17 | static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) { |
|
17 | static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) { | |
18 | PyMem_Free(self->output.dst); |
|
18 | PyMem_Free(self->output.dst); | |
19 | self->output.dst = NULL; |
|
19 | self->output.dst = NULL; | |
20 |
|
20 | |||
21 | if (self->cstream) { |
|
|||
22 | ZSTD_freeCStream(self->cstream); |
|
|||
23 | self->cstream = NULL; |
|
|||
24 | } |
|
|||
25 |
|
||||
26 | Py_XDECREF(self->compressor); |
|
21 | Py_XDECREF(self->compressor); | |
27 |
|
22 | |||
28 | PyObject_Del(self); |
|
23 | PyObject_Del(self); | |
29 | } |
|
24 | } | |
30 |
|
25 | |||
31 | static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) { |
|
26 | static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args) { | |
32 | const char* source; |
|
27 | const char* source; | |
33 | Py_ssize_t sourceSize; |
|
28 | Py_ssize_t sourceSize; | |
34 | ZSTD_inBuffer input; |
|
29 | ZSTD_inBuffer input; | |
35 | size_t zresult; |
|
30 | size_t zresult; | |
36 | PyObject* result = NULL; |
|
31 | PyObject* result = NULL; | |
37 | Py_ssize_t resultSize = 0; |
|
32 | Py_ssize_t resultSize = 0; | |
38 |
|
33 | |||
39 | if (self->finished) { |
|
34 | if (self->finished) { | |
40 | PyErr_SetString(ZstdError, "cannot call compress() after compressor finished"); |
|
35 | PyErr_SetString(ZstdError, "cannot call compress() after compressor finished"); | |
41 | return NULL; |
|
36 | return NULL; | |
42 | } |
|
37 | } | |
43 |
|
38 | |||
44 | #if PY_MAJOR_VERSION >= 3 |
|
39 | #if PY_MAJOR_VERSION >= 3 | |
45 | if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) { |
|
40 | if (!PyArg_ParseTuple(args, "y#:compress", &source, &sourceSize)) { | |
46 | #else |
|
41 | #else | |
47 | if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) { |
|
42 | if (!PyArg_ParseTuple(args, "s#:compress", &source, &sourceSize)) { | |
48 | #endif |
|
43 | #endif | |
49 | return NULL; |
|
44 | return NULL; | |
50 | } |
|
45 | } | |
51 |
|
46 | |||
52 | input.src = source; |
|
47 | input.src = source; | |
53 | input.size = sourceSize; |
|
48 | input.size = sourceSize; | |
54 | input.pos = 0; |
|
49 | input.pos = 0; | |
55 |
|
50 | |||
56 | while ((ssize_t)input.pos < sourceSize) { |
|
51 | while ((ssize_t)input.pos < sourceSize) { | |
57 | Py_BEGIN_ALLOW_THREADS |
|
52 | Py_BEGIN_ALLOW_THREADS | |
58 | zresult = ZSTD_compressStream(self->cstream, &self->output, &input); |
|
53 | if (self->compressor->mtcctx) { | |
|
54 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, | |||
|
55 | &self->output, &input); | |||
|
56 | } | |||
|
57 | else { | |||
|
58 | zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &input); | |||
|
59 | } | |||
59 | Py_END_ALLOW_THREADS |
|
60 | Py_END_ALLOW_THREADS | |
60 |
|
61 | |||
61 | if (ZSTD_isError(zresult)) { |
|
62 | if (ZSTD_isError(zresult)) { | |
62 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
63 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
63 | return NULL; |
|
64 | return NULL; | |
64 | } |
|
65 | } | |
65 |
|
66 | |||
66 | if (self->output.pos) { |
|
67 | if (self->output.pos) { | |
67 | if (result) { |
|
68 | if (result) { | |
68 | resultSize = PyBytes_GET_SIZE(result); |
|
69 | resultSize = PyBytes_GET_SIZE(result); | |
69 | if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) { |
|
70 | if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) { | |
70 | return NULL; |
|
71 | return NULL; | |
71 | } |
|
72 | } | |
72 |
|
73 | |||
73 | memcpy(PyBytes_AS_STRING(result) + resultSize, |
|
74 | memcpy(PyBytes_AS_STRING(result) + resultSize, | |
74 | self->output.dst, self->output.pos); |
|
75 | self->output.dst, self->output.pos); | |
75 | } |
|
76 | } | |
76 | else { |
|
77 | else { | |
77 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
78 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
78 | if (!result) { |
|
79 | if (!result) { | |
79 | return NULL; |
|
80 | return NULL; | |
80 | } |
|
81 | } | |
81 | } |
|
82 | } | |
82 |
|
83 | |||
83 | self->output.pos = 0; |
|
84 | self->output.pos = 0; | |
84 | } |
|
85 | } | |
85 | } |
|
86 | } | |
86 |
|
87 | |||
87 | if (result) { |
|
88 | if (result) { | |
88 | return result; |
|
89 | return result; | |
89 | } |
|
90 | } | |
90 | else { |
|
91 | else { | |
91 | return PyBytes_FromString(""); |
|
92 | return PyBytes_FromString(""); | |
92 | } |
|
93 | } | |
93 | } |
|
94 | } | |
94 |
|
95 | |||
95 | static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) { |
|
96 | static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) { | |
96 | int flushMode = compressorobj_flush_finish; |
|
97 | int flushMode = compressorobj_flush_finish; | |
97 | size_t zresult; |
|
98 | size_t zresult; | |
98 | PyObject* result = NULL; |
|
99 | PyObject* result = NULL; | |
99 | Py_ssize_t resultSize = 0; |
|
100 | Py_ssize_t resultSize = 0; | |
100 |
|
101 | |||
101 | if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) { |
|
102 | if (!PyArg_ParseTuple(args, "|i:flush", &flushMode)) { | |
102 | return NULL; |
|
103 | return NULL; | |
103 | } |
|
104 | } | |
104 |
|
105 | |||
105 | if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) { |
|
106 | if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) { | |
106 | PyErr_SetString(PyExc_ValueError, "flush mode not recognized"); |
|
107 | PyErr_SetString(PyExc_ValueError, "flush mode not recognized"); | |
107 | return NULL; |
|
108 | return NULL; | |
108 | } |
|
109 | } | |
109 |
|
110 | |||
110 | if (self->finished) { |
|
111 | if (self->finished) { | |
111 | PyErr_SetString(ZstdError, "compressor object already finished"); |
|
112 | PyErr_SetString(ZstdError, "compressor object already finished"); | |
112 | return NULL; |
|
113 | return NULL; | |
113 | } |
|
114 | } | |
114 |
|
115 | |||
115 | assert(self->output.pos == 0); |
|
116 | assert(self->output.pos == 0); | |
116 |
|
117 | |||
117 | if (flushMode == compressorobj_flush_block) { |
|
118 | if (flushMode == compressorobj_flush_block) { | |
118 | /* The output buffer is of size ZSTD_CStreamOutSize(), which is |
|
119 | /* The output buffer is of size ZSTD_CStreamOutSize(), which is | |
119 | guaranteed to hold a full block. */ |
|
120 | guaranteed to hold a full block. */ | |
120 | Py_BEGIN_ALLOW_THREADS |
|
121 | Py_BEGIN_ALLOW_THREADS | |
121 | zresult = ZSTD_flushStream(self->cstream, &self->output); |
|
122 | if (self->compressor->mtcctx) { | |
|
123 | zresult = ZSTDMT_flushStream(self->compressor->mtcctx, &self->output); | |||
|
124 | } | |||
|
125 | else { | |||
|
126 | zresult = ZSTD_flushStream(self->compressor->cstream, &self->output); | |||
|
127 | } | |||
122 | Py_END_ALLOW_THREADS |
|
128 | Py_END_ALLOW_THREADS | |
123 |
|
129 | |||
124 | if (ZSTD_isError(zresult)) { |
|
130 | if (ZSTD_isError(zresult)) { | |
125 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
131 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
126 | return NULL; |
|
132 | return NULL; | |
127 | } |
|
133 | } | |
128 |
|
134 | |||
129 | /* Output buffer is guaranteed to hold full block. */ |
|
135 | /* Output buffer is guaranteed to hold full block. */ | |
130 | assert(zresult == 0); |
|
136 | assert(zresult == 0); | |
131 |
|
137 | |||
132 | if (self->output.pos) { |
|
138 | if (self->output.pos) { | |
133 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
139 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
134 | if (!result) { |
|
140 | if (!result) { | |
135 | return NULL; |
|
141 | return NULL; | |
136 | } |
|
142 | } | |
137 | } |
|
143 | } | |
138 |
|
144 | |||
139 | self->output.pos = 0; |
|
145 | self->output.pos = 0; | |
140 |
|
146 | |||
141 | if (result) { |
|
147 | if (result) { | |
142 | return result; |
|
148 | return result; | |
143 | } |
|
149 | } | |
144 | else { |
|
150 | else { | |
145 | return PyBytes_FromString(""); |
|
151 | return PyBytes_FromString(""); | |
146 | } |
|
152 | } | |
147 | } |
|
153 | } | |
148 |
|
154 | |||
149 | assert(flushMode == compressorobj_flush_finish); |
|
155 | assert(flushMode == compressorobj_flush_finish); | |
150 | self->finished = 1; |
|
156 | self->finished = 1; | |
151 |
|
157 | |||
152 | while (1) { |
|
158 | while (1) { | |
153 | zresult = ZSTD_endStream(self->cstream, &self->output); |
|
159 | if (self->compressor->mtcctx) { | |
|
160 | zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output); | |||
|
161 | } | |||
|
162 | else { | |||
|
163 | zresult = ZSTD_endStream(self->compressor->cstream, &self->output); | |||
|
164 | } | |||
154 | if (ZSTD_isError(zresult)) { |
|
165 | if (ZSTD_isError(zresult)) { | |
155 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
166 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
156 | ZSTD_getErrorName(zresult)); |
|
167 | ZSTD_getErrorName(zresult)); | |
157 | return NULL; |
|
168 | return NULL; | |
158 | } |
|
169 | } | |
159 |
|
170 | |||
160 | if (self->output.pos) { |
|
171 | if (self->output.pos) { | |
161 | if (result) { |
|
172 | if (result) { | |
162 | resultSize = PyBytes_GET_SIZE(result); |
|
173 | resultSize = PyBytes_GET_SIZE(result); | |
163 | if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) { |
|
174 | if (-1 == _PyBytes_Resize(&result, resultSize + self->output.pos)) { | |
164 | return NULL; |
|
175 | return NULL; | |
165 | } |
|
176 | } | |
166 |
|
177 | |||
167 | memcpy(PyBytes_AS_STRING(result) + resultSize, |
|
178 | memcpy(PyBytes_AS_STRING(result) + resultSize, | |
168 | self->output.dst, self->output.pos); |
|
179 | self->output.dst, self->output.pos); | |
169 | } |
|
180 | } | |
170 | else { |
|
181 | else { | |
171 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
182 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
172 | if (!result) { |
|
183 | if (!result) { | |
173 | return NULL; |
|
184 | return NULL; | |
174 | } |
|
185 | } | |
175 | } |
|
186 | } | |
176 |
|
187 | |||
177 | self->output.pos = 0; |
|
188 | self->output.pos = 0; | |
178 | } |
|
189 | } | |
179 |
|
190 | |||
180 | if (!zresult) { |
|
191 | if (!zresult) { | |
181 | break; |
|
192 | break; | |
182 | } |
|
193 | } | |
183 | } |
|
194 | } | |
184 |
|
195 | |||
185 | ZSTD_freeCStream(self->cstream); |
|
|||
186 | self->cstream = NULL; |
|
|||
187 |
|
||||
188 | if (result) { |
|
196 | if (result) { | |
189 | return result; |
|
197 | return result; | |
190 | } |
|
198 | } | |
191 | else { |
|
199 | else { | |
192 | return PyBytes_FromString(""); |
|
200 | return PyBytes_FromString(""); | |
193 | } |
|
201 | } | |
194 | } |
|
202 | } | |
195 |
|
203 | |||
196 | static PyMethodDef ZstdCompressionObj_methods[] = { |
|
204 | static PyMethodDef ZstdCompressionObj_methods[] = { | |
197 | { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS, |
|
205 | { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS, | |
198 | PyDoc_STR("compress data") }, |
|
206 | PyDoc_STR("compress data") }, | |
199 | { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS, |
|
207 | { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS, | |
200 | PyDoc_STR("finish compression operation") }, |
|
208 | PyDoc_STR("finish compression operation") }, | |
201 | { NULL, NULL } |
|
209 | { NULL, NULL } | |
202 | }; |
|
210 | }; | |
203 |
|
211 | |||
204 | PyTypeObject ZstdCompressionObjType = { |
|
212 | PyTypeObject ZstdCompressionObjType = { | |
205 | PyVarObject_HEAD_INIT(NULL, 0) |
|
213 | PyVarObject_HEAD_INIT(NULL, 0) | |
206 | "zstd.ZstdCompressionObj", /* tp_name */ |
|
214 | "zstd.ZstdCompressionObj", /* tp_name */ | |
207 | sizeof(ZstdCompressionObj), /* tp_basicsize */ |
|
215 | sizeof(ZstdCompressionObj), /* tp_basicsize */ | |
208 | 0, /* tp_itemsize */ |
|
216 | 0, /* tp_itemsize */ | |
209 | (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */ |
|
217 | (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */ | |
210 | 0, /* tp_print */ |
|
218 | 0, /* tp_print */ | |
211 | 0, /* tp_getattr */ |
|
219 | 0, /* tp_getattr */ | |
212 | 0, /* tp_setattr */ |
|
220 | 0, /* tp_setattr */ | |
213 | 0, /* tp_compare */ |
|
221 | 0, /* tp_compare */ | |
214 | 0, /* tp_repr */ |
|
222 | 0, /* tp_repr */ | |
215 | 0, /* tp_as_number */ |
|
223 | 0, /* tp_as_number */ | |
216 | 0, /* tp_as_sequence */ |
|
224 | 0, /* tp_as_sequence */ | |
217 | 0, /* tp_as_mapping */ |
|
225 | 0, /* tp_as_mapping */ | |
218 | 0, /* tp_hash */ |
|
226 | 0, /* tp_hash */ | |
219 | 0, /* tp_call */ |
|
227 | 0, /* tp_call */ | |
220 | 0, /* tp_str */ |
|
228 | 0, /* tp_str */ | |
221 | 0, /* tp_getattro */ |
|
229 | 0, /* tp_getattro */ | |
222 | 0, /* tp_setattro */ |
|
230 | 0, /* tp_setattro */ | |
223 | 0, /* tp_as_buffer */ |
|
231 | 0, /* tp_as_buffer */ | |
224 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
232 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
225 | ZstdCompressionObj__doc__, /* tp_doc */ |
|
233 | ZstdCompressionObj__doc__, /* tp_doc */ | |
226 | 0, /* tp_traverse */ |
|
234 | 0, /* tp_traverse */ | |
227 | 0, /* tp_clear */ |
|
235 | 0, /* tp_clear */ | |
228 | 0, /* tp_richcompare */ |
|
236 | 0, /* tp_richcompare */ | |
229 | 0, /* tp_weaklistoffset */ |
|
237 | 0, /* tp_weaklistoffset */ | |
230 | 0, /* tp_iter */ |
|
238 | 0, /* tp_iter */ | |
231 | 0, /* tp_iternext */ |
|
239 | 0, /* tp_iternext */ | |
232 | ZstdCompressionObj_methods, /* tp_methods */ |
|
240 | ZstdCompressionObj_methods, /* tp_methods */ | |
233 | 0, /* tp_members */ |
|
241 | 0, /* tp_members */ | |
234 | 0, /* tp_getset */ |
|
242 | 0, /* tp_getset */ | |
235 | 0, /* tp_base */ |
|
243 | 0, /* tp_base */ | |
236 | 0, /* tp_dict */ |
|
244 | 0, /* tp_dict */ | |
237 | 0, /* tp_descr_get */ |
|
245 | 0, /* tp_descr_get */ | |
238 | 0, /* tp_descr_set */ |
|
246 | 0, /* tp_descr_set */ | |
239 | 0, /* tp_dictoffset */ |
|
247 | 0, /* tp_dictoffset */ | |
240 | 0, /* tp_init */ |
|
248 | 0, /* tp_init */ | |
241 | 0, /* tp_alloc */ |
|
249 | 0, /* tp_alloc */ | |
242 | PyType_GenericNew, /* tp_new */ |
|
250 | PyType_GenericNew, /* tp_new */ | |
243 | }; |
|
251 | }; | |
244 |
|
252 | |||
245 | void compressobj_module_init(PyObject* module) { |
|
253 | void compressobj_module_init(PyObject* module) { | |
246 | Py_TYPE(&ZstdCompressionObjType) = &PyType_Type; |
|
254 | Py_TYPE(&ZstdCompressionObjType) = &PyType_Type; | |
247 | if (PyType_Ready(&ZstdCompressionObjType) < 0) { |
|
255 | if (PyType_Ready(&ZstdCompressionObjType) < 0) { | |
248 | return; |
|
256 | return; | |
249 | } |
|
257 | } | |
250 | } |
|
258 | } |
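
The same dispatch pattern is applied to ZstdCompressionObj above: the object no longer owns a ZSTD_CStream and instead drives either the parent compressor's multi-threaded context or its regular CStream. A minimal sketch of how this standard-library-style object is used from Python, assuming compressobj() is the factory method on ZstdCompressor and that flush() defaults to finishing the frame, as the C code above does:

    import zstandard as zstd

    # Per the docstring added in the next file, threads=-1 selects one thread per detected logical CPU.
    cctx = zstd.ZstdCompressor(threads=-1)
    cobj = cctx.compressobj()

    # compress() may buffer internally and return b'' until enough input has accumulated.
    chunks = [cobj.compress(b'data to compress'), cobj.flush()]
    compressed = b''.join(chunks)
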
@@ -1,791 +1,1544 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
|
10 | #include "pool.h" | |||
10 |
|
11 | |||
11 | extern PyObject* ZstdError; |
|
12 | extern PyObject* ZstdError; | |
12 |
|
13 | |||
13 | int populate_cdict(ZstdCompressor* compressor, |
|
14 | int populate_cdict(ZstdCompressor* compressor, ZSTD_parameters* zparams) { | |
14 | ZSTD_customMem zmem; |
|
15 | ZSTD_customMem zmem; | |
15 | assert(!compressor->cdict); |
|
16 | ||
|
17 | if (compressor->cdict || !compressor->dict || !compressor->dict->dictData) { | |||
|
18 | return 0; | |||
|
19 | } | |||
|
20 | ||||
16 | Py_BEGIN_ALLOW_THREADS |
|
21 | Py_BEGIN_ALLOW_THREADS | |
17 | memset(&zmem, 0, sizeof(zmem)); |
|
22 | memset(&zmem, 0, sizeof(zmem)); | |
18 | compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData, |
|
23 | compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData, | |
19 | compressor->dict->dictSize, 1, *zparams, zmem); |
|
24 | compressor->dict->dictSize, 1, *zparams, zmem); | |
20 | Py_END_ALLOW_THREADS |
|
25 | Py_END_ALLOW_THREADS | |
21 |
|
26 | |||
22 | if (!compressor->cdict) { |
|
27 | if (!compressor->cdict) { | |
23 | PyErr_SetString(ZstdError, "could not create compression dictionary"); |
|
28 | PyErr_SetString(ZstdError, "could not create compression dictionary"); | |
24 | return 1; |
|
29 | return 1; | |
25 | } |
|
30 | } | |
26 |
|
31 | |||
27 | return 0; |
|
32 | return 0; | |
28 | } |
|
33 | } | |
29 |
|
34 | |||
30 | /** |
|
35 | /** | |
31 |
|
|
36 | * Ensure the ZSTD_CStream on a ZstdCompressor instance is initialized. | |
32 | * |
|
37 | * | |
33 |
* Returns |
|
38 | * Returns 0 on success. Other value on failure. Will set a Python exception | |
34 | * exception will be set. |
|
39 | * on failure. | |
35 | */ |
|
40 | */ | |
36 |
|
|
41 | int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize) { | |
37 | ZSTD_CStream* cstream; |
|
|||
38 | ZSTD_parameters zparams; |
|
42 | ZSTD_parameters zparams; | |
39 | void* dictData = NULL; |
|
43 | void* dictData = NULL; | |
40 | size_t dictSize = 0; |
|
44 | size_t dictSize = 0; | |
41 | size_t zresult; |
|
45 | size_t zresult; | |
42 |
|
46 | |||
43 | cstream = ZSTD_createCStream(); |
|
47 | if (compressor->cstream) { | |
44 | if (!cstream) { |
|
48 | zresult = ZSTD_resetCStream(compressor->cstream, sourceSize); | |
45 | PyErr_SetString(ZstdError, "cannot create CStream"); |
|
49 | if (ZSTD_isError(zresult)) { | |
46 | return NULL; |
|
50 | PyErr_Format(ZstdError, "could not reset CStream: %s", | |
|
51 | ZSTD_getErrorName(zresult)); | |||
|
52 | return -1; | |||
|
53 | } | |||
|
54 | ||||
|
55 | return 0; | |||
|
56 | } | |||
|
57 | ||||
|
58 | compressor->cstream = ZSTD_createCStream(); | |||
|
59 | if (!compressor->cstream) { | |||
|
60 | PyErr_SetString(ZstdError, "could not create CStream"); | |||
|
61 | return -1; | |||
47 | } |
|
62 | } | |
48 |
|
63 | |||
49 | if (compressor->dict) { |
|
64 | if (compressor->dict) { | |
50 | dictData = compressor->dict->dictData; |
|
65 | dictData = compressor->dict->dictData; | |
51 | dictSize = compressor->dict->dictSize; |
|
66 | dictSize = compressor->dict->dictSize; | |
52 | } |
|
67 | } | |
53 |
|
68 | |||
54 | memset(&zparams, 0, sizeof(zparams)); |
|
69 | memset(&zparams, 0, sizeof(zparams)); | |
55 | if (compressor->cparams) { |
|
70 | if (compressor->cparams) { | |
56 | ztopy_compression_parameters(compressor->cparams, &zparams.cParams); |
|
71 | ztopy_compression_parameters(compressor->cparams, &zparams.cParams); | |
57 | /* Do NOT call ZSTD_adjustCParams() here because the compression params |
|
72 | /* Do NOT call ZSTD_adjustCParams() here because the compression params | |
58 | come from the user. */ |
|
73 | come from the user. */ | |
59 | } |
|
74 | } | |
60 | else { |
|
75 | else { | |
61 | zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize); |
|
76 | zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize); | |
62 | } |
|
77 | } | |
63 |
|
78 | |||
64 | zparams.fParams = compressor->fparams; |
|
79 | zparams.fParams = compressor->fparams; | |
65 |
|
80 | |||
66 |
zresult = ZSTD_initCStream_advanced(cstream, dictData, dictSize, |
|
81 | zresult = ZSTD_initCStream_advanced(compressor->cstream, dictData, dictSize, | |
|
82 | zparams, sourceSize); | |||
67 |
|
83 | |||
68 | if (ZSTD_isError(zresult)) { |
|
84 | if (ZSTD_isError(zresult)) { | |
69 | ZSTD_freeCStream(cstream); |
|
85 | ZSTD_freeCStream(compressor->cstream); | |
|
86 | compressor->cstream = NULL; | |||
70 | PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult)); |
|
87 | PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult)); | |
71 |
return |
|
88 | return -1; | |
72 | } |
|
89 | } | |
73 |
|
90 | |||
74 |
return |
|
91 | return 0;; | |
|
92 | } | |||
|
93 | ||||
|
94 | int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize) { | |||
|
95 | size_t zresult; | |||
|
96 | void* dictData = NULL; | |||
|
97 | size_t dictSize = 0; | |||
|
98 | ZSTD_parameters zparams; | |||
|
99 | ||||
|
100 | assert(compressor->mtcctx); | |||
|
101 | ||||
|
102 | if (compressor->dict) { | |||
|
103 | dictData = compressor->dict->dictData; | |||
|
104 | dictSize = compressor->dict->dictSize; | |||
|
105 | } | |||
|
106 | ||||
|
107 | memset(&zparams, 0, sizeof(zparams)); | |||
|
108 | if (compressor->cparams) { | |||
|
109 | ztopy_compression_parameters(compressor->cparams, &zparams.cParams); | |||
|
110 | } | |||
|
111 | else { | |||
|
112 | zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, sourceSize, dictSize); | |||
|
113 | } | |||
|
114 | ||||
|
115 | zparams.fParams = compressor->fparams; | |||
|
116 | ||||
|
117 | zresult = ZSTDMT_initCStream_advanced(compressor->mtcctx, dictData, dictSize, | |||
|
118 | zparams, sourceSize); | |||
|
119 | ||||
|
120 | if (ZSTD_isError(zresult)) { | |||
|
121 | PyErr_Format(ZstdError, "cannot init CStream: %s", ZSTD_getErrorName(zresult)); | |||
|
122 | return -1; | |||
|
123 | } | |||
|
124 | ||||
|
125 | return 0; | |||
75 | } |
|
126 | } | |
76 |
|
127 | |||
77 | PyDoc_STRVAR(ZstdCompressor__doc__, |
|
128 | PyDoc_STRVAR(ZstdCompressor__doc__, | |
78 | "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" |
|
129 | "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" | |
79 | "\n" |
|
130 | "\n" | |
80 | "Create an object used to perform Zstandard compression.\n" |
|
131 | "Create an object used to perform Zstandard compression.\n" | |
81 | "\n" |
|
132 | "\n" | |
82 | "An instance can compress data various ways. Instances can be used multiple\n" |
|
133 | "An instance can compress data various ways. Instances can be used multiple\n" | |
83 | "times. Each compression operation will use the compression parameters\n" |
|
134 | "times. Each compression operation will use the compression parameters\n" | |
84 | "defined at construction time.\n" |
|
135 | "defined at construction time.\n" | |
85 | "\n" |
|
136 | "\n" | |
86 | "Compression can be configured via the following names arguments:\n" |
|
137 | "Compression can be configured via the following names arguments:\n" | |
87 | "\n" |
|
138 | "\n" | |
88 | "level\n" |
|
139 | "level\n" | |
89 | " Integer compression level.\n" |
|
140 | " Integer compression level.\n" | |
90 | "dict_data\n" |
|
141 | "dict_data\n" | |
91 | " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n" |
|
142 | " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n" | |
92 | "compression_params\n" |
|
143 | "compression_params\n" | |
93 | " A ``CompressionParameters`` instance defining low-level compression" |
|
144 | " A ``CompressionParameters`` instance defining low-level compression" | |
94 | " parameters. If defined, this will overwrite the ``level`` argument.\n" |
|
145 | " parameters. If defined, this will overwrite the ``level`` argument.\n" | |
95 | "write_checksum\n" |
|
146 | "write_checksum\n" | |
96 | " If True, a 4 byte content checksum will be written with the compressed\n" |
|
147 | " If True, a 4 byte content checksum will be written with the compressed\n" | |
97 | " data, allowing the decompressor to perform content verification.\n" |
|
148 | " data, allowing the decompressor to perform content verification.\n" | |
98 | "write_content_size\n" |
|
149 | "write_content_size\n" | |
99 | " If True, the decompressed content size will be included in the header of\n" |
|
150 | " If True, the decompressed content size will be included in the header of\n" | |
100 | " the compressed data. This data will only be written if the compressor\n" |
|
151 | " the compressed data. This data will only be written if the compressor\n" | |
101 | " knows the size of the input data.\n" |
|
152 | " knows the size of the input data.\n" | |
102 | "write_dict_id\n" |
|
153 | "write_dict_id\n" | |
103 | " Determines whether the dictionary ID will be written into the compressed\n" |
|
154 | " Determines whether the dictionary ID will be written into the compressed\n" | |
104 | " data. Defaults to True. Only adds content to the compressed data if\n" |
|
155 | " data. Defaults to True. Only adds content to the compressed data if\n" | |
105 | " a dictionary is being used.\n" |
|
156 | " a dictionary is being used.\n" | |
|
157 | "threads\n" | |||
|
158 | " Number of threads to use to compress data concurrently. When set,\n" | |||
|
159 | " compression operations are performed on multiple threads. The default\n" | |||
|
160 | " value (0) disables multi-threaded compression. A value of ``-1`` means to\n" | |||
|
161 | " set the number of threads to the number of detected logical CPUs.\n" | |||
106 | ); |
|
162 | ); | |
107 |
|
163 | |||
108 | static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
164 | static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
109 | static char* kwlist[] = { |
|
165 | static char* kwlist[] = { | |
110 | "level", |
|
166 | "level", | |
111 | "dict_data", |
|
167 | "dict_data", | |
112 | "compression_params", |
|
168 | "compression_params", | |
113 | "write_checksum", |
|
169 | "write_checksum", | |
114 | "write_content_size", |
|
170 | "write_content_size", | |
115 | "write_dict_id", |
|
171 | "write_dict_id", | |
|
172 | "threads", | |||
116 | NULL |
|
173 | NULL | |
117 | }; |
|
174 | }; | |
118 |
|
175 | |||
119 | int level = 3; |
|
176 | int level = 3; | |
120 | ZstdCompressionDict* dict = NULL; |
|
177 | ZstdCompressionDict* dict = NULL; | |
121 | CompressionParametersObject* params = NULL; |
|
178 | CompressionParametersObject* params = NULL; | |
122 | PyObject* writeChecksum = NULL; |
|
179 | PyObject* writeChecksum = NULL; | |
123 | PyObject* writeContentSize = NULL; |
|
180 | PyObject* writeContentSize = NULL; | |
124 | PyObject* writeDictID = NULL; |
|
181 | PyObject* writeDictID = NULL; | |
|
182 | int threads = 0; | |||
125 |
|
183 | |||
126 | self->cctx = NULL; |
|
184 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor", | |
127 | self->dict = NULL; |
|
|||
128 | self->cparams = NULL; |
|
|||
129 | self->cdict = NULL; |
|
|||
130 |
|
||||
131 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOO:ZstdCompressor", |
|
|||
132 | kwlist, &level, &ZstdCompressionDictType, &dict, |
|
185 | kwlist, &level, &ZstdCompressionDictType, &dict, | |
133 | &CompressionParametersType, ¶ms, |
|
186 | &CompressionParametersType, ¶ms, | |
134 | &writeChecksum, &writeContentSize, &writeDictID)) { |
|
187 | &writeChecksum, &writeContentSize, &writeDictID, &threads)) { | |
135 | return -1; |
|
188 | return -1; | |
136 | } |
|
189 | } | |
137 |
|
190 | |||
138 | if (level < 1) { |
|
191 | if (level < 1) { | |
139 | PyErr_SetString(PyExc_ValueError, "level must be greater than 0"); |
|
192 | PyErr_SetString(PyExc_ValueError, "level must be greater than 0"); | |
140 | return -1; |
|
193 | return -1; | |
141 | } |
|
194 | } | |
142 |
|
195 | |||
143 | if (level > ZSTD_maxCLevel()) { |
|
196 | if (level > ZSTD_maxCLevel()) { | |
144 | PyErr_Format(PyExc_ValueError, "level must be less than %d", |
|
197 | PyErr_Format(PyExc_ValueError, "level must be less than %d", | |
145 | ZSTD_maxCLevel() + 1); |
|
198 | ZSTD_maxCLevel() + 1); | |
146 | return -1; |
|
199 | return -1; | |
147 | } |
|
200 | } | |
148 |
|
201 | |||
|
202 | if (threads < 0) { | |||
|
203 | threads = cpu_count(); | |||
|
204 | } | |||
|
205 | ||||
|
206 | self->threads = threads; | |||
|
207 | ||||
149 | /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the |
|
208 | /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the | |
150 | overhead of each compression operation. */ |
|
209 | overhead of each compression operation. */ | |
151 | self->cctx = ZSTD_createCCtx(); |
|
210 | if (threads) { | |
152 | if (!self->cctx) { |
|
211 | self->mtcctx = ZSTDMT_createCCtx(threads); | |
153 | PyErr_NoMemory(); |
|
212 | if (!self->mtcctx) { | |
154 | return -1; |
|
213 | PyErr_NoMemory(); | |
|
214 | return -1; | |||
|
215 | } | |||
|
216 | } | |||
|
217 | else { | |||
|
218 | self->cctx = ZSTD_createCCtx(); | |||
|
219 | if (!self->cctx) { | |||
|
220 | PyErr_NoMemory(); | |||
|
221 | return -1; | |||
|
222 | } | |||
155 | } |
|
223 | } | |
156 |
|
224 | |||
157 | self->compressionLevel = level; |
|
225 | self->compressionLevel = level; | |
158 |
|
226 | |||
159 | if (dict) { |
|
227 | if (dict) { | |
160 | self->dict = dict; |
|
228 | self->dict = dict; | |
161 | Py_INCREF(dict); |
|
229 | Py_INCREF(dict); | |
162 | } |
|
230 | } | |
163 |
|
231 | |||
164 | if (params) { |
|
232 | if (params) { | |
165 | self->cparams = params; |
|
233 | self->cparams = params; | |
166 | Py_INCREF(params); |
|
234 | Py_INCREF(params); | |
167 | } |
|
235 | } | |
168 |
|
236 | |||
169 | memset(&self->fparams, 0, sizeof(self->fparams)); |
|
237 | memset(&self->fparams, 0, sizeof(self->fparams)); | |
170 |
|
238 | |||
171 | if (writeChecksum && PyObject_IsTrue(writeChecksum)) { |
|
239 | if (writeChecksum && PyObject_IsTrue(writeChecksum)) { | |
172 | self->fparams.checksumFlag = 1; |
|
240 | self->fparams.checksumFlag = 1; | |
173 | } |
|
241 | } | |
174 | if (writeContentSize && PyObject_IsTrue(writeContentSize)) { |
|
242 | if (writeContentSize && PyObject_IsTrue(writeContentSize)) { | |
175 | self->fparams.contentSizeFlag = 1; |
|
243 | self->fparams.contentSizeFlag = 1; | |
176 | } |
|
244 | } | |
177 | if (writeDictID && PyObject_Not(writeDictID)) { |
|
245 | if (writeDictID && PyObject_Not(writeDictID)) { | |
178 | self->fparams.noDictIDFlag = 1; |
|
246 | self->fparams.noDictIDFlag = 1; | |
179 | } |
|
247 | } | |
180 |
|
248 | |||
181 | return 0; |
|
249 | return 0; | |
182 | } |
|
250 | } | |
183 |
|
251 | |||
184 | static void ZstdCompressor_dealloc(ZstdCompressor* self) { |
|
252 | static void ZstdCompressor_dealloc(ZstdCompressor* self) { | |
|
253 | if (self->cstream) { | |||
|
254 | ZSTD_freeCStream(self->cstream); | |||
|
255 | self->cstream = NULL; | |||
|
256 | } | |||
|
257 | ||||
185 | Py_XDECREF(self->cparams); |
|
258 | Py_XDECREF(self->cparams); | |
186 | Py_XDECREF(self->dict); |
|
259 | Py_XDECREF(self->dict); | |
187 |
|
260 | |||
188 | if (self->cdict) { |
|
261 | if (self->cdict) { | |
189 | ZSTD_freeCDict(self->cdict); |
|
262 | ZSTD_freeCDict(self->cdict); | |
190 | self->cdict = NULL; |
|
263 | self->cdict = NULL; | |
191 | } |
|
264 | } | |
192 |
|
265 | |||
193 | if (self->cctx) { |
|
266 | if (self->cctx) { | |
194 | ZSTD_freeCCtx(self->cctx); |
|
267 | ZSTD_freeCCtx(self->cctx); | |
195 | self->cctx = NULL; |
|
268 | self->cctx = NULL; | |
196 | } |
|
269 | } | |
197 |
|
270 | |||
|
271 | if (self->mtcctx) { | |||
|
272 | ZSTDMT_freeCCtx(self->mtcctx); | |||
|
273 | self->mtcctx = NULL; | |||
|
274 | } | |||
|
275 | ||||
198 | PyObject_Del(self); |
|
276 | PyObject_Del(self); | |
199 | } |
|
277 | } | |
200 |
|
278 | |||
201 | PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__, |
|
279 | PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__, | |
202 | "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n" |
|
280 | "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n" | |
203 | "compress data between streams\n" |
|
281 | "compress data between streams\n" | |
204 | "\n" |
|
282 | "\n" | |
205 | "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n" |
|
283 | "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n" | |
206 | "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n" |
|
284 | "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n" | |
207 | "method.\n" |
|
285 | "method.\n" | |
208 | "\n" |
|
286 | "\n" | |
209 | "An optional ``size`` argument specifies the size of the source stream.\n" |
|
287 | "An optional ``size`` argument specifies the size of the source stream.\n" | |
210 | "If defined, compression parameters will be tuned based on the size.\n" |
|
288 | "If defined, compression parameters will be tuned based on the size.\n" | |
211 | "\n" |
|
289 | "\n" | |
212 | "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n" |
|
290 | "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n" | |
213 | "of ``read()`` and ``write()`` operations, respectively. By default, they use\n" |
|
291 | "of ``read()`` and ``write()`` operations, respectively. By default, they use\n" | |
214 | "the default compression stream input and output sizes, respectively.\n" |
|
292 | "the default compression stream input and output sizes, respectively.\n" | |
215 | ); |
|
293 | ); | |
216 |
|
294 | |||
217 | static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
295 | static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
218 | static char* kwlist[] = { |
|
296 | static char* kwlist[] = { | |
219 | "ifh", |
|
297 | "ifh", | |
220 | "ofh", |
|
298 | "ofh", | |
221 | "size", |
|
299 | "size", | |
222 | "read_size", |
|
300 | "read_size", | |
223 | "write_size", |
|
301 | "write_size", | |
224 | NULL |
|
302 | NULL | |
225 | }; |
|
303 | }; | |
226 |
|
304 | |||
227 | PyObject* source; |
|
305 | PyObject* source; | |
228 | PyObject* dest; |
|
306 | PyObject* dest; | |
229 | Py_ssize_t sourceSize = 0; |
|
307 | Py_ssize_t sourceSize = 0; | |
230 | size_t inSize = ZSTD_CStreamInSize(); |
|
308 | size_t inSize = ZSTD_CStreamInSize(); | |
231 | size_t outSize = ZSTD_CStreamOutSize(); |
|
309 | size_t outSize = ZSTD_CStreamOutSize(); | |
232 | ZSTD_CStream* cstream; |
|
|||
233 | ZSTD_inBuffer input; |
|
310 | ZSTD_inBuffer input; | |
234 | ZSTD_outBuffer output; |
|
311 | ZSTD_outBuffer output; | |
235 | Py_ssize_t totalRead = 0; |
|
312 | Py_ssize_t totalRead = 0; | |
236 | Py_ssize_t totalWrite = 0; |
|
313 | Py_ssize_t totalWrite = 0; | |
237 | char* readBuffer; |
|
314 | char* readBuffer; | |
238 | Py_ssize_t readSize; |
|
315 | Py_ssize_t readSize; | |
239 | PyObject* readResult; |
|
316 | PyObject* readResult; | |
240 | PyObject* res = NULL; |
|
317 | PyObject* res = NULL; | |
241 | size_t zresult; |
|
318 | size_t zresult; | |
242 | PyObject* writeResult; |
|
319 | PyObject* writeResult; | |
243 | PyObject* totalReadPy; |
|
320 | PyObject* totalReadPy; | |
244 | PyObject* totalWritePy; |
|
321 | PyObject* totalWritePy; | |
245 |
|
322 | |||
246 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist, |
|
323 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|nkk:copy_stream", kwlist, | |
247 | &source, &dest, &sourceSize, &inSize, &outSize)) { |
|
324 | &source, &dest, &sourceSize, &inSize, &outSize)) { | |
248 | return NULL; |
|
325 | return NULL; | |
249 | } |
|
326 | } | |
250 |
|
327 | |||
251 | if (!PyObject_HasAttrString(source, "read")) { |
|
328 | if (!PyObject_HasAttrString(source, "read")) { | |
252 | PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); |
|
329 | PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); | |
253 | return NULL; |
|
330 | return NULL; | |
254 | } |
|
331 | } | |
255 |
|
332 | |||
256 | if (!PyObject_HasAttrString(dest, "write")) { |
|
333 | if (!PyObject_HasAttrString(dest, "write")) { | |
257 | PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); |
|
334 | PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); | |
258 | return NULL; |
|
335 | return NULL; | |
259 | } |
|
336 | } | |
260 |
|
337 | |||
261 | /* Prevent free on uninitialized memory in finally. */ |
|
338 | /* Prevent free on uninitialized memory in finally. */ | |
262 | output.dst = NULL; |
|
339 | output.dst = NULL; | |
263 |
|
340 | |||
264 | cstream = CStream_from_ZstdCompressor(self, sourceSize); |
|
341 | if (self->mtcctx) { | |
265 | if (!cstream) { |
|
342 | if (init_mtcstream(self, sourceSize)) { | |
266 | res = NULL; |
|
343 | res = NULL; | |
267 | goto finally; |
|
344 | goto finally; | |
|
345 | } | |||
|
346 | } | |||
|
347 | else { | |||
|
348 | if (0 != init_cstream(self, sourceSize)) { | |||
|
349 | res = NULL; | |||
|
350 | goto finally; | |||
|
351 | } | |||
268 | } |
|
352 | } | |
269 |
|
353 | |||
270 | output.dst = PyMem_Malloc(outSize); |
|
354 | output.dst = PyMem_Malloc(outSize); | |
271 | if (!output.dst) { |
|
355 | if (!output.dst) { | |
272 | PyErr_NoMemory(); |
|
356 | PyErr_NoMemory(); | |
273 | res = NULL; |
|
357 | res = NULL; | |
274 | goto finally; |
|
358 | goto finally; | |
275 | } |
|
359 | } | |
276 | output.size = outSize; |
|
360 | output.size = outSize; | |
277 | output.pos = 0; |
|
361 | output.pos = 0; | |
278 |
|
362 | |||
279 | while (1) { |
|
363 | while (1) { | |
280 | /* Try to read from source stream. */ |
|
364 | /* Try to read from source stream. */ | |
281 | readResult = PyObject_CallMethod(source, "read", "n", inSize); |
|
365 | readResult = PyObject_CallMethod(source, "read", "n", inSize); | |
282 | if (!readResult) { |
|
366 | if (!readResult) { | |
283 | PyErr_SetString(ZstdError, "could not read() from source"); |
|
367 | PyErr_SetString(ZstdError, "could not read() from source"); | |
284 | goto finally; |
|
368 | goto finally; | |
285 | } |
|
369 | } | |
286 |
|
370 | |||
287 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
371 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
288 |
|
372 | |||
289 | /* If no data was read, we're at EOF. */ |
|
373 | /* If no data was read, we're at EOF. */ | |
290 | if (0 == readSize) { |
|
374 | if (0 == readSize) { | |
291 | break; |
|
375 | break; | |
292 | } |
|
376 | } | |
293 |
|
377 | |||
294 | totalRead += readSize; |
|
378 | totalRead += readSize; | |
295 |
|
379 | |||
296 | /* Send data to compressor */ |
|
380 | /* Send data to compressor */ | |
297 | input.src = readBuffer; |
|
381 | input.src = readBuffer; | |
298 | input.size = readSize; |
|
382 | input.size = readSize; | |
299 | input.pos = 0; |
|
383 | input.pos = 0; | |
300 |
|
384 | |||
301 | while (input.pos < input.size) { |
|
385 | while (input.pos < input.size) { | |
302 | Py_BEGIN_ALLOW_THREADS |
|
386 | Py_BEGIN_ALLOW_THREADS | |
303 | zresult = ZSTD_compressStream(cstream, &output, &input); |
|
387 | if (self->mtcctx) { | |
|
388 | zresult = ZSTDMT_compressStream(self->mtcctx, &output, &input); | |||
|
389 | } | |||
|
390 | else { | |||
|
391 | zresult = ZSTD_compressStream(self->cstream, &output, &input); | |||
|
392 | } | |||
304 | Py_END_ALLOW_THREADS |
|
393 | Py_END_ALLOW_THREADS | |
305 |
|
394 | |||
306 | if (ZSTD_isError(zresult)) { |
|
395 | if (ZSTD_isError(zresult)) { | |
307 | res = NULL; |
|
396 | res = NULL; | |
308 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
397 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
309 | goto finally; |
|
398 | goto finally; | |
310 | } |
|
399 | } | |
311 |
|
400 | |||
312 | if (output.pos) { |
|
401 | if (output.pos) { | |
313 | #if PY_MAJOR_VERSION >= 3 |
|
402 | #if PY_MAJOR_VERSION >= 3 | |
314 | writeResult = PyObject_CallMethod(dest, "write", "y#", |
|
403 | writeResult = PyObject_CallMethod(dest, "write", "y#", | |
315 | #else |
|
404 | #else | |
316 | writeResult = PyObject_CallMethod(dest, "write", "s#", |
|
405 | writeResult = PyObject_CallMethod(dest, "write", "s#", | |
317 | #endif |
|
406 | #endif | |
318 | output.dst, output.pos); |
|
407 | output.dst, output.pos); | |
319 | Py_XDECREF(writeResult); |
|
408 | Py_XDECREF(writeResult); | |
320 | totalWrite += output.pos; |
|
409 | totalWrite += output.pos; | |
321 | output.pos = 0; |
|
410 | output.pos = 0; | |
322 | } |
|
411 | } | |
323 | } |
|
412 | } | |
324 | } |
|
413 | } | |
325 |
|
414 | |||
326 | /* We've finished reading. Now flush the compressor stream. */ |
|
415 | /* We've finished reading. Now flush the compressor stream. */ | |
327 | while (1) { |
|
416 | while (1) { | |
328 | zresult = ZSTD_endStream(cstream, &output); |
|
417 | if (self->mtcctx) { | |
|
418 | zresult = ZSTDMT_endStream(self->mtcctx, &output); | |||
|
419 | } | |||
|
420 | else { | |||
|
421 | zresult = ZSTD_endStream(self->cstream, &output); | |||
|
422 | } | |||
329 | if (ZSTD_isError(zresult)) { |
|
423 | if (ZSTD_isError(zresult)) { | |
330 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
424 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
331 | ZSTD_getErrorName(zresult)); |
|
425 | ZSTD_getErrorName(zresult)); | |
332 | res = NULL; |
|
426 | res = NULL; | |
333 | goto finally; |
|
427 | goto finally; | |
334 | } |
|
428 | } | |
335 |
|
429 | |||
336 | if (output.pos) { |
|
430 | if (output.pos) { | |
337 | #if PY_MAJOR_VERSION >= 3 |
|
431 | #if PY_MAJOR_VERSION >= 3 | |
338 | writeResult = PyObject_CallMethod(dest, "write", "y#", |
|
432 | writeResult = PyObject_CallMethod(dest, "write", "y#", | |
339 | #else |
|
433 | #else | |
340 | writeResult = PyObject_CallMethod(dest, "write", "s#", |
|
434 | writeResult = PyObject_CallMethod(dest, "write", "s#", | |
341 | #endif |
|
435 | #endif | |
342 | output.dst, output.pos); |
|
436 | output.dst, output.pos); | |
343 | totalWrite += output.pos; |
|
437 | totalWrite += output.pos; | |
344 | Py_XDECREF(writeResult); |
|
438 | Py_XDECREF(writeResult); | |
345 | output.pos = 0; |
|
439 | output.pos = 0; | |
346 | } |
|
440 | } | |
347 |
|
441 | |||
348 | if (!zresult) { |
|
442 | if (!zresult) { | |
349 | break; |
|
443 | break; | |
350 | } |
|
444 | } | |
351 | } |
|
445 | } | |
352 |
|
446 | |||
353 | ZSTD_freeCStream(cstream); |
|
|||
354 | cstream = NULL; |
|
|||
355 |
|
||||
356 | totalReadPy = PyLong_FromSsize_t(totalRead); |
|
447 | totalReadPy = PyLong_FromSsize_t(totalRead); | |
357 | totalWritePy = PyLong_FromSsize_t(totalWrite); |
|
448 | totalWritePy = PyLong_FromSsize_t(totalWrite); | |
358 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); |
|
449 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); | |
359 |
Py_D |
|
450 | Py_DECREF(totalReadPy); | |
360 |
Py_D |
|
451 | Py_DECREF(totalWritePy); | |
361 |
|
452 | |||
362 | finally: |
|
453 | finally: | |
363 | if (output.dst) { |
|
454 | if (output.dst) { | |
364 | PyMem_Free(output.dst); |
|
455 | PyMem_Free(output.dst); | |
365 | } |
|
456 | } | |
366 |
|
457 | |||
367 | if (cstream) { |
|
|||
368 | ZSTD_freeCStream(cstream); |
|
|||
369 | } |
|
|||
370 |
|
||||
371 | return res; |
|
458 | return res; | |
372 | } |
|
459 | } | |
373 |
|
460 | |||
374 | PyDoc_STRVAR(ZstdCompressor_compress__doc__, |
|
461 | PyDoc_STRVAR(ZstdCompressor_compress__doc__, | |
375 | "compress(data, allow_empty=False)\n" |
|
462 | "compress(data, allow_empty=False)\n" | |
376 | "\n" |
|
463 | "\n" | |
377 | "Compress data in a single operation.\n" |
|
464 | "Compress data in a single operation.\n" | |
378 | "\n" |
|
465 | "\n" | |
379 | "This is the simplest mechanism to perform compression: simply pass in a\n" |
|
466 | "This is the simplest mechanism to perform compression: simply pass in a\n" | |
380 | "value and get a compressed value back. It is almost the most prone to abuse.\n" |
|
467 | "value and get a compressed value back. It is almost the most prone to abuse.\n" | |
381 | "The input and output values must fit in memory, so passing in very large\n" |
|
468 | "The input and output values must fit in memory, so passing in very large\n" | |
382 | "values can result in excessive memory usage. For this reason, one of the\n" |
|
469 | "values can result in excessive memory usage. For this reason, one of the\n" | |
383 | "streaming based APIs is preferred for larger values.\n" |
|
470 | "streaming based APIs is preferred for larger values.\n" | |
384 | ); |
|
471 | ); | |
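A minimal usage sketch of the ``compress()`` API documented above, assuming the extension is importable as ``zstandard``; the level and sample data are illustrative:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=3)
    compressed = cctx.compress(b"data to compress")

    # If the compressor is configured to write content sizes into frames,
    # empty inputs additionally require allow_empty=True.
    empty_frame = cctx.compress(b"", allow_empty=True)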
385 |
|
472 | |||
386 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
473 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
387 | static char* kwlist[] = { |
|
474 | static char* kwlist[] = { | |
388 | "data", |
|
475 | "data", | |
389 | "allow_empty", |
|
476 | "allow_empty", | |
390 | NULL |
|
477 | NULL | |
391 | }; |
|
478 | }; | |
392 |
|
479 | |||
393 | const char* source; |
|
480 | const char* source; | |
394 | Py_ssize_t sourceSize; |
|
481 | Py_ssize_t sourceSize; | |
395 | PyObject* allowEmpty = NULL; |
|
482 | PyObject* allowEmpty = NULL; | |
396 | size_t destSize; |
|
483 | size_t destSize; | |
397 | PyObject* output; |
|
484 | PyObject* output; | |
398 | char* dest; |
|
485 | char* dest; | |
399 | void* dictData = NULL; |
|
486 | void* dictData = NULL; | |
400 | size_t dictSize = 0; |
|
487 | size_t dictSize = 0; | |
401 | size_t zresult; |
|
488 | size_t zresult; | |
402 | ZSTD_parameters zparams; |
|
489 | ZSTD_parameters zparams; | |
403 |
|
490 | |||
404 | #if PY_MAJOR_VERSION >= 3 |
|
491 | #if PY_MAJOR_VERSION >= 3 | |
405 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress", |
|
492 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O:compress", | |
406 | #else |
|
493 | #else | |
407 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress", |
|
494 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O:compress", | |
408 | #endif |
|
495 | #endif | |
409 | kwlist, &source, &sourceSize, &allowEmpty)) { |
|
496 | kwlist, &source, &sourceSize, &allowEmpty)) { | |
410 | return NULL; |
|
497 | return NULL; | |
411 | } |
|
498 | } | |
412 |
|
499 | |||
|
500 | if (self->threads && self->dict) { | |||
|
501 | PyErr_SetString(ZstdError, | |||
|
502 | "compress() cannot be used with both dictionaries and multi-threaded compression"); | |||
|
503 | return NULL; | |||
|
504 | } | |||
|
505 | ||||
|
506 | if (self->threads && self->cparams) { | |||
|
507 | PyErr_SetString(ZstdError, | |||
|
508 | "compress() cannot be used with both compression parameters and multi-threaded compression"); | |||
|
509 | return NULL; | |||
|
510 | } | |||
|
511 | ||||
413 | /* Limitation in zstd C API doesn't let decompression side distinguish |
|
512 | /* Limitation in zstd C API doesn't let decompression side distinguish | |
414 | between content size of 0 and unknown content size. This can make round |
|
513 | between content size of 0 and unknown content size. This can make round | |
415 | tripping via Python difficult. Until this is fixed, require a flag |
|
514 | tripping via Python difficult. Until this is fixed, require a flag | |
416 | to fire the footgun. |
|
515 | to fire the footgun. | |
417 | https://github.com/indygreg/python-zstandard/issues/11 */ |
|
516 | https://github.com/indygreg/python-zstandard/issues/11 */ | |
418 | if (0 == sourceSize && self->fparams.contentSizeFlag |
|
517 | if (0 == sourceSize && self->fparams.contentSizeFlag | |
419 | && (!allowEmpty || PyObject_Not(allowEmpty))) { |
|
518 | && (!allowEmpty || PyObject_Not(allowEmpty))) { | |
420 | PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes"); |
|
519 | PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes"); | |
421 | return NULL; |
|
520 | return NULL; | |
422 | } |
|
521 | } | |
423 |
|
522 | |||
424 | destSize = ZSTD_compressBound(sourceSize); |
|
523 | destSize = ZSTD_compressBound(sourceSize); | |
425 | output = PyBytes_FromStringAndSize(NULL, destSize); |
|
524 | output = PyBytes_FromStringAndSize(NULL, destSize); | |
426 | if (!output) { |
|
525 | if (!output) { | |
427 | return NULL; |
|
526 | return NULL; | |
428 | } |
|
527 | } | |
429 |
|
528 | |||
430 | dest = PyBytes_AsString(output); |
|
529 | dest = PyBytes_AsString(output); | |
431 |
|
530 | |||
432 | if (self->dict) { |
|
531 | if (self->dict) { | |
433 | dictData = self->dict->dictData; |
|
532 | dictData = self->dict->dictData; | |
434 | dictSize = self->dict->dictSize; |
|
533 | dictSize = self->dict->dictSize; | |
435 | } |
|
534 | } | |
436 |
|
535 | |||
437 | memset(&zparams, 0, sizeof(zparams)); |
|
536 | memset(&zparams, 0, sizeof(zparams)); | |
438 | if (!self->cparams) { |
|
537 | if (!self->cparams) { | |
439 | zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize); |
|
538 | zparams.cParams = ZSTD_getCParams(self->compressionLevel, sourceSize, dictSize); | |
440 | } |
|
539 | } | |
441 | else { |
|
540 | else { | |
442 | ztopy_compression_parameters(self->cparams, &zparams.cParams); |
|
541 | ztopy_compression_parameters(self->cparams, &zparams.cParams); | |
443 | /* Do NOT call ZSTD_adjustCParams() here because the compression params |
|
542 | /* Do NOT call ZSTD_adjustCParams() here because the compression params | |
444 | come from the user. */ |
|
543 | come from the user. */ | |
445 | } |
|
544 | } | |
446 |
|
545 | |||
447 | zparams.fParams = self->fparams; |
|
546 | zparams.fParams = self->fparams; | |
448 |
|
547 | |||
449 | /* The raw dict data has to be processed before it can be used. Since this |
|
548 | /* The raw dict data has to be processed before it can be used. Since this | |
450 | adds overhead - especially if multiple dictionary compression operations |
|
549 | adds overhead - especially if multiple dictionary compression operations | |
451 | are performed on the same ZstdCompressor instance - we create a |
|
550 | are performed on the same ZstdCompressor instance - we create a | |
452 | ZSTD_CDict once and reuse it for all operations. |
|
551 | ZSTD_CDict once and reuse it for all operations. | |
453 |
|
552 | |||
454 | Note: the compression parameters used for the first invocation (possibly |
|
553 | Note: the compression parameters used for the first invocation (possibly | |
455 | derived from the source size) will be reused on all subsequent invocations. |
|
554 | derived from the source size) will be reused on all subsequent invocations. | |
456 | https://github.com/facebook/zstd/issues/358 contains more info. We could |
|
555 | https://github.com/facebook/zstd/issues/358 contains more info. We could | |
457 | potentially add an argument somewhere to control this behavior. |
|
556 | potentially add an argument somewhere to control this behavior. | |
458 | */ |
|
557 | */ | |
459 | if (dictData && !self->cdict) { |
|
558 | if (0 != populate_cdict(self, &zparams)) { | |
460 | if (populate_cdict(self, dictData, dictSize, &zparams)) { |
|
559 | Py_DECREF(output); | |
461 | Py_DECREF(output); |
|
560 | return NULL; | |
462 | return NULL; |
|
|||
463 | } |
|
|||
464 | } |
|
561 | } | |
465 |
|
562 | |||
466 | Py_BEGIN_ALLOW_THREADS |
|
563 | Py_BEGIN_ALLOW_THREADS | |
467 | /* By avoiding ZSTD_compress(), we don't necessarily write out content |
|
564 | if (self->mtcctx) { | |
468 | size. This means the argument to ZstdCompressor to control frame |
|
565 | zresult = ZSTDMT_compressCCtx(self->mtcctx, dest, destSize, | |
469 | parameters is honored. */ |
|
566 | source, sourceSize, self->compressionLevel); | |
470 | if (self->cdict) { |
|
|||
471 | zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize, |
|
|||
472 | source, sourceSize, self->cdict); |
|
|||
473 | } |
|
567 | } | |
474 | else { |
|
568 | else { | |
475 | zresult = ZSTD_compress_advanced(self->cctx, dest, destSize, |
|
569 | /* By avoiding ZSTD_compress(), we don't necessarily write out content | |
476 | source, sourceSize, dictData, dictSize, zparams); |
|
570 | size. This means the argument to ZstdCompressor to control frame | |
|
571 | parameters is honored. */ | |||
|
572 | if (self->cdict) { | |||
|
573 | zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize, | |||
|
574 | source, sourceSize, self->cdict); | |||
|
575 | } | |||
|
576 | else { | |||
|
577 | zresult = ZSTD_compress_advanced(self->cctx, dest, destSize, | |||
|
578 | source, sourceSize, dictData, dictSize, zparams); | |||
|
579 | } | |||
477 | } |
|
580 | } | |
478 | Py_END_ALLOW_THREADS |
|
581 | Py_END_ALLOW_THREADS | |
479 |
|
582 | |||
480 | if (ZSTD_isError(zresult)) { |
|
583 | if (ZSTD_isError(zresult)) { | |
481 | PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); |
|
584 | PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult)); | |
482 | Py_CLEAR(output); |
|
585 | Py_CLEAR(output); | |
483 | return NULL; |
|
586 | return NULL; | |
484 | } |
|
587 | } | |
485 | else { |
|
588 | else { | |
486 | Py_SIZE(output) = zresult; |
|
589 | Py_SIZE(output) = zresult; | |
487 | } |
|
590 | } | |
488 |
|
591 | |||
489 | return output; |
|
592 | return output; | |
490 | } |
|
593 | } | |
491 |
|
594 | |||
492 | PyDoc_STRVAR(ZstdCompressionObj__doc__, |
|
595 | PyDoc_STRVAR(ZstdCompressionObj__doc__, | |
493 | "compressobj()\n" |
|
596 | "compressobj()\n" | |
494 | "\n" |
|
597 | "\n" | |
495 | "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n" |
|
598 | "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n" | |
496 | "\n" |
|
599 | "\n" | |
497 | "The returned object exposes an API similar to ``zlib.compressobj`` and\n" |
|
600 | "The returned object exposes an API similar to ``zlib.compressobj`` and\n" | |
498 | "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n" |
|
601 | "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n" | |
499 | "without changing how compression is performed.\n" |
|
602 | "without changing how compression is performed.\n" | |
500 | ); |
|
603 | ); | |
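A sketch of the zlib-style incremental API described above, assuming ``zstandard`` is importable; the chunk boundaries are arbitrary:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    cobj = cctx.compressobj()

    # Feed data incrementally, then flush to finish the frame.
    frame = cobj.compress(b"chunk one") + cobj.compress(b"chunk two") + cobj.flush()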
501 |
|
604 | |||
502 | static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
605 | static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
503 | static char* kwlist[] = { |
|
606 | static char* kwlist[] = { | |
504 | "size", |
|
607 | "size", | |
505 | NULL |
|
608 | NULL | |
506 | }; |
|
609 | }; | |
507 |
|
610 | |||
508 | Py_ssize_t inSize = 0; |
|
611 | Py_ssize_t inSize = 0; | |
509 | size_t outSize = ZSTD_CStreamOutSize(); |
|
612 | size_t outSize = ZSTD_CStreamOutSize(); | |
510 | ZstdCompressionObj* result = PyObject_New(ZstdCompressionObj, &ZstdCompressionObjType); |
|
613 | ZstdCompressionObj* result = NULL; | |
511 | if (!result) { |
|
|||
512 | return NULL; |
|
|||
513 | } |
|
|||
514 |
|
614 | |||
515 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) { |
|
615 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:compressobj", kwlist, &inSize)) { | |
516 | return NULL; |
|
616 | return NULL; | |
517 | } |
|
617 | } | |
518 |
|
618 | |||
519 | result->cstream = CStream_from_ZstdCompressor(self, inSize); |
|
619 | result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL); | |
520 | if (!result->cstream) { |
|
620 | if (!result) { | |
521 | Py_DECREF(result); |
|
|||
522 | return NULL; |
|
621 | return NULL; | |
523 | } |
|
622 | } | |
524 |
|
623 | |||
|
624 | if (self->mtcctx) { | |||
|
625 | if (init_mtcstream(self, inSize)) { | |||
|
626 | Py_DECREF(result); | |||
|
627 | return NULL; | |||
|
628 | } | |||
|
629 | } | |||
|
630 | else { | |||
|
631 | if (0 != init_cstream(self, inSize)) { | |||
|
632 | Py_DECREF(result); | |||
|
633 | return NULL; | |||
|
634 | } | |||
|
635 | } | |||
|
636 | ||||
525 | result->output.dst = PyMem_Malloc(outSize); |
|
637 | result->output.dst = PyMem_Malloc(outSize); | |
526 | if (!result->output.dst) { |
|
638 | if (!result->output.dst) { | |
527 | PyErr_NoMemory(); |
|
639 | PyErr_NoMemory(); | |
528 | Py_DECREF(result); |
|
640 | Py_DECREF(result); | |
529 | return NULL; |
|
641 | return NULL; | |
530 | } |
|
642 | } | |
531 | result->output.size = outSize; |
|
643 | result->output.size = outSize; | |
532 | result->output.pos = 0; |
|
|||
533 |
|
||||
534 | result->compressor = self; |
|
644 | result->compressor = self; | |
535 | Py_INCREF(result->compressor); |
|
645 | Py_INCREF(result->compressor); | |
536 |
|
646 | |||
537 | result->finished = 0; |
|
|||
538 |
|
||||
539 | return result; |
|
647 | return result; | |
540 | } |
|
648 | } | |
541 |
|
649 | |||
542 | PyDoc_STRVAR(ZstdCompressor_read_from__doc__, |
|
650 | PyDoc_STRVAR(ZstdCompressor_read_from__doc__, | |
543 | "read_from(reader, [size=0, read_size=default, write_size=default])\n" |
|
651 | "read_from(reader, [size=0, read_size=default, write_size=default])\n" | |
544 | "Read uncompress data from a reader and return an iterator\n" |
|
652 | "Read uncompress data from a reader and return an iterator\n" | |
545 | "\n" |
|
653 | "\n" | |
546 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" |
|
654 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" | |
547 | "\n" |
|
655 | "\n" | |
548 | "Uncompressed data will be obtained from ``reader`` by calling the\n" |
|
656 | "Uncompressed data will be obtained from ``reader`` by calling the\n" | |
549 | "``read(size)`` method of it. The source data will be streamed into a\n" |
|
657 | "``read(size)`` method of it. The source data will be streamed into a\n" | |
550 | "compressor. As compressed data is available, it will be exposed to the\n" |
|
658 | "compressor. As compressed data is available, it will be exposed to the\n" | |
551 | "iterator.\n" |
|
659 | "iterator.\n" | |
552 | "\n" |
|
660 | "\n" | |
553 | "Data is read from the source in chunks of ``read_size``. Compressed chunks\n" |
|
661 | "Data is read from the source in chunks of ``read_size``. Compressed chunks\n" | |
554 | "are at most ``write_size`` bytes. Both values default to the zstd input and\n" |
|
662 | "are at most ``write_size`` bytes. Both values default to the zstd input and\n" | |
555 | "and output defaults, respectively.\n" |
|
663 | "and output defaults, respectively.\n" | |
556 | "\n" |
|
664 | "\n" | |
557 | "The caller is partially in control of how fast data is fed into the\n" |
|
665 | "The caller is partially in control of how fast data is fed into the\n" | |
558 | "compressor by how it consumes the returned iterator. The compressor will\n" |
|
666 | "compressor by how it consumes the returned iterator. The compressor will\n" | |
559 | "not consume from the reader unless the caller consumes from the iterator.\n" |
|
667 | "not consume from the reader unless the caller consumes from the iterator.\n" | |
560 | ); |
|
668 | ); | |
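A sketch of streaming compression through the iterator described above, assuming ``zstandard`` is importable; ``io.BytesIO`` stands in for any object with a ``read(size)`` method and the chunk size is illustrative:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO(b"input data" * 4096)

    chunks = []
    for chunk in cctx.read_from(source, read_size=16384):
        chunks.append(chunk)
    frame = b"".join(chunks)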
561 |
|
669 | |||
562 | static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
670 | static ZstdCompressorIterator* ZstdCompressor_read_from(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
563 | static char* kwlist[] = { |
|
671 | static char* kwlist[] = { | |
564 | "reader", |
|
672 | "reader", | |
565 | "size", |
|
673 | "size", | |
566 | "read_size", |
|
674 | "read_size", | |
567 | "write_size", |
|
675 | "write_size", | |
568 | NULL |
|
676 | NULL | |
569 | }; |
|
677 | }; | |
570 |
|
678 | |||
571 | PyObject* reader; |
|
679 | PyObject* reader; | |
572 | Py_ssize_t sourceSize = 0; |
|
680 | Py_ssize_t sourceSize = 0; | |
573 | size_t inSize = ZSTD_CStreamInSize(); |
|
681 | size_t inSize = ZSTD_CStreamInSize(); | |
574 | size_t outSize = ZSTD_CStreamOutSize(); |
|
682 | size_t outSize = ZSTD_CStreamOutSize(); | |
575 | ZstdCompressorIterator* result; |
|
683 | ZstdCompressorIterator* result; | |
576 |
|
684 | |||
577 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist, |
|
685 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nkk:read_from", kwlist, | |
578 | &reader, &sourceSize, &inSize, &outSize)) { |
|
686 | &reader, &sourceSize, &inSize, &outSize)) { | |
579 | return NULL; |
|
687 | return NULL; | |
580 | } |
|
688 | } | |
581 |
|
689 | |||
582 | result = PyObject_New(ZstdCompressorIterator, &ZstdCompressorIteratorType); |
|
690 | result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL); | |
583 | if (!result) { |
|
691 | if (!result) { | |
584 | return NULL; |
|
692 | return NULL; | |
585 | } |
|
693 | } | |
586 |
|
||||
587 | result->compressor = NULL; |
|
|||
588 | result->reader = NULL; |
|
|||
589 | result->buffer = NULL; |
|
|||
590 | result->cstream = NULL; |
|
|||
591 | result->input.src = NULL; |
|
|||
592 | result->output.dst = NULL; |
|
|||
593 | result->readResult = NULL; |
|
|||
594 |
|
||||
595 | if (PyObject_HasAttrString(reader, "read")) { |
|
694 | if (PyObject_HasAttrString(reader, "read")) { | |
596 | result->reader = reader; |
|
695 | result->reader = reader; | |
597 | Py_INCREF(result->reader); |
|
696 | Py_INCREF(result->reader); | |
598 | } |
|
697 | } | |
599 | else if (1 == PyObject_CheckBuffer(reader)) { |
|
698 | else if (1 == PyObject_CheckBuffer(reader)) { | |
600 | result->buffer = PyMem_Malloc(sizeof(Py_buffer)); |
|
699 | result->buffer = PyMem_Malloc(sizeof(Py_buffer)); | |
601 | if (!result->buffer) { |
|
700 | if (!result->buffer) { | |
602 | goto except; |
|
701 | goto except; | |
603 | } |
|
702 | } | |
604 |
|
703 | |||
605 | memset(result->buffer, 0, sizeof(Py_buffer)); |
|
704 | memset(result->buffer, 0, sizeof(Py_buffer)); | |
606 |
|
705 | |||
607 | if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { |
|
706 | if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { | |
608 | goto except; |
|
707 | goto except; | |
609 | } |
|
708 | } | |
610 |
|
709 | |||
611 | result->bufferOffset = 0; |
|
|||
612 | sourceSize = result->buffer->len; |
|
710 | sourceSize = result->buffer->len; | |
613 | } |
|
711 | } | |
614 | else { |
|
712 | else { | |
615 | PyErr_SetString(PyExc_ValueError, |
|
713 | PyErr_SetString(PyExc_ValueError, | |
616 | "must pass an object with a read() method or conforms to buffer protocol"); |
|
714 | "must pass an object with a read() method or conforms to buffer protocol"); | |
617 | goto except; |
|
715 | goto except; | |
618 | } |
|
716 | } | |
619 |
|
717 | |||
620 | result->compressor = self; |
|
718 | result->compressor = self; | |
621 | Py_INCREF(result->compressor); |
|
719 | Py_INCREF(result->compressor); | |
622 |
|
720 | |||
623 | result->sourceSize = sourceSize; |
|
721 | result->sourceSize = sourceSize; | |
624 | result->cstream = CStream_from_ZstdCompressor(self, sourceSize); |
|
722 | ||
625 | if (!result->cstream) { |
|
723 | if (self->mtcctx) { | |
626 | goto except; |
|
724 | if (init_mtcstream(self, sourceSize)) { | |
|
725 | goto except; | |||
|
726 | } | |||
|
727 | } | |||
|
728 | else { | |||
|
729 | if (0 != init_cstream(self, sourceSize)) { | |||
|
730 | goto except; | |||
|
731 | } | |||
627 | } |
|
732 | } | |
628 |
|
733 | |||
629 | result->inSize = inSize; |
|
734 | result->inSize = inSize; | |
630 | result->outSize = outSize; |
|
735 | result->outSize = outSize; | |
631 |
|
736 | |||
632 | result->output.dst = PyMem_Malloc(outSize); |
|
737 | result->output.dst = PyMem_Malloc(outSize); | |
633 | if (!result->output.dst) { |
|
738 | if (!result->output.dst) { | |
634 | PyErr_NoMemory(); |
|
739 | PyErr_NoMemory(); | |
635 | goto except; |
|
740 | goto except; | |
636 | } |
|
741 | } | |
637 | result->output.size = outSize; |
|
742 | result->output.size = outSize; | |
638 | result->output.pos = 0; |
|
|||
639 |
|
||||
640 | result->input.src = NULL; |
|
|||
641 | result->input.size = 0; |
|
|||
642 | result->input.pos = 0; |
|
|||
643 |
|
||||
644 | result->finishedInput = 0; |
|
|||
645 | result->finishedOutput = 0; |
|
|||
646 |
|
743 | |||
647 | goto finally; |
|
744 | goto finally; | |
648 |
|
745 | |||
649 | except: |
|
746 | except: | |
650 | if (result->cstream) { |
|
747 | Py_XDECREF(result->compressor); | |
651 | ZSTD_freeCStream(result->cstream); |
|
748 | Py_XDECREF(result->reader); | |
652 | result->cstream = NULL; |
|
|||
653 | } |
|
|||
654 |
|
||||
655 | Py_DecRef((PyObject*)result->compressor); |
|
|||
656 | Py_DecRef(result->reader); |
|
|||
657 |
|
||||
658 | Py_DECREF(result); |
|
749 | Py_DECREF(result); | |
659 | result = NULL; |
|
750 | result = NULL; | |
660 |
|
751 | |||
661 | finally: |
|
752 | finally: | |
662 | return result; |
|
753 | return result; | |
663 | } |
|
754 | } | |
664 |
|
755 | |||
665 | PyDoc_STRVAR(ZstdCompressor_write_to___doc__, |
|
756 | PyDoc_STRVAR(ZstdCompressor_write_to___doc__, | |
666 | "Create a context manager to write compressed data to an object.\n" |
|
757 | "Create a context manager to write compressed data to an object.\n" | |
667 | "\n" |
|
758 | "\n" | |
668 | "The passed object must have a ``write()`` method.\n" |
|
759 | "The passed object must have a ``write()`` method.\n" | |
669 | "\n" |
|
760 | "\n" | |
670 | "The caller feeds input data to the object by calling ``compress(data)``.\n" |
|
761 | "The caller feeds input data to the object by calling ``compress(data)``.\n" | |
671 | "Compressed data is written to the argument given to this function.\n" |
|
762 | "Compressed data is written to the argument given to this function.\n" | |
672 | "\n" |
|
763 | "\n" | |
673 | "The function takes an optional ``size`` argument indicating the total size\n" |
|
764 | "The function takes an optional ``size`` argument indicating the total size\n" | |
674 | "of the eventual input. If specified, the size will influence compression\n" |
|
765 | "of the eventual input. If specified, the size will influence compression\n" | |
675 | "parameter tuning and could result in the size being written into the\n" |
|
766 | "parameter tuning and could result in the size being written into the\n" | |
676 | "header of the compressed data.\n" |
|
767 | "header of the compressed data.\n" | |
677 | "\n" |
|
768 | "\n" | |
678 | "An optional ``write_size`` argument is also accepted. It defines the maximum\n" |
|
769 | "An optional ``write_size`` argument is also accepted. It defines the maximum\n" | |
679 | "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n" |
|
770 | "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n" | |
680 | "for a compressor output stream.\n" |
|
771 | "for a compressor output stream.\n" | |
681 | ); |
|
772 | ); | |
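A sketch of the context manager described above, assuming ``zstandard`` is importable; ``io.BytesIO`` stands in for any object with a ``write()`` method:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    out = io.BytesIO()

    # Exiting the context flushes the compressor and finishes the frame.
    with cctx.write_to(out, size=11) as compressor:
        compressor.compress(b"hello world")

    frame = out.getvalue()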
682 |
|
773 | |||
683 | static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { |
|
774 | static ZstdCompressionWriter* ZstdCompressor_write_to(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
684 | static char* kwlist[] = { |
|
775 | static char* kwlist[] = { | |
685 | "writer", |
|
776 | "writer", | |
686 | "size", |
|
777 | "size", | |
687 | "write_size", |
|
778 | "write_size", | |
688 | NULL |
|
779 | NULL | |
689 | }; |
|
780 | }; | |
690 |
|
781 | |||
691 | PyObject* writer; |
|
782 | PyObject* writer; | |
692 | ZstdCompressionWriter* result; |
|
783 | ZstdCompressionWriter* result; | |
693 | Py_ssize_t sourceSize = 0; |
|
784 | Py_ssize_t sourceSize = 0; | |
694 | size_t outSize = ZSTD_CStreamOutSize(); |
|
785 | size_t outSize = ZSTD_CStreamOutSize(); | |
695 |
|
786 | |||
696 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist, |
|
787 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|nk:write_to", kwlist, | |
697 | &writer, &sourceSize, &outSize)) { |
|
788 | &writer, &sourceSize, &outSize)) { | |
698 | return NULL; |
|
789 | return NULL; | |
699 | } |
|
790 | } | |
700 |
|
791 | |||
701 | if (!PyObject_HasAttrString(writer, "write")) { |
|
792 | if (!PyObject_HasAttrString(writer, "write")) { | |
702 | PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); |
|
793 | PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); | |
703 | return NULL; |
|
794 | return NULL; | |
704 | } |
|
795 | } | |
705 |
|
796 | |||
706 | result = PyObject_New(ZstdCompressionWriter, &ZstdCompressionWriterType); |
|
797 | result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); | |
707 | if (!result) { |
|
798 | if (!result) { | |
708 | return NULL; |
|
799 | return NULL; | |
709 | } |
|
800 | } | |
710 |
|
801 | |||
711 | result->compressor = self; |
|
802 | result->compressor = self; | |
712 | Py_INCREF(result->compressor); |
|
803 | Py_INCREF(result->compressor); | |
713 |
|
804 | |||
714 | result->writer = writer; |
|
805 | result->writer = writer; | |
715 | Py_INCREF(result->writer); |
|
806 | Py_INCREF(result->writer); | |
716 |
|
807 | |||
717 | result->sourceSize = sourceSize; |
|
808 | result->sourceSize = sourceSize; | |
718 |
|
||||
719 | result->outSize = outSize; |
|
809 | result->outSize = outSize; | |
720 |
|
810 | |||
721 | result->entered = 0; |
|
811 | return result; | |
722 | result->cstream = NULL; |
|
812 | } | |
|
813 | ||||
|
814 | typedef struct { | |||
|
815 | void* sourceData; | |||
|
816 | size_t sourceSize; | |||
|
817 | } DataSource; | |||
|
818 | ||||
|
819 | typedef struct { | |||
|
820 | DataSource* sources; | |||
|
821 | Py_ssize_t sourcesSize; | |||
|
822 | unsigned long long totalSourceSize; | |||
|
823 | } DataSources; | |||
|
824 | ||||
|
825 | typedef struct { | |||
|
826 | void* dest; | |||
|
827 | Py_ssize_t destSize; | |||
|
828 | BufferSegment* segments; | |||
|
829 | Py_ssize_t segmentsSize; | |||
|
830 | } DestBuffer; | |||
|
831 | ||||
|
832 | typedef enum { | |||
|
833 | WorkerError_none = 0, | |||
|
834 | WorkerError_zstd = 1, | |||
|
835 | WorkerError_no_memory = 2, | |||
|
836 | } WorkerError; | |||
|
837 | ||||
|
838 | /** | |||
|
839 | * Holds state for an individual worker performing multi_compress_to_buffer work. | |||
|
840 | */ | |||
|
841 | typedef struct { | |||
|
842 | /* Used for compression. */ | |||
|
843 | ZSTD_CCtx* cctx; | |||
|
844 | ZSTD_CDict* cdict; | |||
|
845 | int cLevel; | |||
|
846 | CompressionParametersObject* cParams; | |||
|
847 | ZSTD_frameParameters fParams; | |||
|
848 | ||||
|
849 | /* What to compress. */ | |||
|
850 | DataSource* sources; | |||
|
851 | Py_ssize_t sourcesSize; | |||
|
852 | Py_ssize_t startOffset; | |||
|
853 | Py_ssize_t endOffset; | |||
|
854 | unsigned long long totalSourceSize; | |||
|
855 | ||||
|
856 | /* Result storage. */ | |||
|
857 | DestBuffer* destBuffers; | |||
|
858 | Py_ssize_t destCount; | |||
|
859 | ||||
|
860 | /* Error tracking. */ | |||
|
861 | WorkerError error; | |||
|
862 | size_t zresult; | |||
|
863 | Py_ssize_t errorOffset; | |||
|
864 | } WorkerState; | |||
|
865 | ||||
|
866 | static void compress_worker(WorkerState* state) { | |||
|
867 | Py_ssize_t inputOffset = state->startOffset; | |||
|
868 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; | |||
|
869 | Py_ssize_t currentBufferStartOffset = state->startOffset; | |||
|
870 | size_t zresult; | |||
|
871 | ZSTD_parameters zparams; | |||
|
872 | void* newDest; | |||
|
873 | size_t allocationSize; | |||
|
874 | size_t boundSize; | |||
|
875 | Py_ssize_t destOffset = 0; | |||
|
876 | DataSource* sources = state->sources; | |||
|
877 | DestBuffer* destBuffer; | |||
|
878 | ||||
|
879 | assert(!state->destBuffers); | |||
|
880 | assert(0 == state->destCount); | |||
|
881 | ||||
|
882 | if (state->cParams) { | |||
|
883 | ztopy_compression_parameters(state->cParams, &zparams.cParams); | |||
|
884 | } | |||
|
885 | ||||
|
886 | zparams.fParams = state->fParams; | |||
|
887 | ||||
|
888 | /* | |||
|
889 | * The total size of the compressed data is unknown until we actually | |||
|
890 | * compress data. That means we can't pre-allocate the exact size we need. | |||
|
891 | * | |||
|
892 | * There is a cost to every allocation and reallocation. So, it is in our | |||
|
893 | * interest to minimize the number of allocations. | |||
|
894 | * | |||
|
895 | * There is also a cost to too few allocations. If allocations are too | |||
|
896 | * large they may fail. If buffers are shared and all inputs become | |||
|
897 | * irrelevant at different lifetimes, then a reference to one segment | |||
|
898 | * in the buffer will keep the entire buffer alive. This leads to excessive | |||
|
899 | * memory usage. | |||
|
900 | * | |||
|
901 | * Our current strategy is to assume a compression ratio of 16:1 and | |||
|
902 | * allocate buffers of that size, rounded up to the nearest power of 2 | |||
|
903 | * (because computers like round numbers). That ratio is greater than what | |||
|
904 | * most inputs achieve. This is by design: we don't want to over-allocate. | |||
|
905 | * But we don't want to under-allocate and lead to too many buffers either. | |||
|
906 | */ | |||
|
907 | ||||
|
908 | state->destCount = 1; | |||
|
909 | ||||
|
910 | state->destBuffers = calloc(1, sizeof(DestBuffer)); | |||
|
911 | if (NULL == state->destBuffers) { | |||
|
912 | state->error = WorkerError_no_memory; | |||
|
913 | return; | |||
|
914 | } | |||
|
915 | ||||
|
916 | destBuffer = &state->destBuffers[state->destCount - 1]; | |||
|
917 | ||||
|
918 | /* | |||
|
919 | * Rather than track bounds and grow the segments buffer, allocate space | |||
|
920 | * to hold remaining items then truncate when we're done with it. | |||
|
921 | */ | |||
|
922 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); | |||
|
923 | if (NULL == destBuffer->segments) { | |||
|
924 | state->error = WorkerError_no_memory; | |||
|
925 | return; | |||
|
926 | } | |||
|
927 | ||||
|
928 | destBuffer->segmentsSize = remainingItems; | |||
|
929 | ||||
|
930 | allocationSize = roundpow2(state->totalSourceSize >> 4); | |||
|
931 | ||||
|
932 | /* If the maximum size of the output is larger than that, round up. */ | |||
|
933 | boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize); | |||
|
934 | ||||
|
935 | if (boundSize > allocationSize) { | |||
|
936 | allocationSize = roundpow2(boundSize); | |||
|
937 | } | |||
|
938 | ||||
|
939 | destBuffer->dest = malloc(allocationSize); | |||
|
940 | if (NULL == destBuffer->dest) { | |||
|
941 | state->error = WorkerError_no_memory; | |||
|
942 | return; | |||
|
943 | } | |||
|
944 | ||||
|
945 | destBuffer->destSize = allocationSize; | |||
|
946 | ||||
|
947 | for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) { | |||
|
948 | void* source = sources[inputOffset].sourceData; | |||
|
949 | size_t sourceSize = sources[inputOffset].sourceSize; | |||
|
950 | size_t destAvailable; | |||
|
951 | void* dest; | |||
|
952 | ||||
|
953 | destAvailable = destBuffer->destSize - destOffset; | |||
|
954 | boundSize = ZSTD_compressBound(sourceSize); | |||
|
955 | ||||
|
956 | /* | |||
|
957 | * Not enough space in current buffer to hold largest compressed output. | |||
|
958 | * So allocate and switch to a new output buffer. | |||
|
959 | */ | |||
|
960 | if (boundSize > destAvailable) { | |||
|
961 | /* | |||
|
962 | * The downsizing of the existing buffer is optional. It should be cheap | |||
|
963 | * (unlike growing). So we just do it. | |||
|
964 | */ | |||
|
965 | if (destAvailable) { | |||
|
966 | newDest = realloc(destBuffer->dest, destOffset); | |||
|
967 | if (NULL == newDest) { | |||
|
968 | state->error = WorkerError_no_memory; | |||
|
969 | return; | |||
|
970 | } | |||
|
971 | ||||
|
972 | destBuffer->dest = newDest; | |||
|
973 | destBuffer->destSize = destOffset; | |||
|
974 | } | |||
|
975 | ||||
|
976 | /* Truncate segments buffer. */ | |||
|
977 | newDest = realloc(destBuffer->segments, | |||
|
978 | (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment)); | |||
|
979 | if (NULL == newDest) { | |||
|
980 | state->error = WorkerError_no_memory; | |||
|
981 | return; | |||
|
982 | } | |||
|
983 | ||||
|
984 | destBuffer->segments = newDest; | |||
|
985 | destBuffer->segmentsSize = inputOffset - currentBufferStartOffset; | |||
|
986 | ||||
|
987 | /* Grow space for new struct. */ | |||
|
988 | /* TODO consider over-allocating so we don't do this every time. */ | |||
|
989 | newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer)); | |||
|
990 | if (NULL == newDest) { | |||
|
991 | state->error = WorkerError_no_memory; | |||
|
992 | return; | |||
|
993 | } | |||
|
994 | ||||
|
995 | state->destBuffers = newDest; | |||
|
996 | state->destCount++; | |||
|
997 | ||||
|
998 | destBuffer = &state->destBuffers[state->destCount - 1]; | |||
|
999 | ||||
|
1000 | /* Don't take any chances with non-NULL pointers. */ | |||
|
1001 | memset(destBuffer, 0, sizeof(DestBuffer)); | |||
|
1002 | ||||
|
1003 | /** | |||
|
1004 | * We could dynamically update allocation size based on work done so far. | |||
|
1005 | * For now, keep it simple. | |||
|
1006 | */ | |||
|
1007 | allocationSize = roundpow2(state->totalSourceSize >> 4); | |||
|
1008 | ||||
|
1009 | if (boundSize > allocationSize) { | |||
|
1010 | allocationSize = roundpow2(boundSize); | |||
|
1011 | } | |||
|
1012 | ||||
|
1013 | destBuffer->dest = malloc(allocationSize); | |||
|
1014 | if (NULL == destBuffer->dest) { | |||
|
1015 | state->error = WorkerError_no_memory; | |||
|
1016 | return; | |||
|
1017 | } | |||
|
1018 | ||||
|
1019 | destBuffer->destSize = allocationSize; | |||
|
1020 | destAvailable = allocationSize; | |||
|
1021 | destOffset = 0; | |||
|
1022 | ||||
|
1023 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); | |||
|
1024 | if (NULL == destBuffer->segments) { | |||
|
1025 | state->error = WorkerError_no_memory; | |||
|
1026 | return; | |||
|
1027 | } | |||
|
1028 | ||||
|
1029 | destBuffer->segmentsSize = remainingItems; | |||
|
1030 | currentBufferStartOffset = inputOffset; | |||
|
1031 | } | |||
|
1032 | ||||
|
1033 | dest = (char*)destBuffer->dest + destOffset; | |||
|
1034 | ||||
|
1035 | if (state->cdict) { | |||
|
1036 | zresult = ZSTD_compress_usingCDict(state->cctx, dest, destAvailable, | |||
|
1037 | source, sourceSize, state->cdict); | |||
|
1038 | } | |||
|
1039 | else { | |||
|
1040 | if (!state->cParams) { | |||
|
1041 | zparams.cParams = ZSTD_getCParams(state->cLevel, sourceSize, 0); | |||
|
1042 | } | |||
|
1043 | ||||
|
1044 | zresult = ZSTD_compress_advanced(state->cctx, dest, destAvailable, | |||
|
1045 | source, sourceSize, NULL, 0, zparams); | |||
|
1046 | } | |||
|
1047 | ||||
|
1048 | if (ZSTD_isError(zresult)) { | |||
|
1049 | state->error = WorkerError_zstd; | |||
|
1050 | state->zresult = zresult; | |||
|
1051 | state->errorOffset = inputOffset; | |||
|
1052 | break; | |||
|
1053 | } | |||
|
1054 | ||||
|
1055 | destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset; | |||
|
1056 | destBuffer->segments[inputOffset - currentBufferStartOffset].length = zresult; | |||
|
1057 | ||||
|
1058 | destOffset += zresult; | |||
|
1059 | remainingItems--; | |||
|
1060 | } | |||
|
1061 | ||||
|
1062 | if (destBuffer->destSize > destOffset) { | |||
|
1063 | newDest = realloc(destBuffer->dest, destOffset); | |||
|
1064 | if (NULL == newDest) { | |||
|
1065 | state->error = WorkerError_no_memory; | |||
|
1066 | return; | |||
|
1067 | } | |||
|
1068 | ||||
|
1069 | destBuffer->dest = newDest; | |||
|
1070 | destBuffer->destSize = destOffset; | |||
|
1071 | } | |||
|
1072 | } | |||
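The buffer sizing heuristic described in the comment near the top of ``compress_worker`` above can be sketched as follows; this is an approximation in Python, and ``roundpow2`` plus the compress-bound value stand in for the helpers the C code uses:

    def roundpow2(n):
        # Smallest power of two >= n.
        power = 1
        while power < n:
            power *= 2
        return power

    def initial_allocation(total_source_size, first_item_bound):
        # Assume roughly 16:1 compression, rounded up to a power of two,
        # but never less than the worst-case size of the first item.
        allocation = roundpow2(total_source_size >> 4)
        if first_item_bound > allocation:
            allocation = roundpow2(first_item_bound)
        return allocation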
|
1073 | ||||
|
1074 | ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, | |||
|
1075 | DataSources* sources, unsigned int threadCount) { | |||
|
1076 | ZSTD_parameters zparams; | |||
|
1077 | unsigned long long bytesPerWorker; | |||
|
1078 | POOL_ctx* pool = NULL; | |||
|
1079 | WorkerState* workerStates = NULL; | |||
|
1080 | Py_ssize_t i; | |||
|
1081 | unsigned long long workerBytes = 0; | |||
|
1082 | Py_ssize_t workerStartOffset = 0; | |||
|
1083 | size_t currentThread = 0; | |||
|
1084 | int errored = 0; | |||
|
1085 | Py_ssize_t segmentsCount = 0; | |||
|
1086 | Py_ssize_t segmentIndex; | |||
|
1087 | PyObject* segmentsArg = NULL; | |||
|
1088 | ZstdBufferWithSegments* buffer; | |||
|
1089 | ZstdBufferWithSegmentsCollection* result = NULL; | |||
|
1090 | ||||
|
1091 | assert(sources->sourcesSize > 0); | |||
|
1092 | assert(sources->totalSourceSize > 0); | |||
|
1093 | assert(threadCount >= 1); | |||
|
1094 | ||||
|
1095 | /* More threads than inputs makes no sense. */ | |||
|
1096 | threadCount = sources->sourcesSize < threadCount ? (unsigned int)sources->sourcesSize | |||
|
1097 | : threadCount; | |||
|
1098 | ||||
|
1099 | /* TODO lower thread count when input size is too small and threads would add | |||
|
1100 | overhead. */ | |||
|
1101 | ||||
|
1102 | /* | |||
|
1103 | * When dictionaries are used, parameters are derived from the size of the | |||
|
1104 | * first element. | |||
|
1105 | * | |||
|
1106 | * TODO come up with a better mechanism. | |||
|
1107 | */ | |||
|
1108 | memset(&zparams, 0, sizeof(zparams)); | |||
|
1109 | if (compressor->cparams) { | |||
|
1110 | ztopy_compression_parameters(compressor->cparams, &zparams.cParams); | |||
|
1111 | } | |||
|
1112 | else { | |||
|
1113 | zparams.cParams = ZSTD_getCParams(compressor->compressionLevel, | |||
|
1114 | sources->sources[0].sourceSize, | |||
|
1115 | compressor->dict ? compressor->dict->dictSize : 0); | |||
|
1116 | } | |||
|
1117 | ||||
|
1118 | zparams.fParams = compressor->fparams; | |||
|
1119 | ||||
|
1120 | if (0 != populate_cdict(compressor, &zparams)) { | |||
|
1121 | return NULL; | |||
|
1122 | } | |||
|
1123 | ||||
|
1124 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); | |||
|
1125 | if (NULL == workerStates) { | |||
|
1126 | PyErr_NoMemory(); | |||
|
1127 | goto finally; | |||
|
1128 | } | |||
|
1129 | ||||
|
1130 | memset(workerStates, 0, threadCount * sizeof(WorkerState)); | |||
|
1131 | ||||
|
1132 | if (threadCount > 1) { | |||
|
1133 | pool = POOL_create(threadCount, 1); | |||
|
1134 | if (NULL == pool) { | |||
|
1135 | PyErr_SetString(ZstdError, "could not initialize zstd thread pool"); | |||
|
1136 | goto finally; | |||
|
1137 | } | |||
|
1138 | } | |||
|
1139 | ||||
|
1140 | bytesPerWorker = sources->totalSourceSize / threadCount; | |||
|
1141 | ||||
|
1142 | for (i = 0; i < threadCount; i++) { | |||
|
1143 | workerStates[i].cctx = ZSTD_createCCtx(); | |||
|
1144 | if (!workerStates[i].cctx) { | |||
|
1145 | PyErr_NoMemory(); | |||
|
1146 | goto finally; | |||
|
1147 | } | |||
|
1148 | ||||
|
1149 | workerStates[i].cdict = compressor->cdict; | |||
|
1150 | workerStates[i].cLevel = compressor->compressionLevel; | |||
|
1151 | workerStates[i].cParams = compressor->cparams; | |||
|
1152 | workerStates[i].fParams = compressor->fparams; | |||
|
1153 | ||||
|
1154 | workerStates[i].sources = sources->sources; | |||
|
1155 | workerStates[i].sourcesSize = sources->sourcesSize; | |||
|
1156 | } | |||
|
1157 | ||||
|
1158 | Py_BEGIN_ALLOW_THREADS | |||
|
1159 | for (i = 0; i < sources->sourcesSize; i++) { | |||
|
1160 | workerBytes += sources->sources[i].sourceSize; | |||
|
1161 | ||||
|
1162 | /* | |||
|
1163 | * The last worker/thread needs to handle all remaining work. Don't | |||
|
1164 | * trigger it prematurely. Defer to the block outside of the loop | |||
|
1165 | * to run the last worker/thread. But do still process this loop | |||
|
1166 | * so workerBytes is correct. | |||
|
1167 | */ | |||
|
1168 | if (currentThread == threadCount - 1) { | |||
|
1169 | continue; | |||
|
1170 | } | |||
|
1171 | ||||
|
1172 | if (workerBytes >= bytesPerWorker) { | |||
|
1173 | assert(currentThread < threadCount); | |||
|
1174 | workerStates[currentThread].totalSourceSize = workerBytes; | |||
|
1175 | workerStates[currentThread].startOffset = workerStartOffset; | |||
|
1176 | workerStates[currentThread].endOffset = i; | |||
|
1177 | ||||
|
1178 | if (threadCount > 1) { | |||
|
1179 | POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]); | |||
|
1180 | } | |||
|
1181 | else { | |||
|
1182 | compress_worker(&workerStates[currentThread]); | |||
|
1183 | } | |||
|
1184 | ||||
|
1185 | currentThread++; | |||
|
1186 | workerStartOffset = i + 1; | |||
|
1187 | workerBytes = 0; | |||
|
1188 | } | |||
|
1189 | } | |||
|
1190 | ||||
|
1191 | if (workerBytes) { | |||
|
1192 | assert(currentThread < threadCount); | |||
|
1193 | workerStates[currentThread].totalSourceSize = workerBytes; | |||
|
1194 | workerStates[currentThread].startOffset = workerStartOffset; | |||
|
1195 | workerStates[currentThread].endOffset = sources->sourcesSize - 1; | |||
|
1196 | ||||
|
1197 | if (threadCount > 1) { | |||
|
1198 | POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]); | |||
|
1199 | } | |||
|
1200 | else { | |||
|
1201 | compress_worker(&workerStates[currentThread]); | |||
|
1202 | } | |||
|
1203 | } | |||
|
1204 | ||||
|
1205 | if (threadCount > 1) { | |||
|
1206 | POOL_free(pool); | |||
|
1207 | pool = NULL; | |||
|
1208 | } | |||
|
1209 | ||||
|
1210 | Py_END_ALLOW_THREADS | |||
|
1211 | ||||
|
1212 | for (i = 0; i < threadCount; i++) { | |||
|
1213 | switch (workerStates[i].error) { | |||
|
1214 | case WorkerError_no_memory: | |||
|
1215 | PyErr_NoMemory(); | |||
|
1216 | errored = 1; | |||
|
1217 | break; | |||
|
1218 | ||||
|
1219 | case WorkerError_zstd: | |||
|
1220 | PyErr_Format(ZstdError, "error compressing item %zd: %s", | |||
|
1221 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); | |||
|
1222 | errored = 1; | |||
|
1223 | break; | |||
|
1224 | default: | |||
|
1225 | ; | |||
|
1226 | } | |||
|
1227 | ||||
|
1228 | if (errored) { | |||
|
1229 | break; | |||
|
1230 | } | |||
|
1231 | ||||
|
1232 | } | |||
|
1233 | ||||
|
1234 | if (errored) { | |||
|
1235 | goto finally; | |||
|
1236 | } | |||
|
1237 | ||||
|
1238 | segmentsCount = 0; | |||
|
1239 | for (i = 0; i < threadCount; i++) { | |||
|
1240 | WorkerState* state = &workerStates[i]; | |||
|
1241 | segmentsCount += state->destCount; | |||
|
1242 | } | |||
|
1243 | ||||
|
1244 | segmentsArg = PyTuple_New(segmentsCount); | |||
|
1245 | if (NULL == segmentsArg) { | |||
|
1246 | goto finally; | |||
|
1247 | } | |||
|
1248 | ||||
|
1249 | segmentIndex = 0; | |||
|
1250 | ||||
|
1251 | for (i = 0; i < threadCount; i++) { | |||
|
1252 | Py_ssize_t j; | |||
|
1253 | WorkerState* state = &workerStates[i]; | |||
|
1254 | ||||
|
1255 | for (j = 0; j < state->destCount; j++) { | |||
|
1256 | DestBuffer* destBuffer = &state->destBuffers[j]; | |||
|
1257 | buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize, | |||
|
1258 | destBuffer->segments, destBuffer->segmentsSize); | |||
|
1259 | ||||
|
1260 | if (NULL == buffer) { | |||
|
1261 | goto finally; | |||
|
1262 | } | |||
|
1263 | ||||
|
1264 | /* Tell instance to use free() instead of PyMem_Free(). */ | |||
|
1265 | buffer->useFree = 1; | |||
|
1266 | ||||
|
1267 | /* | |||
|
1268 | * BufferWithSegments_FromMemory takes ownership of the backing memory. | |||
|
1269 | * Unset it here so it doesn't get freed below. | |||
|
1270 | */ | |||
|
1271 | destBuffer->dest = NULL; | |||
|
1272 | destBuffer->segments = NULL; | |||
|
1273 | ||||
|
1274 | PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer); | |||
|
1275 | } | |||
|
1276 | } | |||
|
1277 | ||||
|
1278 | result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject( | |||
|
1279 | (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg); | |||
|
1280 | ||||
|
1281 | finally: | |||
|
1282 | Py_CLEAR(segmentsArg); | |||
|
1283 | ||||
|
1284 | if (pool) { | |||
|
1285 | POOL_free(pool); | |||
|
1286 | } | |||
|
1287 | ||||
|
1288 | if (workerStates) { | |||
|
1289 | Py_ssize_t j; | |||
|
1290 | ||||
|
1291 | for (i = 0; i < threadCount; i++) { | |||
|
1292 | WorkerState state = workerStates[i]; | |||
|
1293 | ||||
|
1294 | if (state.cctx) { | |||
|
1295 | ZSTD_freeCCtx(state.cctx); | |||
|
1296 | } | |||
|
1297 | ||||
|
1298 | /* malloc() is used in worker thread. */ | |||
|
1299 | ||||
|
1300 | for (j = 0; j < state.destCount; j++) { | |||
|
1301 | if (state.destBuffers) { | |||
|
1302 | free(state.destBuffers[j].dest); | |||
|
1303 | free(state.destBuffers[j].segments); | |||
|
1304 | } | |||
|
1305 | } | |||
|
1306 | ||||
|
1307 | ||||
|
1308 | free(state.destBuffers); | |||
|
1309 | } | |||
|
1310 | ||||
|
1311 | PyMem_Free(workerStates); | |||
|
1312 | } | |||
|
1313 | ||||
|
1314 | return result; | |||
|
1315 | } | |||
|
1316 | ||||
|
1317 | PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__, | |||
|
1318 | "Compress multiple pieces of data as a single operation\n" | |||
|
1319 | "\n" | |||
|
1320 | "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n" | |||
|
1321 | "a list of bytes like objects holding data to compress.\n" | |||
|
1322 | "\n" | |||
|
1323 | "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n" | |||
|
1324 | "\n" | |||
|
1325 | "This function is optimized to perform multiple compression operations as\n" | |||
|
1326 | "as possible with as little overhead as possbile.\n" | |||
|
1327 | ); | |||
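A sketch of batch compression with the API documented above, assuming ``zstandard`` is importable; per the implementation below, a negative ``threads`` value selects one worker per logical CPU:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    inputs = [("input %d" % i).encode("ascii") * 64 for i in range(100)]

    # Returns a BufferWithSegmentsCollection holding one compressed frame per input.
    collection = cctx.multi_compress_to_buffer(inputs, threads=-1)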
|
1328 | ||||
|
1329 | static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |||
|
1330 | static char* kwlist[] = { | |||
|
1331 | "data", | |||
|
1332 | "threads", | |||
|
1333 | NULL | |||
|
1334 | }; | |||
|
1335 | ||||
|
1336 | PyObject* data; | |||
|
1337 | int threads = 0; | |||
|
1338 | Py_buffer* dataBuffers = NULL; | |||
|
1339 | DataSources sources; | |||
|
1340 | Py_ssize_t i; | |||
|
1341 | Py_ssize_t sourceCount = 0; | |||
|
1342 | ZstdBufferWithSegmentsCollection* result = NULL; | |||
|
1343 | ||||
|
1344 | if (self->mtcctx) { | |||
|
1345 | PyErr_SetString(ZstdError, | |||
|
1346 | "function cannot be called on ZstdCompressor configured for multi-threaded compression"); | |||
|
1347 | return NULL; | |||
|
1348 | } | |||
|
1349 | ||||
|
1350 | memset(&sources, 0, sizeof(sources)); | |||
|
1351 | ||||
|
1352 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist, | |||
|
1353 | &data, &threads)) { | |||
|
1354 | return NULL; | |||
|
1355 | } | |||
|
1356 | ||||
|
1357 | if (threads < 0) { | |||
|
1358 | threads = cpu_count(); | |||
|
1359 | } | |||
|
1360 | ||||
|
1361 | if (threads < 2) { | |||
|
1362 | threads = 1; | |||
|
1363 | } | |||
|
1364 | ||||
|
1365 | if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) { | |||
|
1366 | ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data; | |||
|
1367 | ||||
|
1368 | sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource)); | |||
|
1369 | if (NULL == sources.sources) { | |||
|
1370 | PyErr_NoMemory(); | |||
|
1371 | goto finally; | |||
|
1372 | } | |||
|
1373 | ||||
|
1374 | for (i = 0; i < buffer->segmentCount; i++) { | |||
|
1375 | sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; | |||
|
1376 | sources.sources[i].sourceSize = buffer->segments[i].length; | |||
|
1377 | sources.totalSourceSize += buffer->segments[i].length; | |||
|
1378 | } | |||
|
1379 | ||||
|
1380 | sources.sourcesSize = buffer->segmentCount; | |||
|
1381 | } | |||
|
1382 | else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) { | |||
|
1383 | Py_ssize_t j; | |||
|
1384 | Py_ssize_t offset = 0; | |||
|
1385 | ZstdBufferWithSegments* buffer; | |||
|
1386 | ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data; | |||
|
1387 | ||||
|
1388 | sourceCount = BufferWithSegmentsCollection_length(collection); | |||
|
1389 | ||||
|
1390 | sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource)); | |||
|
1391 | if (NULL == sources.sources) { | |||
|
1392 | PyErr_NoMemory(); | |||
|
1393 | goto finally; | |||
|
1394 | } | |||
|
1395 | ||||
|
1396 | for (i = 0; i < collection->bufferCount; i++) { | |||
|
1397 | buffer = collection->buffers[i]; | |||
|
1398 | ||||
|
1399 | for (j = 0; j < buffer->segmentCount; j++) { | |||
|
1400 | sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset; | |||
|
1401 | sources.sources[offset].sourceSize = buffer->segments[j].length; | |||
|
1402 | sources.totalSourceSize += buffer->segments[j].length; | |||
|
1403 | ||||
|
1404 | offset++; | |||
|
1405 | } | |||
|
1406 | } | |||
|
1407 | ||||
|
1408 | sources.sourcesSize = sourceCount; | |||
|
1409 | } | |||
|
1410 | else if (PyList_Check(data)) { | |||
|
1411 | sourceCount = PyList_GET_SIZE(data); | |||
|
1412 | ||||
|
1413 | sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource)); | |||
|
1414 | if (NULL == sources.sources) { | |||
|
1415 | PyErr_NoMemory(); | |||
|
1416 | goto finally; | |||
|
1417 | } | |||
|
1418 | ||||
|
1419 | /* | |||
|
1420 | * It isn't clear whether the address referred to by Py_buffer.buf | |||
|
1421 | * is still valid after PyBuffer_Release. We we hold a reference to all | |||
|
1422 | * Py_buffer instances for the duration of the operation. | |||
|
1423 | */ | |||
|
1424 | dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer)); | |||
|
1425 | if (NULL == dataBuffers) { | |||
|
1426 | PyErr_NoMemory(); | |||
|
1427 | goto finally; | |||
|
1428 | } | |||
|
1429 | ||||
|
1430 | memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer)); | |||
|
1431 | ||||
|
1432 | for (i = 0; i < sourceCount; i++) { | |||
|
1433 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i), | |||
|
1434 | &dataBuffers[i], PyBUF_CONTIG_RO)) { | |||
|
1435 | PyErr_Clear(); | |||
|
1436 | PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i); | |||
|
1437 | goto finally; | |||
|
1438 | } | |||
|
1439 | ||||
|
1440 | sources.sources[i].sourceData = dataBuffers[i].buf; | |||
|
1441 | sources.sources[i].sourceSize = dataBuffers[i].len; | |||
|
1442 | sources.totalSourceSize += dataBuffers[i].len; | |||
|
1443 | } | |||
|
1444 | ||||
|
1445 | sources.sourcesSize = sourceCount; | |||
|
1446 | } | |||
|
1447 | else { | |||
|
1448 | PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments"); | |||
|
1449 | goto finally; | |||
|
1450 | } | |||
|
1451 | ||||
|
1452 | if (0 == sources.sourcesSize) { | |||
|
1453 | PyErr_SetString(PyExc_ValueError, "no source elements found"); | |||
|
1454 | goto finally; | |||
|
1455 | } | |||
|
1456 | ||||
|
1457 | if (0 == sources.totalSourceSize) { | |||
|
1458 | PyErr_SetString(PyExc_ValueError, "source elements are empty"); | |||
|
1459 | goto finally; | |||
|
1460 | } | |||
|
1461 | ||||
|
1462 | result = compress_from_datasources(self, &sources, threads); | |||
|
1463 | ||||
|
1464 | finally: | |||
|
1465 | PyMem_Free(sources.sources); | |||
|
1466 | ||||
|
1467 | if (dataBuffers) { | |||
|
1468 | for (i = 0; i < sourceCount; i++) { | |||
|
1469 | PyBuffer_Release(&dataBuffers[i]); | |||
|
1470 | } | |||
|
1471 | ||||
|
1472 | PyMem_Free(dataBuffers); | |||
|
1473 | } | |||
723 |
|
1474 | |||
724 | return result; |
|
1475 | return result; | |
725 | } |
|
1476 | } | |
726 |
|
1477 | |||
727 | static PyMethodDef ZstdCompressor_methods[] = { |
|
1478 | static PyMethodDef ZstdCompressor_methods[] = { | |
728 | { "compress", (PyCFunction)ZstdCompressor_compress, |
|
1479 | { "compress", (PyCFunction)ZstdCompressor_compress, | |
729 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, |
|
1480 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ }, | |
730 | { "compressobj", (PyCFunction)ZstdCompressor_compressobj, |
|
1481 | { "compressobj", (PyCFunction)ZstdCompressor_compressobj, | |
731 | METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, |
|
1482 | METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ }, | |
732 | { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, |
|
1483 | { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream, | |
733 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, |
|
1484 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, | |
734 | { "read_from", (PyCFunction)ZstdCompressor_read_from, |
|
1485 | { "read_from", (PyCFunction)ZstdCompressor_read_from, | |
735 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ }, |
|
1486 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_from__doc__ }, | |
736 | { "write_to", (PyCFunction)ZstdCompressor_write_to, |
|
1487 | { "write_to", (PyCFunction)ZstdCompressor_write_to, | |
737 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ }, |
|
1488 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_write_to___doc__ }, | |
|
1489 | { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer, | |||
|
1490 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ }, | |||
738 | { NULL, NULL } |
|
1491 | { NULL, NULL } | |
739 | }; |
|
1492 | }; | |
740 |
|
1493 | |||
741 | PyTypeObject ZstdCompressorType = { |
|
1494 | PyTypeObject ZstdCompressorType = { | |
742 | PyVarObject_HEAD_INIT(NULL, 0) |
|
1495 | PyVarObject_HEAD_INIT(NULL, 0) | |
743 | "zstd.ZstdCompressor", /* tp_name */ |
|
1496 | "zstd.ZstdCompressor", /* tp_name */ | |
744 | sizeof(ZstdCompressor), /* tp_basicsize */ |
|
1497 | sizeof(ZstdCompressor), /* tp_basicsize */ | |
745 | 0, /* tp_itemsize */ |
|
1498 | 0, /* tp_itemsize */ | |
746 | (destructor)ZstdCompressor_dealloc, /* tp_dealloc */ |
|
1499 | (destructor)ZstdCompressor_dealloc, /* tp_dealloc */ | |
747 | 0, /* tp_print */ |
|
1500 | 0, /* tp_print */ | |
748 | 0, /* tp_getattr */ |
|
1501 | 0, /* tp_getattr */ | |
749 | 0, /* tp_setattr */ |
|
1502 | 0, /* tp_setattr */ | |
750 | 0, /* tp_compare */ |
|
1503 | 0, /* tp_compare */ | |
751 | 0, /* tp_repr */ |
|
1504 | 0, /* tp_repr */ | |
752 | 0, /* tp_as_number */ |
|
1505 | 0, /* tp_as_number */ | |
753 | 0, /* tp_as_sequence */ |
|
1506 | 0, /* tp_as_sequence */ | |
754 | 0, /* tp_as_mapping */ |
|
1507 | 0, /* tp_as_mapping */ | |
755 | 0, /* tp_hash */ |
|
1508 | 0, /* tp_hash */ | |
756 | 0, /* tp_call */ |
|
1509 | 0, /* tp_call */ | |
757 | 0, /* tp_str */ |
|
1510 | 0, /* tp_str */ | |
758 | 0, /* tp_getattro */ |
|
1511 | 0, /* tp_getattro */ | |
759 | 0, /* tp_setattro */ |
|
1512 | 0, /* tp_setattro */ | |
760 | 0, /* tp_as_buffer */ |
|
1513 | 0, /* tp_as_buffer */ | |
761 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
1514 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
762 | ZstdCompressor__doc__, /* tp_doc */ |
|
1515 | ZstdCompressor__doc__, /* tp_doc */ | |
763 | 0, /* tp_traverse */ |
|
1516 | 0, /* tp_traverse */ | |
764 | 0, /* tp_clear */ |
|
1517 | 0, /* tp_clear */ | |
765 | 0, /* tp_richcompare */ |
|
1518 | 0, /* tp_richcompare */ | |
766 | 0, /* tp_weaklistoffset */ |
|
1519 | 0, /* tp_weaklistoffset */ | |
767 | 0, /* tp_iter */ |
|
1520 | 0, /* tp_iter */ | |
768 | 0, /* tp_iternext */ |
|
1521 | 0, /* tp_iternext */ | |
769 | ZstdCompressor_methods, /* tp_methods */ |
|
1522 | ZstdCompressor_methods, /* tp_methods */ | |
770 | 0, /* tp_members */ |
|
1523 | 0, /* tp_members */ | |
771 | 0, /* tp_getset */ |
|
1524 | 0, /* tp_getset */ | |
772 | 0, /* tp_base */ |
|
1525 | 0, /* tp_base */ | |
773 | 0, /* tp_dict */ |
|
1526 | 0, /* tp_dict */ | |
774 | 0, /* tp_descr_get */ |
|
1527 | 0, /* tp_descr_get */ | |
775 | 0, /* tp_descr_set */ |
|
1528 | 0, /* tp_descr_set */ | |
776 | 0, /* tp_dictoffset */ |
|
1529 | 0, /* tp_dictoffset */ | |
777 | (initproc)ZstdCompressor_init, /* tp_init */ |
|
1530 | (initproc)ZstdCompressor_init, /* tp_init */ | |
778 | 0, /* tp_alloc */ |
|
1531 | 0, /* tp_alloc */ | |
779 | PyType_GenericNew, /* tp_new */ |
|
1532 | PyType_GenericNew, /* tp_new */ | |
780 | }; |
|
1533 | }; | |
781 |
|
1534 | |||
782 | void compressor_module_init(PyObject* mod) { |
|
1535 | void compressor_module_init(PyObject* mod) { | |
783 | Py_TYPE(&ZstdCompressorType) = &PyType_Type; |
|
1536 | Py_TYPE(&ZstdCompressorType) = &PyType_Type; | |
784 | if (PyType_Ready(&ZstdCompressorType) < 0) { |
|
1537 | if (PyType_Ready(&ZstdCompressorType) < 0) { | |
785 | return; |
|
1538 | return; | |
786 | } |
|
1539 | } | |
787 |
|
1540 | |||
788 | Py_INCREF((PyObject*)&ZstdCompressorType); |
|
1541 | Py_INCREF((PyObject*)&ZstdCompressorType); | |
789 | PyModule_AddObject(mod, "ZstdCompressor", |
|
1542 | PyModule_AddObject(mod, "ZstdCompressor", | |
790 | (PyObject*)&ZstdCompressorType); |
|
1543 | (PyObject*)&ZstdCompressorType); | |
791 | } |
|
1544 | } |
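The hunk above only carries the type-table and module-init boilerplate that registers zstd.ZstdCompressor, so as reviewer context here is a minimal Python sketch of the object it exposes. The level keyword and the one-shot compress() method are assumptions about the module's documented API; neither appears in this hunk.

    import zstd

    # Build a compressor and run a one-shot compression (assumed API surface;
    # only the type registration itself appears in the hunk above).
    cctx = zstd.ZstdCompressor(level=3)
    frame = cctx.compress(b"data to compress")
    print(len(frame))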
@@ -1,234 +1,247 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | #define min(a, b) (((a) < (b)) ? (a) : (b)) |
|
11 | #define min(a, b) (((a) < (b)) ? (a) : (b)) | |
12 |
|
12 | |||
13 | extern PyObject* ZstdError; |
|
13 | extern PyObject* ZstdError; | |
14 |
|
14 | |||
15 | PyDoc_STRVAR(ZstdCompressorIterator__doc__, |
|
15 | PyDoc_STRVAR(ZstdCompressorIterator__doc__, | |
16 | "Represents an iterator of compressed data.\n" |
|
16 | "Represents an iterator of compressed data.\n" | |
17 | ); |
|
17 | ); | |
18 |
|
18 | |||
19 | static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) { |
|
19 | static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) { | |
20 | Py_XDECREF(self->readResult); |
|
20 | Py_XDECREF(self->readResult); | |
21 | Py_XDECREF(self->compressor); |
|
21 | Py_XDECREF(self->compressor); | |
22 | Py_XDECREF(self->reader); |
|
22 | Py_XDECREF(self->reader); | |
23 |
|
23 | |||
24 | if (self->buffer) { |
|
24 | if (self->buffer) { | |
25 | PyBuffer_Release(self->buffer); |
|
25 | PyBuffer_Release(self->buffer); | |
26 | PyMem_FREE(self->buffer); |
|
26 | PyMem_FREE(self->buffer); | |
27 | self->buffer = NULL; |
|
27 | self->buffer = NULL; | |
28 | } |
|
28 | } | |
29 |
|
29 | |||
30 | if (self->cstream) { |
|
|||
31 | ZSTD_freeCStream(self->cstream); |
|
|||
32 | self->cstream = NULL; |
|
|||
33 | } |
|
|||
34 |
|
||||
35 | if (self->output.dst) { |
|
30 | if (self->output.dst) { | |
36 | PyMem_Free(self->output.dst); |
|
31 | PyMem_Free(self->output.dst); | |
37 | self->output.dst = NULL; |
|
32 | self->output.dst = NULL; | |
38 | } |
|
33 | } | |
39 |
|
34 | |||
40 | PyObject_Del(self); |
|
35 | PyObject_Del(self); | |
41 | } |
|
36 | } | |
42 |
|
37 | |||
43 | static PyObject* ZstdCompressorIterator_iter(PyObject* self) { |
|
38 | static PyObject* ZstdCompressorIterator_iter(PyObject* self) { | |
44 | Py_INCREF(self); |
|
39 | Py_INCREF(self); | |
45 | return self; |
|
40 | return self; | |
46 | } |
|
41 | } | |
47 |
|
42 | |||
48 | static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) { |
|
43 | static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) { | |
49 | size_t zresult; |
|
44 | size_t zresult; | |
50 | PyObject* readResult = NULL; |
|
45 | PyObject* readResult = NULL; | |
51 | PyObject* chunk; |
|
46 | PyObject* chunk; | |
52 | char* readBuffer; |
|
47 | char* readBuffer; | |
53 | Py_ssize_t readSize = 0; |
|
48 | Py_ssize_t readSize = 0; | |
54 | Py_ssize_t bufferRemaining; |
|
49 | Py_ssize_t bufferRemaining; | |
55 |
|
50 | |||
56 | if (self->finishedOutput) { |
|
51 | if (self->finishedOutput) { | |
57 | PyErr_SetString(PyExc_StopIteration, "output flushed"); |
|
52 | PyErr_SetString(PyExc_StopIteration, "output flushed"); | |
58 | return NULL; |
|
53 | return NULL; | |
59 | } |
|
54 | } | |
60 |
|
55 | |||
61 | feedcompressor: |
|
56 | feedcompressor: | |
62 |
|
57 | |||
63 | /* If we have data left in the input, consume it. */ |
|
58 | /* If we have data left in the input, consume it. */ | |
64 | if (self->input.pos < self->input.size) { |
|
59 | if (self->input.pos < self->input.size) { | |
65 | Py_BEGIN_ALLOW_THREADS |
|
60 | Py_BEGIN_ALLOW_THREADS | |
66 | zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input); |
|
61 | if (self->compressor->mtcctx) { | |
|
62 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, | |||
|
63 | &self->output, &self->input); | |||
|
64 | } | |||
|
65 | else { | |||
|
66 | zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, | |||
|
67 | &self->input); | |||
|
68 | } | |||
67 | Py_END_ALLOW_THREADS |
|
69 | Py_END_ALLOW_THREADS | |
68 |
|
70 | |||
69 | /* Release the Python object holding the input buffer. */ |
|
71 | /* Release the Python object holding the input buffer. */ | |
70 | if (self->input.pos == self->input.size) { |
|
72 | if (self->input.pos == self->input.size) { | |
71 | self->input.src = NULL; |
|
73 | self->input.src = NULL; | |
72 | self->input.pos = 0; |
|
74 | self->input.pos = 0; | |
73 | self->input.size = 0; |
|
75 | self->input.size = 0; | |
74 | Py_DECREF(self->readResult); |
|
76 | Py_DECREF(self->readResult); | |
75 | self->readResult = NULL; |
|
77 | self->readResult = NULL; | |
76 | } |
|
78 | } | |
77 |
|
79 | |||
78 | if (ZSTD_isError(zresult)) { |
|
80 | if (ZSTD_isError(zresult)) { | |
79 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
81 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
80 | return NULL; |
|
82 | return NULL; | |
81 | } |
|
83 | } | |
82 |
|
84 | |||
83 | /* If it produced output data, emit it. */ |
|
85 | /* If it produced output data, emit it. */ | |
84 | if (self->output.pos) { |
|
86 | if (self->output.pos) { | |
85 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
87 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
86 | self->output.pos = 0; |
|
88 | self->output.pos = 0; | |
87 | return chunk; |
|
89 | return chunk; | |
88 | } |
|
90 | } | |
89 | } |
|
91 | } | |
90 |
|
92 | |||
91 | /* We should never have output data sitting around after a previous call. */ |
|
93 | /* We should never have output data sitting around after a previous call. */ | |
92 | assert(self->output.pos == 0); |
|
94 | assert(self->output.pos == 0); | |
93 |
|
95 | |||
94 | /* The code above should have either emitted a chunk and returned or consumed |
|
96 | /* The code above should have either emitted a chunk and returned or consumed | |
95 | the entire input buffer. So the state of the input buffer is not |
|
97 | the entire input buffer. So the state of the input buffer is not | |
96 | relevant. */ |
|
98 | relevant. */ | |
97 | if (!self->finishedInput) { |
|
99 | if (!self->finishedInput) { | |
98 | if (self->reader) { |
|
100 | if (self->reader) { | |
99 | readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize); |
|
101 | readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize); | |
100 | if (!readResult) { |
|
102 | if (!readResult) { | |
101 | PyErr_SetString(ZstdError, "could not read() from source"); |
|
103 | PyErr_SetString(ZstdError, "could not read() from source"); | |
102 | return NULL; |
|
104 | return NULL; | |
103 | } |
|
105 | } | |
104 |
|
106 | |||
105 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
107 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
106 | } |
|
108 | } | |
107 | else { |
|
109 | else { | |
108 | assert(self->buffer && self->buffer->buf); |
|
110 | assert(self->buffer && self->buffer->buf); | |
109 |
|
111 | |||
110 | /* Only support contiguous C arrays. */ |
|
112 | /* Only support contiguous C arrays. */ | |
111 | assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL); |
|
113 | assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL); | |
112 | assert(self->buffer->itemsize == 1); |
|
114 | assert(self->buffer->itemsize == 1); | |
113 |
|
115 | |||
114 | readBuffer = (char*)self->buffer->buf + self->bufferOffset; |
|
116 | readBuffer = (char*)self->buffer->buf + self->bufferOffset; | |
115 | bufferRemaining = self->buffer->len - self->bufferOffset; |
|
117 | bufferRemaining = self->buffer->len - self->bufferOffset; | |
116 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); |
|
118 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
117 | self->bufferOffset += readSize; |
|
119 | self->bufferOffset += readSize; | |
118 | } |
|
120 | } | |
119 |
|
121 | |||
120 | if (0 == readSize) { |
|
122 | if (0 == readSize) { | |
121 | Py_XDECREF(readResult); |
|
123 | Py_XDECREF(readResult); | |
122 | self->finishedInput = 1; |
|
124 | self->finishedInput = 1; | |
123 | } |
|
125 | } | |
124 | else { |
|
126 | else { | |
125 | self->readResult = readResult; |
|
127 | self->readResult = readResult; | |
126 | } |
|
128 | } | |
127 | } |
|
129 | } | |
128 |
|
130 | |||
129 | /* EOF */ |
|
131 | /* EOF */ | |
130 | if (0 == readSize) { |
|
132 | if (0 == readSize) { | |
131 | zresult = ZSTD_endStream(self->cstream, &self->output); |
|
133 | if (self->compressor->mtcctx) { | |
|
134 | zresult = ZSTDMT_endStream(self->compressor->mtcctx, &self->output); | |||
|
135 | } | |||
|
136 | else { | |||
|
137 | zresult = ZSTD_endStream(self->compressor->cstream, &self->output); | |||
|
138 | } | |||
132 | if (ZSTD_isError(zresult)) { |
|
139 | if (ZSTD_isError(zresult)) { | |
133 | PyErr_Format(ZstdError, "error ending compression stream: %s", |
|
140 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
134 | ZSTD_getErrorName(zresult)); |
|
141 | ZSTD_getErrorName(zresult)); | |
135 | return NULL; |
|
142 | return NULL; | |
136 | } |
|
143 | } | |
137 |
|
144 | |||
138 | assert(self->output.pos); |
|
145 | assert(self->output.pos); | |
139 |
|
146 | |||
140 | if (0 == zresult) { |
|
147 | if (0 == zresult) { | |
141 | self->finishedOutput = 1; |
|
148 | self->finishedOutput = 1; | |
142 | } |
|
149 | } | |
143 |
|
150 | |||
144 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
151 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
145 | self->output.pos = 0; |
|
152 | self->output.pos = 0; | |
146 | return chunk; |
|
153 | return chunk; | |
147 | } |
|
154 | } | |
148 |
|
155 | |||
149 | /* New data from reader. Feed into compressor. */ |
|
156 | /* New data from reader. Feed into compressor. */ | |
150 | self->input.src = readBuffer; |
|
157 | self->input.src = readBuffer; | |
151 | self->input.size = readSize; |
|
158 | self->input.size = readSize; | |
152 | self->input.pos = 0; |
|
159 | self->input.pos = 0; | |
153 |
|
160 | |||
154 | Py_BEGIN_ALLOW_THREADS |
|
161 | Py_BEGIN_ALLOW_THREADS | |
155 | zresult = ZSTD_compressStream(self->cstream, &self->output, &self->input); |
|
162 | if (self->compressor->mtcctx) { | |
|
163 | zresult = ZSTDMT_compressStream(self->compressor->mtcctx, &self->output, | |||
|
164 | &self->input); | |||
|
165 | } | |||
|
166 | else { | |||
|
167 | zresult = ZSTD_compressStream(self->compressor->cstream, &self->output, &self->input); | |||
|
168 | } | |||
156 | Py_END_ALLOW_THREADS |
|
169 | Py_END_ALLOW_THREADS | |
157 |
|
170 | |||
158 | /* The input buffer currently points to memory managed by Python |
|
171 | /* The input buffer currently points to memory managed by Python | |
159 | (readBuffer). This object was allocated by this function. If it wasn't |
|
172 | (readBuffer). This object was allocated by this function. If it wasn't | |
160 | fully consumed, we need to release it in a subsequent function call. |
|
173 | fully consumed, we need to release it in a subsequent function call. | |
161 | If it is fully consumed, do that now. |
|
174 | If it is fully consumed, do that now. | |
162 | */ |
|
175 | */ | |
163 | if (self->input.pos == self->input.size) { |
|
176 | if (self->input.pos == self->input.size) { | |
164 | self->input.src = NULL; |
|
177 | self->input.src = NULL; | |
165 | self->input.pos = 0; |
|
178 | self->input.pos = 0; | |
166 | self->input.size = 0; |
|
179 | self->input.size = 0; | |
167 | Py_XDECREF(self->readResult); |
|
180 | Py_XDECREF(self->readResult); | |
168 | self->readResult = NULL; |
|
181 | self->readResult = NULL; | |
169 | } |
|
182 | } | |
170 |
|
183 | |||
171 | if (ZSTD_isError(zresult)) { |
|
184 | if (ZSTD_isError(zresult)) { | |
172 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); |
|
185 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
173 | return NULL; |
|
186 | return NULL; | |
174 | } |
|
187 | } | |
175 |
|
188 | |||
176 | assert(self->input.pos <= self->input.size); |
|
189 | assert(self->input.pos <= self->input.size); | |
177 |
|
190 | |||
178 | /* If we didn't write anything, start the process over. */ |
|
191 | /* If we didn't write anything, start the process over. */ | |
179 | if (0 == self->output.pos) { |
|
192 | if (0 == self->output.pos) { | |
180 | goto feedcompressor; |
|
193 | goto feedcompressor; | |
181 | } |
|
194 | } | |
182 |
|
195 | |||
183 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); |
|
196 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
184 | self->output.pos = 0; |
|
197 | self->output.pos = 0; | |
185 | return chunk; |
|
198 | return chunk; | |
186 | } |
|
199 | } | |
187 |
|
200 | |||
188 | PyTypeObject ZstdCompressorIteratorType = { |
|
201 | PyTypeObject ZstdCompressorIteratorType = { | |
189 | PyVarObject_HEAD_INIT(NULL, 0) |
|
202 | PyVarObject_HEAD_INIT(NULL, 0) | |
190 | "zstd.ZstdCompressorIterator", /* tp_name */ |
|
203 | "zstd.ZstdCompressorIterator", /* tp_name */ | |
191 | sizeof(ZstdCompressorIterator), /* tp_basicsize */ |
|
204 | sizeof(ZstdCompressorIterator), /* tp_basicsize */ | |
192 | 0, /* tp_itemsize */ |
|
205 | 0, /* tp_itemsize */ | |
193 | (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */ |
|
206 | (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */ | |
194 | 0, /* tp_print */ |
|
207 | 0, /* tp_print */ | |
195 | 0, /* tp_getattr */ |
|
208 | 0, /* tp_getattr */ | |
196 | 0, /* tp_setattr */ |
|
209 | 0, /* tp_setattr */ | |
197 | 0, /* tp_compare */ |
|
210 | 0, /* tp_compare */ | |
198 | 0, /* tp_repr */ |
|
211 | 0, /* tp_repr */ | |
199 | 0, /* tp_as_number */ |
|
212 | 0, /* tp_as_number */ | |
200 | 0, /* tp_as_sequence */ |
|
213 | 0, /* tp_as_sequence */ | |
201 | 0, /* tp_as_mapping */ |
|
214 | 0, /* tp_as_mapping */ | |
202 | 0, /* tp_hash */ |
|
215 | 0, /* tp_hash */ | |
203 | 0, /* tp_call */ |
|
216 | 0, /* tp_call */ | |
204 | 0, /* tp_str */ |
|
217 | 0, /* tp_str */ | |
205 | 0, /* tp_getattro */ |
|
218 | 0, /* tp_getattro */ | |
206 | 0, /* tp_setattro */ |
|
219 | 0, /* tp_setattro */ | |
207 | 0, /* tp_as_buffer */ |
|
220 | 0, /* tp_as_buffer */ | |
208 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
221 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
209 | ZstdCompressorIterator__doc__, /* tp_doc */ |
|
222 | ZstdCompressorIterator__doc__, /* tp_doc */ | |
210 | 0, /* tp_traverse */ |
|
223 | 0, /* tp_traverse */ | |
211 | 0, /* tp_clear */ |
|
224 | 0, /* tp_clear */ | |
212 | 0, /* tp_richcompare */ |
|
225 | 0, /* tp_richcompare */ | |
213 | 0, /* tp_weaklistoffset */ |
|
226 | 0, /* tp_weaklistoffset */ | |
214 | ZstdCompressorIterator_iter, /* tp_iter */ |
|
227 | ZstdCompressorIterator_iter, /* tp_iter */ | |
215 | (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */ |
|
228 | (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */ | |
216 | 0, /* tp_methods */ |
|
229 | 0, /* tp_methods */ | |
217 | 0, /* tp_members */ |
|
230 | 0, /* tp_members */ | |
218 | 0, /* tp_getset */ |
|
231 | 0, /* tp_getset */ | |
219 | 0, /* tp_base */ |
|
232 | 0, /* tp_base */ | |
220 | 0, /* tp_dict */ |
|
233 | 0, /* tp_dict */ | |
221 | 0, /* tp_descr_get */ |
|
234 | 0, /* tp_descr_get */ | |
222 | 0, /* tp_descr_set */ |
|
235 | 0, /* tp_descr_set */ | |
223 | 0, /* tp_dictoffset */ |
|
236 | 0, /* tp_dictoffset */ | |
224 | 0, /* tp_init */ |
|
237 | 0, /* tp_init */ | |
225 | 0, /* tp_alloc */ |
|
238 | 0, /* tp_alloc */ | |
226 | PyType_GenericNew, /* tp_new */ |
|
239 | PyType_GenericNew, /* tp_new */ | |
227 | }; |
|
240 | }; | |
228 |
|
241 | |||
229 | void compressoriterator_module_init(PyObject* mod) { |
|
242 | void compressoriterator_module_init(PyObject* mod) { | |
230 | Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type; |
|
243 | Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type; | |
231 | if (PyType_Ready(&ZstdCompressorIteratorType) < 0) { |
|
244 | if (PyType_Ready(&ZstdCompressorIteratorType) < 0) { | |
232 | return; |
|
245 | return; | |
233 | } |
|
246 | } | |
234 | } |
|
247 | } |
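The substantive change in this file is inside ZstdCompressorIterator_iternext: each streaming call now dispatches to ZSTDMT_compressStream when the owning compressor carries a multi-threaded context (mtcctx) and to ZSTD_compressStream on the compressor's shared cstream otherwise, instead of using a cstream owned by the iterator. A hedged Python sketch of driving that iterator follows; the read_from() method name and the threads keyword are assumptions about the Python-level API and do not appear in this hunk.

    import io
    import zstd

    # Passing threads=N would route compression through the ZSTDMT path exercised
    # above (keyword name is an assumption); the default stays on the single cstream.
    cctx = zstd.ZstdCompressor(level=3)
    source = io.BytesIO(b"x" * 1000000)

    chunks = []
    for chunk in cctx.read_from(source):   # assumed name of the iterator constructor
        chunks.append(chunk)

    compressed = b"".join(chunks)
    print(len(compressed))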
@@ -1,87 +1,87 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | static char frame_header[] = { |
|
13 | static char frame_header[] = { | |
14 | '\x28', |
|
14 | '\x28', | |
15 | '\xb5', |
|
15 | '\xb5', | |
16 | '\x2f', |
|
16 | '\x2f', | |
17 | '\xfd', |
|
17 | '\xfd', | |
18 | }; |
|
18 | }; | |
19 |
|
19 | |||
20 | void constants_module_init(PyObject* mod) { |
|
20 | void constants_module_init(PyObject* mod) { | |
21 | PyObject* version; |
|
21 | PyObject* version; | |
22 | PyObject* zstdVersion; |
|
22 | PyObject* zstdVersion; | |
23 | PyObject* frameHeader; |
|
23 | PyObject* frameHeader; | |
24 |
|
24 | |||
25 | #if PY_MAJOR_VERSION >= 3 |
|
25 | #if PY_MAJOR_VERSION >= 3 | |
26 | version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION); |
|
26 | version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION); | |
27 | #else |
|
27 | #else | |
28 | version = PyString_FromString(PYTHON_ZSTANDARD_VERSION); |
|
28 | version = PyString_FromString(PYTHON_ZSTANDARD_VERSION); | |
29 | #endif |
|
29 | #endif | |
30 | Py_INCREF(version); |
|
30 | Py_INCREF(version); | |
31 | PyModule_AddObject(mod, "__version__", version); |
|
31 | PyModule_AddObject(mod, "__version__", version); | |
32 |
|
32 | |||
33 | ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL); |
|
33 | ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL); | |
34 | PyModule_AddObject(mod, "ZstdError", ZstdError); |
|
34 | PyModule_AddObject(mod, "ZstdError", ZstdError); | |
35 |
|
35 | |||
36 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish); |
|
36 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish); | |
37 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block); |
|
37 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block); | |
38 |
|
38 | |||
39 | /* For now, the version is a simple tuple instead of a dedicated type. */ |
|
39 | /* For now, the version is a simple tuple instead of a dedicated type. */ | |
40 | zstdVersion = PyTuple_New(3); |
|
40 | zstdVersion = PyTuple_New(3); | |
41 | PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR)); |
|
41 | PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR)); | |
42 | PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR)); |
|
42 | PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR)); | |
43 | PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE)); |
|
43 | PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE)); | |
44 | Py_I
|
44 | Py_INCREF(zstdVersion); | |
45 | PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion); |
|
45 | PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion); | |
46 |
|
46 | |||
47 | frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header)); |
|
47 | frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header)); | |
48 | if (frameHeader) { |
|
48 | if (frameHeader) { | |
49 | PyModule_AddObject(mod, "FRAME_HEADER", frameHeader); |
|
49 | PyModule_AddObject(mod, "FRAME_HEADER", frameHeader); | |
50 | } |
|
50 | } | |
51 | else { |
|
51 | else { | |
52 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); |
|
52 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
55 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); |
|
55 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); | |
56 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", |
|
56 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
57 | (long)ZSTD_CStreamInSize()); |
|
57 | (long)ZSTD_CStreamInSize()); | |
58 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", |
|
58 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
59 | (long)ZSTD_CStreamOutSize()); |
|
59 | (long)ZSTD_CStreamOutSize()); | |
60 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", |
|
60 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", | |
61 | (long)ZSTD_DStreamInSize()); |
|
61 | (long)ZSTD_DStreamInSize()); | |
62 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", |
|
62 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
63 | (long)ZSTD_DStreamOutSize()); |
|
63 | (long)ZSTD_DStreamOutSize()); | |
64 |
|
64 | |||
65 | PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER); |
|
65 | PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER); | |
66 | PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN); |
|
66 | PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN); | |
67 | PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX); |
|
67 | PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX); | |
68 | PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN); |
|
68 | PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN); | |
69 | PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX); |
|
69 | PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX); | |
70 | PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN); |
|
70 | PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN); | |
71 | PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX); |
|
71 | PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX); | |
72 | PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX); |
|
72 | PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX); | |
73 | PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN); |
|
73 | PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN); | |
74 | PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX); |
|
74 | PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX); | |
75 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN); |
|
75 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN); | |
76 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX); |
|
76 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX); | |
77 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); |
|
77 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); | |
78 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX); |
|
78 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX); | |
79 |
|
79 | |||
80 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); |
|
80 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); | |
81 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); |
|
81 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); | |
82 | PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy); |
|
82 | PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy); | |
83 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy); |
|
83 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy); | |
84 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); |
|
84 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); | |
85 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); |
|
85 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); | |
86 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); |
|
86 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); | |
87 | } |
|
87 | } |
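constants_module_init() above is what populates the module-level attributes, so the following sketch only touches names created in this hunk; it is illustrative rather than part of the change.

    import zstd

    # All of these attributes are added by constants_module_init() above.
    print(zstd.__version__)                         # python-zstandard version string
    print(zstd.ZSTD_VERSION)                        # (major, minor, release) of the bundled libzstd
    print(zstd.MAX_COMPRESSION_LEVEL)               # upper bound for compression levels
    print(zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)  # ZSTD_CStreamInSize()

    # FRAME_HEADER is the 4-byte zstd frame magic, handy for sniffing content.
    def looks_like_zstd_frame(data):
        return data[:4] == zstd.FRAME_HEADER

    print(looks_like_zstd_frame(b"\x28\xb5\x2f\xfd rest of frame"))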
@@ -1,188 +1,179 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | PyDoc_STRVAR(ZstdDecompressionWriter__doc, |
|
13 | PyDoc_STRVAR(ZstdDecompressionWriter__doc, | |
14 | """A context manager used for writing decompressed output.\n" |
|
14 | """A context manager used for writing decompressed output.\n" | |
15 | ); |
|
15 | ); | |
16 |
|
16 | |||
17 | static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) { |
|
17 | static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) { | |
18 | Py_XDECREF(self->decompressor); |
|
18 | Py_XDECREF(self->decompressor); | |
19 | Py_XDECREF(self->writer); |
|
19 | Py_XDECREF(self->writer); | |
20 |
|
20 | |||
21 | if (self->dstream) { |
|
|||
22 | ZSTD_freeDStream(self->dstream); |
|
|||
23 | self->dstream = NULL; |
|
|||
24 | } |
|
|||
25 |
|
||||
26 | PyObject_Del(self); |
|
21 | PyObject_Del(self); | |
27 | } |
|
22 | } | |
28 |
|
23 | |||
29 | static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) { |
|
24 | static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) { | |
30 | if (self->entered) { |
|
25 | if (self->entered) { | |
31 | PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); |
|
26 | PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); | |
32 | return NULL; |
|
27 | return NULL; | |
33 | } |
|
28 | } | |
34 |
|
29 | |||
35 | self->dstream = DStream_from_ZstdDecompressor(self->decompressor); |
|
30 | if (0 != init_dstream(self->decompressor)) { | |
36 | if (!self->dstream) { |
|
|||
37 | return NULL; |
|
31 | return NULL; | |
38 | } |
|
32 | } | |
39 |
|
33 | |||
40 | self->entered = 1; |
|
34 | self->entered = 1; | |
41 |
|
35 | |||
42 | Py_INCREF(self); |
|
36 | Py_INCREF(self); | |
43 | return (PyObject*)self; |
|
37 | return (PyObject*)self; | |
44 | } |
|
38 | } | |
45 |
|
39 | |||
46 | static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) { |
|
40 | static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) { | |
47 | self->entered = 0; |
|
41 | self->entered = 0; | |
48 |
|
42 | |||
49 | if (self->dstream) { |
|
|||
50 | ZSTD_freeDStream(self->dstream); |
|
|||
51 | self->dstream = NULL; |
|
|||
52 | } |
|
|||
53 |
|
||||
54 | Py_RETURN_FALSE; |
|
43 | Py_RETURN_FALSE; | |
55 | } |
|
44 | } | |
56 |
|
45 | |||
57 | static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) { |
|
46 | static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) { | |
58 | if (!self->dstream) { |
|
47 | if (!self->decompressor->dstream) { | |
59 | PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; " |
|
48 | PyErr_SetString(ZstdError, "cannot determine size of inactive decompressor; " | |
60 | "call when context manager is active"); |
|
49 | "call when context manager is active"); | |
61 | return NULL; |
|
50 | return NULL; | |
62 | } |
|
51 | } | |
63 |
|
52 | |||
64 | return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->dstream)); |
|
53 | return PyLong_FromSize_t(ZSTD_sizeof_DStream(self->decompressor->dstream)); | |
65 | } |
|
54 | } | |
66 |
|
55 | |||
67 | static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) { |
|
56 | static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args) { | |
68 | const char* source; |
|
57 | const char* source; | |
69 | Py_ssize_t sourceSize; |
|
58 | Py_ssize_t sourceSize; | |
70 | size_t zresult = 0; |
|
59 | size_t zresult = 0; | |
71 | ZSTD_inBuffer input; |
|
60 | ZSTD_inBuffer input; | |
72 | ZSTD_outBuffer output; |
|
61 | ZSTD_outBuffer output; | |
73 | PyObject* res; |
|
62 | PyObject* res; | |
74 | Py_ssize_t totalWrite = 0; |
|
63 | Py_ssize_t totalWrite = 0; | |
75 |
|
64 | |||
76 | #if PY_MAJOR_VERSION >= 3 |
|
65 | #if PY_MAJOR_VERSION >= 3 | |
77 | if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) { |
|
66 | if (!PyArg_ParseTuple(args, "y#:write", &source, &sourceSize)) { | |
78 | #else |
|
67 | #else | |
79 | if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) { |
|
68 | if (!PyArg_ParseTuple(args, "s#:write", &source, &sourceSize)) { | |
80 | #endif |
|
69 | #endif | |
81 | return NULL; |
|
70 | return NULL; | |
82 | } |
|
71 | } | |
83 |
|
72 | |||
84 | if (!self->entered) { |
|
73 | if (!self->entered) { | |
85 | PyErr_SetString(ZstdError, "write must be called from an active context manager"); |
|
74 | PyErr_SetString(ZstdError, "write must be called from an active context manager"); | |
86 | return NULL; |
|
75 | return NULL; | |
87 | } |
|
76 | } | |
88 |
|
77 | |||
|
78 | assert(self->decompressor->dstream); | |||
|
79 | ||||
89 | output.dst = PyMem_Malloc(self->outSize); |
|
80 | output.dst = PyMem_Malloc(self->outSize); | |
90 | if (!output.dst) { |
|
81 | if (!output.dst) { | |
91 | return PyErr_NoMemory(); |
|
82 | return PyErr_NoMemory(); | |
92 | } |
|
83 | } | |
93 | output.size = self->outSize; |
|
84 | output.size = self->outSize; | |
94 | output.pos = 0; |
|
85 | output.pos = 0; | |
95 |
|
86 | |||
96 | input.src = source; |
|
87 | input.src = source; | |
97 | input.size = sourceSize; |
|
88 | input.size = sourceSize; | |
98 | input.pos = 0; |
|
89 | input.pos = 0; | |
99 |
|
90 | |||
100 | while ((ssize_t)input.pos < sourceSize) { |
|
91 | while ((ssize_t)input.pos < sourceSize) { | |
101 | Py_BEGIN_ALLOW_THREADS |
|
92 | Py_BEGIN_ALLOW_THREADS | |
102 | zresult = ZSTD_decompressStream(self->dstream, &output, &input); |
|
93 | zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input); | |
103 | Py_END_ALLOW_THREADS |
|
94 | Py_END_ALLOW_THREADS | |
104 |
|
95 | |||
105 | if (ZSTD_isError(zresult)) { |
|
96 | if (ZSTD_isError(zresult)) { | |
106 | PyMem_Free(output.dst); |
|
97 | PyMem_Free(output.dst); | |
107 | PyErr_Format(ZstdError, "zstd decompress error: %s", |
|
98 | PyErr_Format(ZstdError, "zstd decompress error: %s", | |
108 | ZSTD_getErrorName(zresult)); |
|
99 | ZSTD_getErrorName(zresult)); | |
109 | return NULL; |
|
100 | return NULL; | |
110 | } |
|
101 | } | |
111 |
|
102 | |||
112 | if (output.pos) { |
|
103 | if (output.pos) { | |
113 | #if PY_MAJOR_VERSION >= 3 |
|
104 | #if PY_MAJOR_VERSION >= 3 | |
114 | res = PyObject_CallMethod(self->writer, "write", "y#", |
|
105 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
115 | #else |
|
106 | #else | |
116 | res = PyObject_CallMethod(self->writer, "write", "s#", |
|
107 | res = PyObject_CallMethod(self->writer, "write", "s#", | |
117 | #endif |
|
108 | #endif | |
118 | output.dst, output.pos); |
|
109 | output.dst, output.pos); | |
119 | Py_XDECREF(res); |
|
110 | Py_XDECREF(res); | |
120 | totalWrite += output.pos; |
|
111 | totalWrite += output.pos; | |
121 | output.pos = 0; |
|
112 | output.pos = 0; | |
122 | } |
|
113 | } | |
123 | } |
|
114 | } | |
124 |
|
115 | |||
125 | PyMem_Free(output.dst); |
|
116 | PyMem_Free(output.dst); | |
126 |
|
117 | |||
127 | return PyLong_FromSsize_t(totalWrite); |
|
118 | return PyLong_FromSsize_t(totalWrite); | |
128 | } |
|
119 | } | |
129 |
|
120 | |||
130 | static PyMethodDef ZstdDecompressionWriter_methods[] = { |
|
121 | static PyMethodDef ZstdDecompressionWriter_methods[] = { | |
131 | { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS, |
|
122 | { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS, | |
132 | PyDoc_STR("Enter a decompression context.") }, |
|
123 | PyDoc_STR("Enter a decompression context.") }, | |
133 | { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS, |
|
124 | { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS, | |
134 | PyDoc_STR("Exit a decompression context.") }, |
|
125 | PyDoc_STR("Exit a decompression context.") }, | |
135 | { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS, |
|
126 | { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS, | |
136 | PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") }, |
|
127 | PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") }, | |
137 | { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS, |
|
128 | { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS, | |
138 | PyDoc_STR("Compress data") }, |
|
129 | PyDoc_STR("Compress data") }, | |
139 | { NULL, NULL } |
|
130 | { NULL, NULL } | |
140 | }; |
|
131 | }; | |
141 |
|
132 | |||
142 | PyTypeObject ZstdDecompressionWriterType = { |
|
133 | PyTypeObject ZstdDecompressionWriterType = { | |
143 | PyVarObject_HEAD_INIT(NULL, 0) |
|
134 | PyVarObject_HEAD_INIT(NULL, 0) | |
144 | "zstd.ZstdDecompressionWriter", /* tp_name */ |
|
135 | "zstd.ZstdDecompressionWriter", /* tp_name */ | |
145 | sizeof(ZstdDecompressionWriter),/* tp_basicsize */ |
|
136 | sizeof(ZstdDecompressionWriter),/* tp_basicsize */ | |
146 | 0, /* tp_itemsize */ |
|
137 | 0, /* tp_itemsize */ | |
147 | (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */ |
|
138 | (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */ | |
148 | 0, /* tp_print */ |
|
139 | 0, /* tp_print */ | |
149 | 0, /* tp_getattr */ |
|
140 | 0, /* tp_getattr */ | |
150 | 0, /* tp_setattr */ |
|
141 | 0, /* tp_setattr */ | |
151 | 0, /* tp_compare */ |
|
142 | 0, /* tp_compare */ | |
152 | 0, /* tp_repr */ |
|
143 | 0, /* tp_repr */ | |
153 | 0, /* tp_as_number */ |
|
144 | 0, /* tp_as_number */ | |
154 | 0, /* tp_as_sequence */ |
|
145 | 0, /* tp_as_sequence */ | |
155 | 0, /* tp_as_mapping */ |
|
146 | 0, /* tp_as_mapping */ | |
156 | 0, /* tp_hash */ |
|
147 | 0, /* tp_hash */ | |
157 | 0, /* tp_call */ |
|
148 | 0, /* tp_call */ | |
158 | 0, /* tp_str */ |
|
149 | 0, /* tp_str */ | |
159 | 0, /* tp_getattro */ |
|
150 | 0, /* tp_getattro */ | |
160 | 0, /* tp_setattro */ |
|
151 | 0, /* tp_setattro */ | |
161 | 0, /* tp_as_buffer */ |
|
152 | 0, /* tp_as_buffer */ | |
162 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
153 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
163 | ZstdDecompressionWriter__doc, /* tp_doc */ |
|
154 | ZstdDecompressionWriter__doc, /* tp_doc */ | |
164 | 0, /* tp_traverse */ |
|
155 | 0, /* tp_traverse */ | |
165 | 0, /* tp_clear */ |
|
156 | 0, /* tp_clear */ | |
166 | 0, /* tp_richcompare */ |
|
157 | 0, /* tp_richcompare */ | |
167 | 0, /* tp_weaklistoffset */ |
|
158 | 0, /* tp_weaklistoffset */ | |
168 | 0, /* tp_iter */ |
|
159 | 0, /* tp_iter */ | |
169 | 0, /* tp_iternext */ |
|
160 | 0, /* tp_iternext */ | |
170 | ZstdDecompressionWriter_methods,/* tp_methods */ |
|
161 | ZstdDecompressionWriter_methods,/* tp_methods */ | |
171 | 0, /* tp_members */ |
|
162 | 0, /* tp_members */ | |
172 | 0, /* tp_getset */ |
|
163 | 0, /* tp_getset */ | |
173 | 0, /* tp_base */ |
|
164 | 0, /* tp_base */ | |
174 | 0, /* tp_dict */ |
|
165 | 0, /* tp_dict */ | |
175 | 0, /* tp_descr_get */ |
|
166 | 0, /* tp_descr_get */ | |
176 | 0, /* tp_descr_set */ |
|
167 | 0, /* tp_descr_set */ | |
177 | 0, /* tp_dictoffset */ |
|
168 | 0, /* tp_dictoffset */ | |
178 | 0, /* tp_init */ |
|
169 | 0, /* tp_init */ | |
179 | 0, /* tp_alloc */ |
|
170 | 0, /* tp_alloc */ | |
180 | PyType_GenericNew, /* tp_new */ |
|
171 | PyType_GenericNew, /* tp_new */ | |
181 | }; |
|
172 | }; | |
182 |
|
173 | |||
183 | void decompressionwriter_module_init(PyObject* mod) { |
|
174 | void decompressionwriter_module_init(PyObject* mod) { | |
184 | Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type; |
|
175 | Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type; | |
185 | if (PyType_Ready(&ZstdDecompressionWriterType) < 0) { |
|
176 | if (PyType_Ready(&ZstdDecompressionWriterType) < 0) { | |
186 | return; |
|
177 | return; | |
187 | } |
|
178 | } | |
188 | } |
|
179 | } |
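With this change the writer borrows the ZSTD_DStream kept on its parent ZstdDecompressor (set up via init_dstream() in __enter__) instead of owning one itself. A small round-trip sketch of the context-manager API documented above follows; obtaining the writer through ZstdDecompressor.write_to() and the compressor-side compress() call are assumptions about the Python-level API and are not shown in this hunk.

    import io
    import zstd

    cctx = zstd.ZstdCompressor(level=3)
    compressed = cctx.compress(b"hello " * 1000)   # produce a frame to replay (assumed API)

    dctx = zstd.ZstdDecompressor()
    out = io.BytesIO()
    with dctx.write_to(out) as writer:             # assumed constructor of the writer shown above
        # Feed the frame in small pieces, as a socket reader might.
        for i in range(0, len(compressed), 64):
            writer.write(compressed[i:i + 64])
        print(writer.memory_size())                # ZSTD_sizeof_DStream of the active stream

    assert out.getvalue() == b"hello " * 1000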
@@ -1,170 +1,167 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | PyDoc_STRVAR(DecompressionObj__doc__, |
|
13 | PyDoc_STRVAR(DecompressionObj__doc__, | |
14 | "Perform decompression using a standard library compatible API.\n" |
|
14 | "Perform decompression using a standard library compatible API.\n" | |
15 | ); |
|
15 | ); | |
16 |
|
16 | |||
17 | static void DecompressionObj_dealloc(ZstdDecompressionObj* self) { |
|
17 | static void DecompressionObj_dealloc(ZstdDecompressionObj* self) { | |
18 | if (self->dstream) { |
|
|||
19 | ZSTD_freeDStream(self->dstream); |
|
|||
20 | self->dstream = NULL; |
|
|||
21 | } |
|
|||
22 |
|
||||
23 | Py_XDECREF(self->decompressor); |
|
18 | Py_XDECREF(self->decompressor); | |
24 |
|
19 | |||
25 | PyObject_Del(self); |
|
20 | PyObject_Del(self); | |
26 | } |
|
21 | } | |
27 |
|
22 | |||
28 | static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) { |
|
23 | static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args) { | |
29 | const char* source; |
|
24 | const char* source; | |
30 | Py_ssize_t sourceSize; |
|
25 | Py_ssize_t sourceSize; | |
31 | size_t zresult; |
|
26 | size_t zresult; | |
32 | ZSTD_inBuffer input; |
|
27 | ZSTD_inBuffer input; | |
33 | ZSTD_outBuffer output; |
|
28 | ZSTD_outBuffer output; | |
34 | size_t outSize = ZSTD_DStreamOutSize(); |
|
29 | size_t outSize = ZSTD_DStreamOutSize(); | |
35 | PyObject* result = NULL; |
|
30 | PyObject* result = NULL; | |
36 | Py_ssize_t resultSize = 0; |
|
31 | Py_ssize_t resultSize = 0; | |
37 |
|
32 | |||
|
33 | /* Constructor should ensure stream is populated. */ | |||
|
34 | assert(self->decompressor->dstream); | |||
|
35 | ||||
38 | if (self->finished) { |
|
36 | if (self->finished) { | |
39 | PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times"); |
|
37 | PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times"); | |
40 | return NULL; |
|
38 | return NULL; | |
41 | } |
|
39 | } | |
42 |
|
40 | |||
43 | #if PY_MAJOR_VERSION >= 3 |
|
41 | #if PY_MAJOR_VERSION >= 3 | |
44 | if (!PyArg_ParseTuple(args, "y#:decompress", |
|
42 | if (!PyArg_ParseTuple(args, "y#:decompress", | |
45 | #else |
|
43 | #else | |
46 | if (!PyArg_ParseTuple(args, "s#:decompress", |
|
44 | if (!PyArg_ParseTuple(args, "s#:decompress", | |
47 | #endif |
|
45 | #endif | |
48 | &source, &sourceSize)) { |
|
46 | &source, &sourceSize)) { | |
49 | return NULL; |
|
47 | return NULL; | |
50 | } |
|
48 | } | |
51 |
|
49 | |||
52 | input.src = source; |
|
50 | input.src = source; | |
53 | input.size = sourceSize; |
|
51 | input.size = sourceSize; | |
54 | input.pos = 0; |
|
52 | input.pos = 0; | |
55 |
|
53 | |||
56 | output.dst = PyMem_Malloc(outSize); |
|
54 | output.dst = PyMem_Malloc(outSize); | |
57 | if (!output.dst) { |
|
55 | if (!output.dst) { | |
58 | PyErr_NoMemory(); |
|
56 | PyErr_NoMemory(); | |
59 | return NULL; |
|
57 | return NULL; | |
60 | } |
|
58 | } | |
61 | output.size = outSize; |
|
59 | output.size = outSize; | |
62 | output.pos = 0; |
|
60 | output.pos = 0; | |
63 |
|
61 | |||
64 | /* Read input until exhausted. */ |
|
62 | /* Read input until exhausted. */ | |
65 | while (input.pos < input.size) { |
|
63 | while (input.pos < input.size) { | |
66 | Py_BEGIN_ALLOW_THREADS |
|
64 | Py_BEGIN_ALLOW_THREADS | |
67 | zresult = ZSTD_decompressStream(self->dstream, &output, &input); |
|
65 | zresult = ZSTD_decompressStream(self->decompressor->dstream, &output, &input); | |
68 | Py_END_ALLOW_THREADS |
|
66 | Py_END_ALLOW_THREADS | |
69 |
|
67 | |||
70 | if (ZSTD_isError(zresult)) { |
|
68 | if (ZSTD_isError(zresult)) { | |
71 | PyErr_Format(ZstdError, "zstd decompressor error: %s", |
|
69 | PyErr_Format(ZstdError, "zstd decompressor error: %s", | |
72 | ZSTD_getErrorName(zresult)); |
|
70 | ZSTD_getErrorName(zresult)); | |
73 | result = NULL; |
|
71 | result = NULL; | |
74 | goto finally; |
|
72 | goto finally; | |
75 | } |
|
73 | } | |
76 |
|
74 | |||
77 | if (0 == zresult) { |
|
75 | if (0 == zresult) { | |
78 | self->finished = 1; |
|
76 | self->finished = 1; | |
79 | } |
|
77 | } | |
80 |
|
78 | |||
81 | if (output.pos) { |
|
79 | if (output.pos) { | |
82 | if (result) { |
|
80 | if (result) { | |
83 | resultSize = PyBytes_GET_SIZE(result); |
|
81 | resultSize = PyBytes_GET_SIZE(result); | |
84 | if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) { |
|
82 | if (-1 == _PyBytes_Resize(&result, resultSize + output.pos)) { | |
85 | goto except; |
|
83 | goto except; | |
86 | } |
|
84 | } | |
87 |
|
85 | |||
88 | memcpy(PyBytes_AS_STRING(result) + resultSize, |
|
86 | memcpy(PyBytes_AS_STRING(result) + resultSize, | |
89 | output.dst, output.pos); |
|
87 | output.dst, output.pos); | |
90 | } |
|
88 | } | |
91 | else { |
|
89 | else { | |
92 | result = PyBytes_FromStringAndSize(output.dst, output.pos); |
|
90 | result = PyBytes_FromStringAndSize(output.dst, output.pos); | |
93 | if (!result) { |
|
91 | if (!result) { | |
94 | goto except; |
|
92 | goto except; | |
95 | } |
|
93 | } | |
96 | } |
|
94 | } | |
97 |
|
95 | |||
98 | output.pos = 0; |
|
96 | output.pos = 0; | |
99 | } |
|
97 | } | |
100 | } |
|
98 | } | |
101 |
|
99 | |||
102 | if (!result) { |
|
100 | if (!result) { | |
103 | result = PyBytes_FromString(""); |
|
101 | result = PyBytes_FromString(""); | |
104 | } |
|
102 | } | |
105 |
|
103 | |||
106 | goto finally; |
|
104 | goto finally; | |
107 |
|
105 | |||
108 | except: |
|
106 | except: | |
109 | Py_
|
107 | Py_CLEAR(result); | |
110 | result = NULL; |
|
|||
111 |
|
108 | |||
112 | finally: |
|
109 | finally: | |
113 | PyMem_Free(output.dst); |
|
110 | PyMem_Free(output.dst); | |
114 |
|
111 | |||
115 | return result; |
|
112 | return result; | |
116 | } |
|
113 | } | |
117 |
|
114 | |||
118 | static PyMethodDef DecompressionObj_methods[] = { |
|
115 | static PyMethodDef DecompressionObj_methods[] = { | |
119 | { "decompress", (PyCFunction)DecompressionObj_decompress, |
|
116 | { "decompress", (PyCFunction)DecompressionObj_decompress, | |
120 | METH_VARARGS, PyDoc_STR("decompress data") }, |
|
117 | METH_VARARGS, PyDoc_STR("decompress data") }, | |
121 | { NULL, NULL } |
|
118 | { NULL, NULL } | |
122 | }; |
|
119 | }; | |
123 |
|
120 | |||
124 | PyTypeObject ZstdDecompressionObjType = { |
|
121 | PyTypeObject ZstdDecompressionObjType = { | |
125 | PyVarObject_HEAD_INIT(NULL, 0) |
|
122 | PyVarObject_HEAD_INIT(NULL, 0) | |
126 | "zstd.ZstdDecompressionObj", /* tp_name */ |
|
123 | "zstd.ZstdDecompressionObj", /* tp_name */ | |
127 | sizeof(ZstdDecompressionObj), /* tp_basicsize */ |
|
124 | sizeof(ZstdDecompressionObj), /* tp_basicsize */ | |
128 | 0, /* tp_itemsize */ |
|
125 | 0, /* tp_itemsize */ | |
129 | (destructor)DecompressionObj_dealloc, /* tp_dealloc */ |
|
126 | (destructor)DecompressionObj_dealloc, /* tp_dealloc */ | |
130 | 0, /* tp_print */ |
|
127 | 0, /* tp_print */ | |
131 | 0, /* tp_getattr */ |
|
128 | 0, /* tp_getattr */ | |
132 | 0, /* tp_setattr */ |
|
129 | 0, /* tp_setattr */ | |
133 | 0, /* tp_compare */ |
|
130 | 0, /* tp_compare */ | |
134 | 0, /* tp_repr */ |
|
131 | 0, /* tp_repr */ | |
135 | 0, /* tp_as_number */ |
|
132 | 0, /* tp_as_number */ | |
136 | 0, /* tp_as_sequence */ |
|
133 | 0, /* tp_as_sequence */ | |
137 | 0, /* tp_as_mapping */ |
|
134 | 0, /* tp_as_mapping */ | |
138 | 0, /* tp_hash */ |
|
135 | 0, /* tp_hash */ | |
139 | 0, /* tp_call */ |
|
136 | 0, /* tp_call */ | |
140 | 0, /* tp_str */ |
|
137 | 0, /* tp_str */ | |
141 | 0, /* tp_getattro */ |
|
138 | 0, /* tp_getattro */ | |
142 | 0, /* tp_setattro */ |
|
139 | 0, /* tp_setattro */ | |
143 | 0, /* tp_as_buffer */ |
|
140 | 0, /* tp_as_buffer */ | |
144 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
141 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
145 | DecompressionObj__doc__, /* tp_doc */ |
|
142 | DecompressionObj__doc__, /* tp_doc */ | |
146 | 0, /* tp_traverse */ |
|
143 | 0, /* tp_traverse */ | |
147 | 0, /* tp_clear */ |
|
144 | 0, /* tp_clear */ | |
148 | 0, /* tp_richcompare */ |
|
145 | 0, /* tp_richcompare */ | |
149 | 0, /* tp_weaklistoffset */ |
|
146 | 0, /* tp_weaklistoffset */ | |
150 | 0, /* tp_iter */ |
|
147 | 0, /* tp_iter */ | |
151 | 0, /* tp_iternext */ |
|
148 | 0, /* tp_iternext */ | |
152 | DecompressionObj_methods, /* tp_methods */ |
|
149 | DecompressionObj_methods, /* tp_methods */ | |
153 | 0, /* tp_members */ |
|
150 | 0, /* tp_members */ | |
154 | 0, /* tp_getset */ |
|
151 | 0, /* tp_getset */ | |
155 | 0, /* tp_base */ |
|
152 | 0, /* tp_base */ | |
156 | 0, /* tp_dict */ |
|
153 | 0, /* tp_dict */ | |
157 | 0, /* tp_descr_get */ |
|
154 | 0, /* tp_descr_get */ | |
158 | 0, /* tp_descr_set */ |
|
155 | 0, /* tp_descr_set */ | |
159 | 0, /* tp_dictoffset */ |
|
156 | 0, /* tp_dictoffset */ | |
160 | 0, /* tp_init */ |
|
157 | 0, /* tp_init */ | |
161 | 0, /* tp_alloc */ |
|
158 | 0, /* tp_alloc */ | |
162 | PyType_GenericNew, /* tp_new */ |
|
159 | PyType_GenericNew, /* tp_new */ | |
163 | }; |
|
160 | }; | |
164 |
|
161 | |||
165 | void decompressobj_module_init(PyObject* module) { |
|
162 | void decompressobj_module_init(PyObject* module) { | |
166 | Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type; |
|
163 | Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type; | |
167 | if (PyType_Ready(&ZstdDecompressionObjType) < 0) { |
|
164 | if (PyType_Ready(&ZstdDecompressionObjType) < 0) { | |
168 | return; |
|
165 | return; | |
169 | } |
|
166 | } | |
170 | } |
|
167 | } |
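After this change the ZstdDecompressionObj also reuses the ZSTD_DStream held by its parent decompressor and merely asserts that the constructor populated it. The docstring above advertises a standard-library-compatible API; here is a short incremental-decompression sketch. The decompressobj() factory on ZstdDecompressor is implied by the error message above, but its exact call, like compress() on the compressor side, is an assumption.

    import zstd

    cctx = zstd.ZstdCompressor()
    frame = cctx.compress(b"incremental decompression example " * 100)

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()    # assumed factory for the ZstdDecompressionObj type above

    # Feed the frame a little at a time; each call may return zero or more bytes.
    parts = [dobj.decompress(frame[i:i + 32]) for i in range(0, len(frame), 32)]

    assert b"".join(parts) == b"incremental decompression example " * 100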
This diff has been collapsed as it changes many lines, (859 lines changed)
@@ -1,845 +1,1580 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
|
10 | #include "pool.h" | |||
10 |
|
11 | |||
11 | extern PyObject* ZstdError; |
|
12 | extern PyObject* ZstdError; | |
12 |
|
13 | |||
13 | ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor) { |
|
14 | /** | |
14 | ZSTD_DStream* dstream; |
|
15 | * Ensure the ZSTD_DStream on a ZstdDecompressor is initialized and reset. | |
|
16 | * | |||
|
17 | * This should be called before starting a decompression operation with a | |||
|
18 | * ZSTD_DStream on a ZstdDecompressor. | |||
|
19 | */ | |||
|
20 | int init_dstream(ZstdDecompressor* decompressor) { | |||
15 | void* dictData = NULL; |
|
21 | void* dictData = NULL; | |
16 | size_t dictSize = 0; |
|
22 | size_t dictSize = 0; | |
17 | size_t zresult; |
|
23 | size_t zresult; | |
18 |
|
24 | |||
19 | dstream = ZSTD_createDStream(); |
|
25 | /* Simple case of dstream already exists. Just reset it. */ | |
20 | if (!dstream) { |
|
26 | if (decompressor->dstream) { | |
|
27 | zresult = ZSTD_resetDStream(decompressor->dstream); | |||
|
28 | if (ZSTD_isError(zresult)) { | |||
|
29 | PyErr_Format(ZstdError, "could not reset DStream: %s", | |||
|
30 | ZSTD_getErrorName(zresult)); | |||
|
31 | return -1; | |||
|
32 | } | |||
|
33 | ||||
|
34 | return 0; | |||
|
35 | } | |||
|
36 | ||||
|
37 | decompressor->dstream = ZSTD_createDStream(); | |||
|
38 | if (!decompressor->dstream) { | |||
21 | PyErr_SetString(ZstdError, "could not create DStream"); |
|
39 | PyErr_SetString(ZstdError, "could not create DStream"); | |
22 | return NULL; |
|
40 | return -1; | |
23 | } |
|
41 | } | |
24 |
|
42 | |||
25 | if (decompressor->dict) { |
|
43 | if (decompressor->dict) { | |
26 | dictData = decompressor->dict->dictData; |
|
44 | dictData = decompressor->dict->dictData; | |
27 | dictSize = decompressor->dict->dictSize; |
|
45 | dictSize = decompressor->dict->dictSize; | |
28 | } |
|
46 | } | |
29 |
|
47 | |||
30 | if (dictData) { |
|
48 | if (dictData) { | |
31 | zresult = ZSTD_initDStream_usingDict(dstream, dictData, dictSize); |
|
49 | zresult = ZSTD_initDStream_usingDict(decompressor->dstream, dictData, dictSize); | |
32 | } |
|
50 | } | |
33 | else { |
|
51 | else { | |
34 | zresult = ZSTD_initDStream(dstream); |
|
52 | zresult = ZSTD_initDStream(decompressor->dstream); | |
35 | } |
|
53 | } | |
36 |
|
54 | |||
37 | if (ZSTD_isError(zresult)) { |
|
55 | if (ZSTD_isError(zresult)) { | |
|
56 | /* Don't leave a reference to an invalid object. */ | |||
|
57 | ZSTD_freeDStream(decompressor->dstream); | |||
|
58 | decompressor->dstream = NULL; | |||
|
59 | ||||
38 | PyErr_Format(ZstdError, "could not initialize DStream: %s", |
|
60 | PyErr_Format(ZstdError, "could not initialize DStream: %s", | |
39 | ZSTD_getErrorName(zresult)); |
|
61 | ZSTD_getErrorName(zresult)); | |
40 | return NULL; |
|
62 | return -1; | |
41 | } |
|
63 | } | |
42 |
|
64 | |||
43 | return dstream; |
|
65 | return 0; | |
44 | } |
|
66 | } | |
45 |
|
67 | |||
46 | PyDoc_STRVAR(Decompressor__doc__, |
|
68 | PyDoc_STRVAR(Decompressor__doc__, | |
47 | "ZstdDecompressor(dict_data=None)\n" |
|
69 | "ZstdDecompressor(dict_data=None)\n" | |
48 | "\n" |
|
70 | "\n" | |
49 | "Create an object used to perform Zstandard decompression.\n" |
|
71 | "Create an object used to perform Zstandard decompression.\n" | |
50 | "\n" |
|
72 | "\n" | |
51 | "An instance can perform multiple decompression operations." |
|
73 | "An instance can perform multiple decompression operations." | |
52 | ); |
|
74 | ); | |
53 |
|
75 | |||
54 | static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
76 | static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
55 | static char* kwlist[] = { |
|
77 | static char* kwlist[] = { | |
56 | "dict_data", |
|
78 | "dict_data", | |
57 | NULL |
|
79 | NULL | |
58 | }; |
|
80 | }; | |
59 |
|
81 | |||
60 | ZstdCompressionDict* dict = NULL; |
|
82 | ZstdCompressionDict* dict = NULL; | |
61 |
|
83 | |||
62 | self->dctx = NULL; |
|
84 | self->dctx = NULL; | |
63 | self->dict = NULL; |
|
85 | self->dict = NULL; | |
64 | self->ddict = NULL; |
|
86 | self->ddict = NULL; | |
65 |
|
87 | |||
66 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist, |
|
88 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!:ZstdDecompressor", kwlist, | |
67 | &ZstdCompressionDictType, &dict)) { |
|
89 | &ZstdCompressionDictType, &dict)) { | |
68 | return -1; |
|
90 | return -1; | |
69 | } |
|
91 | } | |
70 |
|
92 | |||
71 | /* TODO lazily initialize the reference ZSTD_DCtx on first use since |
|
93 | /* TODO lazily initialize the reference ZSTD_DCtx on first use since | |
72 | not instances of ZstdDecompressor will use a ZSTD_DCtx. */ |
|
94 | not instances of ZstdDecompressor will use a ZSTD_DCtx. */ | |
73 | self->dctx = ZSTD_createDCtx(); |
|
95 | self->dctx = ZSTD_createDCtx(); | |
74 | if (!self->dctx) { |
|
96 | if (!self->dctx) { | |
75 | PyErr_NoMemory(); |
|
97 | PyErr_NoMemory(); | |
76 | goto except; |
|
98 | goto except; | |
77 | } |
|
99 | } | |
78 |
|
100 | |||
79 | if (dict) { |
|
101 | if (dict) { | |
80 | self->dict = dict; |
|
102 | self->dict = dict; | |
81 | Py_INCREF(dict); |
|
103 | Py_INCREF(dict); | |
82 | } |
|
104 | } | |
83 |
|
105 | |||
84 | return 0; |
|
106 | return 0; | |
85 |
|
107 | |||
86 | except: |
|
108 | except: | |
87 | if (self->dctx) { |
|
109 | if (self->dctx) { | |
88 | ZSTD_freeDCtx(self->dctx); |
|
110 | ZSTD_freeDCtx(self->dctx); | |
89 | self->dctx = NULL; |
|
111 | self->dctx = NULL; | |
90 | } |
|
112 | } | |
91 |
|
113 | |||
92 | return -1; |
|
114 | return -1; | |
93 | } |
|
115 | } | |
94 |
|
116 | |||
95 | static void Decompressor_dealloc(ZstdDecompressor* self) { |
|
117 | static void Decompressor_dealloc(ZstdDecompressor* self) { | |
96 |
|
|
118 | Py_CLEAR(self->dict); | |
97 | ZSTD_freeDCtx(self->dctx); |
|
|||
98 | } |
|
|||
99 |
|
||||
100 | Py_XDECREF(self->dict); |
|
|||
101 |
|
119 | |||
102 | if (self->ddict) { |
|
120 | if (self->ddict) { | |
103 | ZSTD_freeDDict(self->ddict); |
|
121 | ZSTD_freeDDict(self->ddict); | |
104 | self->ddict = NULL; |
|
122 | self->ddict = NULL; | |
105 | } |
|
123 | } | |
106 |
|
124 | |||
|
125 | if (self->dstream) { | |||
|
126 | ZSTD_freeDStream(self->dstream); | |||
|
127 | self->dstream = NULL; | |||
|
128 | } | |||
|
129 | ||||
|
130 | if (self->dctx) { | |||
|
131 | ZSTD_freeDCtx(self->dctx); | |||
|
132 | self->dctx = NULL; | |||
|
133 | } | |||
|
134 | ||||
107 | PyObject_Del(self); |
|
135 | PyObject_Del(self); | |
108 | } |
|
136 | } | |
109 |
|
137 | |||
110 | PyDoc_STRVAR(Decompressor_copy_stream__doc__, |
|
138 | PyDoc_STRVAR(Decompressor_copy_stream__doc__, | |
111 | "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" |
|
139 | "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n" | |
112 | "\n" |
|
140 | "\n" | |
113 | "Compressed data will be read from ``ifh``, decompressed, and written to\n" |
|
141 | "Compressed data will be read from ``ifh``, decompressed, and written to\n" | |
114 | "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n" |
|
142 | "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n" | |
115 | "``write(data)`` method.\n" |
|
143 | "``write(data)`` method.\n" | |
116 | "\n" |
|
144 | "\n" | |
117 | "The optional ``read_size`` and ``write_size`` arguments control the chunk\n" |
|
145 | "The optional ``read_size`` and ``write_size`` arguments control the chunk\n" | |
118 | "size of data that is ``read()`` and ``write()`` between streams. They default\n" |
|
146 | "size of data that is ``read()`` and ``write()`` between streams. They default\n" | |
119 | "to the default input and output sizes of zstd decompressor streams.\n" |
|
147 | "to the default input and output sizes of zstd decompressor streams.\n" | |
120 | ); |
|
148 | ); | |
121 |
|
149 | |||
122 | static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
150 | static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
123 | static char* kwlist[] = { |
|
151 | static char* kwlist[] = { | |
124 | "ifh", |
|
152 | "ifh", | |
125 | "ofh", |
|
153 | "ofh", | |
126 | "read_size", |
|
154 | "read_size", | |
127 | "write_size", |
|
155 | "write_size", | |
128 | NULL |
|
156 | NULL | |
129 | }; |
|
157 | }; | |
130 |
|
158 | |||
131 | PyObject* source; |
|
159 | PyObject* source; | |
132 | PyObject* dest; |
|
160 | PyObject* dest; | |
133 | size_t inSize = ZSTD_DStreamInSize(); |
|
161 | size_t inSize = ZSTD_DStreamInSize(); | |
134 | size_t outSize = ZSTD_DStreamOutSize(); |
|
162 | size_t outSize = ZSTD_DStreamOutSize(); | |
135 | ZSTD_DStream* dstream; |
|
|||
136 | ZSTD_inBuffer input; |
|
163 | ZSTD_inBuffer input; | |
137 | ZSTD_outBuffer output; |
|
164 | ZSTD_outBuffer output; | |
138 | Py_ssize_t totalRead = 0; |
|
165 | Py_ssize_t totalRead = 0; | |
139 | Py_ssize_t totalWrite = 0; |
|
166 | Py_ssize_t totalWrite = 0; | |
140 | char* readBuffer; |
|
167 | char* readBuffer; | |
141 | Py_ssize_t readSize; |
|
168 | Py_ssize_t readSize; | |
142 | PyObject* readResult; |
|
169 | PyObject* readResult; | |
143 | PyObject* res = NULL; |
|
170 | PyObject* res = NULL; | |
144 | size_t zresult = 0; |
|
171 | size_t zresult = 0; | |
145 | PyObject* writeResult; |
|
172 | PyObject* writeResult; | |
146 | PyObject* totalReadPy; |
|
173 | PyObject* totalReadPy; | |
147 | PyObject* totalWritePy; |
|
174 | PyObject* totalWritePy; | |
148 |
|
175 | |||
149 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist, |
|
176 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist, | |
150 | &source, &dest, &inSize, &outSize)) { |
|
177 | &source, &dest, &inSize, &outSize)) { | |
151 | return NULL; |
|
178 | return NULL; | |
152 | } |
|
179 | } | |
153 |
|
180 | |||
154 | if (!PyObject_HasAttrString(source, "read")) { |
|
181 | if (!PyObject_HasAttrString(source, "read")) { | |
155 | PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); |
|
182 | PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); | |
156 | return NULL; |
|
183 | return NULL; | |
157 | } |
|
184 | } | |
158 |
|
185 | |||
159 | if (!PyObject_HasAttrString(dest, "write")) { |
|
186 | if (!PyObject_HasAttrString(dest, "write")) { | |
160 | PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); |
|
187 | PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); | |
161 | return NULL; |
|
188 | return NULL; | |
162 | } |
|
189 | } | |
163 |
|
190 | |||
164 | /* Prevent free on uninitialized memory in finally. */ |
|
191 | /* Prevent free on uninitialized memory in finally. */ | |
165 | output.dst = NULL; |
|
192 | output.dst = NULL; | |
166 |
|
193 | |||
167 | dstream = DStream_from_ZstdDecompressor(self); |
|
194 | if (0 != init_dstream(self)) { | |
168 | if (!dstream) { |
|
|||
169 | res = NULL; |
|
195 | res = NULL; | |
170 | goto finally; |
|
196 | goto finally; | |
171 | } |
|
197 | } | |
172 |
|
198 | |||
173 | output.dst = PyMem_Malloc(outSize); |
|
199 | output.dst = PyMem_Malloc(outSize); | |
174 | if (!output.dst) { |
|
200 | if (!output.dst) { | |
175 | PyErr_NoMemory(); |
|
201 | PyErr_NoMemory(); | |
176 | res = NULL; |
|
202 | res = NULL; | |
177 | goto finally; |
|
203 | goto finally; | |
178 | } |
|
204 | } | |
179 | output.size = outSize; |
|
205 | output.size = outSize; | |
180 | output.pos = 0; |
|
206 | output.pos = 0; | |
181 |
|
207 | |||
182 | /* Read source stream until EOF */ |
|
208 | /* Read source stream until EOF */ | |
183 | while (1) { |
|
209 | while (1) { | |
184 | readResult = PyObject_CallMethod(source, "read", "n", inSize); |
|
210 | readResult = PyObject_CallMethod(source, "read", "n", inSize); | |
185 | if (!readResult) { |
|
211 | if (!readResult) { | |
186 | PyErr_SetString(ZstdError, "could not read() from source"); |
|
212 | PyErr_SetString(ZstdError, "could not read() from source"); | |
187 | goto finally; |
|
213 | goto finally; | |
188 | } |
|
214 | } | |
189 |
|
215 | |||
190 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
216 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
191 |
|
217 | |||
192 | /* If no data was read, we're at EOF. */ |
|
218 | /* If no data was read, we're at EOF. */ | |
193 | if (0 == readSize) { |
|
219 | if (0 == readSize) { | |
194 | break; |
|
220 | break; | |
195 | } |
|
221 | } | |
196 |
|
222 | |||
197 | totalRead += readSize; |
|
223 | totalRead += readSize; | |
198 |
|
224 | |||
199 | /* Send data to decompressor */ |
|
225 | /* Send data to decompressor */ | |
200 | input.src = readBuffer; |
|
226 | input.src = readBuffer; | |
201 | input.size = readSize; |
|
227 | input.size = readSize; | |
202 | input.pos = 0; |
|
228 | input.pos = 0; | |
203 |
|
229 | |||
204 | while (input.pos < input.size) { |
|
230 | while (input.pos < input.size) { | |
205 | Py_BEGIN_ALLOW_THREADS |
|
231 | Py_BEGIN_ALLOW_THREADS | |
206 | zresult = ZSTD_decompressStream(dstream, &output, &input); |
|
232 | zresult = ZSTD_decompressStream(self->dstream, &output, &input); | |
207 | Py_END_ALLOW_THREADS |
|
233 | Py_END_ALLOW_THREADS | |
208 |
|
234 | |||
209 | if (ZSTD_isError(zresult)) { |
|
235 | if (ZSTD_isError(zresult)) { | |
210 | PyErr_Format(ZstdError, "zstd decompressor error: %s", |
|
236 | PyErr_Format(ZstdError, "zstd decompressor error: %s", | |
211 | ZSTD_getErrorName(zresult)); |
|
237 | ZSTD_getErrorName(zresult)); | |
212 | res = NULL; |
|
238 | res = NULL; | |
213 | goto finally; |
|
239 | goto finally; | |
214 | } |
|
240 | } | |
215 |
|
241 | |||
216 | if (output.pos) { |
|
242 | if (output.pos) { | |
217 | #if PY_MAJOR_VERSION >= 3 |
|
243 | #if PY_MAJOR_VERSION >= 3 | |
218 | writeResult = PyObject_CallMethod(dest, "write", "y#", |
|
244 | writeResult = PyObject_CallMethod(dest, "write", "y#", | |
219 | #else |
|
245 | #else | |
220 | writeResult = PyObject_CallMethod(dest, "write", "s#", |
|
246 | writeResult = PyObject_CallMethod(dest, "write", "s#", | |
221 | #endif |
|
247 | #endif | |
222 | output.dst, output.pos); |
|
248 | output.dst, output.pos); | |
223 |
|
249 | |||
224 | Py_XDECREF(writeResult); |
|
250 | Py_XDECREF(writeResult); | |
225 | totalWrite += output.pos; |
|
251 | totalWrite += output.pos; | |
226 | output.pos = 0; |
|
252 | output.pos = 0; | |
227 | } |
|
253 | } | |
228 | } |
|
254 | } | |
229 | } |
|
255 | } | |
230 |
|
256 | |||
231 | /* Source stream is exhausted. Finish up. */ |
|
257 | /* Source stream is exhausted. Finish up. */ | |
232 |
|
258 | |||
233 | ZSTD_freeDStream(dstream); |
|
|||
234 | dstream = NULL; |
|
|||
235 |
|
||||
236 | totalReadPy = PyLong_FromSsize_t(totalRead); |
|
259 | totalReadPy = PyLong_FromSsize_t(totalRead); | |
237 | totalWritePy = PyLong_FromSsize_t(totalWrite); |
|
260 | totalWritePy = PyLong_FromSsize_t(totalWrite); | |
238 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); |
|
261 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); | |
239 | Py_DECREF(totalReadPy); |
|
262 | Py_DECREF(totalReadPy); | |
240 | Py_DECREF(totalWritePy); |
|
263 | Py_DECREF(totalWritePy); | |
241 |
|
264 | |||
242 | finally: |
|
265 | finally: | |
243 | if (output.dst) { |
|
266 | if (output.dst) { | |
244 | PyMem_Free(output.dst); |
|
267 | PyMem_Free(output.dst); | |
245 | } |
|
268 | } | |
246 |
|
269 | |||
247 | if (dstream) { |
|
|||
248 | ZSTD_freeDStream(dstream); |
|
|||
249 | } |
|
|||
250 |
|
||||
251 | return res; |
|
270 | return res; | |
252 | } |
|
271 | } | |
253 |
|
272 | |||
254 | PyDoc_STRVAR(Decompressor_decompress__doc__, |
|
273 | PyDoc_STRVAR(Decompressor_decompress__doc__, | |
255 | "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n" |
|
274 | "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n" | |
256 | "\n" |
|
275 | "\n" | |
257 | "This method will decompress the entirety of the argument and return the\n" |
|
276 | "This method will decompress the entirety of the argument and return the\n" | |
258 | "result.\n" |
|
277 | "result.\n" | |
259 | "\n" |
|
278 | "\n" | |
260 | "The input bytes are expected to contain a full Zstandard frame (something\n" |
|
279 | "The input bytes are expected to contain a full Zstandard frame (something\n" | |
261 | "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n" |
|
280 | "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n" | |
262 | "not contain a full frame, an exception will be raised.\n" |
|
281 | "not contain a full frame, an exception will be raised.\n" | |
263 | "\n" |
|
282 | "\n" | |
264 | "If the frame header of the compressed data does not contain the content size\n" |
|
283 | "If the frame header of the compressed data does not contain the content size\n" | |
265 | "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n" |
|
284 | "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n" | |
266 | "allocation of size ``max_output_size`` will be performed and an attempt will\n" |
|
285 | "allocation of size ``max_output_size`` will be performed and an attempt will\n" | |
267 | "be made to perform decompression into that buffer. If the buffer is too\n" |
|
286 | "be made to perform decompression into that buffer. If the buffer is too\n" | |
268 | "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n" |
|
287 | "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n" | |
269 | "be resized if it is too large.\n" |
|
288 | "be resized if it is too large.\n" | |
270 | "\n" |
|
289 | "\n" | |
271 | "Uncompressed data could be much larger than compressed data. As a result,\n" |
|
290 | "Uncompressed data could be much larger than compressed data. As a result,\n" | |
272 | "calling this function could result in a very large memory allocation being\n" |
|
291 | "calling this function could result in a very large memory allocation being\n" | |
273 | "performed to hold the uncompressed data. Therefore it is **highly**\n" |
|
292 | "performed to hold the uncompressed data. Therefore it is **highly**\n" | |
274 | "recommended to use a streaming decompression method instead of this one.\n" |
|
293 | "recommended to use a streaming decompression method instead of this one.\n" | |
275 | ); |
|
294 | ); | |
276 |
|
295 | |||
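
A sketch of the two calling modes described above, assuming the extension imports as ``zstd``. The compressor keyword ``write_content_size`` is an assumption used here to produce a frame whose header embeds the content size.

    import zstd

    # Hypothetical round trip; ZstdCompressor.compress() is referenced in the docstring.
    frame = zstd.ZstdCompressor(write_content_size=True).compress(b'data to decompress')

    dctx = zstd.ZstdDecompressor()
    original = dctx.decompress(frame)
    # If the frame header lacked a content size, an explicit cap would be required:
    # original = dctx.decompress(frame, max_output_size=1048576)
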
277 | PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
296 | PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
278 | static char* kwlist[] = { |
|
297 | static char* kwlist[] = { | |
279 | "data", |
|
298 | "data", | |
280 | "max_output_size", |
|
299 | "max_output_size", | |
281 | NULL |
|
300 | NULL | |
282 | }; |
|
301 | }; | |
283 |
|
302 | |||
284 | const char* source; |
|
303 | const char* source; | |
285 | Py_ssize_t sourceSize; |
|
304 | Py_ssize_t sourceSize; | |
286 | Py_ssize_t maxOutputSize = 0; |
|
305 | Py_ssize_t maxOutputSize = 0; | |
287 | unsigned long long decompressedSize; |
|
306 | unsigned long long decompressedSize; | |
288 | size_t destCapacity; |
|
307 | size_t destCapacity; | |
289 | PyObject* result = NULL; |
|
308 | PyObject* result = NULL; | |
290 | void* dictData = NULL; |
|
309 | void* dictData = NULL; | |
291 | size_t dictSize = 0; |
|
310 | size_t dictSize = 0; | |
292 | size_t zresult; |
|
311 | size_t zresult; | |
293 |
|
312 | |||
294 | #if PY_MAJOR_VERSION >= 3 |
|
313 | #if PY_MAJOR_VERSION >= 3 | |
295 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress", |
|
314 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|n:decompress", | |
296 | #else |
|
315 | #else | |
297 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress", |
|
316 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|n:decompress", | |
298 | #endif |
|
317 | #endif | |
299 | kwlist, &source, &sourceSize, &maxOutputSize)) { |
|
318 | kwlist, &source, &sourceSize, &maxOutputSize)) { | |
300 | return NULL; |
|
319 | return NULL; | |
301 | } |
|
320 | } | |
302 |
|
321 | |||
303 | if (self->dict) { |
|
322 | if (self->dict) { | |
304 | dictData = self->dict->dictData; |
|
323 | dictData = self->dict->dictData; | |
305 | dictSize = self->dict->dictSize; |
|
324 | dictSize = self->dict->dictSize; | |
306 | } |
|
325 | } | |
307 |
|
326 | |||
308 | if (dictData && !self->ddict) { |
|
327 | if (dictData && !self->ddict) { | |
309 | Py_BEGIN_ALLOW_THREADS |
|
328 | Py_BEGIN_ALLOW_THREADS | |
310 | self->ddict = ZSTD_createDDict_byReference(dictData, dictSize); |
|
329 | self->ddict = ZSTD_createDDict_byReference(dictData, dictSize); | |
311 | Py_END_ALLOW_THREADS |
|
330 | Py_END_ALLOW_THREADS | |
312 |
|
331 | |||
313 | if (!self->ddict) { |
|
332 | if (!self->ddict) { | |
314 | PyErr_SetString(ZstdError, "could not create decompression dict"); |
|
333 | PyErr_SetString(ZstdError, "could not create decompression dict"); | |
315 | return NULL; |
|
334 | return NULL; | |
316 | } |
|
335 | } | |
317 | } |
|
336 | } | |
318 |
|
337 | |||
319 | decompressedSize = ZSTD_getDecompressedSize(source, sourceSize); |
|
338 | decompressedSize = ZSTD_getDecompressedSize(source, sourceSize); | |
320 | /* 0 returned if content size not in the zstd frame header */ |
|
339 | /* 0 returned if content size not in the zstd frame header */ | |
321 | if (0 == decompressedSize) { |
|
340 | if (0 == decompressedSize) { | |
322 | if (0 == maxOutputSize) { |
|
341 | if (0 == maxOutputSize) { | |
323 | PyErr_SetString(ZstdError, "input data invalid or missing content size " |
|
342 | PyErr_SetString(ZstdError, "input data invalid or missing content size " | |
324 | "in frame header"); |
|
343 | "in frame header"); | |
325 | return NULL; |
|
344 | return NULL; | |
326 | } |
|
345 | } | |
327 | else { |
|
346 | else { | |
328 | result = PyBytes_FromStringAndSize(NULL, maxOutputSize); |
|
347 | result = PyBytes_FromStringAndSize(NULL, maxOutputSize); | |
329 | destCapacity = maxOutputSize; |
|
348 | destCapacity = maxOutputSize; | |
330 | } |
|
349 | } | |
331 | } |
|
350 | } | |
332 | else { |
|
351 | else { | |
333 | result = PyBytes_FromStringAndSize(NULL, decompressedSize); |
|
352 | result = PyBytes_FromStringAndSize(NULL, decompressedSize); | |
334 | destCapacity = decompressedSize; |
|
353 | destCapacity = decompressedSize; | |
335 | } |
|
354 | } | |
336 |
|
355 | |||
337 | if (!result) { |
|
356 | if (!result) { | |
338 | return NULL; |
|
357 | return NULL; | |
339 | } |
|
358 | } | |
340 |
|
359 | |||
341 | Py_BEGIN_ALLOW_THREADS |
|
360 | Py_BEGIN_ALLOW_THREADS | |
342 | if (self->ddict) { |
|
361 | if (self->ddict) { | |
343 | zresult = ZSTD_decompress_usingDDict(self->dctx, |
|
362 | zresult = ZSTD_decompress_usingDDict(self->dctx, | |
344 | PyBytes_AsString(result), destCapacity, |
|
363 | PyBytes_AsString(result), destCapacity, | |
345 | source, sourceSize, self->ddict); |
|
364 | source, sourceSize, self->ddict); | |
346 | } |
|
365 | } | |
347 | else { |
|
366 | else { | |
348 | zresult = ZSTD_decompressDCtx(self->dctx, |
|
367 | zresult = ZSTD_decompressDCtx(self->dctx, | |
349 | PyBytes_AsString(result), destCapacity, source, sourceSize); |
|
368 | PyBytes_AsString(result), destCapacity, source, sourceSize); | |
350 | } |
|
369 | } | |
351 | Py_END_ALLOW_THREADS |
|
370 | Py_END_ALLOW_THREADS | |
352 |
|
371 | |||
353 | if (ZSTD_isError(zresult)) { |
|
372 | if (ZSTD_isError(zresult)) { | |
354 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); |
|
373 | PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult)); | |
355 | Py_DECREF(result); |
|
374 | Py_DECREF(result); | |
356 | return NULL; |
|
375 | return NULL; | |
357 | } |
|
376 | } | |
358 | else if (decompressedSize && zresult != decompressedSize) { |
|
377 | else if (decompressedSize && zresult != decompressedSize) { | |
359 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", |
|
378 | PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu", | |
360 | zresult, decompressedSize); |
|
379 | zresult, decompressedSize); | |
361 | Py_DECREF(result); |
|
380 | Py_DECREF(result); | |
362 | return NULL; |
|
381 | return NULL; | |
363 | } |
|
382 | } | |
364 | else if (zresult < destCapacity) { |
|
383 | else if (zresult < destCapacity) { | |
365 | if (_PyBytes_Resize(&result, zresult)) { |
|
384 | if (_PyBytes_Resize(&result, zresult)) { | |
366 | Py_DECREF(result); |
|
385 | Py_DECREF(result); | |
367 | return NULL; |
|
386 | return NULL; | |
368 | } |
|
387 | } | |
369 | } |
|
388 | } | |
370 |
|
389 | |||
371 | return result; |
|
390 | return result; | |
372 | } |
|
391 | } | |
373 |
|
392 | |||
374 | PyDoc_STRVAR(Decompressor_decompressobj__doc__, |
|
393 | PyDoc_STRVAR(Decompressor_decompressobj__doc__, | |
375 | "decompressobj()\n" |
|
394 | "decompressobj()\n" | |
376 | "\n" |
|
395 | "\n" | |
377 | "Incrementally feed data into a decompressor.\n" |
|
396 | "Incrementally feed data into a decompressor.\n" | |
378 | "\n" |
|
397 | "\n" | |
379 | "The returned object exposes a ``decompress(data)`` method. This makes it\n" |
|
398 | "The returned object exposes a ``decompress(data)`` method. This makes it\n" | |
380 | "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n" |
|
399 | "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n" | |
381 | "callers can swap in the zstd decompressor while using the same API.\n" |
|
400 | "callers can swap in the zstd decompressor while using the same API.\n" | |
382 | ); |
|
401 | ); | |
383 |
|
402 | |||
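
A sketch of the incremental API described above, assuming the extension imports as ``zstd``. The frame is built inline so the snippet stands alone; the split at byte 10 is arbitrary.

    import zstd

    frame = zstd.ZstdCompressor().compress(b'data to decompress')

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()
    # Feed compressed bytes piecemeal, collecting whatever output is ready.
    data = b''.join(dobj.decompress(chunk) for chunk in (frame[:10], frame[10:]))
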
384 | static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) { |
|
403 | static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self) { | |
385 | ZstdDecompressionObj* result = 
|
404 | ZstdDecompressionObj* result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL); | |
386 | if (!result) { |
|
405 | if (!result) { | |
387 | return NULL; |
|
406 | return NULL; | |
388 | } |
|
407 | } | |
389 |
|
408 | |||
390 | result->dstream = DStream_from_ZstdDecompressor(self); |
|
409 | if (0 != init_dstream(self)) { | |
391 | if (!result->dstream) { |
|
410 | Py_DECREF(result); | |
392 | Py_DecRef((PyObject*)result); |
|
|||
393 | return NULL; |
|
411 | return NULL; | |
394 | } |
|
412 | } | |
395 |
|
413 | |||
396 | result->decompressor = self; |
|
414 | result->decompressor = self; | |
397 | Py_INCREF(result->decompressor); |
|
415 | Py_INCREF(result->decompressor); | |
398 |
|
416 | |||
399 | result->finished = 0; |
|
|||
400 |
|
||||
401 | return result; |
|
417 | return result; | |
402 | } |
|
418 | } | |
403 |
|
419 | |||
404 | PyDoc_STRVAR(Decompressor_read_from__doc__, |
|
420 | PyDoc_STRVAR(Decompressor_read_from__doc__, | |
405 | "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n" |
|
421 | "read_from(reader[, read_size=default, write_size=default, skip_bytes=0])\n" | |
406 | "Read compressed data and return an iterator\n" |
|
422 | "Read compressed data and return an iterator\n" | |
407 | "\n" |
|
423 | "\n" | |
408 | "Returns an iterator of decompressed data chunks produced from reading from\n" |
|
424 | "Returns an iterator of decompressed data chunks produced from reading from\n" | |
409 | "the ``reader``.\n" |
|
425 | "the ``reader``.\n" | |
410 | "\n" |
|
426 | "\n" | |
411 | "Compressed data will be obtained from ``reader`` by calling the\n" |
|
427 | "Compressed data will be obtained from ``reader`` by calling the\n" | |
412 | "``read(size)`` method of it. The source data will be streamed into a\n" |
|
428 | "``read(size)`` method of it. The source data will be streamed into a\n" | |
413 | "decompressor. As decompressed data is available, it will be exposed to the\n" |
|
429 | "decompressor. As decompressed data is available, it will be exposed to the\n" | |
414 | "returned iterator.\n" |
|
430 | "returned iterator.\n" | |
415 | "\n" |
|
431 | "\n" | |
416 | "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n" |
|
432 | "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n" | |
417 | "iterator in chunks of size ``write_size``. The default values are the input\n" |
|
433 | "iterator in chunks of size ``write_size``. The default values are the input\n" | |
418 | "and output sizes for a zstd streaming decompressor.\n" |
|
434 | "and output sizes for a zstd streaming decompressor.\n" | |
419 | "\n" |
|
435 | "\n" | |
420 | "There is also support for skipping the first ``skip_bytes`` of data from\n" |
|
436 | "There is also support for skipping the first ``skip_bytes`` of data from\n" | |
421 | "the source.\n" |
|
437 | "the source.\n" | |
422 | ); |
|
438 | ); | |
423 |
|
439 | |||
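
A sketch of the iterator-based streaming described above, assuming the extension imports as ``zstd`` and that ``input.zst`` is a placeholder path to an existing zstd frame.

    import zstd

    dctx = zstd.ZstdDecompressor()
    with open('input.zst', 'rb') as fh:
        # Data is read() in read_size pieces and yielded in up to write_size pieces.
        for chunk in dctx.read_from(fh, read_size=8192, write_size=65536):
            pass  # consume decompressed chunks here
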
424 | static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
440 | static ZstdDecompressorIterator* Decompressor_read_from(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
425 | static char* kwlist[] = { |
|
441 | static char* kwlist[] = { | |
426 | "reader", |
|
442 | "reader", | |
427 | "read_size", |
|
443 | "read_size", | |
428 | "write_size", |
|
444 | "write_size", | |
429 | "skip_bytes", |
|
445 | "skip_bytes", | |
430 | NULL |
|
446 | NULL | |
431 | }; |
|
447 | }; | |
432 |
|
448 | |||
433 | PyObject* reader; |
|
449 | PyObject* reader; | |
434 | size_t inSize = ZSTD_DStreamInSize(); |
|
450 | size_t inSize = ZSTD_DStreamInSize(); | |
435 | size_t outSize = ZSTD_DStreamOutSize(); |
|
451 | size_t outSize = ZSTD_DStreamOutSize(); | |
436 | ZstdDecompressorIterator* result; |
|
452 | ZstdDecompressorIterator* result; | |
437 | size_t skipBytes = 0; |
|
453 | size_t skipBytes = 0; | |
438 |
|
454 | |||
439 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist, |
|
455 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_from", kwlist, | |
440 | &reader, &inSize, &outSize, &skipBytes)) { |
|
456 | &reader, &inSize, &outSize, &skipBytes)) { | |
441 | return NULL; |
|
457 | return NULL; | |
442 | } |
|
458 | } | |
443 |
|
459 | |||
444 | if (skipBytes >= inSize) { |
|
460 | if (skipBytes >= inSize) { | |
445 | PyErr_SetString(PyExc_ValueError, |
|
461 | PyErr_SetString(PyExc_ValueError, | |
446 | "skip_bytes must be smaller than read_size"); |
|
462 | "skip_bytes must be smaller than read_size"); | |
447 | return NULL; |
|
463 | return NULL; | |
448 | } |
|
464 | } | |
449 |
|
465 | |||
450 | result = 
|
466 | result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL); | |
451 | if (!result) { |
|
467 | if (!result) { | |
452 | return NULL; |
|
468 | return NULL; | |
453 | } |
|
469 | } | |
454 |
|
470 | |||
455 | result->decompressor = NULL; |
|
|||
456 | result->reader = NULL; |
|
|||
457 | result->buffer = NULL; |
|
|||
458 | result->dstream = NULL; |
|
|||
459 | result->input.src = NULL; |
|
|||
460 | result->output.dst = NULL; |
|
|||
461 |
|
||||
462 | if (PyObject_HasAttrString(reader, "read")) { |
|
471 | if (PyObject_HasAttrString(reader, "read")) { | |
463 | result->reader = reader; |
|
472 | result->reader = reader; | |
464 | Py_INCREF(result->reader); |
|
473 | Py_INCREF(result->reader); | |
465 | } |
|
474 | } | |
466 | else if (1 == PyObject_CheckBuffer(reader)) { |
|
475 | else if (1 == PyObject_CheckBuffer(reader)) { | |
467 | /* Object claims it is a buffer. Try to get a handle to it. */ |
|
476 | /* Object claims it is a buffer. Try to get a handle to it. */ | |
468 | result->buffer = PyMem_Malloc(sizeof(Py_buffer)); |
|
477 | result->buffer = PyMem_Malloc(sizeof(Py_buffer)); | |
469 | if (!result->buffer) { |
|
478 | if (!result->buffer) { | |
470 | goto except; |
|
479 | goto except; | |
471 | } |
|
480 | } | |
472 |
|
481 | |||
473 | memset(result->buffer, 0, sizeof(Py_buffer)); |
|
482 | memset(result->buffer, 0, sizeof(Py_buffer)); | |
474 |
|
483 | |||
475 | if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { |
|
484 | if (0 != PyObject_GetBuffer(reader, result->buffer, PyBUF_CONTIG_RO)) { | |
476 | goto except; |
|
485 | goto except; | |
477 | } |
|
486 | } | |
478 |
|
||||
479 | result->bufferOffset = 0; |
|
|||
480 | } |
|
487 | } | |
481 | else { |
|
488 | else { | |
482 | PyErr_SetString(PyExc_ValueError, |
|
489 | PyErr_SetString(PyExc_ValueError, | |
483 | "must pass an object with a read() method or conforms to buffer protocol"); |
|
490 | "must pass an object with a read() method or conforms to buffer protocol"); | |
484 | goto except; |
|
491 | goto except; | |
485 | } |
|
492 | } | |
486 |
|
493 | |||
487 | result->decompressor = self; |
|
494 | result->decompressor = self; | |
488 | Py_INCREF(result->decompressor); |
|
495 | Py_INCREF(result->decompressor); | |
489 |
|
496 | |||
490 | result->inSize = inSize; |
|
497 | result->inSize = inSize; | |
491 | result->outSize = outSize; |
|
498 | result->outSize = outSize; | |
492 | result->skipBytes = skipBytes; |
|
499 | result->skipBytes = skipBytes; | |
493 |
|
500 | |||
494 | result->dstream = DStream_from_ZstdDecompressor(self); |
|
501 | if (0 != init_dstream(self)) { | |
495 | if (!result->dstream) { |
|
|||
496 | goto except; |
|
502 | goto except; | |
497 | } |
|
503 | } | |
498 |
|
504 | |||
499 | result->input.src = PyMem_Malloc(inSize); |
|
505 | result->input.src = PyMem_Malloc(inSize); | |
500 | if (!result->input.src) { |
|
506 | if (!result->input.src) { | |
501 | PyErr_NoMemory(); |
|
507 | PyErr_NoMemory(); | |
502 | goto except; |
|
508 | goto except; | |
503 | } |
|
509 | } | |
504 | result->input.size = 0; |
|
|||
505 | result->input.pos = 0; |
|
|||
506 |
|
||||
507 | result->output.dst = NULL; |
|
|||
508 | result->output.size = 0; |
|
|||
509 | result->output.pos = 0; |
|
|||
510 |
|
||||
511 | result->readCount = 0; |
|
|||
512 | result->finishedInput = 0; |
|
|||
513 | result->finishedOutput = 0; |
|
|||
514 |
|
510 | |||
515 | goto finally; |
|
511 | goto finally; | |
516 |
|
512 | |||
517 | except: |
|
513 | except: | |
518 | Py_CLEAR(result->reader); |
|
514 | Py_CLEAR(result->reader); | |
519 |
|
515 | |||
520 | if (result->buffer) { |
|
516 | if (result->buffer) { | |
521 | PyBuffer_Release(result->buffer); |
|
517 | PyBuffer_Release(result->buffer); | |
522 | Py_CLEAR(result->buffer); |
|
518 | Py_CLEAR(result->buffer); | |
523 | } |
|
519 | } | |
524 |
|
520 | |||
525 | Py_CLEAR(result); |
|
521 | Py_CLEAR(result); | |
526 |
|
522 | |||
527 | finally: |
|
523 | finally: | |
528 |
|
524 | |||
529 | return result; |
|
525 | return result; | |
530 | } |
|
526 | } | |
531 |
|
527 | |||
532 | PyDoc_STRVAR(Decompressor_write_to__doc__, |
|
528 | PyDoc_STRVAR(Decompressor_write_to__doc__, | |
533 | "Create a context manager to write decompressed data to an object.\n" |
|
529 | "Create a context manager to write decompressed data to an object.\n" | |
534 | "\n" |
|
530 | "\n" | |
535 | "The passed object must have a ``write()`` method.\n" |
|
531 | "The passed object must have a ``write()`` method.\n" | |
536 | "\n" |
|
532 | "\n" | |
537 | "The caller feeds intput data to the object by calling ``write(data)``.\n" |
|
533 | "The caller feeds intput data to the object by calling ``write(data)``.\n" | |
538 | "Decompressed data is written to the argument given as it is decompressed.\n" |
|
534 | "Decompressed data is written to the argument given as it is decompressed.\n" | |
539 | "\n" |
|
535 | "\n" | |
540 | "An optional ``write_size`` argument defines the size of chunks to\n" |
|
536 | "An optional ``write_size`` argument defines the size of chunks to\n" | |
541 | "``write()`` to the writer. It defaults to the default output size for a zstd\n" |
|
537 | "``write()`` to the writer. It defaults to the default output size for a zstd\n" | |
542 | "streaming decompressor.\n" |
|
538 | "streaming decompressor.\n" | |
543 | ); |
|
539 | ); | |
544 |
|
540 | |||
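
A sketch of the context-manager usage described above, assuming the extension imports as ``zstd``. The compressed input and the in-memory writer are created inline so the snippet stands alone.

    import io

    import zstd

    frame = zstd.ZstdCompressor().compress(b'data to decompress')

    dctx = zstd.ZstdDecompressor()
    out = io.BytesIO()
    with dctx.write_to(out) as decompressor:
        # write() receives compressed bytes; decompressed output lands in ``out``.
        decompressor.write(frame)
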
545 | static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { |
|
541 | static ZstdDecompressionWriter* Decompressor_write_to(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |
546 | static char* kwlist[] = { |
|
542 | static char* kwlist[] = { | |
547 | "writer", |
|
543 | "writer", | |
548 | "write_size", |
|
544 | "write_size", | |
549 | NULL |
|
545 | NULL | |
550 | }; |
|
546 | }; | |
551 |
|
547 | |||
552 | PyObject* writer; |
|
548 | PyObject* writer; | |
553 | size_t outSize = ZSTD_DStreamOutSize(); |
|
549 | size_t outSize = ZSTD_DStreamOutSize(); | |
554 | ZstdDecompressionWriter* result; |
|
550 | ZstdDecompressionWriter* result; | |
555 |
|
551 | |||
556 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist, |
|
552 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|k:write_to", kwlist, | |
557 | &writer, &outSize)) { |
|
553 | &writer, &outSize)) { | |
558 | return NULL; |
|
554 | return NULL; | |
559 | } |
|
555 | } | |
560 |
|
556 | |||
561 | if (!PyObject_HasAttrString(writer, "write")) { |
|
557 | if (!PyObject_HasAttrString(writer, "write")) { | |
562 | PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); |
|
558 | PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); | |
563 | return NULL; |
|
559 | return NULL; | |
564 | } |
|
560 | } | |
565 |
|
561 | |||
566 | result = 
|
562 | result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL); | |
567 | if (!result) { |
|
563 | if (!result) { | |
568 | return NULL; |
|
564 | return NULL; | |
569 | } |
|
565 | } | |
570 |
|
566 | |||
571 | result->decompressor = self; |
|
567 | result->decompressor = self; | |
572 | Py_INCREF(result->decompressor); |
|
568 | Py_INCREF(result->decompressor); | |
573 |
|
569 | |||
574 | result->writer = writer; |
|
570 | result->writer = writer; | |
575 | Py_INCREF(result->writer); |
|
571 | Py_INCREF(result->writer); | |
576 |
|
572 | |||
577 | result->outSize = outSize; |
|
573 | result->outSize = outSize; | |
578 |
|
574 | |||
579 | result->entered = 0; |
|
|||
580 | result->dstream = NULL; |
|
|||
581 |
|
||||
582 | return result; |
|
575 | return result; | |
583 | } |
|
576 | } | |
584 |
|
577 | |||
585 | PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__, |
|
578 | PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__, | |
586 | "Decompress a series of chunks using the content dictionary chaining technique\n" |
|
579 | "Decompress a series of chunks using the content dictionary chaining technique\n" | |
587 | ); |
|
580 | ); | |
588 |
|
581 | |||
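
A hedged sketch of the chaining technique named above, assuming the extension imports as ``zstd``. ``chain`` is a hypothetical list in which element 0 is an ordinary zstd frame and each later element was compressed using the previous fulltext as a dictionary; producing such a chain is out of scope here.

    import zstd

    chain = [...]  # placeholder: frames produced with content-dictionary chaining

    dctx = zstd.ZstdDecompressor()
    # Only the final fulltext is returned; intermediate fulltexts serve only as
    # dictionaries, mirroring the two-buffer strategy in the function below.
    final_text = dctx.decompress_content_dict_chain(chain)
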
589 | static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) { |
|
582 | static PyObject* Decompressor_decompress_content_dict_chain(PyObject* self, PyObject* args, PyObject* kwargs) { | |
590 | static char* kwlist[] = { |
|
583 | static char* kwlist[] = { | |
591 | "frames", |
|
584 | "frames", | |
592 | NULL |
|
585 | NULL | |
593 | }; |
|
586 | }; | |
594 |
|
587 | |||
595 | PyObject* chunks; |
|
588 | PyObject* chunks; | |
596 | Py_ssize_t chunksLen; |
|
589 | Py_ssize_t chunksLen; | |
597 | Py_ssize_t chunkIndex; |
|
590 | Py_ssize_t chunkIndex; | |
598 | char parity = 0; |
|
591 | char parity = 0; | |
599 | PyObject* chunk; |
|
592 | PyObject* chunk; | |
600 | char* chunkData; |
|
593 | char* chunkData; | |
601 | Py_ssize_t chunkSize; |
|
594 | Py_ssize_t chunkSize; | |
602 | ZSTD_DCtx* dctx = NULL; |
|
595 | ZSTD_DCtx* dctx = NULL; | |
603 | size_t zresult; |
|
596 | size_t zresult; | |
604 | ZSTD_frameParams frameParams; |
|
597 | ZSTD_frameParams frameParams; | |
605 | void* buffer1 = NULL; |
|
598 | void* buffer1 = NULL; | |
606 | size_t buffer1Size = 0; |
|
599 | size_t buffer1Size = 0; | |
607 | size_t buffer1ContentSize = 0; |
|
600 | size_t buffer1ContentSize = 0; | |
608 | void* buffer2 = NULL; |
|
601 | void* buffer2 = NULL; | |
609 | size_t buffer2Size = 0; |
|
602 | size_t buffer2Size = 0; | |
610 | size_t buffer2ContentSize = 0; |
|
603 | size_t buffer2ContentSize = 0; | |
611 | void* destBuffer = NULL; |
|
604 | void* destBuffer = NULL; | |
612 | PyObject* result = NULL; |
|
605 | PyObject* result = NULL; | |
613 |
|
606 | |||
614 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", |
|
607 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain", | |
615 | kwlist, &PyList_Type, &chunks)) { |
|
608 | kwlist, &PyList_Type, &chunks)) { | |
616 | return NULL; |
|
609 | return NULL; | |
617 | } |
|
610 | } | |
618 |
|
611 | |||
619 | chunksLen = PyList_Size(chunks); |
|
612 | chunksLen = PyList_Size(chunks); | |
620 | if (!chunksLen) { |
|
613 | if (!chunksLen) { | |
621 | PyErr_SetString(PyExc_ValueError, "empty input chain"); |
|
614 | PyErr_SetString(PyExc_ValueError, "empty input chain"); | |
622 | return NULL; |
|
615 | return NULL; | |
623 | } |
|
616 | } | |
624 |
|
617 | |||
625 | /* The first chunk should not be using a dictionary. We handle it specially. */ |
|
618 | /* The first chunk should not be using a dictionary. We handle it specially. */ | |
626 | chunk = PyList_GetItem(chunks, 0); |
|
619 | chunk = PyList_GetItem(chunks, 0); | |
627 | if (!PyBytes_Check(chunk)) { |
|
620 | if (!PyBytes_Check(chunk)) { | |
628 | PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes"); |
|
621 | PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes"); | |
629 | return NULL; |
|
622 | return NULL; | |
630 | } |
|
623 | } | |
631 |
|
624 | |||
632 | /* We require that all chunks be zstd frames and that they have content size set. */ |
|
625 | /* We require that all chunks be zstd frames and that they have content size set. */ | |
633 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
626 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |
634 | zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); |
|
627 | zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); | |
635 | if (ZSTD_isError(zresult)) { |
|
628 | if (ZSTD_isError(zresult)) { | |
636 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); |
|
629 | PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame"); | |
637 | return NULL; |
|
630 | return NULL; | |
638 | } |
|
631 | } | |
639 | else if (zresult) { |
|
632 | else if (zresult) { | |
640 | PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame"); |
|
633 | PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame"); | |
641 | return NULL; |
|
634 | return NULL; | |
642 | } |
|
635 | } | |
643 |
|
636 | |||
644 | if (0 == frameParams.frameContentSize) { |
|
637 | if (0 == frameParams.frameContentSize) { | |
645 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); |
|
638 | PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame"); | |
646 | return NULL; |
|
639 | return NULL; | |
647 | } |
|
640 | } | |
648 |
|
641 | |||
649 | dctx = ZSTD_createDCtx(); |
|
642 | dctx = ZSTD_createDCtx(); | |
650 | if (!dctx) { |
|
643 | if (!dctx) { | |
651 | PyErr_NoMemory(); |
|
644 | PyErr_NoMemory(); | |
652 | goto finally; |
|
645 | goto finally; | |
653 | } |
|
646 | } | |
654 |
|
647 | |||
655 | buffer1Size = frameParams.frameContentSize; |
|
648 | buffer1Size = frameParams.frameContentSize; | |
656 | buffer1 = PyMem_Malloc(buffer1Size); |
|
649 | buffer1 = PyMem_Malloc(buffer1Size); | |
657 | if (!buffer1) { |
|
650 | if (!buffer1) { | |
658 | goto finally; |
|
651 | goto finally; | |
659 | } |
|
652 | } | |
660 |
|
653 | |||
661 | Py_BEGIN_ALLOW_THREADS |
|
654 | Py_BEGIN_ALLOW_THREADS | |
662 | zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize); |
|
655 | zresult = ZSTD_decompressDCtx(dctx, buffer1, buffer1Size, chunkData, chunkSize); | |
663 | Py_END_ALLOW_THREADS |
|
656 | Py_END_ALLOW_THREADS | |
664 | if (ZSTD_isError(zresult)) { |
|
657 | if (ZSTD_isError(zresult)) { | |
665 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); |
|
658 | PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult)); | |
666 | goto finally; |
|
659 | goto finally; | |
667 | } |
|
660 | } | |
668 |
|
661 | |||
669 | buffer1ContentSize = zresult; |
|
662 | buffer1ContentSize = zresult; | |
670 |
|
663 | |||
671 | /* Special case of a simple chain. */ |
|
664 | /* Special case of a simple chain. */ | |
672 | if (1 == chunksLen) { |
|
665 | if (1 == chunksLen) { | |
673 | result = PyBytes_FromStringAndSize(buffer1, buffer1Size); |
|
666 | result = PyBytes_FromStringAndSize(buffer1, buffer1Size); | |
674 | goto finally; |
|
667 | goto finally; | |
675 | } |
|
668 | } | |
676 |
|
669 | |||
677 | /* This should ideally look at next chunk. But this is slightly simpler. */ |
|
670 | /* This should ideally look at next chunk. But this is slightly simpler. */ | |
678 | buffer2Size = frameParams.frameContentSize; |
|
671 | buffer2Size = frameParams.frameContentSize; | |
679 | buffer2 = PyMem_Malloc(buffer2Size); |
|
672 | buffer2 = PyMem_Malloc(buffer2Size); | |
680 | if (!buffer2) { |
|
673 | if (!buffer2) { | |
681 | goto finally; |
|
674 | goto finally; | |
682 | } |
|
675 | } | |
683 |
|
676 | |||
684 | /* For each subsequent chunk, use the previous fulltext as a content dictionary. |
|
677 | /* For each subsequent chunk, use the previous fulltext as a content dictionary. | |
685 | Our strategy is to have 2 buffers. One holds the previous fulltext (to be |
|
678 | Our strategy is to have 2 buffers. One holds the previous fulltext (to be | |
686 | used as a content dictionary) and the other holds the new fulltext. The |
|
679 | used as a content dictionary) and the other holds the new fulltext. The | |
687 | buffers grow when needed but never decrease in size. This limits the |
|
680 | buffers grow when needed but never decrease in size. This limits the | |
688 | memory allocator overhead. |
|
681 | memory allocator overhead. | |
689 | */ |
|
682 | */ | |
690 | for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) { |
|
683 | for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) { | |
691 | chunk = PyList_GetItem(chunks, chunkIndex); |
|
684 | chunk = PyList_GetItem(chunks, chunkIndex); | |
692 | if (!PyBytes_Check(chunk)) { |
|
685 | if (!PyBytes_Check(chunk)) { | |
693 | PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex); |
|
686 | PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex); | |
694 | goto finally; |
|
687 | goto finally; | |
695 | } |
|
688 | } | |
696 |
|
689 | |||
697 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); |
|
690 | PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize); | |
698 | zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); |
|
691 | zresult = ZSTD_getFrameParams(&frameParams, (void*)chunkData, chunkSize); | |
699 | if (ZSTD_isError(zresult)) { |
|
692 | if (ZSTD_isError(zresult)) { | |
700 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); |
|
693 | PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex); | |
701 | goto finally; |
|
694 | goto finally; | |
702 | } |
|
695 | } | |
703 | else if (zresult) { |
|
696 | else if (zresult) { | |
704 | PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex); |
|
697 | PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex); | |
705 | goto finally; |
|
698 | goto finally; | |
706 | } |
|
699 | } | |
707 |
|
700 | |||
708 | if (0 == frameParams.frameContentSize) { |
|
701 | if (0 == frameParams.frameContentSize) { | |
709 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); |
|
702 | PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex); | |
710 | goto finally; |
|
703 | goto finally; | |
711 | } |
|
704 | } | |
712 |
|
705 | |||
713 | parity = chunkIndex % 2; |
|
706 | parity = chunkIndex % 2; | |
714 |
|
707 | |||
715 | /* This could definitely be abstracted to reduce code duplication. */ |
|
708 | /* This could definitely be abstracted to reduce code duplication. */ | |
716 | if (parity) { |
|
709 | if (parity) { | |
717 | /* Resize destination buffer to hold larger content. */ |
|
710 | /* Resize destination buffer to hold larger content. */ | |
718 | if (buffer2Size < frameParams.frameContentSize) { |
|
711 | if (buffer2Size < frameParams.frameContentSize) { | |
719 | buffer2Size = frameParams.frameContentSize; |
|
712 | buffer2Size = frameParams.frameContentSize; | |
720 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); |
|
713 | destBuffer = PyMem_Realloc(buffer2, buffer2Size); | |
721 | if (!destBuffer) { |
|
714 | if (!destBuffer) { | |
722 | goto finally; |
|
715 | goto finally; | |
723 | } |
|
716 | } | |
724 | buffer2 = destBuffer; |
|
717 | buffer2 = destBuffer; | |
725 | } |
|
718 | } | |
726 |
|
719 | |||
727 | Py_BEGIN_ALLOW_THREADS |
|
720 | Py_BEGIN_ALLOW_THREADS | |
728 | zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size, |
|
721 | zresult = ZSTD_decompress_usingDict(dctx, buffer2, buffer2Size, | |
729 | chunkData, chunkSize, buffer1, buffer1ContentSize); |
|
722 | chunkData, chunkSize, buffer1, buffer1ContentSize); | |
730 | Py_END_ALLOW_THREADS |
|
723 | Py_END_ALLOW_THREADS | |
731 | if (ZSTD_isError(zresult)) { |
|
724 | if (ZSTD_isError(zresult)) { | |
732 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
725 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |
733 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
726 | chunkIndex, ZSTD_getErrorName(zresult)); | |
734 | goto finally; |
|
727 | goto finally; | |
735 | } |
|
728 | } | |
736 | buffer2ContentSize = zresult; |
|
729 | buffer2ContentSize = zresult; | |
737 | } |
|
730 | } | |
738 | else { |
|
731 | else { | |
739 | if (buffer1Size < frameParams.frameContentSize) { |
|
732 | if (buffer1Size < frameParams.frameContentSize) { | |
740 | buffer1Size = frameParams.frameContentSize; |
|
733 | buffer1Size = frameParams.frameContentSize; | |
741 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); |
|
734 | destBuffer = PyMem_Realloc(buffer1, buffer1Size); | |
742 | if (!destBuffer) { |
|
735 | if (!destBuffer) { | |
743 | goto finally; |
|
736 | goto finally; | |
744 | } |
|
737 | } | |
745 | buffer1 = destBuffer; |
|
738 | buffer1 = destBuffer; | |
746 | } |
|
739 | } | |
747 |
|
740 | |||
748 | Py_BEGIN_ALLOW_THREADS |
|
741 | Py_BEGIN_ALLOW_THREADS | |
749 | zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size, |
|
742 | zresult = ZSTD_decompress_usingDict(dctx, buffer1, buffer1Size, | |
750 | chunkData, chunkSize, buffer2, buffer2ContentSize); |
|
743 | chunkData, chunkSize, buffer2, buffer2ContentSize); | |
751 | Py_END_ALLOW_THREADS |
|
744 | Py_END_ALLOW_THREADS | |
752 | if (ZSTD_isError(zresult)) { |
|
745 | if (ZSTD_isError(zresult)) { | |
753 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", |
|
746 | PyErr_Format(ZstdError, "could not decompress chunk %zd: %s", | |
754 | chunkIndex, ZSTD_getErrorName(zresult)); |
|
747 | chunkIndex, ZSTD_getErrorName(zresult)); | |
755 | goto finally; |
|
748 | goto finally; | |
756 | } |
|
749 | } | |
757 | buffer1ContentSize = zresult; |
|
750 | buffer1ContentSize = zresult; | |
758 | } |
|
751 | } | |
759 | } |
|
752 | } | |
760 |
|
753 | |||
761 | result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1, |
|
754 | result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1, | |
762 | parity ? buffer2ContentSize : buffer1ContentSize); |
|
755 | parity ? buffer2ContentSize : buffer1ContentSize); | |
763 |
|
756 | |||
764 | finally: |
|
757 | finally: | |
765 | if (buffer2) { |
|
758 | if (buffer2) { | |
766 | PyMem_Free(buffer2); |
|
759 | PyMem_Free(buffer2); | |
767 | } |
|
760 | } | |
768 | if (buffer1) { |
|
761 | if (buffer1) { | |
769 | PyMem_Free(buffer1); |
|
762 | PyMem_Free(buffer1); | |
770 | } |
|
763 | } | |
771 |
|
764 | |||
772 | if (dctx) { |
|
765 | if (dctx) { | |
773 | ZSTD_freeDCtx(dctx); |
|
766 | ZSTD_freeDCtx(dctx); | |
774 | } |
|
767 | } | |
775 |
|
768 | |||
776 | return result; |
|
769 | return result; | |
777 | } |
|
770 | } | |
778 |
|
771 | |||
|
772 | typedef struct { | |||
|
773 | void* sourceData; | |||
|
774 | size_t sourceSize; | |||
|
775 | unsigned long long destSize; | |||
|
776 | } FramePointer; | |||
|
777 | ||||
|
778 | typedef struct { | |||
|
779 | FramePointer* frames; | |||
|
780 | Py_ssize_t framesSize; | |||
|
781 | unsigned long long compressedSize; | |||
|
782 | } FrameSources; | |||
|
783 | ||||
|
784 | typedef struct { | |||
|
785 | void* dest; | |||
|
786 | Py_ssize_t destSize; | |||
|
787 | BufferSegment* segments; | |||
|
788 | Py_ssize_t segmentsSize; | |||
|
789 | } DestBuffer; | |||
|
790 | ||||
|
791 | typedef enum { | |||
|
792 | WorkerError_none = 0, | |||
|
793 | WorkerError_zstd = 1, | |||
|
794 | WorkerError_memory = 2, | |||
|
795 | WorkerError_sizeMismatch = 3, | |||
|
796 | WorkerError_unknownSize = 4, | |||
|
797 | } WorkerError; | |||
|
798 | ||||
|
799 | typedef struct { | |||
|
800 | /* Source records and length */ | |||
|
801 | FramePointer* framePointers; | |||
|
802 | /* Which records to process. */ | |||
|
803 | Py_ssize_t startOffset; | |||
|
804 | Py_ssize_t endOffset; | |||
|
805 | unsigned long long totalSourceSize; | |||
|
806 | ||||
|
807 | /* Compression state and settings. */ | |||
|
808 | ZSTD_DCtx* dctx; | |||
|
809 | ZSTD_DDict* ddict; | |||
|
810 | int requireOutputSizes; | |||
|
811 | ||||
|
812 | /* Output storage. */ | |||
|
813 | DestBuffer* destBuffers; | |||
|
814 | Py_ssize_t destCount; | |||
|
815 | ||||
|
816 | /* Item that error occurred on. */ | |||
|
817 | Py_ssize_t errorOffset; | |||
|
818 | /* If an error occurred. */ | |||
|
819 | WorkerError error; | |||
|
820 | /* result from zstd decompression operation */ | |||
|
821 | size_t zresult; | |||
|
822 | } WorkerState; | |||
|
823 | ||||
|
824 | static void decompress_worker(WorkerState* state) { | |||
|
825 | size_t allocationSize; | |||
|
826 | DestBuffer* destBuffer; | |||
|
827 | Py_ssize_t frameIndex; | |||
|
828 | Py_ssize_t localOffset = 0; | |||
|
829 | Py_ssize_t currentBufferStartIndex = state->startOffset; | |||
|
830 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; | |||
|
831 | void* tmpBuf; | |||
|
832 | Py_ssize_t destOffset = 0; | |||
|
833 | FramePointer* framePointers = state->framePointers; | |||
|
834 | size_t zresult; | |||
|
835 | unsigned long long totalOutputSize = 0; | |||
|
836 | ||||
|
837 | assert(NULL == state->destBuffers); | |||
|
838 | assert(0 == state->destCount); | |||
|
839 | assert(state->endOffset - state->startOffset >= 0); | |||
|
840 | ||||
|
841 | /* | |||
|
842 | * We need to allocate a buffer to hold decompressed data. How we do this | |||
|
843 | * depends on what we know about the output. The following scenarios are | |||
|
844 | * possible: | |||
|
845 | * | |||
|
846 | * 1. All structs defining frames declare the output size. | |||
|
847 | * 2. The decompressed size is embedded within the zstd frame. | |||
|
848 | * 3. The decompressed size is not stored anywhere. | |||
|
849 | * | |||
|
850 | * For now, we only support #1 and #2. | |||
|
851 | */ | |||
|
852 | ||||
|
853 | /* Resolve output segments. */ | |||
|
854 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { | |||
|
855 | FramePointer* fp = &framePointers[frameIndex]; | |||
|
856 | ||||
|
857 | if (0 == fp->destSize) { | |||
|
858 | fp->destSize = ZSTD_getDecompressedSize(fp->sourceData, fp->sourceSize); | |||
|
859 | if (0 == fp->destSize && state->requireOutputSizes) { | |||
|
860 | state->error = WorkerError_unknownSize; | |||
|
861 | state->errorOffset = frameIndex; | |||
|
862 | return; | |||
|
863 | } | |||
|
864 | } | |||
|
865 | ||||
|
866 | totalOutputSize += fp->destSize; | |||
|
867 | } | |||
|
868 | ||||
|
869 | state->destBuffers = calloc(1, sizeof(DestBuffer)); | |||
|
870 | if (NULL == state->destBuffers) { | |||
|
871 | state->error = WorkerError_memory; | |||
|
872 | return; | |||
|
873 | } | |||
|
874 | ||||
|
875 | state->destCount = 1; | |||
|
876 | ||||
|
877 | destBuffer = &state->destBuffers[state->destCount - 1]; | |||
|
878 | ||||
|
879 | assert(framePointers[state->startOffset].destSize > 0); /* For now. */ | |||
|
880 | ||||
|
881 | allocationSize = roundpow2(state->totalSourceSize); | |||
|
882 | ||||
|
883 | if (framePointers[state->startOffset].destSize > allocationSize) { | |||
|
884 | allocationSize = roundpow2(framePointers[state->startOffset].destSize); | |||
|
885 | } | |||
|
886 | ||||
|
887 | destBuffer->dest = malloc(allocationSize); | |||
|
888 | if (NULL == destBuffer->dest) { | |||
|
889 | state->error = WorkerError_memory; | |||
|
890 | return; | |||
|
891 | } | |||
|
892 | ||||
|
893 | destBuffer->destSize = allocationSize; | |||
|
894 | ||||
|
895 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); | |||
|
896 | if (NULL == destBuffer->segments) { | |||
|
897 | /* Caller will free state->dest as part of cleanup. */ | |||
|
898 | state->error = WorkerError_memory; | |||
|
899 | return; | |||
|
900 | } | |||
|
901 | ||||
|
902 | destBuffer->segmentsSize = remainingItems; | |||
|
903 | ||||
|
904 | for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) { | |||
|
905 | const void* source = framePointers[frameIndex].sourceData; | |||
|
906 | const size_t sourceSize = framePointers[frameIndex].sourceSize; | |||
|
907 | void* dest; | |||
|
908 | const size_t decompressedSize = framePointers[frameIndex].destSize; | |||
|
909 | size_t destAvailable = destBuffer->destSize - destOffset; | |||
|
910 | ||||
|
911 | assert(decompressedSize > 0); /* For now. */ | |||
|
912 | ||||
|
913 | /* | |||
|
914 | * Not enough space in the current buffer. Finish the current one, then allocate | |||
|
915 | * and switch to a new one. | |||
|
916 | */ | |||
|
917 | if (decompressedSize > destAvailable) { | |||
|
918 | /* | |||
|
919 | * Shrinking the destination buffer is optional. But it should be cheap, | |||
|
920 | * so we just do it. | |||
|
921 | */ | |||
|
922 | if (destAvailable) { | |||
|
923 | tmpBuf = realloc(destBuffer->dest, destOffset); | |||
|
924 | if (NULL == tmpBuf) { | |||
|
925 | state->error = WorkerError_memory; | |||
|
926 | return; | |||
|
927 | } | |||
|
928 | ||||
|
929 | destBuffer->dest = tmpBuf; | |||
|
930 | destBuffer->destSize = destOffset; | |||
|
931 | } | |||
|
932 | ||||
|
933 | /* Truncate segments buffer. */ | |||
|
934 | tmpBuf = realloc(destBuffer->segments, | |||
|
935 | (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment)); | |||
|
936 | if (NULL == tmpBuf) { | |||
|
937 | state->error = WorkerError_memory; | |||
|
938 | return; | |||
|
939 | } | |||
|
940 | ||||
|
941 | destBuffer->segments = tmpBuf; | |||
|
942 | destBuffer->segmentsSize = frameIndex - currentBufferStartIndex; | |||
|
943 | ||||
|
944 | /* Grow space for new DestBuffer. */ | |||
|
945 | tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer)); | |||
|
946 | if (NULL == tmpBuf) { | |||
|
947 | state->error = WorkerError_memory; | |||
|
948 | return; | |||
|
949 | } | |||
|
950 | ||||
|
951 | state->destBuffers = tmpBuf; | |||
|
952 | state->destCount++; | |||
|
953 | ||||
|
954 | destBuffer = &state->destBuffers[state->destCount - 1]; | |||
|
955 | ||||
|
956 | /* Don't take any chances with non-NULL pointers. */ | |||
|
957 | memset(destBuffer, 0, sizeof(DestBuffer)); | |||
|
958 | ||||
|
959 | allocationSize = roundpow2(state->totalSourceSize); | |||
|
960 | ||||
|
961 | if (decompressedSize > allocationSize) { | |||
|
962 | allocationSize = roundpow2(decompressedSize); | |||
|
963 | } | |||
|
964 | ||||
|
965 | destBuffer->dest = malloc(allocationSize); | |||
|
966 | if (NULL == destBuffer->dest) { | |||
|
967 | state->error = WorkerError_memory; | |||
|
968 | return; | |||
|
969 | } | |||
|
970 | ||||
|
971 | destBuffer->destSize = allocationSize; | |||
|
972 | destAvailable = allocationSize; | |||
|
973 | destOffset = 0; | |||
|
974 | localOffset = 0; | |||
|
975 | ||||
|
976 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); | |||
|
977 | if (NULL == destBuffer->segments) { | |||
|
978 | state->error = WorkerError_memory; | |||
|
979 | return; | |||
|
980 | } | |||
|
981 | ||||
|
982 | destBuffer->segmentsSize = remainingItems; | |||
|
983 | currentBufferStartIndex = frameIndex; | |||
|
984 | } | |||
|
985 | ||||
|
986 | dest = (char*)destBuffer->dest + destOffset; | |||
|
987 | ||||
|
988 | if (state->ddict) { | |||
|
989 | zresult = ZSTD_decompress_usingDDict(state->dctx, dest, decompressedSize, | |||
|
990 | source, sourceSize, state->ddict); | |||
|
991 | } | |||
|
992 | else { | |||
|
993 | zresult = ZSTD_decompressDCtx(state->dctx, dest, decompressedSize, | |||
|
994 | source, sourceSize); | |||
|
995 | } | |||
|
996 | ||||
|
997 | if (ZSTD_isError(zresult)) { | |||
|
998 | state->error = WorkerError_zstd; | |||
|
999 | state->zresult = zresult; | |||
|
1000 | state->errorOffset = frameIndex; | |||
|
1001 | return; | |||
|
1002 | } | |||
|
1003 | else if (zresult != decompressedSize) { | |||
|
1004 | state->error = WorkerError_sizeMismatch; | |||
|
1005 | state->zresult = zresult; | |||
|
1006 | state->errorOffset = frameIndex; | |||
|
1007 | return; | |||
|
1008 | } | |||
|
1009 | ||||
|
1010 | destBuffer->segments[localOffset].offset = destOffset; | |||
|
1011 | destBuffer->segments[localOffset].length = decompressedSize; | |||
|
1012 | destOffset += zresult; | |||
|
1013 | localOffset++; | |||
|
1014 | remainingItems--; | |||
|
1015 | } | |||
|
1016 | ||||
|
1017 | if (destBuffer->destSize > destOffset) { | |||
|
1018 | tmpBuf = realloc(destBuffer->dest, destOffset); | |||
|
1019 | if (NULL == tmpBuf) { | |||
|
1020 | state->error = WorkerError_memory; | |||
|
1021 | return; | |||
|
1022 | } | |||
|
1023 | ||||
|
1024 | destBuffer->dest = tmpBuf; | |||
|
1025 | destBuffer->destSize = destOffset; | |||
|
1026 | } | |||
|
1027 | } | |||
|
1028 | ||||
|
1029 | ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames, | |||
|
1030 | unsigned int threadCount) { | |||
|
1031 | void* dictData = NULL; | |||
|
1032 | size_t dictSize = 0; | |||
|
1033 | Py_ssize_t i = 0; | |||
|
1034 | int errored = 0; | |||
|
1035 | Py_ssize_t segmentsCount; | |||
|
1036 | ZstdBufferWithSegments* bws = NULL; | |||
|
1037 | PyObject* resultArg = NULL; | |||
|
1038 | Py_ssize_t resultIndex; | |||
|
1039 | ZstdBufferWithSegmentsCollection* result = NULL; | |||
|
1040 | FramePointer* framePointers = frames->frames; | |||
|
1041 | unsigned long long workerBytes = 0; | |||
|
1042 | int currentThread = 0; | |||
|
1043 | Py_ssize_t workerStartOffset = 0; | |||
|
1044 | POOL_ctx* pool = NULL; | |||
|
1045 | WorkerState* workerStates = NULL; | |||
|
1046 | unsigned long long bytesPerWorker; | |||
|
1047 | ||||
|
1048 | /* Caller should normalize 0 and negative values to 1 or larger. */ | |||
|
1049 | assert(threadCount >= 1); | |||
|
1050 | ||||
|
1051 | /* More threads than inputs makes no sense under any conditions. */ | |||
|
1052 | threadCount = frames->framesSize < threadCount ? (unsigned int)frames->framesSize | |||
|
1053 | : threadCount; | |||
|
1054 | ||||
|
1055 | /* TODO lower thread count if input size is too small and threads would just | |||
|
1056 | add overhead. */ | |||
|
1057 | ||||
|
1058 | if (decompressor->dict) { | |||
|
1059 | dictData = decompressor->dict->dictData; | |||
|
1060 | dictSize = decompressor->dict->dictSize; | |||
|
1061 | } | |||
|
1062 | ||||
|
1063 | if (dictData && !decompressor->ddict) { | |||
|
1064 | Py_BEGIN_ALLOW_THREADS | |||
|
1065 | decompressor->ddict = ZSTD_createDDict_byReference(dictData, dictSize); | |||
|
1066 | Py_END_ALLOW_THREADS | |||
|
1067 | ||||
|
1068 | if (!decompressor->ddict) { | |||
|
1069 | PyErr_SetString(ZstdError, "could not create decompression dict"); | |||
|
1070 | return NULL; | |||
|
1071 | } | |||
|
1072 | } | |||
|
1073 | ||||
|
1074 | /* If threadCount==1, we don't start a thread pool. But we do leverage the | |||
|
1075 | same API for dispatching work. */ | |||
|
1076 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); | |||
|
1077 | if (NULL == workerStates) { | |||
|
1078 | PyErr_NoMemory(); | |||
|
1079 | goto finally; | |||
|
1080 | } | |||
|
1081 | ||||
|
1082 | memset(workerStates, 0, threadCount * sizeof(WorkerState)); | |||
|
1083 | ||||
|
1084 | if (threadCount > 1) { | |||
|
1085 | pool = POOL_create(threadCount, 1); | |||
|
1086 | if (NULL == pool) { | |||
|
1087 | PyErr_SetString(ZstdError, "could not initialize zstd thread pool"); | |||
|
1088 | goto finally; | |||
|
1089 | } | |||
|
1090 | } | |||
|
1091 | ||||
|
1092 | bytesPerWorker = frames->compressedSize / threadCount; | |||
|
1093 | ||||
|
1094 | for (i = 0; i < threadCount; i++) { | |||
|
1095 | workerStates[i].dctx = ZSTD_createDCtx(); | |||
|
1096 | if (NULL == workerStates[i].dctx) { | |||
|
1097 | PyErr_NoMemory(); | |||
|
1098 | goto finally; | |||
|
1099 | } | |||
|
1100 | ||||
|
1101 | ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx); | |||
|
1102 | ||||
|
1103 | workerStates[i].ddict = decompressor->ddict; | |||
|
1104 | workerStates[i].framePointers = framePointers; | |||
|
1105 | workerStates[i].requireOutputSizes = 1; | |||
|
1106 | } | |||
|
1107 | ||||
|
1108 | Py_BEGIN_ALLOW_THREADS | |||
|
1109 | /* There are many ways to split work among workers. | |||
|
1110 | ||||
|
1111 | For now, we take a simple approach of splitting work so each worker | |||
|
1112 | gets roughly the same number of input bytes. This will result in more | |||
|
1113 | starvation than running N>threadCount jobs. But it avoids complications | |||
|
1114 | around state tracking, which could involve extra locking. | |||
|
1115 | */ | |||
|
1116 | for (i = 0; i < frames->framesSize; i++) { | |||
|
1117 | workerBytes += frames->frames[i].sourceSize; | |||
|
1118 | ||||
|
1119 | /* | |||
|
1120 | * The last worker/thread needs to handle all remaining work. Don't | |||
|
1121 | * trigger it prematurely. Defer to the block outside of the loop. | |||
|
1122 | * (But still process this loop so workerBytes is correct.) | |||
|
1123 | */ | |||
|
1124 | if (currentThread == threadCount - 1) { | |||
|
1125 | continue; | |||
|
1126 | } | |||
|
1127 | ||||
|
1128 | if (workerBytes >= bytesPerWorker) { | |||
|
1129 | workerStates[currentThread].startOffset = workerStartOffset; | |||
|
1130 | workerStates[currentThread].endOffset = i; | |||
|
1131 | workerStates[currentThread].totalSourceSize = workerBytes; | |||
|
1132 | ||||
|
1133 | if (threadCount > 1) { | |||
|
1134 | POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]); | |||
|
1135 | } | |||
|
1136 | else { | |||
|
1137 | decompress_worker(&workerStates[currentThread]); | |||
|
1138 | } | |||
|
1139 | currentThread++; | |||
|
1140 | workerStartOffset = i + 1; | |||
|
1141 | workerBytes = 0; | |||
|
1142 | } | |||
|
1143 | } | |||
|
1144 | ||||
|
1145 | if (workerBytes) { | |||
|
1146 | workerStates[currentThread].startOffset = workerStartOffset; | |||
|
1147 | workerStates[currentThread].endOffset = frames->framesSize - 1; | |||
|
1148 | workerStates[currentThread].totalSourceSize = workerBytes; | |||
|
1149 | ||||
|
1150 | if (threadCount > 1) { | |||
|
1151 | POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]); | |||
|
1152 | } | |||
|
1153 | else { | |||
|
1154 | decompress_worker(&workerStates[currentThread]); | |||
|
1155 | } | |||
|
1156 | } | |||
|
1157 | ||||
|
1158 | if (threadCount > 1) { | |||
|
1159 | POOL_free(pool); | |||
|
1160 | pool = NULL; | |||
|
1161 | } | |||
|
1162 | Py_END_ALLOW_THREADS | |||
|
1163 | ||||
|
1164 | for (i = 0; i < threadCount; i++) { | |||
|
1165 | switch (workerStates[i].error) { | |||
|
1166 | case WorkerError_none: | |||
|
1167 | break; | |||
|
1168 | ||||
|
1169 | case WorkerError_zstd: | |||
|
1170 | PyErr_Format(ZstdError, "error decompressing item %zd: %s", | |||
|
1171 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); | |||
|
1172 | errored = 1; | |||
|
1173 | break; | |||
|
1174 | ||||
|
1175 | case WorkerError_memory: | |||
|
1176 | PyErr_NoMemory(); | |||
|
1177 | errored = 1; | |||
|
1178 | break; | |||
|
1179 | ||||
|
1180 | case WorkerError_sizeMismatch: | |||
|
1181 | PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %llu", | |||
|
1182 | workerStates[i].errorOffset, workerStates[i].zresult, | |||
|
1183 | framePointers[workerStates[i].errorOffset].destSize); | |||
|
1184 | errored = 1; | |||
|
1185 | break; | |||
|
1186 | ||||
|
1187 | case WorkerError_unknownSize: | |||
|
1188 | PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd", | |||
|
1189 | workerStates[i].errorOffset); | |||
|
1190 | errored = 1; | |||
|
1191 | break; | |||
|
1192 | ||||
|
1193 | default: | |||
|
1194 | PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug", | |||
|
1195 | workerStates[i].error); | |||
|
1196 | errored = 1; | |||
|
1197 | break; | |||
|
1198 | } | |||
|
1199 | ||||
|
1200 | if (errored) { | |||
|
1201 | break; | |||
|
1202 | } | |||
|
1203 | } | |||
|
1204 | ||||
|
1205 | if (errored) { | |||
|
1206 | goto finally; | |||
|
1207 | } | |||
|
1208 | ||||
|
1209 | segmentsCount = 0; | |||
|
1210 | for (i = 0; i < threadCount; i++) { | |||
|
1211 | segmentsCount += workerStates[i].destCount; | |||
|
1212 | } | |||
|
1213 | ||||
|
1214 | resultArg = PyTuple_New(segmentsCount); | |||
|
1215 | if (NULL == resultArg) { | |||
|
1216 | goto finally; | |||
|
1217 | } | |||
|
1218 | ||||
|
1219 | resultIndex = 0; | |||
|
1220 | ||||
|
1221 | for (i = 0; i < threadCount; i++) { | |||
|
1222 | Py_ssize_t bufferIndex; | |||
|
1223 | WorkerState* state = &workerStates[i]; | |||
|
1224 | ||||
|
1225 | for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) { | |||
|
1226 | DestBuffer* destBuffer = &state->destBuffers[bufferIndex]; | |||
|
1227 | ||||
|
1228 | bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize, | |||
|
1229 | destBuffer->segments, destBuffer->segmentsSize); | |||
|
1230 | if (NULL == bws) { | |||
|
1231 | goto finally; | |||
|
1232 | } | |||
|
1233 | ||||
|
1234 | /* | |||
|
1235 | * Memory for buffer and segments was allocated using malloc() in worker | |||
|
1236 | * and the memory is transferred to the BufferWithSegments instance. So | |||
|
1237 | * tell instance to use free() and NULL the reference in the state struct | |||
|
1238 | * so it isn't freed below. | |||
|
1239 | */ | |||
|
1240 | bws->useFree = 1; | |||
|
1241 | destBuffer->dest = NULL; | |||
|
1242 | destBuffer->segments = NULL; | |||
|
1243 | ||||
|
1244 | PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws); | |||
|
1245 | } | |||
|
1246 | } | |||
|
1247 | ||||
|
1248 | result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject( | |||
|
1249 | (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg); | |||
|
1250 | ||||
|
1251 | finally: | |||
|
1252 | Py_CLEAR(resultArg); | |||
|
1253 | ||||
|
1254 | if (workerStates) { | |||
|
1255 | for (i = 0; i < threadCount; i++) { | |||
|
1256 | Py_ssize_t bufferIndex; | |||
|
1257 | WorkerState* state = &workerStates[i]; | |||
|
1258 | ||||
|
1259 | if (state->dctx) { | |||
|
1260 | ZSTD_freeDCtx(state->dctx); | |||
|
1261 | } | |||
|
1262 | ||||
|
1263 | for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) { | |||
|
1264 | if (state->destBuffers) { | |||
|
1265 | /* | |||
|
1266 | * Will be NULL if memory was transferred to a BufferWithSegments. | |||
|
1267 | * Otherwise it is left over after an error occurred. | |||
|
1268 | */ | |||
|
1269 | free(state->destBuffers[bufferIndex].dest); | |||
|
1270 | free(state->destBuffers[bufferIndex].segments); | |||
|
1271 | } | |||
|
1272 | } | |||
|
1273 | ||||
|
1274 | free(state->destBuffers); | |||
|
1275 | } | |||
|
1276 | ||||
|
1277 | PyMem_Free(workerStates); | |||
|
1278 | } | |||
|
1279 | ||||
|
1280 | POOL_free(pool); | |||
|
1281 | ||||
|
1282 | return result; | |||
|
1283 | } | |||
|
1284 | ||||
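The loop above hands whole frames to workers until each has been assigned roughly compressedSize / threadCount input bytes, and the trailing block gives whatever remains to the last worker. A standalone sketch of that splitting strategy follows; the Frame struct and the sizes in it are invented for illustration and are not part of this module.

#include <stdio.h>

/* Invented stand-in for FramePointer; only sourceSize matters here. */
typedef struct { unsigned long long sourceSize; } Frame;

int main(void) {
	Frame frames[] = { { 100 }, { 900 }, { 50 }, { 950 }, { 500 }, { 500 } };
	size_t frameCount = sizeof(frames) / sizeof(frames[0]);
	unsigned int threadCount = 3;
	unsigned int currentThread = 0;
	unsigned long long total = 0, bytesPerWorker, workerBytes = 0;
	size_t i, startOffset = 0;

	for (i = 0; i < frameCount; i++) {
		total += frames[i].sourceSize;
	}
	bytesPerWorker = total / threadCount;

	for (i = 0; i < frameCount; i++) {
		workerBytes += frames[i].sourceSize;

		/* The last worker takes everything that is left over. */
		if (currentThread == threadCount - 1) {
			continue;
		}

		if (workerBytes >= bytesPerWorker) {
			printf("worker %u: frames %zu..%zu (%llu bytes)\n",
				currentThread, startOffset, i, workerBytes);
			currentThread++;
			startOffset = i + 1;
			workerBytes = 0;
		}
	}

	if (workerBytes) {
		printf("worker %u: frames %zu..%zu (%llu bytes)\n",
			currentThread, startOffset, frameCount - 1, workerBytes);
	}

	return 0;
}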
|
1285 | PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__, | |||
|
1286 | "Decompress multiple frames to output buffers\n" | |||
|
1287 | "\n" | |||
|
1288 | "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n" | |||
|
1289 | "list of bytes-like objects. Each item in the passed collection should be a\n" | |||
|
1290 | "compressed zstd frame.\n" | |||
|
1291 | "\n" | |||
|
1292 | "Unless ``decompressed_sizes`` is specified, the content size *must* be\n" | |||
|
1293 | "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n" | |||
|
1294 | "it is an object conforming to the buffer protocol that represents an array\n" | |||
|
1295 | "of 64-bit unsigned integers in the machine's native format. Specifying\n" | |||
|
1296 | "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n" | |||
|
1297 | "output size.\n" | |||
|
1298 | "\n" | |||
|
1299 | "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n" | |||
|
1300 | "data. All decompressed data is allocated in a single memory buffer. The\n" | |||
|
1301 | "``BufferWithSegments`` instance tracks which objects are at which offsets\n" | |||
|
1302 | "and their respective lengths.\n" | |||
|
1303 | "\n" | |||
|
1304 | "The ``threads`` argument controls how many threads to use for operations.\n" | |||
|
1305 | "Negative values will use the same number of threads as logical CPUs on the\n" | |||
|
1306 | "machine.\n" | |||
|
1307 | ); | |||
|
1308 | ||||
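As a concrete illustration of the layout the docstring describes, the sketch below builds such an array in C: one native-endian 64-bit unsigned integer per frame, packed back to back, so the buffer must be exactly 8 * frameCount bytes. The sizes used here are made up for the example.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void) {
	/* Hypothetical known output sizes for three compressed frames. */
	uint64_t sizes[3] = { 16384, 1048576, 4096 };
	unsigned char raw[sizeof(sizes)];

	/* The buffer handed to decompressed_sizes is just these integers,
	   back to back, in host byte order: 8 * frameCount bytes. */
	memcpy(raw, sizes, sizeof(sizes));

	printf("%zu bytes for %zu frames\n",
		sizeof(raw), sizeof(sizes) / sizeof(sizes[0]));
	return 0;
}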
|
1309 | static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) { | |||
|
1310 | static char* kwlist[] = { | |||
|
1311 | "frames", | |||
|
1312 | "decompressed_sizes", | |||
|
1313 | "threads", | |||
|
1314 | NULL | |||
|
1315 | }; | |||
|
1316 | ||||
|
1317 | PyObject* frames; | |||
|
1318 | Py_buffer frameSizes; | |||
|
1319 | int threads = 0; | |||
|
1320 | Py_ssize_t frameCount; | |||
|
1321 | Py_buffer* frameBuffers = NULL; | |||
|
1322 | FramePointer* framePointers = NULL; | |||
|
1323 | unsigned long long* frameSizesP = NULL; | |||
|
1324 | unsigned long long totalInputSize = 0; | |||
|
1325 | FrameSources frameSources; | |||
|
1326 | ZstdBufferWithSegmentsCollection* result = NULL; | |||
|
1327 | Py_ssize_t i; | |||
|
1328 | ||||
|
1329 | memset(&frameSizes, 0, sizeof(frameSizes)); | |||
|
1330 | ||||
|
1331 | #if PY_MAJOR_VERSION >= 3 | |||
|
1332 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer", | |||
|
1333 | #else | |||
|
1334 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer", | |||
|
1335 | #endif | |||
|
1336 | kwlist, &frames, &frameSizes, &threads)) { | |||
|
1337 | return NULL; | |||
|
1338 | } | |||
|
1339 | ||||
|
1340 | if (frameSizes.buf) { | |||
|
1341 | if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) { | |||
|
1342 | PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension"); | |||
|
1343 | goto finally; | |||
|
1344 | } | |||
|
1345 | ||||
|
1346 | frameSizesP = (unsigned long long*)frameSizes.buf; | |||
|
1347 | } | |||
|
1348 | ||||
|
1349 | if (threads < 0) { | |||
|
1350 | threads = cpu_count(); | |||
|
1351 | } | |||
|
1352 | ||||
|
1353 | if (threads < 2) { | |||
|
1354 | threads = 1; | |||
|
1355 | } | |||
|
1356 | ||||
|
1357 | if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) { | |||
|
1358 | ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames; | |||
|
1359 | frameCount = buffer->segmentCount; | |||
|
1360 | ||||
|
1361 | if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) { | |||
|
1362 | PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd", | |||
|
1363 | frameCount * sizeof(unsigned long long), frameSizes.len); | |||
|
1364 | goto finally; | |||
|
1365 | } | |||
|
1366 | ||||
|
1367 | framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer)); | |||
|
1368 | if (!framePointers) { | |||
|
1369 | PyErr_NoMemory(); | |||
|
1370 | goto finally; | |||
|
1371 | } | |||
|
1372 | ||||
|
1373 | for (i = 0; i < frameCount; i++) { | |||
|
1374 | void* sourceData; | |||
|
1375 | unsigned long long sourceSize; | |||
|
1376 | unsigned long long decompressedSize = 0; | |||
|
1377 | ||||
|
1378 | if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) { | |||
|
1379 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i); | |||
|
1380 | goto finally; | |||
|
1381 | } | |||
|
1382 | ||||
|
1383 | sourceData = (char*)buffer->data + buffer->segments[i].offset; | |||
|
1384 | sourceSize = buffer->segments[i].length; | |||
|
1385 | totalInputSize += sourceSize; | |||
|
1386 | ||||
|
1387 | if (frameSizesP) { | |||
|
1388 | decompressedSize = frameSizesP[i]; | |||
|
1389 | } | |||
|
1390 | ||||
|
1391 | framePointers[i].sourceData = sourceData; | |||
|
1392 | framePointers[i].sourceSize = sourceSize; | |||
|
1393 | framePointers[i].destSize = decompressedSize; | |||
|
1394 | } | |||
|
1395 | } | |||
|
1396 | else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) { | |||
|
1397 | Py_ssize_t offset = 0; | |||
|
1398 | ZstdBufferWithSegments* buffer; | |||
|
1399 | ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames; | |||
|
1400 | ||||
|
1401 | frameCount = BufferWithSegmentsCollection_length(collection); | |||
|
1402 | ||||
|
1403 | if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) { | |||
|
1404 | PyErr_Format(PyExc_ValueError, | |||
|
1405 | "decompressed_sizes size mismatch; expected %zd; got %zd", | |||
|
1406 | frameCount * sizeof(unsigned long long), frameSizes.len); | |||
|
1407 | goto finally; | |||
|
1408 | } | |||
|
1409 | ||||
|
1410 | framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer)); | |||
|
1411 | if (NULL == framePointers) { | |||
|
1412 | PyErr_NoMemory(); | |||
|
1413 | goto finally; | |||
|
1414 | } | |||
|
1415 | ||||
|
1416 | /* Iterate the data structure directly because it is faster. */ | |||
|
1417 | for (i = 0; i < collection->bufferCount; i++) { | |||
|
1418 | Py_ssize_t segmentIndex; | |||
|
1419 | buffer = collection->buffers[i]; | |||
|
1420 | ||||
|
1421 | for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) { | |||
|
1422 | if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) { | |||
|
1423 | PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", | |||
|
1424 | offset); | |||
|
1425 | goto finally; | |||
|
1426 | } | |||
|
1427 | ||||
|
1428 | totalInputSize += buffer->segments[segmentIndex].length; | |||
|
1429 | ||||
|
1430 | framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset; | |||
|
1431 | framePointers[offset].sourceSize = buffer->segments[segmentIndex].length; | |||
|
1432 | framePointers[offset].destSize = frameSizesP ? frameSizesP[offset] : 0; | |||
|
1433 | ||||
|
1434 | offset++; | |||
|
1435 | } | |||
|
1436 | } | |||
|
1437 | } | |||
|
1438 | else if (PyList_Check(frames)) { | |||
|
1439 | frameCount = PyList_GET_SIZE(frames); | |||
|
1440 | ||||
|
1441 | if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) { | |||
|
1442 | PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd", | |||
|
1443 | frameCount * sizeof(unsigned long long), frameSizes.len); | |||
|
1444 | goto finally; | |||
|
1445 | } | |||
|
1446 | ||||
|
1447 | framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer)); | |||
|
1448 | if (!framePointers) { | |||
|
1449 | PyErr_NoMemory(); | |||
|
1450 | goto finally; | |||
|
1451 | } | |||
|
1452 | ||||
|
1453 | /* | |||
|
1454 | * It is not clear whether Py_buffer.buf is still valid after | |||
|
1455 | * PyBuffer_Release. So, we hold a reference to all Py_buffer instances | |||
|
1456 | * for the duration of the operation. | |||
|
1457 | */ | |||
|
1458 | frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer)); | |||
|
1459 | if (NULL == frameBuffers) { | |||
|
1460 | PyErr_NoMemory(); | |||
|
1461 | goto finally; | |||
|
1462 | } | |||
|
1463 | ||||
|
1464 | memset(frameBuffers, 0, frameCount * sizeof(Py_buffer)); | |||
|
1465 | ||||
|
1466 | /* Do a pass to assemble info about our input buffers and output sizes. */ | |||
|
1467 | for (i = 0; i < frameCount; i++) { | |||
|
1468 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i), | |||
|
1469 | &frameBuffers[i], PyBUF_CONTIG_RO)) { | |||
|
1470 | PyErr_Clear(); | |||
|
1471 | PyErr_Format(PyExc_TypeError, "item %zd not a bytes-like object", i); | |||
|
1472 | goto finally; | |||
|
1473 | } | |||
|
1474 | ||||
|
1475 | totalInputSize += frameBuffers[i].len; | |||
|
1476 | ||||
|
1477 | framePointers[i].sourceData = frameBuffers[i].buf; | |||
|
1478 | framePointers[i].sourceSize = frameBuffers[i].len; | |||
|
1479 | framePointers[i].destSize = frameSizesP ? frameSizesP[i] : 0; | |||
|
1480 | } | |||
|
1481 | } | |||
|
1482 | else { | |||
|
1483 | PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments"); | |||
|
1484 | goto finally; | |||
|
1485 | } | |||
|
1486 | ||||
|
1487 | /* We now have an array with info about our inputs and outputs. Feed it into | |||
|
1488 | our generic decompression function. */ | |||
|
1489 | frameSources.frames = framePointers; | |||
|
1490 | frameSources.framesSize = frameCount; | |||
|
1491 | frameSources.compressedSize = totalInputSize; | |||
|
1492 | ||||
|
1493 | result = decompress_from_framesources(self, &frameSources, threads); | |||
|
1494 | ||||
|
1495 | finally: | |||
|
1496 | if (frameSizes.buf) { | |||
|
1497 | PyBuffer_Release(&frameSizes); | |||
|
1498 | } | |||
|
1499 | PyMem_Free(framePointers); | |||
|
1500 | ||||
|
1501 | if (frameBuffers) { | |||
|
1502 | for (i = 0; i < frameCount; i++) { | |||
|
1503 | PyBuffer_Release(&frameBuffers[i]); | |||
|
1504 | } | |||
|
1505 | ||||
|
1506 | PyMem_Free(frameBuffers); | |||
|
1507 | } | |||
|
1508 | ||||
|
1509 | return result; | |||
|
1510 | } | |||
|
1511 | ||||
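The list branch above acquires every Py_buffer up front and releases them only after decompression, because it is unclear whether Py_buffer.buf stays valid after PyBuffer_Release(). A minimal sketch of that acquire-all/work/release-all pattern, detached from this module (do_work() is a placeholder and items is assumed to be a list):

#include <Python.h>

/* Placeholder for whatever consumes the raw buffers. */
static void do_work(Py_buffer* buffers, Py_ssize_t count) {
	(void)buffers;
	(void)count;
}

int process_list(PyObject* items) {
	Py_ssize_t count = PyList_GET_SIZE(items);
	Py_ssize_t i, acquired = 0;
	Py_buffer* buffers = PyMem_Malloc(count * sizeof(Py_buffer));
	int result = -1;

	if (NULL == buffers) {
		PyErr_NoMemory();
		return -1;
	}

	/* Acquire every buffer before doing any work so the .buf pointers
	   stay valid for the whole operation. */
	for (i = 0; i < count; i++) {
		if (0 != PyObject_GetBuffer(PyList_GET_ITEM(items, i),
			&buffers[i], PyBUF_CONTIG_RO)) {
			goto finally;
		}
		acquired++;
	}

	do_work(buffers, count);
	result = 0;

finally:
	for (i = 0; i < acquired; i++) {
		PyBuffer_Release(&buffers[i]);
	}
	PyMem_Free(buffers);
	return result;
}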
779 | static PyMethodDef Decompressor_methods[] = { |
|
1512 | static PyMethodDef Decompressor_methods[] = { | |
780 | { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS, |
|
1513 | { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS, | |
781 | Decompressor_copy_stream__doc__ }, |
|
1514 | Decompressor_copy_stream__doc__ }, | |
782 | { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, |
|
1515 | { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS, | |
783 | Decompressor_decompress__doc__ }, |
|
1516 | Decompressor_decompress__doc__ }, | |
784 | { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS, |
|
1517 | { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_NOARGS, | |
785 | Decompressor_decompressobj__doc__ }, |
|
1518 | Decompressor_decompressobj__doc__ }, | |
786 | { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS, |
|
1519 | { "read_from", (PyCFunction)Decompressor_read_from, METH_VARARGS | METH_KEYWORDS, | |
787 | Decompressor_read_from__doc__ }, |
|
1520 | Decompressor_read_from__doc__ }, | |
788 | { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, |
|
1521 | { "write_to", (PyCFunction)Decompressor_write_to, METH_VARARGS | METH_KEYWORDS, | |
789 | Decompressor_write_to__doc__ }, |
|
1522 | Decompressor_write_to__doc__ }, | |
790 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, |
|
1523 | { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain, | |
791 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, |
|
1524 | METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ }, | |
|
1525 | { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer, | |||
|
1526 | METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ }, | |||
792 | { NULL, NULL } |
|
1527 | { NULL, NULL } | |
793 | }; |
|
1528 | }; | |
794 |
|
1529 | |||
795 | PyTypeObject ZstdDecompressorType = { |
|
1530 | PyTypeObject ZstdDecompressorType = { | |
796 | PyVarObject_HEAD_INIT(NULL, 0) |
|
1531 | PyVarObject_HEAD_INIT(NULL, 0) | |
797 | "zstd.ZstdDecompressor", /* tp_name */ |
|
1532 | "zstd.ZstdDecompressor", /* tp_name */ | |
798 | sizeof(ZstdDecompressor), /* tp_basicsize */ |
|
1533 | sizeof(ZstdDecompressor), /* tp_basicsize */ | |
799 | 0, /* tp_itemsize */ |
|
1534 | 0, /* tp_itemsize */ | |
800 | (destructor)Decompressor_dealloc, /* tp_dealloc */ |
|
1535 | (destructor)Decompressor_dealloc, /* tp_dealloc */ | |
801 | 0, /* tp_print */ |
|
1536 | 0, /* tp_print */ | |
802 | 0, /* tp_getattr */ |
|
1537 | 0, /* tp_getattr */ | |
803 | 0, /* tp_setattr */ |
|
1538 | 0, /* tp_setattr */ | |
804 | 0, /* tp_compare */ |
|
1539 | 0, /* tp_compare */ | |
805 | 0, /* tp_repr */ |
|
1540 | 0, /* tp_repr */ | |
806 | 0, /* tp_as_number */ |
|
1541 | 0, /* tp_as_number */ | |
807 | 0, /* tp_as_sequence */ |
|
1542 | 0, /* tp_as_sequence */ | |
808 | 0, /* tp_as_mapping */ |
|
1543 | 0, /* tp_as_mapping */ | |
809 | 0, /* tp_hash */ |
|
1544 | 0, /* tp_hash */ | |
810 | 0, /* tp_call */ |
|
1545 | 0, /* tp_call */ | |
811 | 0, /* tp_str */ |
|
1546 | 0, /* tp_str */ | |
812 | 0, /* tp_getattro */ |
|
1547 | 0, /* tp_getattro */ | |
813 | 0, /* tp_setattro */ |
|
1548 | 0, /* tp_setattro */ | |
814 | 0, /* tp_as_buffer */ |
|
1549 | 0, /* tp_as_buffer */ | |
815 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
1550 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
816 | Decompressor__doc__, /* tp_doc */ |
|
1551 | Decompressor__doc__, /* tp_doc */ | |
817 | 0, /* tp_traverse */ |
|
1552 | 0, /* tp_traverse */ | |
818 | 0, /* tp_clear */ |
|
1553 | 0, /* tp_clear */ | |
819 | 0, /* tp_richcompare */ |
|
1554 | 0, /* tp_richcompare */ | |
820 | 0, /* tp_weaklistoffset */ |
|
1555 | 0, /* tp_weaklistoffset */ | |
821 | 0, /* tp_iter */ |
|
1556 | 0, /* tp_iter */ | |
822 | 0, /* tp_iternext */ |
|
1557 | 0, /* tp_iternext */ | |
823 | Decompressor_methods, /* tp_methods */ |
|
1558 | Decompressor_methods, /* tp_methods */ | |
824 | 0, /* tp_members */ |
|
1559 | 0, /* tp_members */ | |
825 | 0, /* tp_getset */ |
|
1560 | 0, /* tp_getset */ | |
826 | 0, /* tp_base */ |
|
1561 | 0, /* tp_base */ | |
827 | 0, /* tp_dict */ |
|
1562 | 0, /* tp_dict */ | |
828 | 0, /* tp_descr_get */ |
|
1563 | 0, /* tp_descr_get */ | |
829 | 0, /* tp_descr_set */ |
|
1564 | 0, /* tp_descr_set */ | |
830 | 0, /* tp_dictoffset */ |
|
1565 | 0, /* tp_dictoffset */ | |
831 | (initproc)Decompressor_init, /* tp_init */ |
|
1566 | (initproc)Decompressor_init, /* tp_init */ | |
832 | 0, /* tp_alloc */ |
|
1567 | 0, /* tp_alloc */ | |
833 | PyType_GenericNew, /* tp_new */ |
|
1568 | PyType_GenericNew, /* tp_new */ | |
834 | }; |
|
1569 | }; | |
835 |
|
1570 | |||
836 | void decompressor_module_init(PyObject* mod) { |
|
1571 | void decompressor_module_init(PyObject* mod) { | |
837 | Py_TYPE(&ZstdDecompressorType) = &PyType_Type; |
|
1572 | Py_TYPE(&ZstdDecompressorType) = &PyType_Type; | |
838 | if (PyType_Ready(&ZstdDecompressorType) < 0) { |
|
1573 | if (PyType_Ready(&ZstdDecompressorType) < 0) { | |
839 | return; |
|
1574 | return; | |
840 | } |
|
1575 | } | |
841 |
|
1576 | |||
842 | Py_INCREF((PyObject*)&ZstdDecompressorType); |
|
1577 | Py_INCREF((PyObject*)&ZstdDecompressorType); | |
843 | PyModule_AddObject(mod, "ZstdDecompressor", |
|
1578 | PyModule_AddObject(mod, "ZstdDecompressor", | |
844 | (PyObject*)&ZstdDecompressorType); |
|
1579 | (PyObject*)&ZstdDecompressorType); | |
845 | } |
|
1580 | } |
@@ -1,254 +1,251 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | #define min(a, b) (((a) < (b)) ? (a) : (b)) |
|
11 | #define min(a, b) (((a) < (b)) ? (a) : (b)) | |
12 |
|
12 | |||
13 | extern PyObject* ZstdError; |
|
13 | extern PyObject* ZstdError; | |
14 |
|
14 | |||
15 | PyDoc_STRVAR(ZstdDecompressorIterator__doc__, |
|
15 | PyDoc_STRVAR(ZstdDecompressorIterator__doc__, | |
16 | "Represents an iterator of decompressed data.\n" |
|
16 | "Represents an iterator of decompressed data.\n" | |
17 | ); |
|
17 | ); | |
18 |
|
18 | |||
19 | static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) { |
|
19 | static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) { | |
20 | Py_XDECREF(self->decompressor); |
|
20 | Py_XDECREF(self->decompressor); | |
21 | Py_XDECREF(self->reader); |
|
21 | Py_XDECREF(self->reader); | |
22 |
|
22 | |||
23 | if (self->buffer) { |
|
23 | if (self->buffer) { | |
24 | PyBuffer_Release(self->buffer); |
|
24 | PyBuffer_Release(self->buffer); | |
25 | PyMem_FREE(self->buffer); |
|
25 | PyMem_FREE(self->buffer); | |
26 | self->buffer = NULL; |
|
26 | self->buffer = NULL; | |
27 | } |
|
27 | } | |
28 |
|
28 | |||
29 | if (self->dstream) { |
|
|||
30 | ZSTD_freeDStream(self->dstream); |
|
|||
31 | self->dstream = NULL; |
|
|||
32 | } |
|
|||
33 |
|
||||
34 | if (self->input.src) { |
|
29 | if (self->input.src) { | |
35 | PyMem_Free((void*)self->input.src); |
|
30 | PyMem_Free((void*)self->input.src); | |
36 | self->input.src = NULL; |
|
31 | self->input.src = NULL; | |
37 | } |
|
32 | } | |
38 |
|
33 | |||
39 | PyObject_Del(self); |
|
34 | PyObject_Del(self); | |
40 | } |
|
35 | } | |
41 |
|
36 | |||
42 | static PyObject* ZstdDecompressorIterator_iter(PyObject* self) { |
|
37 | static PyObject* ZstdDecompressorIterator_iter(PyObject* self) { | |
43 | Py_INCREF(self); |
|
38 | Py_INCREF(self); | |
44 | return self; |
|
39 | return self; | |
45 | } |
|
40 | } | |
46 |
|
41 | |||
47 | static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) { |
|
42 | static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) { | |
48 | size_t zresult; |
|
43 | size_t zresult; | |
49 | PyObject* chunk; |
|
44 | PyObject* chunk; | |
50 | DecompressorIteratorResult result; |
|
45 | DecompressorIteratorResult result; | |
51 | size_t oldInputPos = self->input.pos; |
|
46 | size_t oldInputPos = self->input.pos; | |
52 |
|
47 | |||
|
48 | assert(self->decompressor->dstream); | |||
|
49 | ||||
53 | result.chunk = NULL; |
|
50 | result.chunk = NULL; | |
54 |
|
51 | |||
55 | chunk = PyBytes_FromStringAndSize(NULL, self->outSize); |
|
52 | chunk = PyBytes_FromStringAndSize(NULL, self->outSize); | |
56 | if (!chunk) { |
|
53 | if (!chunk) { | |
57 | result.errored = 1; |
|
54 | result.errored = 1; | |
58 | return result; |
|
55 | return result; | |
59 | } |
|
56 | } | |
60 |
|
57 | |||
61 | self->output.dst = PyBytes_AsString(chunk); |
|
58 | self->output.dst = PyBytes_AsString(chunk); | |
62 | self->output.size = self->outSize; |
|
59 | self->output.size = self->outSize; | |
63 | self->output.pos = 0; |
|
60 | self->output.pos = 0; | |
64 |
|
61 | |||
65 | Py_BEGIN_ALLOW_THREADS |
|
62 | Py_BEGIN_ALLOW_THREADS | |
66 | zresult = ZSTD_decompressStream(self->dstream, &self->output, &self->input); |
|
63 | zresult = ZSTD_decompressStream(self->decompressor->dstream, &self->output, &self->input); | |
67 | Py_END_ALLOW_THREADS |
|
64 | Py_END_ALLOW_THREADS | |
68 |
|
65 | |||
69 | /* We're done with the pointer. Nullify to prevent anyone from getting a |
|
66 | /* We're done with the pointer. Nullify to prevent anyone from getting a | |
70 | handle on a Python object. */ |
|
67 | handle on a Python object. */ | |
71 | self->output.dst = NULL; |
|
68 | self->output.dst = NULL; | |
72 |
|
69 | |||
73 | if (ZSTD_isError(zresult)) { |
|
70 | if (ZSTD_isError(zresult)) { | |
74 | Py_DECREF(chunk); |
|
71 | Py_DECREF(chunk); | |
75 | PyErr_Format(ZstdError, "zstd decompress error: %s", |
|
72 | PyErr_Format(ZstdError, "zstd decompress error: %s", | |
76 | ZSTD_getErrorName(zresult)); |
|
73 | ZSTD_getErrorName(zresult)); | |
77 | result.errored = 1; |
|
74 | result.errored = 1; | |
78 | return result; |
|
75 | return result; | |
79 | } |
|
76 | } | |
80 |
|
77 | |||
81 | self->readCount += self->input.pos - oldInputPos; |
|
78 | self->readCount += self->input.pos - oldInputPos; | |
82 |
|
79 | |||
83 | /* Frame is fully decoded. Input exhausted and output sitting in buffer. */ |
|
80 | /* Frame is fully decoded. Input exhausted and output sitting in buffer. */ | |
84 | if (0 == zresult) { |
|
81 | if (0 == zresult) { | |
85 | self->finishedInput = 1; |
|
82 | self->finishedInput = 1; | |
86 | self->finishedOutput = 1; |
|
83 | self->finishedOutput = 1; | |
87 | } |
|
84 | } | |
88 |
|
85 | |||
89 | /* If it produced output data, return it. */ |
|
86 | /* If it produced output data, return it. */ | |
90 | if (self->output.pos) { |
|
87 | if (self->output.pos) { | |
91 | if (self->output.pos < self->outSize) { |
|
88 | if (self->output.pos < self->outSize) { | |
92 | if (_PyBytes_Resize(&chunk, self->output.pos)) { |
|
89 | if (_PyBytes_Resize(&chunk, self->output.pos)) { | |
93 | result.errored = 1; |
|
90 | result.errored = 1; | |
94 | return result; |
|
91 | return result; | |
95 | } |
|
92 | } | |
96 | } |
|
93 | } | |
97 | } |
|
94 | } | |
98 | else { |
|
95 | else { | |
99 | Py_DECREF(chunk); |
|
96 | Py_DECREF(chunk); | |
100 | chunk = NULL; |
|
97 | chunk = NULL; | |
101 | } |
|
98 | } | |
102 |
|
99 | |||
103 | result.errored = 0; |
|
100 | result.errored = 0; | |
104 | result.chunk = chunk; |
|
101 | result.chunk = chunk; | |
105 |
|
102 | |||
106 | return result; |
|
103 | return result; | |
107 | } |
|
104 | } | |
108 |
|
105 | |||
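read_decompressor_iterator() above runs one step of the usual ZSTD_decompressStream() loop per call: feed the shared input buffer, collect whatever fits in one output chunk, and treat a return value of zero as end of frame. For reference, a standalone sketch of the complete loop over a single in-memory input; the buffer size and setup are illustrative only.

#include <stdio.h>
#include "zstd.h"

int decompress_all(const void* src, size_t srcSize) {
	char out[4096];
	ZSTD_DStream* dstream = ZSTD_createDStream();
	ZSTD_inBuffer input = { src, srcSize, 0 };
	size_t zresult;

	if (NULL == dstream) {
		return -1;
	}

	zresult = ZSTD_initDStream(dstream);
	if (ZSTD_isError(zresult)) {
		ZSTD_freeDStream(dstream);
		return -1;
	}

	do {
		ZSTD_outBuffer output = { out, sizeof(out), 0 };

		zresult = ZSTD_decompressStream(dstream, &output, &input);
		if (ZSTD_isError(zresult)) {
			fprintf(stderr, "zstd decompress error: %s\n",
				ZSTD_getErrorName(zresult));
			ZSTD_freeDStream(dstream);
			return -1;
		}

		/* output.pos bytes of decompressed data now sit in out[]. A zero
		   return means the frame is fully decoded; non-zero means more
		   input and/or output space is still needed. */
		if (input.pos == input.size && 0 == output.pos) {
			break; /* no forward progress possible: input is truncated */
		}
	} while (zresult != 0);

	ZSTD_freeDStream(dstream);
	return zresult == 0 ? 0 : -1;
}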
109 | static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) { |
|
106 | static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) { | |
110 | PyObject* readResult = NULL; |
|
107 | PyObject* readResult = NULL; | |
111 | char* readBuffer; |
|
108 | char* readBuffer; | |
112 | Py_ssize_t readSize; |
|
109 | Py_ssize_t readSize; | |
113 | Py_ssize_t bufferRemaining; |
|
110 | Py_ssize_t bufferRemaining; | |
114 | DecompressorIteratorResult result; |
|
111 | DecompressorIteratorResult result; | |
115 |
|
112 | |||
116 | if (self->finishedOutput) { |
|
113 | if (self->finishedOutput) { | |
117 | PyErr_SetString(PyExc_StopIteration, "output flushed"); |
|
114 | PyErr_SetString(PyExc_StopIteration, "output flushed"); | |
118 | return NULL; |
|
115 | return NULL; | |
119 | } |
|
116 | } | |
120 |
|
117 | |||
121 | /* If we have data left in the input, consume it. */ |
|
118 | /* If we have data left in the input, consume it. */ | |
122 | if (self->input.pos < self->input.size) { |
|
119 | if (self->input.pos < self->input.size) { | |
123 | result = read_decompressor_iterator(self); |
|
120 | result = read_decompressor_iterator(self); | |
124 | if (result.chunk || result.errored) { |
|
121 | if (result.chunk || result.errored) { | |
125 | return result.chunk; |
|
122 | return result.chunk; | |
126 | } |
|
123 | } | |
127 |
|
124 | |||
128 | /* Else fall through to get more data from input. */ |
|
125 | /* Else fall through to get more data from input. */ | |
129 | } |
|
126 | } | |
130 |
|
127 | |||
131 | read_from_source: |
|
128 | read_from_source: | |
132 |
|
129 | |||
133 | if (!self->finishedInput) { |
|
130 | if (!self->finishedInput) { | |
134 | if (self->reader) { |
|
131 | if (self->reader) { | |
135 | readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize); |
|
132 | readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize); | |
136 | if (!readResult) { |
|
133 | if (!readResult) { | |
137 | return NULL; |
|
134 | return NULL; | |
138 | } |
|
135 | } | |
139 |
|
136 | |||
140 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); |
|
137 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
141 | } |
|
138 | } | |
142 | else { |
|
139 | else { | |
143 | assert(self->buffer && self->buffer->buf); |
|
140 | assert(self->buffer && self->buffer->buf); | |
144 |
|
141 | |||
145 | /* Only support contiguous C arrays for now */ |
|
142 | /* Only support contiguous C arrays for now */ | |
146 | assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL); |
|
143 | assert(self->buffer->strides == NULL && self->buffer->suboffsets == NULL); | |
147 | assert(self->buffer->itemsize == 1); |
|
144 | assert(self->buffer->itemsize == 1); | |
148 |
|
145 | |||
149 | /* TODO avoid memcpy() below */ |
|
146 | /* TODO avoid memcpy() below */ | |
150 | readBuffer = (char *)self->buffer->buf + self->bufferOffset; |
|
147 | readBuffer = (char *)self->buffer->buf + self->bufferOffset; | |
151 | bufferRemaining = self->buffer->len - self->bufferOffset; |
|
148 | bufferRemaining = self->buffer->len - self->bufferOffset; | |
152 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); |
|
149 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
153 | self->bufferOffset += readSize; |
|
150 | self->bufferOffset += readSize; | |
154 | } |
|
151 | } | |
155 |
|
152 | |||
156 | if (readSize) { |
|
153 | if (readSize) { | |
157 | if (!self->readCount && self->skipBytes) { |
|
154 | if (!self->readCount && self->skipBytes) { | |
158 | assert(self->skipBytes < self->inSize); |
|
155 | assert(self->skipBytes < self->inSize); | |
159 | if ((Py_ssize_t)self->skipBytes >= readSize) { |
|
156 | if ((Py_ssize_t)self->skipBytes >= readSize) { | |
160 | PyErr_SetString(PyExc_ValueError, |
|
157 | PyErr_SetString(PyExc_ValueError, | |
161 | "skip_bytes larger than first input chunk; " |
|
158 | "skip_bytes larger than first input chunk; " | |
162 | "this scenario is currently unsupported"); |
|
159 | "this scenario is currently unsupported"); | |
163 |
Py_ |
|
160 | Py_XDECREF(readResult); | |
164 | return NULL; |
|
161 | return NULL; | |
165 | } |
|
162 | } | |
166 |
|
163 | |||
167 | readBuffer = readBuffer + self->skipBytes; |
|
164 | readBuffer = readBuffer + self->skipBytes; | |
168 | readSize -= self->skipBytes; |
|
165 | readSize -= self->skipBytes; | |
169 | } |
|
166 | } | |
170 |
|
167 | |||
171 | /* Copy input into previously allocated buffer because it can live longer |
|
168 | /* Copy input into previously allocated buffer because it can live longer | |
172 | than a single function call and we don't want to keep a ref to a Python |
|
169 | than a single function call and we don't want to keep a ref to a Python | |
173 | object around. This could be changed... */ |
|
170 | object around. This could be changed... */ | |
174 | memcpy((void*)self->input.src, readBuffer, readSize); |
|
171 | memcpy((void*)self->input.src, readBuffer, readSize); | |
175 | self->input.size = readSize; |
|
172 | self->input.size = readSize; | |
176 | self->input.pos = 0; |
|
173 | self->input.pos = 0; | |
177 | } |
|
174 | } | |
178 | /* No bytes on first read must mean an empty input stream. */ |
|
175 | /* No bytes on first read must mean an empty input stream. */ | |
179 | else if (!self->readCount) { |
|
176 | else if (!self->readCount) { | |
180 | self->finishedInput = 1; |
|
177 | self->finishedInput = 1; | |
181 | self->finishedOutput = 1; |
|
178 | self->finishedOutput = 1; | |
182 |
Py_ |
|
179 | Py_XDECREF(readResult); | |
183 | PyErr_SetString(PyExc_StopIteration, "empty input"); |
|
180 | PyErr_SetString(PyExc_StopIteration, "empty input"); | |
184 | return NULL; |
|
181 | return NULL; | |
185 | } |
|
182 | } | |
186 | else { |
|
183 | else { | |
187 | self->finishedInput = 1; |
|
184 | self->finishedInput = 1; | |
188 | } |
|
185 | } | |
189 |
|
186 | |||
190 | /* We've copied the data managed by memory. Discard the Python object. */ |
|
187 | /* We've copied the data managed by memory. Discard the Python object. */ | |
191 |
Py_ |
|
188 | Py_XDECREF(readResult); | |
192 | } |
|
189 | } | |
193 |
|
190 | |||
194 | result = read_decompressor_iterator(self); |
|
191 | result = read_decompressor_iterator(self); | |
195 | if (result.errored || result.chunk) { |
|
192 | if (result.errored || result.chunk) { | |
196 | return result.chunk; |
|
193 | return result.chunk; | |
197 | } |
|
194 | } | |
198 |
|
195 | |||
199 | /* No new output data. Try again unless we know there is no more data. */ |
|
196 | /* No new output data. Try again unless we know there is no more data. */ | |
200 | if (!self->finishedInput) { |
|
197 | if (!self->finishedInput) { | |
201 | goto read_from_source; |
|
198 | goto read_from_source; | |
202 | } |
|
199 | } | |
203 |
|
200 | |||
204 | PyErr_SetString(PyExc_StopIteration, "input exhausted"); |
|
201 | PyErr_SetString(PyExc_StopIteration, "input exhausted"); | |
205 | return NULL; |
|
202 | return NULL; | |
206 | } |
|
203 | } | |
207 |
|
204 | |||
208 | PyTypeObject ZstdDecompressorIteratorType = { |
|
205 | PyTypeObject ZstdDecompressorIteratorType = { | |
209 | PyVarObject_HEAD_INIT(NULL, 0) |
|
206 | PyVarObject_HEAD_INIT(NULL, 0) | |
210 | "zstd.ZstdDecompressorIterator", /* tp_name */ |
|
207 | "zstd.ZstdDecompressorIterator", /* tp_name */ | |
211 | sizeof(ZstdDecompressorIterator), /* tp_basicsize */ |
|
208 | sizeof(ZstdDecompressorIterator), /* tp_basicsize */ | |
212 | 0, /* tp_itemsize */ |
|
209 | 0, /* tp_itemsize */ | |
213 | (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */ |
|
210 | (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */ | |
214 | 0, /* tp_print */ |
|
211 | 0, /* tp_print */ | |
215 | 0, /* tp_getattr */ |
|
212 | 0, /* tp_getattr */ | |
216 | 0, /* tp_setattr */ |
|
213 | 0, /* tp_setattr */ | |
217 | 0, /* tp_compare */ |
|
214 | 0, /* tp_compare */ | |
218 | 0, /* tp_repr */ |
|
215 | 0, /* tp_repr */ | |
219 | 0, /* tp_as_number */ |
|
216 | 0, /* tp_as_number */ | |
220 | 0, /* tp_as_sequence */ |
|
217 | 0, /* tp_as_sequence */ | |
221 | 0, /* tp_as_mapping */ |
|
218 | 0, /* tp_as_mapping */ | |
222 | 0, /* tp_hash */ |
|
219 | 0, /* tp_hash */ | |
223 | 0, /* tp_call */ |
|
220 | 0, /* tp_call */ | |
224 | 0, /* tp_str */ |
|
221 | 0, /* tp_str */ | |
225 | 0, /* tp_getattro */ |
|
222 | 0, /* tp_getattro */ | |
226 | 0, /* tp_setattro */ |
|
223 | 0, /* tp_setattro */ | |
227 | 0, /* tp_as_buffer */ |
|
224 | 0, /* tp_as_buffer */ | |
228 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ |
|
225 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
229 | ZstdDecompressorIterator__doc__, /* tp_doc */ |
|
226 | ZstdDecompressorIterator__doc__, /* tp_doc */ | |
230 | 0, /* tp_traverse */ |
|
227 | 0, /* tp_traverse */ | |
231 | 0, /* tp_clear */ |
|
228 | 0, /* tp_clear */ | |
232 | 0, /* tp_richcompare */ |
|
229 | 0, /* tp_richcompare */ | |
233 | 0, /* tp_weaklistoffset */ |
|
230 | 0, /* tp_weaklistoffset */ | |
234 | ZstdDecompressorIterator_iter, /* tp_iter */ |
|
231 | ZstdDecompressorIterator_iter, /* tp_iter */ | |
235 | (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */ |
|
232 | (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */ | |
236 | 0, /* tp_methods */ |
|
233 | 0, /* tp_methods */ | |
237 | 0, /* tp_members */ |
|
234 | 0, /* tp_members */ | |
238 | 0, /* tp_getset */ |
|
235 | 0, /* tp_getset */ | |
239 | 0, /* tp_base */ |
|
236 | 0, /* tp_base */ | |
240 | 0, /* tp_dict */ |
|
237 | 0, /* tp_dict */ | |
241 | 0, /* tp_descr_get */ |
|
238 | 0, /* tp_descr_get */ | |
242 | 0, /* tp_descr_set */ |
|
239 | 0, /* tp_descr_set */ | |
243 | 0, /* tp_dictoffset */ |
|
240 | 0, /* tp_dictoffset */ | |
244 | 0, /* tp_init */ |
|
241 | 0, /* tp_init */ | |
245 | 0, /* tp_alloc */ |
|
242 | 0, /* tp_alloc */ | |
246 | PyType_GenericNew, /* tp_new */ |
|
243 | PyType_GenericNew, /* tp_new */ | |
247 | }; |
|
244 | }; | |
248 |
|
245 | |||
249 | void decompressoriterator_module_init(PyObject* mod) { |
|
246 | void decompressoriterator_module_init(PyObject* mod) { | |
250 | Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type; |
|
247 | Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type; | |
251 | if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) { |
|
248 | if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) { | |
252 | return; |
|
249 | return; | |
253 | } |
|
250 | } | |
254 | } |
|
251 | } |
@@ -1,132 +1,132 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2017-present, Gregory Szorc |
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #include "python-zstandard.h" |
|
9 | #include "python-zstandard.h" | |
10 |
|
10 | |||
11 | extern PyObject* ZstdError; |
|
11 | extern PyObject* ZstdError; | |
12 |
|
12 | |||
13 | PyDoc_STRVAR(FrameParameters__doc__, |
|
13 | PyDoc_STRVAR(FrameParameters__doc__, | |
14 | "FrameParameters: information about a zstd frame"); |
|
14 | "FrameParameters: information about a zstd frame"); | |
15 |
|
15 | |||
16 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) { |
|
16 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args) { | |
17 | const char* source; |
|
17 | const char* source; | |
18 | Py_ssize_t sourceSize; |
|
18 | Py_ssize_t sourceSize; | |
19 | ZSTD_frameParams params; |
|
19 | ZSTD_frameParams params; | |
20 | FrameParametersObject* result = NULL; |
|
20 | FrameParametersObject* result = NULL; | |
21 | size_t zresult; |
|
21 | size_t zresult; | |
22 |
|
22 | |||
23 | #if PY_MAJOR_VERSION >= 3 |
|
23 | #if PY_MAJOR_VERSION >= 3 | |
24 | if (!PyArg_ParseTuple(args, "y#:get_frame_parameters", |
|
24 | if (!PyArg_ParseTuple(args, "y#:get_frame_parameters", | |
25 | #else |
|
25 | #else | |
26 | if (!PyArg_ParseTuple(args, "s#:get_frame_parameters", |
|
26 | if (!PyArg_ParseTuple(args, "s#:get_frame_parameters", | |
27 | #endif |
|
27 | #endif | |
28 | &source, &sourceSize)) { |
|
28 | &source, &sourceSize)) { | |
29 | return NULL; |
|
29 | return NULL; | |
30 | } |
|
30 | } | |
31 |
|
31 | |||
32 | /* Needed for Python 2 to reject unicode */ |
|
32 | /* Needed for Python 2 to reject unicode */ | |
33 | if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) { |
|
33 | if (!PyBytes_Check(PyTuple_GET_ITEM(args, 0))) { | |
34 | PyErr_SetString(PyExc_TypeError, "argument must be bytes"); |
|
34 | PyErr_SetString(PyExc_TypeError, "argument must be bytes"); | |
35 | return NULL; |
|
35 | return NULL; | |
36 | } |
|
36 | } | |
37 |
|
37 | |||
38 | zresult = ZSTD_getFrameParams(¶ms, (void*)source, sourceSize); |
|
38 | zresult = ZSTD_getFrameParams(¶ms, (void*)source, sourceSize); | |
39 |
|
39 | |||
40 | if (ZSTD_isError(zresult)) { |
|
40 | if (ZSTD_isError(zresult)) { | |
41 | PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult)); |
|
41 | PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult)); | |
42 | return NULL; |
|
42 | return NULL; | |
43 | } |
|
43 | } | |
44 |
|
44 | |||
45 | if (zresult) { |
|
45 | if (zresult) { | |
46 | PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult); |
|
46 | PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult); | |
47 | return NULL; |
|
47 | return NULL; | |
48 | } |
|
48 | } | |
49 |
|
49 | |||
50 | result = PyObject_New(FrameParametersObject, &FrameParametersType); |
|
50 | result = PyObject_New(FrameParametersObject, &FrameParametersType); | |
51 | if (!result) { |
|
51 | if (!result) { | |
52 | return NULL; |
|
52 | return NULL; | |
53 | } |
|
53 | } | |
54 |
|
54 | |||
55 | result->frameContentSize = params.frameContentSize; |
|
55 | result->frameContentSize = params.frameContentSize; | |
56 | result->windowSize = params.windowSize; |
|
56 | result->windowSize = params.windowSize; | |
57 | result->dictID = params.dictID; |
|
57 | result->dictID = params.dictID; | |
58 | result->checksumFlag = params.checksumFlag ? 1 : 0; |
|
58 | result->checksumFlag = params.checksumFlag ? 1 : 0; | |
59 |
|
59 | |||
60 | return result; |
|
60 | return result; | |
61 | } |
|
61 | } | |
62 |
|
62 | |||
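get_frame_parameters() above has to distinguish three outcomes from ZSTD_getFrameParams(): an error code, zero for success, and a positive value meaning the header needs that many input bytes. A small standalone sketch of that calling convention, using the same static-linking-only zstd API as the module (probe_frame() and its inputs are hypothetical):

#include <stdio.h>
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"

int probe_frame(const void* src, size_t srcSize) {
	ZSTD_frameParams params;
	size_t zresult = ZSTD_getFrameParams(&params, src, srcSize);

	if (ZSTD_isError(zresult)) {
		fprintf(stderr, "cannot parse frame header: %s\n",
			ZSTD_getErrorName(zresult));
		return -1;
	}

	if (zresult) {
		/* Not an error: srcSize is too small; zresult is the amount of
		   input wanted before the header can be parsed. */
		fprintf(stderr, "need %zu bytes of header\n", zresult);
		return -1;
	}

	printf("content size: %llu, window: %u, dict id: %u, checksum: %d\n",
		params.frameContentSize, params.windowSize, params.dictID,
		(int)params.checksumFlag);
	return 0;
}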
63 | static void FrameParameters_dealloc(PyObject* self) { |
|
63 | static void FrameParameters_dealloc(PyObject* self) { | |
64 | PyObject_Del(self); |
|
64 | PyObject_Del(self); | |
65 | } |
|
65 | } | |
66 |
|
66 | |||
67 | static PyMemberDef FrameParameters_members[] = { |
|
67 | static PyMemberDef FrameParameters_members[] = { | |
68 | { "content_size", T_ULONGLONG, |
|
68 | { "content_size", T_ULONGLONG, | |
69 | offsetof(FrameParametersObject, frameContentSize), READONLY, |
|
69 | offsetof(FrameParametersObject, frameContentSize), READONLY, | |
70 | "frame content size" }, |
|
70 | "frame content size" }, | |
71 | { "window_size", T_UINT, |
|
71 | { "window_size", T_UINT, | |
72 | offsetof(FrameParametersObject, windowSize), READONLY, |
|
72 | offsetof(FrameParametersObject, windowSize), READONLY, | |
73 | "window size" }, |
|
73 | "window size" }, | |
74 | { "dict_id", T_UINT, |
|
74 | { "dict_id", T_UINT, | |
75 | offsetof(FrameParametersObject, dictID), READONLY, |
|
75 | offsetof(FrameParametersObject, dictID), READONLY, | |
76 | "dictionary ID" }, |
|
76 | "dictionary ID" }, | |
77 | { "has_checksum", T_BOOL, |
|
77 | { "has_checksum", T_BOOL, | |
78 | offsetof(FrameParametersObject, checksumFlag), READONLY, |
|
78 | offsetof(FrameParametersObject, checksumFlag), READONLY, | |
79 | "checksum flag" }, |
|
79 | "checksum flag" }, | |
80 | { NULL } |
|
80 | { NULL } | |
81 | }; |
|
81 | }; | |
82 |
|
82 | |||
83 | PyTypeObject FrameParametersType = { |
|
83 | PyTypeObject FrameParametersType = { | |
84 | PyVarObject_HEAD_INIT(NULL, 0) |
|
84 | PyVarObject_HEAD_INIT(NULL, 0) | |
85 | "FrameParameters", /* tp_name */ |
|
85 | "FrameParameters", /* tp_name */ | |
86 | sizeof(FrameParametersObject), /* tp_basicsize */ |
|
86 | sizeof(FrameParametersObject), /* tp_basicsize */ | |
87 | 0, /* tp_itemsize */ |
|
87 | 0, /* tp_itemsize */ | |
88 | (destructor)FrameParameters_dealloc, /* tp_dealloc */ |
|
88 | (destructor)FrameParameters_dealloc, /* tp_dealloc */ | |
89 | 0, /* tp_print */ |
|
89 | 0, /* tp_print */ | |
90 | 0, /* tp_getattr */ |
|
90 | 0, /* tp_getattr */ | |
91 | 0, /* tp_setattr */ |
|
91 | 0, /* tp_setattr */ | |
92 | 0, /* tp_compare */ |
|
92 | 0, /* tp_compare */ | |
93 | 0, /* tp_repr */ |
|
93 | 0, /* tp_repr */ | |
94 | 0, /* tp_as_number */ |
|
94 | 0, /* tp_as_number */ | |
95 | 0, /* tp_as_sequence */ |
|
95 | 0, /* tp_as_sequence */ | |
96 | 0, /* tp_as_mapping */ |
|
96 | 0, /* tp_as_mapping */ | |
97 | 0, /* tp_hash */ |
|
97 | 0, /* tp_hash */ | |
98 | 0, /* tp_call */ |
|
98 | 0, /* tp_call */ | |
99 | 0, /* tp_str */ |
|
99 | 0, /* tp_str */ | |
100 | 0, /* tp_getattro */ |
|
100 | 0, /* tp_getattro */ | |
101 | 0, /* tp_setattro */ |
|
101 | 0, /* tp_setattro */ | |
102 | 0, /* tp_as_buffer */ |
|
102 | 0, /* tp_as_buffer */ | |
103 | Py_TPFLAGS_DEFAULT, /* tp_flags */ |
|
103 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
104 | FrameParameters__doc__, /* tp_doc */ |
|
104 | FrameParameters__doc__, /* tp_doc */ | |
105 | 0, /* tp_traverse */ |
|
105 | 0, /* tp_traverse */ | |
106 | 0, /* tp_clear */ |
|
106 | 0, /* tp_clear */ | |
107 | 0, /* tp_richcompare */ |
|
107 | 0, /* tp_richcompare */ | |
108 | 0, /* tp_weaklistoffset */ |
|
108 | 0, /* tp_weaklistoffset */ | |
109 | 0, /* tp_iter */ |
|
109 | 0, /* tp_iter */ | |
110 | 0, /* tp_iternext */ |
|
110 | 0, /* tp_iternext */ | |
111 | 0, /* tp_methods */ |
|
111 | 0, /* tp_methods */ | |
112 | FrameParameters_members, /* tp_members */ |
|
112 | FrameParameters_members, /* tp_members */ | |
113 | 0, /* tp_getset */ |
|
113 | 0, /* tp_getset */ | |
114 | 0, /* tp_base */ |
|
114 | 0, /* tp_base */ | |
115 | 0, /* tp_dict */ |
|
115 | 0, /* tp_dict */ | |
116 | 0, /* tp_descr_get */ |
|
116 | 0, /* tp_descr_get */ | |
117 | 0, /* tp_descr_set */ |
|
117 | 0, /* tp_descr_set */ | |
118 | 0, /* tp_dictoffset */ |
|
118 | 0, /* tp_dictoffset */ | |
119 | 0, /* tp_init */ |
|
119 | 0, /* tp_init */ | |
120 | 0, /* tp_alloc */ |
|
120 | 0, /* tp_alloc */ | |
121 | 0, /* tp_new */ |
|
121 | 0, /* tp_new */ | |
122 | }; |
|
122 | }; | |
123 |
|
123 | |||
124 | void frameparams_module_init(PyObject* mod) { |
|
124 | void frameparams_module_init(PyObject* mod) { | |
125 | Py_TYPE(&FrameParametersType) = &PyType_Type; |
|
125 | Py_TYPE(&FrameParametersType) = &PyType_Type; | |
126 | if (PyType_Ready(&FrameParametersType) < 0) { |
|
126 | if (PyType_Ready(&FrameParametersType) < 0) { | |
127 | return; |
|
127 | return; | |
128 | } |
|
128 | } | |
129 |
|
129 | |||
130 |
Py_I |
|
130 | Py_INCREF(&FrameParametersType); | |
131 | PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType); |
|
131 | PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType); | |
132 | } |
|
132 | } |
@@ -1,190 +1,285 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #define PY_SSIZE_T_CLEAN |
|
9 | #define PY_SSIZE_T_CLEAN | |
10 | #include <Python.h> |
|
10 | #include <Python.h> | |
11 | #include "structmember.h" |
|
11 | #include "structmember.h" | |
12 |
|
12 | |||
13 | #define ZSTD_STATIC_LINKING_ONLY |
|
13 | #define ZSTD_STATIC_LINKING_ONLY | |
14 | #define ZDICT_STATIC_LINKING_ONLY |
|
14 | #define ZDICT_STATIC_LINKING_ONLY | |
15 | #include "mem.h" |
|
15 | #include "mem.h" | |
16 | #include "zstd.h" |
|
16 | #include "zstd.h" | |
17 | #include "zdict.h" |
|
17 | #include "zdict.h" | |
|
18 | #include "zstdmt_compress.h" | |||
18 |
|
19 | |||
19 |
#define PYTHON_ZSTANDARD_VERSION "0. |
|
20 | #define PYTHON_ZSTANDARD_VERSION "0.8.0" | |
20 |
|
21 | |||
21 | typedef enum { |
|
22 | typedef enum { | |
22 | compressorobj_flush_finish, |
|
23 | compressorobj_flush_finish, | |
23 | compressorobj_flush_block, |
|
24 | compressorobj_flush_block, | |
24 | } CompressorObj_Flush; |
|
25 | } CompressorObj_Flush; | |
25 |
|
26 | |||
|
27 | /* | |||
|
28 | Represents a CompressionParameters type. | |||
|
29 | ||||
|
30 | This type is basically a wrapper around ZSTD_compressionParameters. | |||
|
31 | */ | |||
26 | typedef struct { |
|
32 | typedef struct { | |
27 | PyObject_HEAD |
|
33 | PyObject_HEAD | |
28 | unsigned windowLog; |
|
34 | unsigned windowLog; | |
29 | unsigned chainLog; |
|
35 | unsigned chainLog; | |
30 | unsigned hashLog; |
|
36 | unsigned hashLog; | |
31 | unsigned searchLog; |
|
37 | unsigned searchLog; | |
32 | unsigned searchLength; |
|
38 | unsigned searchLength; | |
33 | unsigned targetLength; |
|
39 | unsigned targetLength; | |
34 | ZSTD_strategy strategy; |
|
40 | ZSTD_strategy strategy; | |
35 | } CompressionParametersObject; |
|
41 | } CompressionParametersObject; | |
36 |
|
42 | |||
37 | extern PyTypeObject CompressionParametersType; |
|
43 | extern PyTypeObject CompressionParametersType; | |
38 |
|
44 | |||
|
45 | /* | |||
|
46 | Represents a FrameParameters type. | |||
|
47 | ||||
|
48 | This type is basically a wrapper around ZSTD_frameParams. | |||
|
49 | */ | |||
39 | typedef struct { |
|
50 | typedef struct { | |
40 | PyObject_HEAD |
|
51 | PyObject_HEAD | |
41 | unsigned long long frameContentSize; |
|
52 | unsigned long long frameContentSize; | |
42 | unsigned windowSize; |
|
53 | unsigned windowSize; | |
43 | unsigned dictID; |
|
54 | unsigned dictID; | |
44 | char checksumFlag; |
|
55 | char checksumFlag; | |
45 | } FrameParametersObject; |
|
56 | } FrameParametersObject; | |
46 |
|
57 | |||
47 | extern PyTypeObject FrameParametersType; |
|
58 | extern PyTypeObject FrameParametersType; | |
48 |
|
59 | |||
49 | typedef struct { |
|
60 | /* | |
50 | PyObject_HEAD |
|
61 | Represents a ZstdCompressionDict type. | |
51 | unsigned selectivityLevel; |
|
|||
52 | int compressionLevel; |
|
|||
53 | unsigned notificationLevel; |
|
|||
54 | unsigned dictID; |
|
|||
55 | } DictParametersObject; |
|
|||
56 |
|
|
62 | ||
57 | extern PyTypeObject DictParametersType; |
|
63 | Instances hold data used for a zstd compression dictionary. | |
58 |
|
64 | */ | ||
59 | typedef struct { |
|
65 | typedef struct { | |
60 | PyObject_HEAD |
|
66 | PyObject_HEAD | |
61 |
|
67 | |||
|
68 | /* Pointer to dictionary data. Owned by self. */ | |||
62 | void* dictData; |
|
69 | void* dictData; | |
|
70 | /* Size of dictionary data. */ | |||
63 | size_t dictSize; |
|
71 | size_t dictSize; | |
|
72 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |||
|
73 | unsigned k; | |||
|
74 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |||
|
75 | unsigned d; | |||
64 | } ZstdCompressionDict; |
|
76 | } ZstdCompressionDict; | |
65 |
|
77 | |||
66 | extern PyTypeObject ZstdCompressionDictType; |
|
78 | extern PyTypeObject ZstdCompressionDictType; | |
67 |
|
79 | |||
|
80 | /* | |||
|
81 | Represents a ZstdCompressor type. | |||
|
82 | */ | |||
68 | typedef struct { |
|
83 | typedef struct { | |
69 | PyObject_HEAD |
|
84 | PyObject_HEAD | |
70 |
|
85 | |||
|
86 | /* Configured compression level. Should always be set. */ | |||
71 | int compressionLevel; |
|
87 | int compressionLevel; | |
|
88 | /* Number of threads to use for operations. */ | |||
|
89 | unsigned int threads; | |||
|
90 | /* Pointer to compression dictionary to use. NULL if not using dictionary | |||
|
91 | compression. */ | |||
72 | ZstdCompressionDict* dict; |
|
92 | ZstdCompressionDict* dict; | |
|
93 | /* Compression context to use. Populated during object construction. NULL | |||
|
94 | if using multi-threaded compression. */ | |||
73 | ZSTD_CCtx* cctx; |
|
95 | ZSTD_CCtx* cctx; | |
|
96 | /* Multi-threaded compression context to use. Populated during object | |||
|
97 | construction. NULL if not using multi-threaded compression. */ | |||
|
98 | ZSTDMT_CCtx* mtcctx; | |||
|
99 | /* Digested compression dictionary. NULL initially. Populated on first use. */ | |||
74 | ZSTD_CDict* cdict; |
|
100 | ZSTD_CDict* cdict; | |
|
101 | /* Low-level compression parameter control. NULL unless passed to | |||
|
102 | constructor. Takes precedence over `compressionLevel` if defined. */ | |||
75 | CompressionParametersObject* cparams; |
|
103 | CompressionParametersObject* cparams; | |
|
104 | /* Controls zstd frame options. */ | |||
76 | ZSTD_frameParameters fparams; |
|
105 | ZSTD_frameParameters fparams; | |
|
106 | /* Holds state for streaming compression. Shared across all invocations. | |||
|
107 | Populated on first use. */ | |||
|
108 | ZSTD_CStream* cstream; | |||
77 | } ZstdCompressor; |
|
109 | } ZstdCompressor; | |
78 |
|
110 | |||
79 | extern PyTypeObject ZstdCompressorType; |
|
111 | extern PyTypeObject ZstdCompressorType; | |
80 |
|
112 | |||
81 | typedef struct { |
|
113 | typedef struct { | |
82 | PyObject_HEAD |
|
114 | PyObject_HEAD | |
83 |
|
115 | |||
84 | ZstdCompressor* compressor; |
|
116 | ZstdCompressor* compressor; | |
85 | ZSTD_CStream* cstream; |
|
|||
86 | ZSTD_outBuffer output; |
|
117 | ZSTD_outBuffer output; | |
87 | int finished; |
|
118 | int finished; | |
88 | } ZstdCompressionObj; |
|
119 | } ZstdCompressionObj; | |
89 |
|
120 | |||
90 | extern PyTypeObject ZstdCompressionObjType; |
|
121 | extern PyTypeObject ZstdCompressionObjType; | |
91 |
|
122 | |||
92 | typedef struct { |
|
123 | typedef struct { | |
93 | PyObject_HEAD |
|
124 | PyObject_HEAD | |
94 |
|
125 | |||
95 | ZstdCompressor* compressor; |
|
126 | ZstdCompressor* compressor; | |
96 | PyObject* writer; |
|
127 | PyObject* writer; | |
97 | Py_ssize_t sourceSize; |
|
128 | Py_ssize_t sourceSize; | |
98 | size_t outSize; |
|
129 | size_t outSize; | |
99 | ZSTD_CStream* cstream; |
|
|||
100 | int entered; |
|
130 | int entered; | |
101 | } ZstdCompressionWriter; |
|
131 | } ZstdCompressionWriter; | |
102 |
|
132 | |||
103 | extern PyTypeObject ZstdCompressionWriterType; |
|
133 | extern PyTypeObject ZstdCompressionWriterType; | |
104 |
|
134 | |||
105 | typedef struct { |
|
135 | typedef struct { | |
106 | PyObject_HEAD |
|
136 | PyObject_HEAD | |
107 |
|
137 | |||
108 | ZstdCompressor* compressor; |
|
138 | ZstdCompressor* compressor; | |
109 | PyObject* reader; |
|
139 | PyObject* reader; | |
110 | Py_buffer* buffer; |
|
140 | Py_buffer* buffer; | |
111 | Py_ssize_t bufferOffset; |
|
141 | Py_ssize_t bufferOffset; | |
112 | Py_ssize_t sourceSize; |
|
142 | Py_ssize_t sourceSize; | |
113 | size_t inSize; |
|
143 | size_t inSize; | |
114 | size_t outSize; |
|
144 | size_t outSize; | |
115 |
|
145 | |||
116 | ZSTD_CStream* cstream; |
|
|||
117 | ZSTD_inBuffer input; |
|
146 | ZSTD_inBuffer input; | |
118 | ZSTD_outBuffer output; |
|
147 | ZSTD_outBuffer output; | |
119 | int finishedOutput; |
|
148 | int finishedOutput; | |
120 | int finishedInput; |
|
149 | int finishedInput; | |
121 | PyObject* readResult; |
|
150 | PyObject* readResult; | |
122 | } ZstdCompressorIterator; |
|
151 | } ZstdCompressorIterator; | |
123 |
|
152 | |||
124 | extern PyTypeObject ZstdCompressorIteratorType; |
|
153 | extern PyTypeObject ZstdCompressorIteratorType; | |
125 |
|
154 | |||
126 | typedef struct { |
|
155 | typedef struct { | |
127 | PyObject_HEAD |
|
156 | PyObject_HEAD | |
128 |
|
157 | |||
129 | ZSTD_DCtx* dctx; |
|
158 | ZSTD_DCtx* dctx; | |
130 |
|
159 | |||
131 | ZstdCompressionDict* dict; |
|
160 | ZstdCompressionDict* dict; | |
132 | ZSTD_DDict* ddict; |
|
161 | ZSTD_DDict* ddict; | |
|
162 | ZSTD_DStream* dstream; | |||
133 | } ZstdDecompressor; |
|
163 | } ZstdDecompressor; | |
134 |
|
164 | |||
135 | extern PyTypeObject ZstdDecompressorType; |
|
165 | extern PyTypeObject ZstdDecompressorType; | |
136 |
|
166 | |||
137 | typedef struct { |
|
167 | typedef struct { | |
138 | PyObject_HEAD |
|
168 | PyObject_HEAD | |
139 |
|
169 | |||
140 | ZstdDecompressor* decompressor; |
|
170 | ZstdDecompressor* decompressor; | |
141 | ZSTD_DStream* dstream; |
|
|||
142 | int finished; |
|
171 | int finished; | |
143 | } ZstdDecompressionObj; |
|
172 | } ZstdDecompressionObj; | |
144 |
|
173 | |||
145 | extern PyTypeObject ZstdDecompressionObjType; |
|
174 | extern PyTypeObject ZstdDecompressionObjType; | |
146 |
|
175 | |||
147 | typedef struct { |
|
176 | typedef struct { | |
148 | PyObject_HEAD |
|
177 | PyObject_HEAD | |
149 |
|
178 | |||
150 | ZstdDecompressor* decompressor; |
|
179 | ZstdDecompressor* decompressor; | |
151 | PyObject* writer; |
|
180 | PyObject* writer; | |
152 | size_t outSize; |
|
181 | size_t outSize; | |
153 | ZSTD_DStream* dstream; |
|
|||
154 | int entered; |
|
182 | int entered; | |
155 | } ZstdDecompressionWriter; |
|
183 | } ZstdDecompressionWriter; | |
156 |
|
184 | |||
157 | extern PyTypeObject ZstdDecompressionWriterType; |
|
185 | extern PyTypeObject ZstdDecompressionWriterType; | |
158 |
|
186 | |||
159 | typedef struct { |
|
187 | typedef struct { | |
160 | PyObject_HEAD |
|
188 | PyObject_HEAD | |
161 |
|
189 | |||
162 | ZstdDecompressor* decompressor; |
|
190 | ZstdDecompressor* decompressor; | |
163 | PyObject* reader; |
|
191 | PyObject* reader; | |
164 | Py_buffer* buffer; |
|
192 | Py_buffer* buffer; | |
165 | Py_ssize_t bufferOffset; |
|
193 | Py_ssize_t bufferOffset; | |
166 | size_t inSize; |
|
194 | size_t inSize; | |
167 | size_t outSize; |
|
195 | size_t outSize; | |
168 | size_t skipBytes; |
|
196 | size_t skipBytes; | |
169 | ZSTD_DStream* dstream; |
|
|||
170 | ZSTD_inBuffer input; |
|
197 | ZSTD_inBuffer input; | |
171 | ZSTD_outBuffer output; |
|
198 | ZSTD_outBuffer output; | |
172 | Py_ssize_t readCount; |
|
199 | Py_ssize_t readCount; | |
173 | int finishedInput; |
|
200 | int finishedInput; | |
174 | int finishedOutput; |
|
201 | int finishedOutput; | |
175 | } ZstdDecompressorIterator; |
|
202 | } ZstdDecompressorIterator; | |
176 |
|
203 | |||
177 | extern PyTypeObject ZstdDecompressorIteratorType; |
|
204 | extern PyTypeObject ZstdDecompressorIteratorType; | |
178 |
|
205 | |||
179 | typedef struct { |
|
206 | typedef struct { | |
180 | int errored; |
|
207 | int errored; | |
181 | PyObject* chunk; |
|
208 | PyObject* chunk; | |
182 | } DecompressorIteratorResult; |
|
209 | } DecompressorIteratorResult; | |
183 |
|
210 | |||
|
211 | typedef struct { | |||
|
212 | unsigned long long offset; | |||
|
213 | unsigned long long length; | |||
|
214 | } BufferSegment; | |||
|
215 | ||||
|
216 | typedef struct { | |||
|
217 | PyObject_HEAD | |||
|
218 | ||||
|
219 | PyObject* parent; | |||
|
220 | BufferSegment* segments; | |||
|
221 | Py_ssize_t segmentCount; | |||
|
222 | } ZstdBufferSegments; | |||
|
223 | ||||
|
224 | extern PyTypeObject ZstdBufferSegmentsType; | |||
|
225 | ||||
|
226 | typedef struct { | |||
|
227 | PyObject_HEAD | |||
|
228 | ||||
|
229 | PyObject* parent; | |||
|
230 | void* data; | |||
|
231 | Py_ssize_t dataSize; | |||
|
232 | unsigned long long offset; | |||
|
233 | } ZstdBufferSegment; | |||
|
234 | ||||
|
235 | extern PyTypeObject ZstdBufferSegmentType; | |||
|
236 | ||||
|
237 | typedef struct { | |||
|
238 | PyObject_HEAD | |||
|
239 | ||||
|
240 | Py_buffer parent; | |||
|
241 | void* data; | |||
|
242 | unsigned long long dataSize; | |||
|
243 | BufferSegment* segments; | |||
|
244 | Py_ssize_t segmentCount; | |||
|
245 | int useFree; | |||
|
246 | } ZstdBufferWithSegments; | |||
|
247 | ||||
|
248 | extern PyTypeObject ZstdBufferWithSegmentsType; | |||
|
249 | ||||
|
250 | /** | |||
|
251 | * An ordered collection of BufferWithSegments exposed as a squashed collection. | |||
|
252 | * | |||
|
253 | * This type provides a virtual view spanning multiple BufferWithSegments | |||
|
254 | * instances. It allows multiple instances to be "chained" together and | |||
|
255 | * exposed as a single collection. e.g. if there are 2 buffers holding | |||
|
256 | * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer. | |||
|
257 | */ | |||
|
258 | typedef struct { | |||
|
259 | PyObject_HEAD | |||
|
260 | ||||
|
261 | /* An array of buffers that should be exposed through this instance. */ | |||
|
262 | ZstdBufferWithSegments** buffers; | |||
|
263 | /* Number of elements in buffers array. */ | |||
|
264 | Py_ssize_t bufferCount; | |||
|
265 | /* Array of cumulative element counts across buffers. The 0th entry is | |||
|
266 | the number of elements in the 0th buffer; the 1st entry is the sum of | |||
|
267 | elements in the 0th and 1st buffers; and so on. */ | |||
|
268 | Py_ssize_t* firstElements; | |||
|
269 | } ZstdBufferWithSegmentsCollection; | |||
|
270 | ||||
|
271 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; | |||
|
272 | ||||
184 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams); |
|
273 | void ztopy_compression_parameters(CompressionParametersObject* params, ZSTD_compressionParameters* zparams); | |
185 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args); |
|
274 | CompressionParametersObject* get_compression_parameters(PyObject* self, PyObject* args); | |
186 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args); |
|
275 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args); | |
187 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args); |
|
276 | PyObject* estimate_compression_context_size(PyObject* self, PyObject* args); | |
188 |
|
|
277 | int init_cstream(ZstdCompressor* compressor, unsigned long long sourceSize); | |
189 | ZSTD_DStream* DStream_from_ZstdDecompressor(ZstdDecompressor* decompressor); |
|
278 | int init_mtcstream(ZstdCompressor* compressor, Py_ssize_t sourceSize); | |
|
279 | int init_dstream(ZstdDecompressor* decompressor); | |||
190 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
280 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); | |
|
281 | ZstdCompressionDict* train_cover_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); | |||
|
282 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); | |||
|
283 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); | |||
|
284 | int cpu_count(void); | |||
|
285 | size_t roundpow2(size_t); |
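The ZstdBufferWithSegmentsCollection comment above describes resolving a flat index against the firstElements array of cumulative segment counts. A minimal Python sketch of that lookup (an illustration of the indexing scheme only, not the C code in this diff) might look like:

    import bisect

    def locate(first_elements, flat_index):
        # first_elements[i] mirrors firstElements in the header above: the
        # cumulative number of segments through buffer i.
        buffer_index = bisect.bisect_right(first_elements, flat_index)
        if buffer_index == 0:
            segment_index = flat_index
        else:
            segment_index = flat_index - first_elements[buffer_index - 1]
        return buffer_index, segment_index

    # Two buffers of 10 segments each -> cumulative counts [10, 20].
    # Flat index 14 is the 5th segment (index 4) of the 2nd buffer (index 1).
    assert locate([10, 20], 14) == (1, 4)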
@@ -1,154 +1,187 | |||||
1 | # Copyright (c) 2016-present, Gregory Szorc |
|
1 | # Copyright (c) 2016-present, Gregory Szorc | |
2 | # All rights reserved. |
|
2 | # All rights reserved. | |
3 | # |
|
3 | # | |
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
7 | from __future__ import absolute_import |
|
7 | from __future__ import absolute_import | |
8 |
|
8 | |||
9 | import cffi |
|
9 | import cffi | |
10 | import distutils.ccompiler |
|
10 | import distutils.ccompiler | |
11 | import os |
|
11 | import os | |
12 | import re |
|
12 | import re | |
13 | import subprocess |
|
13 | import subprocess | |
14 | import tempfile |
|
14 | import tempfile | |
15 |
|
15 | |||
16 |
|
16 | |||
17 | HERE = os.path.abspath(os.path.dirname(__file__)) |
|
17 | HERE = os.path.abspath(os.path.dirname(__file__)) | |
18 |
|
18 | |||
19 | SOURCES = ['zstd/%s' % p for p in ( |
|
19 | SOURCES = ['zstd/%s' % p for p in ( | |
20 | 'common/entropy_common.c', |
|
20 | 'common/entropy_common.c', | |
21 | 'common/error_private.c', |
|
21 | 'common/error_private.c', | |
22 | 'common/fse_decompress.c', |
|
22 | 'common/fse_decompress.c', | |
23 | 'common/pool.c', |
|
23 | 'common/pool.c', | |
24 | 'common/threading.c', |
|
24 | 'common/threading.c', | |
25 | 'common/xxhash.c', |
|
25 | 'common/xxhash.c', | |
26 | 'common/zstd_common.c', |
|
26 | 'common/zstd_common.c', | |
27 | 'compress/fse_compress.c', |
|
27 | 'compress/fse_compress.c', | |
28 | 'compress/huf_compress.c', |
|
28 | 'compress/huf_compress.c', | |
29 | 'compress/zstd_compress.c', |
|
29 | 'compress/zstd_compress.c', | |
|
30 | 'compress/zstdmt_compress.c', | |||
30 | 'decompress/huf_decompress.c', |
|
31 | 'decompress/huf_decompress.c', | |
31 | 'decompress/zstd_decompress.c', |
|
32 | 'decompress/zstd_decompress.c', | |
32 | 'dictBuilder/cover.c', |
|
33 | 'dictBuilder/cover.c', | |
33 | 'dictBuilder/divsufsort.c', |
|
34 | 'dictBuilder/divsufsort.c', | |
34 | 'dictBuilder/zdict.c', |
|
35 | 'dictBuilder/zdict.c', | |
35 | )] |
|
36 | )] | |
36 |
|
37 | |||
|
38 | # Headers whose preprocessed output will be fed into cdef(). | |||
37 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( |
|
39 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( | |
38 | ('zstd.h',), |
|
40 | ('zstd.h',), | |
39 |
('com |
|
41 | ('compress', 'zstdmt_compress.h'), | |
40 | ('dictBuilder', 'zdict.h'), |
|
42 | ('dictBuilder', 'zdict.h'), | |
41 | )] |
|
43 | )] | |
42 |
|
44 | |||
43 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( |
|
45 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( | |
44 | 'zstd', |
|
46 | 'zstd', | |
45 | 'zstd/common', |
|
47 | 'zstd/common', | |
46 | 'zstd/compress', |
|
48 | 'zstd/compress', | |
47 | 'zstd/decompress', |
|
49 | 'zstd/decompress', | |
48 | 'zstd/dictBuilder', |
|
50 | 'zstd/dictBuilder', | |
49 | )] |
|
51 | )] | |
50 |
|
52 | |||
51 | # cffi can't parse some of the primitives in zstd.h. So we invoke the |
|
53 | # cffi can't parse some of the primitives in zstd.h. So we invoke the | |
52 | # preprocessor and feed its output into cffi. |
|
54 | # preprocessor and feed its output into cffi. | |
53 | compiler = distutils.ccompiler.new_compiler() |
|
55 | compiler = distutils.ccompiler.new_compiler() | |
54 |
|
56 | |||
55 | # Needed for MSVC. |
|
57 | # Needed for MSVC. | |
56 | if hasattr(compiler, 'initialize'): |
|
58 | if hasattr(compiler, 'initialize'): | |
57 | compiler.initialize() |
|
59 | compiler.initialize() | |
58 |
|
60 | |||
59 | # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor |
|
61 | # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor | |
60 | # manually. |
|
62 | # manually. | |
61 | if compiler.compiler_type == 'unix': |
|
63 | if compiler.compiler_type == 'unix': | |
62 | args = list(compiler.executables['compiler']) |
|
64 | args = list(compiler.executables['compiler']) | |
63 | args.extend([ |
|
65 | args.extend([ | |
64 | '-E', |
|
66 | '-E', | |
65 | '-DZSTD_STATIC_LINKING_ONLY', |
|
67 | '-DZSTD_STATIC_LINKING_ONLY', | |
66 | '-DZDICT_STATIC_LINKING_ONLY', |
|
68 | '-DZDICT_STATIC_LINKING_ONLY', | |
67 | ]) |
|
69 | ]) | |
68 | elif compiler.compiler_type == 'msvc': |
|
70 | elif compiler.compiler_type == 'msvc': | |
69 | args = [compiler.cc] |
|
71 | args = [compiler.cc] | |
70 | args.extend([ |
|
72 | args.extend([ | |
71 | '/EP', |
|
73 | '/EP', | |
72 | '/DZSTD_STATIC_LINKING_ONLY', |
|
74 | '/DZSTD_STATIC_LINKING_ONLY', | |
73 | '/DZDICT_STATIC_LINKING_ONLY', |
|
75 | '/DZDICT_STATIC_LINKING_ONLY', | |
74 | ]) |
|
76 | ]) | |
75 | else: |
|
77 | else: | |
76 | raise Exception('unsupported compiler type: %s' % compiler.compiler_type) |
|
78 | raise Exception('unsupported compiler type: %s' % compiler.compiler_type) | |
77 |
|
79 | |||
78 | def preprocess(path): |
|
80 | def preprocess(path): | |
79 | # zstd.h includes <stddef.h>, which is also included by cffi's boilerplate. |
|
|||
80 | # This can lead to duplicate declarations. So we strip this include from the |
|
|||
81 | # preprocessor invocation. |
|
|||
82 | with open(path, 'rb') as fh: |
|
81 | with open(path, 'rb') as fh: | |
83 | lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')] |
|
82 | lines = [] | |
|
83 | for l in fh: | |||
|
84 | # zstd.h includes <stddef.h>, which is also included by cffi's | |||
|
85 | # boilerplate. This can lead to duplicate declarations. So we strip | |||
|
86 | # this include from the preprocessor invocation. | |||
|
87 | # | |||
|
88 | # The same thing happens for including zstd.h, so give it the same | |||
|
89 | # treatment. | |||
|
90 | # | |||
|
91 | # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline | |||
|
92 | # #define in zstdmt_compress.h and results in a compiler warning. So drop | |||
|
93 | # the inline #define. | |||
|
94 | if l.startswith((b'#include <stddef.h>', | |||
|
95 | b'#include "zstd.h"', | |||
|
96 | b'#define ZSTD_STATIC_LINKING_ONLY')): | |||
|
97 | continue | |||
|
98 | ||||
|
99 | # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't | |||
|
100 | # important so just filter it out. | |||
|
101 | if l.startswith(b'ZSTDLIB_API'): | |||
|
102 | l = l[len(b'ZSTDLIB_API '):] | |||
|
103 | ||||
|
104 | lines.append(l) | |||
84 |
|
105 | |||
85 | fd, input_file = tempfile.mkstemp(suffix='.h') |
|
106 | fd, input_file = tempfile.mkstemp(suffix='.h') | |
86 | os.write(fd, b''.join(lines)) |
|
107 | os.write(fd, b''.join(lines)) | |
87 | os.close(fd) |
|
108 | os.close(fd) | |
88 |
|
109 | |||
89 | try: |
|
110 | try: | |
90 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE) |
|
111 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE) | |
91 | output = process.communicate()[0] |
|
112 | output = process.communicate()[0] | |
92 | ret = process.poll() |
|
113 | ret = process.poll() | |
93 | if ret: |
|
114 | if ret: | |
94 | raise Exception('preprocessor exited with error') |
|
115 | raise Exception('preprocessor exited with error') | |
95 |
|
116 | |||
96 | return output |
|
117 | return output | |
97 | finally: |
|
118 | finally: | |
98 | os.unlink(input_file) |
|
119 | os.unlink(input_file) | |
99 |
|
120 | |||
100 |
|
121 | |||
101 | def normalize_output(output): |
|
122 | def normalize_output(output): | |
102 | lines = [] |
|
123 | lines = [] | |
103 | for line in output.splitlines(): |
|
124 | for line in output.splitlines(): | |
104 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. |
|
125 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. | |
105 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): |
|
126 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): | |
106 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] |
|
127 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] | |
107 |
|
128 | |||
108 | if line.startswith(b'__attribute__((deprecated('): |
|
129 | if line.startswith(b'__attribute__((deprecated('): | |
109 | continue |
|
130 | continue | |
110 | elif b'__declspec(deprecated(' in line: |
|
131 | elif b'__declspec(deprecated(' in line: | |
111 | continue |
|
132 | continue | |
112 |
|
133 | |||
113 | lines.append(line) |
|
134 | lines.append(line) | |
114 |
|
135 | |||
115 | return b'\n'.join(lines) |
|
136 | return b'\n'.join(lines) | |
116 |
|
137 | |||
117 |
|
138 | |||
118 | ffi = cffi.FFI() |
|
139 | ffi = cffi.FFI() | |
|
140 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning | |||
|
141 | # when cffi uses a deprecated function. Since we statically link against zstd, | |||
|
142 | # using deprecated functions shouldn't be much of a problem. | |||
119 | ffi.set_source('_zstd_cffi', ''' |
|
143 | ffi.set_source('_zstd_cffi', ''' | |
120 | #include "mem.h" |
|
144 | #include "mem.h" | |
121 | #define ZSTD_STATIC_LINKING_ONLY |
|
145 | #define ZSTD_STATIC_LINKING_ONLY | |
122 | #include "zstd.h" |
|
146 | #include "zstd.h" | |
123 | #define ZDICT_STATIC_LINKING_ONLY |
|
147 | #define ZDICT_STATIC_LINKING_ONLY | |
124 | #include "pool.h" |
|
148 | #define ZDICT_DISABLE_DEPRECATE_WARNINGS | |
125 | #include "zdict.h" |
|
149 | #include "zdict.h" | |
|
150 | #include "zstdmt_compress.h" | |||
126 | ''', sources=SOURCES, include_dirs=INCLUDE_DIRS) |
|
151 | ''', sources=SOURCES, include_dirs=INCLUDE_DIRS) | |
127 |
|
152 | |||
128 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') |
|
153 | DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ') | |
129 |
|
154 | |||
130 | sources = [] |
|
155 | sources = [] | |
131 |
|
156 | |||
|
157 | # Feed normalized preprocessor output for headers into the cdef parser. | |||
132 | for header in HEADERS: |
|
158 | for header in HEADERS: | |
133 | preprocessed = preprocess(header) |
|
159 | preprocessed = preprocess(header) | |
134 | sources.append(normalize_output(preprocessed)) |
|
160 | sources.append(normalize_output(preprocessed)) | |
135 |
|
161 | |||
136 | # Do another pass over source and find constants that were preprocessed |
|
162 | # #define's are effectively erased as part of going through the preprocessor. | |
137 | # away. |
|
163 | # So perform a manual pass to re-add those to the cdef source. | |
138 | with open(header, 'rb') as fh: |
|
164 | with open(header, 'rb') as fh: | |
139 | for line in fh: |
|
165 | for line in fh: | |
140 | line = line.strip() |
|
166 | line = line.strip() | |
141 | m = DEFINE.match(line) |
|
167 | m = DEFINE.match(line) | |
142 | if not m: |
|
168 | if not m: | |
143 | continue |
|
169 | continue | |
144 |
|
170 | |||
|
171 | if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY': | |||
|
172 | continue | |||
|
173 | ||||
145 | # The parser doesn't like some constants with complex values. |
|
174 | # The parser doesn't like some constants with complex values. | |
146 | if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'): |
|
175 | if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'): | |
147 | continue |
|
176 | continue | |
148 |
|
177 | |||
|
178 | # The ... is magic syntax that tells the cdef parser to resolve the | |||
|
179 | # value at compile time. | |||
149 | sources.append(m.group(0) + b' ...') |
|
180 | sources.append(m.group(0) + b' ...') | |
150 |
|
181 | |||
151 | ffi.cdef(u'\n'.join(s.decode('latin1') for s in sources)) |
|
182 | cdeflines = b'\n'.join(sources).splitlines() | |
|
183 | cdeflines = [l for l in cdeflines if l.strip()] | |||
|
184 | ffi.cdef(b'\n'.join(cdeflines).decode('latin1')) | |||
152 |
|
185 | |||
153 | if __name__ == '__main__': |
|
186 | if __name__ == '__main__': | |
154 | ffi.compile() |
|
187 | ffi.compile() |
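The "..." constants appended to the cdef source above rely on cffi resolving their values when the extension is compiled. A self-contained sketch of that mechanism (the EXAMPLE_CONSTANT name and _cdef_demo module are hypothetical, not part of this change):

    import cffi

    ffi = cffi.FFI()
    # The trailing "..." asks cffi to fill in the constant's value at
    # compile time, the same trick used for the zstd #define constants.
    ffi.cdef('#define EXAMPLE_CONSTANT ...')
    ffi.set_source('_cdef_demo', '#define EXAMPLE_CONSTANT 42')

    if __name__ == '__main__':
        ffi.compile()
        # The generated module then exposes the resolved value:
        #   from _cdef_demo import lib; lib.EXAMPLE_CONSTANT == 42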
@@ -1,70 +1,76 | |||||
1 | #!/usr/bin/env python |
|
1 | #!/usr/bin/env python | |
2 | # Copyright (c) 2016-present, Gregory Szorc |
|
2 | # Copyright (c) 2016-present, Gregory Szorc | |
3 | # All rights reserved. |
|
3 | # All rights reserved. | |
4 | # |
|
4 | # | |
5 | # This software may be modified and distributed under the terms |
|
5 | # This software may be modified and distributed under the terms | |
6 | # of the BSD license. See the LICENSE file for details. |
|
6 | # of the BSD license. See the LICENSE file for details. | |
7 |
|
7 | |||
8 | import sys |
|
8 | import sys | |
9 | from setuptools import setup |
|
9 | from setuptools import setup | |
10 |
|
10 | |||
11 | try: |
|
11 | try: | |
12 | import cffi |
|
12 | import cffi | |
13 | except ImportError: |
|
13 | except ImportError: | |
14 | cffi = None |
|
14 | cffi = None | |
15 |
|
15 | |||
16 | import setup_zstd |
|
16 | import setup_zstd | |
17 |
|
17 | |||
18 | SUPPORT_LEGACY = False |
|
18 | SUPPORT_LEGACY = False | |
19 |
|
19 | |||
20 | if "--legacy" in sys.argv: |
|
20 | if "--legacy" in sys.argv: | |
21 | SUPPORT_LEGACY = True |
|
21 | SUPPORT_LEGACY = True | |
22 | sys.argv.remove("--legacy") |
|
22 | sys.argv.remove("--legacy") | |
23 |
|
23 | |||
24 | # Code for obtaining the Extension instance is in its own module to |
|
24 | # Code for obtaining the Extension instance is in its own module to | |
25 | # facilitate reuse in other projects. |
|
25 | # facilitate reuse in other projects. | |
26 | extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')] |
|
26 | extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')] | |
27 |
|
27 | |||
|
28 | install_requires = [] | |||
|
29 | ||||
28 | if cffi: |
|
30 | if cffi: | |
29 | import make_cffi |
|
31 | import make_cffi | |
30 | extensions.append(make_cffi.ffi.distutils_extension()) |
|
32 | extensions.append(make_cffi.ffi.distutils_extension()) | |
31 |
|
33 | |||
|
34 | # A change shipped in cffi 1.8 is needed for ffi.from_buffer() behavior. | |||
|
35 | install_requires.append('cffi>=1.8') | |||
|
36 | ||||
32 | version = None |
|
37 | version = None | |
33 |
|
38 | |||
34 | with open('c-ext/python-zstandard.h', 'r') as fh: |
|
39 | with open('c-ext/python-zstandard.h', 'r') as fh: | |
35 | for line in fh: |
|
40 | for line in fh: | |
36 | if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'): |
|
41 | if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'): | |
37 | continue |
|
42 | continue | |
38 |
|
43 | |||
39 | version = line.split()[2][1:-1] |
|
44 | version = line.split()[2][1:-1] | |
40 | break |
|
45 | break | |
41 |
|
46 | |||
42 | if not version: |
|
47 | if not version: | |
43 | raise Exception('could not resolve package version; ' |
|
48 | raise Exception('could not resolve package version; ' | |
44 | 'this should never happen') |
|
49 | 'this should never happen') | |
45 |
|
50 | |||
46 | setup( |
|
51 | setup( | |
47 | name='zstandard', |
|
52 | name='zstandard', | |
48 | version=version, |
|
53 | version=version, | |
49 | description='Zstandard bindings for Python', |
|
54 | description='Zstandard bindings for Python', | |
50 | long_description=open('README.rst', 'r').read(), |
|
55 | long_description=open('README.rst', 'r').read(), | |
51 | url='https://github.com/indygreg/python-zstandard', |
|
56 | url='https://github.com/indygreg/python-zstandard', | |
52 | author='Gregory Szorc', |
|
57 | author='Gregory Szorc', | |
53 | author_email='gregory.szorc@gmail.com', |
|
58 | author_email='gregory.szorc@gmail.com', | |
54 | license='BSD', |
|
59 | license='BSD', | |
55 | classifiers=[ |
|
60 | classifiers=[ | |
56 | 'Development Status :: 4 - Beta', |
|
61 | 'Development Status :: 4 - Beta', | |
57 | 'Intended Audience :: Developers', |
|
62 | 'Intended Audience :: Developers', | |
58 | 'License :: OSI Approved :: BSD License', |
|
63 | 'License :: OSI Approved :: BSD License', | |
59 | 'Programming Language :: C', |
|
64 | 'Programming Language :: C', | |
60 | 'Programming Language :: Python :: 2.6', |
|
65 | 'Programming Language :: Python :: 2.6', | |
61 | 'Programming Language :: Python :: 2.7', |
|
66 | 'Programming Language :: Python :: 2.7', | |
62 | 'Programming Language :: Python :: 3.3', |
|
67 | 'Programming Language :: Python :: 3.3', | |
63 | 'Programming Language :: Python :: 3.4', |
|
68 | 'Programming Language :: Python :: 3.4', | |
64 | 'Programming Language :: Python :: 3.5', |
|
69 | 'Programming Language :: Python :: 3.5', | |
65 | 'Programming Language :: Python :: 3.6', |
|
70 | 'Programming Language :: Python :: 3.6', | |
66 | ], |
|
71 | ], | |
67 | keywords='zstandard zstd compression', |
|
72 | keywords='zstandard zstd compression', | |
68 | ext_modules=extensions, |
|
73 | ext_modules=extensions, | |
69 | test_suite='tests', |
|
74 | test_suite='tests', | |
|
75 | install_requires=install_requires, | |||
70 | ) |
|
76 | ) |
@@ -1,96 +1,102 | |||||
1 | # Copyright (c) 2016-present, Gregory Szorc |
|
1 | # Copyright (c) 2016-present, Gregory Szorc | |
2 | # All rights reserved. |
|
2 | # All rights reserved. | |
3 | # |
|
3 | # | |
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
7 | import os |
|
7 | import os | |
8 | from distutils.extension import Extension |
|
8 | from distutils.extension import Extension | |
9 |
|
9 | |||
10 |
|
10 | |||
11 | zstd_sources = ['zstd/%s' % p for p in ( |
|
11 | zstd_sources = ['zstd/%s' % p for p in ( | |
12 | 'common/entropy_common.c', |
|
12 | 'common/entropy_common.c', | |
13 | 'common/error_private.c', |
|
13 | 'common/error_private.c', | |
14 | 'common/fse_decompress.c', |
|
14 | 'common/fse_decompress.c', | |
15 | 'common/pool.c', |
|
15 | 'common/pool.c', | |
16 | 'common/threading.c', |
|
16 | 'common/threading.c', | |
17 | 'common/xxhash.c', |
|
17 | 'common/xxhash.c', | |
18 | 'common/zstd_common.c', |
|
18 | 'common/zstd_common.c', | |
19 | 'compress/fse_compress.c', |
|
19 | 'compress/fse_compress.c', | |
20 | 'compress/huf_compress.c', |
|
20 | 'compress/huf_compress.c', | |
21 | 'compress/zstd_compress.c', |
|
21 | 'compress/zstd_compress.c', | |
|
22 | 'compress/zstdmt_compress.c', | |||
22 | 'decompress/huf_decompress.c', |
|
23 | 'decompress/huf_decompress.c', | |
23 | 'decompress/zstd_decompress.c', |
|
24 | 'decompress/zstd_decompress.c', | |
24 | 'dictBuilder/cover.c', |
|
25 | 'dictBuilder/cover.c', | |
25 | 'dictBuilder/divsufsort.c', |
|
26 | 'dictBuilder/divsufsort.c', | |
26 | 'dictBuilder/zdict.c', |
|
27 | 'dictBuilder/zdict.c', | |
27 | )] |
|
28 | )] | |
28 |
|
29 | |||
29 | zstd_sources_legacy = ['zstd/%s' % p for p in ( |
|
30 | zstd_sources_legacy = ['zstd/%s' % p for p in ( | |
30 | 'deprecated/zbuff_common.c', |
|
31 | 'deprecated/zbuff_common.c', | |
31 | 'deprecated/zbuff_compress.c', |
|
32 | 'deprecated/zbuff_compress.c', | |
32 | 'deprecated/zbuff_decompress.c', |
|
33 | 'deprecated/zbuff_decompress.c', | |
33 | 'legacy/zstd_v01.c', |
|
34 | 'legacy/zstd_v01.c', | |
34 | 'legacy/zstd_v02.c', |
|
35 | 'legacy/zstd_v02.c', | |
35 | 'legacy/zstd_v03.c', |
|
36 | 'legacy/zstd_v03.c', | |
36 | 'legacy/zstd_v04.c', |
|
37 | 'legacy/zstd_v04.c', | |
37 | 'legacy/zstd_v05.c', |
|
38 | 'legacy/zstd_v05.c', | |
38 | 'legacy/zstd_v06.c', |
|
39 | 'legacy/zstd_v06.c', | |
39 | 'legacy/zstd_v07.c' |
|
40 | 'legacy/zstd_v07.c' | |
40 | )] |
|
41 | )] | |
41 |
|
42 | |||
42 | zstd_includes = [ |
|
43 | zstd_includes = [ | |
43 | 'c-ext', |
|
44 | 'c-ext', | |
44 | 'zstd', |
|
45 | 'zstd', | |
45 | 'zstd/common', |
|
46 | 'zstd/common', | |
46 | 'zstd/compress', |
|
47 | 'zstd/compress', | |
47 | 'zstd/decompress', |
|
48 | 'zstd/decompress', | |
48 | 'zstd/dictBuilder', |
|
49 | 'zstd/dictBuilder', | |
49 | ] |
|
50 | ] | |
50 |
|
51 | |||
51 | zstd_includes_legacy = [ |
|
52 | zstd_includes_legacy = [ | |
52 | 'zstd/deprecated', |
|
53 | 'zstd/deprecated', | |
53 | 'zstd/legacy', |
|
54 | 'zstd/legacy', | |
54 | ] |
|
55 | ] | |
55 |
|
56 | |||
56 | ext_sources = [ |
|
57 | ext_sources = [ | |
57 | 'zstd.c', |
|
58 | 'zstd.c', | |
|
59 | 'c-ext/bufferutil.c', | |||
58 | 'c-ext/compressiondict.c', |
|
60 | 'c-ext/compressiondict.c', | |
59 | 'c-ext/compressobj.c', |
|
61 | 'c-ext/compressobj.c', | |
60 | 'c-ext/compressor.c', |
|
62 | 'c-ext/compressor.c', | |
61 | 'c-ext/compressoriterator.c', |
|
63 | 'c-ext/compressoriterator.c', | |
62 | 'c-ext/compressionparams.c', |
|
64 | 'c-ext/compressionparams.c', | |
63 | 'c-ext/compressionwriter.c', |
|
65 | 'c-ext/compressionwriter.c', | |
64 | 'c-ext/constants.c', |
|
66 | 'c-ext/constants.c', | |
65 | 'c-ext/decompressobj.c', |
|
67 | 'c-ext/decompressobj.c', | |
66 | 'c-ext/decompressor.c', |
|
68 | 'c-ext/decompressor.c', | |
67 | 'c-ext/decompressoriterator.c', |
|
69 | 'c-ext/decompressoriterator.c', | |
68 | 'c-ext/decompressionwriter.c', |
|
70 | 'c-ext/decompressionwriter.c', | |
69 | 'c-ext/dictparams.c', |
|
|||
70 | 'c-ext/frameparams.c', |
|
71 | 'c-ext/frameparams.c', | |
71 | ] |
|
72 | ] | |
72 |
|
73 | |||
73 | zstd_depends = [ |
|
74 | zstd_depends = [ | |
74 | 'c-ext/python-zstandard.h', |
|
75 | 'c-ext/python-zstandard.h', | |
75 | ] |
|
76 | ] | |
76 |
|
77 | |||
77 |
|
78 | |||
78 | def get_c_extension(support_legacy=False, name='zstd'): |
|
79 | def get_c_extension(support_legacy=False, name='zstd'): | |
79 | """Obtain a distutils.extension.Extension for the C extension.""" |
|
80 | """Obtain a distutils.extension.Extension for the C extension.""" | |
80 | root = os.path.abspath(os.path.dirname(__file__)) |
|
81 | root = os.path.abspath(os.path.dirname(__file__)) | |
81 |
|
82 | |||
82 | sources = [os.path.join(root, p) for p in zstd_sources + ext_sources] |
|
83 | sources = [os.path.join(root, p) for p in zstd_sources + ext_sources] | |
83 | if support_legacy: |
|
84 | if support_legacy: | |
84 | sources.extend([os.path.join(root, p) for p in zstd_sources_legacy]) |
|
85 | sources.extend([os.path.join(root, p) for p in zstd_sources_legacy]) | |
85 |
|
86 | |||
86 | include_dirs = [os.path.join(root, d) for d in zstd_includes] |
|
87 | include_dirs = [os.path.join(root, d) for d in zstd_includes] | |
87 | if support_legacy: |
|
88 | if support_legacy: | |
88 | include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy]) |
|
89 | include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy]) | |
89 |
|
90 | |||
90 | depends = [os.path.join(root, p) for p in zstd_depends] |
|
91 | depends = [os.path.join(root, p) for p in zstd_depends] | |
91 |
|
92 | |||
|
93 | extra_args = ['-DZSTD_MULTITHREAD'] | |||
|
94 | ||||
|
95 | if support_legacy: | |||
|
96 | extra_args.append('-DZSTD_LEGACY_SUPPORT=1') | |||
|
97 | ||||
92 | # TODO compile with optimizations. |
|
98 | # TODO compile with optimizations. | |
93 | return Extension(name, sources, |
|
99 | return Extension(name, sources, | |
94 | include_dirs=include_dirs, |
|
100 | include_dirs=include_dirs, | |
95 | depends=depends, |
|
101 | depends=depends, | |
96 | extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else []) |
|
102 | extra_compile_args=extra_args) |
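setup.py notes that this module lives on its own so other projects can reuse it. A sketch of how an embedding project's setup.py might call get_c_extension (the project and extension names here are hypothetical, and the sketch assumes setup_zstd.py plus the zstd/ and c-ext/ sources are vendored alongside it):

    from setuptools import setup

    import setup_zstd

    setup(
        name='embedding-project',
        version='0.1',
        # Build the C extension under a custom module name; enable legacy
        # zstd format support if older frames must remain readable.
        ext_modules=[setup_zstd.get_c_extension(support_legacy=True,
                                                name='vendored_zstd')],
    )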
@@ -1,61 +1,88 | |||||
1 | import inspect |
|
1 | import inspect | |
2 | import io |
|
2 | import io | |
|
3 | import os | |||
3 | import types |
|
4 | import types | |
4 |
|
5 | |||
5 |
|
6 | |||
6 | def make_cffi(cls): |
|
7 | def make_cffi(cls): | |
7 | """Decorator to add CFFI versions of each test method.""" |
|
8 | """Decorator to add CFFI versions of each test method.""" | |
8 |
|
9 | |||
9 | try: |
|
10 | try: | |
10 | import zstd_cffi |
|
11 | import zstd_cffi | |
11 | except ImportError: |
|
12 | except ImportError: | |
12 | return cls |
|
13 | return cls | |
13 |
|
14 | |||
14 | # If CFFI version is available, dynamically construct test methods |
|
15 | # If CFFI version is available, dynamically construct test methods | |
15 | # that use it. |
|
16 | # that use it. | |
16 |
|
17 | |||
17 | for attr in dir(cls): |
|
18 | for attr in dir(cls): | |
18 | fn = getattr(cls, attr) |
|
19 | fn = getattr(cls, attr) | |
19 | if not inspect.ismethod(fn) and not inspect.isfunction(fn): |
|
20 | if not inspect.ismethod(fn) and not inspect.isfunction(fn): | |
20 | continue |
|
21 | continue | |
21 |
|
22 | |||
22 | if not fn.__name__.startswith('test_'): |
|
23 | if not fn.__name__.startswith('test_'): | |
23 | continue |
|
24 | continue | |
24 |
|
25 | |||
25 | name = '%s_cffi' % fn.__name__ |
|
26 | name = '%s_cffi' % fn.__name__ | |
26 |
|
27 | |||
27 | # Replace the "zstd" symbol with the CFFI module instance. Then copy |
|
28 | # Replace the "zstd" symbol with the CFFI module instance. Then copy | |
28 | # the function object and install it in a new attribute. |
|
29 | # the function object and install it in a new attribute. | |
29 | if isinstance(fn, types.FunctionType): |
|
30 | if isinstance(fn, types.FunctionType): | |
30 | globs = dict(fn.__globals__) |
|
31 | globs = dict(fn.__globals__) | |
31 | globs['zstd'] = zstd_cffi |
|
32 | globs['zstd'] = zstd_cffi | |
32 | new_fn = types.FunctionType(fn.__code__, globs, name, |
|
33 | new_fn = types.FunctionType(fn.__code__, globs, name, | |
33 | fn.__defaults__, fn.__closure__) |
|
34 | fn.__defaults__, fn.__closure__) | |
34 | new_method = new_fn |
|
35 | new_method = new_fn | |
35 | else: |
|
36 | else: | |
36 | globs = dict(fn.__func__.func_globals) |
|
37 | globs = dict(fn.__func__.func_globals) | |
37 | globs['zstd'] = zstd_cffi |
|
38 | globs['zstd'] = zstd_cffi | |
38 | new_fn = types.FunctionType(fn.__func__.func_code, globs, name, |
|
39 | new_fn = types.FunctionType(fn.__func__.func_code, globs, name, | |
39 | fn.__func__.func_defaults, |
|
40 | fn.__func__.func_defaults, | |
40 | fn.__func__.func_closure) |
|
41 | fn.__func__.func_closure) | |
41 | new_method = types.UnboundMethodType(new_fn, fn.im_self, |
|
42 | new_method = types.UnboundMethodType(new_fn, fn.im_self, | |
42 | fn.im_class) |
|
43 | fn.im_class) | |
43 |
|
44 | |||
44 | setattr(cls, name, new_method) |
|
45 | setattr(cls, name, new_method) | |
45 |
|
46 | |||
46 | return cls |
|
47 | return cls | |
47 |
|
48 | |||
48 |
|
49 | |||
49 | class OpCountingBytesIO(io.BytesIO): |
|
50 | class OpCountingBytesIO(io.BytesIO): | |
50 | def __init__(self, *args, **kwargs): |
|
51 | def __init__(self, *args, **kwargs): | |
51 | self._read_count = 0 |
|
52 | self._read_count = 0 | |
52 | self._write_count = 0 |
|
53 | self._write_count = 0 | |
53 | return super(OpCountingBytesIO, self).__init__(*args, **kwargs) |
|
54 | return super(OpCountingBytesIO, self).__init__(*args, **kwargs) | |
54 |
|
55 | |||
55 | def read(self, *args): |
|
56 | def read(self, *args): | |
56 | self._read_count += 1 |
|
57 | self._read_count += 1 | |
57 | return super(OpCountingBytesIO, self).read(*args) |
|
58 | return super(OpCountingBytesIO, self).read(*args) | |
58 |
|
59 | |||
59 | def write(self, data): |
|
60 | def write(self, data): | |
60 | self._write_count += 1 |
|
61 | self._write_count += 1 | |
61 | return super(OpCountingBytesIO, self).write(data) |
|
62 | return super(OpCountingBytesIO, self).write(data) | |
|
63 | ||||
|
64 | ||||
|
65 | _source_files = [] | |||
|
66 | ||||
|
67 | ||||
|
68 | def random_input_data(): | |||
|
69 | """Obtain the raw content of source files. | |||
|
70 | ||||
|
71 | This is used for generating "random" data to feed into fuzzing, since it is | |||
|
72 | faster than random content generation. | |||
|
73 | """ | |||
|
74 | if _source_files: | |||
|
75 | return _source_files | |||
|
76 | ||||
|
77 | for root, dirs, files in os.walk(os.path.dirname(__file__)): | |||
|
78 | dirs[:] = list(sorted(dirs)) | |||
|
79 | for f in sorted(files): | |||
|
80 | try: | |||
|
81 | with open(os.path.join(root, f), 'rb') as fh: | |||
|
82 | data = fh.read() | |||
|
83 | if data: | |||
|
84 | _source_files.append(data) | |||
|
85 | except OSError: | |||
|
86 | pass | |||
|
87 | ||||
|
88 | return _source_files |
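For reference, a sketch of how the make_cffi decorator is meant to be used (the test class and method below are hypothetical; real usages appear in the test modules later in this diff). Decorating a TestCase makes every test_* method gain a *_cffi twin whose module-level zstd name is rebound to the zstd_cffi backend, so one test body exercises both backends:

    import unittest

    import zstd

    from .common import make_cffi


    @make_cffi
    class TestExample(unittest.TestCase):
        def test_magic_number(self):
            cctx = zstd.ZstdCompressor(level=1)
            # Runs once against the C extension and, if zstd_cffi imports,
            # again as test_magic_number_cffi against the cffi backend.
            self.assertEqual(cctx.compress(b'foo' * 64)[0:4],
                             b'\x28\xb5\x2f\xfd')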
@@ -1,675 +1,905 | |||||
1 | import hashlib |
|
1 | import hashlib | |
2 | import io |
|
2 | import io | |
3 | import struct |
|
3 | import struct | |
4 | import sys |
|
4 | import sys | |
5 |
|
5 | |||
6 | try: |
|
6 | try: | |
7 | import unittest2 as unittest |
|
7 | import unittest2 as unittest | |
8 | except ImportError: |
|
8 | except ImportError: | |
9 | import unittest |
|
9 | import unittest | |
10 |
|
10 | |||
11 | import zstd |
|
11 | import zstd | |
12 |
|
12 | |||
13 | from .common import ( |
|
13 | from .common import ( | |
14 | make_cffi, |
|
14 | make_cffi, | |
15 | OpCountingBytesIO, |
|
15 | OpCountingBytesIO, | |
16 | ) |
|
16 | ) | |
17 |
|
17 | |||
18 |
|
18 | |||
19 | if sys.version_info[0] >= 3: |
|
19 | if sys.version_info[0] >= 3: | |
20 | next = lambda it: it.__next__() |
|
20 | next = lambda it: it.__next__() | |
21 | else: |
|
21 | else: | |
22 | next = lambda it: it.next() |
|
22 | next = lambda it: it.next() | |
23 |
|
23 | |||
24 |
|
24 | |||
|
25 | def multithreaded_chunk_size(level, source_size=0): | |||
|
26 | params = zstd.get_compression_parameters(level, source_size) | |||
|
27 | ||||
|
28 | return 1 << (params.window_log + 2) | |||
|
29 | ||||
|
30 | ||||
25 | @make_cffi |
|
31 | @make_cffi | |
26 | class TestCompressor(unittest.TestCase): |
|
32 | class TestCompressor(unittest.TestCase): | |
27 | def test_level_bounds(self): |
|
33 | def test_level_bounds(self): | |
28 | with self.assertRaises(ValueError): |
|
34 | with self.assertRaises(ValueError): | |
29 | zstd.ZstdCompressor(level=0) |
|
35 | zstd.ZstdCompressor(level=0) | |
30 |
|
36 | |||
31 | with self.assertRaises(ValueError): |
|
37 | with self.assertRaises(ValueError): | |
32 | zstd.ZstdCompressor(level=23) |
|
38 | zstd.ZstdCompressor(level=23) | |
33 |
|
39 | |||
34 |
|
40 | |||
35 | @make_cffi |
|
41 | @make_cffi | |
36 | class TestCompressor_compress(unittest.TestCase): |
|
42 | class TestCompressor_compress(unittest.TestCase): | |
|
43 | def test_multithreaded_unsupported(self): | |||
|
44 | samples = [] | |||
|
45 | for i in range(128): | |||
|
46 | samples.append(b'foo' * 64) | |||
|
47 | samples.append(b'bar' * 64) | |||
|
48 | ||||
|
49 | d = zstd.train_dictionary(8192, samples) | |||
|
50 | ||||
|
51 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) | |||
|
52 | ||||
|
53 | with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both dictionaries and multi-threaded compression'): | |||
|
54 | cctx.compress(b'foo') | |||
|
55 | ||||
|
56 | params = zstd.get_compression_parameters(3) | |||
|
57 | cctx = zstd.ZstdCompressor(compression_params=params, threads=2) | |||
|
58 | with self.assertRaisesRegexp(zstd.ZstdError, 'compress\(\) cannot be used with both compression parameters and multi-threaded compression'): | |||
|
59 | cctx.compress(b'foo') | |||
|
60 | ||||
37 | def test_compress_empty(self): |
|
61 | def test_compress_empty(self): | |
38 | cctx = zstd.ZstdCompressor(level=1) |
|
62 | cctx = zstd.ZstdCompressor(level=1) | |
39 | result = cctx.compress(b'') |
|
63 | result = cctx.compress(b'') | |
40 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
64 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
41 | params = zstd.get_frame_parameters(result) |
|
65 | params = zstd.get_frame_parameters(result) | |
42 | self.assertEqual(params.content_size, 0) |
|
66 | self.assertEqual(params.content_size, 0) | |
43 | self.assertEqual(params.window_size, 524288) |
|
67 | self.assertEqual(params.window_size, 524288) | |
44 | self.assertEqual(params.dict_id, 0) |
|
68 | self.assertEqual(params.dict_id, 0) | |
45 | self.assertFalse(params.has_checksum, 0) |
|
69 | self.assertFalse(params.has_checksum, 0) | |
46 |
|
70 | |||
47 | # TODO should be temporary until https://github.com/facebook/zstd/issues/506 |
|
71 | # TODO should be temporary until https://github.com/facebook/zstd/issues/506 | |
48 | # is fixed. |
|
72 | # is fixed. | |
49 | cctx = zstd.ZstdCompressor(write_content_size=True) |
|
73 | cctx = zstd.ZstdCompressor(write_content_size=True) | |
50 | with self.assertRaises(ValueError): |
|
74 | with self.assertRaises(ValueError): | |
51 | cctx.compress(b'') |
|
75 | cctx.compress(b'') | |
52 |
|
76 | |||
53 | cctx.compress(b'', allow_empty=True) |
|
77 | cctx.compress(b'', allow_empty=True) | |
54 |
|
78 | |||
55 | def test_compress_large(self): |
|
79 | def test_compress_large(self): | |
56 | chunks = [] |
|
80 | chunks = [] | |
57 | for i in range(255): |
|
81 | for i in range(255): | |
58 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
82 | chunks.append(struct.Struct('>B').pack(i) * 16384) | |
59 |
|
83 | |||
60 | cctx = zstd.ZstdCompressor(level=3) |
|
84 | cctx = zstd.ZstdCompressor(level=3) | |
61 | result = cctx.compress(b''.join(chunks)) |
|
85 | result = cctx.compress(b''.join(chunks)) | |
62 | self.assertEqual(len(result), 999) |
|
86 | self.assertEqual(len(result), 999) | |
63 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
87 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
64 |
|
88 | |||
65 | # This matches the test for read_from() below. |
|
89 | # This matches the test for read_from() below. | |
66 | cctx = zstd.ZstdCompressor(level=1) |
|
90 | cctx = zstd.ZstdCompressor(level=1) | |
67 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') |
|
91 | result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o') | |
68 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' |
|
92 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' | |
69 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' |
|
93 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' | |
70 | b'\x02\x09\x00\x00\x6f') |
|
94 | b'\x02\x09\x00\x00\x6f') | |
71 |
|
95 | |||
72 | def test_write_checksum(self): |
|
96 | def test_write_checksum(self): | |
73 | cctx = zstd.ZstdCompressor(level=1) |
|
97 | cctx = zstd.ZstdCompressor(level=1) | |
74 | no_checksum = cctx.compress(b'foobar') |
|
98 | no_checksum = cctx.compress(b'foobar') | |
75 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
99 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
76 | with_checksum = cctx.compress(b'foobar') |
|
100 | with_checksum = cctx.compress(b'foobar') | |
77 |
|
101 | |||
78 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
102 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) | |
79 |
|
103 | |||
80 | no_params = zstd.get_frame_parameters(no_checksum) |
|
104 | no_params = zstd.get_frame_parameters(no_checksum) | |
81 | with_params = zstd.get_frame_parameters(with_checksum) |
|
105 | with_params = zstd.get_frame_parameters(with_checksum) | |
82 |
|
106 | |||
83 | self.assertFalse(no_params.has_checksum) |
|
107 | self.assertFalse(no_params.has_checksum) | |
84 | self.assertTrue(with_params.has_checksum) |
|
108 | self.assertTrue(with_params.has_checksum) | |
85 |
|
109 | |||
86 | def test_write_content_size(self): |
|
110 | def test_write_content_size(self): | |
87 | cctx = zstd.ZstdCompressor(level=1) |
|
111 | cctx = zstd.ZstdCompressor(level=1) | |
88 | no_size = cctx.compress(b'foobar' * 256) |
|
112 | no_size = cctx.compress(b'foobar' * 256) | |
89 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
113 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) | |
90 | with_size = cctx.compress(b'foobar' * 256) |
|
114 | with_size = cctx.compress(b'foobar' * 256) | |
91 |
|
115 | |||
92 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
116 | self.assertEqual(len(with_size), len(no_size) + 1) | |
93 |
|
117 | |||
94 | no_params = zstd.get_frame_parameters(no_size) |
|
118 | no_params = zstd.get_frame_parameters(no_size) | |
95 | with_params = zstd.get_frame_parameters(with_size) |
|
119 | with_params = zstd.get_frame_parameters(with_size) | |
96 | self.assertEqual(no_params.content_size, 0) |
|
120 | self.assertEqual(no_params.content_size, 0) | |
97 | self.assertEqual(with_params.content_size, 1536) |
|
121 | self.assertEqual(with_params.content_size, 1536) | |
98 |
|
122 | |||
99 | def test_no_dict_id(self): |
|
123 | def test_no_dict_id(self): | |
100 | samples = [] |
|
124 | samples = [] | |
101 | for i in range(128): |
|
125 | for i in range(128): | |
102 | samples.append(b'foo' * 64) |
|
126 | samples.append(b'foo' * 64) | |
103 | samples.append(b'bar' * 64) |
|
127 | samples.append(b'bar' * 64) | |
104 | samples.append(b'foobar' * 64) |
|
128 | samples.append(b'foobar' * 64) | |
105 |
|
129 | |||
106 | d = zstd.train_dictionary(1024, samples) |
|
130 | d = zstd.train_dictionary(1024, samples) | |
107 |
|
131 | |||
108 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
132 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
109 | with_dict_id = cctx.compress(b'foobarfoobar') |
|
133 | with_dict_id = cctx.compress(b'foobarfoobar') | |
110 |
|
134 | |||
111 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
135 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) | |
112 | no_dict_id = cctx.compress(b'foobarfoobar') |
|
136 | no_dict_id = cctx.compress(b'foobarfoobar') | |
113 |
|
137 | |||
114 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) |
|
138 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) | |
115 |
|
139 | |||
116 | no_params = zstd.get_frame_parameters(no_dict_id) |
|
140 | no_params = zstd.get_frame_parameters(no_dict_id) | |
117 | with_params = zstd.get_frame_parameters(with_dict_id) |
|
141 | with_params = zstd.get_frame_parameters(with_dict_id) | |
118 | self.assertEqual(no_params.dict_id, 0) |
|
142 | self.assertEqual(no_params.dict_id, 0) | |
119 | self.assertEqual(with_params.dict_id, 1584102229) |
|
143 | self.assertEqual(with_params.dict_id, 1584102229) | |
120 |
|
144 | |||
121 | def test_compress_dict_multiple(self): |
|
145 | def test_compress_dict_multiple(self): | |
122 | samples = [] |
|
146 | samples = [] | |
123 | for i in range(128): |
|
147 | for i in range(128): | |
124 | samples.append(b'foo' * 64) |
|
148 | samples.append(b'foo' * 64) | |
125 | samples.append(b'bar' * 64) |
|
149 | samples.append(b'bar' * 64) | |
126 | samples.append(b'foobar' * 64) |
|
150 | samples.append(b'foobar' * 64) | |
127 |
|
151 | |||
128 | d = zstd.train_dictionary(8192, samples) |
|
152 | d = zstd.train_dictionary(8192, samples) | |
129 |
|
153 | |||
130 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
154 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
131 |
|
155 | |||
132 | for i in range(32): |
|
156 | for i in range(32): | |
133 | cctx.compress(b'foo bar foobar foo bar foobar') |
|
157 | cctx.compress(b'foo bar foobar foo bar foobar') | |
134 |
|
158 | |||
|
159 | def test_multithreaded(self): | |||
|
160 | chunk_size = multithreaded_chunk_size(1) | |||
|
161 | source = b''.join([b'x' * chunk_size, b'y' * chunk_size]) | |||
|
162 | ||||
|
163 | cctx = zstd.ZstdCompressor(level=1, threads=2) | |||
|
164 | compressed = cctx.compress(source) | |||
|
165 | ||||
|
166 | params = zstd.get_frame_parameters(compressed) | |||
|
167 | self.assertEqual(params.content_size, chunk_size * 2) | |||
|
168 | self.assertEqual(params.dict_id, 0) | |||
|
169 | self.assertFalse(params.has_checksum) | |||
|
170 | ||||
|
171 | dctx = zstd.ZstdDecompressor() | |||
|
172 | self.assertEqual(dctx.decompress(compressed), source) | |||
|
173 | ||||
135 |
|
174 | |||
136 | @make_cffi |
|
175 | @make_cffi | |
137 | class TestCompressor_compressobj(unittest.TestCase): |
|
176 | class TestCompressor_compressobj(unittest.TestCase): | |
138 | def test_compressobj_empty(self): |
|
177 | def test_compressobj_empty(self): | |
139 | cctx = zstd.ZstdCompressor(level=1) |
|
178 | cctx = zstd.ZstdCompressor(level=1) | |
140 | cobj = cctx.compressobj() |
|
179 | cobj = cctx.compressobj() | |
141 | self.assertEqual(cobj.compress(b''), b'') |
|
180 | self.assertEqual(cobj.compress(b''), b'') | |
142 | self.assertEqual(cobj.flush(), |
|
181 | self.assertEqual(cobj.flush(), | |
143 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
182 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
144 |
|
183 | |||
145 | def test_compressobj_large(self): |
|
184 | def test_compressobj_large(self): | |
146 | chunks = [] |
|
185 | chunks = [] | |
147 | for i in range(255): |
|
186 | for i in range(255): | |
148 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
187 | chunks.append(struct.Struct('>B').pack(i) * 16384) | |
149 |
|
188 | |||
150 | cctx = zstd.ZstdCompressor(level=3) |
|
189 | cctx = zstd.ZstdCompressor(level=3) | |
151 | cobj = cctx.compressobj() |
|
190 | cobj = cctx.compressobj() | |
152 |
|
191 | |||
153 | result = cobj.compress(b''.join(chunks)) + cobj.flush() |
|
192 | result = cobj.compress(b''.join(chunks)) + cobj.flush() | |
154 | self.assertEqual(len(result), 999) |
|
193 | self.assertEqual(len(result), 999) | |
155 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') |
|
194 | self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd') | |
156 |
|
195 | |||
157 | params = zstd.get_frame_parameters(result) |
|
196 | params = zstd.get_frame_parameters(result) | |
158 | self.assertEqual(params.content_size, 0) |
|
197 | self.assertEqual(params.content_size, 0) | |
159 | self.assertEqual(params.window_size, 1048576) |
|
198 | self.assertEqual(params.window_size, 1048576) | |
160 | self.assertEqual(params.dict_id, 0) |
|
199 | self.assertEqual(params.dict_id, 0) | |
161 | self.assertFalse(params.has_checksum) |
|
200 | self.assertFalse(params.has_checksum) | |
162 |
|
201 | |||
163 | def test_write_checksum(self): |
|
202 | def test_write_checksum(self): | |
164 | cctx = zstd.ZstdCompressor(level=1) |
|
203 | cctx = zstd.ZstdCompressor(level=1) | |
165 | cobj = cctx.compressobj() |
|
204 | cobj = cctx.compressobj() | |
166 | no_checksum = cobj.compress(b'foobar') + cobj.flush() |
|
205 | no_checksum = cobj.compress(b'foobar') + cobj.flush() | |
167 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
206 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
168 | cobj = cctx.compressobj() |
|
207 | cobj = cctx.compressobj() | |
169 | with_checksum = cobj.compress(b'foobar') + cobj.flush() |
|
208 | with_checksum = cobj.compress(b'foobar') + cobj.flush() | |
170 |
|
209 | |||
171 | no_params = zstd.get_frame_parameters(no_checksum) |
|
210 | no_params = zstd.get_frame_parameters(no_checksum) | |
172 | with_params = zstd.get_frame_parameters(with_checksum) |
|
211 | with_params = zstd.get_frame_parameters(with_checksum) | |
173 | self.assertEqual(no_params.content_size, 0) |
|
212 | self.assertEqual(no_params.content_size, 0) | |
174 | self.assertEqual(with_params.content_size, 0) |
|
213 | self.assertEqual(with_params.content_size, 0) | |
175 | self.assertEqual(no_params.dict_id, 0) |
|
214 | self.assertEqual(no_params.dict_id, 0) | |
176 | self.assertEqual(with_params.dict_id, 0) |
|
215 | self.assertEqual(with_params.dict_id, 0) | |
177 | self.assertFalse(no_params.has_checksum) |
|
216 | self.assertFalse(no_params.has_checksum) | |
178 | self.assertTrue(with_params.has_checksum) |
|
217 | self.assertTrue(with_params.has_checksum) | |
179 |
|
218 | |||
180 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
219 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) | |
181 |
|
220 | |||
182 | def test_write_content_size(self): |
|
221 | def test_write_content_size(self): | |
183 | cctx = zstd.ZstdCompressor(level=1) |
|
222 | cctx = zstd.ZstdCompressor(level=1) | |
184 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
223 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) | |
185 | no_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
224 | no_size = cobj.compress(b'foobar' * 256) + cobj.flush() | |
186 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
225 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) | |
187 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) |
|
226 | cobj = cctx.compressobj(size=len(b'foobar' * 256)) | |
188 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() |
|
227 | with_size = cobj.compress(b'foobar' * 256) + cobj.flush() | |
189 |
|
228 | |||
190 | no_params = zstd.get_frame_parameters(no_size) |
|
229 | no_params = zstd.get_frame_parameters(no_size) | |
191 | with_params = zstd.get_frame_parameters(with_size) |
|
230 | with_params = zstd.get_frame_parameters(with_size) | |
192 | self.assertEqual(no_params.content_size, 0) |
|
231 | self.assertEqual(no_params.content_size, 0) | |
193 | self.assertEqual(with_params.content_size, 1536) |
|
232 | self.assertEqual(with_params.content_size, 1536) | |
194 | self.assertEqual(no_params.dict_id, 0) |
|
233 | self.assertEqual(no_params.dict_id, 0) | |
195 | self.assertEqual(with_params.dict_id, 0) |
|
234 | self.assertEqual(with_params.dict_id, 0) | |
196 | self.assertFalse(no_params.has_checksum) |
|
235 | self.assertFalse(no_params.has_checksum) | |
197 | self.assertFalse(with_params.has_checksum) |
|
236 | self.assertFalse(with_params.has_checksum) | |
198 |
|
237 | |||
199 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
238 | self.assertEqual(len(with_size), len(no_size) + 1) | |
200 |
|
239 | |||
201 | def test_compress_after_finished(self): |
|
240 | def test_compress_after_finished(self): | |
202 | cctx = zstd.ZstdCompressor() |
|
241 | cctx = zstd.ZstdCompressor() | |
203 | cobj = cctx.compressobj() |
|
242 | cobj = cctx.compressobj() | |
204 |
|
243 | |||
205 | cobj.compress(b'foo') |
|
244 | cobj.compress(b'foo') | |
206 | cobj.flush() |
|
245 | cobj.flush() | |
207 |
|
246 | |||
208 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'): |
|
247 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'): | |
209 | cobj.compress(b'foo') |
|
248 | cobj.compress(b'foo') | |
210 |
|
249 | |||
211 | with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'): |
|
250 | with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'): | |
212 | cobj.flush() |
|
251 | cobj.flush() | |
213 |
|
252 | |||
214 | def test_flush_block_repeated(self): |
|
253 | def test_flush_block_repeated(self): | |
215 | cctx = zstd.ZstdCompressor(level=1) |
|
254 | cctx = zstd.ZstdCompressor(level=1) | |
216 | cobj = cctx.compressobj() |
|
255 | cobj = cctx.compressobj() | |
217 |
|
256 | |||
218 | self.assertEqual(cobj.compress(b'foo'), b'') |
|
257 | self.assertEqual(cobj.compress(b'foo'), b'') | |
219 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), |
|
258 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
220 | b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') |
|
259 | b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') | |
221 | self.assertEqual(cobj.compress(b'bar'), b'') |
|
260 | self.assertEqual(cobj.compress(b'bar'), b'') | |
222 | # 3 byte header plus content. |
|
261 | # 3 byte header plus content. | |
223 | self.assertEqual(cobj.flush(), b'\x19\x00\x00bar') |
|
262 | self.assertEqual(cobj.flush(), b'\x19\x00\x00bar') | |
224 |
|
263 | |||
225 | def test_flush_empty_block(self): |
|
264 | def test_flush_empty_block(self): | |
226 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
265 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
227 | cobj = cctx.compressobj() |
|
266 | cobj = cctx.compressobj() | |
228 |
|
267 | |||
229 | cobj.compress(b'foobar') |
|
268 | cobj.compress(b'foobar') | |
230 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
269 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) | |
231 | # No-op if no block is active (this is internal to zstd). |
|
270 | # No-op if no block is active (this is internal to zstd). | |
232 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'') |
|
271 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'') | |
233 |
|
272 | |||
234 | trailing = cobj.flush() |
|
273 | trailing = cobj.flush() | |
235 | # 3 bytes block header + 4 bytes frame checksum |
|
274 | # 3 bytes block header + 4 bytes frame checksum | |
236 | self.assertEqual(len(trailing), 7) |
|
275 | self.assertEqual(len(trailing), 7) | |
237 | header = trailing[0:3] |
|
276 | header = trailing[0:3] | |
238 | self.assertEqual(header, b'\x01\x00\x00') |
|
277 | self.assertEqual(header, b'\x01\x00\x00') | |
239 |
|
278 | |||
+    def test_multithreaded(self):
+        source = io.BytesIO()
+        source.write(b'a' * 1048576)
+        source.write(b'b' * 1048576)
+        source.write(b'c' * 1048576)
+        source.seek(0)
+
+        cctx = zstd.ZstdCompressor(level=1, threads=2)
+        cobj = cctx.compressobj()
+
+        chunks = []
+        while True:
+            d = source.read(8192)
+            if not d:
+                break
+
+            chunks.append(cobj.compress(d))
+
+        chunks.append(cobj.flush())
+
+        compressed = b''.join(chunks)
+
+        self.assertEqual(len(compressed), 295)
+

@make_cffi
class TestCompressor_copy_stream(unittest.TestCase):
    def test_no_read(self):
        source = object()
        dest = io.BytesIO()

        cctx = zstd.ZstdCompressor()
        with self.assertRaises(ValueError):
            cctx.copy_stream(source, dest)

    def test_no_write(self):
        source = io.BytesIO()
        dest = object()

        cctx = zstd.ZstdCompressor()
        with self.assertRaises(ValueError):
            cctx.copy_stream(source, dest)

    def test_empty(self):
        source = io.BytesIO()
        dest = io.BytesIO()

        cctx = zstd.ZstdCompressor(level=1)
        r, w = cctx.copy_stream(source, dest)
        self.assertEqual(int(r), 0)
        self.assertEqual(w, 9)

        self.assertEqual(dest.getvalue(),
                         b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')

    def test_large_data(self):
        source = io.BytesIO()
        for i in range(255):
            source.write(struct.Struct('>B').pack(i) * 16384)
        source.seek(0)

        dest = io.BytesIO()
        cctx = zstd.ZstdCompressor()
        r, w = cctx.copy_stream(source, dest)

        self.assertEqual(r, 255 * 16384)
        self.assertEqual(w, 999)

        params = zstd.get_frame_parameters(dest.getvalue())
        self.assertEqual(params.content_size, 0)
        self.assertEqual(params.window_size, 1048576)
        self.assertEqual(params.dict_id, 0)
        self.assertFalse(params.has_checksum)

    def test_write_checksum(self):
        source = io.BytesIO(b'foobar')
        no_checksum = io.BytesIO()

        cctx = zstd.ZstdCompressor(level=1)
        cctx.copy_stream(source, no_checksum)

        source.seek(0)
        with_checksum = io.BytesIO()
        cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
        cctx.copy_stream(source, with_checksum)

        self.assertEqual(len(with_checksum.getvalue()),
                         len(no_checksum.getvalue()) + 4)

        no_params = zstd.get_frame_parameters(no_checksum.getvalue())
        with_params = zstd.get_frame_parameters(with_checksum.getvalue())
        self.assertEqual(no_params.content_size, 0)
        self.assertEqual(with_params.content_size, 0)
        self.assertEqual(no_params.dict_id, 0)
        self.assertEqual(with_params.dict_id, 0)
        self.assertFalse(no_params.has_checksum)
        self.assertTrue(with_params.has_checksum)

    def test_write_content_size(self):
        source = io.BytesIO(b'foobar' * 256)
        no_size = io.BytesIO()

        cctx = zstd.ZstdCompressor(level=1)
        cctx.copy_stream(source, no_size)

        source.seek(0)
        with_size = io.BytesIO()
        cctx = zstd.ZstdCompressor(level=1, write_content_size=True)
        cctx.copy_stream(source, with_size)

        # Source content size is unknown, so no content size written.
        self.assertEqual(len(with_size.getvalue()),
                         len(no_size.getvalue()))

        source.seek(0)
        with_size = io.BytesIO()
        cctx.copy_stream(source, with_size, size=len(source.getvalue()))

        # We specified source size, so content size header is present.
        self.assertEqual(len(with_size.getvalue()),
                         len(no_size.getvalue()) + 1)

        no_params = zstd.get_frame_parameters(no_size.getvalue())
        with_params = zstd.get_frame_parameters(with_size.getvalue())
        self.assertEqual(no_params.content_size, 0)
        self.assertEqual(with_params.content_size, 1536)
        self.assertEqual(no_params.dict_id, 0)
        self.assertEqual(with_params.dict_id, 0)
        self.assertFalse(no_params.has_checksum)
        self.assertFalse(with_params.has_checksum)

    def test_read_write_size(self):
        source = OpCountingBytesIO(b'foobarfoobar')
        dest = OpCountingBytesIO()
        cctx = zstd.ZstdCompressor()
        r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)

        self.assertEqual(r, len(source.getvalue()))
        self.assertEqual(w, 21)
        self.assertEqual(source._read_count, len(source.getvalue()) + 1)
        self.assertEqual(dest._write_count, len(dest.getvalue()))

+    def test_multithreaded(self):
+        source = io.BytesIO()
+        source.write(b'a' * 1048576)
+        source.write(b'b' * 1048576)
+        source.write(b'c' * 1048576)
+        source.seek(0)
+
+        dest = io.BytesIO()
+        cctx = zstd.ZstdCompressor(threads=2)
+        r, w = cctx.copy_stream(source, dest)
+        self.assertEqual(r, 3145728)
+        self.assertEqual(w, 295)
+
+        params = zstd.get_frame_parameters(dest.getvalue())
+        self.assertEqual(params.content_size, 0)
+        self.assertEqual(params.dict_id, 0)
+        self.assertFalse(params.has_checksum)
+
+        # Writing content size and checksum works.
+        cctx = zstd.ZstdCompressor(threads=2, write_content_size=True,
+                                   write_checksum=True)
+        dest = io.BytesIO()
+        source.seek(0)
+        cctx.copy_stream(source, dest, size=len(source.getvalue()))
+
+        params = zstd.get_frame_parameters(dest.getvalue())
+        self.assertEqual(params.content_size, 3145728)
+        self.assertEqual(params.dict_id, 0)
+        self.assertTrue(params.has_checksum)
+
358 |
|
451 | |||
359 | def compress(data, level): |
|
452 | def compress(data, level): | |
360 | buffer = io.BytesIO() |
|
453 | buffer = io.BytesIO() | |
361 | cctx = zstd.ZstdCompressor(level=level) |
|
454 | cctx = zstd.ZstdCompressor(level=level) | |
362 | with cctx.write_to(buffer) as compressor: |
|
455 | with cctx.write_to(buffer) as compressor: | |
363 | compressor.write(data) |
|
456 | compressor.write(data) | |
364 | return buffer.getvalue() |
|
457 | return buffer.getvalue() | |
365 |
|
458 | |||
366 |
|
459 | |||
367 | @make_cffi |
|
460 | @make_cffi | |
368 | class TestCompressor_write_to(unittest.TestCase): |
|
461 | class TestCompressor_write_to(unittest.TestCase): | |
369 | def test_empty(self): |
|
462 | def test_empty(self): | |
370 | result = compress(b'', 1) |
|
463 | result = compress(b'', 1) | |
371 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
464 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
372 |
|
465 | |||
373 | params = zstd.get_frame_parameters(result) |
|
466 | params = zstd.get_frame_parameters(result) | |
374 | self.assertEqual(params.content_size, 0) |
|
467 | self.assertEqual(params.content_size, 0) | |
375 | self.assertEqual(params.window_size, 524288) |
|
468 | self.assertEqual(params.window_size, 524288) | |
376 | self.assertEqual(params.dict_id, 0) |
|
469 | self.assertEqual(params.dict_id, 0) | |
377 | self.assertFalse(params.has_checksum) |
|
470 | self.assertFalse(params.has_checksum) | |
378 |
|
471 | |||
379 | def test_multiple_compress(self): |
|
472 | def test_multiple_compress(self): | |
380 | buffer = io.BytesIO() |
|
473 | buffer = io.BytesIO() | |
381 | cctx = zstd.ZstdCompressor(level=5) |
|
474 | cctx = zstd.ZstdCompressor(level=5) | |
382 | with cctx.write_to(buffer) as compressor: |
|
475 | with cctx.write_to(buffer) as compressor: | |
383 | self.assertEqual(compressor.write(b'foo'), 0) |
|
476 | self.assertEqual(compressor.write(b'foo'), 0) | |
384 | self.assertEqual(compressor.write(b'bar'), 0) |
|
477 | self.assertEqual(compressor.write(b'bar'), 0) | |
385 | self.assertEqual(compressor.write(b'x' * 8192), 0) |
|
478 | self.assertEqual(compressor.write(b'x' * 8192), 0) | |
386 |
|
479 | |||
387 | result = buffer.getvalue() |
|
480 | result = buffer.getvalue() | |
388 | self.assertEqual(result, |
|
481 | self.assertEqual(result, | |
389 | b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f' |
|
482 | b'\x28\xb5\x2f\xfd\x00\x50\x75\x00\x00\x38\x66\x6f' | |
390 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') |
|
483 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') | |
391 |
|
484 | |||
392 | def test_dictionary(self): |
|
485 | def test_dictionary(self): | |
393 | samples = [] |
|
486 | samples = [] | |
394 | for i in range(128): |
|
487 | for i in range(128): | |
395 | samples.append(b'foo' * 64) |
|
488 | samples.append(b'foo' * 64) | |
396 | samples.append(b'bar' * 64) |
|
489 | samples.append(b'bar' * 64) | |
397 | samples.append(b'foobar' * 64) |
|
490 | samples.append(b'foobar' * 64) | |
398 |
|
491 | |||
399 | d = zstd.train_dictionary(8192, samples) |
|
492 | d = zstd.train_dictionary(8192, samples) | |
400 |
|
493 | |||
401 | buffer = io.BytesIO() |
|
494 | buffer = io.BytesIO() | |
402 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
495 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) | |
403 | with cctx.write_to(buffer) as compressor: |
|
496 | with cctx.write_to(buffer) as compressor: | |
404 | self.assertEqual(compressor.write(b'foo'), 0) |
|
497 | self.assertEqual(compressor.write(b'foo'), 0) | |
405 | self.assertEqual(compressor.write(b'bar'), 0) |
|
498 | self.assertEqual(compressor.write(b'bar'), 0) | |
406 | self.assertEqual(compressor.write(b'foo' * 16384), 634) |
|
499 | self.assertEqual(compressor.write(b'foo' * 16384), 634) | |
407 |
|
500 | |||
408 | compressed = buffer.getvalue() |
|
501 | compressed = buffer.getvalue() | |
409 |
|
502 | |||
410 | params = zstd.get_frame_parameters(compressed) |
|
503 | params = zstd.get_frame_parameters(compressed) | |
411 | self.assertEqual(params.content_size, 0) |
|
504 | self.assertEqual(params.content_size, 0) | |
412 | self.assertEqual(params.window_size, 1024) |
|
505 | self.assertEqual(params.window_size, 1024) | |
413 | self.assertEqual(params.dict_id, d.dict_id()) |
|
506 | self.assertEqual(params.dict_id, d.dict_id()) | |
414 | self.assertFalse(params.has_checksum) |
|
507 | self.assertFalse(params.has_checksum) | |
415 |
|
508 | |||
416 | self.assertEqual(compressed[0:32], |
|
509 | self.assertEqual(compressed[0:32], | |
417 | b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00' |
|
510 | b'\x28\xb5\x2f\xfd\x03\x00\x55\x7b\x6b\x5e\x54\x00' | |
418 | b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54' |
|
511 | b'\x00\x00\x02\xfc\xf4\xa5\xba\x23\x3f\x85\xb3\x54' | |
419 | b'\x00\x00\x18\x6f\x6f\x66\x01\x00') |
|
512 | b'\x00\x00\x18\x6f\x6f\x66\x01\x00') | |
420 |
|
513 | |||
421 | h = hashlib.sha1(compressed).hexdigest() |
|
514 | h = hashlib.sha1(compressed).hexdigest() | |
422 | self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92') |
|
515 | self.assertEqual(h, '1c5bcd25181bcd8c1a73ea8773323e0056129f92') | |
423 |
|
516 | |||
424 | def test_compression_params(self): |
|
517 | def test_compression_params(self): | |
425 | params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST) |
|
518 | params = zstd.CompressionParameters(20, 6, 12, 5, 4, 10, zstd.STRATEGY_FAST) | |
426 |
|
519 | |||
427 | buffer = io.BytesIO() |
|
520 | buffer = io.BytesIO() | |
428 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
521 | cctx = zstd.ZstdCompressor(compression_params=params) | |
429 | with cctx.write_to(buffer) as compressor: |
|
522 | with cctx.write_to(buffer) as compressor: | |
430 | self.assertEqual(compressor.write(b'foo'), 0) |
|
523 | self.assertEqual(compressor.write(b'foo'), 0) | |
431 | self.assertEqual(compressor.write(b'bar'), 0) |
|
524 | self.assertEqual(compressor.write(b'bar'), 0) | |
432 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) |
|
525 | self.assertEqual(compressor.write(b'foobar' * 16384), 0) | |
433 |
|
526 | |||
434 | compressed = buffer.getvalue() |
|
527 | compressed = buffer.getvalue() | |
435 |
|
528 | |||
436 | params = zstd.get_frame_parameters(compressed) |
|
529 | params = zstd.get_frame_parameters(compressed) | |
437 | self.assertEqual(params.content_size, 0) |
|
530 | self.assertEqual(params.content_size, 0) | |
438 | self.assertEqual(params.window_size, 1048576) |
|
531 | self.assertEqual(params.window_size, 1048576) | |
439 | self.assertEqual(params.dict_id, 0) |
|
532 | self.assertEqual(params.dict_id, 0) | |
440 | self.assertFalse(params.has_checksum) |
|
533 | self.assertFalse(params.has_checksum) | |
441 |
|
534 | |||
442 | h = hashlib.sha1(compressed).hexdigest() |
|
535 | h = hashlib.sha1(compressed).hexdigest() | |
443 | self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99') |
|
536 | self.assertEqual(h, '1ae31f270ed7de14235221a604b31ecd517ebd99') | |
444 |
|
537 | |||
445 | def test_write_checksum(self): |
|
538 | def test_write_checksum(self): | |
446 | no_checksum = io.BytesIO() |
|
539 | no_checksum = io.BytesIO() | |
447 | cctx = zstd.ZstdCompressor(level=1) |
|
540 | cctx = zstd.ZstdCompressor(level=1) | |
448 | with cctx.write_to(no_checksum) as compressor: |
|
541 | with cctx.write_to(no_checksum) as compressor: | |
449 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
542 | self.assertEqual(compressor.write(b'foobar'), 0) | |
450 |
|
543 | |||
451 | with_checksum = io.BytesIO() |
|
544 | with_checksum = io.BytesIO() | |
452 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
545 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
453 | with cctx.write_to(with_checksum) as compressor: |
|
546 | with cctx.write_to(with_checksum) as compressor: | |
454 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
547 | self.assertEqual(compressor.write(b'foobar'), 0) | |
455 |
|
548 | |||
456 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
549 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |
457 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
550 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |
458 | self.assertEqual(no_params.content_size, 0) |
|
551 | self.assertEqual(no_params.content_size, 0) | |
459 | self.assertEqual(with_params.content_size, 0) |
|
552 | self.assertEqual(with_params.content_size, 0) | |
460 | self.assertEqual(no_params.dict_id, 0) |
|
553 | self.assertEqual(no_params.dict_id, 0) | |
461 | self.assertEqual(with_params.dict_id, 0) |
|
554 | self.assertEqual(with_params.dict_id, 0) | |
462 | self.assertFalse(no_params.has_checksum) |
|
555 | self.assertFalse(no_params.has_checksum) | |
463 | self.assertTrue(with_params.has_checksum) |
|
556 | self.assertTrue(with_params.has_checksum) | |
464 |
|
557 | |||
465 | self.assertEqual(len(with_checksum.getvalue()), |
|
558 | self.assertEqual(len(with_checksum.getvalue()), | |
466 | len(no_checksum.getvalue()) + 4) |
|
559 | len(no_checksum.getvalue()) + 4) | |
467 |
|
560 | |||
468 | def test_write_content_size(self): |
|
561 | def test_write_content_size(self): | |
469 | no_size = io.BytesIO() |
|
562 | no_size = io.BytesIO() | |
470 | cctx = zstd.ZstdCompressor(level=1) |
|
563 | cctx = zstd.ZstdCompressor(level=1) | |
471 | with cctx.write_to(no_size) as compressor: |
|
564 | with cctx.write_to(no_size) as compressor: | |
472 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
565 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
473 |
|
566 | |||
474 | with_size = io.BytesIO() |
|
567 | with_size = io.BytesIO() | |
475 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) |
|
568 | cctx = zstd.ZstdCompressor(level=1, write_content_size=True) | |
476 | with cctx.write_to(with_size) as compressor: |
|
569 | with cctx.write_to(with_size) as compressor: | |
477 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
570 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
478 |
|
571 | |||
479 | # Source size is not known in streaming mode, so header not |
|
572 | # Source size is not known in streaming mode, so header not | |
480 | # written. |
|
573 | # written. | |
481 | self.assertEqual(len(with_size.getvalue()), |
|
574 | self.assertEqual(len(with_size.getvalue()), | |
482 | len(no_size.getvalue())) |
|
575 | len(no_size.getvalue())) | |
483 |
|
576 | |||
484 | # Declaring size will write the header. |
|
577 | # Declaring size will write the header. | |
485 | with_size = io.BytesIO() |
|
578 | with_size = io.BytesIO() | |
486 | with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor: |
|
579 | with cctx.write_to(with_size, size=len(b'foobar' * 256)) as compressor: | |
487 | self.assertEqual(compressor.write(b'foobar' * 256), 0) |
|
580 | self.assertEqual(compressor.write(b'foobar' * 256), 0) | |
488 |
|
581 | |||
489 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
582 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |
490 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
583 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |
491 | self.assertEqual(no_params.content_size, 0) |
|
584 | self.assertEqual(no_params.content_size, 0) | |
492 | self.assertEqual(with_params.content_size, 1536) |
|
585 | self.assertEqual(with_params.content_size, 1536) | |
493 | self.assertEqual(no_params.dict_id, 0) |
|
586 | self.assertEqual(no_params.dict_id, 0) | |
494 | self.assertEqual(with_params.dict_id, 0) |
|
587 | self.assertEqual(with_params.dict_id, 0) | |
495 | self.assertFalse(no_params.has_checksum) |
|
588 | self.assertFalse(no_params.has_checksum) | |
496 | self.assertFalse(with_params.has_checksum) |
|
589 | self.assertFalse(with_params.has_checksum) | |
497 |
|
590 | |||
498 | self.assertEqual(len(with_size.getvalue()), |
|
591 | self.assertEqual(len(with_size.getvalue()), | |
499 | len(no_size.getvalue()) + 1) |
|
592 | len(no_size.getvalue()) + 1) | |
500 |
|
593 | |||
501 | def test_no_dict_id(self): |
|
594 | def test_no_dict_id(self): | |
502 | samples = [] |
|
595 | samples = [] | |
503 | for i in range(128): |
|
596 | for i in range(128): | |
504 | samples.append(b'foo' * 64) |
|
597 | samples.append(b'foo' * 64) | |
505 | samples.append(b'bar' * 64) |
|
598 | samples.append(b'bar' * 64) | |
506 | samples.append(b'foobar' * 64) |
|
599 | samples.append(b'foobar' * 64) | |
507 |
|
600 | |||
508 | d = zstd.train_dictionary(1024, samples) |
|
601 | d = zstd.train_dictionary(1024, samples) | |
509 |
|
602 | |||
510 | with_dict_id = io.BytesIO() |
|
603 | with_dict_id = io.BytesIO() | |
511 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
604 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
512 | with cctx.write_to(with_dict_id) as compressor: |
|
605 | with cctx.write_to(with_dict_id) as compressor: | |
513 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
606 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
514 |
|
607 | |||
515 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
608 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) | |
516 | no_dict_id = io.BytesIO() |
|
609 | no_dict_id = io.BytesIO() | |
517 | with cctx.write_to(no_dict_id) as compressor: |
|
610 | with cctx.write_to(no_dict_id) as compressor: | |
518 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) |
|
611 | self.assertEqual(compressor.write(b'foobarfoobar'), 0) | |
519 |
|
612 | |||
520 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) |
|
613 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) | |
521 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) |
|
614 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) | |
522 | self.assertEqual(no_params.content_size, 0) |
|
615 | self.assertEqual(no_params.content_size, 0) | |
523 | self.assertEqual(with_params.content_size, 0) |
|
616 | self.assertEqual(with_params.content_size, 0) | |
524 | self.assertEqual(no_params.dict_id, 0) |
|
617 | self.assertEqual(no_params.dict_id, 0) | |
525 | self.assertEqual(with_params.dict_id, d.dict_id()) |
|
618 | self.assertEqual(with_params.dict_id, d.dict_id()) | |
526 | self.assertFalse(no_params.has_checksum) |
|
619 | self.assertFalse(no_params.has_checksum) | |
527 | self.assertFalse(with_params.has_checksum) |
|
620 | self.assertFalse(with_params.has_checksum) | |
528 |
|
621 | |||
529 | self.assertEqual(len(with_dict_id.getvalue()), |
|
622 | self.assertEqual(len(with_dict_id.getvalue()), | |
530 | len(no_dict_id.getvalue()) + 4) |
|
623 | len(no_dict_id.getvalue()) + 4) | |
531 |
|
624 | |||
532 | def test_memory_size(self): |
|
625 | def test_memory_size(self): | |
533 | cctx = zstd.ZstdCompressor(level=3) |
|
626 | cctx = zstd.ZstdCompressor(level=3) | |
534 | buffer = io.BytesIO() |
|
627 | buffer = io.BytesIO() | |
535 | with cctx.write_to(buffer) as compressor: |
|
628 | with cctx.write_to(buffer) as compressor: | |
536 | size = compressor.memory_size() |
|
629 | size = compressor.memory_size() | |
537 |
|
630 | |||
538 | self.assertGreater(size, 100000) |
|
631 | self.assertGreater(size, 100000) | |
539 |
|
632 | |||
540 | def test_write_size(self): |
|
633 | def test_write_size(self): | |
541 | cctx = zstd.ZstdCompressor(level=3) |
|
634 | cctx = zstd.ZstdCompressor(level=3) | |
542 | dest = OpCountingBytesIO() |
|
635 | dest = OpCountingBytesIO() | |
543 | with cctx.write_to(dest, write_size=1) as compressor: |
|
636 | with cctx.write_to(dest, write_size=1) as compressor: | |
544 | self.assertEqual(compressor.write(b'foo'), 0) |
|
637 | self.assertEqual(compressor.write(b'foo'), 0) | |
545 | self.assertEqual(compressor.write(b'bar'), 0) |
|
638 | self.assertEqual(compressor.write(b'bar'), 0) | |
546 | self.assertEqual(compressor.write(b'foobar'), 0) |
|
639 | self.assertEqual(compressor.write(b'foobar'), 0) | |
547 |
|
640 | |||
548 | self.assertEqual(len(dest.getvalue()), dest._write_count) |
|
641 | self.assertEqual(len(dest.getvalue()), dest._write_count) | |
549 |
|
642 | |||
550 | def test_flush_repeated(self): |
|
643 | def test_flush_repeated(self): | |
551 | cctx = zstd.ZstdCompressor(level=3) |
|
644 | cctx = zstd.ZstdCompressor(level=3) | |
552 | dest = OpCountingBytesIO() |
|
645 | dest = OpCountingBytesIO() | |
553 | with cctx.write_to(dest) as compressor: |
|
646 | with cctx.write_to(dest) as compressor: | |
554 | self.assertEqual(compressor.write(b'foo'), 0) |
|
647 | self.assertEqual(compressor.write(b'foo'), 0) | |
555 | self.assertEqual(dest._write_count, 0) |
|
648 | self.assertEqual(dest._write_count, 0) | |
556 | self.assertEqual(compressor.flush(), 12) |
|
649 | self.assertEqual(compressor.flush(), 12) | |
557 | self.assertEqual(dest._write_count, 1) |
|
650 | self.assertEqual(dest._write_count, 1) | |
558 | self.assertEqual(compressor.write(b'bar'), 0) |
|
651 | self.assertEqual(compressor.write(b'bar'), 0) | |
559 | self.assertEqual(dest._write_count, 1) |
|
652 | self.assertEqual(dest._write_count, 1) | |
560 | self.assertEqual(compressor.flush(), 6) |
|
653 | self.assertEqual(compressor.flush(), 6) | |
561 | self.assertEqual(dest._write_count, 2) |
|
654 | self.assertEqual(dest._write_count, 2) | |
562 | self.assertEqual(compressor.write(b'baz'), 0) |
|
655 | self.assertEqual(compressor.write(b'baz'), 0) | |
563 |
|
656 | |||
564 | self.assertEqual(dest._write_count, 3) |
|
657 | self.assertEqual(dest._write_count, 3) | |
565 |
|
658 | |||
566 | def test_flush_empty_block(self): |
|
659 | def test_flush_empty_block(self): | |
567 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
660 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) | |
568 | dest = OpCountingBytesIO() |
|
661 | dest = OpCountingBytesIO() | |
569 | with cctx.write_to(dest) as compressor: |
|
662 | with cctx.write_to(dest) as compressor: | |
570 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) |
|
663 | self.assertEqual(compressor.write(b'foobar' * 8192), 0) | |
571 | count = dest._write_count |
|
664 | count = dest._write_count | |
572 | offset = dest.tell() |
|
665 | offset = dest.tell() | |
573 | self.assertEqual(compressor.flush(), 23) |
|
666 | self.assertEqual(compressor.flush(), 23) | |
574 | self.assertGreater(dest._write_count, count) |
|
667 | self.assertGreater(dest._write_count, count) | |
575 | self.assertGreater(dest.tell(), offset) |
|
668 | self.assertGreater(dest.tell(), offset) | |
576 | offset = dest.tell() |
|
669 | offset = dest.tell() | |
577 | # Ending the write here should cause an empty block to be written |
|
670 | # Ending the write here should cause an empty block to be written | |
578 | # to denote end of frame. |
|
671 | # to denote end of frame. | |
579 |
|
672 | |||
580 | trailing = dest.getvalue()[offset:] |
|
673 | trailing = dest.getvalue()[offset:] | |
581 | # 3 bytes block header + 4 bytes frame checksum |
|
674 | # 3 bytes block header + 4 bytes frame checksum | |
582 | self.assertEqual(len(trailing), 7) |
|
675 | self.assertEqual(len(trailing), 7) | |
583 |
|
676 | |||
584 | header = trailing[0:3] |
|
677 | header = trailing[0:3] | |
585 | self.assertEqual(header, b'\x01\x00\x00') |
|
678 | self.assertEqual(header, b'\x01\x00\x00') | |
586 |
|
679 | |||
+    def test_multithreaded(self):
+        dest = io.BytesIO()
+        cctx = zstd.ZstdCompressor(threads=2)
+        with cctx.write_to(dest) as compressor:
+            compressor.write(b'a' * 1048576)
+            compressor.write(b'b' * 1048576)
+            compressor.write(b'c' * 1048576)
+
+        self.assertEqual(len(dest.getvalue()), 295)
+
587 |
|
690 | |||
588 | @make_cffi |
|
691 | @make_cffi | |
589 | class TestCompressor_read_from(unittest.TestCase): |
|
692 | class TestCompressor_read_from(unittest.TestCase): | |
590 | def test_type_validation(self): |
|
693 | def test_type_validation(self): | |
591 | cctx = zstd.ZstdCompressor() |
|
694 | cctx = zstd.ZstdCompressor() | |
592 |
|
695 | |||
593 | # Object with read() works. |
|
696 | # Object with read() works. | |
594 | for chunk in cctx.read_from(io.BytesIO()): |
|
697 | for chunk in cctx.read_from(io.BytesIO()): | |
595 | pass |
|
698 | pass | |
596 |
|
699 | |||
597 | # Buffer protocol works. |
|
700 | # Buffer protocol works. | |
598 | for chunk in cctx.read_from(b'foobar'): |
|
701 | for chunk in cctx.read_from(b'foobar'): | |
599 | pass |
|
702 | pass | |
600 |
|
703 | |||
601 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
704 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
602 | for chunk in cctx.read_from(True): |
|
705 | for chunk in cctx.read_from(True): | |
603 | pass |
|
706 | pass | |
604 |
|
707 | |||
605 | def test_read_empty(self): |
|
708 | def test_read_empty(self): | |
606 | cctx = zstd.ZstdCompressor(level=1) |
|
709 | cctx = zstd.ZstdCompressor(level=1) | |
607 |
|
710 | |||
608 | source = io.BytesIO() |
|
711 | source = io.BytesIO() | |
609 | it = cctx.read_from(source) |
|
712 | it = cctx.read_from(source) | |
610 | chunks = list(it) |
|
713 | chunks = list(it) | |
611 | self.assertEqual(len(chunks), 1) |
|
714 | self.assertEqual(len(chunks), 1) | |
612 | compressed = b''.join(chunks) |
|
715 | compressed = b''.join(chunks) | |
613 | self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
716 | self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
614 |
|
717 | |||
615 | # And again with the buffer protocol. |
|
718 | # And again with the buffer protocol. | |
616 | it = cctx.read_from(b'') |
|
719 | it = cctx.read_from(b'') | |
617 | chunks = list(it) |
|
720 | chunks = list(it) | |
618 | self.assertEqual(len(chunks), 1) |
|
721 | self.assertEqual(len(chunks), 1) | |
619 | compressed2 = b''.join(chunks) |
|
722 | compressed2 = b''.join(chunks) | |
620 | self.assertEqual(compressed2, compressed) |
|
723 | self.assertEqual(compressed2, compressed) | |
621 |
|
724 | |||
622 | def test_read_large(self): |
|
725 | def test_read_large(self): | |
623 | cctx = zstd.ZstdCompressor(level=1) |
|
726 | cctx = zstd.ZstdCompressor(level=1) | |
624 |
|
727 | |||
625 | source = io.BytesIO() |
|
728 | source = io.BytesIO() | |
626 | source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
729 | source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
627 | source.write(b'o') |
|
730 | source.write(b'o') | |
628 | source.seek(0) |
|
731 | source.seek(0) | |
629 |
|
732 | |||
630 | # Creating an iterator should not perform any compression until |
|
733 | # Creating an iterator should not perform any compression until | |
631 | # first read. |
|
734 | # first read. | |
632 | it = cctx.read_from(source, size=len(source.getvalue())) |
|
735 | it = cctx.read_from(source, size=len(source.getvalue())) | |
633 | self.assertEqual(source.tell(), 0) |
|
736 | self.assertEqual(source.tell(), 0) | |
634 |
|
737 | |||
635 | # We should have exactly 2 output chunks. |
|
738 | # We should have exactly 2 output chunks. | |
636 | chunks = [] |
|
739 | chunks = [] | |
637 | chunk = next(it) |
|
740 | chunk = next(it) | |
638 | self.assertIsNotNone(chunk) |
|
741 | self.assertIsNotNone(chunk) | |
639 | self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
742 | self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
640 | chunks.append(chunk) |
|
743 | chunks.append(chunk) | |
641 | chunk = next(it) |
|
744 | chunk = next(it) | |
642 | self.assertIsNotNone(chunk) |
|
745 | self.assertIsNotNone(chunk) | |
643 | chunks.append(chunk) |
|
746 | chunks.append(chunk) | |
644 |
|
747 | |||
645 | self.assertEqual(source.tell(), len(source.getvalue())) |
|
748 | self.assertEqual(source.tell(), len(source.getvalue())) | |
646 |
|
749 | |||
647 | with self.assertRaises(StopIteration): |
|
750 | with self.assertRaises(StopIteration): | |
648 | next(it) |
|
751 | next(it) | |
649 |
|
752 | |||
650 | # And again for good measure. |
|
753 | # And again for good measure. | |
651 | with self.assertRaises(StopIteration): |
|
754 | with self.assertRaises(StopIteration): | |
652 | next(it) |
|
755 | next(it) | |
653 |
|
756 | |||
654 | # We should get the same output as the one-shot compression mechanism. |
|
757 | # We should get the same output as the one-shot compression mechanism. | |
655 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
758 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) | |
656 |
|
759 | |||
657 | params = zstd.get_frame_parameters(b''.join(chunks)) |
|
760 | params = zstd.get_frame_parameters(b''.join(chunks)) | |
658 | self.assertEqual(params.content_size, 0) |
|
761 | self.assertEqual(params.content_size, 0) | |
659 | self.assertEqual(params.window_size, 262144) |
|
762 | self.assertEqual(params.window_size, 262144) | |
660 | self.assertEqual(params.dict_id, 0) |
|
763 | self.assertEqual(params.dict_id, 0) | |
661 | self.assertFalse(params.has_checksum) |
|
764 | self.assertFalse(params.has_checksum) | |
662 |
|
765 | |||
663 | # Now check the buffer protocol. |
|
766 | # Now check the buffer protocol. | |
664 | it = cctx.read_from(source.getvalue()) |
|
767 | it = cctx.read_from(source.getvalue()) | |
665 | chunks = list(it) |
|
768 | chunks = list(it) | |
666 | self.assertEqual(len(chunks), 2) |
|
769 | self.assertEqual(len(chunks), 2) | |
667 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) |
|
770 | self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue())) | |
668 |
|
771 | |||
669 | def test_read_write_size(self): |
|
772 | def test_read_write_size(self): | |
670 | source = OpCountingBytesIO(b'foobarfoobar') |
|
773 | source = OpCountingBytesIO(b'foobarfoobar') | |
671 | cctx = zstd.ZstdCompressor(level=3) |
|
774 | cctx = zstd.ZstdCompressor(level=3) | |
672 | for chunk in cctx.read_from(source, read_size=1, write_size=1): |
|
775 | for chunk in cctx.read_from(source, read_size=1, write_size=1): | |
673 | self.assertEqual(len(chunk), 1) |
|
776 | self.assertEqual(len(chunk), 1) | |
674 |
|
777 | |||
675 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
778 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) | |
+
+    def test_multithreaded(self):
+        source = io.BytesIO()
+        source.write(b'a' * 1048576)
+        source.write(b'b' * 1048576)
+        source.write(b'c' * 1048576)
+        source.seek(0)
+
+        cctx = zstd.ZstdCompressor(threads=2)
+
+        compressed = b''.join(cctx.read_from(source))
+        self.assertEqual(len(compressed), 295)
+
+
+class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
+    def test_multithreaded_unsupported(self):
+        cctx = zstd.ZstdCompressor(threads=2)
+
+        with self.assertRaisesRegexp(zstd.ZstdError, 'function cannot be called on ZstdCompressor configured for multi-threaded compression'):
+            cctx.multi_compress_to_buffer([b'foo'])
+
+    def test_invalid_inputs(self):
+        cctx = zstd.ZstdCompressor()
+
+        with self.assertRaises(TypeError):
+            cctx.multi_compress_to_buffer(True)
+
+        with self.assertRaises(TypeError):
+            cctx.multi_compress_to_buffer((1, 2))
+
+        with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
+            cctx.multi_compress_to_buffer([u'foo'])
+
+    def test_empty_input(self):
+        cctx = zstd.ZstdCompressor()
+
+        with self.assertRaisesRegexp(ValueError, 'no source elements found'):
+            cctx.multi_compress_to_buffer([])
+
+        with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
+            cctx.multi_compress_to_buffer([b'', b'', b''])
+
+    def test_list_input(self):
+        cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
+
+        original = [b'foo' * 12, b'bar' * 6]
+        frames = [cctx.compress(c) for c in original]
+        b = cctx.multi_compress_to_buffer(original)
+
+        self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
+
+        self.assertEqual(len(b), 2)
+        self.assertEqual(b.size(), 44)
+
+        self.assertEqual(b[0].tobytes(), frames[0])
+        self.assertEqual(b[1].tobytes(), frames[1])
+
+    def test_buffer_with_segments_input(self):
+        cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
+
+        original = [b'foo' * 4, b'bar' * 6]
+        frames = [cctx.compress(c) for c in original]
+
+        offsets = struct.pack('=QQQQ', 0, len(original[0]),
+                              len(original[0]), len(original[1]))
+        segments = zstd.BufferWithSegments(b''.join(original), offsets)
+
+        result = cctx.multi_compress_to_buffer(segments)
+
+        self.assertEqual(len(result), 2)
+        self.assertEqual(result.size(), 47)
+
+        self.assertEqual(result[0].tobytes(), frames[0])
+        self.assertEqual(result[1].tobytes(), frames[1])
+
+    def test_buffer_with_segments_collection_input(self):
+        cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
+
+        original = [
+            b'foo1',
+            b'foo2' * 2,
+            b'foo3' * 3,
+            b'foo4' * 4,
+            b'foo5' * 5,
+        ]
+
+        frames = [cctx.compress(c) for c in original]
+
+        b = b''.join([original[0], original[1]])
+        b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
+                                                    0, len(original[0]),
+                                                    len(original[0]), len(original[1])))
+        b = b''.join([original[2], original[3], original[4]])
+        b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
+                                                    0, len(original[2]),
+                                                    len(original[2]), len(original[3]),
+                                                    len(original[2]) + len(original[3]), len(original[4])))
+
+        c = zstd.BufferWithSegmentsCollection(b1, b2)
+
+        result = cctx.multi_compress_to_buffer(c)
+
+        self.assertEqual(len(result), len(frames))
+
+        for i, frame in enumerate(frames):
+            self.assertEqual(result[i].tobytes(), frame)
+
+    def test_multiple_threads(self):
+        # threads argument will cause multi-threaded ZSTD APIs to be used, which will
+        # make output different.
+        refcctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
+        reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
+
+        cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
+
+        frames = []
+        frames.extend(b'x' * 64 for i in range(256))
+        frames.extend(b'y' * 64 for i in range(256))
+
+        result = cctx.multi_compress_to_buffer(frames, threads=-1)
+
+        self.assertEqual(len(result), 512)
+        for i in range(512):
+            if i < 256:
+                self.assertEqual(result[i].tobytes(), reference[0])
+            else:
+                self.assertEqual(result[i].tobytes(), reference[1])
@@ -1,186 +1,123 | |||||
-import io
-
try:
    import unittest2 as unittest
except ImportError:
    import unittest

-try:
-    import hypothesis
-    import hypothesis.strategies as strategies
-except ImportError:
-    hypothesis = None
-
import zstd

from . common import (
    make_cffi,
)


@make_cffi
class TestCompressionParameters(unittest.TestCase):
    def test_init_bad_arg_type(self):
        with self.assertRaises(TypeError):
            zstd.CompressionParameters()

        with self.assertRaises(TypeError):
            zstd.CompressionParameters(0, 1)

    def test_bounds(self):
        zstd.CompressionParameters(zstd.WINDOWLOG_MIN,
                                   zstd.CHAINLOG_MIN,
                                   zstd.HASHLOG_MIN,
                                   zstd.SEARCHLOG_MIN,
-                                  zstd.SEARCHLENGTH_MIN,
+                                  zstd.SEARCHLENGTH_MIN + 1,
                                   zstd.TARGETLENGTH_MIN,
                                   zstd.STRATEGY_FAST)

        zstd.CompressionParameters(zstd.WINDOWLOG_MAX,
                                   zstd.CHAINLOG_MAX,
                                   zstd.HASHLOG_MAX,
                                   zstd.SEARCHLOG_MAX,
-                                  zstd.SEARCHLENGTH_MAX,
+                                  zstd.SEARCHLENGTH_MAX - 1,
                                   zstd.TARGETLENGTH_MAX,
                                   zstd.STRATEGY_BTOPT)

    def test_get_compression_parameters(self):
        p = zstd.get_compression_parameters(1)
        self.assertIsInstance(p, zstd.CompressionParameters)

        self.assertEqual(p.window_log, 19)

    def test_members(self):
        p = zstd.CompressionParameters(10, 6, 7, 4, 5, 8, 1)
        self.assertEqual(p.window_log, 10)
        self.assertEqual(p.chain_log, 6)
        self.assertEqual(p.hash_log, 7)
        self.assertEqual(p.search_log, 4)
        self.assertEqual(p.search_length, 5)
        self.assertEqual(p.target_length, 8)
        self.assertEqual(p.strategy, 1)

+    def test_estimated_compression_context_size(self):
+        p = zstd.CompressionParameters(20, 16, 17, 1, 5, 16, zstd.STRATEGY_DFAST)
+
+        # 32-bit has slightly different values from 64-bit.
+        self.assertAlmostEqual(p.estimated_compression_context_size(), 1287076,
+                               delta=110)
+

64 | @make_cffi |
|
63 | @make_cffi | |
65 | class TestFrameParameters(unittest.TestCase): |
|
64 | class TestFrameParameters(unittest.TestCase): | |
66 | def test_invalid_type(self): |
|
65 | def test_invalid_type(self): | |
67 | with self.assertRaises(TypeError): |
|
66 | with self.assertRaises(TypeError): | |
68 | zstd.get_frame_parameters(None) |
|
67 | zstd.get_frame_parameters(None) | |
69 |
|
68 | |||
70 | with self.assertRaises(TypeError): |
|
69 | with self.assertRaises(TypeError): | |
71 | zstd.get_frame_parameters(u'foobarbaz') |
|
70 | zstd.get_frame_parameters(u'foobarbaz') | |
72 |
|
71 | |||
73 | def test_invalid_input_sizes(self): |
|
72 | def test_invalid_input_sizes(self): | |
74 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): |
|
73 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): | |
75 | zstd.get_frame_parameters(b'') |
|
74 | zstd.get_frame_parameters(b'') | |
76 |
|
75 | |||
77 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): |
|
76 | with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'): | |
78 | zstd.get_frame_parameters(zstd.FRAME_HEADER) |
|
77 | zstd.get_frame_parameters(zstd.FRAME_HEADER) | |
79 |
|
78 | |||
80 | def test_invalid_frame(self): |
|
79 | def test_invalid_frame(self): | |
81 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
80 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |
82 | zstd.get_frame_parameters(b'foobarbaz') |
|
81 | zstd.get_frame_parameters(b'foobarbaz') | |
83 |
|
82 | |||
84 | def test_attributes(self): |
|
83 | def test_attributes(self): | |
85 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') |
|
84 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00') | |
86 | self.assertEqual(params.content_size, 0) |
|
85 | self.assertEqual(params.content_size, 0) | |
87 | self.assertEqual(params.window_size, 1024) |
|
86 | self.assertEqual(params.window_size, 1024) | |
88 | self.assertEqual(params.dict_id, 0) |
|
87 | self.assertEqual(params.dict_id, 0) | |
89 | self.assertFalse(params.has_checksum) |
|
88 | self.assertFalse(params.has_checksum) | |
90 |
|
89 | |||
91 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. |
|
90 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. | |
92 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') |
|
91 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff') | |
93 | self.assertEqual(params.content_size, 0) |
|
92 | self.assertEqual(params.content_size, 0) | |
94 | self.assertEqual(params.window_size, 1024) |
|
93 | self.assertEqual(params.window_size, 1024) | |
95 | self.assertEqual(params.dict_id, 255) |
|
94 | self.assertEqual(params.dict_id, 255) | |
96 | self.assertFalse(params.has_checksum) |
|
95 | self.assertFalse(params.has_checksum) | |
97 |
|
96 | |||
98 | # Lowest 3rd bit indicates if checksum is present. |
|
97 | # Lowest 3rd bit indicates if checksum is present. | |
99 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') |
|
98 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00') | |
100 | self.assertEqual(params.content_size, 0) |
|
99 | self.assertEqual(params.content_size, 0) | |
101 | self.assertEqual(params.window_size, 1024) |
|
100 | self.assertEqual(params.window_size, 1024) | |
102 | self.assertEqual(params.dict_id, 0) |
|
101 | self.assertEqual(params.dict_id, 0) | |
103 | self.assertTrue(params.has_checksum) |
|
102 | self.assertTrue(params.has_checksum) | |
104 |
|
103 | |||
105 | # Upper 2 bits indicate content size. |
|
104 | # Upper 2 bits indicate content size. | |
106 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00') |
|
105 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00') | |
107 | self.assertEqual(params.content_size, 511) |
|
106 | self.assertEqual(params.content_size, 511) | |
108 | self.assertEqual(params.window_size, 1024) |
|
107 | self.assertEqual(params.window_size, 1024) | |
109 | self.assertEqual(params.dict_id, 0) |
|
108 | self.assertEqual(params.dict_id, 0) | |
110 | self.assertFalse(params.has_checksum) |
|
109 | self.assertFalse(params.has_checksum) | |
111 |
|
110 | |||
112 | # Window descriptor is 2nd byte after frame header. |
|
111 | # Window descriptor is 2nd byte after frame header. | |
113 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') |
|
112 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40') | |
114 | self.assertEqual(params.content_size, 0) |
|
113 | self.assertEqual(params.content_size, 0) | |
115 | self.assertEqual(params.window_size, 262144) |
|
114 | self.assertEqual(params.window_size, 262144) | |
116 | self.assertEqual(params.dict_id, 0) |
|
115 | self.assertEqual(params.dict_id, 0) | |
117 | self.assertFalse(params.has_checksum) |
|
116 | self.assertFalse(params.has_checksum) | |
118 |
|
117 | |||
119 | # Set multiple things. |
|
118 | # Set multiple things. | |
120 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00') |
|
119 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00') | |
121 | self.assertEqual(params.content_size, 272) |
|
120 | self.assertEqual(params.content_size, 272) | |
122 | self.assertEqual(params.window_size, 262144) |
|
121 | self.assertEqual(params.window_size, 262144) | |
123 | self.assertEqual(params.dict_id, 15) |
|
122 | self.assertEqual(params.dict_id, 15) | |
124 | self.assertTrue(params.has_checksum) |
|
123 | self.assertTrue(params.has_checksum) | |
-
-
-if hypothesis:
-    s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
-                                      max_value=zstd.WINDOWLOG_MAX)
-    s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
-                                     max_value=zstd.CHAINLOG_MAX)
-    s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
-                                    max_value=zstd.HASHLOG_MAX)
-    s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
-                                      max_value=zstd.SEARCHLOG_MAX)
-    s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
-                                         max_value=zstd.SEARCHLENGTH_MAX)
-    s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
-                                         max_value=zstd.TARGETLENGTH_MAX)
-    s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
-                                          zstd.STRATEGY_DFAST,
-                                          zstd.STRATEGY_GREEDY,
-                                          zstd.STRATEGY_LAZY,
-                                          zstd.STRATEGY_LAZY2,
-                                          zstd.STRATEGY_BTLAZY2,
-                                          zstd.STRATEGY_BTOPT))
-
-
-    @make_cffi
-    class TestCompressionParametersHypothesis(unittest.TestCase):
-        @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
-                          s_searchlength, s_targetlength, s_strategy)
-        def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
-                            searchlength, targetlength, strategy):
-            p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
-                                           searchlog, searchlength,
-                                           targetlength, strategy)
-
-            # Verify we can instantiate a compressor with the supplied values.
-            # ZSTD_checkCParams moves the goal posts on us from what's advertised
-            # in the constants. So move along with them.
-            if searchlength == zstd.SEARCHLENGTH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
-                searchlength += 1
-                p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
-                                               searchlog, searchlength,
-                                               targetlength, strategy)
-            elif searchlength == zstd.SEARCHLENGTH_MAX and strategy != zstd.STRATEGY_FAST:
-                searchlength -= 1
-                p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
-                                               searchlog, searchlength,
-                                               targetlength, strategy)
-
-            cctx = zstd.ZstdCompressor(compression_params=p)
-            with cctx.write_to(io.BytesIO()):
-                pass
-
-        @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
-                          s_searchlength, s_targetlength, s_strategy)
-        def test_estimate_compression_context_size(self, windowlog, chainlog,
-                                                   hashlog, searchlog,
-                                                   searchlength, targetlength,
-                                                   strategy):
-            p = zstd.CompressionParameters(windowlog, chainlog, hashlog,
-                                           searchlog, searchlength,
-                                           targetlength, strategy)
-            size = zstd.estimate_compression_context_size(p)
@@ -1,577 +1,741 | |||||
1 | import io |
|
1 | import io | |
2 | import random |
|
2 | import random | |
3 | import struct |
|
3 | import struct | |
4 | import sys |
|
4 | import sys | |
5 |
|
5 | |||
6 | try: |
|
6 | try: | |
7 | import unittest2 as unittest |
|
7 | import unittest2 as unittest | |
8 | except ImportError: |
|
8 | except ImportError: | |
9 | import unittest |
|
9 | import unittest | |
10 |
|
10 | |||
11 | import zstd |
|
11 | import zstd | |
12 |
|
12 | |||
13 | from .common import ( |
|
13 | from .common import ( | |
14 | make_cffi, |
|
14 | make_cffi, | |
15 | OpCountingBytesIO, |
|
15 | OpCountingBytesIO, | |
16 | ) |
|
16 | ) | |
17 |
|
17 | |||
18 |
|
18 | |||
19 | if sys.version_info[0] >= 3: |
|
19 | if sys.version_info[0] >= 3: | |
20 | next = lambda it: it.__next__() |
|
20 | next = lambda it: it.__next__() | |
21 | else: |
|
21 | else: | |
22 | next = lambda it: it.next() |
|
22 | next = lambda it: it.next() | |
23 |
|
23 | |||
24 |
|
24 | |||
25 | @make_cffi |
|
25 | @make_cffi | |
26 | class TestDecompressor_decompress(unittest.TestCase): |
|
26 | class TestDecompressor_decompress(unittest.TestCase): | |
27 | def test_empty_input(self): |
|
27 | def test_empty_input(self): | |
28 | dctx = zstd.ZstdDecompressor() |
|
28 | dctx = zstd.ZstdDecompressor() | |
29 |
|
29 | |||
30 | with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): |
|
30 | with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): | |
31 | dctx.decompress(b'') |
|
31 | dctx.decompress(b'') | |
32 |
|
32 | |||
33 | def test_invalid_input(self): |
|
33 | def test_invalid_input(self): | |
34 | dctx = zstd.ZstdDecompressor() |
|
34 | dctx = zstd.ZstdDecompressor() | |
35 |
|
35 | |||
36 | with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): |
|
36 | with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): | |
37 | dctx.decompress(b'foobar') |
|
37 | dctx.decompress(b'foobar') | |
38 |
|
38 | |||
39 | def test_no_content_size_in_frame(self): |
|
39 | def test_no_content_size_in_frame(self): | |
40 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
40 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
41 | compressed = cctx.compress(b'foobar') |
|
41 | compressed = cctx.compress(b'foobar') | |
42 |
|
42 | |||
43 | dctx = zstd.ZstdDecompressor() |
|
43 | dctx = zstd.ZstdDecompressor() | |
44 | with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): |
|
44 | with self.assertRaisesRegexp(zstd.ZstdError, 'input data invalid'): | |
45 | dctx.decompress(compressed) |
|
45 | dctx.decompress(compressed) | |
46 |
|
46 | |||
47 | def test_content_size_present(self): |
|
47 | def test_content_size_present(self): | |
48 | cctx = zstd.ZstdCompressor(write_content_size=True) |
|
48 | cctx = zstd.ZstdCompressor(write_content_size=True) | |
49 | compressed = cctx.compress(b'foobar') |
|
49 | compressed = cctx.compress(b'foobar') | |
50 |
|
50 | |||
51 | dctx = zstd.ZstdDecompressor() |
|
51 | dctx = zstd.ZstdDecompressor() | |
52 | decompressed = dctx.decompress(compressed)

52 | decompressed = dctx.decompress(compressed) | 
53 | self.assertEqual(decompressed, b'foobar') |
|
53 | self.assertEqual(decompressed, b'foobar') | |
54 |
|
54 | |||
55 | def test_max_output_size(self): |
|
55 | def test_max_output_size(self): | |
56 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
56 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
57 | source = b'foobar' * 256 |
|
57 | source = b'foobar' * 256 | |
58 | compressed = cctx.compress(source) |
|
58 | compressed = cctx.compress(source) | |
59 |
|
59 | |||
60 | dctx = zstd.ZstdDecompressor() |
|
60 | dctx = zstd.ZstdDecompressor() | |
61 | # Will fit into buffer exactly the size of input. |
|
61 | # Will fit into buffer exactly the size of input. | |
62 | decompressed = dctx.decompress(compressed, max_output_size=len(source)) |
|
62 | decompressed = dctx.decompress(compressed, max_output_size=len(source)) | |
63 | self.assertEqual(decompressed, source) |
|
63 | self.assertEqual(decompressed, source) | |
64 |
|
64 | |||
65 | # Input size - 1 fails |
|
65 | # Input size - 1 fails | |
66 | with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'): |
|
66 | with self.assertRaisesRegexp(zstd.ZstdError, 'Destination buffer is too small'): | |
67 | dctx.decompress(compressed, max_output_size=len(source) - 1) |
|
67 | dctx.decompress(compressed, max_output_size=len(source) - 1) | |
68 |
|
68 | |||
69 | # Input size + 1 works |
|
69 | # Input size + 1 works | |
70 | decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1) |
|
70 | decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1) | |
71 | self.assertEqual(decompressed, source) |
|
71 | self.assertEqual(decompressed, source) | |
72 |
|
72 | |||
73 | # A much larger buffer works. |
|
73 | # A much larger buffer works. | |
74 | decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64) |
|
74 | decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64) | |
75 | self.assertEqual(decompressed, source) |
|
75 | self.assertEqual(decompressed, source) | |
76 |
|
76 | |||
77 | def test_stupidly_large_output_buffer(self): |
|
77 | def test_stupidly_large_output_buffer(self): | |
78 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
78 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
79 | compressed = cctx.compress(b'foobar' * 256) |
|
79 | compressed = cctx.compress(b'foobar' * 256) | |
80 | dctx = zstd.ZstdDecompressor() |
|
80 | dctx = zstd.ZstdDecompressor() | |
81 |
|
81 | |||
82 | # Will get OverflowError on some Python distributions that can't |
|
82 | # Will get OverflowError on some Python distributions that can't | |
83 | # handle really large integers. |
|
83 | # handle really large integers. | |
84 | with self.assertRaises((MemoryError, OverflowError)): |
|
84 | with self.assertRaises((MemoryError, OverflowError)): | |
85 | dctx.decompress(compressed, max_output_size=2**62) |
|
85 | dctx.decompress(compressed, max_output_size=2**62) | |
86 |
|
86 | |||
87 | def test_dictionary(self): |
|
87 | def test_dictionary(self): | |
88 | samples = [] |
|
88 | samples = [] | |
89 | for i in range(128): |
|
89 | for i in range(128): | |
90 | samples.append(b'foo' * 64) |
|
90 | samples.append(b'foo' * 64) | |
91 | samples.append(b'bar' * 64) |
|
91 | samples.append(b'bar' * 64) | |
92 | samples.append(b'foobar' * 64) |
|
92 | samples.append(b'foobar' * 64) | |
93 |
|
93 | |||
94 | d = zstd.train_dictionary(8192, samples) |
|
94 | d = zstd.train_dictionary(8192, samples) | |
95 |
|
95 | |||
96 | orig = b'foobar' * 16384 |
|
96 | orig = b'foobar' * 16384 | |
97 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True) |
|
97 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True) | |
98 | compressed = cctx.compress(orig) |
|
98 | compressed = cctx.compress(orig) | |
99 |
|
99 | |||
100 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
100 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
101 | decompressed = dctx.decompress(compressed) |
|
101 | decompressed = dctx.decompress(compressed) | |
102 |
|
102 | |||
103 | self.assertEqual(decompressed, orig) |
|
103 | self.assertEqual(decompressed, orig) | |
104 |
|
104 | |||
105 | def test_dictionary_multiple(self): |
|
105 | def test_dictionary_multiple(self): | |
106 | samples = [] |
|
106 | samples = [] | |
107 | for i in range(128): |
|
107 | for i in range(128): | |
108 | samples.append(b'foo' * 64) |
|
108 | samples.append(b'foo' * 64) | |
109 | samples.append(b'bar' * 64) |
|
109 | samples.append(b'bar' * 64) | |
110 | samples.append(b'foobar' * 64) |
|
110 | samples.append(b'foobar' * 64) | |
111 |
|
111 | |||
112 | d = zstd.train_dictionary(8192, samples) |
|
112 | d = zstd.train_dictionary(8192, samples) | |
113 |
|
113 | |||
114 | sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) |
|
114 | sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192) | |
115 | compressed = [] |
|
115 | compressed = [] | |
116 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True) |
|
116 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_content_size=True) | |
117 | for source in sources: |
|
117 | for source in sources: | |
118 | compressed.append(cctx.compress(source)) |
|
118 | compressed.append(cctx.compress(source)) | |
119 |
|
119 | |||
120 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
120 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
121 | for i in range(len(sources)): |
|
121 | for i in range(len(sources)): | |
122 | decompressed = dctx.decompress(compressed[i]) |
|
122 | decompressed = dctx.decompress(compressed[i]) | |
123 | self.assertEqual(decompressed, sources[i]) |
|
123 | self.assertEqual(decompressed, sources[i]) | |
124 |
|
124 | |||
125 |
|
125 | |||
126 | @make_cffi |
|
126 | @make_cffi | |
127 | class TestDecompressor_copy_stream(unittest.TestCase): |
|
127 | class TestDecompressor_copy_stream(unittest.TestCase): | |
128 | def test_no_read(self): |
|
128 | def test_no_read(self): | |
129 | source = object() |
|
129 | source = object() | |
130 | dest = io.BytesIO() |
|
130 | dest = io.BytesIO() | |
131 |
|
131 | |||
132 | dctx = zstd.ZstdDecompressor() |
|
132 | dctx = zstd.ZstdDecompressor() | |
133 | with self.assertRaises(ValueError): |
|
133 | with self.assertRaises(ValueError): | |
134 | dctx.copy_stream(source, dest) |
|
134 | dctx.copy_stream(source, dest) | |
135 |
|
135 | |||
136 | def test_no_write(self): |
|
136 | def test_no_write(self): | |
137 | source = io.BytesIO() |
|
137 | source = io.BytesIO() | |
138 | dest = object() |
|
138 | dest = object() | |
139 |
|
139 | |||
140 | dctx = zstd.ZstdDecompressor() |
|
140 | dctx = zstd.ZstdDecompressor() | |
141 | with self.assertRaises(ValueError): |
|
141 | with self.assertRaises(ValueError): | |
142 | dctx.copy_stream(source, dest) |
|
142 | dctx.copy_stream(source, dest) | |
143 |
|
143 | |||
144 | def test_empty(self): |
|
144 | def test_empty(self): | |
145 | source = io.BytesIO() |
|
145 | source = io.BytesIO() | |
146 | dest = io.BytesIO() |
|
146 | dest = io.BytesIO() | |
147 |
|
147 | |||
148 | dctx = zstd.ZstdDecompressor() |
|
148 | dctx = zstd.ZstdDecompressor() | |
149 | # TODO should this raise an error? |
|
149 | # TODO should this raise an error? | |
150 | r, w = dctx.copy_stream(source, dest) |
|
150 | r, w = dctx.copy_stream(source, dest) | |
151 |
|
151 | |||
152 | self.assertEqual(r, 0) |
|
152 | self.assertEqual(r, 0) | |
153 | self.assertEqual(w, 0) |
|
153 | self.assertEqual(w, 0) | |
154 | self.assertEqual(dest.getvalue(), b'') |
|
154 | self.assertEqual(dest.getvalue(), b'') | |
155 |
|
155 | |||
156 | def test_large_data(self): |
|
156 | def test_large_data(self): | |
157 | source = io.BytesIO() |
|
157 | source = io.BytesIO() | |
158 | for i in range(255): |
|
158 | for i in range(255): | |
159 | source.write(struct.Struct('>B').pack(i) * 16384) |
|
159 | source.write(struct.Struct('>B').pack(i) * 16384) | |
160 | source.seek(0) |
|
160 | source.seek(0) | |
161 |
|
161 | |||
162 | compressed = io.BytesIO() |
|
162 | compressed = io.BytesIO() | |
163 | cctx = zstd.ZstdCompressor() |
|
163 | cctx = zstd.ZstdCompressor() | |
164 | cctx.copy_stream(source, compressed) |
|
164 | cctx.copy_stream(source, compressed) | |
165 |
|
165 | |||
166 | compressed.seek(0) |
|
166 | compressed.seek(0) | |
167 | dest = io.BytesIO() |
|
167 | dest = io.BytesIO() | |
168 | dctx = zstd.ZstdDecompressor() |
|
168 | dctx = zstd.ZstdDecompressor() | |
169 | r, w = dctx.copy_stream(compressed, dest) |
|
169 | r, w = dctx.copy_stream(compressed, dest) | |
170 |
|
170 | |||
171 | self.assertEqual(r, len(compressed.getvalue())) |
|
171 | self.assertEqual(r, len(compressed.getvalue())) | |
172 | self.assertEqual(w, len(source.getvalue())) |
|
172 | self.assertEqual(w, len(source.getvalue())) | |
173 |
|
173 | |||
174 | def test_read_write_size(self): |
|
174 | def test_read_write_size(self): | |
175 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress( |
|
175 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress( | |
176 | b'foobarfoobar')) |
|
176 | b'foobarfoobar')) | |
177 |
|
177 | |||
178 | dest = OpCountingBytesIO() |
|
178 | dest = OpCountingBytesIO() | |
179 | dctx = zstd.ZstdDecompressor() |
|
179 | dctx = zstd.ZstdDecompressor() | |
180 | r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
180 | r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) | |
181 |
|
181 | |||
182 | self.assertEqual(r, len(source.getvalue())) |
|
182 | self.assertEqual(r, len(source.getvalue())) | |
183 | self.assertEqual(w, len(b'foobarfoobar')) |
|
183 | self.assertEqual(w, len(b'foobarfoobar')) | |
184 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
184 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) | |
185 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
185 | self.assertEqual(dest._write_count, len(dest.getvalue())) | |
186 |
|
186 | |||
187 |
|
187 | |||
188 | @make_cffi |
|
188 | @make_cffi | |
189 | class TestDecompressor_decompressobj(unittest.TestCase): |
|
189 | class TestDecompressor_decompressobj(unittest.TestCase): | |
190 | def test_simple(self): |
|
190 | def test_simple(self): | |
191 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
191 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') | |
192 |
|
192 | |||
193 | dctx = zstd.ZstdDecompressor() |
|
193 | dctx = zstd.ZstdDecompressor() | |
194 | dobj = dctx.decompressobj() |
|
194 | dobj = dctx.decompressobj() | |
195 | self.assertEqual(dobj.decompress(data), b'foobar') |
|
195 | self.assertEqual(dobj.decompress(data), b'foobar') | |
196 |
|
196 | |||
197 | def test_reuse(self): |
|
197 | def test_reuse(self): | |
198 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') |
|
198 | data = zstd.ZstdCompressor(level=1).compress(b'foobar') | |
199 |
|
199 | |||
200 | dctx = zstd.ZstdDecompressor() |
|
200 | dctx = zstd.ZstdDecompressor() | |
201 | dobj = dctx.decompressobj() |
|
201 | dobj = dctx.decompressobj() | |
202 | dobj.decompress(data) |
|
202 | dobj.decompress(data) | |
203 |
|
203 | |||
204 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): |
|
204 | with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'): | |
205 | dobj.decompress(data) |
|
205 | dobj.decompress(data) | |
206 |
|
206 | |||
207 |
|
207 | |||
208 | def decompress_via_writer(data): |
|
208 | def decompress_via_writer(data): | |
209 | buffer = io.BytesIO() |
|
209 | buffer = io.BytesIO() | |
210 | dctx = zstd.ZstdDecompressor() |
|
210 | dctx = zstd.ZstdDecompressor() | |
211 | with dctx.write_to(buffer) as decompressor: |
|
211 | with dctx.write_to(buffer) as decompressor: | |
212 | decompressor.write(data) |
|
212 | decompressor.write(data) | |
213 | return buffer.getvalue() |
|
213 | return buffer.getvalue() | |
214 |
|
214 | |||
215 |
|
215 | |||
216 | @make_cffi |
|
216 | @make_cffi | |
217 | class TestDecompressor_write_to(unittest.TestCase): |
|
217 | class TestDecompressor_write_to(unittest.TestCase): | |
218 | def test_empty_roundtrip(self): |
|
218 | def test_empty_roundtrip(self): | |
219 | cctx = zstd.ZstdCompressor() |
|
219 | cctx = zstd.ZstdCompressor() | |
220 | empty = cctx.compress(b'') |
|
220 | empty = cctx.compress(b'') | |
221 | self.assertEqual(decompress_via_writer(empty), b'') |
|
221 | self.assertEqual(decompress_via_writer(empty), b'') | |
222 |
|
222 | |||
223 | def test_large_roundtrip(self): |
|
223 | def test_large_roundtrip(self): | |
224 | chunks = [] |
|
224 | chunks = [] | |
225 | for i in range(255): |
|
225 | for i in range(255): | |
226 | chunks.append(struct.Struct('>B').pack(i) * 16384) |
|
226 | chunks.append(struct.Struct('>B').pack(i) * 16384) | |
227 | orig = b''.join(chunks) |
|
227 | orig = b''.join(chunks) | |
228 | cctx = zstd.ZstdCompressor() |
|
228 | cctx = zstd.ZstdCompressor() | |
229 | compressed = cctx.compress(orig) |
|
229 | compressed = cctx.compress(orig) | |
230 |
|
230 | |||
231 | self.assertEqual(decompress_via_writer(compressed), orig) |
|
231 | self.assertEqual(decompress_via_writer(compressed), orig) | |
232 |
|
232 | |||
233 | def test_multiple_calls(self): |
|
233 | def test_multiple_calls(self): | |
234 | chunks = [] |
|
234 | chunks = [] | |
235 | for i in range(255): |
|
235 | for i in range(255): | |
236 | for j in range(255): |
|
236 | for j in range(255): | |
237 | chunks.append(struct.Struct('>B').pack(j) * i) |
|
237 | chunks.append(struct.Struct('>B').pack(j) * i) | |
238 |
|
238 | |||
239 | orig = b''.join(chunks) |
|
239 | orig = b''.join(chunks) | |
240 | cctx = zstd.ZstdCompressor() |
|
240 | cctx = zstd.ZstdCompressor() | |
241 | compressed = cctx.compress(orig) |
|
241 | compressed = cctx.compress(orig) | |
242 |
|
242 | |||
243 | buffer = io.BytesIO() |
|
243 | buffer = io.BytesIO() | |
244 | dctx = zstd.ZstdDecompressor() |
|
244 | dctx = zstd.ZstdDecompressor() | |
245 | with dctx.write_to(buffer) as decompressor: |
|
245 | with dctx.write_to(buffer) as decompressor: | |
246 | pos = 0 |
|
246 | pos = 0 | |
247 | while pos < len(compressed): |
|
247 | while pos < len(compressed): | |
248 | pos2 = pos + 8192 |
|
248 | pos2 = pos + 8192 | |
249 | decompressor.write(compressed[pos:pos2]) |
|
249 | decompressor.write(compressed[pos:pos2]) | |
250 | pos += 8192 |
|
250 | pos += 8192 | |
251 | self.assertEqual(buffer.getvalue(), orig) |
|
251 | self.assertEqual(buffer.getvalue(), orig) | |
252 |
|
252 | |||
253 | def test_dictionary(self): |
|
253 | def test_dictionary(self): | |
254 | samples = [] |
|
254 | samples = [] | |
255 | for i in range(128): |
|
255 | for i in range(128): | |
256 | samples.append(b'foo' * 64) |
|
256 | samples.append(b'foo' * 64) | |
257 | samples.append(b'bar' * 64) |
|
257 | samples.append(b'bar' * 64) | |
258 | samples.append(b'foobar' * 64) |
|
258 | samples.append(b'foobar' * 64) | |
259 |
|
259 | |||
260 | d = zstd.train_dictionary(8192, samples) |
|
260 | d = zstd.train_dictionary(8192, samples) | |
261 |
|
261 | |||
262 | orig = b'foobar' * 16384 |
|
262 | orig = b'foobar' * 16384 | |
263 | buffer = io.BytesIO() |
|
263 | buffer = io.BytesIO() | |
264 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
264 | cctx = zstd.ZstdCompressor(dict_data=d) | |
265 | with cctx.write_to(buffer) as compressor: |
|
265 | with cctx.write_to(buffer) as compressor: | |
266 | self.assertEqual(compressor.write(orig), 1544) |
|
266 | self.assertEqual(compressor.write(orig), 1544) | |
267 |
|
267 | |||
268 | compressed = buffer.getvalue() |
|
268 | compressed = buffer.getvalue() | |
269 | buffer = io.BytesIO() |
|
269 | buffer = io.BytesIO() | |
270 |
|
270 | |||
271 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
271 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
272 | with dctx.write_to(buffer) as decompressor: |
|
272 | with dctx.write_to(buffer) as decompressor: | |
273 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
273 | self.assertEqual(decompressor.write(compressed), len(orig)) | |
274 |
|
274 | |||
275 | self.assertEqual(buffer.getvalue(), orig) |
|
275 | self.assertEqual(buffer.getvalue(), orig) | |
276 |
|
276 | |||
277 | def test_memory_size(self): |
|
277 | def test_memory_size(self): | |
278 | dctx = zstd.ZstdDecompressor() |
|
278 | dctx = zstd.ZstdDecompressor() | |
279 | buffer = io.BytesIO() |
|
279 | buffer = io.BytesIO() | |
280 | with dctx.write_to(buffer) as decompressor: |
|
280 | with dctx.write_to(buffer) as decompressor: | |
281 | size = decompressor.memory_size() |
|
281 | size = decompressor.memory_size() | |
282 |
|
282 | |||
283 | self.assertGreater(size, 100000) |
|
283 | self.assertGreater(size, 100000) | |
284 |
|
284 | |||
285 | def test_write_size(self): |
|
285 | def test_write_size(self): | |
286 | source = zstd.ZstdCompressor().compress(b'foobarfoobar') |
|
286 | source = zstd.ZstdCompressor().compress(b'foobarfoobar') | |
287 | dest = OpCountingBytesIO() |
|
287 | dest = OpCountingBytesIO() | |
288 | dctx = zstd.ZstdDecompressor() |
|
288 | dctx = zstd.ZstdDecompressor() | |
289 | with dctx.write_to(dest, write_size=1) as decompressor: |
|
289 | with dctx.write_to(dest, write_size=1) as decompressor: | |
290 | s = struct.Struct('>B') |
|
290 | s = struct.Struct('>B') | |
291 | for c in source: |
|
291 | for c in source: | |
292 | if not isinstance(c, str): |
|
292 | if not isinstance(c, str): | |
293 | c = s.pack(c) |
|
293 | c = s.pack(c) | |
294 | decompressor.write(c) |
|
294 | decompressor.write(c) | |
295 |
|
295 | |||
296 |
|
||||
297 | self.assertEqual(dest.getvalue(), b'foobarfoobar') |
|
296 | self.assertEqual(dest.getvalue(), b'foobarfoobar') | |
298 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
297 | self.assertEqual(dest._write_count, len(dest.getvalue())) | |
299 |
|
298 | |||
300 |
|
299 | |||
301 | @make_cffi |
|
300 | @make_cffi | |
302 | class TestDecompressor_read_from(unittest.TestCase): |
|
301 | class TestDecompressor_read_from(unittest.TestCase): | |
303 | def test_type_validation(self): |
|
302 | def test_type_validation(self): | |
304 | dctx = zstd.ZstdDecompressor() |
|
303 | dctx = zstd.ZstdDecompressor() | |
305 |
|
304 | |||
306 | # Object with read() works. |
|
305 | # Object with read() works. | |
307 | dctx.read_from(io.BytesIO()) |
|
306 | dctx.read_from(io.BytesIO()) | |
308 |
|
307 | |||
309 | # Buffer protocol works. |
|
308 | # Buffer protocol works. | |
310 | dctx.read_from(b'foobar') |
|
309 | dctx.read_from(b'foobar') | |
311 |
|
310 | |||
312 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): |
|
311 | with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'): | |
313 | b''.join(dctx.read_from(True)) |
|
312 | b''.join(dctx.read_from(True)) | |
314 |
|
313 | |||
315 | def test_empty_input(self): |
|
314 | def test_empty_input(self): | |
316 | dctx = zstd.ZstdDecompressor() |
|
315 | dctx = zstd.ZstdDecompressor() | |
317 |
|
316 | |||
318 | source = io.BytesIO() |
|
317 | source = io.BytesIO() | |
319 | it = dctx.read_from(source) |
|
318 | it = dctx.read_from(source) | |
320 | # TODO this is arguably wrong. Should get an error about missing frame foo. |
|
319 | # TODO this is arguably wrong. Should get an error about missing frame foo. | |
321 | with self.assertRaises(StopIteration): |
|
320 | with self.assertRaises(StopIteration): | |
322 | next(it) |
|
321 | next(it) | |
323 |
|
322 | |||
324 | it = dctx.read_from(b'') |
|
323 | it = dctx.read_from(b'') | |
325 | with self.assertRaises(StopIteration): |
|
324 | with self.assertRaises(StopIteration): | |
326 | next(it) |
|
325 | next(it) | |
327 |
|
326 | |||
328 | def test_invalid_input(self): |
|
327 | def test_invalid_input(self): | |
329 | dctx = zstd.ZstdDecompressor() |
|
328 | dctx = zstd.ZstdDecompressor() | |
330 |
|
329 | |||
331 | source = io.BytesIO(b'foobar') |
|
330 | source = io.BytesIO(b'foobar') | |
332 | it = dctx.read_from(source) |
|
331 | it = dctx.read_from(source) | |
333 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
332 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |
334 | next(it) |
|
333 | next(it) | |
335 |
|
334 | |||
336 | it = dctx.read_from(b'foobar') |
|
335 | it = dctx.read_from(b'foobar') | |
337 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): |
|
336 | with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'): | |
338 | next(it) |
|
337 | next(it) | |
339 |
|
338 | |||
340 | def test_empty_roundtrip(self): |
|
339 | def test_empty_roundtrip(self): | |
341 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
340 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
342 | empty = cctx.compress(b'') |
|
341 | empty = cctx.compress(b'') | |
343 |
|
342 | |||
344 | source = io.BytesIO(empty) |
|
343 | source = io.BytesIO(empty) | |
345 | source.seek(0) |
|
344 | source.seek(0) | |
346 |
|
345 | |||
347 | dctx = zstd.ZstdDecompressor() |
|
346 | dctx = zstd.ZstdDecompressor() | |
348 | it = dctx.read_from(source) |
|
347 | it = dctx.read_from(source) | |
349 |
|
348 | |||
350 | # No chunks should be emitted since there is no data. |
|
349 | # No chunks should be emitted since there is no data. | |
351 | with self.assertRaises(StopIteration): |
|
350 | with self.assertRaises(StopIteration): | |
352 | next(it) |
|
351 | next(it) | |
353 |
|
352 | |||
354 | # Again for good measure. |
|
353 | # Again for good measure. | |
355 | with self.assertRaises(StopIteration): |
|
354 | with self.assertRaises(StopIteration): | |
356 | next(it) |
|
355 | next(it) | |
357 |
|
356 | |||
358 | def test_skip_bytes_too_large(self): |
|
357 | def test_skip_bytes_too_large(self): | |
359 | dctx = zstd.ZstdDecompressor() |
|
358 | dctx = zstd.ZstdDecompressor() | |
360 |
|
359 | |||
361 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): |
|
360 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): | |
362 | b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1)) |
|
361 | b''.join(dctx.read_from(b'', skip_bytes=1, read_size=1)) | |
363 |
|
362 | |||
364 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): |
|
363 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): | |
365 | b''.join(dctx.read_from(b'foobar', skip_bytes=10)) |
|
364 | b''.join(dctx.read_from(b'foobar', skip_bytes=10)) | |
366 |
|
365 | |||
367 | def test_skip_bytes(self): |
|
366 | def test_skip_bytes(self): | |
368 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
367 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
369 | compressed = cctx.compress(b'foobar') |
|
368 | compressed = cctx.compress(b'foobar') | |
370 |
|
369 | |||
371 | dctx = zstd.ZstdDecompressor() |
|
370 | dctx = zstd.ZstdDecompressor() | |
372 | output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3)) |
|
371 | output = b''.join(dctx.read_from(b'hdr' + compressed, skip_bytes=3)) | |
373 | self.assertEqual(output, b'foobar') |
|
372 | self.assertEqual(output, b'foobar') | |
374 |
|
373 | |||
375 | def test_large_output(self): |
|
374 | def test_large_output(self): | |
376 | source = io.BytesIO() |
|
375 | source = io.BytesIO() | |
377 | source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) |
|
376 | source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |
378 | source.write(b'o') |
|
377 | source.write(b'o') | |
379 | source.seek(0) |
|
378 | source.seek(0) | |
380 |
|
379 | |||
381 | cctx = zstd.ZstdCompressor(level=1) |
|
380 | cctx = zstd.ZstdCompressor(level=1) | |
382 | compressed = io.BytesIO(cctx.compress(source.getvalue())) |
|
381 | compressed = io.BytesIO(cctx.compress(source.getvalue())) | |
383 | compressed.seek(0) |
|
382 | compressed.seek(0) | |
384 |
|
383 | |||
385 | dctx = zstd.ZstdDecompressor() |
|
384 | dctx = zstd.ZstdDecompressor() | |
386 | it = dctx.read_from(compressed) |
|
385 | it = dctx.read_from(compressed) | |
387 |
|
386 | |||
388 | chunks = [] |
|
387 | chunks = [] | |
389 | chunks.append(next(it)) |
|
388 | chunks.append(next(it)) | |
390 | chunks.append(next(it)) |
|
389 | chunks.append(next(it)) | |
391 |
|
390 | |||
392 | with self.assertRaises(StopIteration): |
|
391 | with self.assertRaises(StopIteration): | |
393 | next(it) |
|
392 | next(it) | |
394 |
|
393 | |||
395 | decompressed = b''.join(chunks) |
|
394 | decompressed = b''.join(chunks) | |
396 | self.assertEqual(decompressed, source.getvalue()) |
|
395 | self.assertEqual(decompressed, source.getvalue()) | |
397 |
|
396 | |||
398 | # And again with buffer protocol. |
|
397 | # And again with buffer protocol. | |
399 | it = dctx.read_from(compressed.getvalue()) |
|
398 | it = dctx.read_from(compressed.getvalue()) | |
400 | chunks = [] |
|
399 | chunks = [] | |
401 | chunks.append(next(it)) |
|
400 | chunks.append(next(it)) | |
402 | chunks.append(next(it)) |
|
401 | chunks.append(next(it)) | |
403 |
|
402 | |||
404 | with self.assertRaises(StopIteration): |
|
403 | with self.assertRaises(StopIteration): | |
405 | next(it) |
|
404 | next(it) | |
406 |
|
405 | |||
407 | decompressed = b''.join(chunks) |
|
406 | decompressed = b''.join(chunks) | |
408 | self.assertEqual(decompressed, source.getvalue()) |
|
407 | self.assertEqual(decompressed, source.getvalue()) | |
409 |
|
408 | |||
410 | def test_large_input(self): |
|
409 | def test_large_input(self): | |
411 | bytes = list(struct.Struct('>B').pack(i) for i in range(256)) |
|
410 | bytes = list(struct.Struct('>B').pack(i) for i in range(256)) | |
412 | compressed = io.BytesIO() |
|
411 | compressed = io.BytesIO() | |
413 | input_size = 0 |
|
412 | input_size = 0 | |
414 | cctx = zstd.ZstdCompressor(level=1) |
|
413 | cctx = zstd.ZstdCompressor(level=1) | |
415 | with cctx.write_to(compressed) as compressor: |
|
414 | with cctx.write_to(compressed) as compressor: | |
416 | while True: |
|
415 | while True: | |
417 | compressor.write(random.choice(bytes)) |
|
416 | compressor.write(random.choice(bytes)) | |
418 | input_size += 1 |
|
417 | input_size += 1 | |
419 |
|
418 | |||
420 | have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE |
|
419 | have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
421 | have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 |
|
420 | have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 | |
422 | if have_compressed and have_raw: |
|
421 | if have_compressed and have_raw: | |
423 | break |
|
422 | break | |
424 |
|
423 | |||
425 | compressed.seek(0) |
|
424 | compressed.seek(0) | |
426 | self.assertGreater(len(compressed.getvalue()), |
|
425 | self.assertGreater(len(compressed.getvalue()), | |
427 | zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
426 | zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE) | |
428 |
|
427 | |||
429 | dctx = zstd.ZstdDecompressor() |
|
428 | dctx = zstd.ZstdDecompressor() | |
430 | it = dctx.read_from(compressed) |
|
429 | it = dctx.read_from(compressed) | |
431 |
|
430 | |||
432 | chunks = [] |
|
431 | chunks = [] | |
433 | chunks.append(next(it)) |
|
432 | chunks.append(next(it)) | |
434 | chunks.append(next(it)) |
|
433 | chunks.append(next(it)) | |
435 | chunks.append(next(it)) |
|
434 | chunks.append(next(it)) | |
436 |
|
435 | |||
437 | with self.assertRaises(StopIteration): |
|
436 | with self.assertRaises(StopIteration): | |
438 | next(it) |
|
437 | next(it) | |
439 |
|
438 | |||
440 | decompressed = b''.join(chunks) |
|
439 | decompressed = b''.join(chunks) | |
441 | self.assertEqual(len(decompressed), input_size) |
|
440 | self.assertEqual(len(decompressed), input_size) | |
442 |
|
441 | |||
443 | # And again with buffer protocol. |
|
442 | # And again with buffer protocol. | |
444 | it = dctx.read_from(compressed.getvalue()) |
|
443 | it = dctx.read_from(compressed.getvalue()) | |
445 |
|
444 | |||
446 | chunks = [] |
|
445 | chunks = [] | |
447 | chunks.append(next(it)) |
|
446 | chunks.append(next(it)) | |
448 | chunks.append(next(it)) |
|
447 | chunks.append(next(it)) | |
449 | chunks.append(next(it)) |
|
448 | chunks.append(next(it)) | |
450 |
|
449 | |||
451 | with self.assertRaises(StopIteration): |
|
450 | with self.assertRaises(StopIteration): | |
452 | next(it) |
|
451 | next(it) | |
453 |
|
452 | |||
454 | decompressed = b''.join(chunks) |
|
453 | decompressed = b''.join(chunks) | |
455 | self.assertEqual(len(decompressed), input_size) |
|
454 | self.assertEqual(len(decompressed), input_size) | |
456 |
|
455 | |||
457 | def test_interesting(self): |
|
456 | def test_interesting(self): | |
458 | # Found this edge case via fuzzing. |
|
457 | # Found this edge case via fuzzing. | |
459 | cctx = zstd.ZstdCompressor(level=1) |
|
458 | cctx = zstd.ZstdCompressor(level=1) | |
460 |
|
459 | |||
461 | source = io.BytesIO() |
|
460 | source = io.BytesIO() | |
462 |
|
461 | |||
463 | compressed = io.BytesIO() |
|
462 | compressed = io.BytesIO() | |
464 | with cctx.write_to(compressed) as compressor: |
|
463 | with cctx.write_to(compressed) as compressor: | |
465 | for i in range(256): |
|
464 | for i in range(256): | |
466 | chunk = b'\0' * 1024 |
|
465 | chunk = b'\0' * 1024 | |
467 | compressor.write(chunk) |
|
466 | compressor.write(chunk) | |
468 | source.write(chunk) |
|
467 | source.write(chunk) | |
469 |
|
468 | |||
470 | dctx = zstd.ZstdDecompressor() |
|
469 | dctx = zstd.ZstdDecompressor() | |
471 |
|
470 | |||
472 | simple = dctx.decompress(compressed.getvalue(), |
|
471 | simple = dctx.decompress(compressed.getvalue(), | |
473 | max_output_size=len(source.getvalue())) |
|
472 | max_output_size=len(source.getvalue())) | |
474 | self.assertEqual(simple, source.getvalue()) |
|
473 | self.assertEqual(simple, source.getvalue()) | |
475 |
|
474 | |||
476 | compressed.seek(0) |
|
475 | compressed.seek(0) | |
477 | streamed = b''.join(dctx.read_from(compressed)) |
|
476 | streamed = b''.join(dctx.read_from(compressed)) | |
478 | self.assertEqual(streamed, source.getvalue()) |
|
477 | self.assertEqual(streamed, source.getvalue()) | |
479 |
|
478 | |||
480 | def test_read_write_size(self): |
|
479 | def test_read_write_size(self): | |
481 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) |
|
480 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar')) | |
482 | dctx = zstd.ZstdDecompressor() |
|
481 | dctx = zstd.ZstdDecompressor() | |
483 | for chunk in dctx.read_from(source, read_size=1, write_size=1): |
|
482 | for chunk in dctx.read_from(source, read_size=1, write_size=1): | |
484 | self.assertEqual(len(chunk), 1) |
|
483 | self.assertEqual(len(chunk), 1) | |
485 |
|
484 | |||
486 | self.assertEqual(source._read_count, len(source.getvalue())) |
|
485 | self.assertEqual(source._read_count, len(source.getvalue())) | |
487 |
|
486 | |||
488 |
|
487 | |||
489 | @make_cffi |
|
488 | @make_cffi | |
490 | class TestDecompressor_content_dict_chain(unittest.TestCase): |
|
489 | class TestDecompressor_content_dict_chain(unittest.TestCase): | |
491 | def test_bad_inputs_simple(self): |
|
490 | def test_bad_inputs_simple(self): | |
492 | dctx = zstd.ZstdDecompressor() |
|
491 | dctx = zstd.ZstdDecompressor() | |
493 |
|
492 | |||
494 | with self.assertRaises(TypeError): |
|
493 | with self.assertRaises(TypeError): | |
495 | dctx.decompress_content_dict_chain(b'foo') |
|
494 | dctx.decompress_content_dict_chain(b'foo') | |
496 |
|
495 | |||
497 | with self.assertRaises(TypeError): |
|
496 | with self.assertRaises(TypeError): | |
498 | dctx.decompress_content_dict_chain((b'foo', b'bar')) |
|
497 | dctx.decompress_content_dict_chain((b'foo', b'bar')) | |
499 |
|
498 | |||
500 | with self.assertRaisesRegexp(ValueError, 'empty input chain'): |
|
499 | with self.assertRaisesRegexp(ValueError, 'empty input chain'): | |
501 | dctx.decompress_content_dict_chain([]) |
|
500 | dctx.decompress_content_dict_chain([]) | |
502 |
|
501 | |||
503 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): |
|
502 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): | |
504 | dctx.decompress_content_dict_chain([u'foo']) |
|
503 | dctx.decompress_content_dict_chain([u'foo']) | |
505 |
|
504 | |||
506 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): |
|
505 | with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'): | |
507 | dctx.decompress_content_dict_chain([True]) |
|
506 | dctx.decompress_content_dict_chain([True]) | |
508 |
|
507 | |||
509 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): |
|
508 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): | |
510 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) |
|
509 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) | |
511 |
|
510 | |||
512 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): |
|
511 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'): | |
513 | dctx.decompress_content_dict_chain([b'foo' * 8]) |
|
512 | dctx.decompress_content_dict_chain([b'foo' * 8]) | |
514 |
|
513 | |||
515 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
514 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) | |
516 |
|
515 | |||
517 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): |
|
516 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): | |
518 | dctx.decompress_content_dict_chain([no_size]) |
|
517 | dctx.decompress_content_dict_chain([no_size]) | |
519 |
|
518 | |||
520 | # Corrupt first frame. |
|
519 | # Corrupt first frame. | |
521 | frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) |
|
520 | frame = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) | |
522 | frame = frame[0:12] + frame[15:] |
|
521 | frame = frame[0:12] + frame[15:] | |
523 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'): |
|
522 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 0'): | |
524 | dctx.decompress_content_dict_chain([frame]) |
|
523 | dctx.decompress_content_dict_chain([frame]) | |
525 |
|
524 | |||
526 | def test_bad_subsequent_input(self): |
|
525 | def test_bad_subsequent_input(self): | |
527 | initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) |
|
526 | initial = zstd.ZstdCompressor(write_content_size=True).compress(b'foo' * 64) | |
528 |
|
527 | |||
529 | dctx = zstd.ZstdDecompressor() |
|
528 | dctx = zstd.ZstdDecompressor() | |
530 |
|
529 | |||
531 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): |
|
530 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): | |
532 | dctx.decompress_content_dict_chain([initial, u'foo']) |
|
531 | dctx.decompress_content_dict_chain([initial, u'foo']) | |
533 |
|
532 | |||
534 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): |
|
533 | with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'): | |
535 | dctx.decompress_content_dict_chain([initial, None]) |
|
534 | dctx.decompress_content_dict_chain([initial, None]) | |
536 |
|
535 | |||
537 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): |
|
536 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): | |
538 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) |
|
537 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) | |
539 |
|
538 | |||
540 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): |
|
539 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'): | |
541 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) |
|
540 | dctx.decompress_content_dict_chain([initial, b'foo' * 8]) | |
542 |
|
541 | |||
543 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) |
|
542 | no_size = zstd.ZstdCompressor().compress(b'foo' * 64) | |
544 |
|
543 | |||
545 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): |
|
544 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): | |
546 | dctx.decompress_content_dict_chain([initial, no_size]) |
|
545 | dctx.decompress_content_dict_chain([initial, no_size]) | |
547 |
|
546 | |||
548 | # Corrupt second frame. |
|
547 | # Corrupt second frame. | |
549 | cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) |
|
548 | cctx = zstd.ZstdCompressor(write_content_size=True, dict_data=zstd.ZstdCompressionDict(b'foo' * 64)) | |
550 | frame = cctx.compress(b'bar' * 64) |
|
549 | frame = cctx.compress(b'bar' * 64) | |
551 | frame = frame[0:12] + frame[15:] |
|
550 | frame = frame[0:12] + frame[15:] | |
552 |
|
551 | |||
553 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'): |
|
552 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not decompress chunk 1'): | |
554 | dctx.decompress_content_dict_chain([initial, frame]) |
|
553 | dctx.decompress_content_dict_chain([initial, frame]) | |
555 |
|
554 | |||
556 | def test_simple(self): |
|
555 | def test_simple(self): | |
557 | original = [ |
|
556 | original = [ | |
558 | b'foo' * 64, |
|
557 | b'foo' * 64, | |
559 | b'foobar' * 64, |
|
558 | b'foobar' * 64, | |
560 | b'baz' * 64, |
|
559 | b'baz' * 64, | |
561 | b'foobaz' * 64, |
|
560 | b'foobaz' * 64, | |
562 | b'foobarbaz' * 64, |
|
561 | b'foobarbaz' * 64, | |
563 | ] |
|
562 | ] | |
564 |
|
563 | |||
565 | chunks = [] |
|
564 | chunks = [] | |
566 | chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0])) |
|
565 | chunks.append(zstd.ZstdCompressor(write_content_size=True).compress(original[0])) | |
567 | for i, chunk in enumerate(original[1:]): |
|
566 | for i, chunk in enumerate(original[1:]): | |
568 | d = zstd.ZstdCompressionDict(original[i]) |
|
567 | d = zstd.ZstdCompressionDict(original[i]) | |
569 | cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True) |
|
568 | cctx = zstd.ZstdCompressor(dict_data=d, write_content_size=True) | |
570 | chunks.append(cctx.compress(chunk)) |
|
569 | chunks.append(cctx.compress(chunk)) | |
571 |
|
570 | |||
572 | for i in range(1, len(original)): |
|
571 | for i in range(1, len(original)): | |
573 | chain = chunks[0:i] |
|
572 | chain = chunks[0:i] | |
574 | expected = original[i - 1] |
|
573 | expected = original[i - 1] | |
575 | dctx = zstd.ZstdDecompressor() |
|
574 | dctx = zstd.ZstdDecompressor() | |
576 | decompressed = dctx.decompress_content_dict_chain(chain) |
|
575 | decompressed = dctx.decompress_content_dict_chain(chain) | |
577 | self.assertEqual(decompressed, expected) |
|
576 | self.assertEqual(decompressed, expected) | |
|
577 | ||||
|
578 | ||||
|
579 | # TODO enable for CFFI | |||
|
580 | class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase): | |||
|
581 | def test_invalid_inputs(self): | |||
|
582 | dctx = zstd.ZstdDecompressor() | |||
|
583 | ||||
|
584 | with self.assertRaises(TypeError): | |||
|
585 | dctx.multi_decompress_to_buffer(True) | |||
|
586 | ||||
|
587 | with self.assertRaises(TypeError): | |||
|
588 | dctx.multi_decompress_to_buffer((1, 2)) | |||
|
589 | ||||
|
590 | with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'): | |||
|
591 | dctx.multi_decompress_to_buffer([u'foo']) | |||
|
592 | ||||
|
593 | with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'): | |||
|
594 | dctx.multi_decompress_to_buffer([b'foobarbaz']) | |||
|
595 | ||||
|
596 | def test_list_input(self): | |||
|
597 | cctx = zstd.ZstdCompressor(write_content_size=True) | |||
|
598 | ||||
|
599 | original = [b'foo' * 4, b'bar' * 6] | |||
|
600 | frames = [cctx.compress(d) for d in original] | |||
|
601 | ||||
|
602 | dctx = zstd.ZstdDecompressor() | |||
|
603 | result = dctx.multi_decompress_to_buffer(frames) | |||
|
604 | ||||
|
605 | self.assertEqual(len(result), len(frames)) | |||
|
606 | self.assertEqual(result.size(), sum(map(len, original))) | |||
|
607 | ||||
|
608 | for i, data in enumerate(original): | |||
|
609 | self.assertEqual(result[i].tobytes(), data) | |||
|
610 | ||||
|
611 | self.assertEqual(result[0].offset, 0) | |||
|
612 | self.assertEqual(len(result[0]), 12) | |||
|
613 | self.assertEqual(result[1].offset, 12) | |||
|
614 | self.assertEqual(len(result[1]), 18) | |||
|
615 | ||||
|
616 | def test_list_input_frame_sizes(self): | |||
|
617 | cctx = zstd.ZstdCompressor(write_content_size=False) | |||
|
618 | ||||
|
619 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] | |||
|
620 | frames = [cctx.compress(d) for d in original] | |||
|
621 | sizes = struct.pack('=' + 'Q' * len(original), *map(len, original)) | |||
|
622 | ||||
|
623 | dctx = zstd.ZstdDecompressor() | |||
|
624 | result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) | |||
|
625 | ||||
|
626 | self.assertEqual(len(result), len(frames)) | |||
|
627 | self.assertEqual(result.size(), sum(map(len, original))) | |||
|
628 | ||||
|
629 | for i, data in enumerate(original): | |||
|
630 | self.assertEqual(result[i].tobytes(), data) | |||
|
631 | ||||
|
632 | def test_buffer_with_segments_input(self): | |||
|
633 | cctx = zstd.ZstdCompressor(write_content_size=True) | |||
|
634 | ||||
|
635 | original = [b'foo' * 4, b'bar' * 6] | |||
|
636 | frames = [cctx.compress(d) for d in original] | |||
|
637 | ||||
|
638 | dctx = zstd.ZstdDecompressor() | |||
|
639 | ||||
|
640 | segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1])) | |||
|
641 | b = zstd.BufferWithSegments(b''.join(frames), segments) | |||
|
642 | ||||
|
643 | result = dctx.multi_decompress_to_buffer(b) | |||
|
644 | ||||
|
645 | self.assertEqual(len(result), len(frames)) | |||
|
646 | self.assertEqual(result[0].offset, 0) | |||
|
647 | self.assertEqual(len(result[0]), 12) | |||
|
648 | self.assertEqual(result[1].offset, 12) | |||
|
649 | self.assertEqual(len(result[1]), 18) | |||
|
650 | ||||
|
651 | def test_buffer_with_segments_sizes(self): | |||
|
652 | cctx = zstd.ZstdCompressor(write_content_size=False) | |||
|
653 | original = [b'foo' * 4, b'bar' * 6, b'baz' * 8] | |||
|
654 | frames = [cctx.compress(d) for d in original] | |||
|
655 | sizes = struct.pack('=' + 'Q' * len(original), *map(len, original)) | |||
|
656 | ||||
|
657 | segments = struct.pack('=QQQQQQ', 0, len(frames[0]), | |||
|
658 | len(frames[0]), len(frames[1]), | |||
|
659 | len(frames[0]) + len(frames[1]), len(frames[2])) | |||
|
660 | b = zstd.BufferWithSegments(b''.join(frames), segments) | |||
|
661 | ||||
|
662 | dctx = zstd.ZstdDecompressor() | |||
|
663 | result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes) | |||
|
664 | ||||
|
665 | self.assertEqual(len(result), len(frames)) | |||
|
666 | self.assertEqual(result.size(), sum(map(len, original))) | |||
|
667 | ||||
|
668 | for i, data in enumerate(original): | |||
|
669 | self.assertEqual(result[i].tobytes(), data) | |||
|
670 | ||||
|
671 | def test_buffer_with_segments_collection_input(self): | |||
|
672 | cctx = zstd.ZstdCompressor(write_content_size=True) | |||
|
673 | ||||
|
674 | original = [ | |||
|
675 | b'foo0' * 2, | |||
|
676 | b'foo1' * 3, | |||
|
677 | b'foo2' * 4, | |||
|
678 | b'foo3' * 5, | |||
|
679 | b'foo4' * 6, | |||
|
680 | ] | |||
|
681 | ||||
|
682 | frames = cctx.multi_compress_to_buffer(original) | |||
|
683 | ||||
|
684 | # Check round trip. | |||
|
685 | dctx = zstd.ZstdDecompressor() | |||
|
686 | decompressed = dctx.multi_decompress_to_buffer(frames, threads=3) | |||
|
687 | ||||
|
688 | self.assertEqual(len(decompressed), len(original)) | |||
|
689 | ||||
|
690 | for i, data in enumerate(original): | |||
|
691 | self.assertEqual(data, decompressed[i].tobytes()) | |||
|
692 | ||||
|
693 | # And a manual mode. | |||
|
694 | b = b''.join([frames[0].tobytes(), frames[1].tobytes()]) | |||
|
695 | b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ', | |||
|
696 | 0, len(frames[0]), | |||
|
697 | len(frames[0]), len(frames[1]))) | |||
|
698 | ||||
|
699 | b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()]) | |||
|
700 | b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', | |||
|
701 | 0, len(frames[2]), | |||
|
702 | len(frames[2]), len(frames[3]), | |||
|
703 | len(frames[2]) + len(frames[3]), len(frames[4]))) | |||
|
704 | ||||
|
705 | c = zstd.BufferWithSegmentsCollection(b1, b2) | |||
|
706 | ||||
|
707 | dctx = zstd.ZstdDecompressor() | |||
|
708 | decompressed = dctx.multi_decompress_to_buffer(c) | |||
|
709 | ||||
|
710 | self.assertEqual(len(decompressed), 5) | |||
|
711 | for i in range(5): | |||
|
712 | self.assertEqual(decompressed[i].tobytes(), original[i]) | |||
|
713 | ||||
|
714 | def test_multiple_threads(self): | |||
|
715 | cctx = zstd.ZstdCompressor(write_content_size=True) | |||
|
716 | ||||
|
717 | frames = [] | |||
|
718 | frames.extend(cctx.compress(b'x' * 64) for i in range(256)) | |||
|
719 | frames.extend(cctx.compress(b'y' * 64) for i in range(256)) | |||
|
720 | ||||
|
721 | dctx = zstd.ZstdDecompressor() | |||
|
722 | result = dctx.multi_decompress_to_buffer(frames, threads=-1) | |||
|
723 | ||||
|
724 | self.assertEqual(len(result), len(frames)) | |||
|
725 | self.assertEqual(result.size(), 2 * 64 * 256) | |||
|
726 | self.assertEqual(result[0].tobytes(), b'x' * 64) | |||
|
727 | self.assertEqual(result[256].tobytes(), b'y' * 64) | |||
|
728 | ||||
|
729 | def test_item_failure(self): | |||
|
730 | cctx = zstd.ZstdCompressor(write_content_size=True) | |||
|
731 | frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)] | |||
|
732 | ||||
|
733 | frames[1] = frames[1] + b'extra' | |||
|
734 | ||||
|
735 | dctx = zstd.ZstdDecompressor() | |||
|
736 | ||||
|
737 | with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'): | |||
|
738 | dctx.multi_decompress_to_buffer(frames) | |||
|
739 | ||||
|
740 | with self.assertRaisesRegexp(zstd.ZstdError, 'error decompressing item 1: Src size incorrect'): | |||
|
741 | dctx.multi_decompress_to_buffer(frames, threads=2) |
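Taken together, the new tests above correspond to the following usage sketch for multi_decompress_to_buffer(); the sample data is illustrative, while the calls and attributes mirror what the tests exercise.

    import struct
    import zstd

    cctx = zstd.ZstdCompressor(write_content_size=True)
    originals = [b'foo' * 4, b'bar' * 6]
    frames = [cctx.compress(data) for data in originals]

    dctx = zstd.ZstdDecompressor()

    # Decompress all frames in one call. The result behaves like a
    # BufferWithSegments: one (offset, length) segment per input frame.
    result = dctx.multi_decompress_to_buffer(frames)
    assert len(result) == len(frames)
    assert result[0].tobytes() == originals[0]
    assert result[1].offset == len(originals[0])

    # Frames written without an embedded content size need explicit output
    # sizes, passed as a packed array of unsigned 64-bit integers.
    cctx = zstd.ZstdCompressor(write_content_size=False)
    frames = [cctx.compress(data) for data in originals]
    sizes = struct.pack('=' + 'Q' * len(originals), *map(len, originals))
    result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
    assert result.size() == sum(map(len, originals))

    # Input can also be a single BufferWithSegments, whose segments array is
    # packed (offset, length) pairs describing where each frame lives.
    segments = struct.pack('=QQQQ', 0, len(frames[0]),
                           len(frames[0]), len(frames[1]))
    buf = zstd.BufferWithSegments(b''.join(frames), segments)
    result = dctx.multi_decompress_to_buffer(buf, decompressed_sizes=sizes)

Passing threads=N (or threads=-1, as in the threaded tests) spreads the per-frame decompression across worker threads.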
@@ -1,50 +1,110 | |||||
1 | import sys |
|
1 | import sys | |
2 |
|
2 | |||
3 | try: |
|
3 | try: | |
4 | import unittest2 as unittest |
|
4 | import unittest2 as unittest | |
5 | except ImportError: |
|
5 | except ImportError: | |
6 | import unittest |
|
6 | import unittest | |
7 |
|
7 | |||
8 | import zstd |
|
8 | import zstd | |
9 |
|
9 | |||
10 | from . common import ( |
|
10 | from . common import ( | |
11 | make_cffi, |
|
11 | make_cffi, | |
12 | ) |
|
12 | ) | |
13 |
|
13 | |||
14 | if sys.version_info[0] >= 3: |
|
14 | if sys.version_info[0] >= 3: | |
15 | int_type = int |
|
15 | int_type = int | |
16 | else: |
|
16 | else: | |
17 | int_type = long |
|
17 | int_type = long | |
18 |
|
18 | |||
19 |
|
19 | |||
20 | @make_cffi |
|
20 | @make_cffi | |
21 | class TestTrainDictionary(unittest.TestCase): |
|
21 | class TestTrainDictionary(unittest.TestCase): | |
22 | def test_no_args(self): |
|
22 | def test_no_args(self): | |
23 | with self.assertRaises(TypeError): |
|
23 | with self.assertRaises(TypeError): | |
24 | zstd.train_dictionary() |
|
24 | zstd.train_dictionary() | |
25 |
|
25 | |||
26 | def test_bad_args(self): |
|
26 | def test_bad_args(self): | |
27 | with self.assertRaises(TypeError): |
|
27 | with self.assertRaises(TypeError): | |
28 | zstd.train_dictionary(8192, u'foo') |
|
28 | zstd.train_dictionary(8192, u'foo') | |
29 |
|
29 | |||
30 | with self.assertRaises(ValueError): |
|
30 | with self.assertRaises(ValueError): | |
31 | zstd.train_dictionary(8192, [u'foo']) |
|
31 | zstd.train_dictionary(8192, [u'foo']) | |
32 |
|
32 | |||
33 | def test_basic(self): |
|
33 | def test_basic(self): | |
34 | samples = [] |
|
34 | samples = [] | |
35 | for i in range(128): |
|
35 | for i in range(128): | |
36 | samples.append(b'foo' * 64) |
|
36 | samples.append(b'foo' * 64) | |
37 | samples.append(b'bar' * 64) |
|
37 | samples.append(b'bar' * 64) | |
38 | samples.append(b'foobar' * 64) |
|
38 | samples.append(b'foobar' * 64) | |
39 | samples.append(b'baz' * 64) |
|
39 | samples.append(b'baz' * 64) | |
40 | samples.append(b'foobaz' * 64) |
|
40 | samples.append(b'foobaz' * 64) | |
41 | samples.append(b'bazfoo' * 64) |
|
41 | samples.append(b'bazfoo' * 64) | |
42 |
|
42 | |||
43 | d = zstd.train_dictionary(8192, samples) |
|
43 | d = zstd.train_dictionary(8192, samples) | |
44 | self.assertLessEqual(len(d), 8192) |
|
44 | self.assertLessEqual(len(d), 8192) | |
45 |
|
45 | |||
46 | dict_id = d.dict_id() |
|
46 | dict_id = d.dict_id() | |
47 | self.assertIsInstance(dict_id, int_type) |
|
47 | self.assertIsInstance(dict_id, int_type) | |
48 |
|
48 | |||
49 | data = d.as_bytes() |
|
49 | data = d.as_bytes() | |
50 | self.assertEqual(data[0:4], b'\x37\xa4\x30\xec') |
|
50 | self.assertEqual(data[0:4], b'\x37\xa4\x30\xec') | |
|
51 | ||||
|
52 | def test_set_dict_id(self): | |||
|
53 | samples = [] | |||
|
54 | for i in range(128): | |||
|
55 | samples.append(b'foo' * 64) | |||
|
56 | samples.append(b'foobar' * 64) | |||
|
57 | ||||
|
58 | d = zstd.train_dictionary(8192, samples, dict_id=42) | |||
|
59 | self.assertEqual(d.dict_id(), 42) | |||
|
60 | ||||
|
61 | ||||
|
62 | @make_cffi | |||
|
63 | class TestTrainCoverDictionary(unittest.TestCase): | |||
|
64 | def test_no_args(self): | |||
|
65 | with self.assertRaises(TypeError): | |||
|
66 | zstd.train_cover_dictionary() | |||
|
67 | ||||
|
68 | def test_bad_args(self): | |||
|
69 | with self.assertRaises(TypeError): | |||
|
70 | zstd.train_cover_dictionary(8192, u'foo') | |||
|
71 | ||||
|
72 | with self.assertRaises(ValueError): | |||
|
73 | zstd.train_cover_dictionary(8192, [u'foo']) | |||
|
74 | ||||
|
75 | def test_basic(self): | |||
|
76 | samples = [] | |||
|
77 | for i in range(128): | |||
|
78 | samples.append(b'foo' * 64) | |||
|
79 | samples.append(b'foobar' * 64) | |||
|
80 | ||||
|
81 | d = zstd.train_cover_dictionary(8192, samples, k=64, d=16) | |||
|
82 | self.assertIsInstance(d.dict_id(), int_type) | |||
|
83 | ||||
|
84 | data = d.as_bytes() | |||
|
85 | self.assertEqual(data[0:4], b'\x37\xa4\x30\xec') | |||
|
86 | ||||
|
87 | self.assertEqual(d.k, 64) | |||
|
88 | self.assertEqual(d.d, 16) | |||
|
89 | ||||
|
90 | def test_set_dict_id(self): | |||
|
91 | samples = [] | |||
|
92 | for i in range(128): | |||
|
93 | samples.append(b'foo' * 64) | |||
|
94 | samples.append(b'foobar' * 64) | |||
|
95 | ||||
|
96 | d = zstd.train_cover_dictionary(8192, samples, k=64, d=16, | |||
|
97 | dict_id=42) | |||
|
98 | self.assertEqual(d.dict_id(), 42) | |||
|
99 | ||||
|
100 | def test_optimize(self): | |||
|
101 | samples = [] | |||
|
102 | for i in range(128): | |||
|
103 | samples.append(b'foo' * 64) | |||
|
104 | samples.append(b'foobar' * 64) | |||
|
105 | ||||
|
106 | d = zstd.train_cover_dictionary(8192, samples, optimize=True, | |||
|
107 | threads=-1, steps=1, d=16) | |||
|
108 | ||||
|
109 | self.assertEqual(d.k, 16) | |||
|
110 | self.assertEqual(d.d, 16) |
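For readers following along, the COVER trainer exercised by this test file can be driven as in the sketch below, which mirrors the calls made in the tests above and reuses their sample shape.

    import zstd

    # Samples shaped like the ones used in the tests.
    samples = []
    for i in range(128):
        samples.append(b'foo' * 64)
        samples.append(b'foobar' * 64)

    # Explicit COVER parameters: k is the segment size, d the dmer size.
    dict_data = zstd.train_cover_dictionary(8192, samples, k=64, d=16,
                                            dict_id=42)
    assert dict_data.dict_id() == 42
    assert dict_data.as_bytes()[0:4] == b'\x37\xa4\x30\xec'  # dictionary magic

    # Or let the trainer optimize k itself, splitting the search across
    # threads.
    dict_data = zstd.train_cover_dictionary(8192, samples, optimize=True,
                                            steps=1, d=16, threads=-1)
    print(dict_data.k, dict_data.d)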
@@ -1,145 +1,210 | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | /* A Python C extension for Zstandard. */ |
|
9 | /* A Python C extension for Zstandard. */ | |
10 |
|
10 | |||
|
11 | #if defined(_WIN32) | |||
|
12 | #define WIN32_LEAN_AND_MEAN | |||
|
13 | #include <Windows.h> | |||
|
14 | #endif | |||
|
15 | ||||
11 | #include "python-zstandard.h" |
|
16 | #include "python-zstandard.h" | |
12 |
|
17 | |||
13 | PyObject *ZstdError; |
|
18 | PyObject *ZstdError; | |
14 |
|
19 | |||
15 | PyDoc_STRVAR(estimate_compression_context_size__doc__, |
|
20 | PyDoc_STRVAR(estimate_compression_context_size__doc__, | |
16 | "estimate_compression_context_size(compression_parameters)\n" |
|
21 | "estimate_compression_context_size(compression_parameters)\n" | |
17 | "\n" |
|
22 | "\n" | |
18 | "Give the amount of memory allocated for a compression context given a\n" |
|
23 | "Give the amount of memory allocated for a compression context given a\n" | |
19 | "CompressionParameters instance"); |
|
24 | "CompressionParameters instance"); | |
20 |
|
25 | |||
21 | PyDoc_STRVAR(estimate_decompression_context_size__doc__, |
|
26 | PyDoc_STRVAR(estimate_decompression_context_size__doc__, | |
22 | "estimate_decompression_context_size()\n" |
|
27 | "estimate_decompression_context_size()\n" | |
23 | "\n" |
|
28 | "\n" | |
24 | "Estimate the amount of memory allocated to a decompression context.\n" |
|
29 | "Estimate the amount of memory allocated to a decompression context.\n" | |
25 | ); |
|
30 | ); | |
26 |
|
31 | |||
27 | static PyObject* estimate_decompression_context_size(PyObject* self) { |
|
32 | static PyObject* estimate_decompression_context_size(PyObject* self) { | |
28 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); |
|
33 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); | |
29 | } |
|
34 | } | |
30 |
|
35 | |||
31 | PyDoc_STRVAR(get_compression_parameters__doc__, |
|
36 | PyDoc_STRVAR(get_compression_parameters__doc__, | |
32 | "get_compression_parameters(compression_level[, source_size[, dict_size]])\n" |
|
37 | "get_compression_parameters(compression_level[, source_size[, dict_size]])\n" | |
33 | "\n" |
|
38 | "\n" | |
34 | "Obtains a ``CompressionParameters`` instance from a compression level and\n" |
|
39 | "Obtains a ``CompressionParameters`` instance from a compression level and\n" | |
35 | "optional input size and dictionary size"); |
|
40 | "optional input size and dictionary size"); | |
36 |
|
41 | |||
37 | PyDoc_STRVAR(get_frame_parameters__doc__, |
|
42 | PyDoc_STRVAR(get_frame_parameters__doc__, | |
38 | "get_frame_parameters(data)\n" |
|
43 | "get_frame_parameters(data)\n" | |
39 | "\n" |
|
44 | "\n" | |
40 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); |
|
45 | "Obtains a ``FrameParameters`` instance by parsing data.\n"); | |
41 |
|
46 | |||
42 | PyDoc_STRVAR(train_dictionary__doc__, |
|
47 | PyDoc_STRVAR(train_dictionary__doc__, | |
43 | "train_dictionary(dict_size, samples)\n" |
|
48 | "train_dictionary(dict_size, samples)\n" | |
44 | "\n" |
|
49 | "\n" | |
45 | "Train a dictionary from sample data.\n" |
|
50 | "Train a dictionary from sample data.\n" | |
46 | "\n" |
|
51 | "\n" | |
47 | "A compression dictionary of size ``dict_size`` will be created from the\n" |
|
52 | "A compression dictionary of size ``dict_size`` will be created from the\n" | |
48 | "iterable of samples provided by ``samples``.\n" |
|
53 | "iterable of samples provided by ``samples``.\n" | |
49 | "\n" |
|
54 | "\n" | |
50 | "The raw dictionary content will be returned\n"); |
|
55 | "The raw dictionary content will be returned\n"); | |
51 |
|
56 | |||
|
57 | PyDoc_STRVAR(train_cover_dictionary__doc__, | |||
|
58 | "train_cover_dictionary(dict_size, samples, k=None, d=None, notifications=0, dict_id=0, level=0)\n" | |||
|
59 | "\n" | |||
|
60 | "Train a dictionary from sample data using the COVER algorithm.\n" | |||
|
61 | "\n" | |||
|
62 | "This behaves like ``train_dictionary()`` except a different algorithm is\n" | |||
|
63 | "used to create the dictionary. The algorithm has 2 parameters: ``k`` and\n" | |||
|
64 | "``d``. These control the *segment size* and *dmer size*. A reasonable range\n" | |||
|
65 | "for ``k`` is ``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n" | |||
|
66 | "``d`` must be less than or equal to ``k``.\n" | |||
|
67 | ); | |||
|
68 | ||||
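
As a rough illustration of the new API documented above, the following sketch (assuming the compiled extension imports as ``zstd`` and using made-up sample data) trains a COVER dictionary with explicit ``k`` and ``d`` values taken from the ranges given in the docstring:

    import zstd

    # Hypothetical training corpus; real dictionaries need many representative samples.
    samples = [('sample record %d' % i).encode('ascii') for i in range(1000)]

    # k (segment size) and d (dmer size) per the docstring: k in [16, 2048+],
    # d in [6, 16], and d <= k.
    dict_data = zstd.train_cover_dictionary(16384, samples, k=64, d=8)
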
52 | static char zstd_doc[] = "Interface to zstandard"; |
|
69 | static char zstd_doc[] = "Interface to zstandard"; | |
53 |
|
70 | |||
54 | static PyMethodDef zstd_methods[] = { |
|
71 | static PyMethodDef zstd_methods[] = { | |
|
72 | /* TODO remove since it is a method on CompressionParameters. */ | |||
55 | { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size, |
|
73 | { "estimate_compression_context_size", (PyCFunction)estimate_compression_context_size, | |
56 | METH_VARARGS, estimate_compression_context_size__doc__ }, |
|
74 | METH_VARARGS, estimate_compression_context_size__doc__ }, | |
57 | { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, |
|
75 | { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size, | |
58 | METH_NOARGS, estimate_decompression_context_size__doc__ }, |
|
76 | METH_NOARGS, estimate_decompression_context_size__doc__ }, | |
59 | { "get_compression_parameters", (PyCFunction)get_compression_parameters, |
|
77 | { "get_compression_parameters", (PyCFunction)get_compression_parameters, | |
60 | METH_VARARGS, get_compression_parameters__doc__ }, |
|
78 | METH_VARARGS, get_compression_parameters__doc__ }, | |
61 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, |
|
79 | { "get_frame_parameters", (PyCFunction)get_frame_parameters, | |
62 | METH_VARARGS, get_frame_parameters__doc__ }, |
|
80 | METH_VARARGS, get_frame_parameters__doc__ }, | |
63 | { "train_dictionary", (PyCFunction)train_dictionary, |
|
81 | { "train_dictionary", (PyCFunction)train_dictionary, | |
64 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, |
|
82 | METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ }, | |
|
83 | { "train_cover_dictionary", (PyCFunction)train_cover_dictionary, | |||
|
84 | METH_VARARGS | METH_KEYWORDS, train_cover_dictionary__doc__ }, | |||
65 | { NULL, NULL } |
|
85 | { NULL, NULL } | |
66 | }; |
|
86 | }; | |
67 |
|
87 | |||
|
88 | void bufferutil_module_init(PyObject* mod); | |||
68 | void compressobj_module_init(PyObject* mod); |
|
89 | void compressobj_module_init(PyObject* mod); | |
69 | void compressor_module_init(PyObject* mod); |
|
90 | void compressor_module_init(PyObject* mod); | |
70 | void compressionparams_module_init(PyObject* mod); |
|
91 | void compressionparams_module_init(PyObject* mod); | |
71 | void constants_module_init(PyObject* mod); |
|
92 | void constants_module_init(PyObject* mod); | |
72 | void dictparams_module_init(PyObject* mod); |
|
|||
73 | void compressiondict_module_init(PyObject* mod); |
|
93 | void compressiondict_module_init(PyObject* mod); | |
74 | void compressionwriter_module_init(PyObject* mod); |
|
94 | void compressionwriter_module_init(PyObject* mod); | |
75 | void compressoriterator_module_init(PyObject* mod); |
|
95 | void compressoriterator_module_init(PyObject* mod); | |
76 | void decompressor_module_init(PyObject* mod); |
|
96 | void decompressor_module_init(PyObject* mod); | |
77 | void decompressobj_module_init(PyObject* mod); |
|
97 | void decompressobj_module_init(PyObject* mod); | |
78 | void decompressionwriter_module_init(PyObject* mod); |
|
98 | void decompressionwriter_module_init(PyObject* mod); | |
79 | void decompressoriterator_module_init(PyObject* mod); |
|
99 | void decompressoriterator_module_init(PyObject* mod); | |
80 | void frameparams_module_init(PyObject* mod); |
|
100 | void frameparams_module_init(PyObject* mod); | |
81 |
|
101 | |||
82 | void zstd_module_init(PyObject* m) { |
|
102 | void zstd_module_init(PyObject* m) { | |
83 | /* python-zstandard relies on unstable zstd C API features. This means |
|
103 | /* python-zstandard relies on unstable zstd C API features. This means | |
84 | that changes in zstd may break expectations in python-zstandard. |
|
104 | that changes in zstd may break expectations in python-zstandard. | |
85 |
|
105 | |||
86 | python-zstandard is distributed with a copy of the zstd sources. |
|
106 | python-zstandard is distributed with a copy of the zstd sources. | |
87 | python-zstandard is only guaranteed to work with the bundled version |
|
107 | python-zstandard is only guaranteed to work with the bundled version | |
88 | of zstd. |
|
108 | of zstd. | |
89 |
|
109 | |||
90 | However, downstream redistributors or packagers may unbundle zstd |
|
110 | However, downstream redistributors or packagers may unbundle zstd | |
91 | from python-zstandard. This can result in a mismatch between zstd |
|
111 | from python-zstandard. This can result in a mismatch between zstd | |
92 | versions and API semantics. This essentially "voids the warranty" |
|
112 | versions and API semantics. This essentially "voids the warranty" | |
93 | of python-zstandard and may cause undefined behavior. |
|
113 | of python-zstandard and may cause undefined behavior. | |
94 |
|
114 | |||
95 | We detect this mismatch here and refuse to load the module if this |
|
115 | We detect this mismatch here and refuse to load the module if this | |
96 | scenario is detected. |
|
116 | scenario is detected. | |
97 | */ |
|
117 | */ | |
98 | if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) { |
|
118 | if (ZSTD_VERSION_NUMBER != 10103 || ZSTD_versionNumber() != 10103) { | |
99 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
119 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); | |
100 | return; |
|
120 | return; | |
101 | } |
|
121 | } | |
102 |
|
122 | |||
|
123 | bufferutil_module_init(m); | |||
103 | compressionparams_module_init(m); |
|
124 | compressionparams_module_init(m); | |
104 | dictparams_module_init(m); |
|
|||
105 | compressiondict_module_init(m); |
|
125 | compressiondict_module_init(m); | |
106 | compressobj_module_init(m); |
|
126 | compressobj_module_init(m); | |
107 | compressor_module_init(m); |
|
127 | compressor_module_init(m); | |
108 | compressionwriter_module_init(m); |
|
128 | compressionwriter_module_init(m); | |
109 | compressoriterator_module_init(m); |
|
129 | compressoriterator_module_init(m); | |
110 | constants_module_init(m); |
|
130 | constants_module_init(m); | |
111 | decompressor_module_init(m); |
|
131 | decompressor_module_init(m); | |
112 | decompressobj_module_init(m); |
|
132 | decompressobj_module_init(m); | |
113 | decompressionwriter_module_init(m); |
|
133 | decompressionwriter_module_init(m); | |
114 | decompressoriterator_module_init(m); |
|
134 | decompressoriterator_module_init(m); | |
115 | frameparams_module_init(m); |
|
135 | frameparams_module_init(m); | |
116 | } |
|
136 | } | |
117 |
|
137 | |||
118 | #if PY_MAJOR_VERSION >= 3 |
|
138 | #if PY_MAJOR_VERSION >= 3 | |
119 | static struct PyModuleDef zstd_module = { |
|
139 | static struct PyModuleDef zstd_module = { | |
120 | PyModuleDef_HEAD_INIT, |
|
140 | PyModuleDef_HEAD_INIT, | |
121 | "zstd", |
|
141 | "zstd", | |
122 | zstd_doc, |
|
142 | zstd_doc, | |
123 | -1, |
|
143 | -1, | |
124 | zstd_methods |
|
144 | zstd_methods | |
125 | }; |
|
145 | }; | |
126 |
|
146 | |||
127 | PyMODINIT_FUNC PyInit_zstd(void) { |
|
147 | PyMODINIT_FUNC PyInit_zstd(void) { | |
128 | PyObject *m = PyModule_Create(&zstd_module); |
|
148 | PyObject *m = PyModule_Create(&zstd_module); | |
129 | if (m) { |
|
149 | if (m) { | |
130 | zstd_module_init(m); |
|
150 | zstd_module_init(m); | |
131 | if (PyErr_Occurred()) { |
|
151 | if (PyErr_Occurred()) { | |
132 | Py_DECREF(m); |
|
152 | Py_DECREF(m); | |
133 | m = NULL; |
|
153 | m = NULL; | |
134 | } |
|
154 | } | |
135 | } |
|
155 | } | |
136 | return m; |
|
156 | return m; | |
137 | } |
|
157 | } | |
138 | #else |
|
158 | #else | |
139 | PyMODINIT_FUNC initzstd(void) { |
|
159 | PyMODINIT_FUNC initzstd(void) { | |
140 | PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); |
|
160 | PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc); | |
141 | if (m) { |
|
161 | if (m) { | |
142 | zstd_module_init(m); |
|
162 | zstd_module_init(m); | |
143 | } |
|
163 | } | |
144 | } |
|
164 | } | |
145 | #endif |
|
165 | #endif | |
|
166 | ||||
|
167 | /* Attempt to resolve the number of CPUs in the system. */ | |||
|
168 | int cpu_count() { | |||
|
169 | int count = 0; | |||
|
170 | ||||
|
171 | #if defined(_WIN32) | |||
|
172 | SYSTEM_INFO si; | |||
|
173 | si.dwNumberOfProcessors = 0; | |||
|
174 | GetSystemInfo(&si); | |||
|
175 | count = si.dwNumberOfProcessors; | |||
|
176 | #elif defined(__APPLE__) | |||
|
177 | int num; | |||
|
178 | size_t size = sizeof(int); | |||
|
179 | ||||
|
180 | if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) { | |||
|
181 | count = num; | |||
|
182 | } | |||
|
183 | #elif defined(__linux__) | |||
|
184 | count = sysconf(_SC_NPROCESSORS_ONLN); | |||
|
185 | #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) | |||
|
186 | int mib[2]; | |||
|
187 | size_t len = sizeof(count); | |||
|
188 | mib[0] = CTL_HW; | |||
|
189 | mib[1] = HW_NCPU; | |||
|
190 | if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) { | |||
|
191 | count = 0; | |||
|
192 | } | |||
|
193 | #elif defined(__hpux) | |||
|
194 | count = mpctl(MPC_GETNUMSPUS, NULL, NULL); | |||
|
195 | #endif | |||
|
196 | ||||
|
197 | return count; | |||
|
198 | } | |||
|
199 | ||||
|
200 | size_t roundpow2(size_t i) { | |||
|
201 | i--; | |||
|
202 | i |= i >> 1; | |||
|
203 | i |= i >> 2; | |||
|
204 | i |= i >> 4; | |||
|
205 | i |= i >> 8; | |||
|
206 | i |= i >> 16; | |||
|
207 | i++; | |||
|
208 | ||||
|
209 | return i; | |||
|
210 | } |
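
The ``roundpow2()`` helper above rounds a value up to the next power of two by smearing the highest set bit into every lower bit position; because the shifts stop at 16, it assumes inputs that fit in 32 bits. A quick sketch of the same computation in Python:

    def round_pow2(i):
        # Subtract one so exact powers of two map to themselves, then OR the
        # value with shifted copies of itself to set every bit below the
        # highest set bit, and finally add one back.
        i -= 1
        for shift in (1, 2, 4, 8, 16):
            i |= i >> shift
        return i + 1

    assert round_pow2(1000) == 1024
    assert round_pow2(4096) == 4096
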
@@ -1,1042 +1,1257 | |||||
1 | # Copyright (c) 2016-present, Gregory Szorc |
|
1 | # Copyright (c) 2016-present, Gregory Szorc | |
2 | # All rights reserved. |
|
2 | # All rights reserved. | |
3 | # |
|
3 | # | |
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
7 | """Python interface to the Zstandard (zstd) compression library.""" |
|
7 | """Python interface to the Zstandard (zstd) compression library.""" | |
8 |
|
8 | |||
9 | from __future__ import absolute_import, unicode_literals |
|
9 | from __future__ import absolute_import, unicode_literals | |
10 |
|
10 | |||
|
11 | import os | |||
11 | import sys |
|
12 | import sys | |
12 |
|
13 | |||
13 | from _zstd_cffi import ( |
|
14 | from _zstd_cffi import ( | |
14 | ffi, |
|
15 | ffi, | |
15 | lib, |
|
16 | lib, | |
16 | ) |
|
17 | ) | |
17 |
|
18 | |||
18 | if sys.version_info[0] == 2: |
|
19 | if sys.version_info[0] == 2: | |
19 | bytes_type = str |
|
20 | bytes_type = str | |
20 | int_type = long |
|
21 | int_type = long | |
21 | else: |
|
22 | else: | |
22 | bytes_type = bytes |
|
23 | bytes_type = bytes | |
23 | int_type = int |
|
24 | int_type = int | |
24 |
|
25 | |||
25 |
|
26 | |||
26 | COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize() |
|
27 | COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize() | |
27 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize() |
|
28 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize() | |
28 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize() |
|
29 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize() | |
29 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize() |
|
30 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize() | |
30 |
|
31 | |||
31 | new_nonzero = ffi.new_allocator(should_clear_after_alloc=False) |
|
32 | new_nonzero = ffi.new_allocator(should_clear_after_alloc=False) | |
32 |
|
33 | |||
33 |
|
34 | |||
34 | MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel() |
|
35 | MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel() | |
35 | MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER |
|
36 | MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER | |
36 | FRAME_HEADER = b'\x28\xb5\x2f\xfd' |
|
37 | FRAME_HEADER = b'\x28\xb5\x2f\xfd' | |
37 | ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE) |
|
38 | ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE) | |
38 |
|
39 | |||
39 | WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN |
|
40 | WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN | |
40 | WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX |
|
41 | WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX | |
41 | CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN |
|
42 | CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN | |
42 | CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX |
|
43 | CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX | |
43 | HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN |
|
44 | HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN | |
44 | HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX |
|
45 | HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX | |
45 | HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX |
|
46 | HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX | |
46 | SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN |
|
47 | SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN | |
47 | SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX |
|
48 | SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX | |
48 | SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN |
|
49 | SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN | |
49 | SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX |
|
50 | SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX | |
50 | TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN |
|
51 | TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN | |
51 | TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX |
|
52 | TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX | |
52 |
|
53 | |||
53 | STRATEGY_FAST = lib.ZSTD_fast |
|
54 | STRATEGY_FAST = lib.ZSTD_fast | |
54 | STRATEGY_DFAST = lib.ZSTD_dfast |
|
55 | STRATEGY_DFAST = lib.ZSTD_dfast | |
55 | STRATEGY_GREEDY = lib.ZSTD_greedy |
|
56 | STRATEGY_GREEDY = lib.ZSTD_greedy | |
56 | STRATEGY_LAZY = lib.ZSTD_lazy |
|
57 | STRATEGY_LAZY = lib.ZSTD_lazy | |
57 | STRATEGY_LAZY2 = lib.ZSTD_lazy2 |
|
58 | STRATEGY_LAZY2 = lib.ZSTD_lazy2 | |
58 | STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2 |
|
59 | STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2 | |
59 | STRATEGY_BTOPT = lib.ZSTD_btopt |
|
60 | STRATEGY_BTOPT = lib.ZSTD_btopt | |
60 |
|
61 | |||
61 | COMPRESSOBJ_FLUSH_FINISH = 0 |
|
62 | COMPRESSOBJ_FLUSH_FINISH = 0 | |
62 | COMPRESSOBJ_FLUSH_BLOCK = 1 |
|
63 | COMPRESSOBJ_FLUSH_BLOCK = 1 | |
63 |
|
64 | |||
64 |
|
65 | |||
|
66 | def _cpu_count(): | |||
|
67 | # os.cpu_count() was introduced in Python 3.4. | |||
|
68 | try: | |||
|
69 | return os.cpu_count() or 0 | |||
|
70 | except AttributeError: | |||
|
71 | pass | |||
|
72 | ||||
|
73 | # Linux. | |||
|
74 | try: | |||
|
75 | if sys.version_info[0] == 2: | |||
|
76 | return os.sysconf(b'SC_NPROCESSORS_ONLN') | |||
|
77 | else: | |||
|
78 | return os.sysconf(u'SC_NPROCESSORS_ONLN') | |||
|
79 | except (AttributeError, ValueError): | |||
|
80 | pass | |||
|
81 | ||||
|
82 | # TODO implement on other platforms. | |||
|
83 | return 0 | |||
|
84 | ||||
|
85 | ||||
65 | class ZstdError(Exception): |
|
86 | class ZstdError(Exception): | |
66 | pass |
|
87 | pass | |
67 |
|
88 | |||
68 |
|
89 | |||
69 | class CompressionParameters(object): |
|
90 | class CompressionParameters(object): | |
70 | def __init__(self, window_log, chain_log, hash_log, search_log, |
|
91 | def __init__(self, window_log, chain_log, hash_log, search_log, | |
71 | search_length, target_length, strategy): |
|
92 | search_length, target_length, strategy): | |
72 | if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX: |
|
93 | if window_log < WINDOWLOG_MIN or window_log > WINDOWLOG_MAX: | |
73 | raise ValueError('invalid window log value') |
|
94 | raise ValueError('invalid window log value') | |
74 |
|
95 | |||
75 | if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX: |
|
96 | if chain_log < CHAINLOG_MIN or chain_log > CHAINLOG_MAX: | |
76 | raise ValueError('invalid chain log value') |
|
97 | raise ValueError('invalid chain log value') | |
77 |
|
98 | |||
78 | if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX: |
|
99 | if hash_log < HASHLOG_MIN or hash_log > HASHLOG_MAX: | |
79 | raise ValueError('invalid hash log value') |
|
100 | raise ValueError('invalid hash log value') | |
80 |
|
101 | |||
81 | if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX: |
|
102 | if search_log < SEARCHLOG_MIN or search_log > SEARCHLOG_MAX: | |
82 | raise ValueError('invalid search log value') |
|
103 | raise ValueError('invalid search log value') | |
83 |
|
104 | |||
84 | if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX: |
|
105 | if search_length < SEARCHLENGTH_MIN or search_length > SEARCHLENGTH_MAX: | |
85 | raise ValueError('invalid search length value') |
|
106 | raise ValueError('invalid search length value') | |
86 |
|
107 | |||
87 | if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX: |
|
108 | if target_length < TARGETLENGTH_MIN or target_length > TARGETLENGTH_MAX: | |
88 | raise ValueError('invalid target length value') |
|
109 | raise ValueError('invalid target length value') | |
89 |
|
110 | |||
90 | if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT: |
|
111 | if strategy < STRATEGY_FAST or strategy > STRATEGY_BTOPT: | |
91 | raise ValueError('invalid strategy value') |
|
112 | raise ValueError('invalid strategy value') | |
92 |
|
113 | |||
93 | self.window_log = window_log |
|
114 | self.window_log = window_log | |
94 | self.chain_log = chain_log |
|
115 | self.chain_log = chain_log | |
95 | self.hash_log = hash_log |
|
116 | self.hash_log = hash_log | |
96 | self.search_log = search_log |
|
117 | self.search_log = search_log | |
97 | self.search_length = search_length |
|
118 | self.search_length = search_length | |
98 | self.target_length = target_length |
|
119 | self.target_length = target_length | |
99 | self.strategy = strategy |
|
120 | self.strategy = strategy | |
100 |
|
121 | |||
|
122 | zresult = lib.ZSTD_checkCParams(self.as_compression_parameters()) | |||
|
123 | if lib.ZSTD_isError(zresult): | |||
|
124 | raise ValueError('invalid compression parameters: %s', | |||
|
125 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
126 | ||||
|
127 | def estimated_compression_context_size(self): | |||
|
128 | return lib.ZSTD_estimateCCtxSize(self.as_compression_parameters()) | |||
|
129 | ||||
101 | def as_compression_parameters(self): |
|
130 | def as_compression_parameters(self): | |
102 | p = ffi.new('ZSTD_compressionParameters *')[0] |
|
131 | p = ffi.new('ZSTD_compressionParameters *')[0] | |
103 | p.windowLog = self.window_log |
|
132 | p.windowLog = self.window_log | |
104 | p.chainLog = self.chain_log |
|
133 | p.chainLog = self.chain_log | |
105 | p.hashLog = self.hash_log |
|
134 | p.hashLog = self.hash_log | |
106 | p.searchLog = self.search_log |
|
135 | p.searchLog = self.search_log | |
107 | p.searchLength = self.search_length |
|
136 | p.searchLength = self.search_length | |
108 | p.targetLength = self.target_length |
|
137 | p.targetLength = self.target_length | |
109 | p.strategy = self.strategy |
|
138 | p.strategy = self.strategy | |
110 |
|
139 | |||
111 | return p |
|
140 | return p | |
112 |
|
141 | |||
113 | def get_compression_parameters(level, source_size=0, dict_size=0): |
|
142 | def get_compression_parameters(level, source_size=0, dict_size=0): | |
114 | params = lib.ZSTD_getCParams(level, source_size, dict_size) |
|
143 | params = lib.ZSTD_getCParams(level, source_size, dict_size) | |
115 | return CompressionParameters(window_log=params.windowLog, |
|
144 | return CompressionParameters(window_log=params.windowLog, | |
116 | chain_log=params.chainLog, |
|
145 | chain_log=params.chainLog, | |
117 | hash_log=params.hashLog, |
|
146 | hash_log=params.hashLog, | |
118 | search_log=params.searchLog, |
|
147 | search_log=params.searchLog, | |
119 | search_length=params.searchLength, |
|
148 | search_length=params.searchLength, | |
120 | target_length=params.targetLength, |
|
149 | target_length=params.targetLength, | |
121 | strategy=params.strategy) |
|
150 | strategy=params.strategy) | |
122 |
|
151 | |||
123 |
|
152 | |||
124 | def estimate_compression_context_size(params): |
|
153 | def estimate_compression_context_size(params): | |
125 | if not isinstance(params, CompressionParameters): |
|
154 | if not isinstance(params, CompressionParameters): | |
126 | raise ValueError('argument must be a CompressionParameters') |
|
155 | raise ValueError('argument must be a CompressionParameters') | |
127 |
|
156 | |||
128 | cparams = params.as_compression_parameters() |
|
157 | cparams = params.as_compression_parameters() | |
129 | return lib.ZSTD_estimateCCtxSize(cparams) |
|
158 | return lib.ZSTD_estimateCCtxSize(cparams) | |
130 |
|
159 | |||
131 |
|
160 | |||
132 | def estimate_decompression_context_size(): |
|
161 | def estimate_decompression_context_size(): | |
133 | return lib.ZSTD_estimateDCtxSize() |
|
162 | return lib.ZSTD_estimateDCtxSize() | |
134 |
|
163 | |||
135 |
|
164 | |||
136 | class ZstdCompressionWriter(object): |
|
165 | class ZstdCompressionWriter(object): | |
137 | def __init__(self, compressor, writer, source_size, write_size): |
|
166 | def __init__(self, compressor, writer, source_size, write_size): | |
138 | self._compressor = compressor |
|
167 | self._compressor = compressor | |
139 | self._writer = writer |
|
168 | self._writer = writer | |
140 | self._source_size = source_size |
|
169 | self._source_size = source_size | |
141 | self._write_size = write_size |
|
170 | self._write_size = write_size | |
142 | self._entered = False |
|
171 | self._entered = False | |
|
172 | self._mtcctx = compressor._cctx if compressor._multithreaded else None | |||
143 |
|
173 | |||
144 | def __enter__(self): |
|
174 | def __enter__(self): | |
145 | if self._entered: |
|
175 | if self._entered: | |
146 | raise ZstdError('cannot __enter__ multiple times') |
|
176 | raise ZstdError('cannot __enter__ multiple times') | |
147 |
|
177 | |||
148 | self._cstream = self._compressor._get_cstream(self._source_size) |
|
178 | if self._mtcctx: | |
|
179 | self._compressor._init_mtcstream(self._source_size) | |||
|
180 | else: | |||
|
181 | self._compressor._ensure_cstream(self._source_size) | |||
149 | self._entered = True |
|
182 | self._entered = True | |
150 | return self |
|
183 | return self | |
151 |
|
184 | |||
152 | def __exit__(self, exc_type, exc_value, exc_tb): |
|
185 | def __exit__(self, exc_type, exc_value, exc_tb): | |
153 | self._entered = False |
|
186 | self._entered = False | |
154 |
|
187 | |||
155 | if not exc_type and not exc_value and not exc_tb: |
|
188 | if not exc_type and not exc_value and not exc_tb: | |
156 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
189 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
157 | dst_buffer = ffi.new('char[]', self._write_size) |
|
190 | dst_buffer = ffi.new('char[]', self._write_size) | |
158 | out_buffer.dst = dst_buffer |
|
191 | out_buffer.dst = dst_buffer | |
159 | out_buffer.size = self._write_size |
|
192 | out_buffer.size = self._write_size | |
160 | out_buffer.pos = 0 |
|
193 | out_buffer.pos = 0 | |
161 |
|
194 | |||
162 | while True: |
|
195 | while True: | |
163 | zresult = lib.ZSTD_endStream(self._cstream, out_buffer) |
|
196 | if self._mtcctx: | |
|
197 | zresult = lib.ZSTDMT_endStream(self._mtcctx, out_buffer) | |||
|
198 | else: | |||
|
199 | zresult = lib.ZSTD_endStream(self._compressor._cstream, out_buffer) | |||
164 | if lib.ZSTD_isError(zresult): |
|
200 | if lib.ZSTD_isError(zresult): | |
165 | raise ZstdError('error ending compression stream: %s' % |
|
201 | raise ZstdError('error ending compression stream: %s' % | |
166 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
202 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
167 |
|
203 | |||
168 | if out_buffer.pos: |
|
204 | if out_buffer.pos: | |
169 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
|
205 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |
170 | out_buffer.pos = 0 |
|
206 | out_buffer.pos = 0 | |
171 |
|
207 | |||
172 | if zresult == 0: |
|
208 | if zresult == 0: | |
173 | break |
|
209 | break | |
174 |
|
210 | |||
175 | self._cstream = None |
|
|||
176 | self._compressor = None |
|
211 | self._compressor = None | |
177 |
|
212 | |||
178 | return False |
|
213 | return False | |
179 |
|
214 | |||
180 | def memory_size(self): |
|
215 | def memory_size(self): | |
181 | if not self._entered: |
|
216 | if not self._entered: | |
182 | raise ZstdError('cannot determine size of an inactive compressor; ' |
|
217 | raise ZstdError('cannot determine size of an inactive compressor; ' | |
183 | 'call when a context manager is active') |
|
218 | 'call when a context manager is active') | |
184 |
|
219 | |||
185 | return lib.ZSTD_sizeof_CStream(self._cstream) |
|
220 | return lib.ZSTD_sizeof_CStream(self._compressor._cstream) | |
186 |
|
221 | |||
187 | def write(self, data): |
|
222 | def write(self, data): | |
188 | if not self._entered: |
|
223 | if not self._entered: | |
189 | raise ZstdError('write() must be called from an active context ' |
|
224 | raise ZstdError('write() must be called from an active context ' | |
190 | 'manager') |
|
225 | 'manager') | |
191 |
|
226 | |||
192 | total_write = 0 |
|
227 | total_write = 0 | |
193 |
|
228 | |||
194 | data_buffer = ffi.from_buffer(data) |
|
229 | data_buffer = ffi.from_buffer(data) | |
195 |
|
230 | |||
196 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
231 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
197 | in_buffer.src = data_buffer |
|
232 | in_buffer.src = data_buffer | |
198 | in_buffer.size = len(data_buffer) |
|
233 | in_buffer.size = len(data_buffer) | |
199 | in_buffer.pos = 0 |
|
234 | in_buffer.pos = 0 | |
200 |
|
235 | |||
201 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
236 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
202 | dst_buffer = ffi.new('char[]', self._write_size) |
|
237 | dst_buffer = ffi.new('char[]', self._write_size) | |
203 | out_buffer.dst = dst_buffer |
|
238 | out_buffer.dst = dst_buffer | |
204 | out_buffer.size = self._write_size |
|
239 | out_buffer.size = self._write_size | |
205 | out_buffer.pos = 0 |
|
240 | out_buffer.pos = 0 | |
206 |
|
241 | |||
207 | while in_buffer.pos < in_buffer.size: |
|
242 | while in_buffer.pos < in_buffer.size: | |
208 | zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer) |
|
243 | if self._mtcctx: | |
|
244 | zresult = lib.ZSTDMT_compressStream(self._mtcctx, out_buffer, | |||
|
245 | in_buffer) | |||
|
246 | else: | |||
|
247 | zresult = lib.ZSTD_compressStream(self._compressor._cstream, out_buffer, | |||
|
248 | in_buffer) | |||
209 | if lib.ZSTD_isError(zresult): |
|
249 | if lib.ZSTD_isError(zresult): | |
210 | raise ZstdError('zstd compress error: %s' % |
|
250 | raise ZstdError('zstd compress error: %s' % | |
211 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
251 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
212 |
|
252 | |||
213 | if out_buffer.pos: |
|
253 | if out_buffer.pos: | |
214 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
|
254 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |
215 | total_write += out_buffer.pos |
|
255 | total_write += out_buffer.pos | |
216 | out_buffer.pos = 0 |
|
256 | out_buffer.pos = 0 | |
217 |
|
257 | |||
218 | return total_write |
|
258 | return total_write | |
219 |
|
259 | |||
220 | def flush(self): |
|
260 | def flush(self): | |
221 | if not self._entered: |
|
261 | if not self._entered: | |
222 | raise ZstdError('flush must be called from an active context manager') |
|
262 | raise ZstdError('flush must be called from an active context manager') | |
223 |
|
263 | |||
224 | total_write = 0 |
|
264 | total_write = 0 | |
225 |
|
265 | |||
226 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
266 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
227 | dst_buffer = ffi.new('char[]', self._write_size) |
|
267 | dst_buffer = ffi.new('char[]', self._write_size) | |
228 | out_buffer.dst = dst_buffer |
|
268 | out_buffer.dst = dst_buffer | |
229 | out_buffer.size = self._write_size |
|
269 | out_buffer.size = self._write_size | |
230 | out_buffer.pos = 0 |
|
270 | out_buffer.pos = 0 | |
231 |
|
271 | |||
232 | while True: |
|
272 | while True: | |
233 | zresult = lib.ZSTD_flushStream(self._cstream, out_buffer) |
|
273 | if self._mtcctx: | |
|
274 | zresult = lib.ZSTDMT_flushStream(self._mtcctx, out_buffer) | |||
|
275 | else: | |||
|
276 | zresult = lib.ZSTD_flushStream(self._compressor._cstream, out_buffer) | |||
234 | if lib.ZSTD_isError(zresult): |
|
277 | if lib.ZSTD_isError(zresult): | |
235 | raise ZstdError('zstd compress error: %s' % |
|
278 | raise ZstdError('zstd compress error: %s' % | |
236 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
279 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
237 |
|
280 | |||
238 | if not out_buffer.pos: |
|
281 | if not out_buffer.pos: | |
239 | break |
|
282 | break | |
240 |
|
283 | |||
241 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
|
284 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |
242 | total_write += out_buffer.pos |
|
285 | total_write += out_buffer.pos | |
243 | out_buffer.pos = 0 |
|
286 | out_buffer.pos = 0 | |
244 |
|
287 | |||
245 | return total_write |
|
288 | return total_write | |
246 |
|
289 | |||
247 |
|
290 | |||
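
``ZstdCompressionWriter`` is normally obtained from ``ZstdCompressor.write_to()`` and used as a context manager: ``__enter__`` sets up the (possibly multi-threaded) compression stream and ``__exit__`` drains it with the end-stream call shown above. A minimal usage sketch, writing into an in-memory buffer:

    import io
    import zstd

    cctx = zstd.ZstdCompressor(level=5)
    dest = io.BytesIO()
    with cctx.write_to(dest) as compressor:
        compressor.write(b'chunk one')
        compressor.write(b'chunk two')
    frame = dest.getvalue()  # a complete zstd frame
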
248 | class ZstdCompressionObj(object): |
|
291 | class ZstdCompressionObj(object): | |
249 | def compress(self, data): |
|
292 | def compress(self, data): | |
250 | if self._finished: |
|
293 | if self._finished: | |
251 | raise ZstdError('cannot call compress() after compressor finished') |
|
294 | raise ZstdError('cannot call compress() after compressor finished') | |
252 |
|
295 | |||
253 | data_buffer = ffi.from_buffer(data) |
|
296 | data_buffer = ffi.from_buffer(data) | |
254 | source = ffi.new('ZSTD_inBuffer *') |
|
297 | source = ffi.new('ZSTD_inBuffer *') | |
255 | source.src = data_buffer |
|
298 | source.src = data_buffer | |
256 | source.size = len(data_buffer) |
|
299 | source.size = len(data_buffer) | |
257 | source.pos = 0 |
|
300 | source.pos = 0 | |
258 |
|
301 | |||
259 | chunks = [] |
|
302 | chunks = [] | |
260 |
|
303 | |||
261 | while source.pos < len(data): |
|
304 | while source.pos < len(data): | |
262 | zresult = lib.ZSTD_compressStream(self._cstream, self._out, source) |
|
305 | if self._mtcctx: | |
|
306 | zresult = lib.ZSTDMT_compressStream(self._mtcctx, | |||
|
307 | self._out, source) | |||
|
308 | else: | |||
|
309 | zresult = lib.ZSTD_compressStream(self._compressor._cstream, self._out, | |||
|
310 | source) | |||
263 | if lib.ZSTD_isError(zresult): |
|
311 | if lib.ZSTD_isError(zresult): | |
264 | raise ZstdError('zstd compress error: %s' % |
|
312 | raise ZstdError('zstd compress error: %s' % | |
265 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
313 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
266 |
|
314 | |||
267 | if self._out.pos: |
|
315 | if self._out.pos: | |
268 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) |
|
316 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) | |
269 | self._out.pos = 0 |
|
317 | self._out.pos = 0 | |
270 |
|
318 | |||
271 | return b''.join(chunks) |
|
319 | return b''.join(chunks) | |
272 |
|
320 | |||
273 | def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH): |
|
321 | def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH): | |
274 | if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK): |
|
322 | if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK): | |
275 | raise ValueError('flush mode not recognized') |
|
323 | raise ValueError('flush mode not recognized') | |
276 |
|
324 | |||
277 | if self._finished: |
|
325 | if self._finished: | |
278 | raise ZstdError('compressor object already finished') |
|
326 | raise ZstdError('compressor object already finished') | |
279 |
|
327 | |||
280 | assert self._out.pos == 0 |
|
328 | assert self._out.pos == 0 | |
281 |
|
329 | |||
282 | if flush_mode == COMPRESSOBJ_FLUSH_BLOCK: |
|
330 | if flush_mode == COMPRESSOBJ_FLUSH_BLOCK: | |
283 | zresult = lib.ZSTD_flushStream(self._cstream, self._out) |
|
331 | if self._mtcctx: | |
|
332 | zresult = lib.ZSTDMT_flushStream(self._mtcctx, self._out) | |||
|
333 | else: | |||
|
334 | zresult = lib.ZSTD_flushStream(self._compressor._cstream, self._out) | |||
284 | if lib.ZSTD_isError(zresult): |
|
335 | if lib.ZSTD_isError(zresult): | |
285 | raise ZstdError('zstd compress error: %s' % |
|
336 | raise ZstdError('zstd compress error: %s' % | |
286 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
337 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
287 |
|
338 | |||
288 | # Output buffer is guaranteed to hold full block. |
|
339 | # Output buffer is guaranteed to hold full block. | |
289 | assert zresult == 0 |
|
340 | assert zresult == 0 | |
290 |
|
341 | |||
291 | if self._out.pos: |
|
342 | if self._out.pos: | |
292 | result = ffi.buffer(self._out.dst, self._out.pos)[:] |
|
343 | result = ffi.buffer(self._out.dst, self._out.pos)[:] | |
293 | self._out.pos = 0 |
|
344 | self._out.pos = 0 | |
294 | return result |
|
345 | return result | |
295 | else: |
|
346 | else: | |
296 | return b'' |
|
347 | return b'' | |
297 |
|
348 | |||
298 | assert flush_mode == COMPRESSOBJ_FLUSH_FINISH |
|
349 | assert flush_mode == COMPRESSOBJ_FLUSH_FINISH | |
299 | self._finished = True |
|
350 | self._finished = True | |
300 |
|
351 | |||
301 | chunks = [] |
|
352 | chunks = [] | |
302 |
|
353 | |||
303 | while True: |
|
354 | while True: | |
304 | zresult = lib.ZSTD_endStream(self._cstream, self._out) |
|
355 | if self._mtcctx: | |
|
356 | zresult = lib.ZSTDMT_endStream(self._mtcctx, self._out) | |||
|
357 | else: | |||
|
358 | zresult = lib.ZSTD_endStream(self._compressor._cstream, self._out) | |||
305 | if lib.ZSTD_isError(zresult): |
|
359 | if lib.ZSTD_isError(zresult): | |
306 | raise ZstdError('error ending compression stream: %s' % |
|
360 | raise ZstdError('error ending compression stream: %s' % | |
307 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
361 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
308 |
|
362 | |||
309 | if self._out.pos: |
|
363 | if self._out.pos: | |
310 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) |
|
364 | chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:]) | |
311 | self._out.pos = 0 |
|
365 | self._out.pos = 0 | |
312 |
|
366 | |||
313 | if not zresult: |
|
367 | if not zresult: | |
314 | break |
|
368 | break | |
315 |
|
369 | |||
316 | # GC compression stream immediately. |
|
|||
317 | self._cstream = None |
|
|||
318 |
|
||||
319 | return b''.join(chunks) |
|
370 | return b''.join(chunks) | |
320 |
|
371 | |||
321 |
|
372 | |||
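
``ZstdCompressionObj`` mirrors the ``zlib``/``bz2`` compression-object interface: feed data with ``compress()`` and finish (or emit a block) with ``flush()``. A short sketch:

    import zstd

    cctx = zstd.ZstdCompressor()
    cobj = cctx.compressobj()
    chunks = [cobj.compress(b'some data'),
              cobj.compress(b'more data'),
              cobj.flush()]
    frame = b''.join(chunks)
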
322 | class ZstdCompressor(object): |
|
373 | class ZstdCompressor(object): | |
323 | def __init__(self, level=3, dict_data=None, compression_params=None, |
|
374 | def __init__(self, level=3, dict_data=None, compression_params=None, | |
324 | write_checksum=False, write_content_size=False, |
|
375 | write_checksum=False, write_content_size=False, | |
325 | write_dict_id=True): |
|
376 | write_dict_id=True, threads=0): | |
326 | if level < 1: |
|
377 | if level < 1: | |
327 | raise ValueError('level must be greater than 0') |
|
378 | raise ValueError('level must be greater than 0') | |
328 | elif level > lib.ZSTD_maxCLevel(): |
|
379 | elif level > lib.ZSTD_maxCLevel(): | |
329 | raise ValueError('level must be less than or equal to %d' % lib.ZSTD_maxCLevel()) |
|
380 | raise ValueError('level must be less than or equal to %d' % lib.ZSTD_maxCLevel()) | |
330 |
|
381 | |||
|
382 | if threads < 0: | |||
|
383 | threads = _cpu_count() | |||
|
384 | ||||
331 | self._compression_level = level |
|
385 | self._compression_level = level | |
332 | self._dict_data = dict_data |
|
386 | self._dict_data = dict_data | |
333 | self._cparams = compression_params |
|
387 | self._cparams = compression_params | |
334 | self._fparams = ffi.new('ZSTD_frameParameters *')[0] |
|
388 | self._fparams = ffi.new('ZSTD_frameParameters *')[0] | |
335 | self._fparams.checksumFlag = write_checksum |
|
389 | self._fparams.checksumFlag = write_checksum | |
336 | self._fparams.contentSizeFlag = write_content_size |
|
390 | self._fparams.contentSizeFlag = write_content_size | |
337 | self._fparams.noDictIDFlag = not write_dict_id |
|
391 | self._fparams.noDictIDFlag = not write_dict_id | |
338 |
|
392 | |||
339 | cctx = lib.ZSTD_createCCtx() |
|
393 | if threads: | |
340 | if cctx == ffi.NULL: |
|
394 | cctx = lib.ZSTDMT_createCCtx(threads) | |
341 | raise MemoryError() |
|
395 | if cctx == ffi.NULL: | |
|
396 | raise MemoryError() | |||
342 |
|
397 | |||
343 | self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx) |
|
398 | self._cctx = ffi.gc(cctx, lib.ZSTDMT_freeCCtx) | |
|
399 | self._multithreaded = True | |||
|
400 | else: | |||
|
401 | cctx = lib.ZSTD_createCCtx() | |||
|
402 | if cctx == ffi.NULL: | |||
|
403 | raise MemoryError() | |||
|
404 | ||||
|
405 | self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx) | |||
|
406 | self._multithreaded = False | |||
|
407 | ||||
|
408 | self._cstream = None | |||
344 |
|
409 | |||
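
With the ``threads`` argument added above, construction selects ``ZSTDMT_createCCtx()`` instead of ``ZSTD_createCCtx()``; a negative value requests one worker per detected CPU via ``_cpu_count()``. A usage sketch (the input data is invented):

    import zstd

    cctx = zstd.ZstdCompressor(level=3, threads=-1)  # one worker per detected CPU
    compressed = cctx.compress(b'data to compress ' * 1024)
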
345 | def compress(self, data, allow_empty=False): |
|
410 | def compress(self, data, allow_empty=False): | |
346 | if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty: |
|
411 | if len(data) == 0 and self._fparams.contentSizeFlag and not allow_empty: | |
347 | raise ValueError('cannot write empty inputs when writing content sizes') |
|
412 | raise ValueError('cannot write empty inputs when writing content sizes') | |
348 |
|
413 | |||
|
414 | if self._multithreaded and self._dict_data: | |||
|
415 | raise ZstdError('compress() cannot be used with both dictionaries and multi-threaded compression') | |||
|
416 | ||||
|
417 | if self._multithreaded and self._cparams: | |||
|
418 | raise ZstdError('compress() cannot be used with both compression parameters and multi-threaded compression') | |||
|
419 | ||||
349 | # TODO use a CDict for performance. |
|
420 | # TODO use a CDict for performance. | |
350 | dict_data = ffi.NULL |
|
421 | dict_data = ffi.NULL | |
351 | dict_size = 0 |
|
422 | dict_size = 0 | |
352 |
|
423 | |||
353 | if self._dict_data: |
|
424 | if self._dict_data: | |
354 | dict_data = self._dict_data.as_bytes() |
|
425 | dict_data = self._dict_data.as_bytes() | |
355 | dict_size = len(self._dict_data) |
|
426 | dict_size = len(self._dict_data) | |
356 |
|
427 | |||
357 | params = ffi.new('ZSTD_parameters *')[0] |
|
428 | params = ffi.new('ZSTD_parameters *')[0] | |
358 | if self._cparams: |
|
429 | if self._cparams: | |
359 | params.cParams = self._cparams.as_compression_parameters() |
|
430 | params.cParams = self._cparams.as_compression_parameters() | |
360 | else: |
|
431 | else: | |
361 | params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data), |
|
432 | params.cParams = lib.ZSTD_getCParams(self._compression_level, len(data), | |
362 | dict_size) |
|
433 | dict_size) | |
363 | params.fParams = self._fparams |
|
434 | params.fParams = self._fparams | |
364 |
|
435 | |||
365 | dest_size = lib.ZSTD_compressBound(len(data)) |
|
436 | dest_size = lib.ZSTD_compressBound(len(data)) | |
366 | out = new_nonzero('char[]', dest_size) |
|
437 | out = new_nonzero('char[]', dest_size) | |
367 |
|
438 | |||
368 | zresult = lib.ZSTD_compress_advanced(self._cctx, |
|
439 | if self._multithreaded: | |
369 | ffi.addressof(out), dest_size, |
|
440 | zresult = lib.ZSTDMT_compressCCtx(self._cctx, | |
370 | data, len(data), |
|
|
441 | ffi.addressof(out), dest_size, | |
371 | dict_data, dict_size, |
|
|
442 | data, len(data), | |
372 | params) |
|
|
443 | self._compression_level) | |
|
444 | else: | |||
|
445 | zresult = lib.ZSTD_compress_advanced(self._cctx, | |||
|
446 | ffi.addressof(out), dest_size, | |||
|
447 | data, len(data), | |||
|
448 | dict_data, dict_size, | |||
|
449 | params) | |||
373 |
|
450 | |||
374 | if lib.ZSTD_isError(zresult): |
|
451 | if lib.ZSTD_isError(zresult): | |
375 | raise ZstdError('cannot compress: %s' % |
|
452 | raise ZstdError('cannot compress: %s' % | |
376 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
453 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
377 |
|
454 | |||
378 | return ffi.buffer(out, zresult)[:] |
|
455 | return ffi.buffer(out, zresult)[:] | |
379 |
|
456 | |||
380 | def compressobj(self, size=0): |
|
457 | def compressobj(self, size=0): | |
381 | cstream = self._get_cstream(size) |
|
458 | if self._multithreaded: | |
|
459 | self._init_mtcstream(size) | |||
|
460 | else: | |||
|
461 | self._ensure_cstream(size) | |||
|
462 | ||||
382 | cobj = ZstdCompressionObj() |
|
463 | cobj = ZstdCompressionObj() | |
383 | cobj._cstream = cstream |
|
|||
384 | cobj._out = ffi.new('ZSTD_outBuffer *') |
|
464 | cobj._out = ffi.new('ZSTD_outBuffer *') | |
385 | cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE) |
|
465 | cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |
386 | cobj._out.dst = cobj._dst_buffer |
|
466 | cobj._out.dst = cobj._dst_buffer | |
387 | cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
467 | cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
388 | cobj._out.pos = 0 |
|
468 | cobj._out.pos = 0 | |
389 | cobj._compressor = self |
|
469 | cobj._compressor = self | |
390 | cobj._finished = False |
|
470 | cobj._finished = False | |
391 |
|
471 | |||
|
472 | if self._multithreaded: | |||
|
473 | cobj._mtcctx = self._cctx | |||
|
474 | else: | |||
|
475 | cobj._mtcctx = None | |||
|
476 | ||||
392 | return cobj |
|
477 | return cobj | |
393 |
|
478 | |||
394 | def copy_stream(self, ifh, ofh, size=0, |
|
479 | def copy_stream(self, ifh, ofh, size=0, | |
395 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, |
|
480 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |
396 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): |
|
481 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
397 |
|
482 | |||
398 | if not hasattr(ifh, 'read'): |
|
483 | if not hasattr(ifh, 'read'): | |
399 | raise ValueError('first argument must have a read() method') |
|
484 | raise ValueError('first argument must have a read() method') | |
400 | if not hasattr(ofh, 'write'): |
|
485 | if not hasattr(ofh, 'write'): | |
401 | raise ValueError('second argument must have a write() method') |
|
486 | raise ValueError('second argument must have a write() method') | |
402 |
|
487 | |||
403 | cstream = self._get_cstream(size) |
|
|
488 | mt = self._multithreaded | |
|
489 | if mt: | |||
|
490 | self._init_mtcstream(size) | |||
|
491 | else: | |||
|
492 | self._ensure_cstream(size) | |||
404 |
|
493 | |||
405 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
494 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
406 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
495 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
407 |
|
496 | |||
408 | dst_buffer = ffi.new('char[]', write_size) |
|
497 | dst_buffer = ffi.new('char[]', write_size) | |
409 | out_buffer.dst = dst_buffer |
|
498 | out_buffer.dst = dst_buffer | |
410 | out_buffer.size = write_size |
|
499 | out_buffer.size = write_size | |
411 | out_buffer.pos = 0 |
|
500 | out_buffer.pos = 0 | |
412 |
|
501 | |||
413 | total_read, total_write = 0, 0 |
|
502 | total_read, total_write = 0, 0 | |
414 |
|
503 | |||
415 | while True: |
|
504 | while True: | |
416 | data = ifh.read(read_size) |
|
505 | data = ifh.read(read_size) | |
417 | if not data: |
|
506 | if not data: | |
418 | break |
|
507 | break | |
419 |
|
508 | |||
420 | data_buffer = ffi.from_buffer(data) |
|
509 | data_buffer = ffi.from_buffer(data) | |
421 | total_read += len(data_buffer) |
|
510 | total_read += len(data_buffer) | |
422 | in_buffer.src = data_buffer |
|
511 | in_buffer.src = data_buffer | |
423 | in_buffer.size = len(data_buffer) |
|
512 | in_buffer.size = len(data_buffer) | |
424 | in_buffer.pos = 0 |
|
513 | in_buffer.pos = 0 | |
425 |
|
514 | |||
426 | while in_buffer.pos < in_buffer.size: |
|
515 | while in_buffer.pos < in_buffer.size: | |
427 | zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer) |
|
516 | if mt: | |
|
517 | zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer) | |||
|
518 | else: | |||
|
519 | zresult = lib.ZSTD_compressStream(self._cstream, | |||
|
520 | out_buffer, in_buffer) | |||
428 | if lib.ZSTD_isError(zresult): |
|
521 | if lib.ZSTD_isError(zresult): | |
429 | raise ZstdError('zstd compress error: %s' % |
|
522 | raise ZstdError('zstd compress error: %s' % | |
430 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
523 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
431 |
|
524 | |||
432 | if out_buffer.pos: |
|
525 | if out_buffer.pos: | |
433 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
|
526 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) | |
434 | total_write += out_buffer.pos |
|
527 | total_write += out_buffer.pos | |
435 | out_buffer.pos = 0 |
|
528 | out_buffer.pos = 0 | |
436 |
|
529 | |||
437 | # We've finished reading. Flush the compressor. |
|
530 | # We've finished reading. Flush the compressor. | |
438 | while True: |
|
531 | while True: | |
439 | zresult = lib.ZSTD_endStream(cstream, out_buffer) |
|
532 | if mt: | |
|
533 | zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer) | |||
|
534 | else: | |||
|
535 | zresult = lib.ZSTD_endStream(self._cstream, out_buffer) | |||
440 | if lib.ZSTD_isError(zresult): |
|
536 | if lib.ZSTD_isError(zresult): | |
441 | raise ZstdError('error ending compression stream: %s' % |
|
537 | raise ZstdError('error ending compression stream: %s' % | |
442 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
538 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
443 |
|
539 | |||
444 | if out_buffer.pos: |
|
540 | if out_buffer.pos: | |
445 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
|
541 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) | |
446 | total_write += out_buffer.pos |
|
542 | total_write += out_buffer.pos | |
447 | out_buffer.pos = 0 |
|
543 | out_buffer.pos = 0 | |
448 |
|
544 | |||
449 | if zresult == 0: |
|
545 | if zresult == 0: | |
450 | break |
|
546 | break | |
451 |
|
547 | |||
452 | return total_read, total_write |
|
548 | return total_read, total_write | |
453 |
|
549 | |||
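
``copy_stream()`` pumps everything from a readable object into a writable one and returns ``(bytes_read, bytes_written)``; when the compressor was created with ``threads``, the streaming calls above route to the ``ZSTDMT_*`` APIs. A sketch using invented file names:

    import zstd

    cctx = zstd.ZstdCompressor(threads=2)
    with open('input.bin', 'rb') as ifh, open('output.zst', 'wb') as ofh:
        bytes_read, bytes_written = cctx.copy_stream(ifh, ofh)
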
454 | def write_to(self, writer, size=0, |
|
550 | def write_to(self, writer, size=0, | |
455 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): |
|
551 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
456 |
|
552 | |||
457 | if not hasattr(writer, 'write'): |
|
553 | if not hasattr(writer, 'write'): | |
458 | raise ValueError('must pass an object with a write() method') |
|
554 | raise ValueError('must pass an object with a write() method') | |
459 |
|
555 | |||
460 | return ZstdCompressionWriter(self, writer, size, write_size) |
|
556 | return ZstdCompressionWriter(self, writer, size, write_size) | |
461 |
|
557 | |||
462 | def read_from(self, reader, size=0, |
|
558 | def read_from(self, reader, size=0, | |
463 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, |
|
559 | read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE, | |
464 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): |
|
560 | write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
465 | if hasattr(reader, 'read'): |
|
561 | if hasattr(reader, 'read'): | |
466 | have_read = True |
|
562 | have_read = True | |
467 | elif hasattr(reader, '__getitem__'): |
|
563 | elif hasattr(reader, '__getitem__'): | |
468 | have_read = False |
|
564 | have_read = False | |
469 | buffer_offset = 0 |
|
565 | buffer_offset = 0 | |
470 | size = len(reader) |
|
566 | size = len(reader) | |
471 | else: |
|
567 | else: | |
472 | raise ValueError('must pass an object with a read() method or ' |
|
568 | raise ValueError('must pass an object with a read() method or ' | |
473 | 'an object conforming to the buffer protocol') |
|
569 | 'an object conforming to the buffer protocol') | |
474 |
|
570 | |||
475 | cstream = self._get_cstream(size) |
|
571 | if self._multithreaded: | |
|
572 | self._init_mtcstream(size) | |||
|
573 | else: | |||
|
574 | self._ensure_cstream(size) | |||
476 |
|
575 | |||
477 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
576 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
478 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
577 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
479 |
|
578 | |||
480 | in_buffer.src = ffi.NULL |
|
579 | in_buffer.src = ffi.NULL | |
481 | in_buffer.size = 0 |
|
580 | in_buffer.size = 0 | |
482 | in_buffer.pos = 0 |
|
581 | in_buffer.pos = 0 | |
483 |
|
582 | |||
484 | dst_buffer = ffi.new('char[]', write_size) |
|
583 | dst_buffer = ffi.new('char[]', write_size) | |
485 | out_buffer.dst = dst_buffer |
|
584 | out_buffer.dst = dst_buffer | |
486 | out_buffer.size = write_size |
|
585 | out_buffer.size = write_size | |
487 | out_buffer.pos = 0 |
|
586 | out_buffer.pos = 0 | |
488 |
|
587 | |||
489 | while True: |
|
588 | while True: | |
490 | # We should never have output data sitting around after a previous |
|
589 | # We should never have output data sitting around after a previous | |
491 | # iteration. |
|
590 | # iteration. | |
492 | assert out_buffer.pos == 0 |
|
591 | assert out_buffer.pos == 0 | |
493 |
|
592 | |||
494 | # Collect input data. |
|
593 | # Collect input data. | |
495 | if have_read: |
|
594 | if have_read: | |
496 | read_result = reader.read(read_size) |
|
595 | read_result = reader.read(read_size) | |
497 | else: |
|
596 | else: | |
498 | remaining = len(reader) - buffer_offset |
|
597 | remaining = len(reader) - buffer_offset | |
499 | slice_size = min(remaining, read_size) |
|
598 | slice_size = min(remaining, read_size) | |
500 | read_result = reader[buffer_offset:buffer_offset + slice_size] |
|
599 | read_result = reader[buffer_offset:buffer_offset + slice_size] | |
501 | buffer_offset += slice_size |
|
600 | buffer_offset += slice_size | |
502 |
|
601 | |||
503 | # No new input data. Break out of the read loop. |
|
602 | # No new input data. Break out of the read loop. | |
504 | if not read_result: |
|
603 | if not read_result: | |
505 | break |
|
604 | break | |
506 |
|
605 | |||
507 | # Feed all read data into the compressor and emit output until |
|
606 | # Feed all read data into the compressor and emit output until | |
508 | # exhausted. |
|
607 | # exhausted. | |
509 | read_buffer = ffi.from_buffer(read_result) |
|
608 | read_buffer = ffi.from_buffer(read_result) | |
510 | in_buffer.src = read_buffer |
|
609 | in_buffer.src = read_buffer | |
511 | in_buffer.size = len(read_buffer) |
|
610 | in_buffer.size = len(read_buffer) | |
512 | in_buffer.pos = 0 |
|
611 | in_buffer.pos = 0 | |
513 |
|
612 | |||
514 | while in_buffer.pos < in_buffer.size: |
|
613 | while in_buffer.pos < in_buffer.size: | |
515 | zresult = lib.ZSTD_compressStream(cstream, out_buffer, in_buffer) |
|
614 | if self._multithreaded: | |
|
615 | zresult = lib.ZSTDMT_compressStream(self._cctx, out_buffer, in_buffer) | |||
|
616 | else: | |||
|
617 | zresult = lib.ZSTD_compressStream(self._cstream, out_buffer, in_buffer) | |||
516 | if lib.ZSTD_isError(zresult): |
|
618 | if lib.ZSTD_isError(zresult): | |
517 | raise ZstdError('zstd compress error: %s' % |
|
619 | raise ZstdError('zstd compress error: %s' % | |
518 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
620 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
519 |
|
621 | |||
520 | if out_buffer.pos: |
|
622 | if out_buffer.pos: | |
521 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] |
|
623 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] | |
522 | out_buffer.pos = 0 |
|
624 | out_buffer.pos = 0 | |
523 | yield data |
|
625 | yield data | |
524 |
|
626 | |||
525 | assert out_buffer.pos == 0 |
|
627 | assert out_buffer.pos == 0 | |
526 |
|
628 | |||
527 | # And repeat the loop to collect more data. |
|
629 | # And repeat the loop to collect more data. | |
528 | continue |
|
630 | continue | |
529 |
|
631 | |||
530 | # If we get here, input is exhausted. End the stream and emit what |
|
632 | # If we get here, input is exhausted. End the stream and emit what | |
531 | # remains. |
|
633 | # remains. | |
532 | while True: |
|
634 | while True: | |
533 | assert out_buffer.pos == 0 |
|
635 | assert out_buffer.pos == 0 | |
534 | zresult = lib.ZSTD_endStream(cstream, out_buffer) |
|
636 | if self._multithreaded: | |
|
637 | zresult = lib.ZSTDMT_endStream(self._cctx, out_buffer) | |||
|
638 | else: | |||
|
639 | zresult = lib.ZSTD_endStream(self._cstream, out_buffer) | |||
535 | if lib.ZSTD_isError(zresult): |
|
640 | if lib.ZSTD_isError(zresult): | |
536 | raise ZstdError('error ending compression stream: %s' % |
|
641 | raise ZstdError('error ending compression stream: %s' % | |
537 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
642 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
538 |
|
643 | |||
539 | if out_buffer.pos: |
|
644 | if out_buffer.pos: | |
540 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] |
|
645 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] | |
541 | out_buffer.pos = 0 |
|
646 | out_buffer.pos = 0 | |
542 | yield data |
|
647 | yield data | |
543 |
|
648 | |||
544 | if zresult == 0: |
|
649 | if zresult == 0: | |
545 | break |
|
650 | break | |
546 |
|
651 | |||
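
``read_from()`` is a generator that yields compressed chunks as it pulls from the source, which may be an object with ``read()`` or anything supporting the buffer protocol. A sketch compressing an in-memory stream:

    import io
    import zstd

    cctx = zstd.ZstdCompressor()
    source = io.BytesIO(b'payload ' * 4096)
    frame = b''.join(cctx.read_from(source))
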
547 | def _get_cstream(self, size): |
|
652 | def _ensure_cstream(self, size): | |
|
653 | if self._cstream: | |||
|
654 | zresult = lib.ZSTD_resetCStream(self._cstream, size) | |||
|
655 | if lib.ZSTD_isError(zresult): | |||
|
656 | raise ZstdError('could not reset CStream: %s' % | |||
|
657 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
658 | ||||
|
659 | return | |||
|
660 | ||||
548 | cstream = lib.ZSTD_createCStream() |
|
661 | cstream = lib.ZSTD_createCStream() | |
549 | if cstream == ffi.NULL: |
|
662 | if cstream == ffi.NULL: | |
550 | raise MemoryError() |
|
663 | raise MemoryError() | |
551 |
|
664 | |||
552 | cstream = ffi.gc(cstream, lib.ZSTD_freeCStream) |
|
665 | cstream = ffi.gc(cstream, lib.ZSTD_freeCStream) | |
553 |
|
666 | |||
554 | dict_data = ffi.NULL |
|
667 | dict_data = ffi.NULL | |
555 | dict_size = 0 |
|
668 | dict_size = 0 | |
556 | if self._dict_data: |
|
669 | if self._dict_data: | |
557 | dict_data = self._dict_data.as_bytes() |
|
670 | dict_data = self._dict_data.as_bytes() | |
558 | dict_size = len(self._dict_data) |
|
671 | dict_size = len(self._dict_data) | |
559 |
|
672 | |||
560 | zparams = ffi.new('ZSTD_parameters *')[0] |
|
673 | zparams = ffi.new('ZSTD_parameters *')[0] | |
561 | if self._cparams: |
|
674 | if self._cparams: | |
562 | zparams.cParams = self._cparams.as_compression_parameters() |
|
675 | zparams.cParams = self._cparams.as_compression_parameters() | |
563 | else: |
|
676 | else: | |
564 | zparams.cParams = lib.ZSTD_getCParams(self._compression_level, |
|
677 | zparams.cParams = lib.ZSTD_getCParams(self._compression_level, | |
565 | size, dict_size) |
|
678 | size, dict_size) | |
566 | zparams.fParams = self._fparams |
|
679 | zparams.fParams = self._fparams | |
567 |
|
680 | |||
568 | zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size, |
|
681 | zresult = lib.ZSTD_initCStream_advanced(cstream, dict_data, dict_size, | |
569 | zparams, size) |
|
682 | zparams, size) | |
570 | if lib.ZSTD_isError(zresult): |
|
683 | if lib.ZSTD_isError(zresult): | |
571 | raise ZstdError('cannot init CStream: %s' % |
|
684 | raise ZstdError('cannot init CStream: %s' % | |
572 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
685 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
573 |
|
686 | |||
574 |
|
|
687 | self._cstream = cstream | |
|
688 | ||||
|
689 | def _init_mtcstream(self, size): | |||
|
690 | assert self._multithreaded | |||
|
691 | ||||
|
692 | dict_data = ffi.NULL | |||
|
693 | dict_size = 0 | |||
|
694 | if self._dict_data: | |||
|
695 | dict_data = self._dict_data.as_bytes() | |||
|
696 | dict_size = len(self._dict_data) | |||
|
697 | ||||
|
698 | zparams = ffi.new('ZSTD_parameters *')[0] | |||
|
699 | if self._cparams: | |||
|
700 | zparams.cParams = self._cparams.as_compression_parameters() | |||
|
701 | else: | |||
|
702 | zparams.cParams = lib.ZSTD_getCParams(self._compression_level, | |||
|
703 | size, dict_size) | |||
|
704 | ||||
|
705 | zparams.fParams = self._fparams | |||
|
706 | ||||
|
707 | zresult = lib.ZSTDMT_initCStream_advanced(self._cctx, dict_data, dict_size, | |||
|
708 | zparams, size) | |||
|
709 | ||||
|
710 | if lib.ZSTD_isError(zresult): | |||
|
711 | raise ZstdError('cannot init CStream: %s' % | |||
|
712 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
575 |
|
713 | |||
576 |
|
714 | |||
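Note: the `_ensure_cstream(size)` and `_init_mtcstream(size)` helpers above are what the streaming entry points call with an optional content-size hint. A minimal usage sketch of how that hint reaches them from caller code, assuming the bindings are importable as `zstd` and that `write_to()` accepts a `size` keyword as in this version:

    import io
    import zstd  # assumed import name for these bindings

    data = b'data to compress' * 1024

    cctx = zstd.ZstdCompressor(level=3)
    out = io.BytesIO()
    # Passing the total input size up front lets the CStream pick frame
    # parameters for that size instead of the unknown-size streaming defaults.
    with cctx.write_to(out, size=len(data)) as compressor:
        compressor.write(data)

    frame = out.getvalue()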
577 | class FrameParameters(object): |
|
715 | class FrameParameters(object): | |
578 | def __init__(self, fparams): |
|
716 | def __init__(self, fparams): | |
579 | self.content_size = fparams.frameContentSize |
|
717 | self.content_size = fparams.frameContentSize | |
580 | self.window_size = fparams.windowSize |
|
718 | self.window_size = fparams.windowSize | |
581 | self.dict_id = fparams.dictID |
|
719 | self.dict_id = fparams.dictID | |
582 | self.has_checksum = bool(fparams.checksumFlag) |
|
720 | self.has_checksum = bool(fparams.checksumFlag) | |
583 |
|
721 | |||
584 |
|
722 | |||
585 | def get_frame_parameters(data): |
|
723 | def get_frame_parameters(data): | |
586 | if not isinstance(data, bytes_type): |
|
724 | if not isinstance(data, bytes_type): | |
587 | raise TypeError('argument must be bytes') |
|
725 | raise TypeError('argument must be bytes') | |
588 |
|
726 | |||
589 | params = ffi.new('ZSTD_frameParams *') |
|
727 | params = ffi.new('ZSTD_frameParams *') | |
590 |
|
728 | |||
591 | zresult = lib.ZSTD_getFrameParams(params, data, len(data)) |
|
729 | zresult = lib.ZSTD_getFrameParams(params, data, len(data)) | |
592 | if lib.ZSTD_isError(zresult): |
|
730 | if lib.ZSTD_isError(zresult): | |
593 | raise ZstdError('cannot get frame parameters: %s' % |
|
731 | raise ZstdError('cannot get frame parameters: %s' % | |
594 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
732 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
595 |
|
733 | |||
596 | if zresult: |
|
734 | if zresult: | |
597 | raise ZstdError('not enough data for frame parameters; need %d bytes' % |
|
735 | raise ZstdError('not enough data for frame parameters; need %d bytes' % | |
598 | zresult) |
|
736 | zresult) | |
599 |
|
737 | |||
600 | return FrameParameters(params[0]) |
|
738 | return FrameParameters(params[0]) | |
601 |
|
739 | |||
602 |
|
740 | |||
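Note: `get_frame_parameters()` above is a thin wrapper around `ZSTD_getFrameParams()`, returning a `FrameParameters` object. A small sketch of inspecting a frame header with it, assuming the bindings import as `zstd` and that `ZstdCompressor` takes the `write_content_size`/`write_checksum` flags present in this release:

    import zstd  # assumed import name for these bindings

    cctx = zstd.ZstdCompressor(write_content_size=True, write_checksum=True)
    frame = cctx.compress(b'sample payload')

    params = zstd.get_frame_parameters(frame)
    # FrameParameters mirrors the fields parsed from the frame header.
    print(params.content_size, params.window_size, params.dict_id, params.has_checksum)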
603 | class ZstdCompressionDict(object): |
|
741 | class ZstdCompressionDict(object): | |
604 | def __init__(self, data): |
|
742 | def __init__(self, data, k=0, d=0): | |
605 | assert isinstance(data, bytes_type) |
|
743 | assert isinstance(data, bytes_type) | |
606 | self._data = data |
|
744 | self._data = data | |
|
745 | self.k = k | |||
|
746 | self.d = d | |||
607 |
|
747 | |||
608 | def __len__(self): |
|
748 | def __len__(self): | |
609 | return len(self._data) |
|
749 | return len(self._data) | |
610 |
|
750 | |||
611 | def dict_id(self): |
|
751 | def dict_id(self): | |
612 | return int_type(lib.ZDICT_getDictID(self._data, len(self._data))) |
|
752 | return int_type(lib.ZDICT_getDictID(self._data, len(self._data))) | |
613 |
|
753 | |||
614 | def as_bytes(self): |
|
754 | def as_bytes(self): | |
615 | return self._data |
|
755 | return self._data | |
616 |
|
756 | |||
617 |
|
757 | |||
618 | def train_dictionary(dict_size, samples,
|
758 | def train_dictionary(dict_size, samples, selectivity=0, level=0, | |
|
759 | notifications=0, dict_id=0): | |||
619 | if not isinstance(samples, list): |
|
760 | if not isinstance(samples, list): | |
620 | raise TypeError('samples must be a list') |
|
761 | raise TypeError('samples must be a list') | |
621 |
|
762 | |||
622 | total_size = sum(map(len, samples)) |
|
763 | total_size = sum(map(len, samples)) | |
623 |
|
764 | |||
624 | samples_buffer = new_nonzero('char[]', total_size) |
|
765 | samples_buffer = new_nonzero('char[]', total_size) | |
625 | sample_sizes = new_nonzero('size_t[]', len(samples)) |
|
766 | sample_sizes = new_nonzero('size_t[]', len(samples)) | |
626 |
|
767 | |||
627 | offset = 0 |
|
768 | offset = 0 | |
628 | for i, sample in enumerate(samples): |
|
769 | for i, sample in enumerate(samples): | |
629 | if not isinstance(sample, bytes_type): |
|
770 | if not isinstance(sample, bytes_type): | |
630 | raise ValueError('samples must be bytes') |
|
771 | raise ValueError('samples must be bytes') | |
631 |
|
772 | |||
632 | l = len(sample) |
|
773 | l = len(sample) | |
633 | ffi.memmove(samples_buffer + offset, sample, l) |
|
774 | ffi.memmove(samples_buffer + offset, sample, l) | |
634 | offset += l |
|
775 | offset += l | |
635 | sample_sizes[i] = l |
|
776 | sample_sizes[i] = l | |
636 |
|
777 | |||
637 | dict_data = new_nonzero('char[]', dict_size) |
|
778 | dict_data = new_nonzero('char[]', dict_size) | |
638 |
|
779 | |||
639 | zresult = lib.ZDICT_trainFromBuffer(ffi.addressof(dict_data), dict_size, |
|
780 | dparams = ffi.new('ZDICT_params_t *')[0] | |
640 | ffi.addressof(samples_buffer), |
|
781 | dparams.selectivityLevel = selectivity | |
641 | ffi.addressof(sample_sizes, 0), |
|
782 | dparams.compressionLevel = level | |
642 | len(samples)) |
|
783 | dparams.notificationLevel = notifications | |
|
784 | dparams.dictID = dict_id | |||
|
785 | ||||
|
786 | zresult = lib.ZDICT_trainFromBuffer_advanced( | |||
|
787 | ffi.addressof(dict_data), dict_size, | |||
|
788 | ffi.addressof(samples_buffer), | |||
|
789 | ffi.addressof(sample_sizes, 0), len(samples), | |||
|
790 | dparams) | |||
|
791 | ||||
643 | if lib.ZDICT_isError(zresult): |
|
792 | if lib.ZDICT_isError(zresult): | |
644 | raise ZstdError('Cannot train dict: %s' % |
|
793 | raise ZstdError('Cannot train dict: %s' % | |
645 | ffi.string(lib.ZDICT_getErrorName(zresult))) |
|
794 | ffi.string(lib.ZDICT_getErrorName(zresult))) | |
646 |
|
795 | |||
647 | return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:]) |
|
796 | return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:]) | |
648 |
|
797 | |||
649 |
|
798 | |||
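Note: the `train_dictionary()` rewrite above now routes through `ZDICT_trainFromBuffer_advanced()` with an explicit `ZDICT_params_t`. A hedged usage sketch, assuming the bindings import as `zstd`; real training wants many representative samples and can raise `ZstdError` when the sample set is too small or too uniform:

    import zstd  # assumed import name for these bindings

    # Synthetic samples keep the sketch short; substitute real corpus data.
    samples = [(u'record %d ' % i).encode('ascii') * 64 for i in range(1000)]

    d = zstd.train_dictionary(8192, samples)
    print(d.dict_id(), len(d))

    # The trained dictionary plugs into compressors and decompressors.
    compressed = zstd.ZstdCompressor(dict_data=d).compress(samples[0])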
|
799 | def train_cover_dictionary(dict_size, samples, k=0, d=0, | |||
|
800 | notifications=0, dict_id=0, level=0, optimize=False, | |||
|
801 | steps=0, threads=0): | |||
|
802 | if not isinstance(samples, list): | |||
|
803 | raise TypeError('samples must be a list') | |||
|
804 | ||||
|
805 | if threads < 0: | |||
|
806 | threads = _cpu_count() | |||
|
807 | ||||
|
808 | total_size = sum(map(len, samples)) | |||
|
809 | ||||
|
810 | samples_buffer = new_nonzero('char[]', total_size) | |||
|
811 | sample_sizes = new_nonzero('size_t[]', len(samples)) | |||
|
812 | ||||
|
813 | offset = 0 | |||
|
814 | for i, sample in enumerate(samples): | |||
|
815 | if not isinstance(sample, bytes_type): | |||
|
816 | raise ValueError('samples must be bytes') | |||
|
817 | ||||
|
818 | l = len(sample) | |||
|
819 | ffi.memmove(samples_buffer + offset, sample, l) | |||
|
820 | offset += l | |||
|
821 | sample_sizes[i] = l | |||
|
822 | ||||
|
823 | dict_data = new_nonzero('char[]', dict_size) | |||
|
824 | ||||
|
825 | dparams = ffi.new('COVER_params_t *')[0] | |||
|
826 | dparams.k = k | |||
|
827 | dparams.d = d | |||
|
828 | dparams.steps = steps | |||
|
829 | dparams.nbThreads = threads | |||
|
830 | dparams.notificationLevel = notifications | |||
|
831 | dparams.dictID = dict_id | |||
|
832 | dparams.compressionLevel = level | |||
|
833 | ||||
|
834 | if optimize: | |||
|
835 | zresult = lib.COVER_optimizeTrainFromBuffer( | |||
|
836 | ffi.addressof(dict_data), dict_size, | |||
|
837 | ffi.addressof(samples_buffer), | |||
|
838 | ffi.addressof(sample_sizes, 0), len(samples), | |||
|
839 | ffi.addressof(dparams)) | |||
|
840 | else: | |||
|
841 | zresult = lib.COVER_trainFromBuffer( | |||
|
842 | ffi.addressof(dict_data), dict_size, | |||
|
843 | ffi.addressof(samples_buffer), | |||
|
844 | ffi.addressof(sample_sizes, 0), len(samples), | |||
|
845 | dparams) | |||
|
846 | ||||
|
847 | if lib.ZDICT_isError(zresult): | |||
|
848 | raise ZstdError('cannot train dict: %s' % | |||
|
849 | ffi.string(lib.ZDICT_getErrorName(zresult))) | |||
|
850 | ||||
|
851 | return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:], | |||
|
852 | k=dparams.k, d=dparams.d) | |||
|
853 | ||||
|
854 | ||||
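Note: `train_cover_dictionary()` is new in this drop and exposes the COVER trainer, including parameter optimization. A sketch under the same `import zstd` assumption; the keyword names mirror the signature added above, and `threads=-1` maps to the CPU count per the hunk:

    import zstd  # assumed import name for these bindings

    samples = [(u'sample %d: ' % i).encode('ascii') + b'payload ' * 64
               for i in range(1000)]

    # With optimize=True the COVER trainer searches (k, d) itself; steps and
    # threads bound the search. The chosen k/d come back on the dictionary.
    d = zstd.train_cover_dictionary(8192, samples, optimize=True, steps=20,
                                    threads=-1)
    print(d.k, d.d, d.dict_id())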
650 | class ZstdDecompressionObj(object): |
|
855 | class ZstdDecompressionObj(object): | |
651 | def __init__(self, decompressor): |
|
856 | def __init__(self, decompressor): | |
652 | self._decompressor = decompressor |
|
857 | self._decompressor = decompressor | |
653 | self._dstream = self._decompressor._get_dstream() |
|
|||
654 | self._finished = False |
|
858 | self._finished = False | |
655 |
|
859 | |||
656 | def decompress(self, data): |
|
860 | def decompress(self, data): | |
657 | if self._finished: |
|
861 | if self._finished: | |
658 | raise ZstdError('cannot use a decompressobj multiple times') |
|
862 | raise ZstdError('cannot use a decompressobj multiple times') | |
659 |
|
863 | |||
|
864 | assert(self._decompressor._dstream) | |||
|
865 | ||||
660 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
866 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
661 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
867 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
662 |
|
868 | |||
663 | data_buffer = ffi.from_buffer(data) |
|
869 | data_buffer = ffi.from_buffer(data) | |
664 | in_buffer.src = data_buffer |
|
870 | in_buffer.src = data_buffer | |
665 | in_buffer.size = len(data_buffer) |
|
871 | in_buffer.size = len(data_buffer) | |
666 | in_buffer.pos = 0 |
|
872 | in_buffer.pos = 0 | |
667 |
|
873 | |||
668 | dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) |
|
874 | dst_buffer = ffi.new('char[]', DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |
669 | out_buffer.dst = dst_buffer |
|
875 | out_buffer.dst = dst_buffer | |
670 | out_buffer.size = len(dst_buffer) |
|
876 | out_buffer.size = len(dst_buffer) | |
671 | out_buffer.pos = 0 |
|
877 | out_buffer.pos = 0 | |
672 |
|
878 | |||
673 | chunks = [] |
|
879 | chunks = [] | |
674 |
|
880 | |||
675 | while in_buffer.pos < in_buffer.size: |
|
881 | while in_buffer.pos < in_buffer.size: | |
676 | zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
|
882 | zresult = lib.ZSTD_decompressStream(self._decompressor._dstream, | |
|
883 | out_buffer, in_buffer) | |||
677 | if lib.ZSTD_isError(zresult): |
|
884 | if lib.ZSTD_isError(zresult): | |
678 | raise ZstdError('zstd decompressor error: %s' % |
|
885 | raise ZstdError('zstd decompressor error: %s' % | |
679 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
886 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
680 |
|
887 | |||
681 | if zresult == 0: |
|
888 | if zresult == 0: | |
682 | self._finished = True |
|
889 | self._finished = True | |
683 | self._dstream = None |
|
|||
684 | self._decompressor = None |
|
890 | self._decompressor = None | |
685 |
|
891 | |||
686 | if out_buffer.pos: |
|
892 | if out_buffer.pos: | |
687 | chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
|
893 | chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |
688 | out_buffer.pos = 0 |
|
894 | out_buffer.pos = 0 | |
689 |
|
895 | |||
690 | return b''.join(chunks) |
|
896 | return b''.join(chunks) | |
691 |
|
897 | |||
692 |
|
898 | |||
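Note: `ZstdDecompressionObj` above now borrows the DStream owned by its `ZstdDecompressor` instead of holding its own reference. From the caller's side the zlib-like interface is unchanged; a sketch, assuming the bindings import as `zstd`:

    import zstd  # assumed import name for these bindings

    frame = zstd.ZstdCompressor().compress(b'streamed payload')

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()

    # Feed compressed bytes in arbitrary chunks; each call returns whatever
    # output is ready. The object cannot be reused once the frame ends.
    parts = [dobj.decompress(frame[i:i + 16]) for i in range(0, len(frame), 16)]
    print(b''.join(parts))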
693 | class ZstdDecompressionWriter(object): |
|
899 | class ZstdDecompressionWriter(object): | |
694 | def __init__(self, decompressor, writer, write_size): |
|
900 | def __init__(self, decompressor, writer, write_size): | |
695 | self._decompressor = decompressor |
|
901 | self._decompressor = decompressor | |
696 | self._writer = writer |
|
902 | self._writer = writer | |
697 | self._write_size = write_size |
|
903 | self._write_size = write_size | |
698 | self._dstream = None |
|
|||
699 | self._entered = False |
|
904 | self._entered = False | |
700 |
|
905 | |||
701 | def __enter__(self): |
|
906 | def __enter__(self): | |
702 | if self._entered: |
|
907 | if self._entered: | |
703 | raise ZstdError('cannot __enter__ multiple times') |
|
908 | raise ZstdError('cannot __enter__ multiple times') | |
704 |
|
909 | |||
705 | self._dstream = self._decompressor._get_dstream()
|
910 | self._decompressor._ensure_dstream() | |
706 | self._entered = True |
|
911 | self._entered = True | |
707 |
|
912 | |||
708 | return self |
|
913 | return self | |
709 |
|
914 | |||
710 | def __exit__(self, exc_type, exc_value, exc_tb): |
|
915 | def __exit__(self, exc_type, exc_value, exc_tb): | |
711 | self._entered = False |
|
916 | self._entered = False | |
712 | self._dstream = None |
|
|||
713 |
|
917 | |||
714 | def memory_size(self): |
|
918 | def memory_size(self): | |
715 | if not self._dstream: |
|
919 | if not self._decompressor._dstream: | |
716 | raise ZstdError('cannot determine size of inactive decompressor ' |
|
920 | raise ZstdError('cannot determine size of inactive decompressor ' | |
717 | 'call when context manager is active') |
|
921 | 'call when context manager is active') | |
718 |
|
922 | |||
719 | return lib.ZSTD_sizeof_DStream(self._dstream) |
|
923 | return lib.ZSTD_sizeof_DStream(self._decompressor._dstream) | |
720 |
|
924 | |||
721 | def write(self, data): |
|
925 | def write(self, data): | |
722 | if not self._entered: |
|
926 | if not self._entered: | |
723 | raise ZstdError('write must be called from an active context manager') |
|
927 | raise ZstdError('write must be called from an active context manager') | |
724 |
|
928 | |||
725 | total_write = 0 |
|
929 | total_write = 0 | |
726 |
|
930 | |||
727 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
931 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
728 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
932 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
729 |
|
933 | |||
730 | data_buffer = ffi.from_buffer(data) |
|
934 | data_buffer = ffi.from_buffer(data) | |
731 | in_buffer.src = data_buffer |
|
935 | in_buffer.src = data_buffer | |
732 | in_buffer.size = len(data_buffer) |
|
936 | in_buffer.size = len(data_buffer) | |
733 | in_buffer.pos = 0 |
|
937 | in_buffer.pos = 0 | |
734 |
|
938 | |||
735 | dst_buffer = ffi.new('char[]', self._write_size) |
|
939 | dst_buffer = ffi.new('char[]', self._write_size) | |
736 | out_buffer.dst = dst_buffer |
|
940 | out_buffer.dst = dst_buffer | |
737 | out_buffer.size = len(dst_buffer) |
|
941 | out_buffer.size = len(dst_buffer) | |
738 | out_buffer.pos = 0 |
|
942 | out_buffer.pos = 0 | |
739 |
|
943 | |||
|
944 | dstream = self._decompressor._dstream | |||
|
945 | ||||
740 | while in_buffer.pos < in_buffer.size: |
|
946 | while in_buffer.pos < in_buffer.size: | |
741 | zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer)
|
947 | zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer) | |
742 | if lib.ZSTD_isError(zresult): |
|
948 | if lib.ZSTD_isError(zresult): | |
743 | raise ZstdError('zstd decompress error: %s' % |
|
949 | raise ZstdError('zstd decompress error: %s' % | |
744 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
950 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
745 |
|
951 | |||
746 | if out_buffer.pos: |
|
952 | if out_buffer.pos: | |
747 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) |
|
953 | self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:]) | |
748 | total_write += out_buffer.pos |
|
954 | total_write += out_buffer.pos | |
749 | out_buffer.pos = 0 |
|
955 | out_buffer.pos = 0 | |
750 |
|
956 | |||
751 | return total_write |
|
957 | return total_write | |
752 |
|
958 | |||
753 |
|
959 | |||
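Note: `ZstdDecompressionWriter` above likewise defers DStream setup to `_ensure_dstream()` on the parent decompressor. Typical use is through `write_to()`, sketched here under the same `import zstd` assumption:

    import io
    import zstd  # assumed import name for these bindings

    frame = zstd.ZstdCompressor().compress(b'write_to payload')

    dctx = zstd.ZstdDecompressor()
    out = io.BytesIO()
    # write_to() returns the context manager implemented above; write() takes
    # compressed bytes and forwards decompressed output to the wrapped writer.
    with dctx.write_to(out) as decompressor:
        decompressor.write(frame)
    print(out.getvalue())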
754 | class ZstdDecompressor(object): |
|
960 | class ZstdDecompressor(object): | |
755 | def __init__(self, dict_data=None): |
|
961 | def __init__(self, dict_data=None): | |
756 | self._dict_data = dict_data |
|
962 | self._dict_data = dict_data | |
757 |
|
963 | |||
758 | dctx = lib.ZSTD_createDCtx() |
|
964 | dctx = lib.ZSTD_createDCtx() | |
759 | if dctx == ffi.NULL: |
|
965 | if dctx == ffi.NULL: | |
760 | raise MemoryError() |
|
966 | raise MemoryError() | |
761 |
|
967 | |||
762 | self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx) |
|
968 | self._refdctx = ffi.gc(dctx, lib.ZSTD_freeDCtx) | |
|
969 | self._dstream = None | |||
763 |
|
970 | |||
764 | @property |
|
971 | @property | |
765 | def _ddict(self): |
|
972 | def _ddict(self): | |
766 | if self._dict_data: |
|
973 | if self._dict_data: | |
767 | dict_data = self._dict_data.as_bytes() |
|
974 | dict_data = self._dict_data.as_bytes() | |
768 | dict_size = len(self._dict_data) |
|
975 | dict_size = len(self._dict_data) | |
769 |
|
976 | |||
770 | ddict = lib.ZSTD_createDDict(dict_data, dict_size) |
|
977 | ddict = lib.ZSTD_createDDict(dict_data, dict_size) | |
771 | if ddict == ffi.NULL: |
|
978 | if ddict == ffi.NULL: | |
772 | raise ZstdError('could not create decompression dict') |
|
979 | raise ZstdError('could not create decompression dict') | |
773 | else: |
|
980 | else: | |
774 | ddict = None |
|
981 | ddict = None | |
775 |
|
982 | |||
776 | self.__dict__['_ddict'] = ddict |
|
983 | self.__dict__['_ddict'] = ddict | |
777 | return ddict |
|
984 | return ddict | |
778 |
|
985 | |||
779 | def decompress(self, data, max_output_size=0): |
|
986 | def decompress(self, data, max_output_size=0): | |
780 | data_buffer = ffi.from_buffer(data) |
|
987 | data_buffer = ffi.from_buffer(data) | |
781 |
|
988 | |||
782 | orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx)) |
|
989 | orig_dctx = new_nonzero('char[]', lib.ZSTD_sizeof_DCtx(self._refdctx)) | |
783 | dctx = ffi.cast('ZSTD_DCtx *', orig_dctx) |
|
990 | dctx = ffi.cast('ZSTD_DCtx *', orig_dctx) | |
784 | lib.ZSTD_copyDCtx(dctx, self._refdctx) |
|
991 | lib.ZSTD_copyDCtx(dctx, self._refdctx) | |
785 |
|
992 | |||
786 | ddict = self._ddict |
|
993 | ddict = self._ddict | |
787 |
|
994 | |||
788 | output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer)) |
|
995 | output_size = lib.ZSTD_getDecompressedSize(data_buffer, len(data_buffer)) | |
789 | if output_size: |
|
996 | if output_size: | |
790 | result_buffer = ffi.new('char[]', output_size) |
|
997 | result_buffer = ffi.new('char[]', output_size) | |
791 | result_size = output_size |
|
998 | result_size = output_size | |
792 | else: |
|
999 | else: | |
793 | if not max_output_size: |
|
1000 | if not max_output_size: | |
794 | raise ZstdError('input data invalid or missing content size ' |
|
1001 | raise ZstdError('input data invalid or missing content size ' | |
795 | 'in frame header') |
|
1002 | 'in frame header') | |
796 |
|
1003 | |||
797 | result_buffer = ffi.new('char[]', max_output_size) |
|
1004 | result_buffer = ffi.new('char[]', max_output_size) | |
798 | result_size = max_output_size |
|
1005 | result_size = max_output_size | |
799 |
|
1006 | |||
800 | if ddict: |
|
1007 | if ddict: | |
801 | zresult = lib.ZSTD_decompress_usingDDict(dctx, |
|
1008 | zresult = lib.ZSTD_decompress_usingDDict(dctx, | |
802 | result_buffer, result_size, |
|
1009 | result_buffer, result_size, | |
803 | data_buffer, len(data_buffer), |
|
1010 | data_buffer, len(data_buffer), | |
804 | ddict) |
|
1011 | ddict) | |
805 | else: |
|
1012 | else: | |
806 | zresult = lib.ZSTD_decompressDCtx(dctx, |
|
1013 | zresult = lib.ZSTD_decompressDCtx(dctx, | |
807 | result_buffer, result_size, |
|
1014 | result_buffer, result_size, | |
808 | data_buffer, len(data_buffer)) |
|
1015 | data_buffer, len(data_buffer)) | |
809 | if lib.ZSTD_isError(zresult): |
|
1016 | if lib.ZSTD_isError(zresult): | |
810 | raise ZstdError('decompression error: %s' % |
|
1017 | raise ZstdError('decompression error: %s' % | |
811 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
1018 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
812 | elif output_size and zresult != output_size: |
|
1019 | elif output_size and zresult != output_size: | |
813 | raise ZstdError('decompression error: decompressed %d bytes; expected %d' % |
|
1020 | raise ZstdError('decompression error: decompressed %d bytes; expected %d' % | |
814 | (zresult, output_size)) |
|
1021 | (zresult, output_size)) | |
815 |
|
1022 | |||
816 | return ffi.buffer(result_buffer, zresult)[:] |
|
1023 | return ffi.buffer(result_buffer, zresult)[:] | |
817 |
|
1024 | |||
818 | def decompressobj(self): |
|
1025 | def decompressobj(self): | |
|
1026 | self._ensure_dstream() | |||
819 | return ZstdDecompressionObj(self) |
|
1027 | return ZstdDecompressionObj(self) | |
820 |
|
1028 | |||
821 | def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, |
|
1029 | def read_from(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
822 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, |
|
1030 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE, | |
823 | skip_bytes=0): |
|
1031 | skip_bytes=0): | |
824 | if skip_bytes >= read_size: |
|
1032 | if skip_bytes >= read_size: | |
825 | raise ValueError('skip_bytes must be smaller than read_size') |
|
1033 | raise ValueError('skip_bytes must be smaller than read_size') | |
826 |
|
1034 | |||
827 | if hasattr(reader, 'read'): |
|
1035 | if hasattr(reader, 'read'): | |
828 | have_read = True |
|
1036 | have_read = True | |
829 | elif hasattr(reader, '__getitem__'): |
|
1037 | elif hasattr(reader, '__getitem__'): | |
830 | have_read = False |
|
1038 | have_read = False | |
831 | buffer_offset = 0 |
|
1039 | buffer_offset = 0 | |
832 | size = len(reader) |
|
1040 | size = len(reader) | |
833 | else: |
|
1041 | else: | |
834 | raise ValueError('must pass an object with a read() method or ' |
|
1042 | raise ValueError('must pass an object with a read() method or ' | |
835 | 'conforms to buffer protocol') |
|
1043 | 'conforms to buffer protocol') | |
836 |
|
1044 | |||
837 | if skip_bytes: |
|
1045 | if skip_bytes: | |
838 | if have_read: |
|
1046 | if have_read: | |
839 | reader.read(skip_bytes) |
|
1047 | reader.read(skip_bytes) | |
840 | else: |
|
1048 | else: | |
841 | if skip_bytes > size: |
|
1049 | if skip_bytes > size: | |
842 | raise ValueError('skip_bytes larger than first input chunk') |
|
1050 | raise ValueError('skip_bytes larger than first input chunk') | |
843 |
|
1051 | |||
844 | buffer_offset = skip_bytes |
|
1052 | buffer_offset = skip_bytes | |
845 |
|
1053 | |||
846 |
|
|
1054 | self._ensure_dstream() | |
847 |
|
1055 | |||
848 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
1056 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
849 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
1057 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
850 |
|
1058 | |||
851 | dst_buffer = ffi.new('char[]', write_size) |
|
1059 | dst_buffer = ffi.new('char[]', write_size) | |
852 | out_buffer.dst = dst_buffer |
|
1060 | out_buffer.dst = dst_buffer | |
853 | out_buffer.size = len(dst_buffer) |
|
1061 | out_buffer.size = len(dst_buffer) | |
854 | out_buffer.pos = 0 |
|
1062 | out_buffer.pos = 0 | |
855 |
|
1063 | |||
856 | while True: |
|
1064 | while True: | |
857 | assert out_buffer.pos == 0 |
|
1065 | assert out_buffer.pos == 0 | |
858 |
|
1066 | |||
859 | if have_read: |
|
1067 | if have_read: | |
860 | read_result = reader.read(read_size) |
|
1068 | read_result = reader.read(read_size) | |
861 | else: |
|
1069 | else: | |
862 | remaining = size - buffer_offset |
|
1070 | remaining = size - buffer_offset | |
863 | slice_size = min(remaining, read_size) |
|
1071 | slice_size = min(remaining, read_size) | |
864 | read_result = reader[buffer_offset:buffer_offset + slice_size] |
|
1072 | read_result = reader[buffer_offset:buffer_offset + slice_size] | |
865 | buffer_offset += slice_size |
|
1073 | buffer_offset += slice_size | |
866 |
|
1074 | |||
867 | # No new input. Break out of read loop. |
|
1075 | # No new input. Break out of read loop. | |
868 | if not read_result: |
|
1076 | if not read_result: | |
869 | break |
|
1077 | break | |
870 |
|
1078 | |||
871 | # Feed all read data into decompressor and emit output until |
|
1079 | # Feed all read data into decompressor and emit output until | |
872 | # exhausted. |
|
1080 | # exhausted. | |
873 | read_buffer = ffi.from_buffer(read_result) |
|
1081 | read_buffer = ffi.from_buffer(read_result) | |
874 | in_buffer.src = read_buffer |
|
1082 | in_buffer.src = read_buffer | |
875 | in_buffer.size = len(read_buffer) |
|
1083 | in_buffer.size = len(read_buffer) | |
876 | in_buffer.pos = 0 |
|
1084 | in_buffer.pos = 0 | |
877 |
|
1085 | |||
878 | while in_buffer.pos < in_buffer.size: |
|
1086 | while in_buffer.pos < in_buffer.size: | |
879 | assert out_buffer.pos == 0 |
|
1087 | assert out_buffer.pos == 0 | |
880 |
|
1088 | |||
881 | zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer) |
|
1089 | zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer) | |
882 | if lib.ZSTD_isError(zresult): |
|
1090 | if lib.ZSTD_isError(zresult): | |
883 | raise ZstdError('zstd decompress error: %s' % |
|
1091 | raise ZstdError('zstd decompress error: %s' % | |
884 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
1092 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
885 |
|
1093 | |||
886 | if out_buffer.pos: |
|
1094 | if out_buffer.pos: | |
887 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] |
|
1095 | data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:] | |
888 | out_buffer.pos = 0 |
|
1096 | out_buffer.pos = 0 | |
889 | yield data |
|
1097 | yield data | |
890 |
|
1098 | |||
891 | if zresult == 0: |
|
1099 | if zresult == 0: | |
892 | return |
|
1100 | return | |
893 |
|
1101 | |||
894 | # Repeat loop to collect more input data. |
|
1102 | # Repeat loop to collect more input data. | |
895 | continue |
|
1103 | continue | |
896 |
|
1104 | |||
897 | # If we get here, input is exhausted. |
|
1105 | # If we get here, input is exhausted. | |
898 |
|
1106 | |||
899 | def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): |
|
1107 | def write_to(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
900 | if not hasattr(writer, 'write'): |
|
1108 | if not hasattr(writer, 'write'): | |
901 | raise ValueError('must pass an object with a write() method') |
|
1109 | raise ValueError('must pass an object with a write() method') | |
902 |
|
1110 | |||
903 | return ZstdDecompressionWriter(self, writer, write_size) |
|
1111 | return ZstdDecompressionWriter(self, writer, write_size) | |
904 |
|
1112 | |||
905 | def copy_stream(self, ifh, ofh, |
|
1113 | def copy_stream(self, ifh, ofh, | |
906 | read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, |
|
1114 | read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE, | |
907 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): |
|
1115 | write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE): | |
908 | if not hasattr(ifh, 'read'): |
|
1116 | if not hasattr(ifh, 'read'): | |
909 | raise ValueError('first argument must have a read() method') |
|
1117 | raise ValueError('first argument must have a read() method') | |
910 | if not hasattr(ofh, 'write'): |
|
1118 | if not hasattr(ofh, 'write'): | |
911 | raise ValueError('second argument must have a write() method') |
|
1119 | raise ValueError('second argument must have a write() method') | |
912 |
|
1120 | |||
913 | dstream = self._get_dstream()
|
1121 | self._ensure_dstream() | |
914 |
|
1122 | |||
915 | in_buffer = ffi.new('ZSTD_inBuffer *') |
|
1123 | in_buffer = ffi.new('ZSTD_inBuffer *') | |
916 | out_buffer = ffi.new('ZSTD_outBuffer *') |
|
1124 | out_buffer = ffi.new('ZSTD_outBuffer *') | |
917 |
|
1125 | |||
918 | dst_buffer = ffi.new('char[]', write_size) |
|
1126 | dst_buffer = ffi.new('char[]', write_size) | |
919 | out_buffer.dst = dst_buffer |
|
1127 | out_buffer.dst = dst_buffer | |
920 | out_buffer.size = write_size |
|
1128 | out_buffer.size = write_size | |
921 | out_buffer.pos = 0 |
|
1129 | out_buffer.pos = 0 | |
922 |
|
1130 | |||
923 | total_read, total_write = 0, 0 |
|
1131 | total_read, total_write = 0, 0 | |
924 |
|
1132 | |||
925 | # Read all available input. |
|
1133 | # Read all available input. | |
926 | while True: |
|
1134 | while True: | |
927 | data = ifh.read(read_size) |
|
1135 | data = ifh.read(read_size) | |
928 | if not data: |
|
1136 | if not data: | |
929 | break |
|
1137 | break | |
930 |
|
1138 | |||
931 | data_buffer = ffi.from_buffer(data) |
|
1139 | data_buffer = ffi.from_buffer(data) | |
932 | total_read += len(data_buffer) |
|
1140 | total_read += len(data_buffer) | |
933 | in_buffer.src = data_buffer |
|
1141 | in_buffer.src = data_buffer | |
934 | in_buffer.size = len(data_buffer) |
|
1142 | in_buffer.size = len(data_buffer) | |
935 | in_buffer.pos = 0 |
|
1143 | in_buffer.pos = 0 | |
936 |
|
1144 | |||
937 | # Flush all read data to output. |
|
1145 | # Flush all read data to output. | |
938 | while in_buffer.pos < in_buffer.size: |
|
1146 | while in_buffer.pos < in_buffer.size: | |
939 | zresult = lib.ZSTD_decompressStream(dstream, out_buffer, in_buffer) |
|
1147 | zresult = lib.ZSTD_decompressStream(self._dstream, out_buffer, in_buffer) | |
940 | if lib.ZSTD_isError(zresult): |
|
1148 | if lib.ZSTD_isError(zresult): | |
941 | raise ZstdError('zstd decompressor error: %s' % |
|
1149 | raise ZstdError('zstd decompressor error: %s' % | |
942 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
1150 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
943 |
|
1151 | |||
944 | if out_buffer.pos: |
|
1152 | if out_buffer.pos: | |
945 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) |
|
1153 | ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos)) | |
946 | total_write += out_buffer.pos |
|
1154 | total_write += out_buffer.pos | |
947 | out_buffer.pos = 0 |
|
1155 | out_buffer.pos = 0 | |
948 |
|
1156 | |||
949 | # Continue loop to keep reading. |
|
1157 | # Continue loop to keep reading. | |
950 |
|
1158 | |||
951 | return total_read, total_write |
|
1159 | return total_read, total_write | |
952 |
|
1160 | |||
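Note: `copy_stream()` above now calls `self._ensure_dstream()` and reuses `self._dstream` across invocations. A sketch of the stream-to-stream decompression path, assuming the bindings import as `zstd`:

    import io
    import zstd  # assumed import name for these bindings

    frame = zstd.ZstdCompressor().compress(b'copy_stream payload ' * 100)

    dctx = zstd.ZstdDecompressor()
    src, dst = io.BytesIO(frame), io.BytesIO()
    # copy_stream() pumps read_size chunks through the DStream and returns the
    # number of compressed bytes read and decompressed bytes written.
    read_count, write_count = dctx.copy_stream(src, dst)
    print(read_count, write_count)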
953 | def decompress_content_dict_chain(self, frames): |
|
1161 | def decompress_content_dict_chain(self, frames): | |
954 | if not isinstance(frames, list): |
|
1162 | if not isinstance(frames, list): | |
955 | raise TypeError('argument must be a list') |
|
1163 | raise TypeError('argument must be a list') | |
956 |
|
1164 | |||
957 | if not frames: |
|
1165 | if not frames: | |
958 | raise ValueError('empty input chain') |
|
1166 | raise ValueError('empty input chain') | |
959 |
|
1167 | |||
960 | # First chunk should not be using a dictionary. We handle it specially. |
|
1168 | # First chunk should not be using a dictionary. We handle it specially. | |
961 | chunk = frames[0] |
|
1169 | chunk = frames[0] | |
962 | if not isinstance(chunk, bytes_type): |
|
1170 | if not isinstance(chunk, bytes_type): | |
963 | raise ValueError('chunk 0 must be bytes') |
|
1171 | raise ValueError('chunk 0 must be bytes') | |
964 |
|
1172 | |||
965 | # All chunks should be zstd frames and should have content size set. |
|
1173 | # All chunks should be zstd frames and should have content size set. | |
966 | chunk_buffer = ffi.from_buffer(chunk) |
|
1174 | chunk_buffer = ffi.from_buffer(chunk) | |
967 | params = ffi.new('ZSTD_frameParams *') |
|
1175 | params = ffi.new('ZSTD_frameParams *') | |
968 | zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer)) |
|
1176 | zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer)) | |
969 | if lib.ZSTD_isError(zresult): |
|
1177 | if lib.ZSTD_isError(zresult): | |
970 | raise ValueError('chunk 0 is not a valid zstd frame') |
|
1178 | raise ValueError('chunk 0 is not a valid zstd frame') | |
971 | elif zresult: |
|
1179 | elif zresult: | |
972 | raise ValueError('chunk 0 is too small to contain a zstd frame') |
|
1180 | raise ValueError('chunk 0 is too small to contain a zstd frame') | |
973 |
|
1181 | |||
974 | if not params.frameContentSize: |
|
1182 | if not params.frameContentSize: | |
975 | raise ValueError('chunk 0 missing content size in frame') |
|
1183 | raise ValueError('chunk 0 missing content size in frame') | |
976 |
|
1184 | |||
977 | dctx = lib.ZSTD_createDCtx() |
|
1185 | dctx = lib.ZSTD_createDCtx() | |
978 | if dctx == ffi.NULL: |
|
1186 | if dctx == ffi.NULL: | |
979 | raise MemoryError() |
|
1187 | raise MemoryError() | |
980 |
|
1188 | |||
981 | dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx) |
|
1189 | dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx) | |
982 |
|
1190 | |||
983 | last_buffer = ffi.new('char[]', params.frameContentSize) |
|
1191 | last_buffer = ffi.new('char[]', params.frameContentSize) | |
984 |
|
1192 | |||
985 | zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer), |
|
1193 | zresult = lib.ZSTD_decompressDCtx(dctx, last_buffer, len(last_buffer), | |
986 | chunk_buffer, len(chunk_buffer)) |
|
1194 | chunk_buffer, len(chunk_buffer)) | |
987 | if lib.ZSTD_isError(zresult): |
|
1195 | if lib.ZSTD_isError(zresult): | |
988 | raise ZstdError('could not decompress chunk 0: %s' % |
|
1196 | raise ZstdError('could not decompress chunk 0: %s' % | |
989 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
1197 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
990 |
|
1198 | |||
991 | # Special case of chain length of 1 |
|
1199 | # Special case of chain length of 1 | |
992 | if len(frames) == 1: |
|
1200 | if len(frames) == 1: | |
993 | return ffi.buffer(last_buffer, len(last_buffer))[:] |
|
1201 | return ffi.buffer(last_buffer, len(last_buffer))[:] | |
994 |
|
1202 | |||
995 | i = 1 |
|
1203 | i = 1 | |
996 | while i < len(frames): |
|
1204 | while i < len(frames): | |
997 | chunk = frames[i] |
|
1205 | chunk = frames[i] | |
998 | if not isinstance(chunk, bytes_type): |
|
1206 | if not isinstance(chunk, bytes_type): | |
999 | raise ValueError('chunk %d must be bytes' % i) |
|
1207 | raise ValueError('chunk %d must be bytes' % i) | |
1000 |
|
1208 | |||
1001 | chunk_buffer = ffi.from_buffer(chunk) |
|
1209 | chunk_buffer = ffi.from_buffer(chunk) | |
1002 | zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer)) |
|
1210 | zresult = lib.ZSTD_getFrameParams(params, chunk_buffer, len(chunk_buffer)) | |
1003 | if lib.ZSTD_isError(zresult): |
|
1211 | if lib.ZSTD_isError(zresult): | |
1004 | raise ValueError('chunk %d is not a valid zstd frame' % i) |
|
1212 | raise ValueError('chunk %d is not a valid zstd frame' % i) | |
1005 | elif zresult: |
|
1213 | elif zresult: | |
1006 | raise ValueError('chunk %d is too small to contain a zstd frame' % i) |
|
1214 | raise ValueError('chunk %d is too small to contain a zstd frame' % i) | |
1007 |
|
1215 | |||
1008 | if not params.frameContentSize: |
|
1216 | if not params.frameContentSize: | |
1009 | raise ValueError('chunk %d missing content size in frame' % i) |
|
1217 | raise ValueError('chunk %d missing content size in frame' % i) | |
1010 |
|
1218 | |||
1011 | dest_buffer = ffi.new('char[]', params.frameContentSize) |
|
1219 | dest_buffer = ffi.new('char[]', params.frameContentSize) | |
1012 |
|
1220 | |||
1013 | zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer), |
|
1221 | zresult = lib.ZSTD_decompress_usingDict(dctx, dest_buffer, len(dest_buffer), | |
1014 | chunk_buffer, len(chunk_buffer), |
|
1222 | chunk_buffer, len(chunk_buffer), | |
1015 | last_buffer, len(last_buffer)) |
|
1223 | last_buffer, len(last_buffer)) | |
1016 | if lib.ZSTD_isError(zresult): |
|
1224 | if lib.ZSTD_isError(zresult): | |
1017 | raise ZstdError('could not decompress chunk %d' % i) |
|
1225 | raise ZstdError('could not decompress chunk %d' % i) | |
1018 |
|
1226 | |||
1019 | last_buffer = dest_buffer |
|
1227 | last_buffer = dest_buffer | |
1020 | i += 1 |
|
1228 | i += 1 | |
1021 |
|
1229 | |||
1022 | return ffi.buffer(last_buffer, len(last_buffer))[:] |
|
1230 | return ffi.buffer(last_buffer, len(last_buffer))[:] | |
1023 |
|
1231 | |||
1024 | def _get_dstream(self):
|
1232 | def _ensure_dstream(self): | |
1025 | dstream = lib.ZSTD_createDStream() |
|
1233 | if self._dstream: | |
1026 | if dstream == ffi.NULL: |
|
1234 | zresult = lib.ZSTD_resetDStream(self._dstream) | |
|
1235 | if lib.ZSTD_isError(zresult): | |||
|
1236 | raise ZstdError('could not reset DStream: %s' % | |||
|
1237 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |||
|
1238 | ||||
|
1239 | return | |||
|
1240 | ||||
|
1241 | self._dstream = lib.ZSTD_createDStream() | |||
|
1242 | if self._dstream == ffi.NULL: | |||
1027 | raise MemoryError() |
|
1243 | raise MemoryError() | |
1028 |
|
1244 | |||
1029 | dstream = ffi.gc(dstream, lib.ZSTD_freeDStream) |
|
1245 | self._dstream = ffi.gc(self._dstream, lib.ZSTD_freeDStream) | |
1030 |
|
1246 | |||
1031 | if self._dict_data: |
|
1247 | if self._dict_data: | |
1032 | zresult = lib.ZSTD_initDStream_usingDict(dstream, |
|
1248 | zresult = lib.ZSTD_initDStream_usingDict(self._dstream, | |
1033 | self._dict_data.as_bytes(), |
|
1249 | self._dict_data.as_bytes(), | |
1034 | len(self._dict_data)) |
|
1250 | len(self._dict_data)) | |
1035 | else: |
|
1251 | else: | |
1036 | zresult = lib.ZSTD_initDStream(dstream) |
|
1252 | zresult = lib.ZSTD_initDStream(self._dstream) | |
1037 |
|
1253 | |||
1038 | if lib.ZSTD_isError(zresult): |
|
1254 | if lib.ZSTD_isError(zresult): | |
|
1255 | self._dstream = None | |||
1039 | raise ZstdError('could not initialize DStream: %s' % |
|
1256 | raise ZstdError('could not initialize DStream: %s' % | |
1040 | ffi.string(lib.ZSTD_getErrorName(zresult))) |
|
1257 | ffi.string(lib.ZSTD_getErrorName(zresult))) | |
1041 |
|
||||
1042 | return dstream |
|
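Note: for reference, a hedged sketch of driving `decompress_content_dict_chain()` end to end. The compression side shown here is illustrative, not part of this change; it assumes the bindings import as `zstd` and that every frame is written with its content size, which the chain decompressor requires:

    import zstd  # assumed import name for these bindings

    # Each frame after the first is compressed against the previous frame's
    # *uncompressed* content used as a dictionary.
    inputs = [b'version 1 of the text', b'version 2 of the text',
              b'version 3 of the text!']

    frames = [zstd.ZstdCompressor(write_content_size=True).compress(inputs[0])]
    for prev, cur in zip(inputs, inputs[1:]):
        cctx = zstd.ZstdCompressor(write_content_size=True,
                                   dict_data=zstd.ZstdCompressionDict(prev))
        frames.append(cctx.compress(cur))

    dctx = zstd.ZstdDecompressor()
    # The chain yields the fully reconstructed content of the last frame.
    print(dctx.decompress_content_dict_chain(frames))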
@@ -1,41 +1,44 | |||||
1 | #require test-repo |
|
1 | #require test-repo | |
2 |
|
2 | |||
3 | $ . "$TESTDIR/helpers-testrepo.sh" |
|
3 | $ . "$TESTDIR/helpers-testrepo.sh" | |
4 | $ cd "$TESTDIR"/.. |
|
4 | $ cd "$TESTDIR"/.. | |
5 |
|
5 | |||
6 | $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py |
|
6 | $ hg files 'set:(**.py)' | sed 's|\\|/|g' | xargs python contrib/check-py3-compat.py | |
7 | contrib/python-zstandard/setup.py not using absolute_import |
|
7 | contrib/python-zstandard/setup.py not using absolute_import | |
8 | contrib/python-zstandard/setup_zstd.py not using absolute_import |
|
8 | contrib/python-zstandard/setup_zstd.py not using absolute_import | |
9 | contrib/python-zstandard/tests/common.py not using absolute_import |
|
9 | contrib/python-zstandard/tests/common.py not using absolute_import | |
|
10 | contrib/python-zstandard/tests/test_buffer_util.py not using absolute_import | |||
10 | contrib/python-zstandard/tests/test_compressor.py not using absolute_import |
|
11 | contrib/python-zstandard/tests/test_compressor.py not using absolute_import | |
|
12 | contrib/python-zstandard/tests/test_compressor_fuzzing.py not using absolute_import | |||
11 | contrib/python-zstandard/tests/test_data_structures.py not using absolute_import |
|
13 | contrib/python-zstandard/tests/test_data_structures.py not using absolute_import | |
|
14 | contrib/python-zstandard/tests/test_data_structures_fuzzing.py not using absolute_import | |||
12 | contrib/python-zstandard/tests/test_decompressor.py not using absolute_import |
|
15 | contrib/python-zstandard/tests/test_decompressor.py not using absolute_import | |
|
16 | contrib/python-zstandard/tests/test_decompressor_fuzzing.py not using absolute_import | |||
13 | contrib/python-zstandard/tests/test_estimate_sizes.py not using absolute_import |
|
17 | contrib/python-zstandard/tests/test_estimate_sizes.py not using absolute_import | |
14 | contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import |
|
18 | contrib/python-zstandard/tests/test_module_attributes.py not using absolute_import | |
15 | contrib/python-zstandard/tests/test_roundtrip.py not using absolute_import |
|
|||
16 | contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import |
|
19 | contrib/python-zstandard/tests/test_train_dictionary.py not using absolute_import | |
17 | i18n/check-translation.py not using absolute_import |
|
20 | i18n/check-translation.py not using absolute_import | |
18 | setup.py not using absolute_import |
|
21 | setup.py not using absolute_import | |
19 | tests/test-demandimport.py not using absolute_import |
|
22 | tests/test-demandimport.py not using absolute_import | |
20 |
|
23 | |||
21 | #if py3exe |
|
24 | #if py3exe | |
22 | $ hg files 'set:(**.py) - grep(pygments)' -X hgext/fsmonitor/pywatchman \ |
|
25 | $ hg files 'set:(**.py) - grep(pygments)' -X hgext/fsmonitor/pywatchman \ | |
23 | > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \ |
|
26 | > | sed 's|\\|/|g' | xargs $PYTHON3 contrib/check-py3-compat.py \ | |
24 | > | sed 's/[0-9][0-9]*)$/*)/' |
|
27 | > | sed 's/[0-9][0-9]*)$/*)/' | |
25 | hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob) |
|
28 | hgext/convert/transport.py: error importing: <*Error> No module named 'svn.client' (error at transport.py:*) (glob) | |
26 | hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*) |
|
29 | hgext/fsmonitor/state.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*) | |
27 | hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*) |
|
30 | hgext/fsmonitor/watchmanclient.py: error importing: <SyntaxError> from __future__ imports must occur at the beginning of the file (__init__.py, line 30) (error at watchmanclient.py:*) | |
28 | mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) |
|
31 | mercurial/cffi/bdiff.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) | |
29 | mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) |
|
32 | mercurial/cffi/mpatch.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) | |
30 | mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) |
|
33 | mercurial/cffi/osutil.py: error importing: <*Error> No module named 'mercurial.cffi' (error at check-py3-compat.py:*) (glob) | |
31 | mercurial/scmwindows.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob) |
|
34 | mercurial/scmwindows.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob) | |
32 | mercurial/win32.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob) |
|
35 | mercurial/win32.py: error importing: <*Error> No module named 'msvcrt' (error at win32.py:*) (glob) | |
33 | mercurial/windows.py: error importing: <*Error> No module named 'msvcrt' (error at windows.py:*) (glob) |
|
36 | mercurial/windows.py: error importing: <*Error> No module named 'msvcrt' (error at windows.py:*) (glob) | |
34 |
|
37 | |||
35 | #endif |
|
38 | #endif | |
36 |
|
39 | |||
37 | #if py3exe py3pygments |
|
40 | #if py3exe py3pygments | |
38 | $ hg files 'set:(**.py) and grep(pygments)' | sed 's|\\|/|g' \ |
|
41 | $ hg files 'set:(**.py) and grep(pygments)' | sed 's|\\|/|g' \ | |
39 | > | xargs $PYTHON3 contrib/check-py3-compat.py \ |
|
42 | > | xargs $PYTHON3 contrib/check-py3-compat.py \ | |
40 | > | sed 's/[0-9][0-9]*)$/*)/' |
|
43 | > | sed 's/[0-9][0-9]*)$/*)/' | |
41 | #endif |
|
44 | #endif |
1 | NO CONTENT: file was removed
1 | NO CONTENT: file was removed