##// END OF EJS Templates
cext: add Python 3.10 support...
Victor Stinner -
r47090:e92ca942 default
parent child Browse files
Show More
@@ -0,0 +1,278 b''
1 // Header file providing new functions of the Python C API to old Python
2 // versions.
3 //
4 // File distributed under the MIT license.
5 //
6 // Homepage:
7 // https://github.com/pythoncapi/pythoncapi_compat
8 //
9 // Latest version:
10 // https://raw.githubusercontent.com/pythoncapi/pythoncapi_compat/master/pythoncapi_compat.h
11
12 #ifndef PYTHONCAPI_COMPAT
13 #define PYTHONCAPI_COMPAT
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18
19 #include <Python.h>
20 #include "frameobject.h" // PyFrameObject, PyFrame_GetBack()
21
22
23 // Cast argument to PyObject* type.
24 #ifndef _PyObject_CAST
25 # define _PyObject_CAST(op) ((PyObject*)(op))
26 #endif
27
28
29 // bpo-42262 added Py_NewRef() to Python 3.10.0a3
30 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_NewRef)
31 static inline PyObject* _Py_NewRef(PyObject *obj)
32 {
33 Py_INCREF(obj);
34 return obj;
35 }
36 #define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj))
37 #endif
38
39
40 // bpo-42262 added Py_XNewRef() to Python 3.10.0a3
41 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_XNewRef)
42 static inline PyObject* _Py_XNewRef(PyObject *obj)
43 {
44 Py_XINCREF(obj);
45 return obj;
46 }
47 #define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj))
48 #endif
49
50
51 // bpo-39573 added Py_SET_REFCNT() to Python 3.9.0a4
52 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_REFCNT)
53 static inline void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt)
54 {
55 ob->ob_refcnt = refcnt;
56 }
57 #define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT((PyObject*)(ob), refcnt)
58 #endif
59
60
61 // bpo-39573 added Py_SET_TYPE() to Python 3.9.0a4
62 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE)
63 static inline void
64 _Py_SET_TYPE(PyObject *ob, PyTypeObject *type)
65 {
66 ob->ob_type = type;
67 }
68 #define Py_SET_TYPE(ob, type) _Py_SET_TYPE((PyObject*)(ob), type)
69 #endif
70
71
72 // bpo-39573 added Py_SET_SIZE() to Python 3.9.0a4
73 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE)
74 static inline void
75 _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size)
76 {
77 ob->ob_size = size;
78 }
79 #define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size)
80 #endif
81
82
83 // bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1
84 #if PY_VERSION_HEX < 0x030900B1
85 static inline PyCodeObject*
86 PyFrame_GetCode(PyFrameObject *frame)
87 {
88 PyCodeObject *code;
89 assert(frame != NULL);
90 code = frame->f_code;
91 assert(code != NULL);
92 Py_INCREF(code);
93 return code;
94 }
95 #endif
96
97 static inline PyCodeObject*
98 _PyFrame_GetCodeBorrow(PyFrameObject *frame)
99 {
100 PyCodeObject *code = PyFrame_GetCode(frame);
101 Py_DECREF(code);
102 return code; // borrowed reference
103 }
104
105
106 // bpo-40421 added PyFrame_GetBack() to Python 3.9.0b1
107 #if PY_VERSION_HEX < 0x030900B1
108 static inline PyFrameObject*
109 PyFrame_GetBack(PyFrameObject *frame)
110 {
111 PyFrameObject *back;
112 assert(frame != NULL);
113 back = frame->f_back;
114 Py_XINCREF(back);
115 return back;
116 }
117 #endif
118
119 static inline PyFrameObject*
120 _PyFrame_GetBackBorrow(PyFrameObject *frame)
121 {
122 PyFrameObject *back = PyFrame_GetBack(frame);
123 Py_XDECREF(back);
124 return back; // borrowed reference
125 }
126
127
128 // bpo-39947 added PyThreadState_GetInterpreter() to Python 3.9.0a5
129 #if PY_VERSION_HEX < 0x030900A5
130 static inline PyInterpreterState *
131 PyThreadState_GetInterpreter(PyThreadState *tstate)
132 {
133 assert(tstate != NULL);
134 return tstate->interp;
135 }
136 #endif
137
138
139 // bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1
140 #if PY_VERSION_HEX < 0x030900B1
141 static inline PyFrameObject*
142 PyThreadState_GetFrame(PyThreadState *tstate)
143 {
144 PyFrameObject *frame;
145 assert(tstate != NULL);
146 frame = tstate->frame;
147 Py_XINCREF(frame);
148 return frame;
149 }
150 #endif
151
152 static inline PyFrameObject*
153 _PyThreadState_GetFrameBorrow(PyThreadState *tstate)
154 {
155 PyFrameObject *frame = PyThreadState_GetFrame(tstate);
156 Py_XDECREF(frame);
157 return frame; // borrowed reference
158 }
159
160
161 // bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a5
162 #if PY_VERSION_HEX < 0x030900A5
163 static inline PyInterpreterState *
164 PyInterpreterState_Get(void)
165 {
166 PyThreadState *tstate;
167 PyInterpreterState *interp;
168
169 tstate = PyThreadState_GET();
170 if (tstate == NULL) {
171 Py_FatalError("GIL released (tstate is NULL)");
172 }
173 interp = tstate->interp;
174 if (interp == NULL) {
175 Py_FatalError("no current interpreter");
176 }
177 return interp;
178 }
179 #endif
180
181
182 // bpo-39947 added PyThreadState_GetID() to Python 3.9.0a6
#if 0x030700A1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030900A6
// Return the `id` field of the given thread state, which is asserted
// non-NULL. Only compiled for Python versions at or above 3.7.0a1, per the
// guard above.
static inline uint64_t
PyThreadState_GetID(PyThreadState *tstate)
{
    assert(tstate != NULL);

    return tstate->id;
}
#endif
191
192
193 // bpo-37194 added PyObject_CallNoArgs() to Python 3.9.0a1
194 #if PY_VERSION_HEX < 0x030900A1
195 static inline PyObject*
196 PyObject_CallNoArgs(PyObject *func)
197 {
198 return PyObject_CallFunctionObjArgs(func, NULL);
199 }
200 #endif
201
202
203 // bpo-39245 made PyObject_CallOneArg() public (previously called
204 // _PyObject_CallOneArg) in Python 3.9.0a4
205 #if PY_VERSION_HEX < 0x030900A4
206 static inline PyObject*
207 PyObject_CallOneArg(PyObject *func, PyObject *arg)
208 {
209 return PyObject_CallFunctionObjArgs(func, arg, NULL);
210 }
211 #endif
212
213
214 // bpo-40024 added PyModule_AddType() to Python 3.9.0a5
215 #if PY_VERSION_HEX < 0x030900A5
216 static inline int
217 PyModule_AddType(PyObject *module, PyTypeObject *type)
218 {
219 const char *name, *dot;
220
221 if (PyType_Ready(type) < 0) {
222 return -1;
223 }
224
225 // inline _PyType_Name()
226 name = type->tp_name;
227 assert(name != NULL);
228 dot = strrchr(name, '.');
229 if (dot != NULL) {
230 name = dot + 1;
231 }
232
233 Py_INCREF(type);
234 if (PyModule_AddObject(module, name, (PyObject *)type) < 0) {
235 Py_DECREF(type);
236 return -1;
237 }
238
239 return 0;
240 }
241 #endif
242
243
244 // bpo-40241 added PyObject_GC_IsTracked() to Python 3.9.0a6.
245 // bpo-4688 added _PyObject_GC_IS_TRACKED() to Python 2.7.0a2.
246 #if PY_VERSION_HEX < 0x030900A6
247 static inline int
248 PyObject_GC_IsTracked(PyObject* obj)
249 {
250 return (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj));
251 }
252 #endif
253
// bpo-40241 added PyObject_GC_IsFinalized() to Python 3.9.0a6.
// bpo-18112 added _PyGCHead_FINALIZED() to Python 3.4.0 final.
#if PY_VERSION_HEX < 0x030900A6 && PY_VERSION_HEX >= 0x030400F0
// Return 1 when `obj` passes PyObject_IS_GC() and _PyGCHead_FINALIZED() is
// true for the PyGC_Head located immediately before the object, 0 otherwise.
static inline int
PyObject_GC_IsFinalized(PyObject *obj)
{
    if (!PyObject_IS_GC(obj)) {
        return 0;
    }
    return _PyGCHead_FINALIZED((PyGC_Head *)(obj)-1) ? 1 : 0;
}
#endif
263
264
265 // bpo-39573 added Py_IS_TYPE() to Python 3.9.0a4
266 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_IS_TYPE)
267 static inline int
268 _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) {
269 return ob->ob_type == type;
270 }
271 #define Py_IS_TYPE(ob, type) _Py_IS_TYPE((const PyObject*)(ob), type)
272 #endif
273
274
275 #ifdef __cplusplus
276 }
277 #endif
278 #endif // PYTHONCAPI_COMPAT
@@ -0,0 +1,278 b''
1 // Header file providing new functions of the Python C API to old Python
2 // versions.
3 //
4 // File distributed under the MIT license.
5 //
6 // Homepage:
7 // https://github.com/pythoncapi/pythoncapi_compat
8 //
9 // Latest version:
10 // https://raw.githubusercontent.com/pythoncapi/pythoncapi_compat/master/pythoncapi_compat.h
11
12 #ifndef PYTHONCAPI_COMPAT
13 #define PYTHONCAPI_COMPAT
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18
19 #include <Python.h>
20 #include "frameobject.h" // PyFrameObject, PyFrame_GetBack()
21
22
23 // Cast argument to PyObject* type.
24 #ifndef _PyObject_CAST
25 # define _PyObject_CAST(op) ((PyObject*)(op))
26 #endif
27
28
29 // bpo-42262 added Py_NewRef() to Python 3.10.0a3
30 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_NewRef)
31 static inline PyObject* _Py_NewRef(PyObject *obj)
32 {
33 Py_INCREF(obj);
34 return obj;
35 }
36 #define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj))
37 #endif
38
39
40 // bpo-42262 added Py_XNewRef() to Python 3.10.0a3
41 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_XNewRef)
42 static inline PyObject* _Py_XNewRef(PyObject *obj)
43 {
44 Py_XINCREF(obj);
45 return obj;
46 }
47 #define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj))
48 #endif
49
50
51 // bpo-39573 added Py_SET_REFCNT() to Python 3.9.0a4
52 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_REFCNT)
53 static inline void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt)
54 {
55 ob->ob_refcnt = refcnt;
56 }
57 #define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT((PyObject*)(ob), refcnt)
58 #endif
59
60
61 // bpo-39573 added Py_SET_TYPE() to Python 3.9.0a4
62 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE)
63 static inline void
64 _Py_SET_TYPE(PyObject *ob, PyTypeObject *type)
65 {
66 ob->ob_type = type;
67 }
68 #define Py_SET_TYPE(ob, type) _Py_SET_TYPE((PyObject*)(ob), type)
69 #endif
70
71
72 // bpo-39573 added Py_SET_SIZE() to Python 3.9.0a4
73 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE)
74 static inline void
75 _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size)
76 {
77 ob->ob_size = size;
78 }
79 #define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size)
80 #endif
81
82
83 // bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1
84 #if PY_VERSION_HEX < 0x030900B1
85 static inline PyCodeObject*
86 PyFrame_GetCode(PyFrameObject *frame)
87 {
88 PyCodeObject *code;
89 assert(frame != NULL);
90 code = frame->f_code;
91 assert(code != NULL);
92 Py_INCREF(code);
93 return code;
94 }
95 #endif
96
97 static inline PyCodeObject*
98 _PyFrame_GetCodeBorrow(PyFrameObject *frame)
99 {
100 PyCodeObject *code = PyFrame_GetCode(frame);
101 Py_DECREF(code);
102 return code; // borrowed reference
103 }
104
105
106 // bpo-40421 added PyFrame_GetBack() to Python 3.9.0b1
107 #if PY_VERSION_HEX < 0x030900B1
108 static inline PyFrameObject*
109 PyFrame_GetBack(PyFrameObject *frame)
110 {
111 PyFrameObject *back;
112 assert(frame != NULL);
113 back = frame->f_back;
114 Py_XINCREF(back);
115 return back;
116 }
117 #endif
118
119 static inline PyFrameObject*
120 _PyFrame_GetBackBorrow(PyFrameObject *frame)
121 {
122 PyFrameObject *back = PyFrame_GetBack(frame);
123 Py_XDECREF(back);
124 return back; // borrowed reference
125 }
126
127
128 // bpo-39947 added PyThreadState_GetInterpreter() to Python 3.9.0a5
129 #if PY_VERSION_HEX < 0x030900A5
130 static inline PyInterpreterState *
131 PyThreadState_GetInterpreter(PyThreadState *tstate)
132 {
133 assert(tstate != NULL);
134 return tstate->interp;
135 }
136 #endif
137
138
139 // bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1
140 #if PY_VERSION_HEX < 0x030900B1
141 static inline PyFrameObject*
142 PyThreadState_GetFrame(PyThreadState *tstate)
143 {
144 PyFrameObject *frame;
145 assert(tstate != NULL);
146 frame = tstate->frame;
147 Py_XINCREF(frame);
148 return frame;
149 }
150 #endif
151
152 static inline PyFrameObject*
153 _PyThreadState_GetFrameBorrow(PyThreadState *tstate)
154 {
155 PyFrameObject *frame = PyThreadState_GetFrame(tstate);
156 Py_XDECREF(frame);
157 return frame; // borrowed reference
158 }
159
160
161 // bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a5
162 #if PY_VERSION_HEX < 0x030900A5
163 static inline PyInterpreterState *
164 PyInterpreterState_Get(void)
165 {
166 PyThreadState *tstate;
167 PyInterpreterState *interp;
168
169 tstate = PyThreadState_GET();
170 if (tstate == NULL) {
171 Py_FatalError("GIL released (tstate is NULL)");
172 }
173 interp = tstate->interp;
174 if (interp == NULL) {
175 Py_FatalError("no current interpreter");
176 }
177 return interp;
178 }
179 #endif
180
181
182 // bpo-39947 added PyThreadState_GetID() to Python 3.9.0a6
183 #if 0x030700A1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030900A6
184 static inline uint64_t
185 PyThreadState_GetID(PyThreadState *tstate)
186 {
187 assert(tstate != NULL);
188 return tstate->id;
189 }
190 #endif
191
192
193 // bpo-37194 added PyObject_CallNoArgs() to Python 3.9.0a1
194 #if PY_VERSION_HEX < 0x030900A1
195 static inline PyObject*
196 PyObject_CallNoArgs(PyObject *func)
197 {
198 return PyObject_CallFunctionObjArgs(func, NULL);
199 }
200 #endif
201
202
203 // bpo-39245 made PyObject_CallOneArg() public (previously called
204 // _PyObject_CallOneArg) in Python 3.9.0a4
205 #if PY_VERSION_HEX < 0x030900A4
206 static inline PyObject*
207 PyObject_CallOneArg(PyObject *func, PyObject *arg)
208 {
209 return PyObject_CallFunctionObjArgs(func, arg, NULL);
210 }
211 #endif
212
213
214 // bpo-40024 added PyModule_AddType() to Python 3.9.0a5
215 #if PY_VERSION_HEX < 0x030900A5
216 static inline int
217 PyModule_AddType(PyObject *module, PyTypeObject *type)
218 {
219 const char *name, *dot;
220
221 if (PyType_Ready(type) < 0) {
222 return -1;
223 }
224
225 // inline _PyType_Name()
226 name = type->tp_name;
227 assert(name != NULL);
228 dot = strrchr(name, '.');
229 if (dot != NULL) {
230 name = dot + 1;
231 }
232
233 Py_INCREF(type);
234 if (PyModule_AddObject(module, name, (PyObject *)type) < 0) {
235 Py_DECREF(type);
236 return -1;
237 }
238
239 return 0;
240 }
241 #endif
242
243
244 // bpo-40241 added PyObject_GC_IsTracked() to Python 3.9.0a6.
245 // bpo-4688 added _PyObject_GC_IS_TRACKED() to Python 2.7.0a2.
246 #if PY_VERSION_HEX < 0x030900A6
247 static inline int
248 PyObject_GC_IsTracked(PyObject* obj)
249 {
250 return (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj));
251 }
252 #endif
253
254 // bpo-40241 added PyObject_GC_IsFinalized() to Python 3.9.0a6.
255 // bpo-18112 added _PyGCHead_FINALIZED() to Python 3.4.0 final.
256 #if PY_VERSION_HEX < 0x030900A6 && PY_VERSION_HEX >= 0x030400F0
257 static inline int
258 PyObject_GC_IsFinalized(PyObject *obj)
259 {
260 return (PyObject_IS_GC(obj) && _PyGCHead_FINALIZED((PyGC_Head *)(obj)-1));
261 }
262 #endif
263
264
265 // bpo-39573 added Py_IS_TYPE() to Python 3.9.0a4
266 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_IS_TYPE)
267 static inline int
268 _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) {
269 return ob->ob_type == type;
270 }
271 #define Py_IS_TYPE(ob, type) _Py_IS_TYPE((const PyObject*)(ob), type)
272 #endif
273
274
275 #ifdef __cplusplus
276 }
277 #endif
278 #endif // PYTHONCAPI_COMPAT
@@ -1,11 +1,12 b''
1 1 # Files that just need to be migrated to the formatter.
2 2 # Do not add new files here!
3 3 mercurial/cext/manifest.c
4 4 mercurial/cext/osutil.c
5 5 # Vendored code that we should never format:
6 6 syntax: glob
7 7 contrib/python-zstandard/**.c
8 8 contrib/python-zstandard/**.h
9 9 hgext/fsmonitor/pywatchman/**.c
10 10 mercurial/thirdparty/**.c
11 11 mercurial/thirdparty/**.h
12 mercurial/pythoncapi_compat.h
@@ -1,792 +1,792 b''
1 1 /**
2 2 * Copyright (c) 2017-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(BufferWithSegments__doc__,
14 14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
15 15 "\n"
16 16 "This type represents a contiguous chunk of memory containing N discrete\n"
17 17 "items within sub-segments of that memory.\n"
18 18 "\n"
19 19 "Segments within the buffer are stored as an array of\n"
20 20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
21 21 "integer using the host/native bit order representation.\n"
22 22 "\n"
23 23 "The type exists to facilitate operations against N>1 items without the\n"
24 24 "overhead of Python object creation and management.\n"
25 25 );
26 26
27 27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
28 28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
29 29 if (self->parent.buf) {
30 30 PyBuffer_Release(&self->parent);
31 31 }
32 32 else if (self->useFree) {
33 33 free(self->data);
34 34 }
35 35 else {
36 36 PyMem_Free(self->data);
37 37 }
38 38
39 39 self->data = NULL;
40 40
41 41 if (self->useFree) {
42 42 free(self->segments);
43 43 }
44 44 else {
45 45 PyMem_Free(self->segments);
46 46 }
47 47
48 48 self->segments = NULL;
49 49
50 50 PyObject_Del(self);
51 51 }
52 52
53 53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
54 54 static char* kwlist[] = {
55 55 "data",
56 56 "segments",
57 57 NULL
58 58 };
59 59
60 60 Py_buffer segments;
61 61 Py_ssize_t segmentCount;
62 62 Py_ssize_t i;
63 63
64 64 memset(&self->parent, 0, sizeof(self->parent));
65 65
66 66 #if PY_MAJOR_VERSION >= 3
67 67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
68 68 #else
69 69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
70 70 #endif
71 71 kwlist, &self->parent, &segments)) {
72 72 return -1;
73 73 }
74 74
75 75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
76 76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
77 77 goto except;
78 78 }
79 79
80 80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
81 81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
82 82 goto except;
83 83 }
84 84
85 85 if (segments.len % sizeof(BufferSegment)) {
86 86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu",
87 87 sizeof(BufferSegment));
88 88 goto except;
89 89 }
90 90
91 91 segmentCount = segments.len / sizeof(BufferSegment);
92 92
93 93 /* Validate segments data, as blindly trusting it could lead to arbitrary
94 94 memory access. */
95 95 for (i = 0; i < segmentCount; i++) {
96 96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
97 97
98 98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
99 99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
100 100 goto except;
101 101 return -1;
102 102 }
103 103 }
104 104
105 105 /* Make a copy of the segments data. It is cheap to do so and is a guard
106 106 against caller changing offsets, which has security implications. */
107 107 self->segments = PyMem_Malloc(segments.len);
108 108 if (!self->segments) {
109 109 PyErr_NoMemory();
110 110 goto except;
111 111 }
112 112
113 113 memcpy(self->segments, segments.buf, segments.len);
114 114 PyBuffer_Release(&segments);
115 115
116 116 self->data = self->parent.buf;
117 117 self->dataSize = self->parent.len;
118 118 self->segmentCount = segmentCount;
119 119
120 120 return 0;
121 121
122 122 except:
123 123 PyBuffer_Release(&self->parent);
124 124 PyBuffer_Release(&segments);
125 125 return -1;
126 126 }
127 127
128 128 /**
129 129 * Construct a BufferWithSegments from existing memory and offsets.
130 130 *
131 131 * Ownership of the backing memory and BufferSegments will be transferred to
132 132 * the created object and freed when the BufferWithSegments is destroyed.
133 133 */
134 134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
135 135 BufferSegment* segments, Py_ssize_t segmentsSize) {
136 136 ZstdBufferWithSegments* result = NULL;
137 137 Py_ssize_t i;
138 138
139 139 if (NULL == data) {
140 140 PyErr_SetString(PyExc_ValueError, "data is NULL");
141 141 return NULL;
142 142 }
143 143
144 144 if (NULL == segments) {
145 145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
146 146 return NULL;
147 147 }
148 148
149 149 for (i = 0; i < segmentsSize; i++) {
150 150 BufferSegment* segment = &segments[i];
151 151
152 152 if (segment->offset + segment->length > dataSize) {
153 153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
154 154 return NULL;
155 155 }
156 156 }
157 157
158 158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
159 159 if (NULL == result) {
160 160 return NULL;
161 161 }
162 162
163 163 result->useFree = 0;
164 164
165 165 memset(&result->parent, 0, sizeof(result->parent));
166 166 result->data = data;
167 167 result->dataSize = dataSize;
168 168 result->segments = segments;
169 169 result->segmentCount = segmentsSize;
170 170
171 171 return result;
172 172 }
173 173
174 174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
175 175 return self->segmentCount;
176 176 }
177 177
178 178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
179 179 ZstdBufferSegment* result = NULL;
180 180
181 181 if (i < 0) {
182 182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
183 183 return NULL;
184 184 }
185 185
186 186 if (i >= self->segmentCount) {
187 187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
188 188 return NULL;
189 189 }
190 190
191 191 if (self->segments[i].length > PY_SSIZE_T_MAX) {
192 192 PyErr_Format(PyExc_ValueError,
193 193 "item at offset %zd is too large for this platform", i);
194 194 return NULL;
195 195 }
196 196
197 197 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
198 198 if (NULL == result) {
199 199 return NULL;
200 200 }
201 201
202 202 result->parent = (PyObject*)self;
203 203 Py_INCREF(self);
204 204
205 205 result->data = (char*)self->data + self->segments[i].offset;
206 206 result->dataSize = (Py_ssize_t)self->segments[i].length;
207 207 result->offset = self->segments[i].offset;
208 208
209 209 return result;
210 210 }
211 211
212 212 #if PY_MAJOR_VERSION >= 3
213 213 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
214 214 if (self->dataSize > PY_SSIZE_T_MAX) {
215 215 view->obj = NULL;
216 216 PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform");
217 217 return -1;
218 218 }
219 219
220 220 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags);
221 221 }
222 222 #else
223 223 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
224 224 if (segment != 0) {
225 225 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
226 226 return -1;
227 227 }
228 228
229 229 if (self->dataSize > PY_SSIZE_T_MAX) {
230 230 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
231 231 return -1;
232 232 }
233 233
234 234 *ptrptr = self->data;
235 235 return (Py_ssize_t)self->dataSize;
236 236 }
237 237
238 238 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
239 239 if (len) {
240 240 *len = 1;
241 241 }
242 242
243 243 return 1;
244 244 }
245 245 #endif
246 246
247 247 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
248 248 "Obtain a bytes instance for this buffer.\n"
249 249 );
250 250
251 251 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
252 252 if (self->dataSize > PY_SSIZE_T_MAX) {
253 253 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
254 254 return NULL;
255 255 }
256 256
257 257 return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize);
258 258 }
259 259
260 260 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
261 261 "Obtain a BufferSegments describing segments in this sintance.\n"
262 262 );
263 263
264 264 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
265 265 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
266 266 if (NULL == result) {
267 267 return NULL;
268 268 }
269 269
270 270 result->parent = (PyObject*)self;
271 271 Py_INCREF(self);
272 272 result->segments = self->segments;
273 273 result->segmentCount = self->segmentCount;
274 274
275 275 return result;
276 276 }
277 277
278 278 static PySequenceMethods BufferWithSegments_sq = {
279 279 (lenfunc)BufferWithSegments_length, /* sq_length */
280 280 0, /* sq_concat */
281 281 0, /* sq_repeat */
282 282 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
283 283 0, /* sq_ass_item */
284 284 0, /* sq_contains */
285 285 0, /* sq_inplace_concat */
286 286 0 /* sq_inplace_repeat */
287 287 };
288 288
289 289 static PyBufferProcs BufferWithSegments_as_buffer = {
290 290 #if PY_MAJOR_VERSION >= 3
291 291 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
292 292 0 /* bf_releasebuffer */
293 293 #else
294 294 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
295 295 0, /* bf_getwritebuffer */
296 296 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
297 297 0 /* bf_getcharbuffer */
298 298 #endif
299 299 };
300 300
301 301 static PyMethodDef BufferWithSegments_methods[] = {
302 302 { "segments", (PyCFunction)BufferWithSegments_segments,
303 303 METH_NOARGS, BufferWithSegments_segments__doc__ },
304 304 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
305 305 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
306 306 { NULL, NULL }
307 307 };
308 308
309 309 static PyMemberDef BufferWithSegments_members[] = {
310 310 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
311 311 READONLY, "total size of the buffer in bytes" },
312 312 { NULL }
313 313 };
314 314
315 315 PyTypeObject ZstdBufferWithSegmentsType = {
316 316 PyVarObject_HEAD_INIT(NULL, 0)
317 317 "zstd.BufferWithSegments", /* tp_name */
318 318 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
319 319 0, /* tp_itemsize */
320 320 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
321 321 0, /* tp_print */
322 322 0, /* tp_getattr */
323 323 0, /* tp_setattr */
324 324 0, /* tp_compare */
325 325 0, /* tp_repr */
326 326 0, /* tp_as_number */
327 327 &BufferWithSegments_sq, /* tp_as_sequence */
328 328 0, /* tp_as_mapping */
329 329 0, /* tp_hash */
330 330 0, /* tp_call */
331 331 0, /* tp_str */
332 332 0, /* tp_getattro */
333 333 0, /* tp_setattro */
334 334 &BufferWithSegments_as_buffer, /* tp_as_buffer */
335 335 Py_TPFLAGS_DEFAULT, /* tp_flags */
336 336 BufferWithSegments__doc__, /* tp_doc */
337 337 0, /* tp_traverse */
338 338 0, /* tp_clear */
339 339 0, /* tp_richcompare */
340 340 0, /* tp_weaklistoffset */
341 341 0, /* tp_iter */
342 342 0, /* tp_iternext */
343 343 BufferWithSegments_methods, /* tp_methods */
344 344 BufferWithSegments_members, /* tp_members */
345 345 0, /* tp_getset */
346 346 0, /* tp_base */
347 347 0, /* tp_dict */
348 348 0, /* tp_descr_get */
349 349 0, /* tp_descr_set */
350 350 0, /* tp_dictoffset */
351 351 (initproc)BufferWithSegments_init, /* tp_init */
352 352 0, /* tp_alloc */
353 353 PyType_GenericNew, /* tp_new */
354 354 };
355 355
356 356 PyDoc_STRVAR(BufferSegments__doc__,
357 357 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
358 358 );
359 359
360 360 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
361 361 Py_CLEAR(self->parent);
362 362 PyObject_Del(self);
363 363 }
364 364
365 365 #if PY_MAJOR_VERSION >= 3
366 366 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
367 367 return PyBuffer_FillInfo(view, (PyObject*)self,
368 368 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
369 369 1, flags);
370 370 }
371 371 #else
372 372 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
373 373 if (segment != 0) {
374 374 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
375 375 return -1;
376 376 }
377 377
378 378 *ptrptr = (void*)self->segments;
379 379 return self->segmentCount * sizeof(BufferSegment);
380 380 }
381 381
382 382 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
383 383 if (len) {
384 384 *len = 1;
385 385 }
386 386
387 387 return 1;
388 388 }
389 389 #endif
390 390
391 391 static PyBufferProcs BufferSegments_as_buffer = {
392 392 #if PY_MAJOR_VERSION >= 3
393 393 (getbufferproc)BufferSegments_getbuffer,
394 394 0
395 395 #else
396 396 (readbufferproc)BufferSegments_getreadbuffer,
397 397 0,
398 398 (segcountproc)BufferSegments_getsegcount,
399 399 0
400 400 #endif
401 401 };
402 402
403 403 PyTypeObject ZstdBufferSegmentsType = {
404 404 PyVarObject_HEAD_INIT(NULL, 0)
405 405 "zstd.BufferSegments", /* tp_name */
406 406 sizeof(ZstdBufferSegments),/* tp_basicsize */
407 407 0, /* tp_itemsize */
408 408 (destructor)BufferSegments_dealloc, /* tp_dealloc */
409 409 0, /* tp_print */
410 410 0, /* tp_getattr */
411 411 0, /* tp_setattr */
412 412 0, /* tp_compare */
413 413 0, /* tp_repr */
414 414 0, /* tp_as_number */
415 415 0, /* tp_as_sequence */
416 416 0, /* tp_as_mapping */
417 417 0, /* tp_hash */
418 418 0, /* tp_call */
419 419 0, /* tp_str */
420 420 0, /* tp_getattro */
421 421 0, /* tp_setattro */
422 422 &BufferSegments_as_buffer, /* tp_as_buffer */
423 423 Py_TPFLAGS_DEFAULT, /* tp_flags */
424 424 BufferSegments__doc__, /* tp_doc */
425 425 0, /* tp_traverse */
426 426 0, /* tp_clear */
427 427 0, /* tp_richcompare */
428 428 0, /* tp_weaklistoffset */
429 429 0, /* tp_iter */
430 430 0, /* tp_iternext */
431 431 0, /* tp_methods */
432 432 0, /* tp_members */
433 433 0, /* tp_getset */
434 434 0, /* tp_base */
435 435 0, /* tp_dict */
436 436 0, /* tp_descr_get */
437 437 0, /* tp_descr_set */
438 438 0, /* tp_dictoffset */
439 439 0, /* tp_init */
440 440 0, /* tp_alloc */
441 441 PyType_GenericNew, /* tp_new */
442 442 };
443 443
444 444 PyDoc_STRVAR(BufferSegment__doc__,
445 445 "BufferSegment - Represents a segment within a BufferWithSegments\n"
446 446 );
447 447
448 448 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
449 449 Py_CLEAR(self->parent);
450 450 PyObject_Del(self);
451 451 }
452 452
453 453 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
454 454 return self->dataSize;
455 455 }
456 456
457 457 #if PY_MAJOR_VERSION >= 3
458 458 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
459 459 return PyBuffer_FillInfo(view, (PyObject*)self,
460 460 self->data, self->dataSize, 1, flags);
461 461 }
462 462 #else
463 463 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
464 464 if (segment != 0) {
465 465 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
466 466 return -1;
467 467 }
468 468
469 469 *ptrptr = self->data;
470 470 return self->dataSize;
471 471 }
472 472
473 473 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
474 474 if (len) {
475 475 *len = 1;
476 476 }
477 477
478 478 return 1;
479 479 }
480 480 #endif
481 481
482 482 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
483 483 "Obtain a bytes instance for this segment.\n"
484 484 );
485 485
486 486 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
487 487 return PyBytes_FromStringAndSize(self->data, self->dataSize);
488 488 }
489 489
490 490 static PySequenceMethods BufferSegment_sq = {
491 491 (lenfunc)BufferSegment_length, /* sq_length */
492 492 0, /* sq_concat */
493 493 0, /* sq_repeat */
494 494 0, /* sq_item */
495 495 0, /* sq_ass_item */
496 496 0, /* sq_contains */
497 497 0, /* sq_inplace_concat */
498 498 0 /* sq_inplace_repeat */
499 499 };
500 500
501 501 static PyBufferProcs BufferSegment_as_buffer = {
502 502 #if PY_MAJOR_VERSION >= 3
503 503 (getbufferproc)BufferSegment_getbuffer,
504 504 0
505 505 #else
506 506 (readbufferproc)BufferSegment_getreadbuffer,
507 507 0,
508 508 (segcountproc)BufferSegment_getsegcount,
509 509 0
510 510 #endif
511 511 };
512 512
/* Python-visible methods of BufferSegment; terminated by a NULL sentinel. */
static PyMethodDef BufferSegment_methods[] = {
    { "tobytes", (PyCFunction)BufferSegment_tobytes,
      METH_NOARGS, BufferSegment_tobytes__doc__ },
    { NULL, NULL }
};
518 518
/* Read-only attributes of BufferSegment exposed straight from the struct;
 * terminated by a NULL sentinel. */
static PyMemberDef BufferSegment_members[] = {
    { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
      "offset of segment within parent buffer" },
    { NULL }
};
524 524
/* Type object for zstd.BufferSegment. Wires up deallocation, the sequence
 * (len) and buffer protocols, and the methods/members tables above. The type
 * pointer in the header is NULL here and is filled in by
 * bufferutil_module_init() before PyType_Ready() is called. */
PyTypeObject ZstdBufferSegmentType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.BufferSegment", /* tp_name */
    sizeof(ZstdBufferSegment),/* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)BufferSegment_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    &BufferSegment_sq, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    &BufferSegment_as_buffer, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    BufferSegment__doc__, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    BufferSegment_methods, /* tp_methods */
    BufferSegment_members, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};
565 565
566 566 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
567 567 "Represents a collection of BufferWithSegments.\n"
568 568 );
569 569
570 570 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
571 571 Py_ssize_t i;
572 572
573 573 if (self->firstElements) {
574 574 PyMem_Free(self->firstElements);
575 575 self->firstElements = NULL;
576 576 }
577 577
578 578 if (self->buffers) {
579 579 for (i = 0; i < self->bufferCount; i++) {
580 580 Py_CLEAR(self->buffers[i]);
581 581 }
582 582
583 583 PyMem_Free(self->buffers);
584 584 self->buffers = NULL;
585 585 }
586 586
587 587 PyObject_Del(self);
588 588 }
589 589
590 590 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
591 591 Py_ssize_t size;
592 592 Py_ssize_t i;
593 593 Py_ssize_t offset = 0;
594 594
595 595 size = PyTuple_Size(args);
596 596 if (-1 == size) {
597 597 return -1;
598 598 }
599 599
600 600 if (0 == size) {
601 601 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
602 602 return -1;
603 603 }
604 604
605 605 for (i = 0; i < size; i++) {
606 606 PyObject* item = PyTuple_GET_ITEM(args, i);
607 607 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
608 608 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
609 609 return -1;
610 610 }
611 611
612 612 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
613 613 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
614 614 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
615 615 return -1;
616 616 }
617 617 }
618 618
619 619 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
620 620 if (NULL == self->buffers) {
621 621 PyErr_NoMemory();
622 622 return -1;
623 623 }
624 624
625 625 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
626 626 if (NULL == self->firstElements) {
627 627 PyMem_Free(self->buffers);
628 628 self->buffers = NULL;
629 629 PyErr_NoMemory();
630 630 return -1;
631 631 }
632 632
633 633 self->bufferCount = size;
634 634
635 635 for (i = 0; i < size; i++) {
636 636 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
637 637
638 638 self->buffers[i] = item;
639 639 Py_INCREF(item);
640 640
641 641 if (i > 0) {
642 642 self->firstElements[i - 1] = offset;
643 643 }
644 644
645 645 offset += item->segmentCount;
646 646 }
647 647
648 648 self->firstElements[size - 1] = offset;
649 649
650 650 return 0;
651 651 }
652 652
653 653 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
654 654 Py_ssize_t i;
655 655 Py_ssize_t j;
656 656 unsigned long long size = 0;
657 657
658 658 for (i = 0; i < self->bufferCount; i++) {
659 659 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
660 660 size += self->buffers[i]->segments[j].length;
661 661 }
662 662 }
663 663
664 664 return PyLong_FromUnsignedLongLong(size);
665 665 }
666 666
667 667 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
668 668 return self->firstElements[self->bufferCount - 1];
669 669 }
670 670
671 671 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
672 672 Py_ssize_t bufferOffset;
673 673
674 674 if (i < 0) {
675 675 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
676 676 return NULL;
677 677 }
678 678
679 679 if (i >= BufferWithSegmentsCollection_length(self)) {
680 680 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
681 681 BufferWithSegmentsCollection_length(self));
682 682 return NULL;
683 683 }
684 684
685 685 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
686 686 Py_ssize_t offset = 0;
687 687
688 688 if (i < self->firstElements[bufferOffset]) {
689 689 if (bufferOffset > 0) {
690 690 offset = self->firstElements[bufferOffset - 1];
691 691 }
692 692
693 693 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
694 694 }
695 695 }
696 696
697 697 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
698 698 return NULL;
699 699 }
700 700
/* Sequence protocol table for BufferWithSegmentsCollection: supports len()
 * and integer indexing. */
static PySequenceMethods BufferWithSegmentsCollection_sq = {
    (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
    0, /* sq_ass_item */
    0, /* sq_contains */
    0, /* sq_inplace_concat */
    0 /* sq_inplace_repeat */
};
711 711
/* Python-visible methods of BufferWithSegmentsCollection; terminated by a
 * NULL sentinel. */
static PyMethodDef BufferWithSegmentsCollection_methods[] = {
    { "size", (PyCFunction)BufferWithSegmentsCollection_size,
      METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
    { NULL, NULL }
};
717 717
/* Type object for zstd.BufferWithSegmentsCollection. Wires up deallocation,
 * the sequence protocol, the methods table and the tp_init constructor. The
 * type pointer in the header is NULL here and is filled in by
 * bufferutil_module_init() before PyType_Ready() is called. */
PyTypeObject ZstdBufferWithSegmentsCollectionType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.BufferWithSegmentsCollection", /* tp_name */
    sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    BufferWithSegmentsCollection__doc__, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    /* TODO implement iterator for performance. */
    0, /* tp_iter */
    0, /* tp_iternext */
    BufferWithSegmentsCollection_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    (initproc)BufferWithSegmentsCollection_init, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};
759 759
760 760 void bufferutil_module_init(PyObject* mod) {
761 Py_TYPE(&ZstdBufferWithSegmentsType) = &PyType_Type;
761 Py_SET_TYPE(&ZstdBufferWithSegmentsType, &PyType_Type);
762 762 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
763 763 return;
764 764 }
765 765
766 766 Py_INCREF(&ZstdBufferWithSegmentsType);
767 767 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
768 768
769 Py_TYPE(&ZstdBufferSegmentsType) = &PyType_Type;
769 Py_SET_TYPE(&ZstdBufferSegmentsType, &PyType_Type);
770 770 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
771 771 return;
772 772 }
773 773
774 774 Py_INCREF(&ZstdBufferSegmentsType);
775 775 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
776 776
777 Py_TYPE(&ZstdBufferSegmentType) = &PyType_Type;
777 Py_SET_TYPE(&ZstdBufferSegmentType, &PyType_Type);
778 778 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
779 779 return;
780 780 }
781 781
782 782 Py_INCREF(&ZstdBufferSegmentType);
783 783 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
784 784
785 Py_TYPE(&ZstdBufferWithSegmentsCollectionType) = &PyType_Type;
785 Py_SET_TYPE(&ZstdBufferWithSegmentsCollectionType, &PyType_Type);
786 786 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
787 787 return;
788 788 }
789 789
790 790 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
791 791 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
792 792 }
@@ -1,360 +1,360 b''
1 1 /**
2 2 * Copyright (c) 2018-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
14 14 "Iterator of output chunks from ZstdCompressionChunker.\n"
15 15 );
16 16
17 17 static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
18 18 Py_XDECREF(self->chunker);
19 19
20 20 PyObject_Del(self);
21 21 }
22 22
23 23 static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
24 24 Py_INCREF(self);
25 25 return self;
26 26 }
27 27
/* tp_iternext slot: produce the next bytes chunk from the owning chunker.
 *
 * The behaviour depends on self->mode:
 *  - normal: feed pending input to the compressor and return a chunk each
 *    time the output buffer fills completely; return NULL once the input is
 *    exhausted (a partial chunk stays buffered in chunker->output).
 *  - flush / finish: after any pending input is consumed, run the compressor
 *    with ZSTD_e_flush / ZSTD_e_end and return whatever output it produced.
 *
 * Returns a new bytes object, or NULL. NULL with no exception set marks the
 * end of iteration; NULL with ZstdError set reports a failure. */
static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
    size_t zresult;
    PyObject* chunk;
    ZstdCompressionChunker* chunker = self->chunker;
    ZSTD_EndDirective zFlushMode;

    /* flush()/finish() iterators must not start while input is still pending. */
    if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
        PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
        return NULL;
    }

    /* Compression already finished: nothing more to yield. */
    if (chunker->finished) {
        return NULL;
    }

    /* If we have data left in the input, consume it. */
    while (chunker->input.pos < chunker->input.size) {
        /* The GIL is released around the compression call. */
        Py_BEGIN_ALLOW_THREADS
        zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
            &chunker->input, ZSTD_e_continue);
        Py_END_ALLOW_THREADS

        /* Input is fully consumed. */
        if (chunker->input.pos == chunker->input.size) {
            chunker->input.src = NULL;
            chunker->input.pos = 0;
            chunker->input.size = 0;
            PyBuffer_Release(&chunker->inBuffer);
        }

        /* The error check comes after the release above, so a fully consumed
         * input buffer is released even when compression failed. */
        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
            return NULL;
        }

        /* If it produced a full output chunk, emit it. */
        if (chunker->output.pos == chunker->output.size) {
            chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
            if (!chunk) {
                return NULL;
            }

            /* Rewind the output buffer so the next call fills it from the start. */
            chunker->output.pos = 0;

            return chunk;
        }

        /* Else continue to compress available input data. */
    }

    /* We also need this here for the special case of an empty input buffer. */
    if (chunker->input.pos == chunker->input.size) {
        chunker->input.src = NULL;
        chunker->input.pos = 0;
        chunker->input.size = 0;
        PyBuffer_Release(&chunker->inBuffer);
    }

    /* No more input data. A partial chunk may be in chunker->output.
     * If we're in normal compression mode, we're done. Otherwise if we're in
     * flush or finish mode, we need to emit what data remains.
     */
    if (self->mode == compressionchunker_mode_normal) {
        /* We don't need to set StopIteration. */
        return NULL;
    }

    /* Map the iterator mode onto the matching zstd end directive. */
    if (self->mode == compressionchunker_mode_flush) {
        zFlushMode = ZSTD_e_flush;
    }
    else if (self->mode == compressionchunker_mode_finish) {
        zFlushMode = ZSTD_e_end;
    }
    else {
        PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
        return NULL;
    }

    Py_BEGIN_ALLOW_THREADS
    zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
        &chunker->input, zFlushMode);
    Py_END_ALLOW_THREADS

    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "zstd compress error: %s",
            ZSTD_getErrorName(zresult));
        return NULL;
    }

    /* A zero result with an empty output buffer means there is nothing to
     * emit, so iteration ends here. */
    if (!zresult && chunker->output.pos == 0) {
        return NULL;
    }

    chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
    if (!chunk) {
        return NULL;
    }

    chunker->output.pos = 0;

    /* In finish mode a zero result marks the chunker as finished; later
     * calls then return NULL at the top of this function. */
    if (!zresult && self->mode == compressionchunker_mode_finish) {
        chunker->finished = 1;
    }

    return chunk;
}
134 134
/* Type object for zstd.ZstdCompressionChunkerIterator. Provides
 * deallocation and the iterator protocol (tp_iter / tp_iternext). The type
 * pointer in the header is NULL here and is filled in by
 * compressionchunker_module_init() before PyType_Ready() is called. */
PyTypeObject ZstdCompressionChunkerIteratorType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdCompressionChunkerIterator", /* tp_name */
    sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    ZstdCompressionChunkerIterator__doc__, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    ZstdCompressionChunkerIterator_iter, /* tp_iter */
    (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
    0, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};
175 175
176 176 PyDoc_STRVAR(ZstdCompressionChunker__doc__,
177 177 "Compress chunks iteratively into exact chunk sizes.\n"
178 178 );
179 179
180 180 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
181 181 PyBuffer_Release(&self->inBuffer);
182 182 self->input.src = NULL;
183 183
184 184 PyMem_Free(self->output.dst);
185 185 self->output.dst = NULL;
186 186
187 187 Py_XDECREF(self->compressor);
188 188
189 189 PyObject_Del(self);
190 190 }
191 191
192 192 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
193 193 static char* kwlist[] = {
194 194 "data",
195 195 NULL
196 196 };
197 197
198 198 ZstdCompressionChunkerIterator* result;
199 199
200 200 if (self->finished) {
201 201 PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
202 202 return NULL;
203 203 }
204 204
205 205 if (self->inBuffer.obj) {
206 206 PyErr_SetString(ZstdError,
207 207 "cannot perform operation before consuming output from previous operation");
208 208 return NULL;
209 209 }
210 210
211 211 #if PY_MAJOR_VERSION >= 3
212 212 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
213 213 #else
214 214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
215 215 #endif
216 216 kwlist, &self->inBuffer)) {
217 217 return NULL;
218 218 }
219 219
220 220 if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
221 221 PyErr_SetString(PyExc_ValueError,
222 222 "data buffer should be contiguous and have at most one dimension");
223 223 PyBuffer_Release(&self->inBuffer);
224 224 return NULL;
225 225 }
226 226
227 227 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
228 228 if (!result) {
229 229 PyBuffer_Release(&self->inBuffer);
230 230 return NULL;
231 231 }
232 232
233 233 self->input.src = self->inBuffer.buf;
234 234 self->input.size = self->inBuffer.len;
235 235 self->input.pos = 0;
236 236
237 237 result->chunker = self;
238 238 Py_INCREF(result->chunker);
239 239
240 240 result->mode = compressionchunker_mode_normal;
241 241
242 242 return result;
243 243 }
244 244
245 245 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
246 246 ZstdCompressionChunkerIterator* result;
247 247
248 248 if (self->finished) {
249 249 PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
250 250 return NULL;
251 251 }
252 252
253 253 if (self->inBuffer.obj) {
254 254 PyErr_SetString(ZstdError,
255 255 "cannot call finish() before consuming output from previous operation");
256 256 return NULL;
257 257 }
258 258
259 259 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
260 260 if (!result) {
261 261 return NULL;
262 262 }
263 263
264 264 result->chunker = self;
265 265 Py_INCREF(result->chunker);
266 266
267 267 result->mode = compressionchunker_mode_finish;
268 268
269 269 return result;
270 270 }
271 271
272 272 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
273 273 ZstdCompressionChunkerIterator* result;
274 274
275 275 if (self->finished) {
276 276 PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
277 277 return NULL;
278 278 }
279 279
280 280 if (self->inBuffer.obj) {
281 281 PyErr_SetString(ZstdError,
282 282 "cannot call flush() before consuming output from previous operation");
283 283 return NULL;
284 284 }
285 285
286 286 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
287 287 if (!result) {
288 288 return NULL;
289 289 }
290 290
291 291 result->chunker = self;
292 292 Py_INCREF(result->chunker);
293 293
294 294 result->mode = compressionchunker_mode_flush;
295 295
296 296 return result;
297 297 }
298 298
299 299 static PyMethodDef ZstdCompressionChunker_methods[] = {
300 300 { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
301 301 PyDoc_STR("compress data") },
302 302 { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
303 303 PyDoc_STR("finish compression operation") },
304 304 { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
305 305 PyDoc_STR("finish compression operation") },
306 306 { NULL, NULL }
307 307 };
308 308
/* Type object for the compression chunker. Provides deallocation and the
 * compress()/finish()/flush() methods. The type pointer in the header is
 * NULL here and is filled in by compressionchunker_module_init() before
 * PyType_Ready() is called.
 *
 * NOTE(review): tp_name ends in "Type", unlike the other type names in
 * these files (e.g. "zstd.ZstdCompressionChunkerIterator") -- confirm
 * whether that is intentional before changing it, since the name is visible
 * to Python code. */
PyTypeObject ZstdCompressionChunkerType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdCompressionChunkerType", /* tp_name */
    sizeof(ZstdCompressionChunker), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    ZstdCompressionChunker__doc__, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    ZstdCompressionChunker_methods, /* tp_methods */
    0, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};
349 349
350 350 void compressionchunker_module_init(PyObject* module) {
351 Py_TYPE(&ZstdCompressionChunkerIteratorType) = &PyType_Type;
351 Py_SET_TYPE(&ZstdCompressionChunkerIteratorType, &PyType_Type);
352 352 if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
353 353 return;
354 354 }
355 355
356 Py_TYPE(&ZstdCompressionChunkerType) = &PyType_Type;
356 Py_SET_TYPE(&ZstdCompressionChunkerType, &PyType_Type);
357 357 if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
358 358 return;
359 359 }
360 360 }
@@ -1,411 +1,411 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
/* train_dictionary(dict_size, samples, k=0, d=0, notifications=0, dict_id=0,
 *                  level=0, steps=0, threads=0)
 *
 * Train a compression dictionary of at most `dict_size` bytes from a list of
 * bytes samples. The samples are copied into one contiguous buffer together
 * with an array of their sizes, and one of three ZDICT training functions is
 * chosen depending on which parameters are non-zero (see the comments in the
 * body).
 *
 * Returns a new ZstdCompressionDict that owns the trained dictionary data,
 * or NULL with a Python exception set (ValueError for a non-bytes sample,
 * MemoryError for allocation failure, ZstdError if training fails). */
ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
    static char* kwlist[] = {
        "dict_size",
        "samples",
        "k",
        "d",
        "notifications",
        "dict_id",
        "level",
        "steps",
        "threads",
        NULL
    };

    size_t capacity;
    PyObject* samples;
    unsigned k = 0;
    unsigned d = 0;
    unsigned notifications = 0;
    unsigned dictID = 0;
    int level = 0;
    unsigned steps = 0;
    int threads = 0;
    ZDICT_cover_params_t params;
    Py_ssize_t samplesLen;
    Py_ssize_t i;
    size_t samplesSize = 0;
    void* sampleBuffer = NULL;
    size_t* sampleSizes = NULL;
    void* sampleOffset;
    Py_ssize_t sampleSize;
    void* dict = NULL;
    size_t zresult;
    ZstdCompressionDict* result = NULL;

    /* NOTE(review): "n" parses into a Py_ssize_t, while capacity is declared
     * size_t -- confirm negative dict_size values are handled as intended. */
    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary",
        kwlist, &capacity, &PyList_Type, &samples,
        &k, &d, &notifications, &dictID, &level, &steps, &threads)) {
        return NULL;
    }

    /* A negative thread count is replaced by the value of cpu_count(). */
    if (threads < 0) {
        threads = cpu_count();
    }

    memset(&params, 0, sizeof(params));
    params.k = k;
    params.d = d;
    params.steps = steps;
    params.nbThreads = threads;
    params.zParams.notificationLevel = notifications;
    params.zParams.dictID = dictID;
    params.zParams.compressionLevel = level;

    /* Figure out total size of input samples. */
    samplesLen = PyList_Size(samples);
    for (i = 0; i < samplesLen; i++) {
        PyObject* sampleItem = PyList_GET_ITEM(samples, i);

        /* Nothing has been allocated yet, so returning directly is safe. */
        if (!PyBytes_Check(sampleItem)) {
            PyErr_SetString(PyExc_ValueError, "samples must be bytes");
            return NULL;
        }
        samplesSize += PyBytes_GET_SIZE(sampleItem);
    }

    sampleBuffer = PyMem_Malloc(samplesSize);
    if (!sampleBuffer) {
        PyErr_NoMemory();
        goto finally;
    }

    sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
    if (!sampleSizes) {
        PyErr_NoMemory();
        goto finally;
    }

    /* Copy every sample back-to-back into sampleBuffer and record its size. */
    sampleOffset = sampleBuffer;
    for (i = 0; i < samplesLen; i++) {
        PyObject* sampleItem = PyList_GET_ITEM(samples, i);
        sampleSize = PyBytes_GET_SIZE(sampleItem);
        sampleSizes[i] = sampleSize;
        memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
        sampleOffset = (char*)sampleOffset + sampleSize;
    }

    /* Destination buffer for the trained dictionary. */
    dict = PyMem_Malloc(capacity);
    if (!dict) {
        PyErr_NoMemory();
        goto finally;
    }

    Py_BEGIN_ALLOW_THREADS
    /* No parameters uses the default function, which will use default params
       and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */
    if (!params.k && !params.d && !params.zParams.compressionLevel
        && !params.zParams.notificationLevel && !params.zParams.dictID) {
        zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer,
            sampleSizes, (unsigned)samplesLen);
    }
    /* Use optimize mode if user controlled steps or threads explicitly. */
    else if (params.steps || params.nbThreads) {
        zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity,
            sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
    }
    /* Non-optimize mode with explicit control. */
    else {
        zresult = ZDICT_trainFromBuffer_cover(dict, capacity,
            sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
    }
    Py_END_ALLOW_THREADS

    /* dict is freed here because the finally block only frees the sample
     * buffers. */
    if (ZDICT_isError(zresult)) {
        PyMem_Free(dict);
        PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
        goto finally;
    }

    result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
    if (!result) {
        PyMem_Free(dict);
        goto finally;
    }

    /* Ownership of dict moves to the result object; zresult is stored as the
     * dictionary size. */
    result->dictData = dict;
    result->dictSize = zresult;
    result->dictType = ZSTD_dct_fullDict;
    result->d = params.d;
    result->k = params.k;
    result->cdict = NULL;
    result->ddict = NULL;

finally:
    /* Reached on success and on failure: the sample copies are always freed. */
    PyMem_Free(sampleBuffer);
    PyMem_Free(sampleSizes);

    return result;
}
152 152
153 153 int ensure_ddict(ZstdCompressionDict* dict) {
154 154 if (dict->ddict) {
155 155 return 0;
156 156 }
157 157
158 158 Py_BEGIN_ALLOW_THREADS
159 159 dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize,
160 160 ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem);
161 161 Py_END_ALLOW_THREADS
162 162 if (!dict->ddict) {
163 163 PyErr_SetString(ZstdError, "could not create decompression dict");
164 164 return 1;
165 165 }
166 166
167 167 return 0;
168 168 }
169 169
170 170 PyDoc_STRVAR(ZstdCompressionDict__doc__,
171 171 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
172 172 "\n"
173 173 "This type holds the results of a computed Zstandard compression dictionary.\n"
174 174 "Instances are obtained by calling ``train_dictionary()`` or by passing\n"
175 175 "bytes obtained from another source into the constructor.\n"
176 176 );
177 177
178 178 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
179 179 static char* kwlist[] = {
180 180 "data",
181 181 "dict_type",
182 182 NULL
183 183 };
184 184
185 185 int result = -1;
186 186 Py_buffer source;
187 187 unsigned dictType = ZSTD_dct_auto;
188 188
189 189 self->dictData = NULL;
190 190 self->dictSize = 0;
191 191 self->cdict = NULL;
192 192 self->ddict = NULL;
193 193
194 194 #if PY_MAJOR_VERSION >= 3
195 195 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict",
196 196 #else
197 197 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict",
198 198 #endif
199 199 kwlist, &source, &dictType)) {
200 200 return -1;
201 201 }
202 202
203 203 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
204 204 PyErr_SetString(PyExc_ValueError,
205 205 "data buffer should be contiguous and have at most one dimension");
206 206 goto finally;
207 207 }
208 208
209 209 if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent
210 210 && dictType != ZSTD_dct_fullDict) {
211 211 PyErr_Format(PyExc_ValueError,
212 212 "invalid dictionary load mode: %d; must use DICT_TYPE_* constants",
213 213 dictType);
214 214 goto finally;
215 215 }
216 216
217 217 self->dictType = dictType;
218 218
219 219 self->dictData = PyMem_Malloc(source.len);
220 220 if (!self->dictData) {
221 221 PyErr_NoMemory();
222 222 goto finally;
223 223 }
224 224
225 225 memcpy(self->dictData, source.buf, source.len);
226 226 self->dictSize = source.len;
227 227
228 228 result = 0;
229 229
230 230 finally:
231 231 PyBuffer_Release(&source);
232 232 return result;
233 233 }
234 234
235 235 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
236 236 if (self->cdict) {
237 237 ZSTD_freeCDict(self->cdict);
238 238 self->cdict = NULL;
239 239 }
240 240
241 241 if (self->ddict) {
242 242 ZSTD_freeDDict(self->ddict);
243 243 self->ddict = NULL;
244 244 }
245 245
246 246 if (self->dictData) {
247 247 PyMem_Free(self->dictData);
248 248 self->dictData = NULL;
249 249 }
250 250
251 251 PyObject_Del(self);
252 252 }
253 253
254 254 PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__,
255 255 "Precompute a dictionary so it can be used by multiple compressors.\n"
256 256 );
257 257
258 258 static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
259 259 static char* kwlist[] = {
260 260 "level",
261 261 "compression_params",
262 262 NULL
263 263 };
264 264
265 265 int level = 0;
266 266 ZstdCompressionParametersObject* compressionParams = NULL;
267 267 ZSTD_compressionParameters cParams;
268 268 size_t zresult;
269 269
270 270 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist,
271 271 &level, &ZstdCompressionParametersType, &compressionParams)) {
272 272 return NULL;
273 273 }
274 274
275 275 if (level && compressionParams) {
276 276 PyErr_SetString(PyExc_ValueError,
277 277 "must only specify one of level or compression_params");
278 278 return NULL;
279 279 }
280 280
281 281 if (!level && !compressionParams) {
282 282 PyErr_SetString(PyExc_ValueError,
283 283 "must specify one of level or compression_params");
284 284 return NULL;
285 285 }
286 286
287 287 if (self->cdict) {
288 288 zresult = ZSTD_freeCDict(self->cdict);
289 289 self->cdict = NULL;
290 290 if (ZSTD_isError(zresult)) {
291 291 PyErr_Format(ZstdError, "unable to free CDict: %s",
292 292 ZSTD_getErrorName(zresult));
293 293 return NULL;
294 294 }
295 295 }
296 296
297 297 if (level) {
298 298 cParams = ZSTD_getCParams(level, 0, self->dictSize);
299 299 }
300 300 else {
301 301 if (to_cparams(compressionParams, &cParams)) {
302 302 return NULL;
303 303 }
304 304 }
305 305
306 306 assert(!self->cdict);
307 307 self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize,
308 308 ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem);
309 309
310 310 if (!self->cdict) {
311 311 PyErr_SetString(ZstdError, "unable to precompute dictionary");
312 312 return NULL;
313 313 }
314 314
315 315 Py_RETURN_NONE;
316 316 }
317 317
318 318 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
319 319 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
320 320
321 321 return PyLong_FromLong(dictID);
322 322 }
323 323
324 324 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
325 325 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
326 326 }
327 327
/* Python-visible methods of ZstdCompressionDict; terminated by a NULL
 * sentinel. */
static PyMethodDef ZstdCompressionDict_methods[] = {
    { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
      PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
    { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
      PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
    { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress,
      METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ },
    { NULL, NULL }
};
337 337
/* Read-only attributes of ZstdCompressionDict exposed straight from the
 * struct; terminated by a NULL sentinel. */
static PyMemberDef ZstdCompressionDict_members[] = {
    { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
      "segment size" },
    { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
      "dmer size" },
    { NULL }
};
345 345
346 346 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
347 347 return self->dictSize;
348 348 }
349 349
/* Sequence protocol table for ZstdCompressionDict: only len() is supported. */
static PySequenceMethods ZstdCompressionDict_sq = {
    (lenfunc)ZstdCompressionDict_length, /* sq_length */
    0, /* sq_concat */
    0, /* sq_repeat */
    0, /* sq_item */
    0, /* sq_ass_item */
    0, /* sq_contains */
    0, /* sq_inplace_concat */
    0 /* sq_inplace_repeat */
};
360 360
/* Type object for zstd.ZstdCompressionDict. Wires up deallocation, the
 * sequence (len) protocol, the methods/members tables and the tp_init
 * constructor. The type pointer in the header is NULL here and is filled in
 * by compressiondict_module_init() before PyType_Ready() is called. */
PyTypeObject ZstdCompressionDictType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdCompressionDict", /* tp_name */
    sizeof(ZstdCompressionDict), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    &ZstdCompressionDict_sq, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    ZstdCompressionDict__doc__, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    ZstdCompressionDict_methods, /* tp_methods */
    ZstdCompressionDict_members, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    (initproc)ZstdCompressionDict_init, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};
401 401
402 402 void compressiondict_module_init(PyObject* mod) {
403 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
403 Py_SET_TYPE(&ZstdCompressionDictType, &PyType_Type);
404 404 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
405 405 return;
406 406 }
407 407
408 408 Py_INCREF((PyObject*)&ZstdCompressionDictType);
409 409 PyModule_AddObject(mod, "ZstdCompressionDict",
410 410 (PyObject*)&ZstdCompressionDictType);
411 411 }
@@ -1,572 +1,572 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
14 14 size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value);
15 15 if (ZSTD_isError(zresult)) {
16 16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
17 17 ZSTD_getErrorName(zresult));
18 18 return 1;
19 19 }
20 20
21 21 return 0;
22 22 }
23 23
/* Set one parameter via set_parameter(); on failure return -1 from the
 * enclosing function (set_parameter() has already raised ZstdError).
 *
 * Wrapped in do/while(0) so the macro behaves as a single statement and the
 * call site's trailing semicolon does not produce a stray empty statement. */
#define TRY_SET_PARAMETER(params, param, value) \
    do { \
        if (set_parameter(params, param, value)) { \
            return -1; \
        } \
    } while (0)
25 25
/* Copy the value of `param` from the `source` params object into `dest`.
 *
 * On any zstd error this raises ZstdError and returns 1 from the enclosing
 * function. Raising here matters because callers such as reset_params()
 * propagate the non-zero result up to tp_init, which must not report failure
 * without an exception being set.
 *
 * Wrapped in do/while(0) so it acts as a single statement at the call site. */
#define TRY_COPY_PARAMETER(source, dest, param) \
    do { \
        int copiedValue; \
        size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &copiedValue); \
        if (ZSTD_isError(zresult)) { \
            PyErr_Format(ZstdError, "unable to retrieve parameter: %s", \
                         ZSTD_getErrorName(zresult)); \
            return 1; \
        } \
        zresult = ZSTD_CCtxParams_setParameter(dest, param, copiedValue); \
        if (ZSTD_isError(zresult)) { \
            PyErr_Format(ZstdError, "unable to set compression context parameter: %s", \
                         ZSTD_getErrorName(zresult)); \
            return 1; \
        } \
    } while (0)
37 37
38 38 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
39 39 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
40 40
41 41 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
42 42 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
43 43 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
44 44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
45 45 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
46 46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
47 47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
48 48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
49 49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
50 50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
51 51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
52 52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
53 53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
54 54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
55 55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
56 56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
57 57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
58 58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
59 59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
60 60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
61 61
62 62 return 0;
63 63 }
64 64
65 65 int reset_params(ZstdCompressionParametersObject* params) {
66 66 if (params->params) {
67 67 ZSTD_CCtxParams_reset(params->params);
68 68 }
69 69 else {
70 70 params->params = ZSTD_createCCtxParams();
71 71 if (!params->params) {
72 72 PyErr_NoMemory();
73 73 return 1;
74 74 }
75 75 }
76 76
77 77 return set_parameters(params->params, params);
78 78 }
79 79
/* Read `param` from `params` into the int pointed to by `value`.
 *
 * On a zstd error this raises ZstdError and returns 1 from the enclosing
 * function.
 *
 * Wrapped in do/while(0) so it acts as a single statement at the call site. */
#define TRY_GET_PARAMETER(params, param, value) \
    do { \
        size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
        if (ZSTD_isError(zresult)) { \
            PyErr_Format(ZstdError, "unable to retrieve parameter: %s", \
                         ZSTD_getErrorName(zresult)); \
            return 1; \
        } \
    } while (0)
87 87
88 88 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
89 89 int value;
90 90
91 91 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
92 92 cparams->windowLog = value;
93 93
94 94 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
95 95 cparams->chainLog = value;
96 96
97 97 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
98 98 cparams->hashLog = value;
99 99
100 100 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
101 101 cparams->searchLog = value;
102 102
103 103 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
104 104 cparams->minMatch = value;
105 105
106 106 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
107 107 cparams->targetLength = value;
108 108
109 109 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
110 110 cparams->strategy = value;
111 111
112 112 return 0;
113 113 }
114 114
115 115 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
116 116 static char* kwlist[] = {
117 117 "format",
118 118 "compression_level",
119 119 "window_log",
120 120 "hash_log",
121 121 "chain_log",
122 122 "search_log",
123 123 "min_match",
124 124 "target_length",
125 125 "compression_strategy",
126 126 "strategy",
127 127 "write_content_size",
128 128 "write_checksum",
129 129 "write_dict_id",
130 130 "job_size",
131 131 "overlap_log",
132 132 "overlap_size_log",
133 133 "force_max_window",
134 134 "enable_ldm",
135 135 "ldm_hash_log",
136 136 "ldm_min_match",
137 137 "ldm_bucket_size_log",
138 138 "ldm_hash_rate_log",
139 139 "ldm_hash_every_log",
140 140 "threads",
141 141 NULL
142 142 };
143 143
144 144 int format = 0;
145 145 int compressionLevel = 0;
146 146 int windowLog = 0;
147 147 int hashLog = 0;
148 148 int chainLog = 0;
149 149 int searchLog = 0;
150 150 int minMatch = 0;
151 151 int targetLength = 0;
152 152 int compressionStrategy = -1;
153 153 int strategy = -1;
154 154 int contentSizeFlag = 1;
155 155 int checksumFlag = 0;
156 156 int dictIDFlag = 0;
157 157 int jobSize = 0;
158 158 int overlapLog = -1;
159 159 int overlapSizeLog = -1;
160 160 int forceMaxWindow = 0;
161 161 int enableLDM = 0;
162 162 int ldmHashLog = 0;
163 163 int ldmMinMatch = 0;
164 164 int ldmBucketSizeLog = 0;
165 165 int ldmHashRateLog = -1;
166 166 int ldmHashEveryLog = -1;
167 167 int threads = 0;
168 168
169 169 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
170 170 "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
171 171 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
172 172 &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
173 173 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
174 174 &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
175 175 &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
176 176 return -1;
177 177 }
178 178
179 179 if (reset_params(self)) {
180 180 return -1;
181 181 }
182 182
183 183 if (threads < 0) {
184 184 threads = cpu_count();
185 185 }
186 186
187 187 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
188 188 * because setting ZSTD_c_nbWorkers resets the other parameters. */
189 189 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
190 190
191 191 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
192 192 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
193 193 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
194 194 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
195 195 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
196 196 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
197 197 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
198 198 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
199 199
200 200 if (compressionStrategy != -1 && strategy != -1) {
201 201 PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
202 202 return -1;
203 203 }
204 204
205 205 if (compressionStrategy != -1) {
206 206 strategy = compressionStrategy;
207 207 }
208 208 else if (strategy == -1) {
209 209 strategy = 0;
210 210 }
211 211
212 212 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
213 213 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
214 214 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
215 215 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
216 216 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
217 217
218 218 if (overlapLog != -1 && overlapSizeLog != -1) {
219 219 PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
220 220 return -1;
221 221 }
222 222
223 223 if (overlapSizeLog != -1) {
224 224 overlapLog = overlapSizeLog;
225 225 }
226 226 else if (overlapLog == -1) {
227 227 overlapLog = 0;
228 228 }
229 229
230 230 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
231 231 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
232 232 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
233 233 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
234 234 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
235 235 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
236 236
237 237 if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
238 238 PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
239 239 return -1;
240 240 }
241 241
242 242 if (ldmHashEveryLog != -1) {
243 243 ldmHashRateLog = ldmHashEveryLog;
244 244 }
245 245 else if (ldmHashRateLog == -1) {
246 246 ldmHashRateLog = 0;
247 247 }
248 248
249 249 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
250 250
251 251 return 0;
252 252 }
253 253
254 254 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
255 255 "Create a CompressionParameters from a compression level and target sizes\n"
256 256 );
257 257
258 258 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
259 259 int managedKwargs = 0;
260 260 int level;
261 261 PyObject* sourceSize = NULL;
262 262 PyObject* dictSize = NULL;
263 263 unsigned PY_LONG_LONG iSourceSize = 0;
264 264 Py_ssize_t iDictSize = 0;
265 265 PyObject* val;
266 266 ZSTD_compressionParameters params;
267 267 ZstdCompressionParametersObject* result = NULL;
268 268 int res;
269 269
270 270 if (!PyArg_ParseTuple(args, "i:from_level",
271 271 &level)) {
272 272 return NULL;
273 273 }
274 274
275 275 if (!kwargs) {
276 276 kwargs = PyDict_New();
277 277 if (!kwargs) {
278 278 return NULL;
279 279 }
280 280 managedKwargs = 1;
281 281 }
282 282
283 283 sourceSize = PyDict_GetItemString(kwargs, "source_size");
284 284 if (sourceSize) {
285 285 #if PY_MAJOR_VERSION >= 3
286 286 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
287 287 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
288 288 goto cleanup;
289 289 }
290 290 #else
291 291 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
292 292 #endif
293 293
294 294 PyDict_DelItemString(kwargs, "source_size");
295 295 }
296 296
297 297 dictSize = PyDict_GetItemString(kwargs, "dict_size");
298 298 if (dictSize) {
299 299 #if PY_MAJOR_VERSION >= 3
300 300 iDictSize = PyLong_AsSsize_t(dictSize);
301 301 #else
302 302 iDictSize = PyInt_AsSsize_t(dictSize);
303 303 #endif
304 304 if (iDictSize == -1) {
305 305 goto cleanup;
306 306 }
307 307
308 308 PyDict_DelItemString(kwargs, "dict_size");
309 309 }
310 310
311 311
312 312 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
313 313
314 314 /* Values derived from the input level and sizes are passed along to the
315 315 constructor. But only if a value doesn't already exist. */
316 316 val = PyDict_GetItemString(kwargs, "window_log");
317 317 if (!val) {
318 318 val = PyLong_FromUnsignedLong(params.windowLog);
319 319 if (!val) {
320 320 goto cleanup;
321 321 }
322 322 PyDict_SetItemString(kwargs, "window_log", val);
323 323 Py_DECREF(val);
324 324 }
325 325
326 326 val = PyDict_GetItemString(kwargs, "chain_log");
327 327 if (!val) {
328 328 val = PyLong_FromUnsignedLong(params.chainLog);
329 329 if (!val) {
330 330 goto cleanup;
331 331 }
332 332 PyDict_SetItemString(kwargs, "chain_log", val);
333 333 Py_DECREF(val);
334 334 }
335 335
336 336 val = PyDict_GetItemString(kwargs, "hash_log");
337 337 if (!val) {
338 338 val = PyLong_FromUnsignedLong(params.hashLog);
339 339 if (!val) {
340 340 goto cleanup;
341 341 }
342 342 PyDict_SetItemString(kwargs, "hash_log", val);
343 343 Py_DECREF(val);
344 344 }
345 345
346 346 val = PyDict_GetItemString(kwargs, "search_log");
347 347 if (!val) {
348 348 val = PyLong_FromUnsignedLong(params.searchLog);
349 349 if (!val) {
350 350 goto cleanup;
351 351 }
352 352 PyDict_SetItemString(kwargs, "search_log", val);
353 353 Py_DECREF(val);
354 354 }
355 355
356 356 val = PyDict_GetItemString(kwargs, "min_match");
357 357 if (!val) {
358 358 val = PyLong_FromUnsignedLong(params.minMatch);
359 359 if (!val) {
360 360 goto cleanup;
361 361 }
362 362 PyDict_SetItemString(kwargs, "min_match", val);
363 363 Py_DECREF(val);
364 364 }
365 365
366 366 val = PyDict_GetItemString(kwargs, "target_length");
367 367 if (!val) {
368 368 val = PyLong_FromUnsignedLong(params.targetLength);
369 369 if (!val) {
370 370 goto cleanup;
371 371 }
372 372 PyDict_SetItemString(kwargs, "target_length", val);
373 373 Py_DECREF(val);
374 374 }
375 375
376 376 val = PyDict_GetItemString(kwargs, "compression_strategy");
377 377 if (!val) {
378 378 val = PyLong_FromUnsignedLong(params.strategy);
379 379 if (!val) {
380 380 goto cleanup;
381 381 }
382 382 PyDict_SetItemString(kwargs, "compression_strategy", val);
383 383 Py_DECREF(val);
384 384 }
385 385
386 386 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
387 387 if (!result) {
388 388 goto cleanup;
389 389 }
390 390
391 391 result->params = NULL;
392 392
393 393 val = PyTuple_New(0);
394 394 if (!val) {
395 395 Py_CLEAR(result);
396 396 goto cleanup;
397 397 }
398 398
399 399 res = ZstdCompressionParameters_init(result, val, kwargs);
400 400 Py_DECREF(val);
401 401
402 402 if (res) {
403 403 Py_CLEAR(result);
404 404 goto cleanup;
405 405 }
406 406
407 407 cleanup:
408 408 if (managedKwargs) {
409 409 Py_DECREF(kwargs);
410 410 }
411 411
412 412 return result;
413 413 }
414 414
415 415 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
416 416 "Estimate the size in bytes of a compression context for compression parameters\n"
417 417 );
418 418
419 419 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
420 420 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
421 421 }
422 422
423 423 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
424 424 "ZstdCompressionParameters: low-level control over zstd compression");
425 425
426 426 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
427 427 if (self->params) {
428 428 ZSTD_freeCCtxParams(self->params);
429 429 self->params = NULL;
430 430 }
431 431
432 432 PyObject_Del(self);
433 433 }
434 434
435 435 #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
436 436 int result; \
437 437 size_t zresult; \
438 438 ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
439 439 zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
440 440 if (ZSTD_isError(zresult)) { \
441 441 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
442 442 ZSTD_getErrorName(zresult)); \
443 443 return NULL; \
444 444 } \
445 445 return PyLong_FromLong(result); \
446 446 }
447 447
448 448 PARAM_GETTER(format, ZSTD_c_format)
449 449 PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
450 450 PARAM_GETTER(window_log, ZSTD_c_windowLog)
451 451 PARAM_GETTER(hash_log, ZSTD_c_hashLog)
452 452 PARAM_GETTER(chain_log, ZSTD_c_chainLog)
453 453 PARAM_GETTER(search_log, ZSTD_c_searchLog)
454 454 PARAM_GETTER(min_match, ZSTD_c_minMatch)
455 455 PARAM_GETTER(target_length, ZSTD_c_targetLength)
456 456 PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
457 457 PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
458 458 PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
459 459 PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
460 460 PARAM_GETTER(job_size, ZSTD_c_jobSize)
461 461 PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
462 462 PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
463 463 PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
464 464 PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
465 465 PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
466 466 PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
467 467 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
468 468 PARAM_GETTER(threads, ZSTD_c_nbWorkers)
469 469
470 470 static PyMethodDef ZstdCompressionParameters_methods[] = {
471 471 {
472 472 "from_level",
473 473 (PyCFunction)CompressionParameters_from_level,
474 474 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
475 475 ZstdCompressionParameters_from_level__doc__
476 476 },
477 477 {
478 478 "estimated_compression_context_size",
479 479 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
480 480 METH_NOARGS,
481 481 ZstdCompressionParameters_estimated_compression_context_size__doc__
482 482 },
483 483 { NULL, NULL }
484 484 };
485 485
486 486 #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
487 487
488 488 static PyGetSetDef ZstdCompressionParameters_getset[] = {
489 489 GET_SET_ENTRY(format),
490 490 GET_SET_ENTRY(compression_level),
491 491 GET_SET_ENTRY(window_log),
492 492 GET_SET_ENTRY(hash_log),
493 493 GET_SET_ENTRY(chain_log),
494 494 GET_SET_ENTRY(search_log),
495 495 GET_SET_ENTRY(min_match),
496 496 GET_SET_ENTRY(target_length),
497 497 GET_SET_ENTRY(compression_strategy),
498 498 GET_SET_ENTRY(write_content_size),
499 499 GET_SET_ENTRY(write_checksum),
500 500 GET_SET_ENTRY(write_dict_id),
501 501 GET_SET_ENTRY(threads),
502 502 GET_SET_ENTRY(job_size),
503 503 GET_SET_ENTRY(overlap_log),
504 504 /* TODO remove this deprecated attribute */
505 505 { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
506 506 GET_SET_ENTRY(force_max_window),
507 507 GET_SET_ENTRY(enable_ldm),
508 508 GET_SET_ENTRY(ldm_hash_log),
509 509 GET_SET_ENTRY(ldm_min_match),
510 510 GET_SET_ENTRY(ldm_bucket_size_log),
511 511 GET_SET_ENTRY(ldm_hash_rate_log),
512 512 /* TODO remove this deprecated attribute */
513 513 { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
514 514 { NULL }
515 515 };
516 516
517 517 PyTypeObject ZstdCompressionParametersType = {
518 518 PyVarObject_HEAD_INIT(NULL, 0)
519 519 "ZstdCompressionParameters", /* tp_name */
520 520 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
521 521 0, /* tp_itemsize */
522 522 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
523 523 0, /* tp_print */
524 524 0, /* tp_getattr */
525 525 0, /* tp_setattr */
526 526 0, /* tp_compare */
527 527 0, /* tp_repr */
528 528 0, /* tp_as_number */
529 529 0, /* tp_as_sequence */
530 530 0, /* tp_as_mapping */
531 531 0, /* tp_hash */
532 532 0, /* tp_call */
533 533 0, /* tp_str */
534 534 0, /* tp_getattro */
535 535 0, /* tp_setattro */
536 536 0, /* tp_as_buffer */
537 537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
538 538 ZstdCompressionParameters__doc__, /* tp_doc */
539 539 0, /* tp_traverse */
540 540 0, /* tp_clear */
541 541 0, /* tp_richcompare */
542 542 0, /* tp_weaklistoffset */
543 543 0, /* tp_iter */
544 544 0, /* tp_iternext */
545 545 ZstdCompressionParameters_methods, /* tp_methods */
546 546 0, /* tp_members */
547 547 ZstdCompressionParameters_getset, /* tp_getset */
548 548 0, /* tp_base */
549 549 0, /* tp_dict */
550 550 0, /* tp_descr_get */
551 551 0, /* tp_descr_set */
552 552 0, /* tp_dictoffset */
553 553 (initproc)ZstdCompressionParameters_init, /* tp_init */
554 554 0, /* tp_alloc */
555 555 PyType_GenericNew, /* tp_new */
556 556 };
557 557
558 558 void compressionparams_module_init(PyObject* mod) {
559 Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
559 Py_SET_TYPE(&ZstdCompressionParametersType, &PyType_Type);
560 560 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
561 561 return;
562 562 }
563 563
564 564 Py_INCREF(&ZstdCompressionParametersType);
565 565 PyModule_AddObject(mod, "ZstdCompressionParameters",
566 566 (PyObject*)&ZstdCompressionParametersType);
567 567
568 568 /* TODO remove deprecated alias. */
569 569 Py_INCREF(&ZstdCompressionParametersType);
570 570 PyModule_AddObject(mod, "CompressionParameters",
571 571 (PyObject*)&ZstdCompressionParametersType);
572 572 }
@@ -1,818 +1,818 b''
1 1 /**
2 2 * Copyright (c) 2017-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 static void set_unsupported_operation(void) {
14 14 PyObject* iomod;
15 15 PyObject* exc;
16 16
17 17 iomod = PyImport_ImportModule("io");
18 18 if (NULL == iomod) {
19 19 return;
20 20 }
21 21
22 22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 23 if (NULL == exc) {
24 24 Py_DECREF(iomod);
25 25 return;
26 26 }
27 27
28 28 PyErr_SetNone(exc);
29 29 Py_DECREF(exc);
30 30 Py_DECREF(iomod);
31 31 }
32 32
33 33 static void reader_dealloc(ZstdCompressionReader* self) {
34 34 Py_XDECREF(self->compressor);
35 35 Py_XDECREF(self->reader);
36 36
37 37 if (self->buffer.buf) {
38 38 PyBuffer_Release(&self->buffer);
39 39 memset(&self->buffer, 0, sizeof(self->buffer));
40 40 }
41 41
42 42 PyObject_Del(self);
43 43 }
44 44
45 45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
46 46 if (self->entered) {
47 47 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
48 48 return NULL;
49 49 }
50 50
51 51 self->entered = 1;
52 52
53 53 Py_INCREF(self);
54 54 return self;
55 55 }
56 56
57 57 static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
58 58 PyObject* exc_type;
59 59 PyObject* exc_value;
60 60 PyObject* exc_tb;
61 61
62 62 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
63 63 return NULL;
64 64 }
65 65
66 66 self->entered = 0;
67 67 self->closed = 1;
68 68
69 69 /* Release resources associated with source. */
70 70 Py_CLEAR(self->reader);
71 71 if (self->buffer.buf) {
72 72 PyBuffer_Release(&self->buffer);
73 73 memset(&self->buffer, 0, sizeof(self->buffer));
74 74 }
75 75
76 76 Py_CLEAR(self->compressor);
77 77
78 78 Py_RETURN_FALSE;
79 79 }
80 80
81 81 static PyObject* reader_readable(ZstdCompressionReader* self) {
82 82 Py_RETURN_TRUE;
83 83 }
84 84
85 85 static PyObject* reader_writable(ZstdCompressionReader* self) {
86 86 Py_RETURN_FALSE;
87 87 }
88 88
89 89 static PyObject* reader_seekable(ZstdCompressionReader* self) {
90 90 Py_RETURN_FALSE;
91 91 }
92 92
93 93 static PyObject* reader_readline(PyObject* self, PyObject* args) {
94 94 set_unsupported_operation();
95 95 return NULL;
96 96 }
97 97
98 98 static PyObject* reader_readlines(PyObject* self, PyObject* args) {
99 99 set_unsupported_operation();
100 100 return NULL;
101 101 }
102 102
103 103 static PyObject* reader_write(PyObject* self, PyObject* args) {
104 104 PyErr_SetString(PyExc_OSError, "stream is not writable");
105 105 return NULL;
106 106 }
107 107
108 108 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
109 109 PyErr_SetString(PyExc_OSError, "stream is not writable");
110 110 return NULL;
111 111 }
112 112
113 113 static PyObject* reader_isatty(PyObject* self) {
114 114 Py_RETURN_FALSE;
115 115 }
116 116
117 117 static PyObject* reader_flush(PyObject* self) {
118 118 Py_RETURN_NONE;
119 119 }
120 120
121 121 static PyObject* reader_close(ZstdCompressionReader* self) {
122 122 self->closed = 1;
123 123 Py_RETURN_NONE;
124 124 }
125 125
126 126 static PyObject* reader_tell(ZstdCompressionReader* self) {
127 127 /* TODO should this raise OSError since stream isn't seekable? */
128 128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
129 129 }
130 130
131 131 int read_compressor_input(ZstdCompressionReader* self) {
132 132 if (self->finishedInput) {
133 133 return 0;
134 134 }
135 135
136 136 if (self->input.pos != self->input.size) {
137 137 return 0;
138 138 }
139 139
140 140 if (self->reader) {
141 141 Py_buffer buffer;
142 142
143 143 assert(self->readResult == NULL);
144 144
145 145 self->readResult = PyObject_CallMethod(self->reader, "read",
146 146 "k", self->readSize);
147 147
148 148 if (NULL == self->readResult) {
149 149 return -1;
150 150 }
151 151
152 152 memset(&buffer, 0, sizeof(buffer));
153 153
154 154 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
155 155 return -1;
156 156 }
157 157
158 158 /* EOF */
159 159 if (0 == buffer.len) {
160 160 self->finishedInput = 1;
161 161 Py_CLEAR(self->readResult);
162 162 }
163 163 else {
164 164 self->input.src = buffer.buf;
165 165 self->input.size = buffer.len;
166 166 self->input.pos = 0;
167 167 }
168 168
169 169 PyBuffer_Release(&buffer);
170 170 }
171 171 else {
172 172 assert(self->buffer.buf);
173 173
174 174 self->input.src = self->buffer.buf;
175 175 self->input.size = self->buffer.len;
176 176 self->input.pos = 0;
177 177 }
178 178
179 179 return 1;
180 180 }
181 181
182 182 int compress_input(ZstdCompressionReader* self, ZSTD_outBuffer* output) {
183 183 size_t oldPos;
184 184 size_t zresult;
185 185
186 186 /* If we have data left over, consume it. */
187 187 if (self->input.pos < self->input.size) {
188 188 oldPos = output->pos;
189 189
190 190 Py_BEGIN_ALLOW_THREADS
191 191 zresult = ZSTD_compressStream2(self->compressor->cctx,
192 192 output, &self->input, ZSTD_e_continue);
193 193 Py_END_ALLOW_THREADS
194 194
195 195 self->bytesCompressed += output->pos - oldPos;
196 196
197 197 /* Input exhausted. Clear out state tracking. */
198 198 if (self->input.pos == self->input.size) {
199 199 memset(&self->input, 0, sizeof(self->input));
200 200 Py_CLEAR(self->readResult);
201 201
202 202 if (self->buffer.buf) {
203 203 self->finishedInput = 1;
204 204 }
205 205 }
206 206
207 207 if (ZSTD_isError(zresult)) {
208 208 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
209 209 return -1;
210 210 }
211 211 }
212 212
213 213 if (output->pos && output->pos == output->size) {
214 214 return 1;
215 215 }
216 216 else {
217 217 return 0;
218 218 }
219 219 }
220 220
221 221 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
222 222 static char* kwlist[] = {
223 223 "size",
224 224 NULL
225 225 };
226 226
227 227 Py_ssize_t size = -1;
228 228 PyObject* result = NULL;
229 229 char* resultBuffer;
230 230 Py_ssize_t resultSize;
231 231 size_t zresult;
232 232 size_t oldPos;
233 233 int readResult, compressResult;
234 234
235 235 if (self->closed) {
236 236 PyErr_SetString(PyExc_ValueError, "stream is closed");
237 237 return NULL;
238 238 }
239 239
240 240 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
241 241 return NULL;
242 242 }
243 243
244 244 if (size < -1) {
245 245 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
246 246 return NULL;
247 247 }
248 248
249 249 if (size == -1) {
250 250 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
251 251 }
252 252
253 253 if (self->finishedOutput || size == 0) {
254 254 return PyBytes_FromStringAndSize("", 0);
255 255 }
256 256
257 257 result = PyBytes_FromStringAndSize(NULL, size);
258 258 if (NULL == result) {
259 259 return NULL;
260 260 }
261 261
262 262 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
263 263
264 264 self->output.dst = resultBuffer;
265 265 self->output.size = resultSize;
266 266 self->output.pos = 0;
267 267
268 268 readinput:
269 269
270 270 compressResult = compress_input(self, &self->output);
271 271
272 272 if (-1 == compressResult) {
273 273 Py_XDECREF(result);
274 274 return NULL;
275 275 }
276 276 else if (0 == compressResult) {
277 277 /* There is room in the output. We fall through to below, which will
278 278 * either get more input for us or will attempt to end the stream.
279 279 */
280 280 }
281 281 else if (1 == compressResult) {
282 282 memset(&self->output, 0, sizeof(self->output));
283 283 return result;
284 284 }
285 285 else {
286 286 assert(0);
287 287 }
288 288
289 289 readResult = read_compressor_input(self);
290 290
291 291 if (-1 == readResult) {
292 292 return NULL;
293 293 }
294 294 else if (0 == readResult) { }
295 295 else if (1 == readResult) { }
296 296 else {
297 297 assert(0);
298 298 }
299 299
300 300 if (self->input.size) {
301 301 goto readinput;
302 302 }
303 303
304 304 /* Else EOF */
305 305 oldPos = self->output.pos;
306 306
307 307 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
308 308 &self->input, ZSTD_e_end);
309 309
310 310 self->bytesCompressed += self->output.pos - oldPos;
311 311
312 312 if (ZSTD_isError(zresult)) {
313 313 PyErr_Format(ZstdError, "error ending compression stream: %s",
314 314 ZSTD_getErrorName(zresult));
315 315 Py_XDECREF(result);
316 316 return NULL;
317 317 }
318 318
319 319 assert(self->output.pos);
320 320
321 321 if (0 == zresult) {
322 322 self->finishedOutput = 1;
323 323 }
324 324
325 325 if (safe_pybytes_resize(&result, self->output.pos)) {
326 326 Py_XDECREF(result);
327 327 return NULL;
328 328 }
329 329
330 330 memset(&self->output, 0, sizeof(self->output));
331 331
332 332 return result;
333 333 }
334 334
335 335 static PyObject* reader_read1(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
336 336 static char* kwlist[] = {
337 337 "size",
338 338 NULL
339 339 };
340 340
341 341 Py_ssize_t size = -1;
342 342 PyObject* result = NULL;
343 343 char* resultBuffer;
344 344 Py_ssize_t resultSize;
345 345 ZSTD_outBuffer output;
346 346 int compressResult;
347 347 size_t oldPos;
348 348 size_t zresult;
349 349
350 350 if (self->closed) {
351 351 PyErr_SetString(PyExc_ValueError, "stream is closed");
352 352 return NULL;
353 353 }
354 354
355 355 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
356 356 return NULL;
357 357 }
358 358
359 359 if (size < -1) {
360 360 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
361 361 return NULL;
362 362 }
363 363
364 364 if (self->finishedOutput || size == 0) {
365 365 return PyBytes_FromStringAndSize("", 0);
366 366 }
367 367
368 368 if (size == -1) {
369 369 size = ZSTD_CStreamOutSize();
370 370 }
371 371
372 372 result = PyBytes_FromStringAndSize(NULL, size);
373 373 if (NULL == result) {
374 374 return NULL;
375 375 }
376 376
377 377 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
378 378
379 379 output.dst = resultBuffer;
380 380 output.size = resultSize;
381 381 output.pos = 0;
382 382
383 383 /* read1() is supposed to use at most 1 read() from the underlying stream.
384 384 However, we can't satisfy this requirement with compression because
385 385 not every input will generate output. We /could/ flush the compressor,
386 386 but this may not be desirable. We allow multiple read() from the
387 387 underlying stream. But unlike read(), we return as soon as output data
388 388 is available.
389 389 */
390 390
391 391 compressResult = compress_input(self, &output);
392 392
393 393 if (-1 == compressResult) {
394 394 Py_XDECREF(result);
395 395 return NULL;
396 396 }
397 397 else if (0 == compressResult || 1 == compressResult) { }
398 398 else {
399 399 assert(0);
400 400 }
401 401
402 402 if (output.pos) {
403 403 goto finally;
404 404 }
405 405
406 406 while (!self->finishedInput) {
407 407 int readResult = read_compressor_input(self);
408 408
409 409 if (-1 == readResult) {
410 410 Py_XDECREF(result);
411 411 return NULL;
412 412 }
413 413 else if (0 == readResult || 1 == readResult) { }
414 414 else {
415 415 assert(0);
416 416 }
417 417
418 418 compressResult = compress_input(self, &output);
419 419
420 420 if (-1 == compressResult) {
421 421 Py_XDECREF(result);
422 422 return NULL;
423 423 }
424 424 else if (0 == compressResult || 1 == compressResult) { }
425 425 else {
426 426 assert(0);
427 427 }
428 428
429 429 if (output.pos) {
430 430 goto finally;
431 431 }
432 432 }
433 433
434 434 /* EOF */
435 435 oldPos = output.pos;
436 436
437 437 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
438 438 ZSTD_e_end);
439 439
440 440 self->bytesCompressed += output.pos - oldPos;
441 441
442 442 if (ZSTD_isError(zresult)) {
443 443 PyErr_Format(ZstdError, "error ending compression stream: %s",
444 444 ZSTD_getErrorName(zresult));
445 445 Py_XDECREF(result);
446 446 return NULL;
447 447 }
448 448
449 449 if (zresult == 0) {
450 450 self->finishedOutput = 1;
451 451 }
452 452
453 453 finally:
454 454 if (result) {
455 455 if (safe_pybytes_resize(&result, output.pos)) {
456 456 Py_XDECREF(result);
457 457 return NULL;
458 458 }
459 459 }
460 460
461 461 return result;
462 462 }
463 463
464 464 static PyObject* reader_readall(PyObject* self) {
465 465 PyObject* chunks = NULL;
466 466 PyObject* empty = NULL;
467 467 PyObject* result = NULL;
468 468
469 469 /* Our strategy is to collect chunks into a list then join all the
470 470 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
471 471 * this feels simple enough to implement and avoids potentially expensive
472 472 * reallocations of large buffers.
473 473 */
474 474 chunks = PyList_New(0);
475 475 if (NULL == chunks) {
476 476 return NULL;
477 477 }
478 478
479 479 while (1) {
480 480 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
481 481 if (NULL == chunk) {
482 482 Py_DECREF(chunks);
483 483 return NULL;
484 484 }
485 485
486 486 if (!PyBytes_Size(chunk)) {
487 487 Py_DECREF(chunk);
488 488 break;
489 489 }
490 490
491 491 if (PyList_Append(chunks, chunk)) {
492 492 Py_DECREF(chunk);
493 493 Py_DECREF(chunks);
494 494 return NULL;
495 495 }
496 496
497 497 Py_DECREF(chunk);
498 498 }
499 499
500 500 empty = PyBytes_FromStringAndSize("", 0);
501 501 if (NULL == empty) {
502 502 Py_DECREF(chunks);
503 503 return NULL;
504 504 }
505 505
506 506 result = PyObject_CallMethod(empty, "join", "O", chunks);
507 507
508 508 Py_DECREF(empty);
509 509 Py_DECREF(chunks);
510 510
511 511 return result;
512 512 }
513 513
514 514 static PyObject* reader_readinto(ZstdCompressionReader* self, PyObject* args) {
515 515 Py_buffer dest;
516 516 ZSTD_outBuffer output;
517 517 int readResult, compressResult;
518 518 PyObject* result = NULL;
519 519 size_t zresult;
520 520 size_t oldPos;
521 521
522 522 if (self->closed) {
523 523 PyErr_SetString(PyExc_ValueError, "stream is closed");
524 524 return NULL;
525 525 }
526 526
527 527 if (self->finishedOutput) {
528 528 return PyLong_FromLong(0);
529 529 }
530 530
531 531 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
532 532 return NULL;
533 533 }
534 534
535 535 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
536 536 PyErr_SetString(PyExc_ValueError,
537 537 "destination buffer should be contiguous and have at most one dimension");
538 538 goto finally;
539 539 }
540 540
541 541 output.dst = dest.buf;
542 542 output.size = dest.len;
543 543 output.pos = 0;
544 544
545 545 compressResult = compress_input(self, &output);
546 546
547 547 if (-1 == compressResult) {
548 548 goto finally;
549 549 }
550 550 else if (0 == compressResult) { }
551 551 else if (1 == compressResult) {
552 552 result = PyLong_FromSize_t(output.pos);
553 553 goto finally;
554 554 }
555 555 else {
556 556 assert(0);
557 557 }
558 558
559 559 while (!self->finishedInput) {
560 560 readResult = read_compressor_input(self);
561 561
562 562 if (-1 == readResult) {
563 563 goto finally;
564 564 }
565 565 else if (0 == readResult || 1 == readResult) {}
566 566 else {
567 567 assert(0);
568 568 }
569 569
570 570 compressResult = compress_input(self, &output);
571 571
572 572 if (-1 == compressResult) {
573 573 goto finally;
574 574 }
575 575 else if (0 == compressResult) { }
576 576 else if (1 == compressResult) {
577 577 result = PyLong_FromSize_t(output.pos);
578 578 goto finally;
579 579 }
580 580 else {
581 581 assert(0);
582 582 }
583 583 }
584 584
585 585 /* EOF */
586 586 oldPos = output.pos;
587 587
588 588 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
589 589 ZSTD_e_end);
590 590
591 591 self->bytesCompressed += self->output.pos - oldPos;
592 592
593 593 if (ZSTD_isError(zresult)) {
594 594 PyErr_Format(ZstdError, "error ending compression stream: %s",
595 595 ZSTD_getErrorName(zresult));
596 596 goto finally;
597 597 }
598 598
599 599 assert(output.pos);
600 600
601 601 if (0 == zresult) {
602 602 self->finishedOutput = 1;
603 603 }
604 604
605 605 result = PyLong_FromSize_t(output.pos);
606 606
607 607 finally:
608 608 PyBuffer_Release(&dest);
609 609
610 610 return result;
611 611 }
612 612
613 613 static PyObject* reader_readinto1(ZstdCompressionReader* self, PyObject* args) {
614 614 Py_buffer dest;
615 615 PyObject* result = NULL;
616 616 ZSTD_outBuffer output;
617 617 int compressResult;
618 618 size_t oldPos;
619 619 size_t zresult;
620 620
621 621 if (self->closed) {
622 622 PyErr_SetString(PyExc_ValueError, "stream is closed");
623 623 return NULL;
624 624 }
625 625
626 626 if (self->finishedOutput) {
627 627 return PyLong_FromLong(0);
628 628 }
629 629
630 630 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
631 631 return NULL;
632 632 }
633 633
634 634 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
635 635 PyErr_SetString(PyExc_ValueError,
636 636 "destination buffer should be contiguous and have at most one dimension");
637 637 goto finally;
638 638 }
639 639
640 640 output.dst = dest.buf;
641 641 output.size = dest.len;
642 642 output.pos = 0;
643 643
644 644 compressResult = compress_input(self, &output);
645 645
646 646 if (-1 == compressResult) {
647 647 goto finally;
648 648 }
649 649 else if (0 == compressResult || 1 == compressResult) { }
650 650 else {
651 651 assert(0);
652 652 }
653 653
654 654 if (output.pos) {
655 655 result = PyLong_FromSize_t(output.pos);
656 656 goto finally;
657 657 }
658 658
659 659 while (!self->finishedInput) {
660 660 int readResult = read_compressor_input(self);
661 661
662 662 if (-1 == readResult) {
663 663 goto finally;
664 664 }
665 665 else if (0 == readResult || 1 == readResult) { }
666 666 else {
667 667 assert(0);
668 668 }
669 669
670 670 compressResult = compress_input(self, &output);
671 671
672 672 if (-1 == compressResult) {
673 673 goto finally;
674 674 }
675 675 else if (0 == compressResult) { }
676 676 else if (1 == compressResult) {
677 677 result = PyLong_FromSize_t(output.pos);
678 678 goto finally;
679 679 }
680 680 else {
681 681 assert(0);
682 682 }
683 683
684 684 /* If we produced output and we're not done with input, emit
685 685 * that output now, as we've hit restrictions of read1().
686 686 */
687 687 if (output.pos && !self->finishedInput) {
688 688 result = PyLong_FromSize_t(output.pos);
689 689 goto finally;
690 690 }
691 691
692 692 /* Otherwise we either have no output or we've exhausted the
693 693 * input. Either we try to get more input or we fall through
694 694 * to EOF below */
695 695 }
696 696
697 697 /* EOF */
698 698 oldPos = output.pos;
699 699
700 700 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
701 701 ZSTD_e_end);
702 702
703 703 self->bytesCompressed += self->output.pos - oldPos;
704 704
705 705 if (ZSTD_isError(zresult)) {
706 706 PyErr_Format(ZstdError, "error ending compression stream: %s",
707 707 ZSTD_getErrorName(zresult));
708 708 goto finally;
709 709 }
710 710
711 711 assert(output.pos);
712 712
713 713 if (0 == zresult) {
714 714 self->finishedOutput = 1;
715 715 }
716 716
717 717 result = PyLong_FromSize_t(output.pos);
718 718
719 719 finally:
720 720 PyBuffer_Release(&dest);
721 721
722 722 return result;
723 723 }
724 724
725 725 static PyObject* reader_iter(PyObject* self) {
726 726 set_unsupported_operation();
727 727 return NULL;
728 728 }
729 729
730 730 static PyObject* reader_iternext(PyObject* self) {
731 731 set_unsupported_operation();
732 732 return NULL;
733 733 }
734 734
735 735 static PyMethodDef reader_methods[] = {
736 736 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
737 737 PyDoc_STR("Enter a compression context") },
738 738 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
739 739 PyDoc_STR("Exit a compression context") },
740 740 { "close", (PyCFunction)reader_close, METH_NOARGS,
741 741 PyDoc_STR("Close the stream so it cannot perform any more operations") },
742 742 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
743 743 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
744 744 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
745 745 PyDoc_STR("Returns True") },
746 746 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
747 747 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL },
748 748 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
749 749 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
750 750 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
751 751 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
752 752 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
753 753 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
754 754 PyDoc_STR("Returns False") },
755 755 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
756 756 PyDoc_STR("Returns current number of bytes compressed") },
757 757 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
758 758 PyDoc_STR("Returns False") },
759 759 { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
760 760 { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
761 761 { NULL, NULL }
762 762 };
763 763
764 764 static PyMemberDef reader_members[] = {
765 765 { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
766 766 READONLY, "whether stream is closed" },
767 767 { NULL }
768 768 };
769 769
770 770 PyTypeObject ZstdCompressionReaderType = {
771 771 PyVarObject_HEAD_INIT(NULL, 0)
772 772 "zstd.ZstdCompressionReader", /* tp_name */
773 773 sizeof(ZstdCompressionReader), /* tp_basicsize */
774 774 0, /* tp_itemsize */
775 775 (destructor)reader_dealloc, /* tp_dealloc */
776 776 0, /* tp_print */
777 777 0, /* tp_getattr */
778 778 0, /* tp_setattr */
779 779 0, /* tp_compare */
780 780 0, /* tp_repr */
781 781 0, /* tp_as_number */
782 782 0, /* tp_as_sequence */
783 783 0, /* tp_as_mapping */
784 784 0, /* tp_hash */
785 785 0, /* tp_call */
786 786 0, /* tp_str */
787 787 0, /* tp_getattro */
788 788 0, /* tp_setattro */
789 789 0, /* tp_as_buffer */
790 790 Py_TPFLAGS_DEFAULT, /* tp_flags */
791 791 0, /* tp_doc */
792 792 0, /* tp_traverse */
793 793 0, /* tp_clear */
794 794 0, /* tp_richcompare */
795 795 0, /* tp_weaklistoffset */
796 796 reader_iter, /* tp_iter */
797 797 reader_iternext, /* tp_iternext */
798 798 reader_methods, /* tp_methods */
799 799 reader_members, /* tp_members */
800 800 0, /* tp_getset */
801 801 0, /* tp_base */
802 802 0, /* tp_dict */
803 803 0, /* tp_descr_get */
804 804 0, /* tp_descr_set */
805 805 0, /* tp_dictoffset */
806 806 0, /* tp_init */
807 807 0, /* tp_alloc */
808 808 PyType_GenericNew, /* tp_new */
809 809 };
810 810
811 811 void compressionreader_module_init(PyObject* mod) {
812 812 /* TODO make reader a sub-class of io.RawIOBase */
813 813
814 Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
814 Py_SET_TYPE(&ZstdCompressionReaderType, &PyType_Type);
815 815 if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
816 816 return;
817 817 }
818 818 }
@@ -1,372 +1,372 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
14 14 """A context manager used for writing compressed output to a writer.\n"
15 15 );
16 16
17 17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
18 18 Py_XDECREF(self->compressor);
19 19 Py_XDECREF(self->writer);
20 20
21 21 PyMem_Free(self->output.dst);
22 22 self->output.dst = NULL;
23 23
24 24 PyObject_Del(self);
25 25 }
26 26
27 27 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
28 28 if (self->closed) {
29 29 PyErr_SetString(PyExc_ValueError, "stream is closed");
30 30 return NULL;
31 31 }
32 32
33 33 if (self->entered) {
34 34 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
35 35 return NULL;
36 36 }
37 37
38 38 self->entered = 1;
39 39
40 40 Py_INCREF(self);
41 41 return (PyObject*)self;
42 42 }
43 43
44 44 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
45 45 PyObject* exc_type;
46 46 PyObject* exc_value;
47 47 PyObject* exc_tb;
48 48
49 49 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
50 50 return NULL;
51 51 }
52 52
53 53 self->entered = 0;
54 54
55 55 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
56 56 PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL);
57 57
58 58 if (NULL == result) {
59 59 return NULL;
60 60 }
61 61 }
62 62
63 63 Py_RETURN_FALSE;
64 64 }
65 65
66 66 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
67 67 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
68 68 }
69 69
70 70 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
71 71 static char* kwlist[] = {
72 72 "data",
73 73 NULL
74 74 };
75 75
76 76 PyObject* result = NULL;
77 77 Py_buffer source;
78 78 size_t zresult;
79 79 ZSTD_inBuffer input;
80 80 PyObject* res;
81 81 Py_ssize_t totalWrite = 0;
82 82
83 83 #if PY_MAJOR_VERSION >= 3
84 84 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
85 85 #else
86 86 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
87 87 #endif
88 88 kwlist, &source)) {
89 89 return NULL;
90 90 }
91 91
92 92 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
93 93 PyErr_SetString(PyExc_ValueError,
94 94 "data buffer should be contiguous and have at most one dimension");
95 95 goto finally;
96 96 }
97 97
98 98 if (self->closed) {
99 99 PyErr_SetString(PyExc_ValueError, "stream is closed");
100 100 return NULL;
101 101 }
102 102
103 103 self->output.pos = 0;
104 104
105 105 input.src = source.buf;
106 106 input.size = source.len;
107 107 input.pos = 0;
108 108
109 109 while (input.pos < (size_t)source.len) {
110 110 Py_BEGIN_ALLOW_THREADS
111 111 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
112 112 Py_END_ALLOW_THREADS
113 113
114 114 if (ZSTD_isError(zresult)) {
115 115 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
116 116 goto finally;
117 117 }
118 118
119 119 /* Copy data from output buffer to writer. */
120 120 if (self->output.pos) {
121 121 #if PY_MAJOR_VERSION >= 3
122 122 res = PyObject_CallMethod(self->writer, "write", "y#",
123 123 #else
124 124 res = PyObject_CallMethod(self->writer, "write", "s#",
125 125 #endif
126 126 self->output.dst, self->output.pos);
127 127 Py_XDECREF(res);
128 128 totalWrite += self->output.pos;
129 129 self->bytesCompressed += self->output.pos;
130 130 }
131 131 self->output.pos = 0;
132 132 }
133 133
134 134 if (self->writeReturnRead) {
135 135 result = PyLong_FromSize_t(input.pos);
136 136 }
137 137 else {
138 138 result = PyLong_FromSsize_t(totalWrite);
139 139 }
140 140
141 141 finally:
142 142 PyBuffer_Release(&source);
143 143 return result;
144 144 }
145 145
146 146 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
147 147 static char* kwlist[] = {
148 148 "flush_mode",
149 149 NULL
150 150 };
151 151
152 152 size_t zresult;
153 153 ZSTD_inBuffer input;
154 154 PyObject* res;
155 155 Py_ssize_t totalWrite = 0;
156 156 unsigned flush_mode = 0;
157 157 ZSTD_EndDirective flush;
158 158
159 159 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush",
160 160 kwlist, &flush_mode)) {
161 161 return NULL;
162 162 }
163 163
164 164 switch (flush_mode) {
165 165 case 0:
166 166 flush = ZSTD_e_flush;
167 167 break;
168 168 case 1:
169 169 flush = ZSTD_e_end;
170 170 break;
171 171 default:
172 172 PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
173 173 return NULL;
174 174 }
175 175
176 176 if (self->closed) {
177 177 PyErr_SetString(PyExc_ValueError, "stream is closed");
178 178 return NULL;
179 179 }
180 180
181 181 self->output.pos = 0;
182 182
183 183 input.src = NULL;
184 184 input.size = 0;
185 185 input.pos = 0;
186 186
187 187 while (1) {
188 188 Py_BEGIN_ALLOW_THREADS
189 189 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush);
190 190 Py_END_ALLOW_THREADS
191 191
192 192 if (ZSTD_isError(zresult)) {
193 193 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
194 194 return NULL;
195 195 }
196 196
197 197 /* Copy data from output buffer to writer. */
198 198 if (self->output.pos) {
199 199 #if PY_MAJOR_VERSION >= 3
200 200 res = PyObject_CallMethod(self->writer, "write", "y#",
201 201 #else
202 202 res = PyObject_CallMethod(self->writer, "write", "s#",
203 203 #endif
204 204 self->output.dst, self->output.pos);
205 205 Py_XDECREF(res);
206 206 totalWrite += self->output.pos;
207 207 self->bytesCompressed += self->output.pos;
208 208 }
209 209
210 210 self->output.pos = 0;
211 211
212 212 if (!zresult) {
213 213 break;
214 214 }
215 215 }
216 216
217 217 return PyLong_FromSsize_t(totalWrite);
218 218 }
219 219
220 220 static PyObject* ZstdCompressionWriter_close(ZstdCompressionWriter* self) {
221 221 PyObject* result;
222 222
223 223 if (self->closed) {
224 224 Py_RETURN_NONE;
225 225 }
226 226
227 227 result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1);
228 228 self->closed = 1;
229 229
230 230 if (NULL == result) {
231 231 return NULL;
232 232 }
233 233
234 234 /* Call close on underlying stream as well. */
235 235 if (PyObject_HasAttrString(self->writer, "close")) {
236 236 return PyObject_CallMethod(self->writer, "close", NULL);
237 237 }
238 238
239 239 Py_RETURN_NONE;
240 240 }
241 241
242 242 static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) {
243 243 if (PyObject_HasAttrString(self->writer, "fileno")) {
244 244 return PyObject_CallMethod(self->writer, "fileno", NULL);
245 245 }
246 246 else {
247 247 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
248 248 return NULL;
249 249 }
250 250 }
251 251
252 252 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
253 253 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
254 254 }
255 255
256 256 static PyObject* ZstdCompressionWriter_writelines(PyObject* self, PyObject* args) {
257 257 PyErr_SetNone(PyExc_NotImplementedError);
258 258 return NULL;
259 259 }
260 260
261 261 static PyObject* ZstdCompressionWriter_false(PyObject* self, PyObject* args) {
262 262 Py_RETURN_FALSE;
263 263 }
264 264
265 265 static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) {
266 266 Py_RETURN_TRUE;
267 267 }
268 268
269 269 static PyObject* ZstdCompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
270 270 PyObject* iomod;
271 271 PyObject* exc;
272 272
273 273 iomod = PyImport_ImportModule("io");
274 274 if (NULL == iomod) {
275 275 return NULL;
276 276 }
277 277
278 278 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
279 279 if (NULL == exc) {
280 280 Py_DECREF(iomod);
281 281 return NULL;
282 282 }
283 283
284 284 PyErr_SetNone(exc);
285 285 Py_DECREF(exc);
286 286 Py_DECREF(iomod);
287 287
288 288 return NULL;
289 289 }
290 290
291 291 static PyMethodDef ZstdCompressionWriter_methods[] = {
292 292 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
293 293 PyDoc_STR("Enter a compression context.") },
294 294 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
295 295 PyDoc_STR("Exit a compression context.") },
296 296 { "close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL },
297 297 { "fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL },
298 298 { "isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
299 299 { "readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
300 300 { "readline", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
301 301 { "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
302 302 { "seek", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
303 303 { "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL },
304 304 { "truncate", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
305 305 { "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL },
306 306 { "writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL },
307 307 { "read", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
308 308 { "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
309 309 { "readinto", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
310 310 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
311 311 PyDoc_STR("Obtain the memory size of the underlying compressor") },
312 312 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
313 313 PyDoc_STR("Compress data") },
314 314 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS,
315 315 PyDoc_STR("Flush data and finish a zstd frame") },
316 316 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
317 317 PyDoc_STR("Returns current number of bytes compressed") },
318 318 { NULL, NULL }
319 319 };
320 320
321 321 static PyMemberDef ZstdCompressionWriter_members[] = {
322 322 { "closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL },
323 323 { NULL }
324 324 };
325 325
326 326 PyTypeObject ZstdCompressionWriterType = {
327 327 PyVarObject_HEAD_INIT(NULL, 0)
328 328 "zstd.ZstdCompressionWriter", /* tp_name */
329 329 sizeof(ZstdCompressionWriter), /* tp_basicsize */
330 330 0, /* tp_itemsize */
331 331 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
332 332 0, /* tp_print */
333 333 0, /* tp_getattr */
334 334 0, /* tp_setattr */
335 335 0, /* tp_compare */
336 336 0, /* tp_repr */
337 337 0, /* tp_as_number */
338 338 0, /* tp_as_sequence */
339 339 0, /* tp_as_mapping */
340 340 0, /* tp_hash */
341 341 0, /* tp_call */
342 342 0, /* tp_str */
343 343 0, /* tp_getattro */
344 344 0, /* tp_setattro */
345 345 0, /* tp_as_buffer */
346 346 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
347 347 ZstdCompresssionWriter__doc__, /* tp_doc */
348 348 0, /* tp_traverse */
349 349 0, /* tp_clear */
350 350 0, /* tp_richcompare */
351 351 0, /* tp_weaklistoffset */
352 352 0, /* tp_iter */
353 353 0, /* tp_iternext */
354 354 ZstdCompressionWriter_methods, /* tp_methods */
355 355 ZstdCompressionWriter_members, /* tp_members */
356 356 0, /* tp_getset */
357 357 0, /* tp_base */
358 358 0, /* tp_dict */
359 359 0, /* tp_descr_get */
360 360 0, /* tp_descr_set */
361 361 0, /* tp_dictoffset */
362 362 0, /* tp_init */
363 363 0, /* tp_alloc */
364 364 PyType_GenericNew, /* tp_new */
365 365 };
366 366
367 367 void compressionwriter_module_init(PyObject* mod) {
368 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
368 Py_SET_TYPE(&ZstdCompressionWriterType, &PyType_Type);
369 369 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
370 370 return;
371 371 }
372 372 }
@@ -1,256 +1,256 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
14 14 "Perform compression using a standard library compatible API.\n"
15 15 );
16 16
17 17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
18 18 PyMem_Free(self->output.dst);
19 19 self->output.dst = NULL;
20 20
21 21 Py_XDECREF(self->compressor);
22 22
23 23 PyObject_Del(self);
24 24 }
25 25
26 26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
27 27 static char* kwlist[] = {
28 28 "data",
29 29 NULL
30 30 };
31 31
32 32 Py_buffer source;
33 33 ZSTD_inBuffer input;
34 34 size_t zresult;
35 35 PyObject* result = NULL;
36 36 Py_ssize_t resultSize = 0;
37 37
38 38 if (self->finished) {
39 39 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
40 40 return NULL;
41 41 }
42 42
43 43 #if PY_MAJOR_VERSION >= 3
44 44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
45 45 #else
46 46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
47 47 #endif
48 48 kwlist, &source)) {
49 49 return NULL;
50 50 }
51 51
52 52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
53 53 PyErr_SetString(PyExc_ValueError,
54 54 "data buffer should be contiguous and have at most one dimension");
55 55 goto finally;
56 56 }
57 57
58 58 input.src = source.buf;
59 59 input.size = source.len;
60 60 input.pos = 0;
61 61
62 62 while (input.pos < (size_t)source.len) {
63 63 Py_BEGIN_ALLOW_THREADS
64 64 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
65 65 &input, ZSTD_e_continue);
66 66 Py_END_ALLOW_THREADS
67 67
68 68 if (ZSTD_isError(zresult)) {
69 69 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
70 70 Py_CLEAR(result);
71 71 goto finally;
72 72 }
73 73
74 74 if (self->output.pos) {
75 75 if (result) {
76 76 resultSize = PyBytes_GET_SIZE(result);
77 77
78 78 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
79 79 Py_CLEAR(result);
80 80 goto finally;
81 81 }
82 82
83 83 memcpy(PyBytes_AS_STRING(result) + resultSize,
84 84 self->output.dst, self->output.pos);
85 85 }
86 86 else {
87 87 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
88 88 if (!result) {
89 89 goto finally;
90 90 }
91 91 }
92 92
93 93 self->output.pos = 0;
94 94 }
95 95 }
96 96
97 97 if (NULL == result) {
98 98 result = PyBytes_FromString("");
99 99 }
100 100
101 101 finally:
102 102 PyBuffer_Release(&source);
103 103
104 104 return result;
105 105 }
106 106
107 107 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
108 108 static char* kwlist[] = {
109 109 "flush_mode",
110 110 NULL
111 111 };
112 112
113 113 int flushMode = compressorobj_flush_finish;
114 114 size_t zresult;
115 115 PyObject* result = NULL;
116 116 Py_ssize_t resultSize = 0;
117 117 ZSTD_inBuffer input;
118 118 ZSTD_EndDirective zFlushMode;
119 119
120 120 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
121 121 return NULL;
122 122 }
123 123
124 124 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
125 125 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
126 126 return NULL;
127 127 }
128 128
129 129 if (self->finished) {
130 130 PyErr_SetString(ZstdError, "compressor object already finished");
131 131 return NULL;
132 132 }
133 133
134 134 switch (flushMode) {
135 135 case compressorobj_flush_block:
136 136 zFlushMode = ZSTD_e_flush;
137 137 break;
138 138
139 139 case compressorobj_flush_finish:
140 140 zFlushMode = ZSTD_e_end;
141 141 self->finished = 1;
142 142 break;
143 143
144 144 default:
145 145 PyErr_SetString(ZstdError, "unhandled flush mode");
146 146 return NULL;
147 147 }
148 148
149 149 assert(self->output.pos == 0);
150 150
151 151 input.src = NULL;
152 152 input.size = 0;
153 153 input.pos = 0;
154 154
155 155 while (1) {
156 156 Py_BEGIN_ALLOW_THREADS
157 157 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
158 158 &input, zFlushMode);
159 159 Py_END_ALLOW_THREADS
160 160
161 161 if (ZSTD_isError(zresult)) {
162 162 PyErr_Format(ZstdError, "error ending compression stream: %s",
163 163 ZSTD_getErrorName(zresult));
164 164 return NULL;
165 165 }
166 166
167 167 if (self->output.pos) {
168 168 if (result) {
169 169 resultSize = PyBytes_GET_SIZE(result);
170 170
171 171 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
172 172 Py_XDECREF(result);
173 173 return NULL;
174 174 }
175 175
176 176 memcpy(PyBytes_AS_STRING(result) + resultSize,
177 177 self->output.dst, self->output.pos);
178 178 }
179 179 else {
180 180 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
181 181 if (!result) {
182 182 return NULL;
183 183 }
184 184 }
185 185
186 186 self->output.pos = 0;
187 187 }
188 188
189 189 if (!zresult) {
190 190 break;
191 191 }
192 192 }
193 193
194 194 if (result) {
195 195 return result;
196 196 }
197 197 else {
198 198 return PyBytes_FromString("");
199 199 }
200 200 }
201 201
202 202 static PyMethodDef ZstdCompressionObj_methods[] = {
203 203 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
204 204 PyDoc_STR("compress data") },
205 205 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
206 206 PyDoc_STR("finish compression operation") },
207 207 { NULL, NULL }
208 208 };
209 209
210 210 PyTypeObject ZstdCompressionObjType = {
211 211 PyVarObject_HEAD_INIT(NULL, 0)
212 212 "zstd.ZstdCompressionObj", /* tp_name */
213 213 sizeof(ZstdCompressionObj), /* tp_basicsize */
214 214 0, /* tp_itemsize */
215 215 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
216 216 0, /* tp_print */
217 217 0, /* tp_getattr */
218 218 0, /* tp_setattr */
219 219 0, /* tp_compare */
220 220 0, /* tp_repr */
221 221 0, /* tp_as_number */
222 222 0, /* tp_as_sequence */
223 223 0, /* tp_as_mapping */
224 224 0, /* tp_hash */
225 225 0, /* tp_call */
226 226 0, /* tp_str */
227 227 0, /* tp_getattro */
228 228 0, /* tp_setattro */
229 229 0, /* tp_as_buffer */
230 230 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
231 231 ZstdCompressionObj__doc__, /* tp_doc */
232 232 0, /* tp_traverse */
233 233 0, /* tp_clear */
234 234 0, /* tp_richcompare */
235 235 0, /* tp_weaklistoffset */
236 236 0, /* tp_iter */
237 237 0, /* tp_iternext */
238 238 ZstdCompressionObj_methods, /* tp_methods */
239 239 0, /* tp_members */
240 240 0, /* tp_getset */
241 241 0, /* tp_base */
242 242 0, /* tp_dict */
243 243 0, /* tp_descr_get */
244 244 0, /* tp_descr_set */
245 245 0, /* tp_dictoffset */
246 246 0, /* tp_init */
247 247 0, /* tp_alloc */
248 248 PyType_GenericNew, /* tp_new */
249 249 };
250 250
251 251 void compressobj_module_init(PyObject* module) {
252 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
252 Py_SET_TYPE(&ZstdCompressionObjType, &PyType_Type);
253 253 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
254 254 return;
255 255 }
256 256 }
@@ -1,1670 +1,1670 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10 #include "pool.h"
11 11
12 12 extern PyObject* ZstdError;
13 13
14 14 int setup_cctx(ZstdCompressor* compressor) {
15 15 size_t zresult;
16 16
17 17 assert(compressor);
18 18 assert(compressor->cctx);
19 19 assert(compressor->params);
20 20
21 21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
22 22 if (ZSTD_isError(zresult)) {
23 23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
24 24 ZSTD_getErrorName(zresult));
25 25 return 1;
26 26 }
27 27
28 28 if (compressor->dict) {
29 29 if (compressor->dict->cdict) {
30 30 zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
31 31 }
32 32 else {
33 33 zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
34 34 compressor->dict->dictData, compressor->dict->dictSize,
35 35 ZSTD_dlm_byRef, compressor->dict->dictType);
36 36 }
37 37 if (ZSTD_isError(zresult)) {
38 38 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
39 39 ZSTD_getErrorName(zresult));
40 40 return 1;
41 41 }
42 42 }
43 43
44 44 return 0;
45 45 }
46 46
47 47 static PyObject* frame_progression(ZSTD_CCtx* cctx) {
48 48 PyObject* result = NULL;
49 49 PyObject* value;
50 50 ZSTD_frameProgression progression;
51 51
52 52 result = PyTuple_New(3);
53 53 if (!result) {
54 54 return NULL;
55 55 }
56 56
57 57 progression = ZSTD_getFrameProgression(cctx);
58 58
59 59 value = PyLong_FromUnsignedLongLong(progression.ingested);
60 60 if (!value) {
61 61 Py_DECREF(result);
62 62 return NULL;
63 63 }
64 64
65 65 PyTuple_SET_ITEM(result, 0, value);
66 66
67 67 value = PyLong_FromUnsignedLongLong(progression.consumed);
68 68 if (!value) {
69 69 Py_DECREF(result);
70 70 return NULL;
71 71 }
72 72
73 73 PyTuple_SET_ITEM(result, 1, value);
74 74
75 75 value = PyLong_FromUnsignedLongLong(progression.produced);
76 76 if (!value) {
77 77 Py_DECREF(result);
78 78 return NULL;
79 79 }
80 80
81 81 PyTuple_SET_ITEM(result, 2, value);
82 82
83 83 return result;
84 84 }
85 85
86 86 PyDoc_STRVAR(ZstdCompressor__doc__,
87 87 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
88 88 "\n"
89 89 "Create an object used to perform Zstandard compression.\n"
90 90 "\n"
91 91 "An instance can compress data various ways. Instances can be used multiple\n"
92 92 "times. Each compression operation will use the compression parameters\n"
93 93 "defined at construction time.\n"
94 94 "\n"
95 95 "Compression can be configured via the following names arguments:\n"
96 96 "\n"
97 97 "level\n"
98 98 " Integer compression level.\n"
99 99 "dict_data\n"
100 100 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
101 101 "compression_params\n"
102 102 " A ``CompressionParameters`` instance defining low-level compression"
103 103 " parameters. If defined, this will overwrite the ``level`` argument.\n"
104 104 "write_checksum\n"
105 105 " If True, a 4 byte content checksum will be written with the compressed\n"
106 106 " data, allowing the decompressor to perform content verification.\n"
107 107 "write_content_size\n"
108 108 " If True (the default), the decompressed content size will be included in\n"
109 109 " the header of the compressed data. This data will only be written if the\n"
110 110 " compressor knows the size of the input data.\n"
111 111 "write_dict_id\n"
112 112 " Determines whether the dictionary ID will be written into the compressed\n"
113 113 " data. Defaults to True. Only adds content to the compressed data if\n"
114 114 " a dictionary is being used.\n"
115 115 "threads\n"
116 116 " Number of threads to use to compress data concurrently. When set,\n"
117 117 " compression operations are performed on multiple threads. The default\n"
118 118 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
119 119 " set the number of threads to the number of detected logical CPUs.\n"
120 120 );
121 121
122 122 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
123 123 static char* kwlist[] = {
124 124 "level",
125 125 "dict_data",
126 126 "compression_params",
127 127 "write_checksum",
128 128 "write_content_size",
129 129 "write_dict_id",
130 130 "threads",
131 131 NULL
132 132 };
133 133
134 134 int level = 3;
135 135 ZstdCompressionDict* dict = NULL;
136 136 ZstdCompressionParametersObject* params = NULL;
137 137 PyObject* writeChecksum = NULL;
138 138 PyObject* writeContentSize = NULL;
139 139 PyObject* writeDictID = NULL;
140 140 int threads = 0;
141 141
142 142 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
143 143 kwlist, &level, &ZstdCompressionDictType, &dict,
144 144 &ZstdCompressionParametersType, &params,
145 145 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
146 146 return -1;
147 147 }
148 148
149 149 if (level > ZSTD_maxCLevel()) {
150 150 PyErr_Format(PyExc_ValueError, "level must be less than %d",
151 151 ZSTD_maxCLevel() + 1);
152 152 return -1;
153 153 }
154 154
155 155 if (threads < 0) {
156 156 threads = cpu_count();
157 157 }
158 158
159 159 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
160 160 overhead of each compression operation. */
161 161 self->cctx = ZSTD_createCCtx();
162 162 if (!self->cctx) {
163 163 PyErr_NoMemory();
164 164 return -1;
165 165 }
166 166
167 167 /* TODO stuff the original parameters away somewhere so we can reset later. This
168 168 will allow us to do things like automatically adjust cparams based on input
169 169 size (assuming zstd isn't doing that internally). */
170 170
171 171 self->params = ZSTD_createCCtxParams();
172 172 if (!self->params) {
173 173 PyErr_NoMemory();
174 174 return -1;
175 175 }
176 176
177 177 if (params && writeChecksum) {
178 178 PyErr_SetString(PyExc_ValueError,
179 179 "cannot define compression_params and write_checksum");
180 180 return -1;
181 181 }
182 182
183 183 if (params && writeContentSize) {
184 184 PyErr_SetString(PyExc_ValueError,
185 185 "cannot define compression_params and write_content_size");
186 186 return -1;
187 187 }
188 188
189 189 if (params && writeDictID) {
190 190 PyErr_SetString(PyExc_ValueError,
191 191 "cannot define compression_params and write_dict_id");
192 192 return -1;
193 193 }
194 194
195 195 if (params && threads) {
196 196 PyErr_SetString(PyExc_ValueError,
197 197 "cannot define compression_params and threads");
198 198 return -1;
199 199 }
200 200
201 201 if (params) {
202 202 if (set_parameters(self->params, params)) {
203 203 return -1;
204 204 }
205 205 }
206 206 else {
207 207 if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) {
208 208 return -1;
209 209 }
210 210
211 211 if (set_parameter(self->params, ZSTD_c_contentSizeFlag,
212 212 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
213 213 return -1;
214 214 }
215 215
216 216 if (set_parameter(self->params, ZSTD_c_checksumFlag,
217 217 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
218 218 return -1;
219 219 }
220 220
221 221 if (set_parameter(self->params, ZSTD_c_dictIDFlag,
222 222 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
223 223 return -1;
224 224 }
225 225
226 226 if (threads) {
227 227 if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) {
228 228 return -1;
229 229 }
230 230 }
231 231 }
232 232
233 233 if (dict) {
234 234 self->dict = dict;
235 235 Py_INCREF(dict);
236 236 }
237 237
238 238 if (setup_cctx(self)) {
239 239 return -1;
240 240 }
241 241
242 242 return 0;
243 243 }
244 244
245 245 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
246 246 if (self->cctx) {
247 247 ZSTD_freeCCtx(self->cctx);
248 248 self->cctx = NULL;
249 249 }
250 250
251 251 if (self->params) {
252 252 ZSTD_freeCCtxParams(self->params);
253 253 self->params = NULL;
254 254 }
255 255
256 256 Py_XDECREF(self->dict);
257 257 PyObject_Del(self);
258 258 }
259 259
260 260 PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
261 261 "memory_size()\n"
262 262 "\n"
263 263 "Obtain the memory usage of this compressor, in bytes.\n"
264 264 );
265 265
266 266 static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
267 267 if (self->cctx) {
268 268 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
269 269 }
270 270 else {
271 271 PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
272 272 return NULL;
273 273 }
274 274 }
275 275
276 276 PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
277 277 "frame_progression()\n"
278 278 "\n"
279 279 "Return information on how much work the compressor has done.\n"
280 280 "\n"
281 281 "Returns a 3-tuple of (ingested, consumed, produced).\n"
282 282 );
283 283
284 284 static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
285 285 return frame_progression(self->cctx);
286 286 }
287 287
288 288 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
289 289 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
290 290 "compress data between streams\n"
291 291 "\n"
292 292 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
293 293 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
294 294 "method.\n"
295 295 "\n"
296 296 "An optional ``size`` argument specifies the size of the source stream.\n"
297 297 "If defined, compression parameters will be tuned based on the size.\n"
298 298 "\n"
299 299 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
300 300 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
301 301 "the default compression stream input and output sizes, respectively.\n"
302 302 );
303 303
304 304 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
305 305 static char* kwlist[] = {
306 306 "ifh",
307 307 "ofh",
308 308 "size",
309 309 "read_size",
310 310 "write_size",
311 311 NULL
312 312 };
313 313
314 314 PyObject* source;
315 315 PyObject* dest;
316 316 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
317 317 size_t inSize = ZSTD_CStreamInSize();
318 318 size_t outSize = ZSTD_CStreamOutSize();
319 319 ZSTD_inBuffer input;
320 320 ZSTD_outBuffer output;
321 321 Py_ssize_t totalRead = 0;
322 322 Py_ssize_t totalWrite = 0;
323 323 char* readBuffer;
324 324 Py_ssize_t readSize;
325 325 PyObject* readResult = NULL;
326 326 PyObject* res = NULL;
327 327 size_t zresult;
328 328 PyObject* writeResult;
329 329 PyObject* totalReadPy;
330 330 PyObject* totalWritePy;
331 331
332 332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
333 333 &source, &dest, &sourceSize, &inSize, &outSize)) {
334 334 return NULL;
335 335 }
336 336
337 337 if (!PyObject_HasAttrString(source, "read")) {
338 338 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
339 339 return NULL;
340 340 }
341 341
342 342 if (!PyObject_HasAttrString(dest, "write")) {
343 343 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
344 344 return NULL;
345 345 }
346 346
347 347 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
348 348
349 349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
350 350 if (ZSTD_isError(zresult)) {
351 351 PyErr_Format(ZstdError, "error setting source size: %s",
352 352 ZSTD_getErrorName(zresult));
353 353 return NULL;
354 354 }
355 355
356 356 /* Prevent free on uninitialized memory in finally. */
357 357 output.dst = PyMem_Malloc(outSize);
358 358 if (!output.dst) {
359 359 PyErr_NoMemory();
360 360 res = NULL;
361 361 goto finally;
362 362 }
363 363 output.size = outSize;
364 364 output.pos = 0;
365 365
366 366 input.src = NULL;
367 367 input.size = 0;
368 368 input.pos = 0;
369 369
370 370 while (1) {
371 371 /* Try to read from source stream. */
372 372 readResult = PyObject_CallMethod(source, "read", "n", inSize);
373 373 if (!readResult) {
374 374 PyErr_SetString(ZstdError, "could not read() from source");
375 375 goto finally;
376 376 }
377 377
378 378 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
379 379
380 380 /* If no data was read, we're at EOF. */
381 381 if (0 == readSize) {
382 382 break;
383 383 }
384 384
385 385 totalRead += readSize;
386 386
387 387 /* Send data to compressor */
388 388 input.src = readBuffer;
389 389 input.size = readSize;
390 390 input.pos = 0;
391 391
392 392 while (input.pos < input.size) {
393 393 Py_BEGIN_ALLOW_THREADS
394 394 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_continue);
395 395 Py_END_ALLOW_THREADS
396 396
397 397 if (ZSTD_isError(zresult)) {
398 398 res = NULL;
399 399 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
400 400 goto finally;
401 401 }
402 402
403 403 if (output.pos) {
404 404 #if PY_MAJOR_VERSION >= 3
405 405 writeResult = PyObject_CallMethod(dest, "write", "y#",
406 406 #else
407 407 writeResult = PyObject_CallMethod(dest, "write", "s#",
408 408 #endif
409 409 output.dst, output.pos);
410 410 Py_XDECREF(writeResult);
411 411 totalWrite += output.pos;
412 412 output.pos = 0;
413 413 }
414 414 }
415 415
416 416 Py_CLEAR(readResult);
417 417 }
418 418
419 419 /* We've finished reading. Now flush the compressor stream. */
420 420 assert(input.pos == input.size);
421 421
422 422 while (1) {
423 423 Py_BEGIN_ALLOW_THREADS
424 424 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end);
425 425 Py_END_ALLOW_THREADS
426 426
427 427 if (ZSTD_isError(zresult)) {
428 428 PyErr_Format(ZstdError, "error ending compression stream: %s",
429 429 ZSTD_getErrorName(zresult));
430 430 res = NULL;
431 431 goto finally;
432 432 }
433 433
434 434 if (output.pos) {
435 435 #if PY_MAJOR_VERSION >= 3
436 436 writeResult = PyObject_CallMethod(dest, "write", "y#",
437 437 #else
438 438 writeResult = PyObject_CallMethod(dest, "write", "s#",
439 439 #endif
440 440 output.dst, output.pos);
441 441 totalWrite += output.pos;
442 442 Py_XDECREF(writeResult);
443 443 output.pos = 0;
444 444 }
445 445
446 446 if (!zresult) {
447 447 break;
448 448 }
449 449 }
450 450
451 451 totalReadPy = PyLong_FromSsize_t(totalRead);
452 452 totalWritePy = PyLong_FromSsize_t(totalWrite);
453 453 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
454 454 Py_DECREF(totalReadPy);
455 455 Py_DECREF(totalWritePy);
456 456
457 457 finally:
458 458 if (output.dst) {
459 459 PyMem_Free(output.dst);
460 460 }
461 461
462 462 Py_XDECREF(readResult);
463 463
464 464 return res;
465 465 }
466 466
467 467 PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
468 468 "stream_reader(source, [size=0])\n"
469 469 "\n"
470 470 "Obtain an object that behaves like an I/O stream.\n"
471 471 "\n"
472 472 "The source object can be any object with a ``read(size)`` method\n"
473 473 "or an object that conforms to the buffer protocol.\n"
474 474 );
475 475
476 476 static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
477 477 static char* kwlist[] = {
478 478 "source",
479 479 "size",
480 480 "read_size",
481 481 NULL
482 482 };
483 483
484 484 PyObject* source;
485 485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
486 486 size_t readSize = ZSTD_CStreamInSize();
487 487 ZstdCompressionReader* result = NULL;
488 488 size_t zresult;
489 489
490 490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
491 491 &source, &sourceSize, &readSize)) {
492 492 return NULL;
493 493 }
494 494
495 495 result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
496 496 if (!result) {
497 497 return NULL;
498 498 }
499 499
500 500 if (PyObject_HasAttrString(source, "read")) {
501 501 result->reader = source;
502 502 Py_INCREF(source);
503 503 result->readSize = readSize;
504 504 }
505 505 else if (1 == PyObject_CheckBuffer(source)) {
506 506 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
507 507 goto except;
508 508 }
509 509
510 510 assert(result->buffer.len >= 0);
511 511
512 512 sourceSize = result->buffer.len;
513 513 }
514 514 else {
515 515 PyErr_SetString(PyExc_TypeError,
516 516 "must pass an object with a read() method or that conforms to the buffer protocol");
517 517 goto except;
518 518 }
519 519
520 520 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
521 521
522 522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 523 if (ZSTD_isError(zresult)) {
524 524 PyErr_Format(ZstdError, "error setting source source: %s",
525 525 ZSTD_getErrorName(zresult));
526 526 goto except;
527 527 }
528 528
529 529 result->compressor = self;
530 530 Py_INCREF(self);
531 531
532 532 return result;
533 533
534 534 except:
535 535 Py_CLEAR(result);
536 536
537 537 return NULL;
538 538 }
539 539
540 540 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
541 541 "compress(data)\n"
542 542 "\n"
543 543 "Compress data in a single operation.\n"
544 544 "\n"
545 545 "This is the simplest mechanism to perform compression: simply pass in a\n"
546 546 "value and get a compressed value back. It is almost the most prone to abuse.\n"
547 547 "The input and output values must fit in memory, so passing in very large\n"
548 548 "values can result in excessive memory usage. For this reason, one of the\n"
549 549 "streaming based APIs is preferred for larger values.\n"
550 550 );
551 551
552 552 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
553 553 static char* kwlist[] = {
554 554 "data",
555 555 NULL
556 556 };
557 557
558 558 Py_buffer source;
559 559 size_t destSize;
560 560 PyObject* output = NULL;
561 561 size_t zresult;
562 562 ZSTD_outBuffer outBuffer;
563 563 ZSTD_inBuffer inBuffer;
564 564
565 565 #if PY_MAJOR_VERSION >= 3
566 566 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress",
567 567 #else
568 568 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress",
569 569 #endif
570 570 kwlist, &source)) {
571 571 return NULL;
572 572 }
573 573
574 574 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
575 575 PyErr_SetString(PyExc_ValueError,
576 576 "data buffer should be contiguous and have at most one dimension");
577 577 goto finally;
578 578 }
579 579
580 580 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
581 581
582 582 destSize = ZSTD_compressBound(source.len);
583 583 output = PyBytes_FromStringAndSize(NULL, destSize);
584 584 if (!output) {
585 585 goto finally;
586 586 }
587 587
588 588 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
589 589 if (ZSTD_isError(zresult)) {
590 590 PyErr_Format(ZstdError, "error setting source size: %s",
591 591 ZSTD_getErrorName(zresult));
592 592 Py_CLEAR(output);
593 593 goto finally;
594 594 }
595 595
596 596 inBuffer.src = source.buf;
597 597 inBuffer.size = source.len;
598 598 inBuffer.pos = 0;
599 599
600 600 outBuffer.dst = PyBytes_AsString(output);
601 601 outBuffer.size = destSize;
602 602 outBuffer.pos = 0;
603 603
604 604 Py_BEGIN_ALLOW_THREADS
605 605 /* By avoiding ZSTD_compress(), we don't necessarily write out content
606 606 size. This means the argument to ZstdCompressor to control frame
607 607 parameters is honored. */
608 608 zresult = ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
609 609 Py_END_ALLOW_THREADS
610 610
611 611 if (ZSTD_isError(zresult)) {
612 612 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
613 613 Py_CLEAR(output);
614 614 goto finally;
615 615 }
616 616 else if (zresult) {
617 617 PyErr_SetString(ZstdError, "unexpected partial frame flush");
618 618 Py_CLEAR(output);
619 619 goto finally;
620 620 }
621 621
622 Py_SIZE(output) = outBuffer.pos;
622 Py_SET_SIZE(output, outBuffer.pos);
623 623
624 624 finally:
625 625 PyBuffer_Release(&source);
626 626 return output;
627 627 }
628 628
629 629 PyDoc_STRVAR(ZstdCompressionObj__doc__,
630 630 "compressobj()\n"
631 631 "\n"
632 632 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
633 633 "\n"
634 634 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
635 635 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
636 636 "without changing how compression is performed.\n"
637 637 );
638 638
639 639 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
640 640 static char* kwlist[] = {
641 641 "size",
642 642 NULL
643 643 };
644 644
645 645 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
646 646 size_t outSize = ZSTD_CStreamOutSize();
647 647 ZstdCompressionObj* result = NULL;
648 648 size_t zresult;
649 649
650 650 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
651 651 return NULL;
652 652 }
653 653
654 654 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
655 655
656 656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
657 657 if (ZSTD_isError(zresult)) {
658 658 PyErr_Format(ZstdError, "error setting source size: %s",
659 659 ZSTD_getErrorName(zresult));
660 660 return NULL;
661 661 }
662 662
663 663 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
664 664 if (!result) {
665 665 return NULL;
666 666 }
667 667
668 668 result->output.dst = PyMem_Malloc(outSize);
669 669 if (!result->output.dst) {
670 670 PyErr_NoMemory();
671 671 Py_DECREF(result);
672 672 return NULL;
673 673 }
674 674 result->output.size = outSize;
675 675 result->compressor = self;
676 676 Py_INCREF(result->compressor);
677 677
678 678 return result;
679 679 }
680 680
681 681 PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__,
682 682 "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n"
683 683 "Read uncompressed data from a reader and return an iterator\n"
684 684 "\n"
685 685 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
686 686 "\n"
687 687 "Uncompressed data will be obtained from ``reader`` by calling the\n"
688 688 "``read(size)`` method of it. The source data will be streamed into a\n"
689 689 "compressor. As compressed data is available, it will be exposed to the\n"
690 690 "iterator.\n"
691 691 "\n"
692 692 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
693 693 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
694 694 "and output defaults, respectively.\n"
695 695 "\n"
696 696 "The caller is partially in control of how fast data is fed into the\n"
697 697 "compressor by how it consumes the returned iterator. The compressor will\n"
698 698 "not consume from the reader unless the caller consumes from the iterator.\n"
699 699 );
700 700
701 701 static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
702 702 static char* kwlist[] = {
703 703 "reader",
704 704 "size",
705 705 "read_size",
706 706 "write_size",
707 707 NULL
708 708 };
709 709
710 710 PyObject* reader;
711 711 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
712 712 size_t inSize = ZSTD_CStreamInSize();
713 713 size_t outSize = ZSTD_CStreamOutSize();
714 714 ZstdCompressorIterator* result;
715 715 size_t zresult;
716 716
717 717 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
718 718 &reader, &sourceSize, &inSize, &outSize)) {
719 719 return NULL;
720 720 }
721 721
722 722 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
723 723 if (!result) {
724 724 return NULL;
725 725 }
726 726 if (PyObject_HasAttrString(reader, "read")) {
727 727 result->reader = reader;
728 728 Py_INCREF(result->reader);
729 729 }
730 730 else if (1 == PyObject_CheckBuffer(reader)) {
731 731 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
732 732 goto except;
733 733 }
734 734
735 735 sourceSize = result->buffer.len;
736 736 }
737 737 else {
738 738 PyErr_SetString(PyExc_ValueError,
739 739 "must pass an object with a read() method or conforms to buffer protocol");
740 740 goto except;
741 741 }
742 742
743 743 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
744 744
745 745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
746 746 if (ZSTD_isError(zresult)) {
747 747 PyErr_Format(ZstdError, "error setting source size: %s",
748 748 ZSTD_getErrorName(zresult));
749 749 return NULL;
750 750 }
751 751
752 752 result->compressor = self;
753 753 Py_INCREF(result->compressor);
754 754
755 755 result->inSize = inSize;
756 756 result->outSize = outSize;
757 757
758 758 result->output.dst = PyMem_Malloc(outSize);
759 759 if (!result->output.dst) {
760 760 PyErr_NoMemory();
761 761 goto except;
762 762 }
763 763 result->output.size = outSize;
764 764
765 765 goto finally;
766 766
767 767 except:
768 768 Py_CLEAR(result);
769 769
770 770 finally:
771 771 return result;
772 772 }
773 773
774 774 PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__,
775 775 "Create a context manager to write compressed data to an object.\n"
776 776 "\n"
777 777 "The passed object must have a ``write()`` method.\n"
778 778 "\n"
779 779 "The caller feeds input data to the object by calling ``compress(data)``.\n"
780 780 "Compressed data is written to the argument given to this function.\n"
781 781 "\n"
782 782 "The function takes an optional ``size`` argument indicating the total size\n"
783 783 "of the eventual input. If specified, the size will influence compression\n"
784 784 "parameter tuning and could result in the size being written into the\n"
785 785 "header of the compressed data.\n"
786 786 "\n"
787 787 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
788 788 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
789 789 "for a compressor output stream.\n"
790 790 );
791 791
792 792 static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
793 793 static char* kwlist[] = {
794 794 "writer",
795 795 "size",
796 796 "write_size",
797 797 "write_return_read",
798 798 NULL
799 799 };
800 800
801 801 PyObject* writer;
802 802 ZstdCompressionWriter* result;
803 803 size_t zresult;
804 804 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
805 805 size_t outSize = ZSTD_CStreamOutSize();
806 806 PyObject* writeReturnRead = NULL;
807 807
808 808 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_writer", kwlist,
809 809 &writer, &sourceSize, &outSize, &writeReturnRead)) {
810 810 return NULL;
811 811 }
812 812
813 813 if (!PyObject_HasAttrString(writer, "write")) {
814 814 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
815 815 return NULL;
816 816 }
817 817
818 818 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
819 819
820 820 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
821 821 if (ZSTD_isError(zresult)) {
822 822 PyErr_Format(ZstdError, "error setting source size: %s",
823 823 ZSTD_getErrorName(zresult));
824 824 return NULL;
825 825 }
826 826
827 827 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
828 828 if (!result) {
829 829 return NULL;
830 830 }
831 831
832 832 result->output.dst = PyMem_Malloc(outSize);
833 833 if (!result->output.dst) {
834 834 Py_DECREF(result);
835 835 return (ZstdCompressionWriter*)PyErr_NoMemory();
836 836 }
837 837
838 838 result->output.pos = 0;
839 839 result->output.size = outSize;
840 840
841 841 result->compressor = self;
842 842 Py_INCREF(result->compressor);
843 843
844 844 result->writer = writer;
845 845 Py_INCREF(result->writer);
846 846
847 847 result->outSize = outSize;
848 848 result->bytesCompressed = 0;
849 849 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
850 850
851 851 return result;
852 852 }
853 853
854 854 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
855 855 "Create an object for iterative compressing to same-sized chunks.\n"
856 856 );
857 857
858 858 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
859 859 static char* kwlist[] = {
860 860 "size",
861 861 "chunk_size",
862 862 NULL
863 863 };
864 864
865 865 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
866 866 size_t chunkSize = ZSTD_CStreamOutSize();
867 867 ZstdCompressionChunker* chunker;
868 868 size_t zresult;
869 869
870 870 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
871 871 &sourceSize, &chunkSize)) {
872 872 return NULL;
873 873 }
874 874
875 875 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
876 876
877 877 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
878 878 if (ZSTD_isError(zresult)) {
879 879 PyErr_Format(ZstdError, "error setting source size: %s",
880 880 ZSTD_getErrorName(zresult));
881 881 return NULL;
882 882 }
883 883
884 884 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
885 885 if (!chunker) {
886 886 return NULL;
887 887 }
888 888
889 889 chunker->output.dst = PyMem_Malloc(chunkSize);
890 890 if (!chunker->output.dst) {
891 891 PyErr_NoMemory();
892 892 Py_DECREF(chunker);
893 893 return NULL;
894 894 }
895 895 chunker->output.size = chunkSize;
896 896 chunker->output.pos = 0;
897 897
898 898 chunker->compressor = self;
899 899 Py_INCREF(chunker->compressor);
900 900
901 901 chunker->chunkSize = chunkSize;
902 902
903 903 return chunker;
904 904 }
905 905
906 906 typedef struct { /* One input item to be compressed. */
907 907 void* sourceData; /* presumably the start of the input bytes; ownership is not visible here -- confirm */
908 908 size_t sourceSize; /* size of the input; compress_worker() passes it to ZSTD_compressBound() */
909 909 } DataSource;
910 910
911 911 typedef struct { /* A group of DataSource items. */
912 912 DataSource* sources; /* the DataSource items */
913 913 Py_ssize_t sourcesSize; /* presumably the number of entries in sources -- confirm against users */
914 914 unsigned long long totalSourceSize; /* presumably the sum of every sourceSize -- confirm against users */
915 915 } DataSources;
916 916
917 917 typedef struct { /* One output buffer filled by a compression worker. */
918 918 void* dest; /* output memory; compress_worker() obtains it from malloc() */
919 919 Py_ssize_t destSize; /* presumably the size of dest in bytes -- confirm */
920 920 BufferSegment* segments; /* BufferSegment array; compress_worker() obtains it from calloc() */
921 921 Py_ssize_t segmentsSize; /* number of entries allocated in segments */
922 922 } DestBuffer;
923 923
924 924 typedef enum { /* Error codes stored in WorkerState.error. */
925 925 WorkerError_none = 0,
926 926 WorkerError_zstd = 1, /* presumably a zstd call failed, with the code kept in WorkerState.zresult -- confirm */
927 927 WorkerError_no_memory = 2, /* set by compress_worker() when an allocation fails */
928 928 WorkerError_nospace = 3, /* NOTE(review): meaning is not evident from this part of the file */
929 929 } WorkerError;
930 930
931 931 /**
932 932 * Holds state for an individual worker performing multi_compress_to_buffer work.
933 933 */
934 934 typedef struct {
935 935 /* Used for compression. */
936 936 ZSTD_CCtx* cctx;
937 937
938 938 /* What to compress. */
939 939 DataSource* sources;
940 940 Py_ssize_t sourcesSize;
941 941 Py_ssize_t startOffset; /* index in sources of the first item handled by this worker */
942 942 Py_ssize_t endOffset; /* inclusive: compress_worker() counts endOffset - startOffset + 1 items */
943 943 unsigned long long totalSourceSize; /* compress_worker() sizes its first output allocation from this value */
944 944
945 945 /* Result storage. */
946 946 DestBuffer* destBuffers; /* must be NULL on entry to compress_worker(), which allocates it */
947 947 Py_ssize_t destCount; /* must be 0 on entry to compress_worker() */
948 948
949 949 /* Error tracking. */
950 950 WorkerError error;
951 951 size_t zresult; /* presumably the failing zstd return code -- confirm */
952 952 Py_ssize_t errorOffset; /* presumably the index of the item that failed -- confirm */
953 953 } WorkerState;
954 954
955 955 static void compress_worker(WorkerState* state) {
956 956 Py_ssize_t inputOffset = state->startOffset;
957 957 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
958 958 Py_ssize_t currentBufferStartOffset = state->startOffset;
959 959 size_t zresult;
960 960 void* newDest;
961 961 size_t allocationSize;
962 962 size_t boundSize;
963 963 Py_ssize_t destOffset = 0;
964 964 DataSource* sources = state->sources;
965 965 DestBuffer* destBuffer;
966 966
967 967 assert(!state->destBuffers);
968 968 assert(0 == state->destCount);
969 969
970 970 /*
971 971 * The total size of the compressed data is unknown until we actually
972 972 * compress data. That means we can't pre-allocate the exact size we need.
973 973 *
974 974 * There is a cost to every allocation and reallocation. So, it is in our
975 975 * interest to minimize the number of allocations.
976 976 *
977 977 * There is also a cost to too few allocations. If allocations are too
978 978 * large they may fail. If buffers are shared and all inputs become
979 979 * irrelevant at different lifetimes, then a reference to one segment
980 980 * in the buffer will keep the entire buffer alive. This leads to excessive
981 981 * memory usage.
982 982 *
983 983 * Our current strategy is to assume a compression ratio of 16:1 and
984 984 * allocate buffers of that size, rounded up to the nearest power of 2
985 985 * (because computers like round numbers). That ratio is greater than what
986 986 * most inputs achieve. This is by design: we don't want to over-allocate.
987 987 * But we don't want to under-allocate and lead to too many buffers either.
988 988 */
989 989
990 990 state->destCount = 1;
991 991
992 992 state->destBuffers = calloc(1, sizeof(DestBuffer));
993 993 if (NULL == state->destBuffers) {
994 994 state->error = WorkerError_no_memory;
995 995 return;
996 996 }
997 997
998 998 destBuffer = &state->destBuffers[state->destCount - 1];
999 999
1000 1000 /*
1001 1001 * Rather than track bounds and grow the segments buffer, allocate space
1002 1002 * to hold remaining items then truncate when we're done with it.
1003 1003 */
1004 1004 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1005 1005 if (NULL == destBuffer->segments) {
1006 1006 state->error = WorkerError_no_memory;
1007 1007 return;
1008 1008 }
1009 1009
1010 1010 destBuffer->segmentsSize = remainingItems;
1011 1011
1012 1012 assert(state->totalSourceSize <= SIZE_MAX);
1013 1013 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1014 1014
1015 1015 /* If the maximum size of the output is larger than that, round up. */
1016 1016 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
1017 1017
1018 1018 if (boundSize > allocationSize) {
1019 1019 allocationSize = roundpow2(boundSize);
1020 1020 }
1021 1021
1022 1022 destBuffer->dest = malloc(allocationSize);
1023 1023 if (NULL == destBuffer->dest) {
1024 1024 state->error = WorkerError_no_memory;
1025 1025 return;
1026 1026 }
1027 1027
1028 1028 destBuffer->destSize = allocationSize;
1029 1029
1030 1030 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
1031 1031 void* source = sources[inputOffset].sourceData;
1032 1032 size_t sourceSize = sources[inputOffset].sourceSize;
1033 1033 size_t destAvailable;
1034 1034 void* dest;
1035 1035 ZSTD_outBuffer opOutBuffer;
1036 1036 ZSTD_inBuffer opInBuffer;
1037 1037
1038 1038 destAvailable = destBuffer->destSize - destOffset;
1039 1039 boundSize = ZSTD_compressBound(sourceSize);
1040 1040
1041 1041 /*
1042 1042 * Not enough space in current buffer to hold largest compressed output.
1043 1043 * So allocate and switch to a new output buffer.
1044 1044 */
1045 1045 if (boundSize > destAvailable) {
1046 1046 /*
1047 1047 * The downsizing of the existing buffer is optional. It should be cheap
1048 1048 * (unlike growing). So we just do it.
1049 1049 */
1050 1050 if (destAvailable) {
1051 1051 newDest = realloc(destBuffer->dest, destOffset);
1052 1052 if (NULL == newDest) {
1053 1053 state->error = WorkerError_no_memory;
1054 1054 return;
1055 1055 }
1056 1056
1057 1057 destBuffer->dest = newDest;
1058 1058 destBuffer->destSize = destOffset;
1059 1059 }
1060 1060
1061 1061 /* Truncate segments buffer. */
1062 1062 newDest = realloc(destBuffer->segments,
1063 1063 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
1064 1064 if (NULL == newDest) {
1065 1065 state->error = WorkerError_no_memory;
1066 1066 return;
1067 1067 }
1068 1068
1069 1069 destBuffer->segments = newDest;
1070 1070 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
1071 1071
1072 1072 /* Grow space for new struct. */
1073 1073 /* TODO consider over-allocating so we don't do this every time. */
1074 1074 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1075 1075 if (NULL == newDest) {
1076 1076 state->error = WorkerError_no_memory;
1077 1077 return;
1078 1078 }
1079 1079
1080 1080 state->destBuffers = newDest;
1081 1081 state->destCount++;
1082 1082
1083 1083 destBuffer = &state->destBuffers[state->destCount - 1];
1084 1084
1085 1085 /* Don't take any chances with non-NULL pointers. */
1086 1086 memset(destBuffer, 0, sizeof(DestBuffer));
1087 1087
1088 1088 /**
1089 1089 * We could dynamically update allocation size based on work done so far.
1090 1090 * For now, keep is simple.
1091 1091 */
1092 1092 assert(state->totalSourceSize <= SIZE_MAX);
1093 1093 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1094 1094
1095 1095 if (boundSize > allocationSize) {
1096 1096 allocationSize = roundpow2(boundSize);
1097 1097 }
1098 1098
1099 1099 destBuffer->dest = malloc(allocationSize);
1100 1100 if (NULL == destBuffer->dest) {
1101 1101 state->error = WorkerError_no_memory;
1102 1102 return;
1103 1103 }
1104 1104
1105 1105 destBuffer->destSize = allocationSize;
1106 1106 destAvailable = allocationSize;
1107 1107 destOffset = 0;
1108 1108
1109 1109 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1110 1110 if (NULL == destBuffer->segments) {
1111 1111 state->error = WorkerError_no_memory;
1112 1112 return;
1113 1113 }
1114 1114
1115 1115 destBuffer->segmentsSize = remainingItems;
1116 1116 currentBufferStartOffset = inputOffset;
1117 1117 }
1118 1118
1119 1119 dest = (char*)destBuffer->dest + destOffset;
1120 1120
1121 1121 opInBuffer.src = source;
1122 1122 opInBuffer.size = sourceSize;
1123 1123 opInBuffer.pos = 0;
1124 1124
1125 1125 opOutBuffer.dst = dest;
1126 1126 opOutBuffer.size = destAvailable;
1127 1127 opOutBuffer.pos = 0;
1128 1128
1129 1129 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
1130 1130 if (ZSTD_isError(zresult)) {
1131 1131 state->error = WorkerError_zstd;
1132 1132 state->zresult = zresult;
1133 1133 state->errorOffset = inputOffset;
1134 1134 break;
1135 1135 }
1136 1136
1137 1137 zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1138 1138 if (ZSTD_isError(zresult)) {
1139 1139 state->error = WorkerError_zstd;
1140 1140 state->zresult = zresult;
1141 1141 state->errorOffset = inputOffset;
1142 1142 break;
1143 1143 }
1144 1144 else if (zresult) {
1145 1145 state->error = WorkerError_nospace;
1146 1146 state->errorOffset = inputOffset;
1147 1147 break;
1148 1148 }
1149 1149
1150 1150 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1151 1151 destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
1152 1152
1153 1153 destOffset += opOutBuffer.pos;
1154 1154 remainingItems--;
1155 1155 }
1156 1156
1157 1157 if (destBuffer->destSize > destOffset) {
1158 1158 newDest = realloc(destBuffer->dest, destOffset);
1159 1159 if (NULL == newDest) {
1160 1160 state->error = WorkerError_no_memory;
1161 1161 return;
1162 1162 }
1163 1163
1164 1164 destBuffer->dest = newDest;
1165 1165 destBuffer->destSize = destOffset;
1166 1166 }
1167 1167 }
1168 1168
1169 1169 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1170 1170 DataSources* sources, Py_ssize_t threadCount) {
1171 1171 unsigned long long bytesPerWorker;
1172 1172 POOL_ctx* pool = NULL;
1173 1173 WorkerState* workerStates = NULL;
1174 1174 Py_ssize_t i;
1175 1175 unsigned long long workerBytes = 0;
1176 1176 Py_ssize_t workerStartOffset = 0;
1177 1177 Py_ssize_t currentThread = 0;
1178 1178 int errored = 0;
1179 1179 Py_ssize_t segmentsCount = 0;
1180 1180 Py_ssize_t segmentIndex;
1181 1181 PyObject* segmentsArg = NULL;
1182 1182 ZstdBufferWithSegments* buffer;
1183 1183 ZstdBufferWithSegmentsCollection* result = NULL;
1184 1184
1185 1185 assert(sources->sourcesSize > 0);
1186 1186 assert(sources->totalSourceSize > 0);
1187 1187 assert(threadCount >= 1);
1188 1188
1189 1189 /* More threads than inputs makes no sense. */
1190 1190 threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize
1191 1191 : threadCount;
1192 1192
1193 1193 /* TODO lower thread count when input size is too small and threads would add
1194 1194 overhead. */
1195 1195
1196 1196 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1197 1197 if (NULL == workerStates) {
1198 1198 PyErr_NoMemory();
1199 1199 goto finally;
1200 1200 }
1201 1201
1202 1202 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1203 1203
1204 1204 if (threadCount > 1) {
1205 1205 pool = POOL_create(threadCount, 1);
1206 1206 if (NULL == pool) {
1207 1207 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1208 1208 goto finally;
1209 1209 }
1210 1210 }
1211 1211
1212 1212 bytesPerWorker = sources->totalSourceSize / threadCount;
1213 1213
1214 1214 for (i = 0; i < threadCount; i++) {
1215 1215 size_t zresult;
1216 1216
1217 1217 workerStates[i].cctx = ZSTD_createCCtx();
1218 1218 if (!workerStates[i].cctx) {
1219 1219 PyErr_NoMemory();
1220 1220 goto finally;
1221 1221 }
1222 1222
1223 1223 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
1224 1224 compressor->params);
1225 1225 if (ZSTD_isError(zresult)) {
1226 1226 PyErr_Format(ZstdError, "could not set compression parameters: %s",
1227 1227 ZSTD_getErrorName(zresult));
1228 1228 goto finally;
1229 1229 }
1230 1230
1231 1231 if (compressor->dict) {
1232 1232 if (compressor->dict->cdict) {
1233 1233 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
1234 1234 }
1235 1235 else {
1236 1236 zresult = ZSTD_CCtx_loadDictionary_advanced(
1237 1237 workerStates[i].cctx,
1238 1238 compressor->dict->dictData,
1239 1239 compressor->dict->dictSize,
1240 1240 ZSTD_dlm_byRef,
1241 1241 compressor->dict->dictType);
1242 1242 }
1243 1243
1244 1244 if (ZSTD_isError(zresult)) {
1245 1245 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
1246 1246 ZSTD_getErrorName(zresult));
1247 1247 goto finally;
1248 1248 }
1249 1249
1250 1250 }
1251 1251
1252 1252 workerStates[i].sources = sources->sources;
1253 1253 workerStates[i].sourcesSize = sources->sourcesSize;
1254 1254 }
1255 1255
1256 1256 Py_BEGIN_ALLOW_THREADS
1257 1257 for (i = 0; i < sources->sourcesSize; i++) {
1258 1258 workerBytes += sources->sources[i].sourceSize;
1259 1259
1260 1260 /*
1261 1261 * The last worker/thread needs to handle all remaining work. Don't
1262 1262 * trigger it prematurely. Defer to the block outside of the loop
1263 1263 * to run the last worker/thread. But do still process this loop
1264 1264 * so workerBytes is correct.
1265 1265 */
1266 1266 if (currentThread == threadCount - 1) {
1267 1267 continue;
1268 1268 }
1269 1269
1270 1270 if (workerBytes >= bytesPerWorker) {
1271 1271 assert(currentThread < threadCount);
1272 1272 workerStates[currentThread].totalSourceSize = workerBytes;
1273 1273 workerStates[currentThread].startOffset = workerStartOffset;
1274 1274 workerStates[currentThread].endOffset = i;
1275 1275
1276 1276 if (threadCount > 1) {
1277 1277 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1278 1278 }
1279 1279 else {
1280 1280 compress_worker(&workerStates[currentThread]);
1281 1281 }
1282 1282
1283 1283 currentThread++;
1284 1284 workerStartOffset = i + 1;
1285 1285 workerBytes = 0;
1286 1286 }
1287 1287 }
1288 1288
1289 1289 if (workerBytes) {
1290 1290 assert(currentThread < threadCount);
1291 1291 workerStates[currentThread].totalSourceSize = workerBytes;
1292 1292 workerStates[currentThread].startOffset = workerStartOffset;
1293 1293 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1294 1294
1295 1295 if (threadCount > 1) {
1296 1296 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1297 1297 }
1298 1298 else {
1299 1299 compress_worker(&workerStates[currentThread]);
1300 1300 }
1301 1301 }
1302 1302
1303 1303 if (threadCount > 1) {
1304 1304 POOL_free(pool);
1305 1305 pool = NULL;
1306 1306 }
1307 1307
1308 1308 Py_END_ALLOW_THREADS
1309 1309
1310 1310 for (i = 0; i < threadCount; i++) {
1311 1311 switch (workerStates[i].error) {
1312 1312 case WorkerError_no_memory:
1313 1313 PyErr_NoMemory();
1314 1314 errored = 1;
1315 1315 break;
1316 1316
1317 1317 case WorkerError_zstd:
1318 1318 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1319 1319 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1320 1320 errored = 1;
1321 1321 break;
1322 1322
1323 1323 case WorkerError_nospace:
1324 1324 PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output",
1325 1325 workerStates[i].errorOffset);
1326 1326 errored = 1;
1327 1327 break;
1328 1328
1329 1329 default:
1330 1330 ;
1331 1331 }
1332 1332
1333 1333 if (errored) {
1334 1334 break;
1335 1335 }
1336 1336
1337 1337 }
1338 1338
1339 1339 if (errored) {
1340 1340 goto finally;
1341 1341 }
1342 1342
1343 1343 segmentsCount = 0;
1344 1344 for (i = 0; i < threadCount; i++) {
1345 1345 WorkerState* state = &workerStates[i];
1346 1346 segmentsCount += state->destCount;
1347 1347 }
1348 1348
1349 1349 segmentsArg = PyTuple_New(segmentsCount);
1350 1350 if (NULL == segmentsArg) {
1351 1351 goto finally;
1352 1352 }
1353 1353
1354 1354 segmentIndex = 0;
1355 1355
1356 1356 for (i = 0; i < threadCount; i++) {
1357 1357 Py_ssize_t j;
1358 1358 WorkerState* state = &workerStates[i];
1359 1359
1360 1360 for (j = 0; j < state->destCount; j++) {
1361 1361 DestBuffer* destBuffer = &state->destBuffers[j];
1362 1362 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1363 1363 destBuffer->segments, destBuffer->segmentsSize);
1364 1364
1365 1365 if (NULL == buffer) {
1366 1366 goto finally;
1367 1367 }
1368 1368
1369 1369 /* Tell instance to use free() instsead of PyMem_Free(). */
1370 1370 buffer->useFree = 1;
1371 1371
1372 1372 /*
1373 1373 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1374 1374 * Unset it here so it doesn't get freed below.
1375 1375 */
1376 1376 destBuffer->dest = NULL;
1377 1377 destBuffer->segments = NULL;
1378 1378
1379 1379 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1380 1380 }
1381 1381 }
1382 1382
1383 1383 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1384 1384 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1385 1385
1386 1386 finally:
1387 1387 Py_CLEAR(segmentsArg);
1388 1388
1389 1389 if (pool) {
1390 1390 POOL_free(pool);
1391 1391 }
1392 1392
1393 1393 if (workerStates) {
1394 1394 Py_ssize_t j;
1395 1395
1396 1396 for (i = 0; i < threadCount; i++) {
1397 1397 WorkerState state = workerStates[i];
1398 1398
1399 1399 if (state.cctx) {
1400 1400 ZSTD_freeCCtx(state.cctx);
1401 1401 }
1402 1402
1403 1403 /* malloc() is used in worker thread. */
1404 1404
1405 1405 for (j = 0; j < state.destCount; j++) {
1406 1406 if (state.destBuffers) {
1407 1407 free(state.destBuffers[j].dest);
1408 1408 free(state.destBuffers[j].segments);
1409 1409 }
1410 1410 }
1411 1411
1412 1412
1413 1413 free(state.destBuffers);
1414 1414 }
1415 1415
1416 1416 PyMem_Free(workerStates);
1417 1417 }
1418 1418
1419 1419 return result;
1420 1420 }
1421 1421
1422 1422 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1423 1423 "Compress multiple pieces of data as a single operation\n"
1424 1424 "\n"
1425 1425 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1426 1426 "a list of bytes like objects holding data to compress.\n"
1427 1427 "\n"
1428 1428 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1429 1429 "\n"
1430 1430 "This function is optimized to perform multiple compression operations as\n"
1431 1431 "as possible with as little overhead as possbile.\n"
1432 1432 );
1433 1433
1434 1434 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1435 1435 static char* kwlist[] = {
1436 1436 "data",
1437 1437 "threads",
1438 1438 NULL
1439 1439 };
1440 1440
1441 1441 PyObject* data;
1442 1442 int threads = 0;
1443 1443 Py_buffer* dataBuffers = NULL;
1444 1444 DataSources sources;
1445 1445 Py_ssize_t i;
1446 1446 Py_ssize_t sourceCount = 0;
1447 1447 ZstdBufferWithSegmentsCollection* result = NULL;
1448 1448
1449 1449 memset(&sources, 0, sizeof(sources));
1450 1450
1451 1451 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1452 1452 &data, &threads)) {
1453 1453 return NULL;
1454 1454 }
1455 1455
1456 1456 if (threads < 0) {
1457 1457 threads = cpu_count();
1458 1458 }
1459 1459
1460 1460 if (threads < 2) {
1461 1461 threads = 1;
1462 1462 }
1463 1463
1464 1464 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1465 1465 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1466 1466
1467 1467 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1468 1468 if (NULL == sources.sources) {
1469 1469 PyErr_NoMemory();
1470 1470 goto finally;
1471 1471 }
1472 1472
1473 1473 for (i = 0; i < buffer->segmentCount; i++) {
1474 1474 if (buffer->segments[i].length > SIZE_MAX) {
1475 1475 PyErr_Format(PyExc_ValueError,
1476 1476 "buffer segment %zd is too large for this platform", i);
1477 1477 goto finally;
1478 1478 }
1479 1479
1480 1480 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1481 1481 sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
1482 1482 sources.totalSourceSize += buffer->segments[i].length;
1483 1483 }
1484 1484
1485 1485 sources.sourcesSize = buffer->segmentCount;
1486 1486 }
1487 1487 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1488 1488 Py_ssize_t j;
1489 1489 Py_ssize_t offset = 0;
1490 1490 ZstdBufferWithSegments* buffer;
1491 1491 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1492 1492
1493 1493 sourceCount = BufferWithSegmentsCollection_length(collection);
1494 1494
1495 1495 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1496 1496 if (NULL == sources.sources) {
1497 1497 PyErr_NoMemory();
1498 1498 goto finally;
1499 1499 }
1500 1500
1501 1501 for (i = 0; i < collection->bufferCount; i++) {
1502 1502 buffer = collection->buffers[i];
1503 1503
1504 1504 for (j = 0; j < buffer->segmentCount; j++) {
1505 1505 if (buffer->segments[j].length > SIZE_MAX) {
1506 1506 PyErr_Format(PyExc_ValueError,
1507 1507 "buffer segment %zd in buffer %zd is too large for this platform",
1508 1508 j, i);
1509 1509 goto finally;
1510 1510 }
1511 1511
1512 1512 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1513 1513 sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length;
1514 1514 sources.totalSourceSize += buffer->segments[j].length;
1515 1515
1516 1516 offset++;
1517 1517 }
1518 1518 }
1519 1519
1520 1520 sources.sourcesSize = sourceCount;
1521 1521 }
1522 1522 else if (PyList_Check(data)) {
1523 1523 sourceCount = PyList_GET_SIZE(data);
1524 1524
1525 1525 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1526 1526 if (NULL == sources.sources) {
1527 1527 PyErr_NoMemory();
1528 1528 goto finally;
1529 1529 }
1530 1530
1531 1531 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1532 1532 if (NULL == dataBuffers) {
1533 1533 PyErr_NoMemory();
1534 1534 goto finally;
1535 1535 }
1536 1536
1537 1537 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1538 1538
1539 1539 for (i = 0; i < sourceCount; i++) {
1540 1540 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1541 1541 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1542 1542 PyErr_Clear();
1543 1543 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1544 1544 goto finally;
1545 1545 }
1546 1546
1547 1547 sources.sources[i].sourceData = dataBuffers[i].buf;
1548 1548 sources.sources[i].sourceSize = dataBuffers[i].len;
1549 1549 sources.totalSourceSize += dataBuffers[i].len;
1550 1550 }
1551 1551
1552 1552 sources.sourcesSize = sourceCount;
1553 1553 }
1554 1554 else {
1555 1555 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1556 1556 goto finally;
1557 1557 }
1558 1558
1559 1559 if (0 == sources.sourcesSize) {
1560 1560 PyErr_SetString(PyExc_ValueError, "no source elements found");
1561 1561 goto finally;
1562 1562 }
1563 1563
1564 1564 if (0 == sources.totalSourceSize) {
1565 1565 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1566 1566 goto finally;
1567 1567 }
1568 1568
1569 1569 if (sources.totalSourceSize > SIZE_MAX) {
1570 1570 PyErr_SetString(PyExc_ValueError, "sources are too large for this platform");
1571 1571 goto finally;
1572 1572 }
1573 1573
1574 1574 result = compress_from_datasources(self, &sources, threads);
1575 1575
1576 1576 finally:
1577 1577 PyMem_Free(sources.sources);
1578 1578
1579 1579 if (dataBuffers) {
1580 1580 for (i = 0; i < sourceCount; i++) {
1581 1581 PyBuffer_Release(&dataBuffers[i]);
1582 1582 }
1583 1583
1584 1584 PyMem_Free(dataBuffers);
1585 1585 }
1586 1586
1587 1587 return result;
1588 1588 }
1589 1589
/*
 * Method table for ZstdCompressorType (referenced from its tp_methods slot).
 * Entries taking arguments are registered as METH_VARARGS | METH_KEYWORDS;
 * the table ends with the { NULL, NULL } sentinel.
 */
1590 1590 static PyMethodDef ZstdCompressor_methods[] = {
1591 1591 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1592 1592 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1593 1593 { "compress", (PyCFunction)ZstdCompressor_compress,
1594 1594 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1595 1595 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1596 1596 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1597 1597 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1598 1598 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1599 1599 { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
1600 1600 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
1601 1601 { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
1602 1602 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1603 1603 { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
1604 1604 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1605 1605 /* TODO Remove deprecated API ("read_from" maps to the same function as "read_to_iter") */
1606 1606 { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
1607 1607 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1608 1608 /* TODO remove deprecated API ("write_to" maps to the same function as "stream_writer") */
1609 1609 { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
1610 1610 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1611 1611 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1612 1612 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
1613 1613 { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
1614 1614 METH_NOARGS, ZstdCompressor_memory_size__doc__ },
1615 1615 { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
1616 1616 METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
1617 1617 { NULL, NULL }
1618 1618 };
1619 1619
/*
 * Type object for zstd.ZstdCompressor.
 *
 * The initializer is positional, so every entry must stay in slot order.
 * The head is initialized with a NULL type; compressor_module_init() sets it
 * to PyType_Type before calling PyType_Ready(). Py_TPFLAGS_BASETYPE allows
 * the type to be subclassed.
 */
1620 1620 PyTypeObject ZstdCompressorType = {
1621 1621 PyVarObject_HEAD_INIT(NULL, 0)
1622 1622 "zstd.ZstdCompressor", /* tp_name */
1623 1623 sizeof(ZstdCompressor), /* tp_basicsize */
1624 1624 0, /* tp_itemsize */
1625 1625 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
1626 1626 0, /* tp_print */
1627 1627 0, /* tp_getattr */
1628 1628 0, /* tp_setattr */
1629 1629 0, /* tp_compare */
1630 1630 0, /* tp_repr */
1631 1631 0, /* tp_as_number */
1632 1632 0, /* tp_as_sequence */
1633 1633 0, /* tp_as_mapping */
1634 1634 0, /* tp_hash */
1635 1635 0, /* tp_call */
1636 1636 0, /* tp_str */
1637 1637 0, /* tp_getattro */
1638 1638 0, /* tp_setattro */
1639 1639 0, /* tp_as_buffer */
1640 1640 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1641 1641 ZstdCompressor__doc__, /* tp_doc */
1642 1642 0, /* tp_traverse */
1643 1643 0, /* tp_clear */
1644 1644 0, /* tp_richcompare */
1645 1645 0, /* tp_weaklistoffset */
1646 1646 0, /* tp_iter */
1647 1647 0, /* tp_iternext */
1648 1648 ZstdCompressor_methods, /* tp_methods */
1649 1649 0, /* tp_members */
1650 1650 0, /* tp_getset */
1651 1651 0, /* tp_base */
1652 1652 0, /* tp_dict */
1653 1653 0, /* tp_descr_get */
1654 1654 0, /* tp_descr_set */
1655 1655 0, /* tp_dictoffset */
1656 1656 (initproc)ZstdCompressor_init, /* tp_init */
1657 1657 0, /* tp_alloc */
1658 1658 PyType_GenericNew, /* tp_new */
1659 1659 };
1660 1660
1661 1661 void compressor_module_init(PyObject* mod) {
1662 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
1662 Py_SET_TYPE(&ZstdCompressorType, &PyType_Type);
1663 1663 if (PyType_Ready(&ZstdCompressorType) < 0) {
1664 1664 return;
1665 1665 }
1666 1666
1667 1667 Py_INCREF((PyObject*)&ZstdCompressorType);
1668 1668 PyModule_AddObject(mod, "ZstdCompressor",
1669 1669 (PyObject*)&ZstdCompressorType);
1670 1670 }
@@ -1,235 +1,235 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
/*
 * Smaller of a and b. This is a function-like macro, so each argument may be
 * evaluated twice; do not pass expressions with side effects.
 */
11 11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12 12
/* Exception type this file raises for zstd failures; declared here, defined in another file. */
13 13 extern PyObject* ZstdError;
14 14
/* Docstring used as tp_doc of ZstdCompressorIteratorType. */
15 15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
16 16 "Represents an iterator of compressed data.\n"
17 17 );
18 18
19 19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
20 20 Py_XDECREF(self->readResult);
21 21 Py_XDECREF(self->compressor);
22 22 Py_XDECREF(self->reader);
23 23
24 24 if (self->buffer.buf) {
25 25 PyBuffer_Release(&self->buffer);
26 26 memset(&self->buffer, 0, sizeof(self->buffer));
27 27 }
28 28
29 29 if (self->output.dst) {
30 30 PyMem_Free(self->output.dst);
31 31 self->output.dst = NULL;
32 32 }
33 33
34 34 PyObject_Del(self);
35 35 }
36 36
37 37 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
38 38 Py_INCREF(self);
39 39 return self;
40 40 }
41 41
42 42 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
43 43 size_t zresult;
44 44 PyObject* readResult = NULL;
45 45 PyObject* chunk;
46 46 char* readBuffer;
47 47 Py_ssize_t readSize = 0;
48 48 Py_ssize_t bufferRemaining;
49 49
50 50 if (self->finishedOutput) {
51 51 PyErr_SetString(PyExc_StopIteration, "output flushed");
52 52 return NULL;
53 53 }
54 54
55 55 feedcompressor:
56 56
57 57 /* If we have data left in the input, consume it. */
58 58 if (self->input.pos < self->input.size) {
59 59 Py_BEGIN_ALLOW_THREADS
60 60 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
61 61 &self->input, ZSTD_e_continue);
62 62 Py_END_ALLOW_THREADS
63 63
64 64 /* Release the Python object holding the input buffer. */
65 65 if (self->input.pos == self->input.size) {
66 66 self->input.src = NULL;
67 67 self->input.pos = 0;
68 68 self->input.size = 0;
69 69 Py_DECREF(self->readResult);
70 70 self->readResult = NULL;
71 71 }
72 72
73 73 if (ZSTD_isError(zresult)) {
74 74 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
75 75 return NULL;
76 76 }
77 77
78 78 /* If it produced output data, emit it. */
79 79 if (self->output.pos) {
80 80 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
81 81 self->output.pos = 0;
82 82 return chunk;
83 83 }
84 84 }
85 85
86 86 /* We should never have output data sitting around after a previous call. */
87 87 assert(self->output.pos == 0);
88 88
89 89 /* The code above should have either emitted a chunk and returned or consumed
90 90 the entire input buffer. So the state of the input buffer is not
91 91 relevant. */
92 92 if (!self->finishedInput) {
93 93 if (self->reader) {
94 94 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
95 95 if (!readResult) {
96 96 PyErr_SetString(ZstdError, "could not read() from source");
97 97 return NULL;
98 98 }
99 99
100 100 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
101 101 }
102 102 else {
103 103 assert(self->buffer.buf);
104 104
105 105 /* Only support contiguous C arrays. */
106 106 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
107 107 assert(self->buffer.itemsize == 1);
108 108
109 109 readBuffer = (char*)self->buffer.buf + self->bufferOffset;
110 110 bufferRemaining = self->buffer.len - self->bufferOffset;
111 111 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
112 112 self->bufferOffset += readSize;
113 113 }
114 114
115 115 if (0 == readSize) {
116 116 Py_XDECREF(readResult);
117 117 self->finishedInput = 1;
118 118 }
119 119 else {
120 120 self->readResult = readResult;
121 121 }
122 122 }
123 123
124 124 /* EOF */
125 125 if (0 == readSize) {
126 126 self->input.src = NULL;
127 127 self->input.size = 0;
128 128 self->input.pos = 0;
129 129
130 130 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
131 131 &self->input, ZSTD_e_end);
132 132 if (ZSTD_isError(zresult)) {
133 133 PyErr_Format(ZstdError, "error ending compression stream: %s",
134 134 ZSTD_getErrorName(zresult));
135 135 return NULL;
136 136 }
137 137
138 138 assert(self->output.pos);
139 139
140 140 if (0 == zresult) {
141 141 self->finishedOutput = 1;
142 142 }
143 143
144 144 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
145 145 self->output.pos = 0;
146 146 return chunk;
147 147 }
148 148
149 149 /* New data from reader. Feed into compressor. */
150 150 self->input.src = readBuffer;
151 151 self->input.size = readSize;
152 152 self->input.pos = 0;
153 153
154 154 Py_BEGIN_ALLOW_THREADS
155 155 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
156 156 &self->input, ZSTD_e_continue);
157 157 Py_END_ALLOW_THREADS
158 158
159 159 /* The input buffer currently points to memory managed by Python
160 160 (readBuffer). This object was allocated by this function. If it wasn't
161 161 fully consumed, we need to release it in a subsequent function call.
162 162 If it is fully consumed, do that now.
163 163 */
164 164 if (self->input.pos == self->input.size) {
165 165 self->input.src = NULL;
166 166 self->input.pos = 0;
167 167 self->input.size = 0;
168 168 Py_XDECREF(self->readResult);
169 169 self->readResult = NULL;
170 170 }
171 171
172 172 if (ZSTD_isError(zresult)) {
173 173 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
174 174 return NULL;
175 175 }
176 176
177 177 assert(self->input.pos <= self->input.size);
178 178
179 179 /* If we didn't write anything, start the process over. */
180 180 if (0 == self->output.pos) {
181 181 goto feedcompressor;
182 182 }
183 183
184 184 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
185 185 self->output.pos = 0;
186 186 return chunk;
187 187 }
188 188
/*
 * Type object for zstd.ZstdCompressorIterator.
 *
 * The initializer is positional, so every entry must stay in slot order.
 * The head is initialized with a NULL type; compressoriterator_module_init()
 * sets it to PyType_Type before calling PyType_Ready(). The type has no
 * method table; it only provides tp_iter and tp_iternext.
 */
189 189 PyTypeObject ZstdCompressorIteratorType = {
190 190 PyVarObject_HEAD_INIT(NULL, 0)
191 191 "zstd.ZstdCompressorIterator", /* tp_name */
192 192 sizeof(ZstdCompressorIterator), /* tp_basicsize */
193 193 0, /* tp_itemsize */
194 194 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
195 195 0, /* tp_print */
196 196 0, /* tp_getattr */
197 197 0, /* tp_setattr */
198 198 0, /* tp_compare */
199 199 0, /* tp_repr */
200 200 0, /* tp_as_number */
201 201 0, /* tp_as_sequence */
202 202 0, /* tp_as_mapping */
203 203 0, /* tp_hash */
204 204 0, /* tp_call */
205 205 0, /* tp_str */
206 206 0, /* tp_getattro */
207 207 0, /* tp_setattro */
208 208 0, /* tp_as_buffer */
209 209 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
210 210 ZstdCompressorIterator__doc__, /* tp_doc */
211 211 0, /* tp_traverse */
212 212 0, /* tp_clear */
213 213 0, /* tp_richcompare */
214 214 0, /* tp_weaklistoffset */
215 215 ZstdCompressorIterator_iter, /* tp_iter */
216 216 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
217 217 0, /* tp_methods */
218 218 0, /* tp_members */
219 219 0, /* tp_getset */
220 220 0, /* tp_base */
221 221 0, /* tp_dict */
222 222 0, /* tp_descr_get */
223 223 0, /* tp_descr_set */
224 224 0, /* tp_dictoffset */
225 225 0, /* tp_init */
226 226 0, /* tp_alloc */
227 227 PyType_GenericNew, /* tp_new */
228 228 };
229 229
230 230 void compressoriterator_module_init(PyObject* mod) {
231 Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
231 Py_SET_TYPE(&ZstdCompressorIteratorType, &PyType_Type);
232 232 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
233 233 return;
234 234 }
235 235 }
@@ -1,781 +1,781 b''
1 1 /**
2 2 * Copyright (c) 2017-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 static void set_unsupported_operation(void) {
14 14 PyObject* iomod;
15 15 PyObject* exc;
16 16
17 17 iomod = PyImport_ImportModule("io");
18 18 if (NULL == iomod) {
19 19 return;
20 20 }
21 21
22 22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 23 if (NULL == exc) {
24 24 Py_DECREF(iomod);
25 25 return;
26 26 }
27 27
28 28 PyErr_SetNone(exc);
29 29 Py_DECREF(exc);
30 30 Py_DECREF(iomod);
31 31 }
32 32
33 33 static void reader_dealloc(ZstdDecompressionReader* self) {
34 34 Py_XDECREF(self->decompressor);
35 35 Py_XDECREF(self->reader);
36 36
37 37 if (self->buffer.buf) {
38 38 PyBuffer_Release(&self->buffer);
39 39 }
40 40
41 41 PyObject_Del(self);
42 42 }
43 43
44 44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
45 45 if (self->entered) {
46 46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 47 return NULL;
48 48 }
49 49
50 50 self->entered = 1;
51 51
52 52 Py_INCREF(self);
53 53 return self;
54 54 }
55 55
56 56 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
57 57 PyObject* exc_type;
58 58 PyObject* exc_value;
59 59 PyObject* exc_tb;
60 60
61 61 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
62 62 return NULL;
63 63 }
64 64
65 65 self->entered = 0;
66 66 self->closed = 1;
67 67
68 68 /* Release resources. */
69 69 Py_CLEAR(self->reader);
70 70 if (self->buffer.buf) {
71 71 PyBuffer_Release(&self->buffer);
72 72 memset(&self->buffer, 0, sizeof(self->buffer));
73 73 }
74 74
75 75 Py_CLEAR(self->decompressor);
76 76
77 77 Py_RETURN_FALSE;
78 78 }
79 79
/* readable(): always returns True. */
static PyObject* reader_readable(PyObject* self) {
    Py_RETURN_TRUE;
}
83 83
/* writable(): always returns False. */
static PyObject* reader_writable(PyObject* self) {
    Py_RETURN_FALSE;
}
87 87
/* seekable(): always returns True. */
static PyObject* reader_seekable(PyObject* self) {
    Py_RETURN_TRUE;
}
91 91
/*
 * close(): flag the stream as closed and return None.
 *
 * Only the flag is set here; no references are released by this function.
 */
static PyObject* reader_close(ZstdDecompressionReader* self) {
    self->closed = 1;
    Py_RETURN_NONE;
}
96 96
/* flush(): no-op; returns None. */
static PyObject* reader_flush(PyObject* self) {
    Py_RETURN_NONE;
}
100 100
/* isatty(): always returns False. */
static PyObject* reader_isatty(PyObject* self) {
    Py_RETURN_FALSE;
}
104 104
105 105 /**
106 106 * Read available input.
107 107 *
108 108 * Returns 0 if no data was added to input.
109 109 * Returns 1 if new input data is available.
110 110 * Returns -1 on error and sets a Python exception as a side-effect.
111 111 */
112 112 int read_decompressor_input(ZstdDecompressionReader* self) {
113 113 if (self->finishedInput) {
114 114 return 0;
115 115 }
116 116
117 117 if (self->input.pos != self->input.size) {
118 118 return 0;
119 119 }
120 120
121 121 if (self->reader) {
122 122 Py_buffer buffer;
123 123
124 124 assert(self->readResult == NULL);
125 125 self->readResult = PyObject_CallMethod(self->reader, "read",
126 126 "k", self->readSize);
127 127 if (NULL == self->readResult) {
128 128 return -1;
129 129 }
130 130
131 131 memset(&buffer, 0, sizeof(buffer));
132 132
133 133 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
134 134 return -1;
135 135 }
136 136
137 137 /* EOF */
138 138 if (0 == buffer.len) {
139 139 self->finishedInput = 1;
140 140 Py_CLEAR(self->readResult);
141 141 }
142 142 else {
143 143 self->input.src = buffer.buf;
144 144 self->input.size = buffer.len;
145 145 self->input.pos = 0;
146 146 }
147 147
148 148 PyBuffer_Release(&buffer);
149 149 }
150 150 else {
151 151 assert(self->buffer.buf);
152 152 /*
153 153 * We should only get here once since expectation is we always
154 154 * exhaust input buffer before reading again.
155 155 */
156 156 assert(self->input.src == NULL);
157 157
158 158 self->input.src = self->buffer.buf;
159 159 self->input.size = self->buffer.len;
160 160 self->input.pos = 0;
161 161 }
162 162
163 163 return 1;
164 164 }
165 165
166 166 /**
167 167 * Decompresses available input into an output buffer.
168 168 *
169 169 * Returns 0 if we need more input.
170 170 * Returns 1 if output buffer should be emitted.
171 171 * Returns -1 on error and sets a Python exception.
172 172 */
173 173 int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
174 174 size_t zresult;
175 175
176 176 if (self->input.pos >= self->input.size) {
177 177 return 0;
178 178 }
179 179
180 180 Py_BEGIN_ALLOW_THREADS
181 181 zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
182 182 Py_END_ALLOW_THREADS
183 183
184 184 /* Input exhausted. Clear our state tracking. */
185 185 if (self->input.pos == self->input.size) {
186 186 memset(&self->input, 0, sizeof(self->input));
187 187 Py_CLEAR(self->readResult);
188 188
189 189 if (self->buffer.buf) {
190 190 self->finishedInput = 1;
191 191 }
192 192 }
193 193
194 194 if (ZSTD_isError(zresult)) {
195 195 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
196 196 return -1;
197 197 }
198 198
199 199 /* We fulfilled the full read request. Signal to emit. */
200 200 if (output->pos && output->pos == output->size) {
201 201 return 1;
202 202 }
203 203 /* We're at the end of a frame and we aren't allowed to return data
204 204 spanning frames. */
205 205 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
206 206 return 1;
207 207 }
208 208
209 209 /* There is more room in the output. Signal to collect more data. */
210 210 return 0;
211 211 }
212 212
213 213 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
214 214 static char* kwlist[] = {
215 215 "size",
216 216 NULL
217 217 };
218 218
219 219 Py_ssize_t size = -1;
220 220 PyObject* result = NULL;
221 221 char* resultBuffer;
222 222 Py_ssize_t resultSize;
223 223 ZSTD_outBuffer output;
224 224 int decompressResult, readResult;
225 225
226 226 if (self->closed) {
227 227 PyErr_SetString(PyExc_ValueError, "stream is closed");
228 228 return NULL;
229 229 }
230 230
231 231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
232 232 return NULL;
233 233 }
234 234
235 235 if (size < -1) {
236 236 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
237 237 return NULL;
238 238 }
239 239
240 240 if (size == -1) {
241 241 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
242 242 }
243 243
244 244 if (self->finishedOutput || size == 0) {
245 245 return PyBytes_FromStringAndSize("", 0);
246 246 }
247 247
248 248 result = PyBytes_FromStringAndSize(NULL, size);
249 249 if (NULL == result) {
250 250 return NULL;
251 251 }
252 252
253 253 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
254 254
255 255 output.dst = resultBuffer;
256 256 output.size = resultSize;
257 257 output.pos = 0;
258 258
259 259 readinput:
260 260
261 261 decompressResult = decompress_input(self, &output);
262 262
263 263 if (-1 == decompressResult) {
264 264 Py_XDECREF(result);
265 265 return NULL;
266 266 }
267 267 else if (0 == decompressResult) { }
268 268 else if (1 == decompressResult) {
269 269 self->bytesDecompressed += output.pos;
270 270
271 271 if (output.pos != output.size) {
272 272 if (safe_pybytes_resize(&result, output.pos)) {
273 273 Py_XDECREF(result);
274 274 return NULL;
275 275 }
276 276 }
277 277 return result;
278 278 }
279 279 else {
280 280 assert(0);
281 281 }
282 282
283 283 readResult = read_decompressor_input(self);
284 284
285 285 if (-1 == readResult) {
286 286 Py_XDECREF(result);
287 287 return NULL;
288 288 }
289 289 else if (0 == readResult) {}
290 290 else if (1 == readResult) {}
291 291 else {
292 292 assert(0);
293 293 }
294 294
295 295 if (self->input.size) {
296 296 goto readinput;
297 297 }
298 298
299 299 /* EOF */
300 300 self->bytesDecompressed += output.pos;
301 301
302 302 if (safe_pybytes_resize(&result, output.pos)) {
303 303 Py_XDECREF(result);
304 304 return NULL;
305 305 }
306 306
307 307 return result;
308 308 }
309 309
310 310 static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
311 311 static char* kwlist[] = {
312 312 "size",
313 313 NULL
314 314 };
315 315
316 316 Py_ssize_t size = -1;
317 317 PyObject* result = NULL;
318 318 char* resultBuffer;
319 319 Py_ssize_t resultSize;
320 320 ZSTD_outBuffer output;
321 321
322 322 if (self->closed) {
323 323 PyErr_SetString(PyExc_ValueError, "stream is closed");
324 324 return NULL;
325 325 }
326 326
327 327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
328 328 return NULL;
329 329 }
330 330
331 331 if (size < -1) {
332 332 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
333 333 return NULL;
334 334 }
335 335
336 336 if (self->finishedOutput || size == 0) {
337 337 return PyBytes_FromStringAndSize("", 0);
338 338 }
339 339
340 340 if (size == -1) {
341 341 size = ZSTD_DStreamOutSize();
342 342 }
343 343
344 344 result = PyBytes_FromStringAndSize(NULL, size);
345 345 if (NULL == result) {
346 346 return NULL;
347 347 }
348 348
349 349 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
350 350
351 351 output.dst = resultBuffer;
352 352 output.size = resultSize;
353 353 output.pos = 0;
354 354
355 355 /* read1() is supposed to use at most 1 read() from the underlying stream.
356 356 * However, we can't satisfy this requirement with decompression due to the
357 357 * nature of how decompression works. Our strategy is to read + decompress
358 358 * until we get any output, at which point we return. This satisfies the
359 359 * intent of the read1() API to limit read operations.
360 360 */
361 361 while (!self->finishedInput) {
362 362 int readResult, decompressResult;
363 363
364 364 readResult = read_decompressor_input(self);
365 365 if (-1 == readResult) {
366 366 Py_XDECREF(result);
367 367 return NULL;
368 368 }
369 369 else if (0 == readResult || 1 == readResult) { }
370 370 else {
371 371 assert(0);
372 372 }
373 373
374 374 decompressResult = decompress_input(self, &output);
375 375
376 376 if (-1 == decompressResult) {
377 377 Py_XDECREF(result);
378 378 return NULL;
379 379 }
380 380 else if (0 == decompressResult || 1 == decompressResult) { }
381 381 else {
382 382 assert(0);
383 383 }
384 384
385 385 if (output.pos) {
386 386 break;
387 387 }
388 388 }
389 389
390 390 self->bytesDecompressed += output.pos;
391 391 if (safe_pybytes_resize(&result, output.pos)) {
392 392 Py_XDECREF(result);
393 393 return NULL;
394 394 }
395 395
396 396 return result;
397 397 }
398 398
399 399 static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
400 400 Py_buffer dest;
401 401 ZSTD_outBuffer output;
402 402 int decompressResult, readResult;
403 403 PyObject* result = NULL;
404 404
405 405 if (self->closed) {
406 406 PyErr_SetString(PyExc_ValueError, "stream is closed");
407 407 return NULL;
408 408 }
409 409
410 410 if (self->finishedOutput) {
411 411 return PyLong_FromLong(0);
412 412 }
413 413
414 414 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
415 415 return NULL;
416 416 }
417 417
418 418 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
419 419 PyErr_SetString(PyExc_ValueError,
420 420 "destination buffer should be contiguous and have at most one dimension");
421 421 goto finally;
422 422 }
423 423
424 424 output.dst = dest.buf;
425 425 output.size = dest.len;
426 426 output.pos = 0;
427 427
428 428 readinput:
429 429
430 430 decompressResult = decompress_input(self, &output);
431 431
432 432 if (-1 == decompressResult) {
433 433 goto finally;
434 434 }
435 435 else if (0 == decompressResult) { }
436 436 else if (1 == decompressResult) {
437 437 self->bytesDecompressed += output.pos;
438 438 result = PyLong_FromSize_t(output.pos);
439 439 goto finally;
440 440 }
441 441 else {
442 442 assert(0);
443 443 }
444 444
445 445 readResult = read_decompressor_input(self);
446 446
447 447 if (-1 == readResult) {
448 448 goto finally;
449 449 }
450 450 else if (0 == readResult) {}
451 451 else if (1 == readResult) {}
452 452 else {
453 453 assert(0);
454 454 }
455 455
456 456 if (self->input.size) {
457 457 goto readinput;
458 458 }
459 459
460 460 /* EOF */
461 461 self->bytesDecompressed += output.pos;
462 462 result = PyLong_FromSize_t(output.pos);
463 463
464 464 finally:
465 465 PyBuffer_Release(&dest);
466 466
467 467 return result;
468 468 }
469 469
470 470 static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
471 471 Py_buffer dest;
472 472 ZSTD_outBuffer output;
473 473 PyObject* result = NULL;
474 474
475 475 if (self->closed) {
476 476 PyErr_SetString(PyExc_ValueError, "stream is closed");
477 477 return NULL;
478 478 }
479 479
480 480 if (self->finishedOutput) {
481 481 return PyLong_FromLong(0);
482 482 }
483 483
484 484 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
485 485 return NULL;
486 486 }
487 487
488 488 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
489 489 PyErr_SetString(PyExc_ValueError,
490 490 "destination buffer should be contiguous and have at most one dimension");
491 491 goto finally;
492 492 }
493 493
494 494 output.dst = dest.buf;
495 495 output.size = dest.len;
496 496 output.pos = 0;
497 497
498 498 while (!self->finishedInput && !self->finishedOutput) {
499 499 int decompressResult, readResult;
500 500
501 501 readResult = read_decompressor_input(self);
502 502
503 503 if (-1 == readResult) {
504 504 goto finally;
505 505 }
506 506 else if (0 == readResult || 1 == readResult) {}
507 507 else {
508 508 assert(0);
509 509 }
510 510
511 511 decompressResult = decompress_input(self, &output);
512 512
513 513 if (-1 == decompressResult) {
514 514 goto finally;
515 515 }
516 516 else if (0 == decompressResult || 1 == decompressResult) {}
517 517 else {
518 518 assert(0);
519 519 }
520 520
521 521 if (output.pos) {
522 522 break;
523 523 }
524 524 }
525 525
526 526 self->bytesDecompressed += output.pos;
527 527 result = PyLong_FromSize_t(output.pos);
528 528
529 529 finally:
530 530 PyBuffer_Release(&dest);
531 531
532 532 return result;
533 533 }
534 534
535 535 static PyObject* reader_readall(PyObject* self) {
536 536 PyObject* chunks = NULL;
537 537 PyObject* empty = NULL;
538 538 PyObject* result = NULL;
539 539
540 540 /* Our strategy is to collect chunks into a list then join all the
541 541 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
542 542 * this feels simple enough to implement and avoids potentially expensive
543 543 * reallocations of large buffers.
544 544 */
545 545 chunks = PyList_New(0);
546 546 if (NULL == chunks) {
547 547 return NULL;
548 548 }
549 549
550 550 while (1) {
551 551 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
552 552 if (NULL == chunk) {
553 553 Py_DECREF(chunks);
554 554 return NULL;
555 555 }
556 556
557 557 if (!PyBytes_Size(chunk)) {
558 558 Py_DECREF(chunk);
559 559 break;
560 560 }
561 561
562 562 if (PyList_Append(chunks, chunk)) {
563 563 Py_DECREF(chunk);
564 564 Py_DECREF(chunks);
565 565 return NULL;
566 566 }
567 567
568 568 Py_DECREF(chunk);
569 569 }
570 570
571 571 empty = PyBytes_FromStringAndSize("", 0);
572 572 if (NULL == empty) {
573 573 Py_DECREF(chunks);
574 574 return NULL;
575 575 }
576 576
577 577 result = PyObject_CallMethod(empty, "join", "O", chunks);
578 578
579 579 Py_DECREF(empty);
580 580 Py_DECREF(chunks);
581 581
582 582 return result;
583 583 }
584 584
/* readline(): not supported; always fails via set_unsupported_operation(). */
static PyObject* reader_readline(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}
589 589
/* readlines(): not supported; always fails via set_unsupported_operation(). */
static PyObject* reader_readlines(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}
594 594
595 595 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
596 596 Py_ssize_t pos;
597 597 int whence = 0;
598 598 unsigned long long readAmount = 0;
599 599 size_t defaultOutSize = ZSTD_DStreamOutSize();
600 600
601 601 if (self->closed) {
602 602 PyErr_SetString(PyExc_ValueError, "stream is closed");
603 603 return NULL;
604 604 }
605 605
606 606 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
607 607 return NULL;
608 608 }
609 609
610 610 if (whence == SEEK_SET) {
611 611 if (pos < 0) {
612 612 PyErr_SetString(PyExc_ValueError,
613 613 "cannot seek to negative position with SEEK_SET");
614 614 return NULL;
615 615 }
616 616
617 617 if ((unsigned long long)pos < self->bytesDecompressed) {
618 618 PyErr_SetString(PyExc_ValueError,
619 619 "cannot seek zstd decompression stream backwards");
620 620 return NULL;
621 621 }
622 622
623 623 readAmount = pos - self->bytesDecompressed;
624 624 }
625 625 else if (whence == SEEK_CUR) {
626 626 if (pos < 0) {
627 627 PyErr_SetString(PyExc_ValueError,
628 628 "cannot seek zstd decompression stream backwards");
629 629 return NULL;
630 630 }
631 631
632 632 readAmount = pos;
633 633 }
634 634 else if (whence == SEEK_END) {
635 635 /* We /could/ support this with pos==0. But let's not do that until someone
636 636 needs it. */
637 637 PyErr_SetString(PyExc_ValueError,
638 638 "zstd decompression streams cannot be seeked with SEEK_END");
639 639 return NULL;
640 640 }
641 641
642 642 /* It is a bit inefficient to do this via the Python API. But since there
643 643 is a bit of state tracking involved to read from this type, it is the
644 644 easiest to implement. */
645 645 while (readAmount) {
646 646 Py_ssize_t readSize;
647 647 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
648 648 readAmount < defaultOutSize ? readAmount : defaultOutSize);
649 649
650 650 if (!readResult) {
651 651 return NULL;
652 652 }
653 653
654 654 readSize = PyBytes_GET_SIZE(readResult);
655 655
656 656 Py_CLEAR(readResult);
657 657
658 658 /* Empty read means EOF. */
659 659 if (!readSize) {
660 660 break;
661 661 }
662 662
663 663 readAmount -= readSize;
664 664 }
665 665
666 666 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
667 667 }
668 668
/* tell(): return the bytesDecompressed counter as an int. */
static PyObject* reader_tell(ZstdDecompressionReader* self) {
    /* TODO should this raise OSError since stream isn't seekable? */
    return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
}
673 673
/* write(): not supported; always fails via set_unsupported_operation(). */
static PyObject* reader_write(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}
678 678
/* writelines(): not supported; always fails via set_unsupported_operation(). */
static PyObject* reader_writelines(PyObject* self, PyObject* args) {
    set_unsupported_operation();
    return NULL;
}
683 683
/* tp_iter slot: iteration is not supported; always fails via
   set_unsupported_operation(). */
static PyObject* reader_iter(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}
688 688
/* tp_iternext slot: iteration is not supported; always fails via
   set_unsupported_operation(). */
static PyObject* reader_iternext(PyObject* self) {
    set_unsupported_operation();
    return NULL;
}
693 693
694 694 static PyMethodDef reader_methods[] = {
695 695 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
696 696 PyDoc_STR("Enter a compression context") },
697 697 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
698 698 PyDoc_STR("Exit a compression context") },
699 699 { "close", (PyCFunction)reader_close, METH_NOARGS,
700 700 PyDoc_STR("Close the stream so it cannot perform any more operations") },
701 701 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
702 702 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
703 703 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
704 704 PyDoc_STR("Returns True") },
705 705 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
706 706 PyDoc_STR("read compressed data") },
707 707 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
708 708 PyDoc_STR("read compressed data") },
709 709 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
710 710 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
711 711 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
712 712 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
713 713 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
714 714 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
715 715 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
716 716 PyDoc_STR("Returns True") },
717 717 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
718 718 PyDoc_STR("Returns current number of bytes compressed") },
719 719 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
720 720 PyDoc_STR("Returns False") },
721 721 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
722 722 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
723 723 { NULL, NULL }
724 724 };
725 725
/* Attributes exposed directly from the struct: a read-only boolean `closed`.
   The table ends with a NULL sentinel. */
static PyMemberDef reader_members[] = {
    { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
    READONLY, "whether stream is closed" },
    { NULL }
};
731 731
/*
 * Type object for zstd.ZstdDecompressionReader.
 *
 * Slots are filled positionally; the trailing comment on each line names the
 * slot being set. The iterator slots point at functions that always raise
 * (reader_iter / reader_iternext). ob_type is left NULL here and assigned in
 * decompressionreader_module_init().
 */
PyTypeObject ZstdDecompressionReaderType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdDecompressionReader", /* tp_name */
    sizeof(ZstdDecompressionReader), /* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)reader_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT, /* tp_flags */
    0, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    reader_iter, /* tp_iter */
    reader_iternext, /* tp_iternext */
    reader_methods, /* tp_methods */
    reader_members, /* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};
772 772
773 773
774 774 void decompressionreader_module_init(PyObject* mod) {
775 775 /* TODO make reader a sub-class of io.RawIOBase */
776 776
777 Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
777 Py_SET_TYPE(&ZstdDecompressionReaderType, &PyType_Type);
778 778 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
779 779 return;
780 780 }
781 781 }
@@ -1,295 +1,295 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
14 14 """A context manager used for writing decompressed output.\n"
15 15 );
16 16
/* Destructor: drop the references to the decompressor and the destination
   writer (either may be NULL), then free the object. */
static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
    Py_XDECREF(self->decompressor);
    Py_XDECREF(self->writer);

    PyObject_Del(self);
}
23 23
24 24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
25 25 if (self->closed) {
26 26 PyErr_SetString(PyExc_ValueError, "stream is closed");
27 27 return NULL;
28 28 }
29 29
30 30 if (self->entered) {
31 31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 32 return NULL;
33 33 }
34 34
35 35 self->entered = 1;
36 36
37 37 Py_INCREF(self);
38 38 return (PyObject*)self;
39 39 }
40 40
41 41 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
42 42 self->entered = 0;
43 43
44 44 if (NULL == PyObject_CallMethod((PyObject*)self, "close", NULL)) {
45 45 return NULL;
46 46 }
47 47
48 48 Py_RETURN_FALSE;
49 49 }
50 50
/* memory_size(): return ZSTD_sizeof_DCtx() of the decompressor's context as
   an int. */
static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
    return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx));
}
54 54
55 55 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) {
56 56 static char* kwlist[] = {
57 57 "data",
58 58 NULL
59 59 };
60 60
61 61 PyObject* result = NULL;
62 62 Py_buffer source;
63 63 size_t zresult = 0;
64 64 ZSTD_inBuffer input;
65 65 ZSTD_outBuffer output;
66 66 PyObject* res;
67 67 Py_ssize_t totalWrite = 0;
68 68
69 69 #if PY_MAJOR_VERSION >= 3
70 70 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
71 71 #else
72 72 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
73 73 #endif
74 74 kwlist, &source)) {
75 75 return NULL;
76 76 }
77 77
78 78 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
79 79 PyErr_SetString(PyExc_ValueError,
80 80 "data buffer should be contiguous and have at most one dimension");
81 81 goto finally;
82 82 }
83 83
84 84 if (self->closed) {
85 85 PyErr_SetString(PyExc_ValueError, "stream is closed");
86 86 return NULL;
87 87 }
88 88
89 89 output.dst = PyMem_Malloc(self->outSize);
90 90 if (!output.dst) {
91 91 PyErr_NoMemory();
92 92 goto finally;
93 93 }
94 94 output.size = self->outSize;
95 95 output.pos = 0;
96 96
97 97 input.src = source.buf;
98 98 input.size = source.len;
99 99 input.pos = 0;
100 100
101 101 while (input.pos < (size_t)source.len) {
102 102 Py_BEGIN_ALLOW_THREADS
103 103 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
104 104 Py_END_ALLOW_THREADS
105 105
106 106 if (ZSTD_isError(zresult)) {
107 107 PyMem_Free(output.dst);
108 108 PyErr_Format(ZstdError, "zstd decompress error: %s",
109 109 ZSTD_getErrorName(zresult));
110 110 goto finally;
111 111 }
112 112
113 113 if (output.pos) {
114 114 #if PY_MAJOR_VERSION >= 3
115 115 res = PyObject_CallMethod(self->writer, "write", "y#",
116 116 #else
117 117 res = PyObject_CallMethod(self->writer, "write", "s#",
118 118 #endif
119 119 output.dst, output.pos);
120 120 Py_XDECREF(res);
121 121 totalWrite += output.pos;
122 122 output.pos = 0;
123 123 }
124 124 }
125 125
126 126 PyMem_Free(output.dst);
127 127
128 128 if (self->writeReturnRead) {
129 129 result = PyLong_FromSize_t(input.pos);
130 130 }
131 131 else {
132 132 result = PyLong_FromSsize_t(totalWrite);
133 133 }
134 134
135 135 finally:
136 136 PyBuffer_Release(&source);
137 137 return result;
138 138 }
139 139
140 140 static PyObject* ZstdDecompressionWriter_close(ZstdDecompressionWriter* self) {
141 141 PyObject* result;
142 142
143 143 if (self->closed) {
144 144 Py_RETURN_NONE;
145 145 }
146 146
147 147 result = PyObject_CallMethod((PyObject*)self, "flush", NULL);
148 148 self->closed = 1;
149 149
150 150 if (NULL == result) {
151 151 return NULL;
152 152 }
153 153
154 154 /* Call close on underlying stream as well. */
155 155 if (PyObject_HasAttrString(self->writer, "close")) {
156 156 return PyObject_CallMethod(self->writer, "close", NULL);
157 157 }
158 158
159 159 Py_RETURN_NONE;
160 160 }
161 161
162 162 static PyObject* ZstdDecompressionWriter_fileno(ZstdDecompressionWriter* self) {
163 163 if (PyObject_HasAttrString(self->writer, "fileno")) {
164 164 return PyObject_CallMethod(self->writer, "fileno", NULL);
165 165 }
166 166 else {
167 167 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
168 168 return NULL;
169 169 }
170 170 }
171 171
172 172 static PyObject* ZstdDecompressionWriter_flush(ZstdDecompressionWriter* self) {
173 173 if (self->closed) {
174 174 PyErr_SetString(PyExc_ValueError, "stream is closed");
175 175 return NULL;
176 176 }
177 177
178 178 if (PyObject_HasAttrString(self->writer, "flush")) {
179 179 return PyObject_CallMethod(self->writer, "flush", NULL);
180 180 }
181 181 else {
182 182 Py_RETURN_NONE;
183 183 }
184 184 }
185 185
/* Shared implementation for methods that always return False. */
static PyObject* ZstdDecompressionWriter_false(PyObject* self, PyObject* args) {
    Py_RETURN_FALSE;
}
189 189
/* Shared implementation for methods that always return True. */
static PyObject* ZstdDecompressionWriter_true(PyObject* self, PyObject* args) {
    Py_RETURN_TRUE;
}
193 193
194 194 static PyObject* ZstdDecompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
195 195 PyObject* iomod;
196 196 PyObject* exc;
197 197
198 198 iomod = PyImport_ImportModule("io");
199 199 if (NULL == iomod) {
200 200 return NULL;
201 201 }
202 202
203 203 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
204 204 if (NULL == exc) {
205 205 Py_DECREF(iomod);
206 206 return NULL;
207 207 }
208 208
209 209 PyErr_SetNone(exc);
210 210 Py_DECREF(exc);
211 211 Py_DECREF(iomod);
212 212
213 213 return NULL;
214 214 }
215 215
216 216 static PyMethodDef ZstdDecompressionWriter_methods[] = {
217 217 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
218 218 PyDoc_STR("Enter a decompression context.") },
219 219 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
220 220 PyDoc_STR("Exit a decompression context.") },
221 221 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
222 222 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
223 223 { "close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL },
224 224 { "fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL },
225 225 { "flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL },
226 226 { "isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
227 227 { "readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
228 228 { "readline", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
229 229 { "readlines", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
230 230 { "seek", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
231 231 { "seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
232 232 { "tell", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
233 233 { "truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
234 234 { "writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL },
235 235 { "writelines" , (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
236 236 { "read", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
237 237 { "readall", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
238 238 { "readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
239 239 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
240 240 PyDoc_STR("Compress data") },
241 241 { NULL, NULL }
242 242 };
243 243
/* Attributes exposed directly from the struct: a read-only boolean `closed`.
   The table ends with a NULL sentinel. */
static PyMemberDef ZstdDecompressionWriter_members[] = {
    { "closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY, NULL },
    { NULL }
};
248 248
/*
 * Type object for zstd.ZstdDecompressionWriter.
 *
 * Slots are filled positionally; the trailing comment on each line names the
 * slot being set. Only the destructor, flags, docstring, method and member
 * tables and tp_new are populated; every other slot is 0. ob_type is left
 * NULL here and assigned in decompressionwriter_module_init().
 */
PyTypeObject ZstdDecompressionWriterType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "zstd.ZstdDecompressionWriter", /* tp_name */
    sizeof(ZstdDecompressionWriter),/* tp_basicsize */
    0, /* tp_itemsize */
    (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
    0, /* tp_print */
    0, /* tp_getattr */
    0, /* tp_setattr */
    0, /* tp_compare */
    0, /* tp_repr */
    0, /* tp_as_number */
    0, /* tp_as_sequence */
    0, /* tp_as_mapping */
    0, /* tp_hash */
    0, /* tp_call */
    0, /* tp_str */
    0, /* tp_getattro */
    0, /* tp_setattro */
    0, /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    ZstdDecompressionWriter__doc, /* tp_doc */
    0, /* tp_traverse */
    0, /* tp_clear */
    0, /* tp_richcompare */
    0, /* tp_weaklistoffset */
    0, /* tp_iter */
    0, /* tp_iternext */
    ZstdDecompressionWriter_methods,/* tp_methods */
    ZstdDecompressionWriter_members,/* tp_members */
    0, /* tp_getset */
    0, /* tp_base */
    0, /* tp_dict */
    0, /* tp_descr_get */
    0, /* tp_descr_set */
    0, /* tp_dictoffset */
    0, /* tp_init */
    0, /* tp_alloc */
    PyType_GenericNew, /* tp_new */
};
289 289
290 290 void decompressionwriter_module_init(PyObject* mod) {
291 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
291 Py_SET_TYPE(&ZstdDecompressionWriterType, &PyType_Type);
292 292 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
293 293 return;
294 294 }
295 295 }
@@ -1,202 +1,202 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(DecompressionObj__doc__,
14 14 "Perform decompression using a standard library compatible API.\n"
15 15 );
16 16
17 17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 18 Py_XDECREF(self->decompressor);
19 19
20 20 PyObject_Del(self);
21 21 }
22 22
23 23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
24 24 static char* kwlist[] = {
25 25 "data",
26 26 NULL
27 27 };
28 28
29 29 Py_buffer source;
30 30 size_t zresult;
31 31 ZSTD_inBuffer input;
32 32 ZSTD_outBuffer output;
33 33 PyObject* result = NULL;
34 34 Py_ssize_t resultSize = 0;
35 35
36 36 output.dst = NULL;
37 37
38 38 if (self->finished) {
39 39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
40 40 return NULL;
41 41 }
42 42
43 43 #if PY_MAJOR_VERSION >= 3
44 44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress",
45 45 #else
46 46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress",
47 47 #endif
48 48 kwlist, &source)) {
49 49 return NULL;
50 50 }
51 51
52 52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
53 53 PyErr_SetString(PyExc_ValueError,
54 54 "data buffer should be contiguous and have at most one dimension");
55 55 goto finally;
56 56 }
57 57
58 58 /* Special case of empty input. Output will always be empty. */
59 59 if (source.len == 0) {
60 60 result = PyBytes_FromString("");
61 61 goto finally;
62 62 }
63 63
64 64 input.src = source.buf;
65 65 input.size = source.len;
66 66 input.pos = 0;
67 67
68 68 output.dst = PyMem_Malloc(self->outSize);
69 69 if (!output.dst) {
70 70 PyErr_NoMemory();
71 71 goto except;
72 72 }
73 73 output.size = self->outSize;
74 74 output.pos = 0;
75 75
76 76 while (1) {
77 77 Py_BEGIN_ALLOW_THREADS
78 78 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
79 79 Py_END_ALLOW_THREADS
80 80
81 81 if (ZSTD_isError(zresult)) {
82 82 PyErr_Format(ZstdError, "zstd decompressor error: %s",
83 83 ZSTD_getErrorName(zresult));
84 84 goto except;
85 85 }
86 86
87 87 if (0 == zresult) {
88 88 self->finished = 1;
89 89 }
90 90
91 91 if (output.pos) {
92 92 if (result) {
93 93 resultSize = PyBytes_GET_SIZE(result);
94 94 if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) {
95 95 Py_XDECREF(result);
96 96 goto except;
97 97 }
98 98
99 99 memcpy(PyBytes_AS_STRING(result) + resultSize,
100 100 output.dst, output.pos);
101 101 }
102 102 else {
103 103 result = PyBytes_FromStringAndSize(output.dst, output.pos);
104 104 if (!result) {
105 105 goto except;
106 106 }
107 107 }
108 108 }
109 109
110 110 if (zresult == 0 || (input.pos == input.size && output.pos == 0)) {
111 111 break;
112 112 }
113 113
114 114 output.pos = 0;
115 115 }
116 116
117 117 if (!result) {
118 118 result = PyBytes_FromString("");
119 119 }
120 120
121 121 goto finally;
122 122
123 123 except:
124 124 Py_CLEAR(result);
125 125
126 126 finally:
127 127 PyMem_Free(output.dst);
128 128 PyBuffer_Release(&source);
129 129
130 130 return result;
131 131 }
132 132
133 133 static PyObject* DecompressionObj_flush(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
134 134 static char* kwlist[] = {
135 135 "length",
136 136 NULL
137 137 };
138 138
139 139 PyObject* length = NULL;
140 140
141 141 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:flush", kwlist, &length)) {
142 142 return NULL;
143 143 }
144 144
145 145 Py_RETURN_NONE;
146 146 }
147 147
/* Method table for ZstdDecompressionObj. Both entries take positional and
 * keyword arguments; the { NULL, NULL } entry terminates the table. */
148 148 static PyMethodDef DecompressionObj_methods[] = {
149 149 { "decompress", (PyCFunction)DecompressionObj_decompress,
150 150 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
151 151 { "flush", (PyCFunction)DecompressionObj_flush,
152 152 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("no-op") },
153 153 { NULL, NULL }
154 154 };
155 155
/* Static type object for "zstd.ZstdDecompressionObj".
 *
 * The header is initialized with a NULL type pointer; the metatype is
 * assigned in decompressobj_module_init() before PyType_Ready() runs.
 * Only the deallocator, flags, docstring, method table and tp_new are
 * populated; there is no member table and tp_init is 0, so construction
 * goes through PyType_GenericNew alone. */
156 156 PyTypeObject ZstdDecompressionObjType = {
157 157 PyVarObject_HEAD_INIT(NULL, 0)
158 158 "zstd.ZstdDecompressionObj", /* tp_name */
159 159 sizeof(ZstdDecompressionObj), /* tp_basicsize */
160 160 0, /* tp_itemsize */
161 161 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
162 162 0, /* tp_print */
163 163 0, /* tp_getattr */
164 164 0, /* tp_setattr */
165 165 0, /* tp_compare */
166 166 0, /* tp_repr */
167 167 0, /* tp_as_number */
168 168 0, /* tp_as_sequence */
169 169 0, /* tp_as_mapping */
170 170 0, /* tp_hash */
171 171 0, /* tp_call */
172 172 0, /* tp_str */
173 173 0, /* tp_getattro */
174 174 0, /* tp_setattro */
175 175 0, /* tp_as_buffer */
176 176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
177 177 DecompressionObj__doc__, /* tp_doc */
178 178 0, /* tp_traverse */
179 179 0, /* tp_clear */
180 180 0, /* tp_richcompare */
181 181 0, /* tp_weaklistoffset */
182 182 0, /* tp_iter */
183 183 0, /* tp_iternext */
184 184 DecompressionObj_methods, /* tp_methods */
185 185 0, /* tp_members */
186 186 0, /* tp_getset */
187 187 0, /* tp_base */
188 188 0, /* tp_dict */
189 189 0, /* tp_descr_get */
190 190 0, /* tp_descr_set */
191 191 0, /* tp_dictoffset */
192 192 0, /* tp_init */
193 193 0, /* tp_alloc */
194 194 PyType_GenericNew, /* tp_new */
195 195 };
196 196
197 197 void decompressobj_module_init(PyObject* module) {
198 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
198 Py_SET_TYPE(&ZstdDecompressionObjType, &PyType_Type);
199 199 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
200 200 return;
201 201 }
202 202 }
@@ -1,1822 +1,1822 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10 #include "pool.h"
11 11
12 12 extern PyObject* ZstdError;
13 13
14 14 /**
15 15 * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
16 16 */
17 17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
18 18 size_t zresult;
19 19
20 20 ZSTD_DCtx_reset(decompressor->dctx, ZSTD_reset_session_only);
21 21
22 22 if (decompressor->maxWindowSize) {
23 23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
24 24 if (ZSTD_isError(zresult)) {
25 25 PyErr_Format(ZstdError, "unable to set max window size: %s",
26 26 ZSTD_getErrorName(zresult));
27 27 return 1;
28 28 }
29 29 }
30 30
31 31 zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
32 32 if (ZSTD_isError(zresult)) {
33 33 PyErr_Format(ZstdError, "unable to set decoding format: %s",
34 34 ZSTD_getErrorName(zresult));
35 35 return 1;
36 36 }
37 37
38 38 if (loadDict && decompressor->dict) {
39 39 if (ensure_ddict(decompressor->dict)) {
40 40 return 1;
41 41 }
42 42
43 43 zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
44 44 if (ZSTD_isError(zresult)) {
45 45 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
46 46 ZSTD_getErrorName(zresult));
47 47 return 1;
48 48 }
49 49 }
50 50
51 51 return 0;
52 52 }
53 53
54 54 PyDoc_STRVAR(Decompressor__doc__,
55 55 "ZstdDecompressor(dict_data=None)\n"
56 56 "\n"
57 57 "Create an object used to perform Zstandard decompression.\n"
58 58 "\n"
59 59 "An instance can perform multiple decompression operations."
60 60 );
61 61
62 62 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
63 63 static char* kwlist[] = {
64 64 "dict_data",
65 65 "max_window_size",
66 66 "format",
67 67 NULL
68 68 };
69 69
70 70 ZstdCompressionDict* dict = NULL;
71 71 Py_ssize_t maxWindowSize = 0;
72 72 ZSTD_format_e format = ZSTD_f_zstd1;
73 73
74 74 self->dctx = NULL;
75 75 self->dict = NULL;
76 76
77 77 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!nI:ZstdDecompressor", kwlist,
78 78 &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) {
79 79 return -1;
80 80 }
81 81
82 82 self->dctx = ZSTD_createDCtx();
83 83 if (!self->dctx) {
84 84 PyErr_NoMemory();
85 85 goto except;
86 86 }
87 87
88 88 self->maxWindowSize = maxWindowSize;
89 89 self->format = format;
90 90
91 91 if (dict) {
92 92 self->dict = dict;
93 93 Py_INCREF(dict);
94 94 }
95 95
96 96 if (ensure_dctx(self, 1)) {
97 97 goto except;
98 98 }
99 99
100 100 return 0;
101 101
102 102 except:
103 103 Py_CLEAR(self->dict);
104 104
105 105 if (self->dctx) {
106 106 ZSTD_freeDCtx(self->dctx);
107 107 self->dctx = NULL;
108 108 }
109 109
110 110 return -1;
111 111 }
112 112
113 113 static void Decompressor_dealloc(ZstdDecompressor* self) {
114 114 Py_CLEAR(self->dict);
115 115
116 116 if (self->dctx) {
117 117 ZSTD_freeDCtx(self->dctx);
118 118 self->dctx = NULL;
119 119 }
120 120
121 121 PyObject_Del(self);
122 122 }
123 123
124 124 PyDoc_STRVAR(Decompressor_memory_size__doc__,
125 125 "memory_size() -- Size of decompression context, in bytes\n"
126 126 );
127 127
128 128 static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
129 129 if (self->dctx) {
130 130 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
131 131 }
132 132 else {
133 133 PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
134 134 return NULL;
135 135 }
136 136 }
137 137
138 138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
139 139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
140 140 "\n"
141 141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
142 142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
143 143 "``write(data)`` method.\n"
144 144 "\n"
145 145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
146 146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
147 147 "to the default input and output sizes of zstd decompressor streams.\n"
148 148 );
149 149
150 150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
151 151 static char* kwlist[] = {
152 152 "ifh",
153 153 "ofh",
154 154 "read_size",
155 155 "write_size",
156 156 NULL
157 157 };
158 158
159 159 PyObject* source;
160 160 PyObject* dest;
161 161 size_t inSize = ZSTD_DStreamInSize();
162 162 size_t outSize = ZSTD_DStreamOutSize();
163 163 ZSTD_inBuffer input;
164 164 ZSTD_outBuffer output;
165 165 Py_ssize_t totalRead = 0;
166 166 Py_ssize_t totalWrite = 0;
167 167 char* readBuffer;
168 168 Py_ssize_t readSize;
169 169 PyObject* readResult = NULL;
170 170 PyObject* res = NULL;
171 171 size_t zresult = 0;
172 172 PyObject* writeResult;
173 173 PyObject* totalReadPy;
174 174 PyObject* totalWritePy;
175 175
176 176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
177 177 &source, &dest, &inSize, &outSize)) {
178 178 return NULL;
179 179 }
180 180
181 181 if (!PyObject_HasAttrString(source, "read")) {
182 182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
183 183 return NULL;
184 184 }
185 185
186 186 if (!PyObject_HasAttrString(dest, "write")) {
187 187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
188 188 return NULL;
189 189 }
190 190
191 191 /* Prevent free on uninitialized memory in finally. */
192 192 output.dst = NULL;
193 193
194 194 if (ensure_dctx(self, 1)) {
195 195 res = NULL;
196 196 goto finally;
197 197 }
198 198
199 199 output.dst = PyMem_Malloc(outSize);
200 200 if (!output.dst) {
201 201 PyErr_NoMemory();
202 202 res = NULL;
203 203 goto finally;
204 204 }
205 205 output.size = outSize;
206 206 output.pos = 0;
207 207
208 208 /* Read source stream until EOF */
209 209 while (1) {
210 210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
211 211 if (!readResult) {
212 212 PyErr_SetString(ZstdError, "could not read() from source");
213 213 goto finally;
214 214 }
215 215
216 216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
217 217
218 218 /* If no data was read, we're at EOF. */
219 219 if (0 == readSize) {
220 220 break;
221 221 }
222 222
223 223 totalRead += readSize;
224 224
225 225 /* Send data to decompressor */
226 226 input.src = readBuffer;
227 227 input.size = readSize;
228 228 input.pos = 0;
229 229
230 230 while (input.pos < input.size) {
231 231 Py_BEGIN_ALLOW_THREADS
232 232 zresult = ZSTD_decompressStream(self->dctx, &output, &input);
233 233 Py_END_ALLOW_THREADS
234 234
235 235 if (ZSTD_isError(zresult)) {
236 236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
237 237 ZSTD_getErrorName(zresult));
238 238 res = NULL;
239 239 goto finally;
240 240 }
241 241
242 242 if (output.pos) {
243 243 #if PY_MAJOR_VERSION >= 3
244 244 writeResult = PyObject_CallMethod(dest, "write", "y#",
245 245 #else
246 246 writeResult = PyObject_CallMethod(dest, "write", "s#",
247 247 #endif
248 248 output.dst, output.pos);
249 249
250 250 Py_XDECREF(writeResult);
251 251 totalWrite += output.pos;
252 252 output.pos = 0;
253 253 }
254 254 }
255 255
256 256 Py_CLEAR(readResult);
257 257 }
258 258
259 259 /* Source stream is exhausted. Finish up. */
260 260
261 261 totalReadPy = PyLong_FromSsize_t(totalRead);
262 262 totalWritePy = PyLong_FromSsize_t(totalWrite);
263 263 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
264 264 Py_DECREF(totalReadPy);
265 265 Py_DECREF(totalWritePy);
266 266
267 267 finally:
268 268 if (output.dst) {
269 269 PyMem_Free(output.dst);
270 270 }
271 271
272 272 Py_XDECREF(readResult);
273 273
274 274 return res;
275 275 }
276 276
277 277 PyDoc_STRVAR(Decompressor_decompress__doc__,
278 278 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
279 279 "\n"
280 280 "This method will decompress the entirety of the argument and return the\n"
281 281 "result.\n"
282 282 "\n"
283 283 "The input bytes are expected to contain a full Zstandard frame (something\n"
284 284 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
285 285 "not contain a full frame, an exception will be raised.\n"
286 286 "\n"
287 287 "If the frame header of the compressed data does not contain the content size\n"
288 288 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
289 289 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
290 290 "be made to perform decompression into that buffer. If the buffer is too\n"
291 291 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
292 292 "be resized if it is too large.\n"
293 293 "\n"
294 294 "Uncompressed data could be much larger than compressed data. As a result,\n"
295 295 "calling this function could result in a very large memory allocation being\n"
296 296 "performed to hold the uncompressed data. Therefore it is **highly**\n"
297 297 "recommended to use a streaming decompression method instead of this one.\n"
298 298 );
299 299
300 300 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
301 301 static char* kwlist[] = {
302 302 "data",
303 303 "max_output_size",
304 304 NULL
305 305 };
306 306
307 307 Py_buffer source;
308 308 Py_ssize_t maxOutputSize = 0;
309 309 unsigned long long decompressedSize;
310 310 size_t destCapacity;
311 311 PyObject* result = NULL;
312 312 size_t zresult;
313 313 ZSTD_outBuffer outBuffer;
314 314 ZSTD_inBuffer inBuffer;
315 315
316 316 #if PY_MAJOR_VERSION >= 3
317 317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
318 318 #else
319 319 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
320 320 #endif
321 321 kwlist, &source, &maxOutputSize)) {
322 322 return NULL;
323 323 }
324 324
325 325 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
326 326 PyErr_SetString(PyExc_ValueError,
327 327 "data buffer should be contiguous and have at most one dimension");
328 328 goto finally;
329 329 }
330 330
331 331 if (ensure_dctx(self, 1)) {
332 332 goto finally;
333 333 }
334 334
335 335 decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
336 336
337 337 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
338 338 PyErr_SetString(ZstdError, "error determining content size from frame header");
339 339 goto finally;
340 340 }
341 341 /* Special case of empty frame. */
342 342 else if (0 == decompressedSize) {
343 343 result = PyBytes_FromStringAndSize("", 0);
344 344 goto finally;
345 345 }
346 346 /* Missing content size in frame header. */
347 347 if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
348 348 if (0 == maxOutputSize) {
349 349 PyErr_SetString(ZstdError, "could not determine content size in frame header");
350 350 goto finally;
351 351 }
352 352
353 353 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
354 354 destCapacity = maxOutputSize;
355 355 decompressedSize = 0;
356 356 }
357 357 /* Size is recorded in frame header. */
358 358 else {
359 359 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
360 360 if (decompressedSize > PY_SSIZE_T_MAX) {
361 361 PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
362 362 goto finally;
363 363 }
364 364
365 365 result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
366 366 destCapacity = (size_t)decompressedSize;
367 367 }
368 368
369 369 if (!result) {
370 370 goto finally;
371 371 }
372 372
373 373 outBuffer.dst = PyBytes_AsString(result);
374 374 outBuffer.size = destCapacity;
375 375 outBuffer.pos = 0;
376 376
377 377 inBuffer.src = source.buf;
378 378 inBuffer.size = source.len;
379 379 inBuffer.pos = 0;
380 380
381 381 Py_BEGIN_ALLOW_THREADS
382 382 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
383 383 Py_END_ALLOW_THREADS
384 384
385 385 if (ZSTD_isError(zresult)) {
386 386 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
387 387 Py_CLEAR(result);
388 388 goto finally;
389 389 }
390 390 else if (zresult) {
391 391 PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
392 392 Py_CLEAR(result);
393 393 goto finally;
394 394 }
395 395 else if (decompressedSize && outBuffer.pos != decompressedSize) {
396 396 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
397 397 zresult, decompressedSize);
398 398 Py_CLEAR(result);
399 399 goto finally;
400 400 }
401 401 else if (outBuffer.pos < destCapacity) {
402 402 if (safe_pybytes_resize(&result, outBuffer.pos)) {
403 403 Py_CLEAR(result);
404 404 goto finally;
405 405 }
406 406 }
407 407
408 408 finally:
409 409 PyBuffer_Release(&source);
410 410 return result;
411 411 }
412 412
413 413 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
414 414 "decompressobj([write_size=default])\n"
415 415 "\n"
416 416 "Incrementally feed data into a decompressor.\n"
417 417 "\n"
418 418 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
419 419 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
420 420 "callers can swap in the zstd decompressor while using the same API.\n"
421 421 );
422 422
423 423 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
424 424 static char* kwlist[] = {
425 425 "write_size",
426 426 NULL
427 427 };
428 428
429 429 ZstdDecompressionObj* result = NULL;
430 430 size_t outSize = ZSTD_DStreamOutSize();
431 431
432 432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) {
433 433 return NULL;
434 434 }
435 435
436 436 if (!outSize) {
437 437 PyErr_SetString(PyExc_ValueError, "write_size must be positive");
438 438 return NULL;
439 439 }
440 440
441 441 result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
442 442 if (!result) {
443 443 return NULL;
444 444 }
445 445
446 446 if (ensure_dctx(self, 1)) {
447 447 Py_DECREF(result);
448 448 return NULL;
449 449 }
450 450
451 451 result->decompressor = self;
452 452 Py_INCREF(result->decompressor);
453 453 result->outSize = outSize;
454 454
455 455 return result;
456 456 }
457 457
458 458 PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
459 459 "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
460 460 "Read compressed data and return an iterator\n"
461 461 "\n"
462 462 "Returns an iterator of decompressed data chunks produced from reading from\n"
463 463 "the ``reader``.\n"
464 464 "\n"
465 465 "Compressed data will be obtained from ``reader`` by calling the\n"
466 466 "``read(size)`` method of it. The source data will be streamed into a\n"
467 467 "decompressor. As decompressed data is available, it will be exposed to the\n"
468 468 "returned iterator.\n"
469 469 "\n"
470 470 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
471 471 "iterator in chunks of size ``write_size``. The default values are the input\n"
472 472 "and output sizes for a zstd streaming decompressor.\n"
473 473 "\n"
474 474 "There is also support for skipping the first ``skip_bytes`` of data from\n"
475 475 "the source.\n"
476 476 );
477 477
478 478 static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
479 479 static char* kwlist[] = {
480 480 "reader",
481 481 "read_size",
482 482 "write_size",
483 483 "skip_bytes",
484 484 NULL
485 485 };
486 486
487 487 PyObject* reader;
488 488 size_t inSize = ZSTD_DStreamInSize();
489 489 size_t outSize = ZSTD_DStreamOutSize();
490 490 ZstdDecompressorIterator* result;
491 491 size_t skipBytes = 0;
492 492
493 493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist,
494 494 &reader, &inSize, &outSize, &skipBytes)) {
495 495 return NULL;
496 496 }
497 497
498 498 if (skipBytes >= inSize) {
499 499 PyErr_SetString(PyExc_ValueError,
500 500 "skip_bytes must be smaller than read_size");
501 501 return NULL;
502 502 }
503 503
504 504 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
505 505 if (!result) {
506 506 return NULL;
507 507 }
508 508
509 509 if (PyObject_HasAttrString(reader, "read")) {
510 510 result->reader = reader;
511 511 Py_INCREF(result->reader);
512 512 }
513 513 else if (1 == PyObject_CheckBuffer(reader)) {
514 514 /* Object claims it is a buffer. Try to get a handle to it. */
515 515 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
516 516 goto except;
517 517 }
518 518 }
519 519 else {
520 520 PyErr_SetString(PyExc_ValueError,
521 521 "must pass an object with a read() method or conforms to buffer protocol");
522 522 goto except;
523 523 }
524 524
525 525 result->decompressor = self;
526 526 Py_INCREF(result->decompressor);
527 527
528 528 result->inSize = inSize;
529 529 result->outSize = outSize;
530 530 result->skipBytes = skipBytes;
531 531
532 532 if (ensure_dctx(self, 1)) {
533 533 goto except;
534 534 }
535 535
536 536 result->input.src = PyMem_Malloc(inSize);
537 537 if (!result->input.src) {
538 538 PyErr_NoMemory();
539 539 goto except;
540 540 }
541 541
542 542 goto finally;
543 543
544 544 except:
545 545 Py_CLEAR(result);
546 546
547 547 finally:
548 548
549 549 return result;
550 550 }
551 551
552 552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
553 553 "stream_reader(source, [read_size=default, [read_across_frames=False]])\n"
554 554 "\n"
555 555 "Obtain an object that behaves like an I/O stream that can be used for\n"
556 556 "reading decompressed output from an object.\n"
557 557 "\n"
558 558 "The source object can be any object with a ``read(size)`` method or that\n"
559 559 "conforms to the buffer protocol.\n"
560 560 "\n"
561 561 "``read_across_frames`` controls the behavior of ``read()`` when the end\n"
562 562 "of a zstd frame is reached. When ``True``, ``read()`` can potentially\n"
563 563 "return data belonging to multiple zstd frames. When ``False``, ``read()``\n"
564 564 "will return when the end of a frame is reached.\n"
565 565 );
566 566
567 567 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
568 568 static char* kwlist[] = {
569 569 "source",
570 570 "read_size",
571 571 "read_across_frames",
572 572 NULL
573 573 };
574 574
575 575 PyObject* source;
576 576 size_t readSize = ZSTD_DStreamInSize();
577 577 PyObject* readAcrossFrames = NULL;
578 578 ZstdDecompressionReader* result;
579 579
580 580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_reader", kwlist,
581 581 &source, &readSize, &readAcrossFrames)) {
582 582 return NULL;
583 583 }
584 584
585 585 if (ensure_dctx(self, 1)) {
586 586 return NULL;
587 587 }
588 588
589 589 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
590 590 if (NULL == result) {
591 591 return NULL;
592 592 }
593 593
594 594 if (PyObject_HasAttrString(source, "read")) {
595 595 result->reader = source;
596 596 Py_INCREF(source);
597 597 result->readSize = readSize;
598 598 }
599 599 else if (1 == PyObject_CheckBuffer(source)) {
600 600 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
601 601 Py_CLEAR(result);
602 602 return NULL;
603 603 }
604 604 }
605 605 else {
606 606 PyErr_SetString(PyExc_TypeError,
607 607 "must pass an object with a read() method or that conforms to the buffer protocol");
608 608 Py_CLEAR(result);
609 609 return NULL;
610 610 }
611 611
612 612 result->decompressor = self;
613 613 Py_INCREF(self);
614 614 result->readAcrossFrames = readAcrossFrames ? PyObject_IsTrue(readAcrossFrames) : 0;
615 615
616 616 return result;
617 617 }
618 618
619 619 PyDoc_STRVAR(Decompressor_stream_writer__doc__,
620 620 "Create a context manager to write decompressed data to an object.\n"
621 621 "\n"
622 622 "The passed object must have a ``write()`` method.\n"
623 623 "\n"
624 624 "The caller feeds intput data to the object by calling ``write(data)``.\n"
625 625 "Decompressed data is written to the argument given as it is decompressed.\n"
626 626 "\n"
627 627 "An optional ``write_size`` argument defines the size of chunks to\n"
628 628 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
629 629 "streaming decompressor.\n"
630 630 );
631 631
632 632 static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
633 633 static char* kwlist[] = {
634 634 "writer",
635 635 "write_size",
636 636 "write_return_read",
637 637 NULL
638 638 };
639 639
640 640 PyObject* writer;
641 641 size_t outSize = ZSTD_DStreamOutSize();
642 642 PyObject* writeReturnRead = NULL;
643 643 ZstdDecompressionWriter* result;
644 644
645 645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_writer", kwlist,
646 646 &writer, &outSize, &writeReturnRead)) {
647 647 return NULL;
648 648 }
649 649
650 650 if (!PyObject_HasAttrString(writer, "write")) {
651 651 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
652 652 return NULL;
653 653 }
654 654
655 655 if (ensure_dctx(self, 1)) {
656 656 return NULL;
657 657 }
658 658
659 659 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
660 660 if (!result) {
661 661 return NULL;
662 662 }
663 663
664 664 result->decompressor = self;
665 665 Py_INCREF(result->decompressor);
666 666
667 667 result->writer = writer;
668 668 Py_INCREF(result->writer);
669 669
670 670 result->outSize = outSize;
671 671 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
672 672
673 673 return result;
674 674 }
675 675
676 676 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
677 677 "Decompress a series of chunks using the content dictionary chaining technique\n"
678 678 );
679 679
680 680 static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
681 681 static char* kwlist[] = {
682 682 "frames",
683 683 NULL
684 684 };
685 685
686 686 PyObject* chunks;
687 687 Py_ssize_t chunksLen;
688 688 Py_ssize_t chunkIndex;
689 689 char parity = 0;
690 690 PyObject* chunk;
691 691 char* chunkData;
692 692 Py_ssize_t chunkSize;
693 693 size_t zresult;
694 694 ZSTD_frameHeader frameHeader;
695 695 void* buffer1 = NULL;
696 696 size_t buffer1Size = 0;
697 697 size_t buffer1ContentSize = 0;
698 698 void* buffer2 = NULL;
699 699 size_t buffer2Size = 0;
700 700 size_t buffer2ContentSize = 0;
701 701 void* destBuffer = NULL;
702 702 PyObject* result = NULL;
703 703 ZSTD_outBuffer outBuffer;
704 704 ZSTD_inBuffer inBuffer;
705 705
706 706 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
707 707 kwlist, &PyList_Type, &chunks)) {
708 708 return NULL;
709 709 }
710 710
711 711 chunksLen = PyList_Size(chunks);
712 712 if (!chunksLen) {
713 713 PyErr_SetString(PyExc_ValueError, "empty input chain");
714 714 return NULL;
715 715 }
716 716
717 717 /* The first chunk should not be using a dictionary. We handle it specially. */
718 718 chunk = PyList_GetItem(chunks, 0);
719 719 if (!PyBytes_Check(chunk)) {
720 720 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
721 721 return NULL;
722 722 }
723 723
724 724 /* We require that all chunks be zstd frames and that they have content size set. */
725 725 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
726 726 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
727 727 if (ZSTD_isError(zresult)) {
728 728 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
729 729 return NULL;
730 730 }
731 731 else if (zresult) {
732 732 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
733 733 return NULL;
734 734 }
735 735
736 736 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
737 737 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
738 738 return NULL;
739 739 }
740 740
741 741 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
742 742
743 743 /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
744 744 * result to a Python object and it's length can be no greater than
745 745 * Py_ssize_t. In theory, we could have an intermediate frame that is
746 746 * larger. But a) why would this API be used for frames that large b)
747 747 * it isn't worth the complexity to support. */
748 748 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
749 749 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
750 750 PyErr_SetString(PyExc_ValueError,
751 751 "chunk 0 is too large to decompress on this platform");
752 752 return NULL;
753 753 }
754 754
755 755 if (ensure_dctx(self, 0)) {
756 756 goto finally;
757 757 }
758 758
759 759 buffer1Size = (size_t)frameHeader.frameContentSize;
760 760 buffer1 = PyMem_Malloc(buffer1Size);
761 761 if (!buffer1) {
762 762 goto finally;
763 763 }
764 764
765 765 outBuffer.dst = buffer1;
766 766 outBuffer.size = buffer1Size;
767 767 outBuffer.pos = 0;
768 768
769 769 inBuffer.src = chunkData;
770 770 inBuffer.size = chunkSize;
771 771 inBuffer.pos = 0;
772 772
773 773 Py_BEGIN_ALLOW_THREADS
774 774 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
775 775 Py_END_ALLOW_THREADS
776 776 if (ZSTD_isError(zresult)) {
777 777 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
778 778 goto finally;
779 779 }
780 780 else if (zresult) {
781 781 PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
782 782 goto finally;
783 783 }
784 784
785 785 buffer1ContentSize = outBuffer.pos;
786 786
787 787 /* Special case of a simple chain. */
788 788 if (1 == chunksLen) {
789 789 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
790 790 goto finally;
791 791 }
792 792
793 793 /* This should ideally look at next chunk. But this is slightly simpler. */
794 794 buffer2Size = (size_t)frameHeader.frameContentSize;
795 795 buffer2 = PyMem_Malloc(buffer2Size);
796 796 if (!buffer2) {
797 797 goto finally;
798 798 }
799 799
800 800 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
801 801 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
802 802 used as a content dictionary) and the other holds the new fulltext. The
803 803 buffers grow when needed but never decrease in size. This limits the
804 804 memory allocator overhead.
805 805 */
806 806 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
807 807 chunk = PyList_GetItem(chunks, chunkIndex);
808 808 if (!PyBytes_Check(chunk)) {
809 809 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
810 810 goto finally;
811 811 }
812 812
813 813 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
814 814 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
815 815 if (ZSTD_isError(zresult)) {
816 816 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
817 817 goto finally;
818 818 }
819 819 else if (zresult) {
820 820 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
821 821 goto finally;
822 822 }
823 823
824 824 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
825 825 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
826 826 goto finally;
827 827 }
828 828
829 829 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
830 830
831 831 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
832 832 PyErr_Format(PyExc_ValueError,
833 833 "chunk %zd is too large to decompress on this platform", chunkIndex);
834 834 goto finally;
835 835 }
836 836
837 837 inBuffer.src = chunkData;
838 838 inBuffer.size = chunkSize;
839 839 inBuffer.pos = 0;
840 840
841 841 parity = chunkIndex % 2;
842 842
843 843 /* This could definitely be abstracted to reduce code duplication. */
844 844 if (parity) {
845 845 /* Resize destination buffer to hold larger content. */
846 846 if (buffer2Size < frameHeader.frameContentSize) {
847 847 buffer2Size = (size_t)frameHeader.frameContentSize;
848 848 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
849 849 if (!destBuffer) {
850 850 goto finally;
851 851 }
852 852 buffer2 = destBuffer;
853 853 }
854 854
855 855 Py_BEGIN_ALLOW_THREADS
856 856 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
857 857 buffer1, buffer1ContentSize, ZSTD_dct_rawContent);
858 858 Py_END_ALLOW_THREADS
859 859 if (ZSTD_isError(zresult)) {
860 860 PyErr_Format(ZstdError,
861 861 "failed to load prefix dictionary at chunk %zd", chunkIndex);
862 862 goto finally;
863 863 }
864 864
865 865 outBuffer.dst = buffer2;
866 866 outBuffer.size = buffer2Size;
867 867 outBuffer.pos = 0;
868 868
869 869 Py_BEGIN_ALLOW_THREADS
870 870 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
871 871 Py_END_ALLOW_THREADS
872 872 if (ZSTD_isError(zresult)) {
873 873 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
874 874 chunkIndex, ZSTD_getErrorName(zresult));
875 875 goto finally;
876 876 }
877 877 else if (zresult) {
878 878 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
879 879 chunkIndex);
880 880 goto finally;
881 881 }
882 882
883 883 buffer2ContentSize = outBuffer.pos;
884 884 }
885 885 else {
886 886 if (buffer1Size < frameHeader.frameContentSize) {
887 887 buffer1Size = (size_t)frameHeader.frameContentSize;
888 888 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
889 889 if (!destBuffer) {
890 890 goto finally;
891 891 }
892 892 buffer1 = destBuffer;
893 893 }
894 894
895 895 Py_BEGIN_ALLOW_THREADS
896 896 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
897 897 buffer2, buffer2ContentSize, ZSTD_dct_rawContent);
898 898 Py_END_ALLOW_THREADS
899 899 if (ZSTD_isError(zresult)) {
900 900 PyErr_Format(ZstdError,
901 901 "failed to load prefix dictionary at chunk %zd", chunkIndex);
902 902 goto finally;
903 903 }
904 904
905 905 outBuffer.dst = buffer1;
906 906 outBuffer.size = buffer1Size;
907 907 outBuffer.pos = 0;
908 908
909 909 Py_BEGIN_ALLOW_THREADS
910 910 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
911 911 Py_END_ALLOW_THREADS
912 912 if (ZSTD_isError(zresult)) {
913 913 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
914 914 chunkIndex, ZSTD_getErrorName(zresult));
915 915 goto finally;
916 916 }
917 917 else if (zresult) {
918 918 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
919 919 chunkIndex);
920 920 goto finally;
921 921 }
922 922
923 923 buffer1ContentSize = outBuffer.pos;
924 924 }
925 925 }
926 926
927 927 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
928 928 parity ? buffer2ContentSize : buffer1ContentSize);
929 929
930 930 finally:
931 931 if (buffer2) {
932 932 PyMem_Free(buffer2);
933 933 }
934 934 if (buffer1) {
935 935 PyMem_Free(buffer1);
936 936 }
937 937
938 938 return result;
939 939 }
940 940
/*
 * Describes a single compressed frame handed to the decompression workers.
 */
typedef struct {
    /* Start of the compressed frame data. */
    void *sourceData;
    /* Length of the compressed frame, in bytes. */
    size_t sourceSize;
    /*
     * Expected decompressed length, in bytes. A value of 0 makes the worker
     * read the content size from the frame header instead.
     */
    size_t destSize;
} FramePointer;
946 946
947 947 typedef struct {
948 948 FramePointer* frames;
949 949 Py_ssize_t framesSize;
950 950 unsigned long long compressedSize;
951 951 } FrameSources;
952 952
953 953 typedef struct {
954 954 void* dest;
955 955 Py_ssize_t destSize;
956 956 BufferSegment* segments;
957 957 Py_ssize_t segmentsSize;
958 958 } DestBuffer;
959 959
/*
 * Outcome reported by a decompression worker through WorkerState.error.
 */
typedef enum {
    /* Worker finished (or never ran) without a problem. */
    WorkerError_none         = 0,
    /* zstd reported an error; the code is stored in WorkerState.zresult. */
    WorkerError_zstd         = 1,
    /* An allocation failed or a size does not fit in size_t. */
    WorkerError_memory       = 2,
    /* The frame did not decompress to exactly the expected size. */
    WorkerError_sizeMismatch = 3,
    /* The decompressed size of a frame could not be determined. */
    WorkerError_unknownSize  = 4,
} WorkerError;
967 967
968 968 typedef struct {
969 969 /* Source records and length */
970 970 FramePointer* framePointers;
971 971 /* Which records to process. */
972 972 Py_ssize_t startOffset;
973 973 Py_ssize_t endOffset;
974 974 unsigned long long totalSourceSize;
975 975
976 976 /* Compression state and settings. */
977 977 ZSTD_DCtx* dctx;
978 978 int requireOutputSizes;
979 979
980 980 /* Output storage. */
981 981 DestBuffer* destBuffers;
982 982 Py_ssize_t destCount;
983 983
984 984 /* Item that error occurred on. */
985 985 Py_ssize_t errorOffset;
986 986 /* If an error occurred. */
987 987 WorkerError error;
988 988 /* result from zstd decompression operation */
989 989 size_t zresult;
990 990 } WorkerState;
991 991
992 992 static void decompress_worker(WorkerState* state) {
993 993 size_t allocationSize;
994 994 DestBuffer* destBuffer;
995 995 Py_ssize_t frameIndex;
996 996 Py_ssize_t localOffset = 0;
997 997 Py_ssize_t currentBufferStartIndex = state->startOffset;
998 998 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
999 999 void* tmpBuf;
1000 1000 Py_ssize_t destOffset = 0;
1001 1001 FramePointer* framePointers = state->framePointers;
1002 1002 size_t zresult;
1003 1003 unsigned long long totalOutputSize = 0;
1004 1004
1005 1005 assert(NULL == state->destBuffers);
1006 1006 assert(0 == state->destCount);
1007 1007 assert(state->endOffset - state->startOffset >= 0);
1008 1008
1009 1009 /* We could get here due to the way work is allocated. Ideally we wouldn't
1010 1010 get here. But that would require a bit of a refactor in the caller. */
1011 1011 if (state->totalSourceSize > SIZE_MAX) {
1012 1012 state->error = WorkerError_memory;
1013 1013 state->errorOffset = 0;
1014 1014 return;
1015 1015 }
1016 1016
1017 1017 /*
1018 1018 * We need to allocate a buffer to hold decompressed data. How we do this
1019 1019 * depends on what we know about the output. The following scenarios are
1020 1020 * possible:
1021 1021 *
1022 1022 * 1. All structs defining frames declare the output size.
1023 1023 * 2. The decompressed size is embedded within the zstd frame.
1024 1024 * 3. The decompressed size is not stored anywhere.
1025 1025 *
1026 1026 * For now, we only support #1 and #2.
1027 1027 */
1028 1028
1029 1029 /* Resolve ouput segments. */
1030 1030 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1031 1031 FramePointer* fp = &framePointers[frameIndex];
1032 1032 unsigned long long decompressedSize;
1033 1033
1034 1034 if (0 == fp->destSize) {
1035 1035 decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
1036 1036
1037 1037 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
1038 1038 state->error = WorkerError_unknownSize;
1039 1039 state->errorOffset = frameIndex;
1040 1040 return;
1041 1041 }
1042 1042 else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
1043 1043 if (state->requireOutputSizes) {
1044 1044 state->error = WorkerError_unknownSize;
1045 1045 state->errorOffset = frameIndex;
1046 1046 return;
1047 1047 }
1048 1048
1049 1049 /* This will fail the assert for .destSize > 0 below. */
1050 1050 decompressedSize = 0;
1051 1051 }
1052 1052
1053 1053 if (decompressedSize > SIZE_MAX) {
1054 1054 state->error = WorkerError_memory;
1055 1055 state->errorOffset = frameIndex;
1056 1056 return;
1057 1057 }
1058 1058
1059 1059 fp->destSize = (size_t)decompressedSize;
1060 1060 }
1061 1061
1062 1062 totalOutputSize += fp->destSize;
1063 1063 }
1064 1064
1065 1065 state->destBuffers = calloc(1, sizeof(DestBuffer));
1066 1066 if (NULL == state->destBuffers) {
1067 1067 state->error = WorkerError_memory;
1068 1068 return;
1069 1069 }
1070 1070
1071 1071 state->destCount = 1;
1072 1072
1073 1073 destBuffer = &state->destBuffers[state->destCount - 1];
1074 1074
1075 1075 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
1076 1076
1077 1077 allocationSize = roundpow2((size_t)state->totalSourceSize);
1078 1078
1079 1079 if (framePointers[state->startOffset].destSize > allocationSize) {
1080 1080 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
1081 1081 }
1082 1082
1083 1083 destBuffer->dest = malloc(allocationSize);
1084 1084 if (NULL == destBuffer->dest) {
1085 1085 state->error = WorkerError_memory;
1086 1086 return;
1087 1087 }
1088 1088
1089 1089 destBuffer->destSize = allocationSize;
1090 1090
1091 1091 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1092 1092 if (NULL == destBuffer->segments) {
1093 1093 /* Caller will free state->dest as part of cleanup. */
1094 1094 state->error = WorkerError_memory;
1095 1095 return;
1096 1096 }
1097 1097
1098 1098 destBuffer->segmentsSize = remainingItems;
1099 1099
1100 1100 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1101 1101 ZSTD_outBuffer outBuffer;
1102 1102 ZSTD_inBuffer inBuffer;
1103 1103 const void* source = framePointers[frameIndex].sourceData;
1104 1104 const size_t sourceSize = framePointers[frameIndex].sourceSize;
1105 1105 void* dest;
1106 1106 const size_t decompressedSize = framePointers[frameIndex].destSize;
1107 1107 size_t destAvailable = destBuffer->destSize - destOffset;
1108 1108
1109 1109 assert(decompressedSize > 0); /* For now. */
1110 1110
1111 1111 /*
1112 1112 * Not enough space in current buffer. Finish current before and allocate and
1113 1113 * switch to a new one.
1114 1114 */
1115 1115 if (decompressedSize > destAvailable) {
1116 1116 /*
1117 1117 * Shrinking the destination buffer is optional. But it should be cheap,
1118 1118 * so we just do it.
1119 1119 */
1120 1120 if (destAvailable) {
1121 1121 tmpBuf = realloc(destBuffer->dest, destOffset);
1122 1122 if (NULL == tmpBuf) {
1123 1123 state->error = WorkerError_memory;
1124 1124 return;
1125 1125 }
1126 1126
1127 1127 destBuffer->dest = tmpBuf;
1128 1128 destBuffer->destSize = destOffset;
1129 1129 }
1130 1130
1131 1131 /* Truncate segments buffer. */
1132 1132 tmpBuf = realloc(destBuffer->segments,
1133 1133 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
1134 1134 if (NULL == tmpBuf) {
1135 1135 state->error = WorkerError_memory;
1136 1136 return;
1137 1137 }
1138 1138
1139 1139 destBuffer->segments = tmpBuf;
1140 1140 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
1141 1141
1142 1142 /* Grow space for new DestBuffer. */
1143 1143 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1144 1144 if (NULL == tmpBuf) {
1145 1145 state->error = WorkerError_memory;
1146 1146 return;
1147 1147 }
1148 1148
1149 1149 state->destBuffers = tmpBuf;
1150 1150 state->destCount++;
1151 1151
1152 1152 destBuffer = &state->destBuffers[state->destCount - 1];
1153 1153
1154 1154 /* Don't take any chances will non-NULL pointers. */
1155 1155 memset(destBuffer, 0, sizeof(DestBuffer));
1156 1156
1157 1157 allocationSize = roundpow2((size_t)state->totalSourceSize);
1158 1158
1159 1159 if (decompressedSize > allocationSize) {
1160 1160 allocationSize = roundpow2(decompressedSize);
1161 1161 }
1162 1162
1163 1163 destBuffer->dest = malloc(allocationSize);
1164 1164 if (NULL == destBuffer->dest) {
1165 1165 state->error = WorkerError_memory;
1166 1166 return;
1167 1167 }
1168 1168
1169 1169 destBuffer->destSize = allocationSize;
1170 1170 destAvailable = allocationSize;
1171 1171 destOffset = 0;
1172 1172 localOffset = 0;
1173 1173
1174 1174 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1175 1175 if (NULL == destBuffer->segments) {
1176 1176 state->error = WorkerError_memory;
1177 1177 return;
1178 1178 }
1179 1179
1180 1180 destBuffer->segmentsSize = remainingItems;
1181 1181 currentBufferStartIndex = frameIndex;
1182 1182 }
1183 1183
1184 1184 dest = (char*)destBuffer->dest + destOffset;
1185 1185
1186 1186 outBuffer.dst = dest;
1187 1187 outBuffer.size = decompressedSize;
1188 1188 outBuffer.pos = 0;
1189 1189
1190 1190 inBuffer.src = source;
1191 1191 inBuffer.size = sourceSize;
1192 1192 inBuffer.pos = 0;
1193 1193
1194 1194 zresult = ZSTD_decompressStream(state->dctx, &outBuffer, &inBuffer);
1195 1195 if (ZSTD_isError(zresult)) {
1196 1196 state->error = WorkerError_zstd;
1197 1197 state->zresult = zresult;
1198 1198 state->errorOffset = frameIndex;
1199 1199 return;
1200 1200 }
1201 1201 else if (zresult || outBuffer.pos != decompressedSize) {
1202 1202 state->error = WorkerError_sizeMismatch;
1203 1203 state->zresult = outBuffer.pos;
1204 1204 state->errorOffset = frameIndex;
1205 1205 return;
1206 1206 }
1207 1207
1208 1208 destBuffer->segments[localOffset].offset = destOffset;
1209 1209 destBuffer->segments[localOffset].length = outBuffer.pos;
1210 1210 destOffset += outBuffer.pos;
1211 1211 localOffset++;
1212 1212 remainingItems--;
1213 1213 }
1214 1214
1215 1215 if (destBuffer->destSize > destOffset) {
1216 1216 tmpBuf = realloc(destBuffer->dest, destOffset);
1217 1217 if (NULL == tmpBuf) {
1218 1218 state->error = WorkerError_memory;
1219 1219 return;
1220 1220 }
1221 1221
1222 1222 destBuffer->dest = tmpBuf;
1223 1223 destBuffer->destSize = destOffset;
1224 1224 }
1225 1225 }
1226 1226
1227 1227 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1228 1228 Py_ssize_t threadCount) {
1229 1229 Py_ssize_t i = 0;
1230 1230 int errored = 0;
1231 1231 Py_ssize_t segmentsCount;
1232 1232 ZstdBufferWithSegments* bws = NULL;
1233 1233 PyObject* resultArg = NULL;
1234 1234 Py_ssize_t resultIndex;
1235 1235 ZstdBufferWithSegmentsCollection* result = NULL;
1236 1236 FramePointer* framePointers = frames->frames;
1237 1237 unsigned long long workerBytes = 0;
1238 1238 Py_ssize_t currentThread = 0;
1239 1239 Py_ssize_t workerStartOffset = 0;
1240 1240 POOL_ctx* pool = NULL;
1241 1241 WorkerState* workerStates = NULL;
1242 1242 unsigned long long bytesPerWorker;
1243 1243
1244 1244 /* Caller should normalize 0 and negative values to 1 or larger. */
1245 1245 assert(threadCount >= 1);
1246 1246
1247 1247 /* More threads than inputs makes no sense under any conditions. */
1248 1248 threadCount = frames->framesSize < threadCount ? frames->framesSize
1249 1249 : threadCount;
1250 1250
1251 1251 /* TODO lower thread count if input size is too small and threads would just
1252 1252 add overhead. */
1253 1253
1254 1254 if (decompressor->dict) {
1255 1255 if (ensure_ddict(decompressor->dict)) {
1256 1256 return NULL;
1257 1257 }
1258 1258 }
1259 1259
1260 1260 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1261 1261 same API for dispatching work. */
1262 1262 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1263 1263 if (NULL == workerStates) {
1264 1264 PyErr_NoMemory();
1265 1265 goto finally;
1266 1266 }
1267 1267
1268 1268 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1269 1269
1270 1270 if (threadCount > 1) {
1271 1271 pool = POOL_create(threadCount, 1);
1272 1272 if (NULL == pool) {
1273 1273 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1274 1274 goto finally;
1275 1275 }
1276 1276 }
1277 1277
1278 1278 bytesPerWorker = frames->compressedSize / threadCount;
1279 1279
1280 1280 if (bytesPerWorker > SIZE_MAX) {
1281 1281 PyErr_SetString(ZstdError, "too much data per worker for this platform");
1282 1282 goto finally;
1283 1283 }
1284 1284
1285 1285 for (i = 0; i < threadCount; i++) {
1286 1286 size_t zresult;
1287 1287
1288 1288 workerStates[i].dctx = ZSTD_createDCtx();
1289 1289 if (NULL == workerStates[i].dctx) {
1290 1290 PyErr_NoMemory();
1291 1291 goto finally;
1292 1292 }
1293 1293
1294 1294 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1295 1295
1296 1296 if (decompressor->dict) {
1297 1297 zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
1298 1298 if (zresult) {
1299 1299 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
1300 1300 ZSTD_getErrorName(zresult));
1301 1301 goto finally;
1302 1302 }
1303 1303 }
1304 1304
1305 1305 workerStates[i].framePointers = framePointers;
1306 1306 workerStates[i].requireOutputSizes = 1;
1307 1307 }
1308 1308
1309 1309 Py_BEGIN_ALLOW_THREADS
1310 1310 /* There are many ways to split work among workers.
1311 1311
1312 1312 For now, we take a simple approach of splitting work so each worker
1313 1313 gets roughly the same number of input bytes. This will result in more
1314 1314 starvation than running N>threadCount jobs. But it avoids complications
1315 1315 around state tracking, which could involve extra locking.
1316 1316 */
1317 1317 for (i = 0; i < frames->framesSize; i++) {
1318 1318 workerBytes += frames->frames[i].sourceSize;
1319 1319
1320 1320 /*
1321 1321 * The last worker/thread needs to handle all remaining work. Don't
1322 1322 * trigger it prematurely. Defer to the block outside of the loop.
1323 1323 * (But still process this loop so workerBytes is correct.
1324 1324 */
1325 1325 if (currentThread == threadCount - 1) {
1326 1326 continue;
1327 1327 }
1328 1328
1329 1329 if (workerBytes >= bytesPerWorker) {
1330 1330 workerStates[currentThread].startOffset = workerStartOffset;
1331 1331 workerStates[currentThread].endOffset = i;
1332 1332 workerStates[currentThread].totalSourceSize = workerBytes;
1333 1333
1334 1334 if (threadCount > 1) {
1335 1335 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1336 1336 }
1337 1337 else {
1338 1338 decompress_worker(&workerStates[currentThread]);
1339 1339 }
1340 1340 currentThread++;
1341 1341 workerStartOffset = i + 1;
1342 1342 workerBytes = 0;
1343 1343 }
1344 1344 }
1345 1345
1346 1346 if (workerBytes) {
1347 1347 workerStates[currentThread].startOffset = workerStartOffset;
1348 1348 workerStates[currentThread].endOffset = frames->framesSize - 1;
1349 1349 workerStates[currentThread].totalSourceSize = workerBytes;
1350 1350
1351 1351 if (threadCount > 1) {
1352 1352 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1353 1353 }
1354 1354 else {
1355 1355 decompress_worker(&workerStates[currentThread]);
1356 1356 }
1357 1357 }
1358 1358
1359 1359 if (threadCount > 1) {
1360 1360 POOL_free(pool);
1361 1361 pool = NULL;
1362 1362 }
1363 1363 Py_END_ALLOW_THREADS
1364 1364
1365 1365 for (i = 0; i < threadCount; i++) {
1366 1366 switch (workerStates[i].error) {
1367 1367 case WorkerError_none:
1368 1368 break;
1369 1369
1370 1370 case WorkerError_zstd:
1371 1371 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1372 1372 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1373 1373 errored = 1;
1374 1374 break;
1375 1375
1376 1376 case WorkerError_memory:
1377 1377 PyErr_NoMemory();
1378 1378 errored = 1;
1379 1379 break;
1380 1380
1381 1381 case WorkerError_sizeMismatch:
1382 1382 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
1383 1383 workerStates[i].errorOffset, workerStates[i].zresult,
1384 1384 framePointers[workerStates[i].errorOffset].destSize);
1385 1385 errored = 1;
1386 1386 break;
1387 1387
1388 1388 case WorkerError_unknownSize:
1389 1389 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1390 1390 workerStates[i].errorOffset);
1391 1391 errored = 1;
1392 1392 break;
1393 1393
1394 1394 default:
1395 1395 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1396 1396 workerStates[i].error);
1397 1397 errored = 1;
1398 1398 break;
1399 1399 }
1400 1400
1401 1401 if (errored) {
1402 1402 break;
1403 1403 }
1404 1404 }
1405 1405
1406 1406 if (errored) {
1407 1407 goto finally;
1408 1408 }
1409 1409
1410 1410 segmentsCount = 0;
1411 1411 for (i = 0; i < threadCount; i++) {
1412 1412 segmentsCount += workerStates[i].destCount;
1413 1413 }
1414 1414
1415 1415 resultArg = PyTuple_New(segmentsCount);
1416 1416 if (NULL == resultArg) {
1417 1417 goto finally;
1418 1418 }
1419 1419
1420 1420 resultIndex = 0;
1421 1421
1422 1422 for (i = 0; i < threadCount; i++) {
1423 1423 Py_ssize_t bufferIndex;
1424 1424 WorkerState* state = &workerStates[i];
1425 1425
1426 1426 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1427 1427 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1428 1428
1429 1429 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1430 1430 destBuffer->segments, destBuffer->segmentsSize);
1431 1431 if (NULL == bws) {
1432 1432 goto finally;
1433 1433 }
1434 1434
1435 1435 /*
1436 1436 * Memory for buffer and segments was allocated using malloc() in worker
1437 1437 * and the memory is transferred to the BufferWithSegments instance. So
1438 1438 * tell instance to use free() and NULL the reference in the state struct
1439 1439 * so it isn't freed below.
1440 1440 */
1441 1441 bws->useFree = 1;
1442 1442 destBuffer->dest = NULL;
1443 1443 destBuffer->segments = NULL;
1444 1444
1445 1445 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1446 1446 }
1447 1447 }
1448 1448
1449 1449 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1450 1450 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1451 1451
1452 1452 finally:
1453 1453 Py_CLEAR(resultArg);
1454 1454
1455 1455 if (workerStates) {
1456 1456 for (i = 0; i < threadCount; i++) {
1457 1457 Py_ssize_t bufferIndex;
1458 1458 WorkerState* state = &workerStates[i];
1459 1459
1460 1460 if (state->dctx) {
1461 1461 ZSTD_freeDCtx(state->dctx);
1462 1462 }
1463 1463
1464 1464 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1465 1465 if (state->destBuffers) {
1466 1466 /*
1467 1467 * Will be NULL if memory transfered to a BufferWithSegments.
1468 1468 * Otherwise it is left over after an error occurred.
1469 1469 */
1470 1470 free(state->destBuffers[bufferIndex].dest);
1471 1471 free(state->destBuffers[bufferIndex].segments);
1472 1472 }
1473 1473 }
1474 1474
1475 1475 free(state->destBuffers);
1476 1476 }
1477 1477
1478 1478 PyMem_Free(workerStates);
1479 1479 }
1480 1480
1481 1481 POOL_free(pool);
1482 1482
1483 1483 return result;
1484 1484 }
1485 1485
1486 1486 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1487 1487 "Decompress multiple frames to output buffers\n"
1488 1488 "\n"
1489 1489 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1490 1490 "list of bytes-like objects. Each item in the passed collection should be a\n"
1491 1491 "compressed zstd frame.\n"
1492 1492 "\n"
1493 1493 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1494 1494 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1495 1495 "it is an object conforming to the buffer protocol that represents an array\n"
1496 1496 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1497 1497 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1498 1498 "output size.\n"
1499 1499 "\n"
1500 1500 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1501 1501 "data. All decompressed data is allocated in a single memory buffer. The\n"
1502 1502 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1503 1503 "and their respective lengths.\n"
1504 1504 "\n"
1505 1505 "The ``threads`` argument controls how many threads to use for operations.\n"
1506 1506 "Negative values will use the same number of threads as logical CPUs on the\n"
1507 1507 "machine.\n"
1508 1508 );
1509 1509
1510 1510 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1511 1511 static char* kwlist[] = {
1512 1512 "frames",
1513 1513 "decompressed_sizes",
1514 1514 "threads",
1515 1515 NULL
1516 1516 };
1517 1517
1518 1518 PyObject* frames;
1519 1519 Py_buffer frameSizes;
1520 1520 int threads = 0;
1521 1521 Py_ssize_t frameCount;
1522 1522 Py_buffer* frameBuffers = NULL;
1523 1523 FramePointer* framePointers = NULL;
1524 1524 unsigned long long* frameSizesP = NULL;
1525 1525 unsigned long long totalInputSize = 0;
1526 1526 FrameSources frameSources;
1527 1527 ZstdBufferWithSegmentsCollection* result = NULL;
1528 1528 Py_ssize_t i;
1529 1529
1530 1530 memset(&frameSizes, 0, sizeof(frameSizes));
1531 1531
1532 1532 #if PY_MAJOR_VERSION >= 3
1533 1533 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1534 1534 #else
1535 1535 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1536 1536 #endif
1537 1537 kwlist, &frames, &frameSizes, &threads)) {
1538 1538 return NULL;
1539 1539 }
1540 1540
1541 1541 if (frameSizes.buf) {
1542 1542 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1543 1543 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1544 1544 goto finally;
1545 1545 }
1546 1546
1547 1547 frameSizesP = (unsigned long long*)frameSizes.buf;
1548 1548 }
1549 1549
1550 1550 if (threads < 0) {
1551 1551 threads = cpu_count();
1552 1552 }
1553 1553
1554 1554 if (threads < 2) {
1555 1555 threads = 1;
1556 1556 }
1557 1557
1558 1558 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1559 1559 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1560 1560 frameCount = buffer->segmentCount;
1561 1561
1562 1562 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1563 1563 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1564 1564 frameCount * sizeof(unsigned long long), frameSizes.len);
1565 1565 goto finally;
1566 1566 }
1567 1567
1568 1568 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1569 1569 if (!framePointers) {
1570 1570 PyErr_NoMemory();
1571 1571 goto finally;
1572 1572 }
1573 1573
1574 1574 for (i = 0; i < frameCount; i++) {
1575 1575 void* sourceData;
1576 1576 unsigned long long sourceSize;
1577 1577 unsigned long long decompressedSize = 0;
1578 1578
1579 1579 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1580 1580 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1581 1581 goto finally;
1582 1582 }
1583 1583
1584 1584 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1585 1585 sourceSize = buffer->segments[i].length;
1586 1586 totalInputSize += sourceSize;
1587 1587
1588 1588 if (frameSizesP) {
1589 1589 decompressedSize = frameSizesP[i];
1590 1590 }
1591 1591
1592 1592 if (sourceSize > SIZE_MAX) {
1593 1593 PyErr_Format(PyExc_ValueError,
1594 1594 "item %zd is too large for this platform", i);
1595 1595 goto finally;
1596 1596 }
1597 1597
1598 1598 if (decompressedSize > SIZE_MAX) {
1599 1599 PyErr_Format(PyExc_ValueError,
1600 1600 "decompressed size of item %zd is too large for this platform", i);
1601 1601 goto finally;
1602 1602 }
1603 1603
1604 1604 framePointers[i].sourceData = sourceData;
1605 1605 framePointers[i].sourceSize = (size_t)sourceSize;
1606 1606 framePointers[i].destSize = (size_t)decompressedSize;
1607 1607 }
1608 1608 }
1609 1609 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1610 1610 Py_ssize_t offset = 0;
1611 1611 ZstdBufferWithSegments* buffer;
1612 1612 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1613 1613
1614 1614 frameCount = BufferWithSegmentsCollection_length(collection);
1615 1615
1616 1616 if (frameSizes.buf && frameSizes.len != frameCount) {
1617 1617 PyErr_Format(PyExc_ValueError,
1618 1618 "decompressed_sizes size mismatch; expected %zd; got %zd",
1619 1619 frameCount * sizeof(unsigned long long), frameSizes.len);
1620 1620 goto finally;
1621 1621 }
1622 1622
1623 1623 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1624 1624 if (NULL == framePointers) {
1625 1625 PyErr_NoMemory();
1626 1626 goto finally;
1627 1627 }
1628 1628
1629 1629 /* Iterate the data structure directly because it is faster. */
1630 1630 for (i = 0; i < collection->bufferCount; i++) {
1631 1631 Py_ssize_t segmentIndex;
1632 1632 buffer = collection->buffers[i];
1633 1633
1634 1634 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1635 1635 unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
1636 1636
1637 1637 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1638 1638 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1639 1639 offset);
1640 1640 goto finally;
1641 1641 }
1642 1642
1643 1643 if (buffer->segments[segmentIndex].length > SIZE_MAX) {
1644 1644 PyErr_Format(PyExc_ValueError,
1645 1645 "item %zd in buffer %zd is too large for this platform",
1646 1646 segmentIndex, i);
1647 1647 goto finally;
1648 1648 }
1649 1649
1650 1650 if (decompressedSize > SIZE_MAX) {
1651 1651 PyErr_Format(PyExc_ValueError,
1652 1652 "decompressed size of item %zd in buffer %zd is too large for this platform",
1653 1653 segmentIndex, i);
1654 1654 goto finally;
1655 1655 }
1656 1656
1657 1657 totalInputSize += buffer->segments[segmentIndex].length;
1658 1658
1659 1659 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1660 1660 framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
1661 1661 framePointers[offset].destSize = (size_t)decompressedSize;
1662 1662
1663 1663 offset++;
1664 1664 }
1665 1665 }
1666 1666 }
1667 1667 else if (PyList_Check(frames)) {
1668 1668 frameCount = PyList_GET_SIZE(frames);
1669 1669
1670 1670 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1671 1671 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1672 1672 frameCount * sizeof(unsigned long long), frameSizes.len);
1673 1673 goto finally;
1674 1674 }
1675 1675
1676 1676 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1677 1677 if (!framePointers) {
1678 1678 PyErr_NoMemory();
1679 1679 goto finally;
1680 1680 }
1681 1681
1682 1682 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1683 1683 if (NULL == frameBuffers) {
1684 1684 PyErr_NoMemory();
1685 1685 goto finally;
1686 1686 }
1687 1687
1688 1688 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1689 1689
1690 1690 /* Do a pass to assemble info about our input buffers and output sizes. */
1691 1691 for (i = 0; i < frameCount; i++) {
1692 1692 unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
1693 1693
1694 1694 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1695 1695 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1696 1696 PyErr_Clear();
1697 1697 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1698 1698 goto finally;
1699 1699 }
1700 1700
1701 1701 if (decompressedSize > SIZE_MAX) {
1702 1702 PyErr_Format(PyExc_ValueError,
1703 1703 "decompressed size of item %zd is too large for this platform", i);
1704 1704 goto finally;
1705 1705 }
1706 1706
1707 1707 totalInputSize += frameBuffers[i].len;
1708 1708
1709 1709 framePointers[i].sourceData = frameBuffers[i].buf;
1710 1710 framePointers[i].sourceSize = frameBuffers[i].len;
1711 1711 framePointers[i].destSize = (size_t)decompressedSize;
1712 1712 }
1713 1713 }
1714 1714 else {
1715 1715 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1716 1716 goto finally;
1717 1717 }
1718 1718
1719 1719 /* We now have an array with info about our inputs and outputs. Feed it into
1720 1720 our generic decompression function. */
1721 1721 frameSources.frames = framePointers;
1722 1722 frameSources.framesSize = frameCount;
1723 1723 frameSources.compressedSize = totalInputSize;
1724 1724
1725 1725 result = decompress_from_framesources(self, &frameSources, threads);
1726 1726
1727 1727 finally:
1728 1728 if (frameSizes.buf) {
1729 1729 PyBuffer_Release(&frameSizes);
1730 1730 }
1731 1731 PyMem_Free(framePointers);
1732 1732
1733 1733 if (frameBuffers) {
1734 1734 for (i = 0; i < frameCount; i++) {
1735 1735 PyBuffer_Release(&frameBuffers[i]);
1736 1736 }
1737 1737
1738 1738 PyMem_Free(frameBuffers);
1739 1739 }
1740 1740
1741 1741 return result;
1742 1742 }
1743 1743
/*
 * Method table for the ZstdDecompressor type (referenced from the tp_methods
 * slot of ZstdDecompressorType).
 *
 * Every entry except "memory_size" accepts positional and keyword arguments;
 * "memory_size" takes no arguments. "read_from" and "write_to" are deprecated
 * aliases bound to the same C functions and docstrings as "read_to_iter" and
 * "stream_writer" respectively. The table ends with a NULL sentinel entry.
 */
1744 1744 static PyMethodDef Decompressor_methods[] = {
1745 1745 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1746 1746 Decompressor_copy_stream__doc__ },
1747 1747 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1748 1748 Decompressor_decompress__doc__ },
1749 1749 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
1750 1750 Decompressor_decompressobj__doc__ },
1751 1751 { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1752 1752 Decompressor_read_to_iter__doc__ },
1753 1753 /* TODO Remove deprecated API */
1754 1754 { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1755 1755 Decompressor_read_to_iter__doc__ },
1756 1756 { "stream_reader", (PyCFunction)Decompressor_stream_reader,
1757 1757 METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
1758 1758 { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1759 1759 Decompressor_stream_writer__doc__ },
1760 1760 /* TODO remove deprecated API */
1761 1761 { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1762 1762 Decompressor_stream_writer__doc__ },
1763 1763 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1764 1764 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1765 1765 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1766 1766 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1767 1767 { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
1768 1768 Decompressor_memory_size__doc__ },
1769 1769 { NULL, NULL }
1770 1770 };
1771 1771
/*
 * Static type object for "zstd.ZstdDecompressor".
 *
 * Slots are filled positionally, so the order of the entries below must not
 * change. Only a handful are populated: tp_dealloc (Decompressor_dealloc),
 * tp_doc, tp_methods (Decompressor_methods), tp_init (Decompressor_init) and
 * tp_new (PyType_GenericNew). Py_TPFLAGS_BASETYPE allows the type to be
 * subclassed. The object's type pointer is left NULL here and is assigned in
 * decompressor_module_init() before PyType_Ready() is called.
 */
1772 1772 PyTypeObject ZstdDecompressorType = {
1773 1773 PyVarObject_HEAD_INIT(NULL, 0)
1774 1774 "zstd.ZstdDecompressor", /* tp_name */
1775 1775 sizeof(ZstdDecompressor), /* tp_basicsize */
1776 1776 0, /* tp_itemsize */
1777 1777 (destructor)Decompressor_dealloc, /* tp_dealloc */
1778 1778 0, /* tp_print */
1779 1779 0, /* tp_getattr */
1780 1780 0, /* tp_setattr */
1781 1781 0, /* tp_compare */
1782 1782 0, /* tp_repr */
1783 1783 0, /* tp_as_number */
1784 1784 0, /* tp_as_sequence */
1785 1785 0, /* tp_as_mapping */
1786 1786 0, /* tp_hash */
1787 1787 0, /* tp_call */
1788 1788 0, /* tp_str */
1789 1789 0, /* tp_getattro */
1790 1790 0, /* tp_setattro */
1791 1791 0, /* tp_as_buffer */
1792 1792 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1793 1793 Decompressor__doc__, /* tp_doc */
1794 1794 0, /* tp_traverse */
1795 1795 0, /* tp_clear */
1796 1796 0, /* tp_richcompare */
1797 1797 0, /* tp_weaklistoffset */
1798 1798 0, /* tp_iter */
1799 1799 0, /* tp_iternext */
1800 1800 Decompressor_methods, /* tp_methods */
1801 1801 0, /* tp_members */
1802 1802 0, /* tp_getset */
1803 1803 0, /* tp_base */
1804 1804 0, /* tp_dict */
1805 1805 0, /* tp_descr_get */
1806 1806 0, /* tp_descr_set */
1807 1807 0, /* tp_dictoffset */
1808 1808 (initproc)Decompressor_init, /* tp_init */
1809 1809 0, /* tp_alloc */
1810 1810 PyType_GenericNew, /* tp_new */
1811 1811 };
1812 1812
1813 1813 void decompressor_module_init(PyObject* mod) {
1814 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
1814 Py_SET_TYPE(&ZstdDecompressorType, &PyType_Type);
1815 1815 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1816 1816 return;
1817 1817 }
1818 1818
1819 1819 Py_INCREF((PyObject*)&ZstdDecompressorType);
1820 1820 PyModule_AddObject(mod, "ZstdDecompressor",
1821 1821 (PyObject*)&ZstdDecompressorType);
1822 1822 }
@@ -1,249 +1,249 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12 12
13 13 extern PyObject* ZstdError;
14 14
15 15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
16 16 "Represents an iterator of decompressed data.\n"
17 17 );
18 18
19 19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
20 20 Py_XDECREF(self->decompressor);
21 21 Py_XDECREF(self->reader);
22 22
23 23 if (self->buffer.buf) {
24 24 PyBuffer_Release(&self->buffer);
25 25 memset(&self->buffer, 0, sizeof(self->buffer));
26 26 }
27 27
28 28 if (self->input.src) {
29 29 PyMem_Free((void*)self->input.src);
30 30 self->input.src = NULL;
31 31 }
32 32
33 33 PyObject_Del(self);
34 34 }
35 35
36 36 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
37 37 Py_INCREF(self);
38 38 return self;
39 39 }
40 40
41 41 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
42 42 size_t zresult;
43 43 PyObject* chunk;
44 44 DecompressorIteratorResult result;
45 45 size_t oldInputPos = self->input.pos;
46 46
47 47 result.chunk = NULL;
48 48
49 49 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
50 50 if (!chunk) {
51 51 result.errored = 1;
52 52 return result;
53 53 }
54 54
55 55 self->output.dst = PyBytes_AsString(chunk);
56 56 self->output.size = self->outSize;
57 57 self->output.pos = 0;
58 58
59 59 Py_BEGIN_ALLOW_THREADS
60 60 zresult = ZSTD_decompressStream(self->decompressor->dctx, &self->output, &self->input);
61 61 Py_END_ALLOW_THREADS
62 62
63 63 /* We're done with the pointer. Nullify to prevent anyone from getting a
64 64 handle on a Python object. */
65 65 self->output.dst = NULL;
66 66
67 67 if (ZSTD_isError(zresult)) {
68 68 Py_DECREF(chunk);
69 69 PyErr_Format(ZstdError, "zstd decompress error: %s",
70 70 ZSTD_getErrorName(zresult));
71 71 result.errored = 1;
72 72 return result;
73 73 }
74 74
75 75 self->readCount += self->input.pos - oldInputPos;
76 76
77 77 /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
78 78 if (0 == zresult) {
79 79 self->finishedInput = 1;
80 80 self->finishedOutput = 1;
81 81 }
82 82
83 83 /* If it produced output data, return it. */
84 84 if (self->output.pos) {
85 85 if (self->output.pos < self->outSize) {
86 86 if (safe_pybytes_resize(&chunk, self->output.pos)) {
87 87 Py_XDECREF(chunk);
88 88 result.errored = 1;
89 89 return result;
90 90 }
91 91 }
92 92 }
93 93 else {
94 94 Py_DECREF(chunk);
95 95 chunk = NULL;
96 96 }
97 97
98 98 result.errored = 0;
99 99 result.chunk = chunk;
100 100
101 101 return result;
102 102 }
103 103
104 104 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
105 105 PyObject* readResult = NULL;
106 106 char* readBuffer;
107 107 Py_ssize_t readSize;
108 108 Py_ssize_t bufferRemaining;
109 109 DecompressorIteratorResult result;
110 110
111 111 if (self->finishedOutput) {
112 112 PyErr_SetString(PyExc_StopIteration, "output flushed");
113 113 return NULL;
114 114 }
115 115
116 116 /* If we have data left in the input, consume it. */
117 117 if (self->input.pos < self->input.size) {
118 118 result = read_decompressor_iterator(self);
119 119 if (result.chunk || result.errored) {
120 120 return result.chunk;
121 121 }
122 122
123 123 /* Else fall through to get more data from input. */
124 124 }
125 125
126 126 read_from_source:
127 127
128 128 if (!self->finishedInput) {
129 129 if (self->reader) {
130 130 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
131 131 if (!readResult) {
132 132 return NULL;
133 133 }
134 134
135 135 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
136 136 }
137 137 else {
138 138 assert(self->buffer.buf);
139 139
140 140 /* Only support contiguous C arrays for now */
141 141 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
142 142 assert(self->buffer.itemsize == 1);
143 143
144 144 /* TODO avoid memcpy() below */
145 145 readBuffer = (char *)self->buffer.buf + self->bufferOffset;
146 146 bufferRemaining = self->buffer.len - self->bufferOffset;
147 147 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
148 148 self->bufferOffset += readSize;
149 149 }
150 150
151 151 if (readSize) {
152 152 if (!self->readCount && self->skipBytes) {
153 153 assert(self->skipBytes < self->inSize);
154 154 if ((Py_ssize_t)self->skipBytes >= readSize) {
155 155 PyErr_SetString(PyExc_ValueError,
156 156 "skip_bytes larger than first input chunk; "
157 157 "this scenario is currently unsupported");
158 158 Py_XDECREF(readResult);
159 159 return NULL;
160 160 }
161 161
162 162 readBuffer = readBuffer + self->skipBytes;
163 163 readSize -= self->skipBytes;
164 164 }
165 165
166 166 /* Copy input into previously allocated buffer because it can live longer
167 167 than a single function call and we don't want to keep a ref to a Python
168 168 object around. This could be changed... */
169 169 memcpy((void*)self->input.src, readBuffer, readSize);
170 170 self->input.size = readSize;
171 171 self->input.pos = 0;
172 172 }
173 173 /* No bytes on first read must mean an empty input stream. */
174 174 else if (!self->readCount) {
175 175 self->finishedInput = 1;
176 176 self->finishedOutput = 1;
177 177 Py_XDECREF(readResult);
178 178 PyErr_SetString(PyExc_StopIteration, "empty input");
179 179 return NULL;
180 180 }
181 181 else {
182 182 self->finishedInput = 1;
183 183 }
184 184
185 185 /* We've copied the data managed by memory. Discard the Python object. */
186 186 Py_XDECREF(readResult);
187 187 }
188 188
189 189 result = read_decompressor_iterator(self);
190 190 if (result.errored || result.chunk) {
191 191 return result.chunk;
192 192 }
193 193
194 194 /* No new output data. Try again unless we know there is no more data. */
195 195 if (!self->finishedInput) {
196 196 goto read_from_source;
197 197 }
198 198
199 199 PyErr_SetString(PyExc_StopIteration, "input exhausted");
200 200 return NULL;
201 201 }
202 202
/*
 * Static type object for "zstd.ZstdDecompressorIterator".
 *
 * Slots are filled positionally, so the order of the entries below must not
 * change. The populated slots are tp_dealloc, tp_doc, tp_iter (returns the
 * object itself), tp_iternext (yields decompressed chunks) and tp_new
 * (PyType_GenericNew); there is no tp_init and no method table. The object's
 * type pointer is left NULL here and is assigned in
 * decompressoriterator_module_init() before PyType_Ready() is called.
 */
203 203 PyTypeObject ZstdDecompressorIteratorType = {
204 204 PyVarObject_HEAD_INIT(NULL, 0)
205 205 "zstd.ZstdDecompressorIterator", /* tp_name */
206 206 sizeof(ZstdDecompressorIterator), /* tp_basicsize */
207 207 0, /* tp_itemsize */
208 208 (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
209 209 0, /* tp_print */
210 210 0, /* tp_getattr */
211 211 0, /* tp_setattr */
212 212 0, /* tp_compare */
213 213 0, /* tp_repr */
214 214 0, /* tp_as_number */
215 215 0, /* tp_as_sequence */
216 216 0, /* tp_as_mapping */
217 217 0, /* tp_hash */
218 218 0, /* tp_call */
219 219 0, /* tp_str */
220 220 0, /* tp_getattro */
221 221 0, /* tp_setattro */
222 222 0, /* tp_as_buffer */
223 223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
224 224 ZstdDecompressorIterator__doc__, /* tp_doc */
225 225 0, /* tp_traverse */
226 226 0, /* tp_clear */
227 227 0, /* tp_richcompare */
228 228 0, /* tp_weaklistoffset */
229 229 ZstdDecompressorIterator_iter, /* tp_iter */
230 230 (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
231 231 0, /* tp_methods */
232 232 0, /* tp_members */
233 233 0, /* tp_getset */
234 234 0, /* tp_base */
235 235 0, /* tp_dict */
236 236 0, /* tp_descr_get */
237 237 0, /* tp_descr_set */
238 238 0, /* tp_dictoffset */
239 239 0, /* tp_init */
240 240 0, /* tp_alloc */
241 241 PyType_GenericNew, /* tp_new */
242 242 };
243 243
244 244 void decompressoriterator_module_init(PyObject* mod) {
245 Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
245 Py_SET_TYPE(&ZstdDecompressorIteratorType, &PyType_Type);
246 246 if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
247 247 return;
248 248 }
249 249 }
@@ -1,138 +1,138 b''
1 1 /**
2 2 * Copyright (c) 2017-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 extern PyObject* ZstdError;
12 12
13 13 PyDoc_STRVAR(FrameParameters__doc__,
14 14 "FrameParameters: information about a zstd frame");
15 15
16 16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs) {
17 17 static char* kwlist[] = {
18 18 "data",
19 19 NULL
20 20 };
21 21
22 22 Py_buffer source;
23 23 ZSTD_frameHeader header;
24 24 FrameParametersObject* result = NULL;
25 25 size_t zresult;
26 26
27 27 #if PY_MAJOR_VERSION >= 3
28 28 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters",
29 29 #else
30 30 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:get_frame_parameters",
31 31 #endif
32 32 kwlist, &source)) {
33 33 return NULL;
34 34 }
35 35
36 36 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
37 37 PyErr_SetString(PyExc_ValueError,
38 38 "data buffer should be contiguous and have at most one dimension");
39 39 goto finally;
40 40 }
41 41
42 42 zresult = ZSTD_getFrameHeader(&header, source.buf, source.len);
43 43
44 44 if (ZSTD_isError(zresult)) {
45 45 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
46 46 goto finally;
47 47 }
48 48
49 49 if (zresult) {
50 50 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
51 51 goto finally;
52 52 }
53 53
54 54 result = PyObject_New(FrameParametersObject, &FrameParametersType);
55 55 if (!result) {
56 56 goto finally;
57 57 }
58 58
59 59 result->frameContentSize = header.frameContentSize;
60 60 result->windowSize = header.windowSize;
61 61 result->dictID = header.dictID;
62 62 result->checksumFlag = header.checksumFlag ? 1 : 0;
63 63
64 64 finally:
65 65 PyBuffer_Release(&source);
66 66 return result;
67 67 }
68 68
69 69 static void FrameParameters_dealloc(PyObject* self) {
70 70 PyObject_Del(self);
71 71 }
72 72
/*
 * Read-only attributes exposed on FrameParameters instances. Each entry maps
 * a Python attribute name onto a field of FrameParametersObject:
 * content_size and window_size are unsigned 64-bit values, dict_id is an
 * unsigned int and has_checksum is a boolean. The table ends with a NULL
 * sentinel entry.
 */
73 73 static PyMemberDef FrameParameters_members[] = {
74 74 { "content_size", T_ULONGLONG,
75 75 offsetof(FrameParametersObject, frameContentSize), READONLY,
76 76 "frame content size" },
77 77 { "window_size", T_ULONGLONG,
78 78 offsetof(FrameParametersObject, windowSize), READONLY,
79 79 "window size" },
80 80 { "dict_id", T_UINT,
81 81 offsetof(FrameParametersObject, dictID), READONLY,
82 82 "dictionary ID" },
83 83 { "has_checksum", T_BOOL,
84 84 offsetof(FrameParametersObject, checksumFlag), READONLY,
85 85 "checksum flag" },
86 86 { NULL }
87 87 };
88 88
/*
 * Static type object for "FrameParameters".
 *
 * Slots are filled positionally, so the order of the entries below must not
 * change. Only tp_dealloc, tp_doc and tp_members are populated. tp_new is 0
 * and the type is not flagged as a base type; within this file instances are
 * created by get_frame_parameters() via PyObject_New(). The object's type
 * pointer is left NULL here and is assigned in frameparams_module_init()
 * before PyType_Ready() is called.
 */
89 89 PyTypeObject FrameParametersType = {
90 90 PyVarObject_HEAD_INIT(NULL, 0)
91 91 "FrameParameters", /* tp_name */
92 92 sizeof(FrameParametersObject), /* tp_basicsize */
93 93 0, /* tp_itemsize */
94 94 (destructor)FrameParameters_dealloc, /* tp_dealloc */
95 95 0, /* tp_print */
96 96 0, /* tp_getattr */
97 97 0, /* tp_setattr */
98 98 0, /* tp_compare */
99 99 0, /* tp_repr */
100 100 0, /* tp_as_number */
101 101 0, /* tp_as_sequence */
102 102 0, /* tp_as_mapping */
103 103 0, /* tp_hash */
104 104 0, /* tp_call */
105 105 0, /* tp_str */
106 106 0, /* tp_getattro */
107 107 0, /* tp_setattro */
108 108 0, /* tp_as_buffer */
109 109 Py_TPFLAGS_DEFAULT, /* tp_flags */
110 110 FrameParameters__doc__, /* tp_doc */
111 111 0, /* tp_traverse */
112 112 0, /* tp_clear */
113 113 0, /* tp_richcompare */
114 114 0, /* tp_weaklistoffset */
115 115 0, /* tp_iter */
116 116 0, /* tp_iternext */
117 117 0, /* tp_methods */
118 118 FrameParameters_members, /* tp_members */
119 119 0, /* tp_getset */
120 120 0, /* tp_base */
121 121 0, /* tp_dict */
122 122 0, /* tp_descr_get */
123 123 0, /* tp_descr_set */
124 124 0, /* tp_dictoffset */
125 125 0, /* tp_init */
126 126 0, /* tp_alloc */
127 127 0, /* tp_new */
128 128 };
129 129
130 130 void frameparams_module_init(PyObject* mod) {
131 Py_TYPE(&FrameParametersType) = &PyType_Type;
131 Py_SET_TYPE(&FrameParametersType, &PyType_Type);
132 132 if (PyType_Ready(&FrameParametersType) < 0) {
133 133 return;
134 134 }
135 135
136 136 Py_INCREF(&FrameParametersType);
137 137 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
138 138 }
@@ -1,359 +1,360 b''
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #define PY_SSIZE_T_CLEAN
10 10 #include <Python.h>
11 11 #include "structmember.h"
12 #include <pythoncapi_compat.h>
12 13
13 14 #define ZSTD_STATIC_LINKING_ONLY
14 15 #define ZDICT_STATIC_LINKING_ONLY
15 16 #include <zstd.h>
16 17 #include <zdict.h>
17 18
18 19 /* Remember to change the string in zstandard/__init__ as well */
19 20 #define PYTHON_ZSTANDARD_VERSION "0.13.0"
20 21
21 22 typedef enum {
22 23 compressorobj_flush_finish,
23 24 compressorobj_flush_block,
24 25 } CompressorObj_Flush;
25 26
26 27 /*
27 28 Represents a ZstdCompressionParameters type.
28 29
29 30 This type holds all the low-level compression parameters that can be set.
30 31 */
31 32 typedef struct {
32 33 PyObject_HEAD
33 34 ZSTD_CCtx_params* params;
34 35 } ZstdCompressionParametersObject;
35 36
36 37 extern PyTypeObject ZstdCompressionParametersType;
37 38
38 39 /*
39 40 Represents a FrameParameters type.
40 41
41 42 This type is basically a wrapper around ZSTD_frameParams.
42 43 */
43 44 typedef struct {
44 45 PyObject_HEAD
45 46 unsigned long long frameContentSize;
46 47 unsigned long long windowSize;
47 48 unsigned dictID;
48 49 char checksumFlag;
49 50 } FrameParametersObject;
50 51
51 52 extern PyTypeObject FrameParametersType;
52 53
53 54 /*
54 55 Represents a ZstdCompressionDict type.
55 56
56 57 Instances hold data used for a zstd compression dictionary.
57 58 */
58 59 typedef struct {
59 60 PyObject_HEAD
60 61
61 62 /* Pointer to dictionary data. Owned by self. */
62 63 void* dictData;
63 64 /* Size of dictionary data. */
64 65 size_t dictSize;
65 66 ZSTD_dictContentType_e dictType;
66 67 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
67 68 unsigned k;
68 69 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
69 70 unsigned d;
70 71 /* Digested dictionary, suitable for reuse. */
71 72 ZSTD_CDict* cdict;
72 73 ZSTD_DDict* ddict;
73 74 } ZstdCompressionDict;
74 75
75 76 extern PyTypeObject ZstdCompressionDictType;
76 77
77 78 /*
78 79 Represents a ZstdCompressor type.
79 80 */
80 81 typedef struct {
81 82 PyObject_HEAD
82 83
83 84 /* Number of threads to use for operations. */
84 85 unsigned int threads;
85 86 /* Pointer to compression dictionary to use. NULL if not using dictionary
86 87 compression. */
87 88 ZstdCompressionDict* dict;
88 89 /* Compression context to use. Populated during object construction. */
89 90 ZSTD_CCtx* cctx;
90 91 /* Compression parameters in use. */
91 92 ZSTD_CCtx_params* params;
92 93 } ZstdCompressor;
93 94
94 95 extern PyTypeObject ZstdCompressorType;
95 96
96 97 typedef struct {
97 98 PyObject_HEAD
98 99
99 100 ZstdCompressor* compressor;
100 101 ZSTD_outBuffer output;
101 102 int finished;
102 103 } ZstdCompressionObj;
103 104
104 105 extern PyTypeObject ZstdCompressionObjType;
105 106
106 107 typedef struct {
107 108 PyObject_HEAD
108 109
109 110 ZstdCompressor* compressor;
110 111 PyObject* writer;
111 112 ZSTD_outBuffer output;
112 113 size_t outSize;
113 114 int entered;
114 115 int closed;
115 116 int writeReturnRead;
116 117 unsigned long long bytesCompressed;
117 118 } ZstdCompressionWriter;
118 119
119 120 extern PyTypeObject ZstdCompressionWriterType;
120 121
121 122 typedef struct {
122 123 PyObject_HEAD
123 124
124 125 ZstdCompressor* compressor;
125 126 PyObject* reader;
126 127 Py_buffer buffer;
127 128 Py_ssize_t bufferOffset;
128 129 size_t inSize;
129 130 size_t outSize;
130 131
131 132 ZSTD_inBuffer input;
132 133 ZSTD_outBuffer output;
133 134 int finishedOutput;
134 135 int finishedInput;
135 136 PyObject* readResult;
136 137 } ZstdCompressorIterator;
137 138
138 139 extern PyTypeObject ZstdCompressorIteratorType;
139 140
140 141 typedef struct {
141 142 PyObject_HEAD
142 143
143 144 ZstdCompressor* compressor;
144 145 PyObject* reader;
145 146 Py_buffer buffer;
146 147 size_t readSize;
147 148
148 149 int entered;
149 150 int closed;
150 151 unsigned long long bytesCompressed;
151 152
152 153 ZSTD_inBuffer input;
153 154 ZSTD_outBuffer output;
154 155 int finishedInput;
155 156 int finishedOutput;
156 157 PyObject* readResult;
157 158 } ZstdCompressionReader;
158 159
159 160 extern PyTypeObject ZstdCompressionReaderType;
160 161
161 162 typedef struct {
162 163 PyObject_HEAD
163 164
164 165 ZstdCompressor* compressor;
165 166 ZSTD_inBuffer input;
166 167 ZSTD_outBuffer output;
167 168 Py_buffer inBuffer;
168 169 int finished;
169 170 size_t chunkSize;
170 171 } ZstdCompressionChunker;
171 172
172 173 extern PyTypeObject ZstdCompressionChunkerType;
173 174
174 175 typedef enum {
175 176 compressionchunker_mode_normal,
176 177 compressionchunker_mode_flush,
177 178 compressionchunker_mode_finish,
178 179 } CompressionChunkerMode;
179 180
180 181 typedef struct {
181 182 PyObject_HEAD
182 183
183 184 ZstdCompressionChunker* chunker;
184 185 CompressionChunkerMode mode;
185 186 } ZstdCompressionChunkerIterator;
186 187
187 188 extern PyTypeObject ZstdCompressionChunkerIteratorType;
188 189
189 190 typedef struct {
190 191 PyObject_HEAD
191 192
192 193 ZSTD_DCtx* dctx;
193 194 ZstdCompressionDict* dict;
194 195 size_t maxWindowSize;
195 196 ZSTD_format_e format;
196 197 } ZstdDecompressor;
197 198
198 199 extern PyTypeObject ZstdDecompressorType;
199 200
200 201 typedef struct {
201 202 PyObject_HEAD
202 203
203 204 ZstdDecompressor* decompressor;
204 205 size_t outSize;
205 206 int finished;
206 207 } ZstdDecompressionObj;
207 208
208 209 extern PyTypeObject ZstdDecompressionObjType;
209 210
210 211 typedef struct {
211 212 PyObject_HEAD
212 213
213 214 /* Parent decompressor to which this object is associated. */
214 215 ZstdDecompressor* decompressor;
215 216 /* Object to read() from (if reading from a stream). */
216 217 PyObject* reader;
217 218 /* Size for read() operations on reader. */
218 219 size_t readSize;
219 220 /* Whether a read() can return data spanning multiple zstd frames. */
220 221 int readAcrossFrames;
221 222 /* Buffer to read from (if reading from a buffer). */
222 223 Py_buffer buffer;
223 224
224 225 /* Whether the context manager is active. */
225 226 int entered;
226 227 /* Whether we've closed the stream. */
227 228 int closed;
228 229
229 230 /* Number of bytes decompressed and returned to user. */
230 231 unsigned long long bytesDecompressed;
231 232
232 233 /* Tracks data going into decompressor. */
233 234 ZSTD_inBuffer input;
234 235
235 236 /* Holds output from read() operation on reader. */
236 237 PyObject* readResult;
237 238
238 239 /* Whether all input has been sent to the decompressor. */
239 240 int finishedInput;
240 241 /* Whether all output has been flushed from the decompressor. */
241 242 int finishedOutput;
242 243 } ZstdDecompressionReader;
243 244
244 245 extern PyTypeObject ZstdDecompressionReaderType;
245 246
246 247 typedef struct {
247 248 PyObject_HEAD
248 249
249 250 ZstdDecompressor* decompressor;
250 251 PyObject* writer;
251 252 size_t outSize;
252 253 int entered;
253 254 int closed;
254 255 int writeReturnRead;
255 256 } ZstdDecompressionWriter;
256 257
257 258 extern PyTypeObject ZstdDecompressionWriterType;
258 259
/*
 * State for the iterator that yields chunks of decompressed data.
 * Input comes either from `reader` (via its read() method) or, when no
 * reader is set, from `buffer`.
 */
259 260 typedef struct {
260 261 PyObject_HEAD
261 262
/* Decompressor whose dctx performs the work; reference dropped on dealloc. */
262 263 ZstdDecompressor* decompressor;
/* Object to read() input from, or NULL when reading from `buffer`. */
263 264 PyObject* reader;
/* Source buffer used when `reader` is NULL; released on dealloc. */
264 265 Py_buffer buffer;
/* Position in `buffer` of the next byte not yet handed to the decompressor. */
265 266 Py_ssize_t bufferOffset;
/* Maximum number of input bytes requested or taken from the source per step. */
266 267 size_t inSize;
/* Size of the bytes object allocated for each decompression step. */
267 268 size_t outSize;
/* Number of leading bytes dropped from the first input chunk. */
268 269 size_t skipBytes;
/* Input staging area; input.src is memory freed with PyMem_Free on dealloc. */
269 270 ZSTD_inBuffer input;
/* Output descriptor; dst points at a bytes object only during a step. */
270 271 ZSTD_outBuffer output;
/* Running total of input bytes consumed by the decompressor. */
271 272 Py_ssize_t readCount;
/* Set once the source is exhausted or the frame is fully decoded. */
272 273 int finishedInput;
/* Set once no further output will be produced; iteration then stops. */
273 274 int finishedOutput;
274 275 } ZstdDecompressorIterator;
275 276
276 277 extern PyTypeObject ZstdDecompressorIteratorType;
277 278
/*
 * Outcome of one decompression step of ZstdDecompressorIterator.
 * `errored` is 1 when the step failed (chunk is NULL) and 0 otherwise.
 * `chunk` is a new bytes object holding the produced output, or NULL when
 * the step failed or produced no output.
 */
278 279 typedef struct {
279 280 int errored;
280 281 PyObject* chunk;
281 282 } DecompressorIteratorResult;
282 283
283 284 typedef struct {
284 285 /* The public API is that these are 64-bit unsigned integers. So these can't
285 286 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
286 287 * be nonsensical for this platform. */
287 288 unsigned long long offset;
288 289 unsigned long long length;
289 290 } BufferSegment;
290 291
291 292 typedef struct {
292 293 PyObject_HEAD
293 294
294 295 PyObject* parent;
295 296 BufferSegment* segments;
296 297 Py_ssize_t segmentCount;
297 298 } ZstdBufferSegments;
298 299
299 300 extern PyTypeObject ZstdBufferSegmentsType;
300 301
301 302 typedef struct {
302 303 PyObject_HEAD
303 304
304 305 PyObject* parent;
305 306 void* data;
306 307 Py_ssize_t dataSize;
307 308 unsigned long long offset;
308 309 } ZstdBufferSegment;
309 310
310 311 extern PyTypeObject ZstdBufferSegmentType;
311 312
/*
 * A block of memory plus an array of (offset, length) segments describing
 * regions within it. Consumers validate that each segment's
 * offset + length does not exceed dataSize and address a segment as
 * (char*)data + offset.
 */
312 313 typedef struct {
313 314 PyObject_HEAD
314 315
/* NOTE(review): presumably the buffer view backing `data` when the memory
   is borrowed from another object - confirm against the constructor. */
315 316 Py_buffer parent;
/* Start of the memory that the segments index into. */
316 317 void* data;
/* Size of `data` in bytes. */
317 318 unsigned long long dataSize;
/* Array of segmentCount segment descriptors. */
318 319 BufferSegment* segments;
319 320 Py_ssize_t segmentCount;
/* NOTE(review): looks like a flag selecting which deallocator releases
   `data` - confirm against the dealloc implementation. */
320 321 int useFree;
321 322 } ZstdBufferWithSegments;
322 323
323 324 extern PyTypeObject ZstdBufferWithSegmentsType;
324 325
325 326 /**
326 327 * An ordered collection of BufferWithSegments exposed as a squashed collection.
327 328 *
328 329 * This type provides a virtual view spanning multiple BufferWithSegments
329 330 * instances. It allows multiple instances to be "chained" together and
330 331 * exposed as a single collection. e.g. if there are 2 buffers holding
331 332 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
332 333 */
333 334 typedef struct {
334 335 PyObject_HEAD
335 336
336 337 /* An array of buffers that should be exposed through this instance. */
337 338 ZstdBufferWithSegments** buffers;
338 339 /* Number of elements in buffers array. */
339 340 Py_ssize_t bufferCount;
340 341 /* Array of first offset in each buffer instance. 0th entry corresponds
341 342 to number of elements in the 0th buffer. 1st entry corresponds to the
342 343 sum of elements in 0th and 1st buffers. */
343 344 Py_ssize_t* firstElements;
344 345 } ZstdBufferWithSegmentsCollection;
345 346
346 347 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
347 348
348 349 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
349 350 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
350 351 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
351 352 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
352 353 int ensure_ddict(ZstdCompressionDict* dict);
353 354 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
354 355 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
355 356 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
356 357 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
357 358 int cpu_count(void);
358 359 size_t roundpow2(size_t);
359 360 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
@@ -1,1419 +1,1419 b''
1 1 /*
2 2 osutil.c - native operating system services
3 3
4 4 Copyright 2007 Matt Mackall and others
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 #define _ATFILE_SOURCE
11 11 #define PY_SSIZE_T_CLEAN
12 12 #include <Python.h>
13 13 #include <errno.h>
14 14 #include <fcntl.h>
15 15 #include <stdio.h>
16 16 #include <stdlib.h>
17 17 #include <string.h>
18 18
19 19 #ifdef _WIN32
20 20 #include <io.h>
21 21 #include <windows.h>
22 22 #else
23 23 #include <dirent.h>
24 24 #include <signal.h>
25 25 #include <sys/socket.h>
26 26 #include <sys/stat.h>
27 27 #include <sys/types.h>
28 28 #include <unistd.h>
29 29 #ifdef HAVE_LINUX_STATFS
30 30 #include <linux/magic.h>
31 31 #include <sys/vfs.h>
32 32 #endif
33 33 #ifdef HAVE_BSD_STATFS
34 34 #include <sys/mount.h>
35 35 #include <sys/param.h>
36 36 #endif
37 37 #endif
38 38
39 39 #ifdef __APPLE__
40 40 #include <sys/attr.h>
41 41 #include <sys/vnode.h>
42 42 #endif
43 43
44 44 #include "util.h"
45 45
46 46 /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
47 47 #ifndef PATH_MAX
48 48 #define PATH_MAX 4096
49 49 #endif
50 50
51 51 #ifdef _WIN32
52 52 /*
53 53 stat struct compatible with hg expectations
54 54 Mercurial only uses st_mode, st_size and st_mtime
55 55 the rest is kept to minimize changes between implementations
56 56 */
57 57 struct hg_stat {
58 58 int st_dev;
59 59 int st_mode;
60 60 int st_nlink;
61 61 __int64 st_size;
62 62 int st_mtime;
63 63 int st_ctime;
64 64 };
65 65 struct listdir_stat {
66 66 PyObject_HEAD
67 67 struct hg_stat st;
68 68 };
69 69 #else
70 70 struct listdir_stat {
71 71 PyObject_HEAD
72 72 struct stat st;
73 73 };
74 74 #endif
75 75
/* Integer constructor for the Python major version being built against. */
#ifdef IS_PY3K
#define LISTDIR_SLOT_FROMLONG PyLong_FromLong
#else
#define LISTDIR_SLOT_FROMLONG PyInt_FromLong
#endif

/*
 * Define the getter listdir_stat_<name>, which returns member <name> of the
 * wrapped stat record as a Python integer.
 */
#define listdir_slot(name)                                                     \
	static PyObject *listdir_stat_##name(PyObject *self, void *x)          \
	{                                                                      \
		struct listdir_stat *wrapped = (struct listdir_stat *)self;    \
		return LISTDIR_SLOT_FROMLONG(wrapped->st.name);                \
	}
89 89
90 90 listdir_slot(st_dev)
91 91 listdir_slot(st_mode)
92 92 listdir_slot(st_nlink)
93 93 #ifdef _WIN32
94 94 static PyObject *listdir_stat_st_size(PyObject *self, void *x)
95 95 {
96 96 return PyLong_FromLongLong(
97 97 (PY_LONG_LONG)((struct listdir_stat *)self)->st.st_size);
98 98 }
99 99 #else
100 100 listdir_slot(st_size)
101 101 #endif
102 102 listdir_slot(st_mtime)
103 103 listdir_slot(st_ctime)
104 104
105 105 static struct PyGetSetDef listdir_stat_getsets[] = {
106 106 {"st_dev", listdir_stat_st_dev, 0, 0, 0},
107 107 {"st_mode", listdir_stat_st_mode, 0, 0, 0},
108 108 {"st_nlink", listdir_stat_st_nlink, 0, 0, 0},
109 109 {"st_size", listdir_stat_st_size, 0, 0, 0},
110 110 {"st_mtime", listdir_stat_st_mtime, 0, 0, 0},
111 111 {"st_ctime", listdir_stat_st_ctime, 0, 0, 0},
112 112 {0, 0, 0, 0, 0}
113 113 };
114 114
115 115 static PyObject *listdir_stat_new(PyTypeObject *t, PyObject *a, PyObject *k)
116 116 {
117 117 return t->tp_alloc(t, 0);
118 118 }
119 119
120 120 static void listdir_stat_dealloc(PyObject *o)
121 121 {
122 o->ob_type->tp_free(o);
122 Py_TYPE(o)->tp_free(o);
123 123 }
124 124
125 125 static PyObject *listdir_stat_getitem(PyObject *self, PyObject *key)
126 126 {
127 127 long index = PyLong_AsLong(key);
128 128 if (index == -1 && PyErr_Occurred()) {
129 129 return NULL;
130 130 }
131 131 if (index != 8) {
132 132 PyErr_Format(PyExc_IndexError, "osutil.stat objects only "
133 133 "support stat.ST_MTIME in "
134 134 "__getitem__");
135 135 return NULL;
136 136 }
137 137 return listdir_stat_st_mtime(self, NULL);
138 138 }
139 139
140 140 static PyMappingMethods listdir_stat_type_mapping_methods = {
141 141 (lenfunc)NULL, /* mp_length */
142 142 (binaryfunc)listdir_stat_getitem, /* mp_subscript */
143 143 (objobjargproc)NULL, /* mp_ass_subscript */
144 144 };
145 145
146 146 static PyTypeObject listdir_stat_type = {
147 147 PyVarObject_HEAD_INIT(NULL, 0) /* header */
148 148 "osutil.stat", /*tp_name*/
149 149 sizeof(struct listdir_stat), /*tp_basicsize*/
150 150 0, /*tp_itemsize*/
151 151 (destructor)listdir_stat_dealloc, /*tp_dealloc*/
152 152 0, /*tp_print*/
153 153 0, /*tp_getattr*/
154 154 0, /*tp_setattr*/
155 155 0, /*tp_compare*/
156 156 0, /*tp_repr*/
157 157 0, /*tp_as_number*/
158 158 0, /*tp_as_sequence*/
159 159 &listdir_stat_type_mapping_methods, /*tp_as_mapping*/
160 160 0, /*tp_hash */
161 161 0, /*tp_call*/
162 162 0, /*tp_str*/
163 163 0, /*tp_getattro*/
164 164 0, /*tp_setattro*/
165 165 0, /*tp_as_buffer*/
166 166 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
167 167 "stat objects", /* tp_doc */
168 168 0, /* tp_traverse */
169 169 0, /* tp_clear */
170 170 0, /* tp_richcompare */
171 171 0, /* tp_weaklistoffset */
172 172 0, /* tp_iter */
173 173 0, /* tp_iternext */
174 174 0, /* tp_methods */
175 175 0, /* tp_members */
176 176 listdir_stat_getsets, /* tp_getset */
177 177 0, /* tp_base */
178 178 0, /* tp_dict */
179 179 0, /* tp_descr_get */
180 180 0, /* tp_descr_set */
181 181 0, /* tp_dictoffset */
182 182 0, /* tp_init */
183 183 0, /* tp_alloc */
184 184 listdir_stat_new, /* tp_new */
185 185 };
186 186
187 187 #ifdef _WIN32
188 188
189 189 static int to_python_time(const FILETIME *tm)
190 190 {
191 191 /* number of seconds between epoch and January 1 1601 */
192 192 const __int64 a0 = (__int64)134774L * (__int64)24L * (__int64)3600L;
193 193 /* conversion factor from 100ns to 1s */
194 194 const __int64 a1 = 10000000;
195 195 /* explicit (int) cast to suspend compiler warnings */
196 196 return (int)((((__int64)tm->dwHighDateTime << 32)
197 197 + tm->dwLowDateTime) / a1 - a0);
198 198 }
199 199
200 200 static PyObject *make_item(const WIN32_FIND_DATAA *fd, int wantstat)
201 201 {
202 202 PyObject *py_st;
203 203 struct hg_stat *stp;
204 204
205 205 int kind = (fd->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
206 206 ? _S_IFDIR : _S_IFREG;
207 207
208 208 if (!wantstat)
209 209 return Py_BuildValue(PY23("si", "yi"), fd->cFileName, kind);
210 210
211 211 py_st = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
212 212 if (!py_st)
213 213 return NULL;
214 214
215 215 stp = &((struct listdir_stat *)py_st)->st;
216 216 /*
217 217 use kind as st_mode
218 218 rwx bits on Win32 are meaningless
219 219 and Hg does not use them anyway
220 220 */
221 221 stp->st_mode = kind;
222 222 stp->st_mtime = to_python_time(&fd->ftLastWriteTime);
223 223 stp->st_ctime = to_python_time(&fd->ftCreationTime);
224 224 if (kind == _S_IFREG)
225 225 stp->st_size = ((__int64)fd->nFileSizeHigh << 32)
226 226 + fd->nFileSizeLow;
227 227 return Py_BuildValue(PY23("siN", "yiN"), fd->cFileName,
228 228 kind, py_st);
229 229 }
230 230
231 231 static PyObject *_listdir(char *path, Py_ssize_t plen, int wantstat, char *skip)
232 232 {
233 233 PyObject *rval = NULL; /* initialize - return value */
234 234 PyObject *list;
235 235 HANDLE fh;
236 236 WIN32_FIND_DATAA fd;
237 237 char *pattern;
238 238
239 239 /* build the path + \* pattern string */
240 240 pattern = PyMem_Malloc(plen + 3); /* path + \* + \0 */
241 241 if (!pattern) {
242 242 PyErr_NoMemory();
243 243 goto error_nomem;
244 244 }
245 245 memcpy(pattern, path, plen);
246 246
247 247 if (plen > 0) {
248 248 char c = path[plen-1];
249 249 if (c != ':' && c != '/' && c != '\\')
250 250 pattern[plen++] = '\\';
251 251 }
252 252 pattern[plen++] = '*';
253 253 pattern[plen] = '\0';
254 254
255 255 fh = FindFirstFileA(pattern, &fd);
256 256 if (fh == INVALID_HANDLE_VALUE) {
257 257 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
258 258 goto error_file;
259 259 }
260 260
261 261 list = PyList_New(0);
262 262 if (!list)
263 263 goto error_list;
264 264
265 265 do {
266 266 PyObject *item;
267 267
268 268 if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
269 269 if (!strcmp(fd.cFileName, ".")
270 270 || !strcmp(fd.cFileName, ".."))
271 271 continue;
272 272
273 273 if (skip && !strcmp(fd.cFileName, skip)) {
274 274 rval = PyList_New(0);
275 275 goto error;
276 276 }
277 277 }
278 278
279 279 item = make_item(&fd, wantstat);
280 280 if (!item)
281 281 goto error;
282 282
283 283 if (PyList_Append(list, item)) {
284 284 Py_XDECREF(item);
285 285 goto error;
286 286 }
287 287
288 288 Py_XDECREF(item);
289 289 } while (FindNextFileA(fh, &fd));
290 290
291 291 if (GetLastError() != ERROR_NO_MORE_FILES) {
292 292 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
293 293 goto error;
294 294 }
295 295
296 296 rval = list;
297 297 Py_XINCREF(rval);
298 298 error:
299 299 Py_XDECREF(list);
300 300 error_list:
301 301 FindClose(fh);
302 302 error_file:
303 303 PyMem_Free(pattern);
304 304 error_nomem:
305 305 return rval;
306 306 }
307 307
308 308 #else
309 309
/* Translate a dirent's d_type into the matching S_IF* file-type bits.
   Returns -1 when d_type is not one of the listed types, or when the
   platform does not define the DT_* constants at all. */
int entkind(struct dirent *ent)
{
	int kind = -1;

#ifdef DT_REG
	switch (ent->d_type) {
	case DT_REG:
		kind = S_IFREG;
		break;
	case DT_DIR:
		kind = S_IFDIR;
		break;
	case DT_LNK:
		kind = S_IFLNK;
		break;
	case DT_BLK:
		kind = S_IFBLK;
		break;
	case DT_CHR:
		kind = S_IFCHR;
		break;
	case DT_FIFO:
		kind = S_IFIFO;
		break;
	case DT_SOCK:
		kind = S_IFSOCK;
		break;
	default:
		break;
	}
#endif
	return kind;
}
325 325
326 326 static PyObject *makestat(const struct stat *st)
327 327 {
328 328 PyObject *stat;
329 329
330 330 stat = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
331 331 if (stat)
332 332 memcpy(&((struct listdir_stat *)stat)->st, st, sizeof(*st));
333 333 return stat;
334 334 }
335 335
336 336 static PyObject *_listdir_stat(char *path, int pathlen, int keepstat,
337 337 char *skip)
338 338 {
339 339 PyObject *list, *elem, *ret = NULL;
340 340 char fullpath[PATH_MAX + 10];
341 341 int kind, err;
342 342 struct stat st;
343 343 struct dirent *ent;
344 344 DIR *dir;
345 345 #ifdef AT_SYMLINK_NOFOLLOW
346 346 int dfd = -1;
347 347 #endif
348 348
349 349 if (pathlen >= PATH_MAX) {
350 350 errno = ENAMETOOLONG;
351 351 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
352 352 goto error_value;
353 353 }
354 354 strncpy(fullpath, path, PATH_MAX);
355 355 fullpath[pathlen] = '/';
356 356
357 357 #ifdef AT_SYMLINK_NOFOLLOW
358 358 dfd = open(path, O_RDONLY);
359 359 if (dfd == -1) {
360 360 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
361 361 goto error_value;
362 362 }
363 363 dir = fdopendir(dfd);
364 364 #else
365 365 dir = opendir(path);
366 366 #endif
367 367 if (!dir) {
368 368 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
369 369 goto error_dir;
370 370 }
371 371
372 372 list = PyList_New(0);
373 373 if (!list)
374 374 goto error_list;
375 375
376 376 while ((ent = readdir(dir))) {
377 377 if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
378 378 continue;
379 379
380 380 kind = entkind(ent);
381 381 if (kind == -1 || keepstat) {
382 382 #ifdef AT_SYMLINK_NOFOLLOW
383 383 err = fstatat(dfd, ent->d_name, &st,
384 384 AT_SYMLINK_NOFOLLOW);
385 385 #else
386 386 strncpy(fullpath + pathlen + 1, ent->d_name,
387 387 PATH_MAX - pathlen);
388 388 fullpath[PATH_MAX] = '\0';
389 389 err = lstat(fullpath, &st);
390 390 #endif
391 391 if (err == -1) {
392 392 /* race with file deletion? */
393 393 if (errno == ENOENT)
394 394 continue;
395 395 strncpy(fullpath + pathlen + 1, ent->d_name,
396 396 PATH_MAX - pathlen);
397 397 fullpath[PATH_MAX] = 0;
398 398 PyErr_SetFromErrnoWithFilename(PyExc_OSError,
399 399 fullpath);
400 400 goto error;
401 401 }
402 402 kind = st.st_mode & S_IFMT;
403 403 }
404 404
405 405 /* quit early? */
406 406 if (skip && kind == S_IFDIR && !strcmp(ent->d_name, skip)) {
407 407 ret = PyList_New(0);
408 408 goto error;
409 409 }
410 410
411 411 if (keepstat) {
412 412 PyObject *stat = makestat(&st);
413 413 if (!stat)
414 414 goto error;
415 415 elem = Py_BuildValue(PY23("siN", "yiN"), ent->d_name,
416 416 kind, stat);
417 417 } else
418 418 elem = Py_BuildValue(PY23("si", "yi"), ent->d_name,
419 419 kind);
420 420 if (!elem)
421 421 goto error;
422 422
423 423 PyList_Append(list, elem);
424 424 Py_DECREF(elem);
425 425 }
426 426
427 427 ret = list;
428 428 Py_INCREF(ret);
429 429
430 430 error:
431 431 Py_DECREF(list);
432 432 error_list:
433 433 closedir(dir);
434 434 /* closedir also closes its dirfd */
435 435 goto error_value;
436 436 error_dir:
437 437 #ifdef AT_SYMLINK_NOFOLLOW
438 438 close(dfd);
439 439 #endif
440 440 error_value:
441 441 return ret;
442 442 }
443 443
444 444 #ifdef __APPLE__
445 445
446 446 typedef struct {
447 447 u_int32_t length;
448 448 attrreference_t name;
449 449 fsobj_type_t obj_type;
450 450 struct timespec mtime;
451 451 #if __LITTLE_ENDIAN__
452 452 mode_t access_mask;
453 453 uint16_t padding;
454 454 #else
455 455 uint16_t padding;
456 456 mode_t access_mask;
457 457 #endif
458 458 off_t size;
459 459 } __attribute__((packed)) attrbuf_entry;
460 460
461 461 int attrkind(attrbuf_entry *entry)
462 462 {
463 463 switch (entry->obj_type) {
464 464 case VREG: return S_IFREG;
465 465 case VDIR: return S_IFDIR;
466 466 case VLNK: return S_IFLNK;
467 467 case VBLK: return S_IFBLK;
468 468 case VCHR: return S_IFCHR;
469 469 case VFIFO: return S_IFIFO;
470 470 case VSOCK: return S_IFSOCK;
471 471 }
472 472 return -1;
473 473 }
474 474
475 475 /* get these many entries at a time */
476 476 #define LISTDIR_BATCH_SIZE 50
477 477
478 478 static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
479 479 char *skip, bool *fallback)
480 480 {
481 481 PyObject *list, *elem, *ret = NULL;
482 482 int kind, err;
483 483 unsigned long index;
484 484 unsigned int count, old_state, new_state;
485 485 bool state_seen = false;
486 486 attrbuf_entry *entry;
487 487 /* from the getattrlist(2) man page: a path can be no longer than
488 488 (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
489 489 silently truncate attribute data if attrBufSize is too small." So
490 490 pass in a buffer big enough for the worst case. */
491 491 char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
492 492 unsigned int basep_unused;
493 493
494 494 struct stat st;
495 495 int dfd = -1;
496 496
497 497 /* these must match the attrbuf_entry struct, otherwise you'll end up
498 498 with garbage */
499 499 struct attrlist requested_attr = {0};
500 500 requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
501 501 requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
502 502 ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
503 503 requested_attr.fileattr = ATTR_FILE_DATALENGTH;
504 504
505 505 *fallback = false;
506 506
507 507 if (pathlen >= PATH_MAX) {
508 508 errno = ENAMETOOLONG;
509 509 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
510 510 goto error_value;
511 511 }
512 512
513 513 dfd = open(path, O_RDONLY);
514 514 if (dfd == -1) {
515 515 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
516 516 goto error_value;
517 517 }
518 518
519 519 list = PyList_New(0);
520 520 if (!list)
521 521 goto error_dir;
522 522
523 523 do {
524 524 count = LISTDIR_BATCH_SIZE;
525 525 err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
526 526 sizeof(attrbuf), &count, &basep_unused,
527 527 &new_state, 0);
528 528 if (err < 0) {
529 529 if (errno == ENOTSUP) {
530 530 /* We're on a filesystem that doesn't support
531 531 getdirentriesattr. Fall back to the
532 532 stat-based implementation. */
533 533 *fallback = true;
534 534 } else
535 535 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
536 536 goto error;
537 537 }
538 538
539 539 if (!state_seen) {
540 540 old_state = new_state;
541 541 state_seen = true;
542 542 } else if (old_state != new_state) {
543 543 /* There's an edge case with getdirentriesattr. Consider
544 544 the following initial list of files:
545 545
546 546 a
547 547 b
548 548 <--
549 549 c
550 550 d
551 551
552 552 If the iteration is paused at the arrow, and b is
553 553 deleted before it is resumed, getdirentriesattr will
554 554 not return d at all! Ordinarily we're expected to
555 555 restart the iteration from the beginning. To avoid
556 556 getting stuck in a retry loop here, fall back to
557 557 stat. */
558 558 *fallback = true;
559 559 goto error;
560 560 }
561 561
562 562 entry = (attrbuf_entry *)attrbuf;
563 563
564 564 for (index = 0; index < count; index++) {
565 565 char *filename = ((char *)&entry->name) +
566 566 entry->name.attr_dataoffset;
567 567
568 568 if (!strcmp(filename, ".") || !strcmp(filename, ".."))
569 569 continue;
570 570
571 571 kind = attrkind(entry);
572 572 if (kind == -1) {
573 573 PyErr_Format(PyExc_OSError,
574 574 "unknown object type %u for file "
575 575 "%s%s!",
576 576 entry->obj_type, path, filename);
577 577 goto error;
578 578 }
579 579
580 580 /* quit early? */
581 581 if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
582 582 ret = PyList_New(0);
583 583 goto error;
584 584 }
585 585
586 586 if (keepstat) {
587 587 PyObject *stat = NULL;
588 588 /* from the getattrlist(2) man page: "Only the
589 589 permission bits ... are valid". */
590 590 st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
591 591 st.st_mtime = entry->mtime.tv_sec;
592 592 st.st_size = entry->size;
593 593 stat = makestat(&st);
594 594 if (!stat)
595 595 goto error;
596 596 elem = Py_BuildValue(PY23("siN", "yiN"),
597 597 filename, kind, stat);
598 598 } else
599 599 elem = Py_BuildValue(PY23("si", "yi"),
600 600 filename, kind);
601 601 if (!elem)
602 602 goto error;
603 603
604 604 PyList_Append(list, elem);
605 605 Py_DECREF(elem);
606 606
607 607 entry = (attrbuf_entry *)((char *)entry + entry->length);
608 608 }
609 609 } while (err == 0);
610 610
611 611 ret = list;
612 612 Py_INCREF(ret);
613 613
614 614 error:
615 615 Py_DECREF(list);
616 616 error_dir:
617 617 close(dfd);
618 618 error_value:
619 619 return ret;
620 620 }
621 621
622 622 #endif /* __APPLE__ */
623 623
624 624 static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
625 625 {
626 626 #ifdef __APPLE__
627 627 PyObject *ret;
628 628 bool fallback = false;
629 629
630 630 ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
631 631 if (ret != NULL || !fallback)
632 632 return ret;
633 633 #endif
634 634 return _listdir_stat(path, pathlen, keepstat, skip);
635 635 }
636 636
637 637 static PyObject *statfiles(PyObject *self, PyObject *args)
638 638 {
639 639 PyObject *names, *stats;
640 640 Py_ssize_t i, count;
641 641
642 642 if (!PyArg_ParseTuple(args, "O:statfiles", &names))
643 643 return NULL;
644 644
645 645 count = PySequence_Length(names);
646 646 if (count == -1) {
647 647 PyErr_SetString(PyExc_TypeError, "not a sequence");
648 648 return NULL;
649 649 }
650 650
651 651 stats = PyList_New(count);
652 652 if (stats == NULL)
653 653 return NULL;
654 654
655 655 for (i = 0; i < count; i++) {
656 656 PyObject *stat, *pypath;
657 657 struct stat st;
658 658 int ret, kind;
659 659 char *path;
660 660
661 661 /* With a large file count or on a slow filesystem,
662 662 don't block signals for long (issue4878). */
663 663 if ((i % 1000) == 999 && PyErr_CheckSignals() == -1)
664 664 goto bail;
665 665
666 666 pypath = PySequence_GetItem(names, i);
667 667 if (!pypath)
668 668 goto bail;
669 669 path = PyBytes_AsString(pypath);
670 670 if (path == NULL) {
671 671 Py_DECREF(pypath);
672 672 PyErr_SetString(PyExc_TypeError, "not a string");
673 673 goto bail;
674 674 }
675 675 ret = lstat(path, &st);
676 676 Py_DECREF(pypath);
677 677 kind = st.st_mode & S_IFMT;
678 678 if (ret != -1 && (kind == S_IFREG || kind == S_IFLNK)) {
679 679 stat = makestat(&st);
680 680 if (stat == NULL)
681 681 goto bail;
682 682 PyList_SET_ITEM(stats, i, stat);
683 683 } else {
684 684 Py_INCREF(Py_None);
685 685 PyList_SET_ITEM(stats, i, Py_None);
686 686 }
687 687 }
688 688
689 689 return stats;
690 690
691 691 bail:
692 692 Py_DECREF(stats);
693 693 return NULL;
694 694 }
695 695
696 696 /*
697 697 * recvfds() simply does not release GIL during blocking io operation because
698 698 * command server is known to be single-threaded.
699 699 *
700 700 * Old systems such as Solaris don't provide CMSG_LEN, msg_control, etc.
701 701 * Currently, recvfds() is not supported on these platforms.
702 702 */
703 703 #ifdef CMSG_LEN
704 704
/*
 * Receive one message on sockfd and locate the file descriptors passed along
 * with it.
 *
 * cbuf/cbufsize is the caller's buffer for ancillary data. On success *rfds
 * points at the descriptor array of the first SCM_RIGHTS control message and
 * the number of descriptors is returned; if there is no such message, *rfds
 * is set to cbuf and 0 is returned. Returns -1 if recvmsg() fails.
 */
static ssize_t recvfdstobuf(int sockfd, int **rfds, void *cbuf, size_t cbufsize)
{
	char payload[1];
	struct iovec iov = {payload, sizeof(payload)};
	struct msghdr msgh = {0};
	struct cmsghdr *hdr;

	msgh.msg_iov = &iov;
	msgh.msg_iovlen = 1;
	msgh.msg_control = cbuf;
	msgh.msg_controllen = (socklen_t)cbufsize;
	if (recvmsg(sockfd, &msgh, 0) < 0)
		return -1;

	hdr = CMSG_FIRSTHDR(&msgh);
	while (hdr) {
		if (hdr->cmsg_level == SOL_SOCKET &&
		    hdr->cmsg_type == SCM_RIGHTS) {
			*rfds = (int *)CMSG_DATA(hdr);
			/* payload bytes divided by the size of one fd */
			return (hdr->cmsg_len - CMSG_LEN(0)) / sizeof(int);
		}
		hdr = CMSG_NXTHDR(&msgh, hdr);
	}

	*rfds = cbuf;
	return 0;
}
731 731
732 732 static PyObject *recvfds(PyObject *self, PyObject *args)
733 733 {
734 734 int sockfd;
735 735 int *rfds = NULL;
736 736 ssize_t rfdscount, i;
737 737 char cbuf[256];
738 738 PyObject *rfdslist = NULL;
739 739
740 740 if (!PyArg_ParseTuple(args, "i", &sockfd))
741 741 return NULL;
742 742
743 743 rfdscount = recvfdstobuf(sockfd, &rfds, cbuf, sizeof(cbuf));
744 744 if (rfdscount < 0)
745 745 return PyErr_SetFromErrno(PyExc_OSError);
746 746
747 747 rfdslist = PyList_New(rfdscount);
748 748 if (!rfdslist)
749 749 goto bail;
750 750 for (i = 0; i < rfdscount; i++) {
751 751 PyObject *obj = PyLong_FromLong(rfds[i]);
752 752 if (!obj)
753 753 goto bail;
754 754 PyList_SET_ITEM(rfdslist, i, obj);
755 755 }
756 756 return rfdslist;
757 757
758 758 bail:
759 759 Py_XDECREF(rfdslist);
760 760 return NULL;
761 761 }
762 762
763 763 #endif /* CMSG_LEN */
764 764
765 765 /* allow disabling setprocname via compiler flags */
766 766 #ifndef SETPROCNAME_USE_NONE
767 767 #if defined(HAVE_SETPROCTITLE)
768 768 /* setproctitle is the first choice - available in FreeBSD */
769 769 #define SETPROCNAME_USE_SETPROCTITLE
770 770 #elif (defined(__linux__) || defined(__APPLE__)) && PY_MAJOR_VERSION == 2
771 771 /* rewrite the argv buffer in place - works in Linux and OS X. Py_GetArgcArgv
772 772 * in Python 3 returns the copied wchar_t **argv, thus unsupported. */
773 773 #define SETPROCNAME_USE_ARGVREWRITE
774 774 #else
775 775 #define SETPROCNAME_USE_NONE
776 776 #endif
777 777 #endif /* ndef SETPROCNAME_USE_NONE */
778 778
779 779 #ifndef SETPROCNAME_USE_NONE
780 780 static PyObject *setprocname(PyObject *self, PyObject *args)
781 781 {
782 782 const char *name = NULL;
783 783 if (!PyArg_ParseTuple(args, PY23("s", "y"), &name))
784 784 return NULL;
785 785
786 786 #if defined(SETPROCNAME_USE_SETPROCTITLE)
787 787 setproctitle("%s", name);
788 788 #elif defined(SETPROCNAME_USE_ARGVREWRITE)
789 789 {
790 790 static char *argvstart = NULL;
791 791 static size_t argvsize = 0;
792 792 if (argvstart == NULL) {
793 793 int argc = 0, i;
794 794 char **argv = NULL;
795 795 char *argvend;
796 796 extern void Py_GetArgcArgv(int *argc, char ***argv);
797 797 Py_GetArgcArgv(&argc, &argv);
798 798 /* Py_GetArgcArgv may not do much if a custom python
799 799 * launcher is used that doesn't record the information
800 800 * it needs. Let's handle this gracefully instead of
801 801 * segfaulting. */
802 802 if (argv != NULL)
803 803 argvend = argvstart = argv[0];
804 804 else
805 805 argvend = argvstart = NULL;
806 806
807 807 /* Check the memory we can use. Typically, argv[i] and
808 808 * argv[i + 1] are continuous. */
809 809 for (i = 0; i < argc; ++i) {
810 810 size_t len;
811 811 if (argv[i] > argvend || argv[i] < argvstart)
812 812 break; /* not continuous */
813 813 len = strlen(argv[i]);
814 814 argvend = argv[i] + len + 1 /* '\0' */;
815 815 }
816 816 if (argvend > argvstart) /* sanity check */
817 817 argvsize = argvend - argvstart;
818 818 }
819 819
820 820 if (argvstart && argvsize > 1) {
821 821 int n = snprintf(argvstart, argvsize, "%s", name);
822 822 if (n >= 0 && (size_t)n < argvsize)
823 823 memset(argvstart + n, 0, argvsize - n);
824 824 }
825 825 }
826 826 #endif
827 827
828 828 Py_RETURN_NONE;
829 829 }
830 830 #endif /* ndef SETPROCNAME_USE_NONE */
831 831
832 832 #if defined(HAVE_BSD_STATFS)
833 833 static const char *describefstype(const struct statfs *pbuf)
834 834 {
835 835 /* BSD or OSX provides a f_fstypename field */
836 836 return pbuf->f_fstypename;
837 837 }
838 838 #elif defined(HAVE_LINUX_STATFS)
/*
 * Return a name for the filesystem described by a Linux statfs result, or
 * NULL when f_type matches none of the known magic numbers.
 */
static const char *describefstype(const struct statfs *pbuf)
{
	/* Known magic numbers in lookup order. The first match wins, so
	   constants that share a value resolve to the earliest name. An
	   entry is present only if the build headers define its constant. */
	static const struct {
		unsigned long magic;
		const char *name;
	} fstypes[] = {
	/* Begin of Linux filesystems */
#ifdef ADFS_SUPER_MAGIC
		{ADFS_SUPER_MAGIC, "adfs"},
#endif
#ifdef AFFS_SUPER_MAGIC
		{AFFS_SUPER_MAGIC, "affs"},
#endif
#ifdef AUTOFS_SUPER_MAGIC
		{AUTOFS_SUPER_MAGIC, "autofs"},
#endif
#ifdef BDEVFS_MAGIC
		{BDEVFS_MAGIC, "bdevfs"},
#endif
#ifdef BEFS_SUPER_MAGIC
		{BEFS_SUPER_MAGIC, "befs"},
#endif
#ifdef BFS_MAGIC
		{BFS_MAGIC, "bfs"},
#endif
#ifdef BINFMTFS_MAGIC
		{BINFMTFS_MAGIC, "binfmtfs"},
#endif
#ifdef BTRFS_SUPER_MAGIC
		{BTRFS_SUPER_MAGIC, "btrfs"},
#endif
#ifdef CGROUP_SUPER_MAGIC
		{CGROUP_SUPER_MAGIC, "cgroup"},
#endif
#ifdef CIFS_MAGIC_NUMBER
		{CIFS_MAGIC_NUMBER, "cifs"},
#endif
#ifdef CODA_SUPER_MAGIC
		{CODA_SUPER_MAGIC, "coda"},
#endif
#ifdef COH_SUPER_MAGIC
		{COH_SUPER_MAGIC, "coh"},
#endif
#ifdef CRAMFS_MAGIC
		{CRAMFS_MAGIC, "cramfs"},
#endif
#ifdef DEBUGFS_MAGIC
		{DEBUGFS_MAGIC, "debugfs"},
#endif
#ifdef DEVFS_SUPER_MAGIC
		{DEVFS_SUPER_MAGIC, "devfs"},
#endif
#ifdef DEVPTS_SUPER_MAGIC
		{DEVPTS_SUPER_MAGIC, "devpts"},
#endif
#ifdef EFIVARFS_MAGIC
		{EFIVARFS_MAGIC, "efivarfs"},
#endif
#ifdef EFS_SUPER_MAGIC
		{EFS_SUPER_MAGIC, "efs"},
#endif
#ifdef EXT_SUPER_MAGIC
		{EXT_SUPER_MAGIC, "ext"},
#endif
#ifdef EXT2_OLD_SUPER_MAGIC
		{EXT2_OLD_SUPER_MAGIC, "ext2"},
#endif
#ifdef EXT2_SUPER_MAGIC
		{EXT2_SUPER_MAGIC, "ext2"},
#endif
#ifdef EXT3_SUPER_MAGIC
		{EXT3_SUPER_MAGIC, "ext3"},
#endif
#ifdef EXT4_SUPER_MAGIC
		{EXT4_SUPER_MAGIC, "ext4"},
#endif
#ifdef F2FS_SUPER_MAGIC
		{F2FS_SUPER_MAGIC, "f2fs"},
#endif
#ifdef FUSE_SUPER_MAGIC
		{FUSE_SUPER_MAGIC, "fuse"},
#endif
#ifdef FUTEXFS_SUPER_MAGIC
		{FUTEXFS_SUPER_MAGIC, "futexfs"},
#endif
#ifdef HFS_SUPER_MAGIC
		{HFS_SUPER_MAGIC, "hfs"},
#endif
#ifdef HOSTFS_SUPER_MAGIC
		{HOSTFS_SUPER_MAGIC, "hostfs"},
#endif
#ifdef HPFS_SUPER_MAGIC
		{HPFS_SUPER_MAGIC, "hpfs"},
#endif
#ifdef HUGETLBFS_MAGIC
		{HUGETLBFS_MAGIC, "hugetlbfs"},
#endif
#ifdef ISOFS_SUPER_MAGIC
		{ISOFS_SUPER_MAGIC, "isofs"},
#endif
#ifdef JFFS2_SUPER_MAGIC
		{JFFS2_SUPER_MAGIC, "jffs2"},
#endif
#ifdef JFS_SUPER_MAGIC
		{JFS_SUPER_MAGIC, "jfs"},
#endif
#ifdef MINIX_SUPER_MAGIC
		{MINIX_SUPER_MAGIC, "minix"},
#endif
#ifdef MINIX2_SUPER_MAGIC
		{MINIX2_SUPER_MAGIC, "minix2"},
#endif
#ifdef MINIX3_SUPER_MAGIC
		{MINIX3_SUPER_MAGIC, "minix3"},
#endif
#ifdef MQUEUE_MAGIC
		{MQUEUE_MAGIC, "mqueue"},
#endif
#ifdef MSDOS_SUPER_MAGIC
		{MSDOS_SUPER_MAGIC, "msdos"},
#endif
#ifdef NCP_SUPER_MAGIC
		{NCP_SUPER_MAGIC, "ncp"},
#endif
#ifdef NFS_SUPER_MAGIC
		{NFS_SUPER_MAGIC, "nfs"},
#endif
#ifdef NILFS_SUPER_MAGIC
		{NILFS_SUPER_MAGIC, "nilfs"},
#endif
#ifdef NTFS_SB_MAGIC
		{NTFS_SB_MAGIC, "ntfs-sb"},
#endif
#ifdef OCFS2_SUPER_MAGIC
		{OCFS2_SUPER_MAGIC, "ocfs2"},
#endif
#ifdef OPENPROM_SUPER_MAGIC
		{OPENPROM_SUPER_MAGIC, "openprom"},
#endif
#ifdef OVERLAYFS_SUPER_MAGIC
		{OVERLAYFS_SUPER_MAGIC, "overlay"},
#endif
#ifdef PIPEFS_MAGIC
		{PIPEFS_MAGIC, "pipefs"},
#endif
#ifdef PROC_SUPER_MAGIC
		{PROC_SUPER_MAGIC, "proc"},
#endif
#ifdef PSTOREFS_MAGIC
		{PSTOREFS_MAGIC, "pstorefs"},
#endif
#ifdef QNX4_SUPER_MAGIC
		{QNX4_SUPER_MAGIC, "qnx4"},
#endif
#ifdef QNX6_SUPER_MAGIC
		{QNX6_SUPER_MAGIC, "qnx6"},
#endif
#ifdef RAMFS_MAGIC
		{RAMFS_MAGIC, "ramfs"},
#endif
#ifdef REISERFS_SUPER_MAGIC
		{REISERFS_SUPER_MAGIC, "reiserfs"},
#endif
#ifdef ROMFS_MAGIC
		{ROMFS_MAGIC, "romfs"},
#endif
#ifdef SECURITYFS_MAGIC
		{SECURITYFS_MAGIC, "securityfs"},
#endif
#ifdef SELINUX_MAGIC
		{SELINUX_MAGIC, "selinux"},
#endif
#ifdef SMACK_MAGIC
		{SMACK_MAGIC, "smack"},
#endif
#ifdef SMB_SUPER_MAGIC
		{SMB_SUPER_MAGIC, "smb"},
#endif
#ifdef SOCKFS_MAGIC
		{SOCKFS_MAGIC, "sockfs"},
#endif
#ifdef SQUASHFS_MAGIC
		{SQUASHFS_MAGIC, "squashfs"},
#endif
#ifdef SYSFS_MAGIC
		{SYSFS_MAGIC, "sysfs"},
#endif
#ifdef SYSV2_SUPER_MAGIC
		{SYSV2_SUPER_MAGIC, "sysv2"},
#endif
#ifdef SYSV4_SUPER_MAGIC
		{SYSV4_SUPER_MAGIC, "sysv4"},
#endif
#ifdef TMPFS_MAGIC
		{TMPFS_MAGIC, "tmpfs"},
#endif
#ifdef UDF_SUPER_MAGIC
		{UDF_SUPER_MAGIC, "udf"},
#endif
#ifdef UFS_MAGIC
		{UFS_MAGIC, "ufs"},
#endif
#ifdef USBDEVICE_SUPER_MAGIC
		{USBDEVICE_SUPER_MAGIC, "usbdevice"},
#endif
#ifdef V9FS_MAGIC
		{V9FS_MAGIC, "v9fs"},
#endif
#ifdef VXFS_SUPER_MAGIC
		{VXFS_SUPER_MAGIC, "vxfs"},
#endif
#ifdef XENFS_SUPER_MAGIC
		{XENFS_SUPER_MAGIC, "xenfs"},
#endif
#ifdef XENIX_SUPER_MAGIC
		{XENIX_SUPER_MAGIC, "xenix"},
#endif
#ifdef XFS_SUPER_MAGIC
		{XFS_SUPER_MAGIC, "xfs"},
#endif
	/* End of Linux filesystems */
		{0, NULL} /* sentinel: terminates the table */
	};
	const unsigned long fstype = (unsigned long)pbuf->f_type;
	size_t i;

	for (i = 0; fstypes[i].name != NULL; i++) {
		if (fstypes[i].magic == fstype)
			return fstypes[i].name;
	}
	return NULL;
}
1129 1129 #endif /* def HAVE_LINUX_STATFS */
1130 1130
1131 1131 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1132 1132 /* given a directory path, return filesystem type name (best-effort) */
1133 1133 static PyObject *getfstype(PyObject *self, PyObject *args)
1134 1134 {
1135 1135 const char *path = NULL;
1136 1136 struct statfs buf;
1137 1137 int r;
1138 1138 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1139 1139 return NULL;
1140 1140
1141 1141 memset(&buf, 0, sizeof(buf));
1142 1142 r = statfs(path, &buf);
1143 1143 if (r != 0)
1144 1144 return PyErr_SetFromErrno(PyExc_OSError);
1145 1145 return Py_BuildValue(PY23("s", "y"), describefstype(&buf));
1146 1146 }
1147 1147 #endif /* defined(HAVE_LINUX_STATFS) || defined(HAVE_BSD_STATFS) */
1148 1148
1149 1149 #if defined(HAVE_BSD_STATFS)
1150 1150 /* given a directory path, return filesystem mount point (best-effort) */
1151 1151 static PyObject *getfsmountpoint(PyObject *self, PyObject *args)
1152 1152 {
1153 1153 const char *path = NULL;
1154 1154 struct statfs buf;
1155 1155 int r;
1156 1156 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1157 1157 return NULL;
1158 1158
1159 1159 memset(&buf, 0, sizeof(buf));
1160 1160 r = statfs(path, &buf);
1161 1161 if (r != 0)
1162 1162 return PyErr_SetFromErrno(PyExc_OSError);
1163 1163 return Py_BuildValue(PY23("s", "y"), buf.f_mntonname);
1164 1164 }
1165 1165 #endif /* defined(HAVE_BSD_STATFS) */
1166 1166
1167 1167 static PyObject *unblocksignal(PyObject *self, PyObject *args)
1168 1168 {
1169 1169 int sig = 0;
1170 1170 sigset_t set;
1171 1171 int r;
1172 1172 if (!PyArg_ParseTuple(args, "i", &sig))
1173 1173 return NULL;
1174 1174 r = sigemptyset(&set);
1175 1175 if (r != 0)
1176 1176 return PyErr_SetFromErrno(PyExc_OSError);
1177 1177 r = sigaddset(&set, sig);
1178 1178 if (r != 0)
1179 1179 return PyErr_SetFromErrno(PyExc_OSError);
1180 1180 r = sigprocmask(SIG_UNBLOCK, &set, NULL);
1181 1181 if (r != 0)
1182 1182 return PyErr_SetFromErrno(PyExc_OSError);
1183 1183 Py_RETURN_NONE;
1184 1184 }
1185 1185
1186 1186 #endif /* ndef _WIN32 */
1187 1187
1188 1188 static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs)
1189 1189 {
1190 1190 PyObject *statobj = NULL; /* initialize - optional arg */
1191 1191 PyObject *skipobj = NULL; /* initialize - optional arg */
1192 1192 char *path, *skip = NULL;
1193 1193 Py_ssize_t plen;
1194 1194 int wantstat;
1195 1195
1196 1196 static char *kwlist[] = {"path", "stat", "skip", NULL};
1197 1197
1198 1198 if (!PyArg_ParseTupleAndKeywords(args, kwargs, PY23("s#|OO:listdir",
1199 1199 "y#|OO:listdir"),
1200 1200 kwlist, &path, &plen, &statobj, &skipobj))
1201 1201 return NULL;
1202 1202
1203 1203 wantstat = statobj && PyObject_IsTrue(statobj);
1204 1204
1205 1205 if (skipobj && skipobj != Py_None) {
1206 1206 skip = PyBytes_AsString(skipobj);
1207 1207 if (!skip)
1208 1208 return NULL;
1209 1209 }
1210 1210
1211 1211 return _listdir(path, plen, wantstat, skip);
1212 1212 }
1213 1213
1214 1214 #ifdef _WIN32
1215 1215 static PyObject *posixfile(PyObject *self, PyObject *args, PyObject *kwds)
1216 1216 {
1217 1217 static char *kwlist[] = {"name", "mode", "buffering", NULL};
1218 1218 PyObject *file_obj = NULL;
1219 1219 char *name = NULL;
1220 1220 char *mode = "rb";
1221 1221 DWORD access = 0;
1222 1222 DWORD creation;
1223 1223 HANDLE handle;
1224 1224 int fd, flags = 0;
1225 1225 int bufsize = -1;
1226 1226 char m0, m1, m2;
1227 1227 char fpmode[4];
1228 1228 int fppos = 0;
1229 1229 int plus;
1230 1230 #ifndef IS_PY3K
1231 1231 FILE *fp;
1232 1232 #endif
1233 1233
1234 1234 if (!PyArg_ParseTupleAndKeywords(args, kwds, PY23("et|si:posixfile",
1235 1235 "et|yi:posixfile"),
1236 1236 kwlist,
1237 1237 Py_FileSystemDefaultEncoding,
1238 1238 &name, &mode, &bufsize))
1239 1239 return NULL;
1240 1240
1241 1241 m0 = mode[0];
1242 1242 m1 = m0 ? mode[1] : '\0';
1243 1243 m2 = m1 ? mode[2] : '\0';
1244 1244 plus = m1 == '+' || m2 == '+';
1245 1245
1246 1246 fpmode[fppos++] = m0;
1247 1247 if (m1 == 'b' || m2 == 'b') {
1248 1248 flags = _O_BINARY;
1249 1249 fpmode[fppos++] = 'b';
1250 1250 }
1251 1251 else
1252 1252 flags = _O_TEXT;
1253 1253 if (m0 == 'r' && !plus) {
1254 1254 flags |= _O_RDONLY;
1255 1255 access = GENERIC_READ;
1256 1256 } else {
1257 1257 /*
1258 1258 work around http://support.microsoft.com/kb/899149 and
1259 1259 set _O_RDWR for 'w' and 'a', even if mode has no '+'
1260 1260 */
1261 1261 flags |= _O_RDWR;
1262 1262 access = GENERIC_READ | GENERIC_WRITE;
1263 1263 fpmode[fppos++] = '+';
1264 1264 }
1265 1265 fpmode[fppos++] = '\0';
1266 1266
1267 1267 switch (m0) {
1268 1268 case 'r':
1269 1269 creation = OPEN_EXISTING;
1270 1270 break;
1271 1271 case 'w':
1272 1272 creation = CREATE_ALWAYS;
1273 1273 break;
1274 1274 case 'a':
1275 1275 creation = OPEN_ALWAYS;
1276 1276 flags |= _O_APPEND;
1277 1277 break;
1278 1278 default:
1279 1279 PyErr_Format(PyExc_ValueError,
1280 1280 "mode string must begin with one of 'r', 'w', "
1281 1281 "or 'a', not '%c'", m0);
1282 1282 goto bail;
1283 1283 }
1284 1284
1285 1285 handle = CreateFile(name, access,
1286 1286 FILE_SHARE_READ | FILE_SHARE_WRITE |
1287 1287 FILE_SHARE_DELETE,
1288 1288 NULL,
1289 1289 creation,
1290 1290 FILE_ATTRIBUTE_NORMAL,
1291 1291 0);
1292 1292
1293 1293 if (handle == INVALID_HANDLE_VALUE) {
1294 1294 PyErr_SetFromWindowsErrWithFilename(GetLastError(), name);
1295 1295 goto bail;
1296 1296 }
1297 1297
1298 1298 fd = _open_osfhandle((intptr_t)handle, flags);
1299 1299
1300 1300 if (fd == -1) {
1301 1301 CloseHandle(handle);
1302 1302 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1303 1303 goto bail;
1304 1304 }
1305 1305 #ifndef IS_PY3K
1306 1306 fp = _fdopen(fd, fpmode);
1307 1307 if (fp == NULL) {
1308 1308 _close(fd);
1309 1309 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1310 1310 goto bail;
1311 1311 }
1312 1312
1313 1313 file_obj = PyFile_FromFile(fp, name, mode, fclose);
1314 1314 if (file_obj == NULL) {
1315 1315 fclose(fp);
1316 1316 goto bail;
1317 1317 }
1318 1318
1319 1319 PyFile_SetBufSize(file_obj, bufsize);
1320 1320 #else
1321 1321 file_obj = PyFile_FromFd(fd, name, mode, bufsize, NULL, NULL, NULL, 1);
1322 1322 if (file_obj == NULL)
1323 1323 goto bail;
1324 1324 #endif
1325 1325 bail:
1326 1326 PyMem_Free(name);
1327 1327 return file_obj;
1328 1328 }
1329 1329 #endif
1330 1330
1331 1331 #ifdef __APPLE__
1332 1332 #include <ApplicationServices/ApplicationServices.h>
1333 1333
1334 1334 static PyObject *isgui(PyObject *self)
1335 1335 {
1336 1336 CFDictionaryRef dict = CGSessionCopyCurrentDictionary();
1337 1337
1338 1338 if (dict != NULL) {
1339 1339 CFRelease(dict);
1340 1340 Py_RETURN_TRUE;
1341 1341 } else {
1342 1342 Py_RETURN_FALSE;
1343 1343 }
1344 1344 }
1345 1345 #endif
1346 1346
1347 1347 static char osutil_doc[] = "Native operating system services.";
1348 1348
1349 1349 static PyMethodDef methods[] = {
1350 1350 {"listdir", (PyCFunction)listdir, METH_VARARGS | METH_KEYWORDS,
1351 1351 "list a directory\n"},
1352 1352 #ifdef _WIN32
1353 1353 {"posixfile", (PyCFunction)posixfile, METH_VARARGS | METH_KEYWORDS,
1354 1354 "Open a file with POSIX-like semantics.\n"
1355 1355 "On error, this function may raise either a WindowsError or an IOError."},
1356 1356 #else
1357 1357 {"statfiles", (PyCFunction)statfiles, METH_VARARGS | METH_KEYWORDS,
1358 1358 "stat a series of files or symlinks\n"
1359 1359 "Returns None for non-existent entries and entries of other types.\n"},
1360 1360 #ifdef CMSG_LEN
1361 1361 {"recvfds", (PyCFunction)recvfds, METH_VARARGS,
1362 1362 "receive list of file descriptors via socket\n"},
1363 1363 #endif
1364 1364 #ifndef SETPROCNAME_USE_NONE
1365 1365 {"setprocname", (PyCFunction)setprocname, METH_VARARGS,
1366 1366 "set process title (best-effort)\n"},
1367 1367 #endif
1368 1368 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1369 1369 {"getfstype", (PyCFunction)getfstype, METH_VARARGS,
1370 1370 "get filesystem type (best-effort)\n"},
1371 1371 #endif
1372 1372 #if defined(HAVE_BSD_STATFS)
1373 1373 {"getfsmountpoint", (PyCFunction)getfsmountpoint, METH_VARARGS,
1374 1374 "get filesystem mount point (best-effort)\n"},
1375 1375 #endif
1376 1376 {"unblocksignal", (PyCFunction)unblocksignal, METH_VARARGS,
1377 1377 "change signal mask to unblock a given signal\n"},
1378 1378 #endif /* ndef _WIN32 */
1379 1379 #ifdef __APPLE__
1380 1380 {
1381 1381 "isgui", (PyCFunction)isgui, METH_NOARGS,
1382 1382 "Is a CoreGraphics session available?"
1383 1383 },
1384 1384 #endif
1385 1385 {NULL, NULL}
1386 1386 };
1387 1387
1388 1388 static const int version = 4;
1389 1389
1390 1390 #ifdef IS_PY3K
1391 1391 static struct PyModuleDef osutil_module = {
1392 1392 PyModuleDef_HEAD_INIT,
1393 1393 "osutil",
1394 1394 osutil_doc,
1395 1395 -1,
1396 1396 methods
1397 1397 };
1398 1398
1399 1399 PyMODINIT_FUNC PyInit_osutil(void)
1400 1400 {
1401 1401 PyObject *m;
1402 1402 if (PyType_Ready(&listdir_stat_type) < 0)
1403 1403 return NULL;
1404 1404
1405 1405 m = PyModule_Create(&osutil_module);
1406 1406 PyModule_AddIntConstant(m, "version", version);
1407 1407 return m;
1408 1408 }
1409 1409 #else
1410 1410 PyMODINIT_FUNC initosutil(void)
1411 1411 {
1412 1412 PyObject *m;
1413 1413 if (PyType_Ready(&listdir_stat_type) == -1)
1414 1414 return;
1415 1415
1416 1416 m = Py_InitModule3("osutil", methods, osutil_doc);
1417 1417 PyModule_AddIntConstant(m, "version", version);
1418 1418 }
1419 1419 #endif
@@ -1,802 +1,803 b''
1 1 /*
2 2 pathencode.c - efficient path name encoding
3 3
4 4 Copyright 2012 Facebook
5 5
6 6 This software may be used and distributed according to the terms of
7 7 the GNU General Public License, incorporated herein by reference.
8 8 */
9 9
10 10 /*
11 11 * An implementation of the name encoding scheme used by the fncache
12 12 * store. The common case is of a path < 120 bytes long, which is
13 13 * handled either in a single pass with no allocations or two passes
14 14 * with a single allocation. For longer paths, multiple passes are
15 15 * required.
16 16 */
17 17
18 18 #define PY_SSIZE_T_CLEAN
19 19 #include <Python.h>
20 20 #include <assert.h>
21 21 #include <ctype.h>
22 22 #include <stdlib.h>
23 23 #include <string.h>
24 #include "pythoncapi_compat.h"
24 25
25 26 #include "util.h"
26 27
/*
 * States of the fast-path encoder (_encode). Most states record how
 * much of a reserved device name or of a ".hg"/".d"/".i" suffix has
 * been matched so far in the current path component.
 */
enum path_state {
	START,   /* first byte of a path component */
	A,       /* seen "a" (towards "aux") */
	AU,      /* seen "au" */
	THIRD,   /* third of a 3-byte sequence, e.g. "aux", "nul" */
	C,       /* seen "c" (towards "con" or "comN") */
	CO,      /* seen "co" */
	COMLPT,  /* seen "com" or "lpt" */
	COMLPTn, /* seen "com" or "lpt" followed by a digit 1-9 */
	L,       /* seen "l" (towards "lptN") */
	LP,      /* seen "lp" */
	N,       /* seen "n" (towards "nul") */
	NU,      /* seen "nu" */
	P,       /* seen "p" (towards "prn") */
	PR,      /* seen "pr" */
	LDOT,    /* leading '.' */
	DOT,     /* '.' in a non-leading position */
	H,       /* ".h" */
	HGDI,    /* ".hg", ".d", or ".i" */
	SPACE,   /* ' ' in a non-leading position */
	DEFAULT, /* byte of a path component after the first */
};
50 51
/* States of the directory encoder (_encodedir). */
enum dir_state {
	DDOT,     /* previous byte was '.' */
	DH,       /* seen ".h" */
	DHGDI,    /* seen ".hg", ".d", or ".i" */
	DDEFAULT, /* anything else */
};
58 59
/*
 * Test whether byte `c` is a member of a 256-bit set stored as eight
 * 32-bit words: the word index is c / 32 and the bit index is c % 32,
 * with `c` treated as an unsigned byte.
 *
 * Returns 1 if the bit is set, 0 otherwise.
 */
static inline int inset(const uint32_t bitset[], char c)
{
	const uint8_t byte = (uint8_t)c;

	/* shift an unsigned 1: "1 << 31" on a signed int is undefined */
	return (bitset[byte >> 5] & ((uint32_t)1 << (byte & 31))) != 0;
}
63 64
64 65 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 66 char c)
66 67 {
67 68 if (dest) {
68 69 assert(*destlen < destsize);
69 70 dest[*destlen] = c;
70 71 }
71 72 (*destlen)++;
72 73 }
73 74
74 75 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 76 const void *src, Py_ssize_t len)
76 77 {
77 78 if (dest) {
78 79 assert(*destlen + len < destsize);
79 80 memcpy((void *)&dest[*destlen], src, len);
80 81 }
81 82 *destlen += len;
82 83 }
83 84
84 85 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 86 uint8_t c)
86 87 {
87 88 static const char hexdigit[] = "0123456789abcdef";
88 89
89 90 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 91 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 92 }
92 93
93 94 /* 3-byte escape: tilde followed by two hex digits */
94 95 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 96 char c)
96 97 {
97 98 charcopy(dest, destlen, destsize, '~');
98 99 hexencode(dest, destlen, destsize, c);
99 100 }
100 101
101 102 static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src,
102 103 Py_ssize_t len)
103 104 {
104 105 enum dir_state state = DDEFAULT;
105 106 Py_ssize_t i = 0, destlen = 0;
106 107
107 108 while (i < len) {
108 109 switch (state) {
109 110 case DDOT:
110 111 switch (src[i]) {
111 112 case 'd':
112 113 case 'i':
113 114 state = DHGDI;
114 115 charcopy(dest, &destlen, destsize, src[i++]);
115 116 break;
116 117 case 'h':
117 118 state = DH;
118 119 charcopy(dest, &destlen, destsize, src[i++]);
119 120 break;
120 121 default:
121 122 state = DDEFAULT;
122 123 break;
123 124 }
124 125 break;
125 126 case DH:
126 127 if (src[i] == 'g') {
127 128 state = DHGDI;
128 129 charcopy(dest, &destlen, destsize, src[i++]);
129 130 } else {
130 131 state = DDEFAULT;
131 132 }
132 133 break;
133 134 case DHGDI:
134 135 if (src[i] == '/') {
135 136 memcopy(dest, &destlen, destsize, ".hg", 3);
136 137 charcopy(dest, &destlen, destsize, src[i++]);
137 138 }
138 139 state = DDEFAULT;
139 140 break;
140 141 case DDEFAULT:
141 142 if (src[i] == '.') {
142 143 state = DDOT;
143 144 }
144 145 charcopy(dest, &destlen, destsize, src[i++]);
145 146 break;
146 147 }
147 148 }
148 149
149 150 return destlen;
150 151 }
151 152
152 153 PyObject *encodedir(PyObject *self, PyObject *args)
153 154 {
154 155 Py_ssize_t len, newlen;
155 156 PyObject *pathobj, *newobj;
156 157 char *path;
157 158
158 159 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj)) {
159 160 return NULL;
160 161 }
161 162
162 163 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
163 164 PyErr_SetString(PyExc_TypeError, "expected a string");
164 165 return NULL;
165 166 }
166 167
167 168 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
168 169
169 170 if (newlen == len + 1) {
170 171 Py_INCREF(pathobj);
171 172 return pathobj;
172 173 }
173 174
174 175 newobj = PyBytes_FromStringAndSize(NULL, newlen);
175 176
176 177 if (newobj) {
177 178 assert(PyBytes_Check(newobj));
178 179 Py_SIZE(newobj)--;
179 180 _encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
180 181 }
181 182
182 183 return newobj;
183 184 }
184 185
185 186 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
186 187 char *dest, Py_ssize_t destlen, size_t destsize,
187 188 const char *src, Py_ssize_t len, int encodedir)
188 189 {
189 190 enum path_state state = START;
190 191 Py_ssize_t i = 0;
191 192
192 193 /*
193 194 * Python strings end with a zero byte, which we use as a
194 195 * terminal token as they are not valid inside path names.
195 196 */
196 197
197 198 while (i < len) {
198 199 switch (state) {
199 200 case START:
200 201 switch (src[i]) {
201 202 case '/':
202 203 charcopy(dest, &destlen, destsize, src[i++]);
203 204 break;
204 205 case '.':
205 206 state = LDOT;
206 207 escape3(dest, &destlen, destsize, src[i++]);
207 208 break;
208 209 case ' ':
209 210 state = DEFAULT;
210 211 escape3(dest, &destlen, destsize, src[i++]);
211 212 break;
212 213 case 'a':
213 214 state = A;
214 215 charcopy(dest, &destlen, destsize, src[i++]);
215 216 break;
216 217 case 'c':
217 218 state = C;
218 219 charcopy(dest, &destlen, destsize, src[i++]);
219 220 break;
220 221 case 'l':
221 222 state = L;
222 223 charcopy(dest, &destlen, destsize, src[i++]);
223 224 break;
224 225 case 'n':
225 226 state = N;
226 227 charcopy(dest, &destlen, destsize, src[i++]);
227 228 break;
228 229 case 'p':
229 230 state = P;
230 231 charcopy(dest, &destlen, destsize, src[i++]);
231 232 break;
232 233 default:
233 234 state = DEFAULT;
234 235 break;
235 236 }
236 237 break;
237 238 case A:
238 239 if (src[i] == 'u') {
239 240 state = AU;
240 241 charcopy(dest, &destlen, destsize, src[i++]);
241 242 } else {
242 243 state = DEFAULT;
243 244 }
244 245 break;
245 246 case AU:
246 247 if (src[i] == 'x') {
247 248 state = THIRD;
248 249 i++;
249 250 } else {
250 251 state = DEFAULT;
251 252 }
252 253 break;
253 254 case THIRD:
254 255 state = DEFAULT;
255 256 switch (src[i]) {
256 257 case '.':
257 258 case '/':
258 259 case '\0':
259 260 escape3(dest, &destlen, destsize, src[i - 1]);
260 261 break;
261 262 default:
262 263 i--;
263 264 break;
264 265 }
265 266 break;
266 267 case C:
267 268 if (src[i] == 'o') {
268 269 state = CO;
269 270 charcopy(dest, &destlen, destsize, src[i++]);
270 271 } else {
271 272 state = DEFAULT;
272 273 }
273 274 break;
274 275 case CO:
275 276 if (src[i] == 'm') {
276 277 state = COMLPT;
277 278 i++;
278 279 } else if (src[i] == 'n') {
279 280 state = THIRD;
280 281 i++;
281 282 } else {
282 283 state = DEFAULT;
283 284 }
284 285 break;
285 286 case COMLPT:
286 287 switch (src[i]) {
287 288 case '1':
288 289 case '2':
289 290 case '3':
290 291 case '4':
291 292 case '5':
292 293 case '6':
293 294 case '7':
294 295 case '8':
295 296 case '9':
296 297 state = COMLPTn;
297 298 i++;
298 299 break;
299 300 default:
300 301 state = DEFAULT;
301 302 charcopy(dest, &destlen, destsize, src[i - 1]);
302 303 break;
303 304 }
304 305 break;
305 306 case COMLPTn:
306 307 state = DEFAULT;
307 308 switch (src[i]) {
308 309 case '.':
309 310 case '/':
310 311 case '\0':
311 312 escape3(dest, &destlen, destsize, src[i - 2]);
312 313 charcopy(dest, &destlen, destsize, src[i - 1]);
313 314 break;
314 315 default:
315 316 memcopy(dest, &destlen, destsize, &src[i - 2],
316 317 2);
317 318 break;
318 319 }
319 320 break;
320 321 case L:
321 322 if (src[i] == 'p') {
322 323 state = LP;
323 324 charcopy(dest, &destlen, destsize, src[i++]);
324 325 } else {
325 326 state = DEFAULT;
326 327 }
327 328 break;
328 329 case LP:
329 330 if (src[i] == 't') {
330 331 state = COMLPT;
331 332 i++;
332 333 } else {
333 334 state = DEFAULT;
334 335 }
335 336 break;
336 337 case N:
337 338 if (src[i] == 'u') {
338 339 state = NU;
339 340 charcopy(dest, &destlen, destsize, src[i++]);
340 341 } else {
341 342 state = DEFAULT;
342 343 }
343 344 break;
344 345 case NU:
345 346 if (src[i] == 'l') {
346 347 state = THIRD;
347 348 i++;
348 349 } else {
349 350 state = DEFAULT;
350 351 }
351 352 break;
352 353 case P:
353 354 if (src[i] == 'r') {
354 355 state = PR;
355 356 charcopy(dest, &destlen, destsize, src[i++]);
356 357 } else {
357 358 state = DEFAULT;
358 359 }
359 360 break;
360 361 case PR:
361 362 if (src[i] == 'n') {
362 363 state = THIRD;
363 364 i++;
364 365 } else {
365 366 state = DEFAULT;
366 367 }
367 368 break;
368 369 case LDOT:
369 370 switch (src[i]) {
370 371 case 'd':
371 372 case 'i':
372 373 state = HGDI;
373 374 charcopy(dest, &destlen, destsize, src[i++]);
374 375 break;
375 376 case 'h':
376 377 state = H;
377 378 charcopy(dest, &destlen, destsize, src[i++]);
378 379 break;
379 380 default:
380 381 state = DEFAULT;
381 382 break;
382 383 }
383 384 break;
384 385 case DOT:
385 386 switch (src[i]) {
386 387 case '/':
387 388 case '\0':
388 389 state = START;
389 390 memcopy(dest, &destlen, destsize, "~2e", 3);
390 391 charcopy(dest, &destlen, destsize, src[i++]);
391 392 break;
392 393 case 'd':
393 394 case 'i':
394 395 state = HGDI;
395 396 charcopy(dest, &destlen, destsize, '.');
396 397 charcopy(dest, &destlen, destsize, src[i++]);
397 398 break;
398 399 case 'h':
399 400 state = H;
400 401 memcopy(dest, &destlen, destsize, ".h", 2);
401 402 i++;
402 403 break;
403 404 default:
404 405 state = DEFAULT;
405 406 charcopy(dest, &destlen, destsize, '.');
406 407 break;
407 408 }
408 409 break;
409 410 case H:
410 411 if (src[i] == 'g') {
411 412 state = HGDI;
412 413 charcopy(dest, &destlen, destsize, src[i++]);
413 414 } else {
414 415 state = DEFAULT;
415 416 }
416 417 break;
417 418 case HGDI:
418 419 if (src[i] == '/') {
419 420 state = START;
420 421 if (encodedir) {
421 422 memcopy(dest, &destlen, destsize, ".hg",
422 423 3);
423 424 }
424 425 charcopy(dest, &destlen, destsize, src[i++]);
425 426 } else {
426 427 state = DEFAULT;
427 428 }
428 429 break;
429 430 case SPACE:
430 431 switch (src[i]) {
431 432 case '/':
432 433 case '\0':
433 434 state = START;
434 435 memcopy(dest, &destlen, destsize, "~20", 3);
435 436 charcopy(dest, &destlen, destsize, src[i++]);
436 437 break;
437 438 default:
438 439 state = DEFAULT;
439 440 charcopy(dest, &destlen, destsize, ' ');
440 441 break;
441 442 }
442 443 break;
443 444 case DEFAULT:
444 445 while (inset(onebyte, src[i])) {
445 446 charcopy(dest, &destlen, destsize, src[i++]);
446 447 if (i == len) {
447 448 goto done;
448 449 }
449 450 }
450 451 switch (src[i]) {
451 452 case '.':
452 453 state = DOT;
453 454 i++;
454 455 break;
455 456 case ' ':
456 457 state = SPACE;
457 458 i++;
458 459 break;
459 460 case '/':
460 461 state = START;
461 462 charcopy(dest, &destlen, destsize, '/');
462 463 i++;
463 464 break;
464 465 default:
465 466 if (inset(onebyte, src[i])) {
466 467 do {
467 468 charcopy(dest, &destlen,
468 469 destsize, src[i++]);
469 470 } while (i < len &&
470 471 inset(onebyte, src[i]));
471 472 } else if (inset(twobytes, src[i])) {
472 473 char c = src[i++];
473 474 charcopy(dest, &destlen, destsize, '_');
474 475 charcopy(dest, &destlen, destsize,
475 476 c == '_' ? '_' : c + 32);
476 477 } else {
477 478 escape3(dest, &destlen, destsize,
478 479 src[i++]);
479 480 }
480 481 break;
481 482 }
482 483 break;
483 484 }
484 485 }
485 486 done:
486 487 return destlen;
487 488 }
488 489
489 490 static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src,
490 491 Py_ssize_t len)
491 492 {
492 493 static const uint32_t twobytes[8] = {0, 0, 0x87fffffe};
493 494
494 495 static const uint32_t onebyte[8] = {
495 496 1,
496 497 0x2bff3bfa,
497 498 0x68000001,
498 499 0x2fffffff,
499 500 };
500 501
501 502 Py_ssize_t destlen = 0;
502 503
503 504 return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1);
504 505 }
505 506
506 507 static const Py_ssize_t maxstorepathlen = 120;
507 508
508 509 static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src,
509 510 Py_ssize_t len)
510 511 {
511 512 static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001,
512 513 0x2fffffff};
513 514
514 515 static const uint32_t lower[8] = {0, 0, 0x7fffffe};
515 516
516 517 Py_ssize_t i, destlen = 0;
517 518
518 519 for (i = 0; i < len; i++) {
519 520 if (inset(onebyte, src[i])) {
520 521 charcopy(dest, &destlen, destsize, src[i]);
521 522 } else if (inset(lower, src[i])) {
522 523 charcopy(dest, &destlen, destsize, src[i] + 32);
523 524 } else {
524 525 escape3(dest, &destlen, destsize, src[i]);
525 526 }
526 527 }
527 528
528 529 return destlen;
529 530 }
530 531
531 532 PyObject *lowerencode(PyObject *self, PyObject *args)
532 533 {
533 534 char *path;
534 535 Py_ssize_t len, newlen;
535 536 PyObject *ret;
536 537
537 538 if (!PyArg_ParseTuple(args, PY23("s#:lowerencode", "y#:lowerencode"),
538 539 &path, &len)) {
539 540 return NULL;
540 541 }
541 542
542 543 newlen = _lowerencode(NULL, 0, path, len);
543 544 ret = PyBytes_FromStringAndSize(NULL, newlen);
544 545 if (ret) {
545 546 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
546 547 }
547 548
548 549 return ret;
549 550 }
550 551
551 552 /* See store.py:_auxencode for a description. */
552 553 static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src,
553 554 Py_ssize_t len)
554 555 {
555 556 static const uint32_t twobytes[8];
556 557
557 558 static const uint32_t onebyte[8] = {
558 559 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
559 560 };
560 561
561 562 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
562 563 }
563 564
564 565 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
565 566 {
566 567 static const Py_ssize_t dirprefixlen = 8;
567 568 static const Py_ssize_t maxshortdirslen = 68;
568 569 char *dest;
569 570 PyObject *ret;
570 571
571 572 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
572 573 Py_ssize_t destsize, destlen = 0, slop, used;
573 574
574 575 while (lastslash >= 0 && src[lastslash] != '/') {
575 576 if (src[lastslash] == '.' && lastdot == -1) {
576 577 lastdot = lastslash;
577 578 }
578 579 lastslash--;
579 580 }
580 581
581 582 #if 0
582 583 /* All paths should end in a suffix of ".i" or ".d".
583 584 Unfortunately, the file names in test-hybridencode.py
584 585 violate this rule. */
585 586 if (lastdot != len - 3) {
586 587 PyErr_SetString(PyExc_ValueError,
587 588 "suffix missing or wrong length");
588 589 return NULL;
589 590 }
590 591 #endif
591 592
592 593 /* If src contains a suffix, we will append it to the end of
593 594 the new string, so make room. */
594 595 destsize = 120;
595 596 if (lastdot >= 0) {
596 597 destsize += len - lastdot - 1;
597 598 }
598 599
599 600 ret = PyBytes_FromStringAndSize(NULL, destsize);
600 601 if (ret == NULL) {
601 602 return NULL;
602 603 }
603 604
604 605 dest = PyBytes_AS_STRING(ret);
605 606 memcopy(dest, &destlen, destsize, "dh/", 3);
606 607
607 608 /* Copy up to dirprefixlen bytes of each path component, up to
608 609 a limit of maxshortdirslen bytes. */
609 610 for (i = d = p = 0; i < lastslash; i++, p++) {
610 611 if (src[i] == '/') {
611 612 char d = dest[destlen - 1];
612 613 /* After truncation, a directory name may end
613 614 in a space or dot, which are unportable. */
614 615 if (d == '.' || d == ' ') {
615 616 dest[destlen - 1] = '_';
616 617 /* The + 3 is to account for "dh/" in the
617 618 * beginning */
618 619 }
619 620 if (destlen > maxshortdirslen + 3) {
620 621 break;
621 622 }
622 623 charcopy(dest, &destlen, destsize, src[i]);
623 624 p = -1;
624 625 } else if (p < dirprefixlen) {
625 626 charcopy(dest, &destlen, destsize, src[i]);
626 627 }
627 628 }
628 629
629 630 /* Rewind to just before the last slash copied. */
630 631 if (destlen > maxshortdirslen + 3) {
631 632 do {
632 633 destlen--;
633 634 } while (destlen > 0 && dest[destlen] != '/');
634 635 }
635 636
636 637 if (destlen > 3) {
637 638 if (lastslash > 0) {
638 639 char d = dest[destlen - 1];
639 640 /* The last directory component may be
640 641 truncated, so make it safe. */
641 642 if (d == '.' || d == ' ') {
642 643 dest[destlen - 1] = '_';
643 644 }
644 645 }
645 646
646 647 charcopy(dest, &destlen, destsize, '/');
647 648 }
648 649
649 650 /* Add a prefix of the original file's name. Its length
650 651 depends on the number of bytes left after accounting for
651 652 hash and suffix. */
652 653 used = destlen + 40;
653 654 if (lastdot >= 0) {
654 655 used += len - lastdot - 1;
655 656 }
656 657 slop = maxstorepathlen - used;
657 658 if (slop > 0) {
658 659 Py_ssize_t basenamelen =
659 660 lastslash >= 0 ? len - lastslash - 2 : len - 1;
660 661
661 662 if (basenamelen > slop) {
662 663 basenamelen = slop;
663 664 }
664 665 if (basenamelen > 0) {
665 666 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
666 667 basenamelen);
667 668 }
668 669 }
669 670
670 671 /* Add hash and suffix. */
671 672 for (i = 0; i < 20; i++) {
672 673 hexencode(dest, &destlen, destsize, sha[i]);
673 674 }
674 675
675 676 if (lastdot >= 0) {
676 677 memcopy(dest, &destlen, destsize, &src[lastdot],
677 678 len - lastdot - 1);
678 679 }
679 680
680 681 assert(PyBytes_Check(ret));
681 Py_SIZE(ret) = destlen;
682 Py_SET_SIZE(ret, destlen);
682 683
683 684 return ret;
684 685 }
685 686
686 687 /*
687 688 * Avoiding a trip through Python would improve performance by 50%,
688 689 * but we don't encounter enough long names to be worth the code.
689 690 */
690 691 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
691 692 {
692 693 static PyObject *shafunc;
693 694 PyObject *shaobj, *hashobj;
694 695
695 696 if (shafunc == NULL) {
696 697 PyObject *hashlib = PyImport_ImportModule("hashlib");
697 698 if (hashlib == NULL) {
698 699 PyErr_SetString(PyExc_ImportError,
699 700 "pathencode failed to find hashlib");
700 701 return -1;
701 702 }
702 703 shafunc = PyObject_GetAttrString(hashlib, "sha1");
703 704 Py_DECREF(hashlib);
704 705
705 706 if (shafunc == NULL) {
706 707 PyErr_SetString(PyExc_AttributeError,
707 708 "module 'hashlib' has no "
708 709 "attribute 'sha1' in pathencode");
709 710 return -1;
710 711 }
711 712 }
712 713
713 714 shaobj = PyObject_CallFunction(shafunc, PY23("s#", "y#"), str, len);
714 715
715 716 if (shaobj == NULL) {
716 717 return -1;
717 718 }
718 719
719 720 hashobj = PyObject_CallMethod(shaobj, "digest", "");
720 721 Py_DECREF(shaobj);
721 722 if (hashobj == NULL) {
722 723 return -1;
723 724 }
724 725
725 726 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
726 727 PyErr_SetString(PyExc_TypeError,
727 728 "result of digest is not a 20-byte hash");
728 729 Py_DECREF(hashobj);
729 730 return -1;
730 731 }
731 732
732 733 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
733 734 Py_DECREF(hashobj);
734 735 return 0;
735 736 }
736 737
737 738 #define MAXENCODE 4096 * 4
738 739
739 740 static PyObject *hashencode(const char *src, Py_ssize_t len)
740 741 {
741 742 char dired[MAXENCODE];
742 743 char lowered[MAXENCODE];
743 744 char auxed[MAXENCODE];
744 745 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
745 746 char sha[20];
746 747
747 748 baselen = (len - 5) * 3;
748 749 if (baselen >= MAXENCODE) {
749 750 PyErr_SetString(PyExc_ValueError, "string too long");
750 751 return NULL;
751 752 }
752 753
753 754 dirlen = _encodedir(dired, baselen, src, len);
754 755 if (sha1hash(sha, dired, dirlen - 1) == -1) {
755 756 return NULL;
756 757 }
757 758 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
758 759 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
759 760 return hashmangle(auxed, auxlen, sha);
760 761 }
761 762
762 763 PyObject *pathencode(PyObject *self, PyObject *args)
763 764 {
764 765 Py_ssize_t len, newlen;
765 766 PyObject *pathobj, *newobj;
766 767 char *path;
767 768
768 769 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj)) {
769 770 return NULL;
770 771 }
771 772
772 773 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
773 774 PyErr_SetString(PyExc_TypeError, "expected a string");
774 775 return NULL;
775 776 }
776 777
777 778 if (len > maxstorepathlen) {
778 779 newlen = maxstorepathlen + 2;
779 780 } else {
780 781 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
781 782 }
782 783
783 784 if (newlen <= maxstorepathlen + 1) {
784 785 if (newlen == len + 1) {
785 786 Py_INCREF(pathobj);
786 787 return pathobj;
787 788 }
788 789
789 790 newobj = PyBytes_FromStringAndSize(NULL, newlen);
790 791
791 792 if (newobj) {
792 793 assert(PyBytes_Check(newobj));
793 794 Py_SIZE(newobj)--;
794 795 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
795 796 len + 1);
796 797 }
797 798 } else {
798 799 newobj = hashencode(path, len + 1);
799 800 }
800 801
801 802 return newobj;
802 803 }
@@ -1,95 +1,96 b''
1 1 #require test-repo
2 2
3 3 $ . "$TESTDIR/helpers-testrepo.sh"
4 4 $ check_code="$TESTDIR"/../contrib/check-code.py
5 5 $ cd "$TESTDIR"/..
6 6
7 7 New errors are not allowed. Warnings are strongly discouraged.
8 8 (It is spelled "no-che?k-code" here so that this file itself is not skipped by the check.)
9 9
10 10 $ testrepohg locate \
11 11 > -X contrib/python-zstandard \
12 12 > -X hgext/fsmonitor/pywatchman \
13 13 > -X mercurial/thirdparty \
14 > -X mercurial/pythoncapi_compat.h \
14 15 > | sed 's-\\-/-g' | "$check_code" --warnings --per-file=0 - || false
15 16 Skipping contrib/automation/hgautomation/__init__.py it has no-che?k-code (glob)
16 17 Skipping contrib/automation/hgautomation/aws.py it has no-che?k-code (glob)
17 18 Skipping contrib/automation/hgautomation/cli.py it has no-che?k-code (glob)
18 19 Skipping contrib/automation/hgautomation/linux.py it has no-che?k-code (glob)
19 20 Skipping contrib/automation/hgautomation/pypi.py it has no-che?k-code (glob)
20 21 Skipping contrib/automation/hgautomation/ssh.py it has no-che?k-code (glob)
21 22 Skipping contrib/automation/hgautomation/try_server.py it has no-che?k-code (glob)
22 23 Skipping contrib/automation/hgautomation/windows.py it has no-che?k-code (glob)
23 24 Skipping contrib/automation/hgautomation/winrm.py it has no-che?k-code (glob)
24 25 Skipping contrib/fuzz/FuzzedDataProvider.h it has no-che?k-code (glob)
25 26 Skipping contrib/fuzz/standalone_fuzz_target_runner.cc it has no-che?k-code (glob)
26 27 Skipping contrib/packaging/hgpackaging/cli.py it has no-che?k-code (glob)
27 28 Skipping contrib/packaging/hgpackaging/downloads.py it has no-che?k-code (glob)
28 29 Skipping contrib/packaging/hgpackaging/inno.py it has no-che?k-code (glob)
29 30 Skipping contrib/packaging/hgpackaging/py2exe.py it has no-che?k-code (glob)
30 31 Skipping contrib/packaging/hgpackaging/pyoxidizer.py it has no-che?k-code (glob)
31 32 Skipping contrib/packaging/hgpackaging/util.py it has no-che?k-code (glob)
32 33 Skipping contrib/packaging/hgpackaging/wix.py it has no-che?k-code (glob)
33 34 Skipping i18n/polib.py it has no-che?k-code (glob)
34 35 Skipping mercurial/statprof.py it has no-che?k-code (glob)
35 36 Skipping tests/badserverext.py it has no-che?k-code (glob)
36 37
37 38 @commands in debugcommands.py should be in alphabetical order.
38 39
39 40 >>> import re
40 41 >>> commands = []
41 42 >>> with open('mercurial/debugcommands.py', 'rb') as fh:
42 43 ... for line in fh:
43 44 ... m = re.match(br"^@command\('([a-z]+)", line)
44 45 ... if m:
45 46 ... commands.append(m.group(1))
46 47 >>> scommands = list(sorted(commands))
47 48 >>> for i, command in enumerate(scommands):
48 49 ... if command != commands[i]:
49 50 ... print('commands in debugcommands.py not sorted; first differing '
50 51 ... 'command is %s; expected %s' % (commands[i], command))
51 52 ... break
52 53
53 54 Prevent adding new files in the root directory accidentally.
54 55
55 56 $ testrepohg files 'glob:*'
56 57 .arcconfig
57 58 .clang-format
58 59 .editorconfig
59 60 .hgignore
60 61 .hgsigs
61 62 .hgtags
62 63 .jshintrc
63 64 CONTRIBUTING
64 65 CONTRIBUTORS
65 66 COPYING
66 67 Makefile
67 68 README.rst
68 69 black.toml
69 70 hg
70 71 hgeditor
71 72 hgweb.cgi
72 73 rustfmt.toml
73 74 setup.py
74 75
75 76 Prevent adding modules which could be shadowed by ancient .so/.dylib.
76 77
77 78 $ testrepohg files \
78 79 > mercurial/base85.py \
79 80 > mercurial/bdiff.py \
80 81 > mercurial/diffhelpers.py \
81 82 > mercurial/mpatch.py \
82 83 > mercurial/osutil.py \
83 84 > mercurial/parsers.py \
84 85 > mercurial/zstd.py
85 86 [1]
86 87
87 88 Keep python3 tests sorted:
88 89 $ sort < contrib/python3-whitelist > $TESTTMP/py3sorted
89 90 $ cmp contrib/python3-whitelist $TESTTMP/py3sorted || echo 'Please sort passing tests!'
90 91
91 92 Keep Windows line endings in check
92 93
93 94 $ testrepohg files 'set:eol(dos)'
94 95 contrib/win32/hg.bat
95 96 contrib/win32/mercurial.ini
General Comments 0
You need to be logged in to leave comments. Login now