##// END OF EJS Templates
cext: add Python 3.10 support...
Victor Stinner -
r47475:e92ca942 default
parent child Browse files
Show More
@@ -0,0 +1,278 b''
1 // Header file providing new functions of the Python C API to old Python
2 // versions.
3 //
4 // File distributed under the MIT license.
5 //
6 // Homepage:
7 // https://github.com/pythoncapi/pythoncapi_compat
8 //
9 // Latest version:
10 // https://raw.githubusercontent.com/pythoncapi/pythoncapi_compat/master/pythoncapi_compat.h
11
12 #ifndef PYTHONCAPI_COMPAT
13 #define PYTHONCAPI_COMPAT
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18
19 #include <Python.h>
20 #include "frameobject.h" // PyFrameObject, PyFrame_GetBack()
21
22
// Convert any object pointer expression to PyObject*. Only defined here when
// the included Python headers have not already provided the macro.
#if !defined(_PyObject_CAST)
#  define _PyObject_CAST(op) ((PyObject*)(op))
#endif
27
28
29 // bpo-42262 added Py_NewRef() to Python 3.10.0a3
30 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_NewRef)
31 static inline PyObject* _Py_NewRef(PyObject *obj)
32 {
33 Py_INCREF(obj);
34 return obj;
35 }
36 #define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj))
37 #endif
38
39
40 // bpo-42262 added Py_XNewRef() to Python 3.10.0a3
41 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_XNewRef)
42 static inline PyObject* _Py_XNewRef(PyObject *obj)
43 {
44 Py_XINCREF(obj);
45 return obj;
46 }
47 #define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj))
48 #endif
49
50
51 // bpo-39573 added Py_SET_REFCNT() to Python 3.9.0a4
52 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_REFCNT)
53 static inline void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt)
54 {
55 ob->ob_refcnt = refcnt;
56 }
57 #define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT((PyObject*)(ob), refcnt)
58 #endif
59
60
61 // bpo-39573 added Py_SET_TYPE() to Python 3.9.0a4
62 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE)
63 static inline void
64 _Py_SET_TYPE(PyObject *ob, PyTypeObject *type)
65 {
66 ob->ob_type = type;
67 }
68 #define Py_SET_TYPE(ob, type) _Py_SET_TYPE((PyObject*)(ob), type)
69 #endif
70
71
72 // bpo-39573 added Py_SET_SIZE() to Python 3.9.0a4
73 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE)
74 static inline void
75 _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size)
76 {
77 ob->ob_size = size;
78 }
79 #define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size)
80 #endif
81
82
83 // bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1
84 #if PY_VERSION_HEX < 0x030900B1
85 static inline PyCodeObject*
86 PyFrame_GetCode(PyFrameObject *frame)
87 {
88 PyCodeObject *code;
89 assert(frame != NULL);
90 code = frame->f_code;
91 assert(code != NULL);
92 Py_INCREF(code);
93 return code;
94 }
95 #endif
96
97 static inline PyCodeObject*
98 _PyFrame_GetCodeBorrow(PyFrameObject *frame)
99 {
100 PyCodeObject *code = PyFrame_GetCode(frame);
101 Py_DECREF(code);
102 return code; // borrowed reference
103 }
104
105
106 // bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1
107 #if PY_VERSION_HEX < 0x030900B1
108 static inline PyFrameObject*
109 PyFrame_GetBack(PyFrameObject *frame)
110 {
111 PyFrameObject *back;
112 assert(frame != NULL);
113 back = frame->f_back;
114 Py_XINCREF(back);
115 return back;
116 }
117 #endif
118
119 static inline PyFrameObject*
120 _PyFrame_GetBackBorrow(PyFrameObject *frame)
121 {
122 PyFrameObject *back = PyFrame_GetBack(frame);
123 Py_XDECREF(back);
124 return back; // borrowed reference
125 }
126
127
128 // bpo-39947 added PyThreadState_GetInterpreter() to Python 3.9.0a5
129 #if PY_VERSION_HEX < 0x030900A5
130 static inline PyInterpreterState *
131 PyThreadState_GetInterpreter(PyThreadState *tstate)
132 {
133 assert(tstate != NULL);
134 return tstate->interp;
135 }
136 #endif
137
138
139 // bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1
140 #if PY_VERSION_HEX < 0x030900B1
141 static inline PyFrameObject*
142 PyThreadState_GetFrame(PyThreadState *tstate)
143 {
144 PyFrameObject *frame;
145 assert(tstate != NULL);
146 frame = tstate->frame;
147 Py_XINCREF(frame);
148 return frame;
149 }
150 #endif
151
152 static inline PyFrameObject*
153 _PyThreadState_GetFrameBorrow(PyThreadState *tstate)
154 {
155 PyFrameObject *frame = PyThreadState_GetFrame(tstate);
156 Py_XDECREF(frame);
157 return frame; // borrowed reference
158 }
159
160
161 // bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a5
162 #if PY_VERSION_HEX < 0x030900A5
163 static inline PyInterpreterState *
164 PyInterpreterState_Get(void)
165 {
166 PyThreadState *tstate;
167 PyInterpreterState *interp;
168
169 tstate = PyThreadState_GET();
170 if (tstate == NULL) {
171 Py_FatalError("GIL released (tstate is NULL)");
172 }
173 interp = tstate->interp;
174 if (interp == NULL) {
175 Py_FatalError("no current interpreter");
176 }
177 return interp;
178 }
179 #endif
180
181
// bpo-39947 added PyThreadState_GetID() to Python 3.9.0a6.
// The backport is only provided from 0x030700A1 onwards (see the version
// guard below). Returns the id field of the thread state.
#if 0x030700A1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030900A6
static inline uint64_t
PyThreadState_GetID(PyThreadState *tstate)
{
    assert(tstate != NULL);
    return tstate->id;
}
#endif
191
192
193 // bpo-37194 added PyObject_CallNoArgs() to Python 3.9.0a1
194 #if PY_VERSION_HEX < 0x030900A1
195 static inline PyObject*
196 PyObject_CallNoArgs(PyObject *func)
197 {
198 return PyObject_CallFunctionObjArgs(func, NULL);
199 }
200 #endif
201
202
203 // bpo-39245 made PyObject_CallOneArg() public (previously called
204 // _PyObject_CallOneArg) in Python 3.9.0a4
205 #if PY_VERSION_HEX < 0x030900A4
206 static inline PyObject*
207 PyObject_CallOneArg(PyObject *func, PyObject *arg)
208 {
209 return PyObject_CallFunctionObjArgs(func, arg, NULL);
210 }
211 #endif
212
213
214 // bpo-40024 added PyModule_AddType() to Python 3.9.0a5
215 #if PY_VERSION_HEX < 0x030900A5
216 static inline int
217 PyModule_AddType(PyObject *module, PyTypeObject *type)
218 {
219 const char *name, *dot;
220
221 if (PyType_Ready(type) < 0) {
222 return -1;
223 }
224
225 // inline _PyType_Name()
226 name = type->tp_name;
227 assert(name != NULL);
228 dot = strrchr(name, '.');
229 if (dot != NULL) {
230 name = dot + 1;
231 }
232
233 Py_INCREF(type);
234 if (PyModule_AddObject(module, name, (PyObject *)type) < 0) {
235 Py_DECREF(type);
236 return -1;
237 }
238
239 return 0;
240 }
241 #endif
242
243
244 // bpo-40241 added PyObject_GC_IsTracked() to Python 3.9.0a6.
245 // bpo-4688 added _PyObject_GC_IS_TRACKED() to Python 2.7.0a2.
246 #if PY_VERSION_HEX < 0x030900A6
247 static inline int
248 PyObject_GC_IsTracked(PyObject* obj)
249 {
250 return (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj));
251 }
252 #endif
253
// Backport of PyObject_GC_IsFinalized(), which bpo-40241 added to
// Python 3.9.0a6. It relies on _PyGCHead_FINALIZED(), which bpo-18112 added
// to Python 3.4.0 final, hence the lower version bound.
// Returns 1 when obj is a GC object whose GC head is flagged as finalized,
// else 0.
#if PY_VERSION_HEX >= 0x030400F0 && PY_VERSION_HEX < 0x030900A6
static inline int
PyObject_GC_IsFinalized(PyObject *obj)
{
    if (!PyObject_IS_GC(obj)) {
        return 0;
    }
    // The PyGC_Head is the element located just before the object.
    return _PyGCHead_FINALIZED((PyGC_Head *)(obj)-1) ? 1 : 0;
}
#endif
263
264
265 // bpo-39573 added Py_IS_TYPE() to Python 3.9.0a4
266 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_IS_TYPE)
267 static inline int
268 _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) {
269 return ob->ob_type == type;
270 }
271 #define Py_IS_TYPE(ob, type) _Py_IS_TYPE((const PyObject*)(ob), type)
272 #endif
273
274
275 #ifdef __cplusplus
276 }
277 #endif
278 #endif // PYTHONCAPI_COMPAT
@@ -0,0 +1,278 b''
1 // Header file providing new functions of the Python C API to old Python
2 // versions.
3 //
4 // File distributed under the MIT license.
5 //
6 // Homepage:
7 // https://github.com/pythoncapi/pythoncapi_compat
8 //
9 // Latest version:
10 // https://raw.githubusercontent.com/pythoncapi/pythoncapi_compat/master/pythoncapi_compat.h
11
12 #ifndef PYTHONCAPI_COMPAT
13 #define PYTHONCAPI_COMPAT
14
15 #ifdef __cplusplus
16 extern "C" {
17 #endif
18
19 #include <Python.h>
20 #include "frameobject.h" // PyFrameObject, PyFrame_GetBack()
21
22
// Convert any object pointer expression to PyObject*. Only defined here when
// the included Python headers have not already provided the macro.
#if !defined(_PyObject_CAST)
#  define _PyObject_CAST(op) ((PyObject*)(op))
#endif
27
28
29 // bpo-42262 added Py_NewRef() to Python 3.10.0a3
30 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_NewRef)
31 static inline PyObject* _Py_NewRef(PyObject *obj)
32 {
33 Py_INCREF(obj);
34 return obj;
35 }
36 #define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj))
37 #endif
38
39
40 // bpo-42262 added Py_XNewRef() to Python 3.10.0a3
41 #if PY_VERSION_HEX < 0x030a00A3 && !defined(Py_XNewRef)
42 static inline PyObject* _Py_XNewRef(PyObject *obj)
43 {
44 Py_XINCREF(obj);
45 return obj;
46 }
47 #define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj))
48 #endif
49
50
51 // bpo-39573 added Py_SET_REFCNT() to Python 3.9.0a4
52 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_REFCNT)
53 static inline void _Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt)
54 {
55 ob->ob_refcnt = refcnt;
56 }
57 #define Py_SET_REFCNT(ob, refcnt) _Py_SET_REFCNT((PyObject*)(ob), refcnt)
58 #endif
59
60
61 // bpo-39573 added Py_SET_TYPE() to Python 3.9.0a4
62 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_TYPE)
63 static inline void
64 _Py_SET_TYPE(PyObject *ob, PyTypeObject *type)
65 {
66 ob->ob_type = type;
67 }
68 #define Py_SET_TYPE(ob, type) _Py_SET_TYPE((PyObject*)(ob), type)
69 #endif
70
71
72 // bpo-39573 added Py_SET_SIZE() to Python 3.9.0a4
73 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_SET_SIZE)
74 static inline void
75 _Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size)
76 {
77 ob->ob_size = size;
78 }
79 #define Py_SET_SIZE(ob, size) _Py_SET_SIZE((PyVarObject*)(ob), size)
80 #endif
81
82
83 // bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1
84 #if PY_VERSION_HEX < 0x030900B1
85 static inline PyCodeObject*
86 PyFrame_GetCode(PyFrameObject *frame)
87 {
88 PyCodeObject *code;
89 assert(frame != NULL);
90 code = frame->f_code;
91 assert(code != NULL);
92 Py_INCREF(code);
93 return code;
94 }
95 #endif
96
97 static inline PyCodeObject*
98 _PyFrame_GetCodeBorrow(PyFrameObject *frame)
99 {
100 PyCodeObject *code = PyFrame_GetCode(frame);
101 Py_DECREF(code);
102 return code; // borrowed reference
103 }
104
105
106 // bpo-40421 added PyFrame_GetCode() to Python 3.9.0b1
107 #if PY_VERSION_HEX < 0x030900B1
108 static inline PyFrameObject*
109 PyFrame_GetBack(PyFrameObject *frame)
110 {
111 PyFrameObject *back;
112 assert(frame != NULL);
113 back = frame->f_back;
114 Py_XINCREF(back);
115 return back;
116 }
117 #endif
118
119 static inline PyFrameObject*
120 _PyFrame_GetBackBorrow(PyFrameObject *frame)
121 {
122 PyFrameObject *back = PyFrame_GetBack(frame);
123 Py_XDECREF(back);
124 return back; // borrowed reference
125 }
126
127
128 // bpo-39947 added PyThreadState_GetInterpreter() to Python 3.9.0a5
129 #if PY_VERSION_HEX < 0x030900A5
130 static inline PyInterpreterState *
131 PyThreadState_GetInterpreter(PyThreadState *tstate)
132 {
133 assert(tstate != NULL);
134 return tstate->interp;
135 }
136 #endif
137
138
139 // bpo-40429 added PyThreadState_GetFrame() to Python 3.9.0b1
140 #if PY_VERSION_HEX < 0x030900B1
141 static inline PyFrameObject*
142 PyThreadState_GetFrame(PyThreadState *tstate)
143 {
144 PyFrameObject *frame;
145 assert(tstate != NULL);
146 frame = tstate->frame;
147 Py_XINCREF(frame);
148 return frame;
149 }
150 #endif
151
152 static inline PyFrameObject*
153 _PyThreadState_GetFrameBorrow(PyThreadState *tstate)
154 {
155 PyFrameObject *frame = PyThreadState_GetFrame(tstate);
156 Py_XDECREF(frame);
157 return frame; // borrowed reference
158 }
159
160
161 // bpo-39947 added PyInterpreterState_Get() to Python 3.9.0a5
162 #if PY_VERSION_HEX < 0x030900A5
163 static inline PyInterpreterState *
164 PyInterpreterState_Get(void)
165 {
166 PyThreadState *tstate;
167 PyInterpreterState *interp;
168
169 tstate = PyThreadState_GET();
170 if (tstate == NULL) {
171 Py_FatalError("GIL released (tstate is NULL)");
172 }
173 interp = tstate->interp;
174 if (interp == NULL) {
175 Py_FatalError("no current interpreter");
176 }
177 return interp;
178 }
179 #endif
180
181
// bpo-39947 added PyThreadState_GetID() to Python 3.9.0a6.
// The backport is only provided from 0x030700A1 onwards (see the version
// guard below). Returns the id field of the thread state.
#if 0x030700A1 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x030900A6
static inline uint64_t
PyThreadState_GetID(PyThreadState *tstate)
{
    assert(tstate != NULL);
    return tstate->id;
}
#endif
191
192
193 // bpo-37194 added PyObject_CallNoArgs() to Python 3.9.0a1
194 #if PY_VERSION_HEX < 0x030900A1
195 static inline PyObject*
196 PyObject_CallNoArgs(PyObject *func)
197 {
198 return PyObject_CallFunctionObjArgs(func, NULL);
199 }
200 #endif
201
202
203 // bpo-39245 made PyObject_CallOneArg() public (previously called
204 // _PyObject_CallOneArg) in Python 3.9.0a4
205 #if PY_VERSION_HEX < 0x030900A4
206 static inline PyObject*
207 PyObject_CallOneArg(PyObject *func, PyObject *arg)
208 {
209 return PyObject_CallFunctionObjArgs(func, arg, NULL);
210 }
211 #endif
212
213
214 // bpo-40024 added PyModule_AddType() to Python 3.9.0a5
215 #if PY_VERSION_HEX < 0x030900A5
216 static inline int
217 PyModule_AddType(PyObject *module, PyTypeObject *type)
218 {
219 const char *name, *dot;
220
221 if (PyType_Ready(type) < 0) {
222 return -1;
223 }
224
225 // inline _PyType_Name()
226 name = type->tp_name;
227 assert(name != NULL);
228 dot = strrchr(name, '.');
229 if (dot != NULL) {
230 name = dot + 1;
231 }
232
233 Py_INCREF(type);
234 if (PyModule_AddObject(module, name, (PyObject *)type) < 0) {
235 Py_DECREF(type);
236 return -1;
237 }
238
239 return 0;
240 }
241 #endif
242
243
244 // bpo-40241 added PyObject_GC_IsTracked() to Python 3.9.0a6.
245 // bpo-4688 added _PyObject_GC_IS_TRACKED() to Python 2.7.0a2.
246 #if PY_VERSION_HEX < 0x030900A6
247 static inline int
248 PyObject_GC_IsTracked(PyObject* obj)
249 {
250 return (PyObject_IS_GC(obj) && _PyObject_GC_IS_TRACKED(obj));
251 }
252 #endif
253
// Backport of PyObject_GC_IsFinalized(), which bpo-40241 added to
// Python 3.9.0a6. It relies on _PyGCHead_FINALIZED(), which bpo-18112 added
// to Python 3.4.0 final, hence the lower version bound.
// Returns 1 when obj is a GC object whose GC head is flagged as finalized,
// else 0.
#if PY_VERSION_HEX >= 0x030400F0 && PY_VERSION_HEX < 0x030900A6
static inline int
PyObject_GC_IsFinalized(PyObject *obj)
{
    if (!PyObject_IS_GC(obj)) {
        return 0;
    }
    // The PyGC_Head is the element located just before the object.
    return _PyGCHead_FINALIZED((PyGC_Head *)(obj)-1) ? 1 : 0;
}
#endif
263
264
265 // bpo-39573 added Py_IS_TYPE() to Python 3.9.0a4
266 #if PY_VERSION_HEX < 0x030900A4 && !defined(Py_IS_TYPE)
267 static inline int
268 _Py_IS_TYPE(const PyObject *ob, const PyTypeObject *type) {
269 return ob->ob_type == type;
270 }
271 #define Py_IS_TYPE(ob, type) _Py_IS_TYPE((const PyObject*)(ob), type)
272 #endif
273
274
275 #ifdef __cplusplus
276 }
277 #endif
278 #endif // PYTHONCAPI_COMPAT
@@ -1,11 +1,12 b''
1 # Files that just need to be migrated to the formatter.
1 # Files that just need to be migrated to the formatter.
2 # Do not add new files here!
2 # Do not add new files here!
3 mercurial/cext/manifest.c
3 mercurial/cext/manifest.c
4 mercurial/cext/osutil.c
4 mercurial/cext/osutil.c
5 # Vendored code that we should never format:
5 # Vendored code that we should never format:
6 syntax: glob
6 syntax: glob
7 contrib/python-zstandard/**.c
7 contrib/python-zstandard/**.c
8 contrib/python-zstandard/**.h
8 contrib/python-zstandard/**.h
9 hgext/fsmonitor/pywatchman/**.c
9 hgext/fsmonitor/pywatchman/**.c
10 mercurial/thirdparty/**.c
10 mercurial/thirdparty/**.c
11 mercurial/thirdparty/**.h
11 mercurial/thirdparty/**.h
12 mercurial/pythoncapi_compat.h
@@ -1,792 +1,792 b''
1 /**
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(BufferWithSegments__doc__,
13 PyDoc_STRVAR(BufferWithSegments__doc__,
14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
15 "\n"
15 "\n"
16 "This type represents a contiguous chunk of memory containing N discrete\n"
16 "This type represents a contiguous chunk of memory containing N discrete\n"
17 "items within sub-segments of that memory.\n"
17 "items within sub-segments of that memory.\n"
18 "\n"
18 "\n"
19 "Segments within the buffer are stored as an array of\n"
19 "Segments within the buffer are stored as an array of\n"
20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
21 "integer using the host/native bit order representation.\n"
21 "integer using the host/native bit order representation.\n"
22 "\n"
22 "\n"
23 "The type exists to facilitate operations against N>1 items without the\n"
23 "The type exists to facilitate operations against N>1 items without the\n"
24 "overhead of Python object creation and management.\n"
24 "overhead of Python object creation and management.\n"
25 );
25 );
26
26
27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
29 if (self->parent.buf) {
29 if (self->parent.buf) {
30 PyBuffer_Release(&self->parent);
30 PyBuffer_Release(&self->parent);
31 }
31 }
32 else if (self->useFree) {
32 else if (self->useFree) {
33 free(self->data);
33 free(self->data);
34 }
34 }
35 else {
35 else {
36 PyMem_Free(self->data);
36 PyMem_Free(self->data);
37 }
37 }
38
38
39 self->data = NULL;
39 self->data = NULL;
40
40
41 if (self->useFree) {
41 if (self->useFree) {
42 free(self->segments);
42 free(self->segments);
43 }
43 }
44 else {
44 else {
45 PyMem_Free(self->segments);
45 PyMem_Free(self->segments);
46 }
46 }
47
47
48 self->segments = NULL;
48 self->segments = NULL;
49
49
50 PyObject_Del(self);
50 PyObject_Del(self);
51 }
51 }
52
52
53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
54 static char* kwlist[] = {
54 static char* kwlist[] = {
55 "data",
55 "data",
56 "segments",
56 "segments",
57 NULL
57 NULL
58 };
58 };
59
59
60 Py_buffer segments;
60 Py_buffer segments;
61 Py_ssize_t segmentCount;
61 Py_ssize_t segmentCount;
62 Py_ssize_t i;
62 Py_ssize_t i;
63
63
64 memset(&self->parent, 0, sizeof(self->parent));
64 memset(&self->parent, 0, sizeof(self->parent));
65
65
66 #if PY_MAJOR_VERSION >= 3
66 #if PY_MAJOR_VERSION >= 3
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
68 #else
68 #else
69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
70 #endif
70 #endif
71 kwlist, &self->parent, &segments)) {
71 kwlist, &self->parent, &segments)) {
72 return -1;
72 return -1;
73 }
73 }
74
74
75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
77 goto except;
77 goto except;
78 }
78 }
79
79
80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
82 goto except;
82 goto except;
83 }
83 }
84
84
85 if (segments.len % sizeof(BufferSegment)) {
85 if (segments.len % sizeof(BufferSegment)) {
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu",
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu",
87 sizeof(BufferSegment));
87 sizeof(BufferSegment));
88 goto except;
88 goto except;
89 }
89 }
90
90
91 segmentCount = segments.len / sizeof(BufferSegment);
91 segmentCount = segments.len / sizeof(BufferSegment);
92
92
93 /* Validate segments data, as blindly trusting it could lead to arbitrary
93 /* Validate segments data, as blindly trusting it could lead to arbitrary
94 memory access. */
94 memory access. */
95 for (i = 0; i < segmentCount; i++) {
95 for (i = 0; i < segmentCount; i++) {
96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
97
97
98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
100 goto except;
100 goto except;
101 return -1;
101 return -1;
102 }
102 }
103 }
103 }
104
104
105 /* Make a copy of the segments data. It is cheap to do so and is a guard
105 /* Make a copy of the segments data. It is cheap to do so and is a guard
106 against caller changing offsets, which has security implications. */
106 against caller changing offsets, which has security implications. */
107 self->segments = PyMem_Malloc(segments.len);
107 self->segments = PyMem_Malloc(segments.len);
108 if (!self->segments) {
108 if (!self->segments) {
109 PyErr_NoMemory();
109 PyErr_NoMemory();
110 goto except;
110 goto except;
111 }
111 }
112
112
113 memcpy(self->segments, segments.buf, segments.len);
113 memcpy(self->segments, segments.buf, segments.len);
114 PyBuffer_Release(&segments);
114 PyBuffer_Release(&segments);
115
115
116 self->data = self->parent.buf;
116 self->data = self->parent.buf;
117 self->dataSize = self->parent.len;
117 self->dataSize = self->parent.len;
118 self->segmentCount = segmentCount;
118 self->segmentCount = segmentCount;
119
119
120 return 0;
120 return 0;
121
121
122 except:
122 except:
123 PyBuffer_Release(&self->parent);
123 PyBuffer_Release(&self->parent);
124 PyBuffer_Release(&segments);
124 PyBuffer_Release(&segments);
125 return -1;
125 return -1;
126 }
126 }
127
127
128 /**
128 /**
129 * Construct a BufferWithSegments from existing memory and offsets.
129 * Construct a BufferWithSegments from existing memory and offsets.
130 *
130 *
131 * Ownership of the backing memory and BufferSegments will be transferred to
131 * Ownership of the backing memory and BufferSegments will be transferred to
132 * the created object and freed when the BufferWithSegments is destroyed.
132 * the created object and freed when the BufferWithSegments is destroyed.
133 */
133 */
134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
135 BufferSegment* segments, Py_ssize_t segmentsSize) {
135 BufferSegment* segments, Py_ssize_t segmentsSize) {
136 ZstdBufferWithSegments* result = NULL;
136 ZstdBufferWithSegments* result = NULL;
137 Py_ssize_t i;
137 Py_ssize_t i;
138
138
139 if (NULL == data) {
139 if (NULL == data) {
140 PyErr_SetString(PyExc_ValueError, "data is NULL");
140 PyErr_SetString(PyExc_ValueError, "data is NULL");
141 return NULL;
141 return NULL;
142 }
142 }
143
143
144 if (NULL == segments) {
144 if (NULL == segments) {
145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
146 return NULL;
146 return NULL;
147 }
147 }
148
148
149 for (i = 0; i < segmentsSize; i++) {
149 for (i = 0; i < segmentsSize; i++) {
150 BufferSegment* segment = &segments[i];
150 BufferSegment* segment = &segments[i];
151
151
152 if (segment->offset + segment->length > dataSize) {
152 if (segment->offset + segment->length > dataSize) {
153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
154 return NULL;
154 return NULL;
155 }
155 }
156 }
156 }
157
157
158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
159 if (NULL == result) {
159 if (NULL == result) {
160 return NULL;
160 return NULL;
161 }
161 }
162
162
163 result->useFree = 0;
163 result->useFree = 0;
164
164
165 memset(&result->parent, 0, sizeof(result->parent));
165 memset(&result->parent, 0, sizeof(result->parent));
166 result->data = data;
166 result->data = data;
167 result->dataSize = dataSize;
167 result->dataSize = dataSize;
168 result->segments = segments;
168 result->segments = segments;
169 result->segmentCount = segmentsSize;
169 result->segmentCount = segmentsSize;
170
170
171 return result;
171 return result;
172 }
172 }
173
173
174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
175 return self->segmentCount;
175 return self->segmentCount;
176 }
176 }
177
177
178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
179 ZstdBufferSegment* result = NULL;
179 ZstdBufferSegment* result = NULL;
180
180
181 if (i < 0) {
181 if (i < 0) {
182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
183 return NULL;
183 return NULL;
184 }
184 }
185
185
186 if (i >= self->segmentCount) {
186 if (i >= self->segmentCount) {
187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
188 return NULL;
188 return NULL;
189 }
189 }
190
190
191 if (self->segments[i].length > PY_SSIZE_T_MAX) {
191 if (self->segments[i].length > PY_SSIZE_T_MAX) {
192 PyErr_Format(PyExc_ValueError,
192 PyErr_Format(PyExc_ValueError,
193 "item at offset %zd is too large for this platform", i);
193 "item at offset %zd is too large for this platform", i);
194 return NULL;
194 return NULL;
195 }
195 }
196
196
197 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
197 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
198 if (NULL == result) {
198 if (NULL == result) {
199 return NULL;
199 return NULL;
200 }
200 }
201
201
202 result->parent = (PyObject*)self;
202 result->parent = (PyObject*)self;
203 Py_INCREF(self);
203 Py_INCREF(self);
204
204
205 result->data = (char*)self->data + self->segments[i].offset;
205 result->data = (char*)self->data + self->segments[i].offset;
206 result->dataSize = (Py_ssize_t)self->segments[i].length;
206 result->dataSize = (Py_ssize_t)self->segments[i].length;
207 result->offset = self->segments[i].offset;
207 result->offset = self->segments[i].offset;
208
208
209 return result;
209 return result;
210 }
210 }
211
211
212 #if PY_MAJOR_VERSION >= 3
212 #if PY_MAJOR_VERSION >= 3
213 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
213 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
214 if (self->dataSize > PY_SSIZE_T_MAX) {
214 if (self->dataSize > PY_SSIZE_T_MAX) {
215 view->obj = NULL;
215 view->obj = NULL;
216 PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform");
216 PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform");
217 return -1;
217 return -1;
218 }
218 }
219
219
220 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags);
220 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags);
221 }
221 }
222 #else
222 #else
223 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
223 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
224 if (segment != 0) {
224 if (segment != 0) {
225 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
225 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
226 return -1;
226 return -1;
227 }
227 }
228
228
229 if (self->dataSize > PY_SSIZE_T_MAX) {
229 if (self->dataSize > PY_SSIZE_T_MAX) {
230 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
230 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
231 return -1;
231 return -1;
232 }
232 }
233
233
234 *ptrptr = self->data;
234 *ptrptr = self->data;
235 return (Py_ssize_t)self->dataSize;
235 return (Py_ssize_t)self->dataSize;
236 }
236 }
237
237
238 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
238 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
239 if (len) {
239 if (len) {
240 *len = 1;
240 *len = 1;
241 }
241 }
242
242
243 return 1;
243 return 1;
244 }
244 }
245 #endif
245 #endif
246
246
247 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
247 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
248 "Obtain a bytes instance for this buffer.\n"
248 "Obtain a bytes instance for this buffer.\n"
249 );
249 );
250
250
251 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
251 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
252 if (self->dataSize > PY_SSIZE_T_MAX) {
252 if (self->dataSize > PY_SSIZE_T_MAX) {
253 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
253 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
254 return NULL;
254 return NULL;
255 }
255 }
256
256
257 return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize);
257 return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize);
258 }
258 }
259
259
260 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
260 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
261 "Obtain a BufferSegments describing segments in this sintance.\n"
261 "Obtain a BufferSegments describing segments in this sintance.\n"
262 );
262 );
263
263
264 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
264 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
265 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
265 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
266 if (NULL == result) {
266 if (NULL == result) {
267 return NULL;
267 return NULL;
268 }
268 }
269
269
270 result->parent = (PyObject*)self;
270 result->parent = (PyObject*)self;
271 Py_INCREF(self);
271 Py_INCREF(self);
272 result->segments = self->segments;
272 result->segments = self->segments;
273 result->segmentCount = self->segmentCount;
273 result->segmentCount = self->segmentCount;
274
274
275 return result;
275 return result;
276 }
276 }
277
277
278 static PySequenceMethods BufferWithSegments_sq = {
278 static PySequenceMethods BufferWithSegments_sq = {
279 (lenfunc)BufferWithSegments_length, /* sq_length */
279 (lenfunc)BufferWithSegments_length, /* sq_length */
280 0, /* sq_concat */
280 0, /* sq_concat */
281 0, /* sq_repeat */
281 0, /* sq_repeat */
282 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
282 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
283 0, /* sq_ass_item */
283 0, /* sq_ass_item */
284 0, /* sq_contains */
284 0, /* sq_contains */
285 0, /* sq_inplace_concat */
285 0, /* sq_inplace_concat */
286 0 /* sq_inplace_repeat */
286 0 /* sq_inplace_repeat */
287 };
287 };
288
288
289 static PyBufferProcs BufferWithSegments_as_buffer = {
289 static PyBufferProcs BufferWithSegments_as_buffer = {
290 #if PY_MAJOR_VERSION >= 3
290 #if PY_MAJOR_VERSION >= 3
291 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
291 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
292 0 /* bf_releasebuffer */
292 0 /* bf_releasebuffer */
293 #else
293 #else
294 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
294 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
295 0, /* bf_getwritebuffer */
295 0, /* bf_getwritebuffer */
296 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
296 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
297 0 /* bf_getcharbuffer */
297 0 /* bf_getcharbuffer */
298 #endif
298 #endif
299 };
299 };
300
300
301 static PyMethodDef BufferWithSegments_methods[] = {
301 static PyMethodDef BufferWithSegments_methods[] = {
302 { "segments", (PyCFunction)BufferWithSegments_segments,
302 { "segments", (PyCFunction)BufferWithSegments_segments,
303 METH_NOARGS, BufferWithSegments_segments__doc__ },
303 METH_NOARGS, BufferWithSegments_segments__doc__ },
304 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
304 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
305 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
305 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
306 { NULL, NULL }
306 { NULL, NULL }
307 };
307 };
308
308
309 static PyMemberDef BufferWithSegments_members[] = {
309 static PyMemberDef BufferWithSegments_members[] = {
310 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
310 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
311 READONLY, "total size of the buffer in bytes" },
311 READONLY, "total size of the buffer in bytes" },
312 { NULL }
312 { NULL }
313 };
313 };
314
314
315 PyTypeObject ZstdBufferWithSegmentsType = {
315 PyTypeObject ZstdBufferWithSegmentsType = {
316 PyVarObject_HEAD_INIT(NULL, 0)
316 PyVarObject_HEAD_INIT(NULL, 0)
317 "zstd.BufferWithSegments", /* tp_name */
317 "zstd.BufferWithSegments", /* tp_name */
318 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
318 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
319 0, /* tp_itemsize */
319 0, /* tp_itemsize */
320 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
320 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
321 0, /* tp_print */
321 0, /* tp_print */
322 0, /* tp_getattr */
322 0, /* tp_getattr */
323 0, /* tp_setattr */
323 0, /* tp_setattr */
324 0, /* tp_compare */
324 0, /* tp_compare */
325 0, /* tp_repr */
325 0, /* tp_repr */
326 0, /* tp_as_number */
326 0, /* tp_as_number */
327 &BufferWithSegments_sq, /* tp_as_sequence */
327 &BufferWithSegments_sq, /* tp_as_sequence */
328 0, /* tp_as_mapping */
328 0, /* tp_as_mapping */
329 0, /* tp_hash */
329 0, /* tp_hash */
330 0, /* tp_call */
330 0, /* tp_call */
331 0, /* tp_str */
331 0, /* tp_str */
332 0, /* tp_getattro */
332 0, /* tp_getattro */
333 0, /* tp_setattro */
333 0, /* tp_setattro */
334 &BufferWithSegments_as_buffer, /* tp_as_buffer */
334 &BufferWithSegments_as_buffer, /* tp_as_buffer */
335 Py_TPFLAGS_DEFAULT, /* tp_flags */
335 Py_TPFLAGS_DEFAULT, /* tp_flags */
336 BufferWithSegments__doc__, /* tp_doc */
336 BufferWithSegments__doc__, /* tp_doc */
337 0, /* tp_traverse */
337 0, /* tp_traverse */
338 0, /* tp_clear */
338 0, /* tp_clear */
339 0, /* tp_richcompare */
339 0, /* tp_richcompare */
340 0, /* tp_weaklistoffset */
340 0, /* tp_weaklistoffset */
341 0, /* tp_iter */
341 0, /* tp_iter */
342 0, /* tp_iternext */
342 0, /* tp_iternext */
343 BufferWithSegments_methods, /* tp_methods */
343 BufferWithSegments_methods, /* tp_methods */
344 BufferWithSegments_members, /* tp_members */
344 BufferWithSegments_members, /* tp_members */
345 0, /* tp_getset */
345 0, /* tp_getset */
346 0, /* tp_base */
346 0, /* tp_base */
347 0, /* tp_dict */
347 0, /* tp_dict */
348 0, /* tp_descr_get */
348 0, /* tp_descr_get */
349 0, /* tp_descr_set */
349 0, /* tp_descr_set */
350 0, /* tp_dictoffset */
350 0, /* tp_dictoffset */
351 (initproc)BufferWithSegments_init, /* tp_init */
351 (initproc)BufferWithSegments_init, /* tp_init */
352 0, /* tp_alloc */
352 0, /* tp_alloc */
353 PyType_GenericNew, /* tp_new */
353 PyType_GenericNew, /* tp_new */
354 };
354 };
355
355
356 PyDoc_STRVAR(BufferSegments__doc__,
356 PyDoc_STRVAR(BufferSegments__doc__,
357 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
357 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
358 );
358 );
359
359
360 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
360 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
361 Py_CLEAR(self->parent);
361 Py_CLEAR(self->parent);
362 PyObject_Del(self);
362 PyObject_Del(self);
363 }
363 }
364
364
365 #if PY_MAJOR_VERSION >= 3
365 #if PY_MAJOR_VERSION >= 3
366 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
366 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
367 return PyBuffer_FillInfo(view, (PyObject*)self,
367 return PyBuffer_FillInfo(view, (PyObject*)self,
368 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
368 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
369 1, flags);
369 1, flags);
370 }
370 }
371 #else
371 #else
372 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
372 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
373 if (segment != 0) {
373 if (segment != 0) {
374 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
374 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
375 return -1;
375 return -1;
376 }
376 }
377
377
378 *ptrptr = (void*)self->segments;
378 *ptrptr = (void*)self->segments;
379 return self->segmentCount * sizeof(BufferSegment);
379 return self->segmentCount * sizeof(BufferSegment);
380 }
380 }
381
381
382 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
382 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
383 if (len) {
383 if (len) {
384 *len = 1;
384 *len = 1;
385 }
385 }
386
386
387 return 1;
387 return 1;
388 }
388 }
389 #endif
389 #endif
390
390
391 static PyBufferProcs BufferSegments_as_buffer = {
391 static PyBufferProcs BufferSegments_as_buffer = {
392 #if PY_MAJOR_VERSION >= 3
392 #if PY_MAJOR_VERSION >= 3
393 (getbufferproc)BufferSegments_getbuffer,
393 (getbufferproc)BufferSegments_getbuffer,
394 0
394 0
395 #else
395 #else
396 (readbufferproc)BufferSegments_getreadbuffer,
396 (readbufferproc)BufferSegments_getreadbuffer,
397 0,
397 0,
398 (segcountproc)BufferSegments_getsegcount,
398 (segcountproc)BufferSegments_getsegcount,
399 0
399 0
400 #endif
400 #endif
401 };
401 };
402
402
403 PyTypeObject ZstdBufferSegmentsType = {
403 PyTypeObject ZstdBufferSegmentsType = {
404 PyVarObject_HEAD_INIT(NULL, 0)
404 PyVarObject_HEAD_INIT(NULL, 0)
405 "zstd.BufferSegments", /* tp_name */
405 "zstd.BufferSegments", /* tp_name */
406 sizeof(ZstdBufferSegments),/* tp_basicsize */
406 sizeof(ZstdBufferSegments),/* tp_basicsize */
407 0, /* tp_itemsize */
407 0, /* tp_itemsize */
408 (destructor)BufferSegments_dealloc, /* tp_dealloc */
408 (destructor)BufferSegments_dealloc, /* tp_dealloc */
409 0, /* tp_print */
409 0, /* tp_print */
410 0, /* tp_getattr */
410 0, /* tp_getattr */
411 0, /* tp_setattr */
411 0, /* tp_setattr */
412 0, /* tp_compare */
412 0, /* tp_compare */
413 0, /* tp_repr */
413 0, /* tp_repr */
414 0, /* tp_as_number */
414 0, /* tp_as_number */
415 0, /* tp_as_sequence */
415 0, /* tp_as_sequence */
416 0, /* tp_as_mapping */
416 0, /* tp_as_mapping */
417 0, /* tp_hash */
417 0, /* tp_hash */
418 0, /* tp_call */
418 0, /* tp_call */
419 0, /* tp_str */
419 0, /* tp_str */
420 0, /* tp_getattro */
420 0, /* tp_getattro */
421 0, /* tp_setattro */
421 0, /* tp_setattro */
422 &BufferSegments_as_buffer, /* tp_as_buffer */
422 &BufferSegments_as_buffer, /* tp_as_buffer */
423 Py_TPFLAGS_DEFAULT, /* tp_flags */
423 Py_TPFLAGS_DEFAULT, /* tp_flags */
424 BufferSegments__doc__, /* tp_doc */
424 BufferSegments__doc__, /* tp_doc */
425 0, /* tp_traverse */
425 0, /* tp_traverse */
426 0, /* tp_clear */
426 0, /* tp_clear */
427 0, /* tp_richcompare */
427 0, /* tp_richcompare */
428 0, /* tp_weaklistoffset */
428 0, /* tp_weaklistoffset */
429 0, /* tp_iter */
429 0, /* tp_iter */
430 0, /* tp_iternext */
430 0, /* tp_iternext */
431 0, /* tp_methods */
431 0, /* tp_methods */
432 0, /* tp_members */
432 0, /* tp_members */
433 0, /* tp_getset */
433 0, /* tp_getset */
434 0, /* tp_base */
434 0, /* tp_base */
435 0, /* tp_dict */
435 0, /* tp_dict */
436 0, /* tp_descr_get */
436 0, /* tp_descr_get */
437 0, /* tp_descr_set */
437 0, /* tp_descr_set */
438 0, /* tp_dictoffset */
438 0, /* tp_dictoffset */
439 0, /* tp_init */
439 0, /* tp_init */
440 0, /* tp_alloc */
440 0, /* tp_alloc */
441 PyType_GenericNew, /* tp_new */
441 PyType_GenericNew, /* tp_new */
442 };
442 };
443
443
444 PyDoc_STRVAR(BufferSegment__doc__,
444 PyDoc_STRVAR(BufferSegment__doc__,
445 "BufferSegment - Represents a segment within a BufferWithSegments\n"
445 "BufferSegment - Represents a segment within a BufferWithSegments\n"
446 );
446 );
447
447
448 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
448 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
449 Py_CLEAR(self->parent);
449 Py_CLEAR(self->parent);
450 PyObject_Del(self);
450 PyObject_Del(self);
451 }
451 }
452
452
453 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
453 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
454 return self->dataSize;
454 return self->dataSize;
455 }
455 }
456
456
457 #if PY_MAJOR_VERSION >= 3
457 #if PY_MAJOR_VERSION >= 3
458 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
458 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
459 return PyBuffer_FillInfo(view, (PyObject*)self,
459 return PyBuffer_FillInfo(view, (PyObject*)self,
460 self->data, self->dataSize, 1, flags);
460 self->data, self->dataSize, 1, flags);
461 }
461 }
462 #else
462 #else
463 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
463 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
464 if (segment != 0) {
464 if (segment != 0) {
465 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
465 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
466 return -1;
466 return -1;
467 }
467 }
468
468
469 *ptrptr = self->data;
469 *ptrptr = self->data;
470 return self->dataSize;
470 return self->dataSize;
471 }
471 }
472
472
473 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
473 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
474 if (len) {
474 if (len) {
475 *len = 1;
475 *len = 1;
476 }
476 }
477
477
478 return 1;
478 return 1;
479 }
479 }
480 #endif
480 #endif
481
481
482 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
482 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
483 "Obtain a bytes instance for this segment.\n"
483 "Obtain a bytes instance for this segment.\n"
484 );
484 );
485
485
486 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
486 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
487 return PyBytes_FromStringAndSize(self->data, self->dataSize);
487 return PyBytes_FromStringAndSize(self->data, self->dataSize);
488 }
488 }
489
489
490 static PySequenceMethods BufferSegment_sq = {
490 static PySequenceMethods BufferSegment_sq = {
491 (lenfunc)BufferSegment_length, /* sq_length */
491 (lenfunc)BufferSegment_length, /* sq_length */
492 0, /* sq_concat */
492 0, /* sq_concat */
493 0, /* sq_repeat */
493 0, /* sq_repeat */
494 0, /* sq_item */
494 0, /* sq_item */
495 0, /* sq_ass_item */
495 0, /* sq_ass_item */
496 0, /* sq_contains */
496 0, /* sq_contains */
497 0, /* sq_inplace_concat */
497 0, /* sq_inplace_concat */
498 0 /* sq_inplace_repeat */
498 0 /* sq_inplace_repeat */
499 };
499 };
500
500
501 static PyBufferProcs BufferSegment_as_buffer = {
501 static PyBufferProcs BufferSegment_as_buffer = {
502 #if PY_MAJOR_VERSION >= 3
502 #if PY_MAJOR_VERSION >= 3
503 (getbufferproc)BufferSegment_getbuffer,
503 (getbufferproc)BufferSegment_getbuffer,
504 0
504 0
505 #else
505 #else
506 (readbufferproc)BufferSegment_getreadbuffer,
506 (readbufferproc)BufferSegment_getreadbuffer,
507 0,
507 0,
508 (segcountproc)BufferSegment_getsegcount,
508 (segcountproc)BufferSegment_getsegcount,
509 0
509 0
510 #endif
510 #endif
511 };
511 };
512
512
513 static PyMethodDef BufferSegment_methods[] = {
513 static PyMethodDef BufferSegment_methods[] = {
514 { "tobytes", (PyCFunction)BufferSegment_tobytes,
514 { "tobytes", (PyCFunction)BufferSegment_tobytes,
515 METH_NOARGS, BufferSegment_tobytes__doc__ },
515 METH_NOARGS, BufferSegment_tobytes__doc__ },
516 { NULL, NULL }
516 { NULL, NULL }
517 };
517 };
518
518
519 static PyMemberDef BufferSegment_members[] = {
519 static PyMemberDef BufferSegment_members[] = {
520 { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
520 { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
521 "offset of segment within parent buffer" },
521 "offset of segment within parent buffer" },
522 { NULL }
522 { NULL }
523 };
523 };
524
524
525 PyTypeObject ZstdBufferSegmentType = {
525 PyTypeObject ZstdBufferSegmentType = {
526 PyVarObject_HEAD_INIT(NULL, 0)
526 PyVarObject_HEAD_INIT(NULL, 0)
527 "zstd.BufferSegment", /* tp_name */
527 "zstd.BufferSegment", /* tp_name */
528 sizeof(ZstdBufferSegment),/* tp_basicsize */
528 sizeof(ZstdBufferSegment),/* tp_basicsize */
529 0, /* tp_itemsize */
529 0, /* tp_itemsize */
530 (destructor)BufferSegment_dealloc, /* tp_dealloc */
530 (destructor)BufferSegment_dealloc, /* tp_dealloc */
531 0, /* tp_print */
531 0, /* tp_print */
532 0, /* tp_getattr */
532 0, /* tp_getattr */
533 0, /* tp_setattr */
533 0, /* tp_setattr */
534 0, /* tp_compare */
534 0, /* tp_compare */
535 0, /* tp_repr */
535 0, /* tp_repr */
536 0, /* tp_as_number */
536 0, /* tp_as_number */
537 &BufferSegment_sq, /* tp_as_sequence */
537 &BufferSegment_sq, /* tp_as_sequence */
538 0, /* tp_as_mapping */
538 0, /* tp_as_mapping */
539 0, /* tp_hash */
539 0, /* tp_hash */
540 0, /* tp_call */
540 0, /* tp_call */
541 0, /* tp_str */
541 0, /* tp_str */
542 0, /* tp_getattro */
542 0, /* tp_getattro */
543 0, /* tp_setattro */
543 0, /* tp_setattro */
544 &BufferSegment_as_buffer, /* tp_as_buffer */
544 &BufferSegment_as_buffer, /* tp_as_buffer */
545 Py_TPFLAGS_DEFAULT, /* tp_flags */
545 Py_TPFLAGS_DEFAULT, /* tp_flags */
546 BufferSegment__doc__, /* tp_doc */
546 BufferSegment__doc__, /* tp_doc */
547 0, /* tp_traverse */
547 0, /* tp_traverse */
548 0, /* tp_clear */
548 0, /* tp_clear */
549 0, /* tp_richcompare */
549 0, /* tp_richcompare */
550 0, /* tp_weaklistoffset */
550 0, /* tp_weaklistoffset */
551 0, /* tp_iter */
551 0, /* tp_iter */
552 0, /* tp_iternext */
552 0, /* tp_iternext */
553 BufferSegment_methods, /* tp_methods */
553 BufferSegment_methods, /* tp_methods */
554 BufferSegment_members, /* tp_members */
554 BufferSegment_members, /* tp_members */
555 0, /* tp_getset */
555 0, /* tp_getset */
556 0, /* tp_base */
556 0, /* tp_base */
557 0, /* tp_dict */
557 0, /* tp_dict */
558 0, /* tp_descr_get */
558 0, /* tp_descr_get */
559 0, /* tp_descr_set */
559 0, /* tp_descr_set */
560 0, /* tp_dictoffset */
560 0, /* tp_dictoffset */
561 0, /* tp_init */
561 0, /* tp_init */
562 0, /* tp_alloc */
562 0, /* tp_alloc */
563 PyType_GenericNew, /* tp_new */
563 PyType_GenericNew, /* tp_new */
564 };
564 };
565
565
566 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
566 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
567 "Represents a collection of BufferWithSegments.\n"
567 "Represents a collection of BufferWithSegments.\n"
568 );
568 );
569
569
570 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
570 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
571 Py_ssize_t i;
571 Py_ssize_t i;
572
572
573 if (self->firstElements) {
573 if (self->firstElements) {
574 PyMem_Free(self->firstElements);
574 PyMem_Free(self->firstElements);
575 self->firstElements = NULL;
575 self->firstElements = NULL;
576 }
576 }
577
577
578 if (self->buffers) {
578 if (self->buffers) {
579 for (i = 0; i < self->bufferCount; i++) {
579 for (i = 0; i < self->bufferCount; i++) {
580 Py_CLEAR(self->buffers[i]);
580 Py_CLEAR(self->buffers[i]);
581 }
581 }
582
582
583 PyMem_Free(self->buffers);
583 PyMem_Free(self->buffers);
584 self->buffers = NULL;
584 self->buffers = NULL;
585 }
585 }
586
586
587 PyObject_Del(self);
587 PyObject_Del(self);
588 }
588 }
589
589
590 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
590 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
591 Py_ssize_t size;
591 Py_ssize_t size;
592 Py_ssize_t i;
592 Py_ssize_t i;
593 Py_ssize_t offset = 0;
593 Py_ssize_t offset = 0;
594
594
595 size = PyTuple_Size(args);
595 size = PyTuple_Size(args);
596 if (-1 == size) {
596 if (-1 == size) {
597 return -1;
597 return -1;
598 }
598 }
599
599
600 if (0 == size) {
600 if (0 == size) {
601 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
601 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
602 return -1;
602 return -1;
603 }
603 }
604
604
605 for (i = 0; i < size; i++) {
605 for (i = 0; i < size; i++) {
606 PyObject* item = PyTuple_GET_ITEM(args, i);
606 PyObject* item = PyTuple_GET_ITEM(args, i);
607 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
607 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
608 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
608 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
609 return -1;
609 return -1;
610 }
610 }
611
611
612 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
612 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
613 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
613 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
614 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
614 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
615 return -1;
615 return -1;
616 }
616 }
617 }
617 }
618
618
619 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
619 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
620 if (NULL == self->buffers) {
620 if (NULL == self->buffers) {
621 PyErr_NoMemory();
621 PyErr_NoMemory();
622 return -1;
622 return -1;
623 }
623 }
624
624
625 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
625 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
626 if (NULL == self->firstElements) {
626 if (NULL == self->firstElements) {
627 PyMem_Free(self->buffers);
627 PyMem_Free(self->buffers);
628 self->buffers = NULL;
628 self->buffers = NULL;
629 PyErr_NoMemory();
629 PyErr_NoMemory();
630 return -1;
630 return -1;
631 }
631 }
632
632
633 self->bufferCount = size;
633 self->bufferCount = size;
634
634
635 for (i = 0; i < size; i++) {
635 for (i = 0; i < size; i++) {
636 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
636 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
637
637
638 self->buffers[i] = item;
638 self->buffers[i] = item;
639 Py_INCREF(item);
639 Py_INCREF(item);
640
640
641 if (i > 0) {
641 if (i > 0) {
642 self->firstElements[i - 1] = offset;
642 self->firstElements[i - 1] = offset;
643 }
643 }
644
644
645 offset += item->segmentCount;
645 offset += item->segmentCount;
646 }
646 }
647
647
648 self->firstElements[size - 1] = offset;
648 self->firstElements[size - 1] = offset;
649
649
650 return 0;
650 return 0;
651 }
651 }
652
652
653 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
653 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
654 Py_ssize_t i;
654 Py_ssize_t i;
655 Py_ssize_t j;
655 Py_ssize_t j;
656 unsigned long long size = 0;
656 unsigned long long size = 0;
657
657
658 for (i = 0; i < self->bufferCount; i++) {
658 for (i = 0; i < self->bufferCount; i++) {
659 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
659 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
660 size += self->buffers[i]->segments[j].length;
660 size += self->buffers[i]->segments[j].length;
661 }
661 }
662 }
662 }
663
663
664 return PyLong_FromUnsignedLongLong(size);
664 return PyLong_FromUnsignedLongLong(size);
665 }
665 }
666
666
667 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
667 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
668 return self->firstElements[self->bufferCount - 1];
668 return self->firstElements[self->bufferCount - 1];
669 }
669 }
670
670
671 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
671 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
672 Py_ssize_t bufferOffset;
672 Py_ssize_t bufferOffset;
673
673
674 if (i < 0) {
674 if (i < 0) {
675 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
675 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
676 return NULL;
676 return NULL;
677 }
677 }
678
678
679 if (i >= BufferWithSegmentsCollection_length(self)) {
679 if (i >= BufferWithSegmentsCollection_length(self)) {
680 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
680 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
681 BufferWithSegmentsCollection_length(self));
681 BufferWithSegmentsCollection_length(self));
682 return NULL;
682 return NULL;
683 }
683 }
684
684
685 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
685 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
686 Py_ssize_t offset = 0;
686 Py_ssize_t offset = 0;
687
687
688 if (i < self->firstElements[bufferOffset]) {
688 if (i < self->firstElements[bufferOffset]) {
689 if (bufferOffset > 0) {
689 if (bufferOffset > 0) {
690 offset = self->firstElements[bufferOffset - 1];
690 offset = self->firstElements[bufferOffset - 1];
691 }
691 }
692
692
693 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
693 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
694 }
694 }
695 }
695 }
696
696
697 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
697 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
698 return NULL;
698 return NULL;
699 }
699 }
700
700
701 static PySequenceMethods BufferWithSegmentsCollection_sq = {
701 static PySequenceMethods BufferWithSegmentsCollection_sq = {
702 (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
702 (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
703 0, /* sq_concat */
703 0, /* sq_concat */
704 0, /* sq_repeat */
704 0, /* sq_repeat */
705 (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
705 (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
706 0, /* sq_ass_item */
706 0, /* sq_ass_item */
707 0, /* sq_contains */
707 0, /* sq_contains */
708 0, /* sq_inplace_concat */
708 0, /* sq_inplace_concat */
709 0 /* sq_inplace_repeat */
709 0 /* sq_inplace_repeat */
710 };
710 };
711
711
712 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
712 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
713 { "size", (PyCFunction)BufferWithSegmentsCollection_size,
713 { "size", (PyCFunction)BufferWithSegmentsCollection_size,
714 METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
714 METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
715 { NULL, NULL }
715 { NULL, NULL }
716 };
716 };
717
717
718 PyTypeObject ZstdBufferWithSegmentsCollectionType = {
718 PyTypeObject ZstdBufferWithSegmentsCollectionType = {
719 PyVarObject_HEAD_INIT(NULL, 0)
719 PyVarObject_HEAD_INIT(NULL, 0)
720 "zstd.BufferWithSegmentsCollection", /* tp_name */
720 "zstd.BufferWithSegmentsCollection", /* tp_name */
721 sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
721 sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
722 0, /* tp_itemsize */
722 0, /* tp_itemsize */
723 (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
723 (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
724 0, /* tp_print */
724 0, /* tp_print */
725 0, /* tp_getattr */
725 0, /* tp_getattr */
726 0, /* tp_setattr */
726 0, /* tp_setattr */
727 0, /* tp_compare */
727 0, /* tp_compare */
728 0, /* tp_repr */
728 0, /* tp_repr */
729 0, /* tp_as_number */
729 0, /* tp_as_number */
730 &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
730 &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
731 0, /* tp_as_mapping */
731 0, /* tp_as_mapping */
732 0, /* tp_hash */
732 0, /* tp_hash */
733 0, /* tp_call */
733 0, /* tp_call */
734 0, /* tp_str */
734 0, /* tp_str */
735 0, /* tp_getattro */
735 0, /* tp_getattro */
736 0, /* tp_setattro */
736 0, /* tp_setattro */
737 0, /* tp_as_buffer */
737 0, /* tp_as_buffer */
738 Py_TPFLAGS_DEFAULT, /* tp_flags */
738 Py_TPFLAGS_DEFAULT, /* tp_flags */
739 BufferWithSegmentsCollection__doc__, /* tp_doc */
739 BufferWithSegmentsCollection__doc__, /* tp_doc */
740 0, /* tp_traverse */
740 0, /* tp_traverse */
741 0, /* tp_clear */
741 0, /* tp_clear */
742 0, /* tp_richcompare */
742 0, /* tp_richcompare */
743 0, /* tp_weaklistoffset */
743 0, /* tp_weaklistoffset */
744 /* TODO implement iterator for performance. */
744 /* TODO implement iterator for performance. */
745 0, /* tp_iter */
745 0, /* tp_iter */
746 0, /* tp_iternext */
746 0, /* tp_iternext */
747 BufferWithSegmentsCollection_methods, /* tp_methods */
747 BufferWithSegmentsCollection_methods, /* tp_methods */
748 0, /* tp_members */
748 0, /* tp_members */
749 0, /* tp_getset */
749 0, /* tp_getset */
750 0, /* tp_base */
750 0, /* tp_base */
751 0, /* tp_dict */
751 0, /* tp_dict */
752 0, /* tp_descr_get */
752 0, /* tp_descr_get */
753 0, /* tp_descr_set */
753 0, /* tp_descr_set */
754 0, /* tp_dictoffset */
754 0, /* tp_dictoffset */
755 (initproc)BufferWithSegmentsCollection_init, /* tp_init */
755 (initproc)BufferWithSegmentsCollection_init, /* tp_init */
756 0, /* tp_alloc */
756 0, /* tp_alloc */
757 PyType_GenericNew, /* tp_new */
757 PyType_GenericNew, /* tp_new */
758 };
758 };
759
759
760 void bufferutil_module_init(PyObject* mod) {
760 void bufferutil_module_init(PyObject* mod) {
761 Py_TYPE(&ZstdBufferWithSegmentsType) = &PyType_Type;
761 Py_SET_TYPE(&ZstdBufferWithSegmentsType, &PyType_Type);
762 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
762 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
763 return;
763 return;
764 }
764 }
765
765
766 Py_INCREF(&ZstdBufferWithSegmentsType);
766 Py_INCREF(&ZstdBufferWithSegmentsType);
767 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
767 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
768
768
769 Py_TYPE(&ZstdBufferSegmentsType) = &PyType_Type;
769 Py_SET_TYPE(&ZstdBufferSegmentsType, &PyType_Type);
770 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
770 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
771 return;
771 return;
772 }
772 }
773
773
774 Py_INCREF(&ZstdBufferSegmentsType);
774 Py_INCREF(&ZstdBufferSegmentsType);
775 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
775 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
776
776
777 Py_TYPE(&ZstdBufferSegmentType) = &PyType_Type;
777 Py_SET_TYPE(&ZstdBufferSegmentType, &PyType_Type);
778 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
778 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
779 return;
779 return;
780 }
780 }
781
781
782 Py_INCREF(&ZstdBufferSegmentType);
782 Py_INCREF(&ZstdBufferSegmentType);
783 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
783 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
784
784
785 Py_TYPE(&ZstdBufferWithSegmentsCollectionType) = &PyType_Type;
785 Py_SET_TYPE(&ZstdBufferWithSegmentsCollectionType, &PyType_Type);
786 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
786 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
787 return;
787 return;
788 }
788 }
789
789
790 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
790 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
791 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
791 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
792 }
792 }
@@ -1,360 +1,360 b''
1 /**
1 /**
2 * Copyright (c) 2018-present, Gregory Szorc
2 * Copyright (c) 2018-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
13 PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
14 "Iterator of output chunks from ZstdCompressionChunker.\n"
14 "Iterator of output chunks from ZstdCompressionChunker.\n"
15 );
15 );
16
16
17 static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
17 static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
18 Py_XDECREF(self->chunker);
18 Py_XDECREF(self->chunker);
19
19
20 PyObject_Del(self);
20 PyObject_Del(self);
21 }
21 }
22
22
23 static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
23 static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
24 Py_INCREF(self);
24 Py_INCREF(self);
25 return self;
25 return self;
26 }
26 }
27
27
28 static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
28 static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
29 size_t zresult;
29 size_t zresult;
30 PyObject* chunk;
30 PyObject* chunk;
31 ZstdCompressionChunker* chunker = self->chunker;
31 ZstdCompressionChunker* chunker = self->chunker;
32 ZSTD_EndDirective zFlushMode;
32 ZSTD_EndDirective zFlushMode;
33
33
34 if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
34 if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
35 PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
35 PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
36 return NULL;
36 return NULL;
37 }
37 }
38
38
39 if (chunker->finished) {
39 if (chunker->finished) {
40 return NULL;
40 return NULL;
41 }
41 }
42
42
43 /* If we have data left in the input, consume it. */
43 /* If we have data left in the input, consume it. */
44 while (chunker->input.pos < chunker->input.size) {
44 while (chunker->input.pos < chunker->input.size) {
45 Py_BEGIN_ALLOW_THREADS
45 Py_BEGIN_ALLOW_THREADS
46 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
46 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
47 &chunker->input, ZSTD_e_continue);
47 &chunker->input, ZSTD_e_continue);
48 Py_END_ALLOW_THREADS
48 Py_END_ALLOW_THREADS
49
49
50 /* Input is fully consumed. */
50 /* Input is fully consumed. */
51 if (chunker->input.pos == chunker->input.size) {
51 if (chunker->input.pos == chunker->input.size) {
52 chunker->input.src = NULL;
52 chunker->input.src = NULL;
53 chunker->input.pos = 0;
53 chunker->input.pos = 0;
54 chunker->input.size = 0;
54 chunker->input.size = 0;
55 PyBuffer_Release(&chunker->inBuffer);
55 PyBuffer_Release(&chunker->inBuffer);
56 }
56 }
57
57
58 if (ZSTD_isError(zresult)) {
58 if (ZSTD_isError(zresult)) {
59 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
59 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
60 return NULL;
60 return NULL;
61 }
61 }
62
62
63 /* If it produced a full output chunk, emit it. */
63 /* If it produced a full output chunk, emit it. */
64 if (chunker->output.pos == chunker->output.size) {
64 if (chunker->output.pos == chunker->output.size) {
65 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
65 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
66 if (!chunk) {
66 if (!chunk) {
67 return NULL;
67 return NULL;
68 }
68 }
69
69
70 chunker->output.pos = 0;
70 chunker->output.pos = 0;
71
71
72 return chunk;
72 return chunk;
73 }
73 }
74
74
75 /* Else continue to compress available input data. */
75 /* Else continue to compress available input data. */
76 }
76 }
77
77
78 /* We also need this here for the special case of an empty input buffer. */
78 /* We also need this here for the special case of an empty input buffer. */
79 if (chunker->input.pos == chunker->input.size) {
79 if (chunker->input.pos == chunker->input.size) {
80 chunker->input.src = NULL;
80 chunker->input.src = NULL;
81 chunker->input.pos = 0;
81 chunker->input.pos = 0;
82 chunker->input.size = 0;
82 chunker->input.size = 0;
83 PyBuffer_Release(&chunker->inBuffer);
83 PyBuffer_Release(&chunker->inBuffer);
84 }
84 }
85
85
86 /* No more input data. A partial chunk may be in chunker->output.
86 /* No more input data. A partial chunk may be in chunker->output.
87 * If we're in normal compression mode, we're done. Otherwise if we're in
87 * If we're in normal compression mode, we're done. Otherwise if we're in
88 * flush or finish mode, we need to emit what data remains.
88 * flush or finish mode, we need to emit what data remains.
89 */
89 */
90 if (self->mode == compressionchunker_mode_normal) {
90 if (self->mode == compressionchunker_mode_normal) {
91 /* We don't need to set StopIteration. */
91 /* We don't need to set StopIteration. */
92 return NULL;
92 return NULL;
93 }
93 }
94
94
95 if (self->mode == compressionchunker_mode_flush) {
95 if (self->mode == compressionchunker_mode_flush) {
96 zFlushMode = ZSTD_e_flush;
96 zFlushMode = ZSTD_e_flush;
97 }
97 }
98 else if (self->mode == compressionchunker_mode_finish) {
98 else if (self->mode == compressionchunker_mode_finish) {
99 zFlushMode = ZSTD_e_end;
99 zFlushMode = ZSTD_e_end;
100 }
100 }
101 else {
101 else {
102 PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
102 PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
103 return NULL;
103 return NULL;
104 }
104 }
105
105
106 Py_BEGIN_ALLOW_THREADS
106 Py_BEGIN_ALLOW_THREADS
107 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
107 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
108 &chunker->input, zFlushMode);
108 &chunker->input, zFlushMode);
109 Py_END_ALLOW_THREADS
109 Py_END_ALLOW_THREADS
110
110
111 if (ZSTD_isError(zresult)) {
111 if (ZSTD_isError(zresult)) {
112 PyErr_Format(ZstdError, "zstd compress error: %s",
112 PyErr_Format(ZstdError, "zstd compress error: %s",
113 ZSTD_getErrorName(zresult));
113 ZSTD_getErrorName(zresult));
114 return NULL;
114 return NULL;
115 }
115 }
116
116
117 if (!zresult && chunker->output.pos == 0) {
117 if (!zresult && chunker->output.pos == 0) {
118 return NULL;
118 return NULL;
119 }
119 }
120
120
121 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
121 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
122 if (!chunk) {
122 if (!chunk) {
123 return NULL;
123 return NULL;
124 }
124 }
125
125
126 chunker->output.pos = 0;
126 chunker->output.pos = 0;
127
127
128 if (!zresult && self->mode == compressionchunker_mode_finish) {
128 if (!zresult && self->mode == compressionchunker_mode_finish) {
129 chunker->finished = 1;
129 chunker->finished = 1;
130 }
130 }
131
131
132 return chunk;
132 return chunk;
133 }
133 }
134
134
135 PyTypeObject ZstdCompressionChunkerIteratorType = {
135 PyTypeObject ZstdCompressionChunkerIteratorType = {
136 PyVarObject_HEAD_INIT(NULL, 0)
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "zstd.ZstdCompressionChunkerIterator", /* tp_name */
137 "zstd.ZstdCompressionChunkerIterator", /* tp_name */
138 sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
138 sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
139 0, /* tp_itemsize */
139 0, /* tp_itemsize */
140 (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
140 (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
141 0, /* tp_print */
141 0, /* tp_print */
142 0, /* tp_getattr */
142 0, /* tp_getattr */
143 0, /* tp_setattr */
143 0, /* tp_setattr */
144 0, /* tp_compare */
144 0, /* tp_compare */
145 0, /* tp_repr */
145 0, /* tp_repr */
146 0, /* tp_as_number */
146 0, /* tp_as_number */
147 0, /* tp_as_sequence */
147 0, /* tp_as_sequence */
148 0, /* tp_as_mapping */
148 0, /* tp_as_mapping */
149 0, /* tp_hash */
149 0, /* tp_hash */
150 0, /* tp_call */
150 0, /* tp_call */
151 0, /* tp_str */
151 0, /* tp_str */
152 0, /* tp_getattro */
152 0, /* tp_getattro */
153 0, /* tp_setattro */
153 0, /* tp_setattro */
154 0, /* tp_as_buffer */
154 0, /* tp_as_buffer */
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
156 ZstdCompressionChunkerIterator__doc__, /* tp_doc */
156 ZstdCompressionChunkerIterator__doc__, /* tp_doc */
157 0, /* tp_traverse */
157 0, /* tp_traverse */
158 0, /* tp_clear */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
160 0, /* tp_weaklistoffset */
161 ZstdCompressionChunkerIterator_iter, /* tp_iter */
161 ZstdCompressionChunkerIterator_iter, /* tp_iter */
162 (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
162 (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
163 0, /* tp_methods */
163 0, /* tp_methods */
164 0, /* tp_members */
164 0, /* tp_members */
165 0, /* tp_getset */
165 0, /* tp_getset */
166 0, /* tp_base */
166 0, /* tp_base */
167 0, /* tp_dict */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
171 0, /* tp_init */
172 0, /* tp_alloc */
172 0, /* tp_alloc */
173 PyType_GenericNew, /* tp_new */
173 PyType_GenericNew, /* tp_new */
174 };
174 };
175
175
176 PyDoc_STRVAR(ZstdCompressionChunker__doc__,
176 PyDoc_STRVAR(ZstdCompressionChunker__doc__,
177 "Compress chunks iteratively into exact chunk sizes.\n"
177 "Compress chunks iteratively into exact chunk sizes.\n"
178 );
178 );
179
179
180 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
180 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
181 PyBuffer_Release(&self->inBuffer);
181 PyBuffer_Release(&self->inBuffer);
182 self->input.src = NULL;
182 self->input.src = NULL;
183
183
184 PyMem_Free(self->output.dst);
184 PyMem_Free(self->output.dst);
185 self->output.dst = NULL;
185 self->output.dst = NULL;
186
186
187 Py_XDECREF(self->compressor);
187 Py_XDECREF(self->compressor);
188
188
189 PyObject_Del(self);
189 PyObject_Del(self);
190 }
190 }
191
191
192 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
192 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
193 static char* kwlist[] = {
193 static char* kwlist[] = {
194 "data",
194 "data",
195 NULL
195 NULL
196 };
196 };
197
197
198 ZstdCompressionChunkerIterator* result;
198 ZstdCompressionChunkerIterator* result;
199
199
200 if (self->finished) {
200 if (self->finished) {
201 PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
201 PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
202 return NULL;
202 return NULL;
203 }
203 }
204
204
205 if (self->inBuffer.obj) {
205 if (self->inBuffer.obj) {
206 PyErr_SetString(ZstdError,
206 PyErr_SetString(ZstdError,
207 "cannot perform operation before consuming output from previous operation");
207 "cannot perform operation before consuming output from previous operation");
208 return NULL;
208 return NULL;
209 }
209 }
210
210
211 #if PY_MAJOR_VERSION >= 3
211 #if PY_MAJOR_VERSION >= 3
212 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
212 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
213 #else
213 #else
214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
215 #endif
215 #endif
216 kwlist, &self->inBuffer)) {
216 kwlist, &self->inBuffer)) {
217 return NULL;
217 return NULL;
218 }
218 }
219
219
220 if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
220 if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
221 PyErr_SetString(PyExc_ValueError,
221 PyErr_SetString(PyExc_ValueError,
222 "data buffer should be contiguous and have at most one dimension");
222 "data buffer should be contiguous and have at most one dimension");
223 PyBuffer_Release(&self->inBuffer);
223 PyBuffer_Release(&self->inBuffer);
224 return NULL;
224 return NULL;
225 }
225 }
226
226
227 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
227 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
228 if (!result) {
228 if (!result) {
229 PyBuffer_Release(&self->inBuffer);
229 PyBuffer_Release(&self->inBuffer);
230 return NULL;
230 return NULL;
231 }
231 }
232
232
233 self->input.src = self->inBuffer.buf;
233 self->input.src = self->inBuffer.buf;
234 self->input.size = self->inBuffer.len;
234 self->input.size = self->inBuffer.len;
235 self->input.pos = 0;
235 self->input.pos = 0;
236
236
237 result->chunker = self;
237 result->chunker = self;
238 Py_INCREF(result->chunker);
238 Py_INCREF(result->chunker);
239
239
240 result->mode = compressionchunker_mode_normal;
240 result->mode = compressionchunker_mode_normal;
241
241
242 return result;
242 return result;
243 }
243 }
244
244
245 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
245 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
246 ZstdCompressionChunkerIterator* result;
246 ZstdCompressionChunkerIterator* result;
247
247
248 if (self->finished) {
248 if (self->finished) {
249 PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
249 PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
250 return NULL;
250 return NULL;
251 }
251 }
252
252
253 if (self->inBuffer.obj) {
253 if (self->inBuffer.obj) {
254 PyErr_SetString(ZstdError,
254 PyErr_SetString(ZstdError,
255 "cannot call finish() before consuming output from previous operation");
255 "cannot call finish() before consuming output from previous operation");
256 return NULL;
256 return NULL;
257 }
257 }
258
258
259 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
259 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
260 if (!result) {
260 if (!result) {
261 return NULL;
261 return NULL;
262 }
262 }
263
263
264 result->chunker = self;
264 result->chunker = self;
265 Py_INCREF(result->chunker);
265 Py_INCREF(result->chunker);
266
266
267 result->mode = compressionchunker_mode_finish;
267 result->mode = compressionchunker_mode_finish;
268
268
269 return result;
269 return result;
270 }
270 }
271
271
272 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
272 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
273 ZstdCompressionChunkerIterator* result;
273 ZstdCompressionChunkerIterator* result;
274
274
275 if (self->finished) {
275 if (self->finished) {
276 PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
276 PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
277 return NULL;
277 return NULL;
278 }
278 }
279
279
280 if (self->inBuffer.obj) {
280 if (self->inBuffer.obj) {
281 PyErr_SetString(ZstdError,
281 PyErr_SetString(ZstdError,
282 "cannot call flush() before consuming output from previous operation");
282 "cannot call flush() before consuming output from previous operation");
283 return NULL;
283 return NULL;
284 }
284 }
285
285
286 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
286 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
287 if (!result) {
287 if (!result) {
288 return NULL;
288 return NULL;
289 }
289 }
290
290
291 result->chunker = self;
291 result->chunker = self;
292 Py_INCREF(result->chunker);
292 Py_INCREF(result->chunker);
293
293
294 result->mode = compressionchunker_mode_flush;
294 result->mode = compressionchunker_mode_flush;
295
295
296 return result;
296 return result;
297 }
297 }
298
298
299 static PyMethodDef ZstdCompressionChunker_methods[] = {
299 static PyMethodDef ZstdCompressionChunker_methods[] = {
300 { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
300 { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
301 PyDoc_STR("compress data") },
301 PyDoc_STR("compress data") },
302 { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
302 { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
303 PyDoc_STR("finish compression operation") },
303 PyDoc_STR("finish compression operation") },
304 { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
304 { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
305 PyDoc_STR("finish compression operation") },
305 PyDoc_STR("finish compression operation") },
306 { NULL, NULL }
306 { NULL, NULL }
307 };
307 };
308
308
309 PyTypeObject ZstdCompressionChunkerType = {
309 PyTypeObject ZstdCompressionChunkerType = {
310 PyVarObject_HEAD_INIT(NULL, 0)
310 PyVarObject_HEAD_INIT(NULL, 0)
311 "zstd.ZstdCompressionChunkerType", /* tp_name */
311 "zstd.ZstdCompressionChunkerType", /* tp_name */
312 sizeof(ZstdCompressionChunker), /* tp_basicsize */
312 sizeof(ZstdCompressionChunker), /* tp_basicsize */
313 0, /* tp_itemsize */
313 0, /* tp_itemsize */
314 (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
314 (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
315 0, /* tp_print */
315 0, /* tp_print */
316 0, /* tp_getattr */
316 0, /* tp_getattr */
317 0, /* tp_setattr */
317 0, /* tp_setattr */
318 0, /* tp_compare */
318 0, /* tp_compare */
319 0, /* tp_repr */
319 0, /* tp_repr */
320 0, /* tp_as_number */
320 0, /* tp_as_number */
321 0, /* tp_as_sequence */
321 0, /* tp_as_sequence */
322 0, /* tp_as_mapping */
322 0, /* tp_as_mapping */
323 0, /* tp_hash */
323 0, /* tp_hash */
324 0, /* tp_call */
324 0, /* tp_call */
325 0, /* tp_str */
325 0, /* tp_str */
326 0, /* tp_getattro */
326 0, /* tp_getattro */
327 0, /* tp_setattro */
327 0, /* tp_setattro */
328 0, /* tp_as_buffer */
328 0, /* tp_as_buffer */
329 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
329 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
330 ZstdCompressionChunker__doc__, /* tp_doc */
330 ZstdCompressionChunker__doc__, /* tp_doc */
331 0, /* tp_traverse */
331 0, /* tp_traverse */
332 0, /* tp_clear */
332 0, /* tp_clear */
333 0, /* tp_richcompare */
333 0, /* tp_richcompare */
334 0, /* tp_weaklistoffset */
334 0, /* tp_weaklistoffset */
335 0, /* tp_iter */
335 0, /* tp_iter */
336 0, /* tp_iternext */
336 0, /* tp_iternext */
337 ZstdCompressionChunker_methods, /* tp_methods */
337 ZstdCompressionChunker_methods, /* tp_methods */
338 0, /* tp_members */
338 0, /* tp_members */
339 0, /* tp_getset */
339 0, /* tp_getset */
340 0, /* tp_base */
340 0, /* tp_base */
341 0, /* tp_dict */
341 0, /* tp_dict */
342 0, /* tp_descr_get */
342 0, /* tp_descr_get */
343 0, /* tp_descr_set */
343 0, /* tp_descr_set */
344 0, /* tp_dictoffset */
344 0, /* tp_dictoffset */
345 0, /* tp_init */
345 0, /* tp_init */
346 0, /* tp_alloc */
346 0, /* tp_alloc */
347 PyType_GenericNew, /* tp_new */
347 PyType_GenericNew, /* tp_new */
348 };
348 };
349
349
350 void compressionchunker_module_init(PyObject* module) {
350 void compressionchunker_module_init(PyObject* module) {
351 Py_TYPE(&ZstdCompressionChunkerIteratorType) = &PyType_Type;
351 Py_SET_TYPE(&ZstdCompressionChunkerIteratorType, &PyType_Type);
352 if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
352 if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
353 return;
353 return;
354 }
354 }
355
355
356 Py_TYPE(&ZstdCompressionChunkerType) = &PyType_Type;
356 Py_SET_TYPE(&ZstdCompressionChunkerType, &PyType_Type);
357 if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
357 if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
358 return;
358 return;
359 }
359 }
360 }
360 }
@@ -1,411 +1,411 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
14 static char* kwlist[] = {
14 static char* kwlist[] = {
15 "dict_size",
15 "dict_size",
16 "samples",
16 "samples",
17 "k",
17 "k",
18 "d",
18 "d",
19 "notifications",
19 "notifications",
20 "dict_id",
20 "dict_id",
21 "level",
21 "level",
22 "steps",
22 "steps",
23 "threads",
23 "threads",
24 NULL
24 NULL
25 };
25 };
26
26
27 size_t capacity;
27 size_t capacity;
28 PyObject* samples;
28 PyObject* samples;
29 unsigned k = 0;
29 unsigned k = 0;
30 unsigned d = 0;
30 unsigned d = 0;
31 unsigned notifications = 0;
31 unsigned notifications = 0;
32 unsigned dictID = 0;
32 unsigned dictID = 0;
33 int level = 0;
33 int level = 0;
34 unsigned steps = 0;
34 unsigned steps = 0;
35 int threads = 0;
35 int threads = 0;
36 ZDICT_cover_params_t params;
36 ZDICT_cover_params_t params;
37 Py_ssize_t samplesLen;
37 Py_ssize_t samplesLen;
38 Py_ssize_t i;
38 Py_ssize_t i;
39 size_t samplesSize = 0;
39 size_t samplesSize = 0;
40 void* sampleBuffer = NULL;
40 void* sampleBuffer = NULL;
41 size_t* sampleSizes = NULL;
41 size_t* sampleSizes = NULL;
42 void* sampleOffset;
42 void* sampleOffset;
43 Py_ssize_t sampleSize;
43 Py_ssize_t sampleSize;
44 void* dict = NULL;
44 void* dict = NULL;
45 size_t zresult;
45 size_t zresult;
46 ZstdCompressionDict* result = NULL;
46 ZstdCompressionDict* result = NULL;
47
47
48 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary",
48 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary",
49 kwlist, &capacity, &PyList_Type, &samples,
49 kwlist, &capacity, &PyList_Type, &samples,
50 &k, &d, &notifications, &dictID, &level, &steps, &threads)) {
50 &k, &d, &notifications, &dictID, &level, &steps, &threads)) {
51 return NULL;
51 return NULL;
52 }
52 }
53
53
54 if (threads < 0) {
54 if (threads < 0) {
55 threads = cpu_count();
55 threads = cpu_count();
56 }
56 }
57
57
58 memset(&params, 0, sizeof(params));
58 memset(&params, 0, sizeof(params));
59 params.k = k;
59 params.k = k;
60 params.d = d;
60 params.d = d;
61 params.steps = steps;
61 params.steps = steps;
62 params.nbThreads = threads;
62 params.nbThreads = threads;
63 params.zParams.notificationLevel = notifications;
63 params.zParams.notificationLevel = notifications;
64 params.zParams.dictID = dictID;
64 params.zParams.dictID = dictID;
65 params.zParams.compressionLevel = level;
65 params.zParams.compressionLevel = level;
66
66
67 /* Figure out total size of input samples. */
67 /* Figure out total size of input samples. */
68 samplesLen = PyList_Size(samples);
68 samplesLen = PyList_Size(samples);
69 for (i = 0; i < samplesLen; i++) {
69 for (i = 0; i < samplesLen; i++) {
70 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
70 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
71
71
72 if (!PyBytes_Check(sampleItem)) {
72 if (!PyBytes_Check(sampleItem)) {
73 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
73 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
74 return NULL;
74 return NULL;
75 }
75 }
76 samplesSize += PyBytes_GET_SIZE(sampleItem);
76 samplesSize += PyBytes_GET_SIZE(sampleItem);
77 }
77 }
78
78
79 sampleBuffer = PyMem_Malloc(samplesSize);
79 sampleBuffer = PyMem_Malloc(samplesSize);
80 if (!sampleBuffer) {
80 if (!sampleBuffer) {
81 PyErr_NoMemory();
81 PyErr_NoMemory();
82 goto finally;
82 goto finally;
83 }
83 }
84
84
85 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
85 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
86 if (!sampleSizes) {
86 if (!sampleSizes) {
87 PyErr_NoMemory();
87 PyErr_NoMemory();
88 goto finally;
88 goto finally;
89 }
89 }
90
90
91 sampleOffset = sampleBuffer;
91 sampleOffset = sampleBuffer;
92 for (i = 0; i < samplesLen; i++) {
92 for (i = 0; i < samplesLen; i++) {
93 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
93 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
94 sampleSize = PyBytes_GET_SIZE(sampleItem);
94 sampleSize = PyBytes_GET_SIZE(sampleItem);
95 sampleSizes[i] = sampleSize;
95 sampleSizes[i] = sampleSize;
96 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
96 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
97 sampleOffset = (char*)sampleOffset + sampleSize;
97 sampleOffset = (char*)sampleOffset + sampleSize;
98 }
98 }
99
99
100 dict = PyMem_Malloc(capacity);
100 dict = PyMem_Malloc(capacity);
101 if (!dict) {
101 if (!dict) {
102 PyErr_NoMemory();
102 PyErr_NoMemory();
103 goto finally;
103 goto finally;
104 }
104 }
105
105
106 Py_BEGIN_ALLOW_THREADS
106 Py_BEGIN_ALLOW_THREADS
107 /* No parameters uses the default function, which will use default params
107 /* No parameters uses the default function, which will use default params
108 and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */
108 and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */
109 if (!params.k && !params.d && !params.zParams.compressionLevel
109 if (!params.k && !params.d && !params.zParams.compressionLevel
110 && !params.zParams.notificationLevel && !params.zParams.dictID) {
110 && !params.zParams.notificationLevel && !params.zParams.dictID) {
111 zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer,
111 zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer,
112 sampleSizes, (unsigned)samplesLen);
112 sampleSizes, (unsigned)samplesLen);
113 }
113 }
114 /* Use optimize mode if user controlled steps or threads explicitly. */
114 /* Use optimize mode if user controlled steps or threads explicitly. */
115 else if (params.steps || params.nbThreads) {
115 else if (params.steps || params.nbThreads) {
116 zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity,
116 zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity,
117 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
117 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
118 }
118 }
119 /* Non-optimize mode with explicit control. */
119 /* Non-optimize mode with explicit control. */
120 else {
120 else {
121 zresult = ZDICT_trainFromBuffer_cover(dict, capacity,
121 zresult = ZDICT_trainFromBuffer_cover(dict, capacity,
122 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
122 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
123 }
123 }
124 Py_END_ALLOW_THREADS
124 Py_END_ALLOW_THREADS
125
125
126 if (ZDICT_isError(zresult)) {
126 if (ZDICT_isError(zresult)) {
127 PyMem_Free(dict);
127 PyMem_Free(dict);
128 PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
128 PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
129 goto finally;
129 goto finally;
130 }
130 }
131
131
132 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
132 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
133 if (!result) {
133 if (!result) {
134 PyMem_Free(dict);
134 PyMem_Free(dict);
135 goto finally;
135 goto finally;
136 }
136 }
137
137
138 result->dictData = dict;
138 result->dictData = dict;
139 result->dictSize = zresult;
139 result->dictSize = zresult;
140 result->dictType = ZSTD_dct_fullDict;
140 result->dictType = ZSTD_dct_fullDict;
141 result->d = params.d;
141 result->d = params.d;
142 result->k = params.k;
142 result->k = params.k;
143 result->cdict = NULL;
143 result->cdict = NULL;
144 result->ddict = NULL;
144 result->ddict = NULL;
145
145
146 finally:
146 finally:
147 PyMem_Free(sampleBuffer);
147 PyMem_Free(sampleBuffer);
148 PyMem_Free(sampleSizes);
148 PyMem_Free(sampleSizes);
149
149
150 return result;
150 return result;
151 }
151 }
152
152
153 int ensure_ddict(ZstdCompressionDict* dict) {
153 int ensure_ddict(ZstdCompressionDict* dict) {
154 if (dict->ddict) {
154 if (dict->ddict) {
155 return 0;
155 return 0;
156 }
156 }
157
157
158 Py_BEGIN_ALLOW_THREADS
158 Py_BEGIN_ALLOW_THREADS
159 dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize,
159 dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize,
160 ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem);
160 ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem);
161 Py_END_ALLOW_THREADS
161 Py_END_ALLOW_THREADS
162 if (!dict->ddict) {
162 if (!dict->ddict) {
163 PyErr_SetString(ZstdError, "could not create decompression dict");
163 PyErr_SetString(ZstdError, "could not create decompression dict");
164 return 1;
164 return 1;
165 }
165 }
166
166
167 return 0;
167 return 0;
168 }
168 }
169
169
170 PyDoc_STRVAR(ZstdCompressionDict__doc__,
170 PyDoc_STRVAR(ZstdCompressionDict__doc__,
171 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
171 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
172 "\n"
172 "\n"
173 "This type holds the results of a computed Zstandard compression dictionary.\n"
173 "This type holds the results of a computed Zstandard compression dictionary.\n"
174 "Instances are obtained by calling ``train_dictionary()`` or by passing\n"
174 "Instances are obtained by calling ``train_dictionary()`` or by passing\n"
175 "bytes obtained from another source into the constructor.\n"
175 "bytes obtained from another source into the constructor.\n"
176 );
176 );
177
177
178 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
178 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
179 static char* kwlist[] = {
179 static char* kwlist[] = {
180 "data",
180 "data",
181 "dict_type",
181 "dict_type",
182 NULL
182 NULL
183 };
183 };
184
184
185 int result = -1;
185 int result = -1;
186 Py_buffer source;
186 Py_buffer source;
187 unsigned dictType = ZSTD_dct_auto;
187 unsigned dictType = ZSTD_dct_auto;
188
188
189 self->dictData = NULL;
189 self->dictData = NULL;
190 self->dictSize = 0;
190 self->dictSize = 0;
191 self->cdict = NULL;
191 self->cdict = NULL;
192 self->ddict = NULL;
192 self->ddict = NULL;
193
193
194 #if PY_MAJOR_VERSION >= 3
194 #if PY_MAJOR_VERSION >= 3
195 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict",
195 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict",
196 #else
196 #else
197 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict",
197 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict",
198 #endif
198 #endif
199 kwlist, &source, &dictType)) {
199 kwlist, &source, &dictType)) {
200 return -1;
200 return -1;
201 }
201 }
202
202
203 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
203 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
204 PyErr_SetString(PyExc_ValueError,
204 PyErr_SetString(PyExc_ValueError,
205 "data buffer should be contiguous and have at most one dimension");
205 "data buffer should be contiguous and have at most one dimension");
206 goto finally;
206 goto finally;
207 }
207 }
208
208
209 if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent
209 if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent
210 && dictType != ZSTD_dct_fullDict) {
210 && dictType != ZSTD_dct_fullDict) {
211 PyErr_Format(PyExc_ValueError,
211 PyErr_Format(PyExc_ValueError,
212 "invalid dictionary load mode: %d; must use DICT_TYPE_* constants",
212 "invalid dictionary load mode: %d; must use DICT_TYPE_* constants",
213 dictType);
213 dictType);
214 goto finally;
214 goto finally;
215 }
215 }
216
216
217 self->dictType = dictType;
217 self->dictType = dictType;
218
218
219 self->dictData = PyMem_Malloc(source.len);
219 self->dictData = PyMem_Malloc(source.len);
220 if (!self->dictData) {
220 if (!self->dictData) {
221 PyErr_NoMemory();
221 PyErr_NoMemory();
222 goto finally;
222 goto finally;
223 }
223 }
224
224
225 memcpy(self->dictData, source.buf, source.len);
225 memcpy(self->dictData, source.buf, source.len);
226 self->dictSize = source.len;
226 self->dictSize = source.len;
227
227
228 result = 0;
228 result = 0;
229
229
230 finally:
230 finally:
231 PyBuffer_Release(&source);
231 PyBuffer_Release(&source);
232 return result;
232 return result;
233 }
233 }
234
234
235 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
235 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
236 if (self->cdict) {
236 if (self->cdict) {
237 ZSTD_freeCDict(self->cdict);
237 ZSTD_freeCDict(self->cdict);
238 self->cdict = NULL;
238 self->cdict = NULL;
239 }
239 }
240
240
241 if (self->ddict) {
241 if (self->ddict) {
242 ZSTD_freeDDict(self->ddict);
242 ZSTD_freeDDict(self->ddict);
243 self->ddict = NULL;
243 self->ddict = NULL;
244 }
244 }
245
245
246 if (self->dictData) {
246 if (self->dictData) {
247 PyMem_Free(self->dictData);
247 PyMem_Free(self->dictData);
248 self->dictData = NULL;
248 self->dictData = NULL;
249 }
249 }
250
250
251 PyObject_Del(self);
251 PyObject_Del(self);
252 }
252 }
253
253
254 PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__,
254 PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__,
255 "Precompute a dictionary so it can be used by multiple compressors.\n"
255 "Precompute a dictionary so it can be used by multiple compressors.\n"
256 );
256 );
257
257
258 static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
258 static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
259 static char* kwlist[] = {
259 static char* kwlist[] = {
260 "level",
260 "level",
261 "compression_params",
261 "compression_params",
262 NULL
262 NULL
263 };
263 };
264
264
265 int level = 0;
265 int level = 0;
266 ZstdCompressionParametersObject* compressionParams = NULL;
266 ZstdCompressionParametersObject* compressionParams = NULL;
267 ZSTD_compressionParameters cParams;
267 ZSTD_compressionParameters cParams;
268 size_t zresult;
268 size_t zresult;
269
269
270 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist,
270 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist,
271 &level, &ZstdCompressionParametersType, &compressionParams)) {
271 &level, &ZstdCompressionParametersType, &compressionParams)) {
272 return NULL;
272 return NULL;
273 }
273 }
274
274
275 if (level && compressionParams) {
275 if (level && compressionParams) {
276 PyErr_SetString(PyExc_ValueError,
276 PyErr_SetString(PyExc_ValueError,
277 "must only specify one of level or compression_params");
277 "must only specify one of level or compression_params");
278 return NULL;
278 return NULL;
279 }
279 }
280
280
281 if (!level && !compressionParams) {
281 if (!level && !compressionParams) {
282 PyErr_SetString(PyExc_ValueError,
282 PyErr_SetString(PyExc_ValueError,
283 "must specify one of level or compression_params");
283 "must specify one of level or compression_params");
284 return NULL;
284 return NULL;
285 }
285 }
286
286
287 if (self->cdict) {
287 if (self->cdict) {
288 zresult = ZSTD_freeCDict(self->cdict);
288 zresult = ZSTD_freeCDict(self->cdict);
289 self->cdict = NULL;
289 self->cdict = NULL;
290 if (ZSTD_isError(zresult)) {
290 if (ZSTD_isError(zresult)) {
291 PyErr_Format(ZstdError, "unable to free CDict: %s",
291 PyErr_Format(ZstdError, "unable to free CDict: %s",
292 ZSTD_getErrorName(zresult));
292 ZSTD_getErrorName(zresult));
293 return NULL;
293 return NULL;
294 }
294 }
295 }
295 }
296
296
297 if (level) {
297 if (level) {
298 cParams = ZSTD_getCParams(level, 0, self->dictSize);
298 cParams = ZSTD_getCParams(level, 0, self->dictSize);
299 }
299 }
300 else {
300 else {
301 if (to_cparams(compressionParams, &cParams)) {
301 if (to_cparams(compressionParams, &cParams)) {
302 return NULL;
302 return NULL;
303 }
303 }
304 }
304 }
305
305
306 assert(!self->cdict);
306 assert(!self->cdict);
307 self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize,
307 self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize,
308 ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem);
308 ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem);
309
309
310 if (!self->cdict) {
310 if (!self->cdict) {
311 PyErr_SetString(ZstdError, "unable to precompute dictionary");
311 PyErr_SetString(ZstdError, "unable to precompute dictionary");
312 return NULL;
312 return NULL;
313 }
313 }
314
314
315 Py_RETURN_NONE;
315 Py_RETURN_NONE;
316 }
316 }
317
317
318 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
318 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
319 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
319 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
320
320
321 return PyLong_FromLong(dictID);
321 return PyLong_FromLong(dictID);
322 }
322 }
323
323
324 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
324 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
325 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
325 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
326 }
326 }
327
327
328 static PyMethodDef ZstdCompressionDict_methods[] = {
328 static PyMethodDef ZstdCompressionDict_methods[] = {
329 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
329 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
330 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
330 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
331 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
331 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
332 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
332 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
333 { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress,
333 { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress,
334 METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ },
334 METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ },
335 { NULL, NULL }
335 { NULL, NULL }
336 };
336 };
337
337
338 static PyMemberDef ZstdCompressionDict_members[] = {
338 static PyMemberDef ZstdCompressionDict_members[] = {
339 { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
339 { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
340 "segment size" },
340 "segment size" },
341 { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
341 { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
342 "dmer size" },
342 "dmer size" },
343 { NULL }
343 { NULL }
344 };
344 };
345
345
346 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
346 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
347 return self->dictSize;
347 return self->dictSize;
348 }
348 }
349
349
350 static PySequenceMethods ZstdCompressionDict_sq = {
350 static PySequenceMethods ZstdCompressionDict_sq = {
351 (lenfunc)ZstdCompressionDict_length, /* sq_length */
351 (lenfunc)ZstdCompressionDict_length, /* sq_length */
352 0, /* sq_concat */
352 0, /* sq_concat */
353 0, /* sq_repeat */
353 0, /* sq_repeat */
354 0, /* sq_item */
354 0, /* sq_item */
355 0, /* sq_ass_item */
355 0, /* sq_ass_item */
356 0, /* sq_contains */
356 0, /* sq_contains */
357 0, /* sq_inplace_concat */
357 0, /* sq_inplace_concat */
358 0 /* sq_inplace_repeat */
358 0 /* sq_inplace_repeat */
359 };
359 };
360
360
361 PyTypeObject ZstdCompressionDictType = {
361 PyTypeObject ZstdCompressionDictType = {
362 PyVarObject_HEAD_INIT(NULL, 0)
362 PyVarObject_HEAD_INIT(NULL, 0)
363 "zstd.ZstdCompressionDict", /* tp_name */
363 "zstd.ZstdCompressionDict", /* tp_name */
364 sizeof(ZstdCompressionDict), /* tp_basicsize */
364 sizeof(ZstdCompressionDict), /* tp_basicsize */
365 0, /* tp_itemsize */
365 0, /* tp_itemsize */
366 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
366 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
367 0, /* tp_print */
367 0, /* tp_print */
368 0, /* tp_getattr */
368 0, /* tp_getattr */
369 0, /* tp_setattr */
369 0, /* tp_setattr */
370 0, /* tp_compare */
370 0, /* tp_compare */
371 0, /* tp_repr */
371 0, /* tp_repr */
372 0, /* tp_as_number */
372 0, /* tp_as_number */
373 &ZstdCompressionDict_sq, /* tp_as_sequence */
373 &ZstdCompressionDict_sq, /* tp_as_sequence */
374 0, /* tp_as_mapping */
374 0, /* tp_as_mapping */
375 0, /* tp_hash */
375 0, /* tp_hash */
376 0, /* tp_call */
376 0, /* tp_call */
377 0, /* tp_str */
377 0, /* tp_str */
378 0, /* tp_getattro */
378 0, /* tp_getattro */
379 0, /* tp_setattro */
379 0, /* tp_setattro */
380 0, /* tp_as_buffer */
380 0, /* tp_as_buffer */
381 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
381 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
382 ZstdCompressionDict__doc__, /* tp_doc */
382 ZstdCompressionDict__doc__, /* tp_doc */
383 0, /* tp_traverse */
383 0, /* tp_traverse */
384 0, /* tp_clear */
384 0, /* tp_clear */
385 0, /* tp_richcompare */
385 0, /* tp_richcompare */
386 0, /* tp_weaklistoffset */
386 0, /* tp_weaklistoffset */
387 0, /* tp_iter */
387 0, /* tp_iter */
388 0, /* tp_iternext */
388 0, /* tp_iternext */
389 ZstdCompressionDict_methods, /* tp_methods */
389 ZstdCompressionDict_methods, /* tp_methods */
390 ZstdCompressionDict_members, /* tp_members */
390 ZstdCompressionDict_members, /* tp_members */
391 0, /* tp_getset */
391 0, /* tp_getset */
392 0, /* tp_base */
392 0, /* tp_base */
393 0, /* tp_dict */
393 0, /* tp_dict */
394 0, /* tp_descr_get */
394 0, /* tp_descr_get */
395 0, /* tp_descr_set */
395 0, /* tp_descr_set */
396 0, /* tp_dictoffset */
396 0, /* tp_dictoffset */
397 (initproc)ZstdCompressionDict_init, /* tp_init */
397 (initproc)ZstdCompressionDict_init, /* tp_init */
398 0, /* tp_alloc */
398 0, /* tp_alloc */
399 PyType_GenericNew, /* tp_new */
399 PyType_GenericNew, /* tp_new */
400 };
400 };
401
401
402 void compressiondict_module_init(PyObject* mod) {
402 void compressiondict_module_init(PyObject* mod) {
403 Py_TYPE(&ZstdCompressionDictType) = &PyType_Type;
403 Py_SET_TYPE(&ZstdCompressionDictType, &PyType_Type);
404 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
404 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
405 return;
405 return;
406 }
406 }
407
407
408 Py_INCREF((PyObject*)&ZstdCompressionDictType);
408 Py_INCREF((PyObject*)&ZstdCompressionDictType);
409 PyModule_AddObject(mod, "ZstdCompressionDict",
409 PyModule_AddObject(mod, "ZstdCompressionDict",
410 (PyObject*)&ZstdCompressionDictType);
410 (PyObject*)&ZstdCompressionDictType);
411 }
411 }
@@ -1,572 +1,572 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
14 size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value);
14 size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value);
15 if (ZSTD_isError(zresult)) {
15 if (ZSTD_isError(zresult)) {
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
17 ZSTD_getErrorName(zresult));
17 ZSTD_getErrorName(zresult));
18 return 1;
18 return 1;
19 }
19 }
20
20
21 return 0;
21 return 0;
22 }
22 }
23
23
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
25
25
26 #define TRY_COPY_PARAMETER(source, dest, param) { \
26 #define TRY_COPY_PARAMETER(source, dest, param) { \
27 int result; \
27 int result; \
28 size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
28 size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
29 if (ZSTD_isError(zresult)) { \
29 if (ZSTD_isError(zresult)) { \
30 return 1; \
30 return 1; \
31 } \
31 } \
32 zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
32 zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
33 if (ZSTD_isError(zresult)) { \
33 if (ZSTD_isError(zresult)) { \
34 return 1; \
34 return 1; \
35 } \
35 } \
36 }
36 }
37
37
38 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
38 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
39 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
39 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
40
40
41 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
41 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
42 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
42 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
43 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
43 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
45 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
45 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
61
61
62 return 0;
62 return 0;
63 }
63 }
64
64
65 int reset_params(ZstdCompressionParametersObject* params) {
65 int reset_params(ZstdCompressionParametersObject* params) {
66 if (params->params) {
66 if (params->params) {
67 ZSTD_CCtxParams_reset(params->params);
67 ZSTD_CCtxParams_reset(params->params);
68 }
68 }
69 else {
69 else {
70 params->params = ZSTD_createCCtxParams();
70 params->params = ZSTD_createCCtxParams();
71 if (!params->params) {
71 if (!params->params) {
72 PyErr_NoMemory();
72 PyErr_NoMemory();
73 return 1;
73 return 1;
74 }
74 }
75 }
75 }
76
76
77 return set_parameters(params->params, params);
77 return set_parameters(params->params, params);
78 }
78 }
79
79
80 #define TRY_GET_PARAMETER(params, param, value) { \
80 #define TRY_GET_PARAMETER(params, param, value) { \
81 size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
81 size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
82 if (ZSTD_isError(zresult)) { \
82 if (ZSTD_isError(zresult)) { \
83 PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
83 PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
84 return 1; \
84 return 1; \
85 } \
85 } \
86 }
86 }
87
87
88 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
88 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
89 int value;
89 int value;
90
90
91 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
91 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
92 cparams->windowLog = value;
92 cparams->windowLog = value;
93
93
94 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
94 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
95 cparams->chainLog = value;
95 cparams->chainLog = value;
96
96
97 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
97 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
98 cparams->hashLog = value;
98 cparams->hashLog = value;
99
99
100 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
100 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
101 cparams->searchLog = value;
101 cparams->searchLog = value;
102
102
103 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
103 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
104 cparams->minMatch = value;
104 cparams->minMatch = value;
105
105
106 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
106 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
107 cparams->targetLength = value;
107 cparams->targetLength = value;
108
108
109 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
109 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
110 cparams->strategy = value;
110 cparams->strategy = value;
111
111
112 return 0;
112 return 0;
113 }
113 }
114
114
115 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
115 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
116 static char* kwlist[] = {
116 static char* kwlist[] = {
117 "format",
117 "format",
118 "compression_level",
118 "compression_level",
119 "window_log",
119 "window_log",
120 "hash_log",
120 "hash_log",
121 "chain_log",
121 "chain_log",
122 "search_log",
122 "search_log",
123 "min_match",
123 "min_match",
124 "target_length",
124 "target_length",
125 "compression_strategy",
125 "compression_strategy",
126 "strategy",
126 "strategy",
127 "write_content_size",
127 "write_content_size",
128 "write_checksum",
128 "write_checksum",
129 "write_dict_id",
129 "write_dict_id",
130 "job_size",
130 "job_size",
131 "overlap_log",
131 "overlap_log",
132 "overlap_size_log",
132 "overlap_size_log",
133 "force_max_window",
133 "force_max_window",
134 "enable_ldm",
134 "enable_ldm",
135 "ldm_hash_log",
135 "ldm_hash_log",
136 "ldm_min_match",
136 "ldm_min_match",
137 "ldm_bucket_size_log",
137 "ldm_bucket_size_log",
138 "ldm_hash_rate_log",
138 "ldm_hash_rate_log",
139 "ldm_hash_every_log",
139 "ldm_hash_every_log",
140 "threads",
140 "threads",
141 NULL
141 NULL
142 };
142 };
143
143
144 int format = 0;
144 int format = 0;
145 int compressionLevel = 0;
145 int compressionLevel = 0;
146 int windowLog = 0;
146 int windowLog = 0;
147 int hashLog = 0;
147 int hashLog = 0;
148 int chainLog = 0;
148 int chainLog = 0;
149 int searchLog = 0;
149 int searchLog = 0;
150 int minMatch = 0;
150 int minMatch = 0;
151 int targetLength = 0;
151 int targetLength = 0;
152 int compressionStrategy = -1;
152 int compressionStrategy = -1;
153 int strategy = -1;
153 int strategy = -1;
154 int contentSizeFlag = 1;
154 int contentSizeFlag = 1;
155 int checksumFlag = 0;
155 int checksumFlag = 0;
156 int dictIDFlag = 0;
156 int dictIDFlag = 0;
157 int jobSize = 0;
157 int jobSize = 0;
158 int overlapLog = -1;
158 int overlapLog = -1;
159 int overlapSizeLog = -1;
159 int overlapSizeLog = -1;
160 int forceMaxWindow = 0;
160 int forceMaxWindow = 0;
161 int enableLDM = 0;
161 int enableLDM = 0;
162 int ldmHashLog = 0;
162 int ldmHashLog = 0;
163 int ldmMinMatch = 0;
163 int ldmMinMatch = 0;
164 int ldmBucketSizeLog = 0;
164 int ldmBucketSizeLog = 0;
165 int ldmHashRateLog = -1;
165 int ldmHashRateLog = -1;
166 int ldmHashEveryLog = -1;
166 int ldmHashEveryLog = -1;
167 int threads = 0;
167 int threads = 0;
168
168
169 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
169 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
170 "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
170 "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
171 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
171 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
172 &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
172 &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
173 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
173 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
174 &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
174 &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
175 &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
175 &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
176 return -1;
176 return -1;
177 }
177 }
178
178
179 if (reset_params(self)) {
179 if (reset_params(self)) {
180 return -1;
180 return -1;
181 }
181 }
182
182
183 if (threads < 0) {
183 if (threads < 0) {
184 threads = cpu_count();
184 threads = cpu_count();
185 }
185 }
186
186
187 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
187 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
188 * because setting ZSTD_c_nbWorkers resets the other parameters. */
188 * because setting ZSTD_c_nbWorkers resets the other parameters. */
189 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
189 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
190
190
191 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
191 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
192 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
192 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
193 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
193 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
194 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
194 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
195 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
195 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
196 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
196 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
197 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
197 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
198 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
198 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
199
199
200 if (compressionStrategy != -1 && strategy != -1) {
200 if (compressionStrategy != -1 && strategy != -1) {
201 PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
201 PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
202 return -1;
202 return -1;
203 }
203 }
204
204
205 if (compressionStrategy != -1) {
205 if (compressionStrategy != -1) {
206 strategy = compressionStrategy;
206 strategy = compressionStrategy;
207 }
207 }
208 else if (strategy == -1) {
208 else if (strategy == -1) {
209 strategy = 0;
209 strategy = 0;
210 }
210 }
211
211
212 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
212 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
213 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
213 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
214 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
214 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
215 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
215 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
216 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
216 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
217
217
218 if (overlapLog != -1 && overlapSizeLog != -1) {
218 if (overlapLog != -1 && overlapSizeLog != -1) {
219 PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
219 PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
220 return -1;
220 return -1;
221 }
221 }
222
222
223 if (overlapSizeLog != -1) {
223 if (overlapSizeLog != -1) {
224 overlapLog = overlapSizeLog;
224 overlapLog = overlapSizeLog;
225 }
225 }
226 else if (overlapLog == -1) {
226 else if (overlapLog == -1) {
227 overlapLog = 0;
227 overlapLog = 0;
228 }
228 }
229
229
230 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
230 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
231 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
231 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
232 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
232 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
233 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
233 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
234 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
234 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
235 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
235 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
236
236
237 if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
237 if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
238 PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
238 PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
239 return -1;
239 return -1;
240 }
240 }
241
241
242 if (ldmHashEveryLog != -1) {
242 if (ldmHashEveryLog != -1) {
243 ldmHashRateLog = ldmHashEveryLog;
243 ldmHashRateLog = ldmHashEveryLog;
244 }
244 }
245 else if (ldmHashRateLog == -1) {
245 else if (ldmHashRateLog == -1) {
246 ldmHashRateLog = 0;
246 ldmHashRateLog = 0;
247 }
247 }
248
248
249 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
249 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
250
250
251 return 0;
251 return 0;
252 }
252 }
253
253
254 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
254 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
255 "Create a CompressionParameters from a compression level and target sizes\n"
255 "Create a CompressionParameters from a compression level and target sizes\n"
256 );
256 );
257
257
258 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
258 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
259 int managedKwargs = 0;
259 int managedKwargs = 0;
260 int level;
260 int level;
261 PyObject* sourceSize = NULL;
261 PyObject* sourceSize = NULL;
262 PyObject* dictSize = NULL;
262 PyObject* dictSize = NULL;
263 unsigned PY_LONG_LONG iSourceSize = 0;
263 unsigned PY_LONG_LONG iSourceSize = 0;
264 Py_ssize_t iDictSize = 0;
264 Py_ssize_t iDictSize = 0;
265 PyObject* val;
265 PyObject* val;
266 ZSTD_compressionParameters params;
266 ZSTD_compressionParameters params;
267 ZstdCompressionParametersObject* result = NULL;
267 ZstdCompressionParametersObject* result = NULL;
268 int res;
268 int res;
269
269
270 if (!PyArg_ParseTuple(args, "i:from_level",
270 if (!PyArg_ParseTuple(args, "i:from_level",
271 &level)) {
271 &level)) {
272 return NULL;
272 return NULL;
273 }
273 }
274
274
275 if (!kwargs) {
275 if (!kwargs) {
276 kwargs = PyDict_New();
276 kwargs = PyDict_New();
277 if (!kwargs) {
277 if (!kwargs) {
278 return NULL;
278 return NULL;
279 }
279 }
280 managedKwargs = 1;
280 managedKwargs = 1;
281 }
281 }
282
282
283 sourceSize = PyDict_GetItemString(kwargs, "source_size");
283 sourceSize = PyDict_GetItemString(kwargs, "source_size");
284 if (sourceSize) {
284 if (sourceSize) {
285 #if PY_MAJOR_VERSION >= 3
285 #if PY_MAJOR_VERSION >= 3
286 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
286 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
287 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
287 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
288 goto cleanup;
288 goto cleanup;
289 }
289 }
290 #else
290 #else
291 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
291 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
292 #endif
292 #endif
293
293
294 PyDict_DelItemString(kwargs, "source_size");
294 PyDict_DelItemString(kwargs, "source_size");
295 }
295 }
296
296
297 dictSize = PyDict_GetItemString(kwargs, "dict_size");
297 dictSize = PyDict_GetItemString(kwargs, "dict_size");
298 if (dictSize) {
298 if (dictSize) {
299 #if PY_MAJOR_VERSION >= 3
299 #if PY_MAJOR_VERSION >= 3
300 iDictSize = PyLong_AsSsize_t(dictSize);
300 iDictSize = PyLong_AsSsize_t(dictSize);
301 #else
301 #else
302 iDictSize = PyInt_AsSsize_t(dictSize);
302 iDictSize = PyInt_AsSsize_t(dictSize);
303 #endif
303 #endif
304 if (iDictSize == -1) {
304 if (iDictSize == -1) {
305 goto cleanup;
305 goto cleanup;
306 }
306 }
307
307
308 PyDict_DelItemString(kwargs, "dict_size");
308 PyDict_DelItemString(kwargs, "dict_size");
309 }
309 }
310
310
311
311
312 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
312 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
313
313
314 /* Values derived from the input level and sizes are passed along to the
314 /* Values derived from the input level and sizes are passed along to the
315 constructor. But only if a value doesn't already exist. */
315 constructor. But only if a value doesn't already exist. */
316 val = PyDict_GetItemString(kwargs, "window_log");
316 val = PyDict_GetItemString(kwargs, "window_log");
317 if (!val) {
317 if (!val) {
318 val = PyLong_FromUnsignedLong(params.windowLog);
318 val = PyLong_FromUnsignedLong(params.windowLog);
319 if (!val) {
319 if (!val) {
320 goto cleanup;
320 goto cleanup;
321 }
321 }
322 PyDict_SetItemString(kwargs, "window_log", val);
322 PyDict_SetItemString(kwargs, "window_log", val);
323 Py_DECREF(val);
323 Py_DECREF(val);
324 }
324 }
325
325
326 val = PyDict_GetItemString(kwargs, "chain_log");
326 val = PyDict_GetItemString(kwargs, "chain_log");
327 if (!val) {
327 if (!val) {
328 val = PyLong_FromUnsignedLong(params.chainLog);
328 val = PyLong_FromUnsignedLong(params.chainLog);
329 if (!val) {
329 if (!val) {
330 goto cleanup;
330 goto cleanup;
331 }
331 }
332 PyDict_SetItemString(kwargs, "chain_log", val);
332 PyDict_SetItemString(kwargs, "chain_log", val);
333 Py_DECREF(val);
333 Py_DECREF(val);
334 }
334 }
335
335
336 val = PyDict_GetItemString(kwargs, "hash_log");
336 val = PyDict_GetItemString(kwargs, "hash_log");
337 if (!val) {
337 if (!val) {
338 val = PyLong_FromUnsignedLong(params.hashLog);
338 val = PyLong_FromUnsignedLong(params.hashLog);
339 if (!val) {
339 if (!val) {
340 goto cleanup;
340 goto cleanup;
341 }
341 }
342 PyDict_SetItemString(kwargs, "hash_log", val);
342 PyDict_SetItemString(kwargs, "hash_log", val);
343 Py_DECREF(val);
343 Py_DECREF(val);
344 }
344 }
345
345
346 val = PyDict_GetItemString(kwargs, "search_log");
346 val = PyDict_GetItemString(kwargs, "search_log");
347 if (!val) {
347 if (!val) {
348 val = PyLong_FromUnsignedLong(params.searchLog);
348 val = PyLong_FromUnsignedLong(params.searchLog);
349 if (!val) {
349 if (!val) {
350 goto cleanup;
350 goto cleanup;
351 }
351 }
352 PyDict_SetItemString(kwargs, "search_log", val);
352 PyDict_SetItemString(kwargs, "search_log", val);
353 Py_DECREF(val);
353 Py_DECREF(val);
354 }
354 }
355
355
356 val = PyDict_GetItemString(kwargs, "min_match");
356 val = PyDict_GetItemString(kwargs, "min_match");
357 if (!val) {
357 if (!val) {
358 val = PyLong_FromUnsignedLong(params.minMatch);
358 val = PyLong_FromUnsignedLong(params.minMatch);
359 if (!val) {
359 if (!val) {
360 goto cleanup;
360 goto cleanup;
361 }
361 }
362 PyDict_SetItemString(kwargs, "min_match", val);
362 PyDict_SetItemString(kwargs, "min_match", val);
363 Py_DECREF(val);
363 Py_DECREF(val);
364 }
364 }
365
365
366 val = PyDict_GetItemString(kwargs, "target_length");
366 val = PyDict_GetItemString(kwargs, "target_length");
367 if (!val) {
367 if (!val) {
368 val = PyLong_FromUnsignedLong(params.targetLength);
368 val = PyLong_FromUnsignedLong(params.targetLength);
369 if (!val) {
369 if (!val) {
370 goto cleanup;
370 goto cleanup;
371 }
371 }
372 PyDict_SetItemString(kwargs, "target_length", val);
372 PyDict_SetItemString(kwargs, "target_length", val);
373 Py_DECREF(val);
373 Py_DECREF(val);
374 }
374 }
375
375
376 val = PyDict_GetItemString(kwargs, "compression_strategy");
376 val = PyDict_GetItemString(kwargs, "compression_strategy");
377 if (!val) {
377 if (!val) {
378 val = PyLong_FromUnsignedLong(params.strategy);
378 val = PyLong_FromUnsignedLong(params.strategy);
379 if (!val) {
379 if (!val) {
380 goto cleanup;
380 goto cleanup;
381 }
381 }
382 PyDict_SetItemString(kwargs, "compression_strategy", val);
382 PyDict_SetItemString(kwargs, "compression_strategy", val);
383 Py_DECREF(val);
383 Py_DECREF(val);
384 }
384 }
385
385
386 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
386 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
387 if (!result) {
387 if (!result) {
388 goto cleanup;
388 goto cleanup;
389 }
389 }
390
390
391 result->params = NULL;
391 result->params = NULL;
392
392
393 val = PyTuple_New(0);
393 val = PyTuple_New(0);
394 if (!val) {
394 if (!val) {
395 Py_CLEAR(result);
395 Py_CLEAR(result);
396 goto cleanup;
396 goto cleanup;
397 }
397 }
398
398
399 res = ZstdCompressionParameters_init(result, val, kwargs);
399 res = ZstdCompressionParameters_init(result, val, kwargs);
400 Py_DECREF(val);
400 Py_DECREF(val);
401
401
402 if (res) {
402 if (res) {
403 Py_CLEAR(result);
403 Py_CLEAR(result);
404 goto cleanup;
404 goto cleanup;
405 }
405 }
406
406
407 cleanup:
407 cleanup:
408 if (managedKwargs) {
408 if (managedKwargs) {
409 Py_DECREF(kwargs);
409 Py_DECREF(kwargs);
410 }
410 }
411
411
412 return result;
412 return result;
413 }
413 }
414
414
415 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
415 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
416 "Estimate the size in bytes of a compression context for compression parameters\n"
416 "Estimate the size in bytes of a compression context for compression parameters\n"
417 );
417 );
418
418
419 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
419 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
420 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
420 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
421 }
421 }
422
422
423 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
423 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
424 "ZstdCompressionParameters: low-level control over zstd compression");
424 "ZstdCompressionParameters: low-level control over zstd compression");
425
425
426 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
426 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
427 if (self->params) {
427 if (self->params) {
428 ZSTD_freeCCtxParams(self->params);
428 ZSTD_freeCCtxParams(self->params);
429 self->params = NULL;
429 self->params = NULL;
430 }
430 }
431
431
432 PyObject_Del(self);
432 PyObject_Del(self);
433 }
433 }
434
434
435 #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
435 #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
436 int result; \
436 int result; \
437 size_t zresult; \
437 size_t zresult; \
438 ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
438 ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
439 zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
439 zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
440 if (ZSTD_isError(zresult)) { \
440 if (ZSTD_isError(zresult)) { \
441 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
441 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
442 ZSTD_getErrorName(zresult)); \
442 ZSTD_getErrorName(zresult)); \
443 return NULL; \
443 return NULL; \
444 } \
444 } \
445 return PyLong_FromLong(result); \
445 return PyLong_FromLong(result); \
446 }
446 }
447
447
448 PARAM_GETTER(format, ZSTD_c_format)
448 PARAM_GETTER(format, ZSTD_c_format)
449 PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
449 PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
450 PARAM_GETTER(window_log, ZSTD_c_windowLog)
450 PARAM_GETTER(window_log, ZSTD_c_windowLog)
451 PARAM_GETTER(hash_log, ZSTD_c_hashLog)
451 PARAM_GETTER(hash_log, ZSTD_c_hashLog)
452 PARAM_GETTER(chain_log, ZSTD_c_chainLog)
452 PARAM_GETTER(chain_log, ZSTD_c_chainLog)
453 PARAM_GETTER(search_log, ZSTD_c_searchLog)
453 PARAM_GETTER(search_log, ZSTD_c_searchLog)
454 PARAM_GETTER(min_match, ZSTD_c_minMatch)
454 PARAM_GETTER(min_match, ZSTD_c_minMatch)
455 PARAM_GETTER(target_length, ZSTD_c_targetLength)
455 PARAM_GETTER(target_length, ZSTD_c_targetLength)
456 PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
456 PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
457 PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
457 PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
458 PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
458 PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
459 PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
459 PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
460 PARAM_GETTER(job_size, ZSTD_c_jobSize)
460 PARAM_GETTER(job_size, ZSTD_c_jobSize)
461 PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
461 PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
462 PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
462 PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
463 PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
463 PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
464 PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
464 PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
465 PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
465 PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
466 PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
466 PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
467 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
467 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
468 PARAM_GETTER(threads, ZSTD_c_nbWorkers)
468 PARAM_GETTER(threads, ZSTD_c_nbWorkers)
469
469
470 static PyMethodDef ZstdCompressionParameters_methods[] = {
470 static PyMethodDef ZstdCompressionParameters_methods[] = {
471 {
471 {
472 "from_level",
472 "from_level",
473 (PyCFunction)CompressionParameters_from_level,
473 (PyCFunction)CompressionParameters_from_level,
474 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
474 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
475 ZstdCompressionParameters_from_level__doc__
475 ZstdCompressionParameters_from_level__doc__
476 },
476 },
477 {
477 {
478 "estimated_compression_context_size",
478 "estimated_compression_context_size",
479 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
479 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
480 METH_NOARGS,
480 METH_NOARGS,
481 ZstdCompressionParameters_estimated_compression_context_size__doc__
481 ZstdCompressionParameters_estimated_compression_context_size__doc__
482 },
482 },
483 { NULL, NULL }
483 { NULL, NULL }
484 };
484 };
485
485
486 #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
486 #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
487
487
488 static PyGetSetDef ZstdCompressionParameters_getset[] = {
488 static PyGetSetDef ZstdCompressionParameters_getset[] = {
489 GET_SET_ENTRY(format),
489 GET_SET_ENTRY(format),
490 GET_SET_ENTRY(compression_level),
490 GET_SET_ENTRY(compression_level),
491 GET_SET_ENTRY(window_log),
491 GET_SET_ENTRY(window_log),
492 GET_SET_ENTRY(hash_log),
492 GET_SET_ENTRY(hash_log),
493 GET_SET_ENTRY(chain_log),
493 GET_SET_ENTRY(chain_log),
494 GET_SET_ENTRY(search_log),
494 GET_SET_ENTRY(search_log),
495 GET_SET_ENTRY(min_match),
495 GET_SET_ENTRY(min_match),
496 GET_SET_ENTRY(target_length),
496 GET_SET_ENTRY(target_length),
497 GET_SET_ENTRY(compression_strategy),
497 GET_SET_ENTRY(compression_strategy),
498 GET_SET_ENTRY(write_content_size),
498 GET_SET_ENTRY(write_content_size),
499 GET_SET_ENTRY(write_checksum),
499 GET_SET_ENTRY(write_checksum),
500 GET_SET_ENTRY(write_dict_id),
500 GET_SET_ENTRY(write_dict_id),
501 GET_SET_ENTRY(threads),
501 GET_SET_ENTRY(threads),
502 GET_SET_ENTRY(job_size),
502 GET_SET_ENTRY(job_size),
503 GET_SET_ENTRY(overlap_log),
503 GET_SET_ENTRY(overlap_log),
504 /* TODO remove this deprecated attribute */
504 /* TODO remove this deprecated attribute */
505 { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
505 { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
506 GET_SET_ENTRY(force_max_window),
506 GET_SET_ENTRY(force_max_window),
507 GET_SET_ENTRY(enable_ldm),
507 GET_SET_ENTRY(enable_ldm),
508 GET_SET_ENTRY(ldm_hash_log),
508 GET_SET_ENTRY(ldm_hash_log),
509 GET_SET_ENTRY(ldm_min_match),
509 GET_SET_ENTRY(ldm_min_match),
510 GET_SET_ENTRY(ldm_bucket_size_log),
510 GET_SET_ENTRY(ldm_bucket_size_log),
511 GET_SET_ENTRY(ldm_hash_rate_log),
511 GET_SET_ENTRY(ldm_hash_rate_log),
512 /* TODO remove this deprecated attribute */
512 /* TODO remove this deprecated attribute */
513 { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
513 { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
514 { NULL }
514 { NULL }
515 };
515 };
516
516
517 PyTypeObject ZstdCompressionParametersType = {
517 PyTypeObject ZstdCompressionParametersType = {
518 PyVarObject_HEAD_INIT(NULL, 0)
518 PyVarObject_HEAD_INIT(NULL, 0)
519 "ZstdCompressionParameters", /* tp_name */
519 "ZstdCompressionParameters", /* tp_name */
520 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
520 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
521 0, /* tp_itemsize */
521 0, /* tp_itemsize */
522 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
522 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
523 0, /* tp_print */
523 0, /* tp_print */
524 0, /* tp_getattr */
524 0, /* tp_getattr */
525 0, /* tp_setattr */
525 0, /* tp_setattr */
526 0, /* tp_compare */
526 0, /* tp_compare */
527 0, /* tp_repr */
527 0, /* tp_repr */
528 0, /* tp_as_number */
528 0, /* tp_as_number */
529 0, /* tp_as_sequence */
529 0, /* tp_as_sequence */
530 0, /* tp_as_mapping */
530 0, /* tp_as_mapping */
531 0, /* tp_hash */
531 0, /* tp_hash */
532 0, /* tp_call */
532 0, /* tp_call */
533 0, /* tp_str */
533 0, /* tp_str */
534 0, /* tp_getattro */
534 0, /* tp_getattro */
535 0, /* tp_setattro */
535 0, /* tp_setattro */
536 0, /* tp_as_buffer */
536 0, /* tp_as_buffer */
537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
538 ZstdCompressionParameters__doc__, /* tp_doc */
538 ZstdCompressionParameters__doc__, /* tp_doc */
539 0, /* tp_traverse */
539 0, /* tp_traverse */
540 0, /* tp_clear */
540 0, /* tp_clear */
541 0, /* tp_richcompare */
541 0, /* tp_richcompare */
542 0, /* tp_weaklistoffset */
542 0, /* tp_weaklistoffset */
543 0, /* tp_iter */
543 0, /* tp_iter */
544 0, /* tp_iternext */
544 0, /* tp_iternext */
545 ZstdCompressionParameters_methods, /* tp_methods */
545 ZstdCompressionParameters_methods, /* tp_methods */
546 0, /* tp_members */
546 0, /* tp_members */
547 ZstdCompressionParameters_getset, /* tp_getset */
547 ZstdCompressionParameters_getset, /* tp_getset */
548 0, /* tp_base */
548 0, /* tp_base */
549 0, /* tp_dict */
549 0, /* tp_dict */
550 0, /* tp_descr_get */
550 0, /* tp_descr_get */
551 0, /* tp_descr_set */
551 0, /* tp_descr_set */
552 0, /* tp_dictoffset */
552 0, /* tp_dictoffset */
553 (initproc)ZstdCompressionParameters_init, /* tp_init */
553 (initproc)ZstdCompressionParameters_init, /* tp_init */
554 0, /* tp_alloc */
554 0, /* tp_alloc */
555 PyType_GenericNew, /* tp_new */
555 PyType_GenericNew, /* tp_new */
556 };
556 };
557
557
558 void compressionparams_module_init(PyObject* mod) {
558 void compressionparams_module_init(PyObject* mod) {
559 Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
559 Py_SET_TYPE(&ZstdCompressionParametersType, &PyType_Type);
560 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
560 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
561 return;
561 return;
562 }
562 }
563
563
564 Py_INCREF(&ZstdCompressionParametersType);
564 Py_INCREF(&ZstdCompressionParametersType);
565 PyModule_AddObject(mod, "ZstdCompressionParameters",
565 PyModule_AddObject(mod, "ZstdCompressionParameters",
566 (PyObject*)&ZstdCompressionParametersType);
566 (PyObject*)&ZstdCompressionParametersType);
567
567
568 /* TODO remove deprecated alias. */
568 /* TODO remove deprecated alias. */
569 Py_INCREF(&ZstdCompressionParametersType);
569 Py_INCREF(&ZstdCompressionParametersType);
570 PyModule_AddObject(mod, "CompressionParameters",
570 PyModule_AddObject(mod, "CompressionParameters",
571 (PyObject*)&ZstdCompressionParametersType);
571 (PyObject*)&ZstdCompressionParametersType);
572 }
572 }
@@ -1,818 +1,818 b''
1 /**
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 static void set_unsupported_operation(void) {
13 static void set_unsupported_operation(void) {
14 PyObject* iomod;
14 PyObject* iomod;
15 PyObject* exc;
15 PyObject* exc;
16
16
17 iomod = PyImport_ImportModule("io");
17 iomod = PyImport_ImportModule("io");
18 if (NULL == iomod) {
18 if (NULL == iomod) {
19 return;
19 return;
20 }
20 }
21
21
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 if (NULL == exc) {
23 if (NULL == exc) {
24 Py_DECREF(iomod);
24 Py_DECREF(iomod);
25 return;
25 return;
26 }
26 }
27
27
28 PyErr_SetNone(exc);
28 PyErr_SetNone(exc);
29 Py_DECREF(exc);
29 Py_DECREF(exc);
30 Py_DECREF(iomod);
30 Py_DECREF(iomod);
31 }
31 }
32
32
33 static void reader_dealloc(ZstdCompressionReader* self) {
33 static void reader_dealloc(ZstdCompressionReader* self) {
34 Py_XDECREF(self->compressor);
34 Py_XDECREF(self->compressor);
35 Py_XDECREF(self->reader);
35 Py_XDECREF(self->reader);
36
36
37 if (self->buffer.buf) {
37 if (self->buffer.buf) {
38 PyBuffer_Release(&self->buffer);
38 PyBuffer_Release(&self->buffer);
39 memset(&self->buffer, 0, sizeof(self->buffer));
39 memset(&self->buffer, 0, sizeof(self->buffer));
40 }
40 }
41
41
42 PyObject_Del(self);
42 PyObject_Del(self);
43 }
43 }
44
44
45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
46 if (self->entered) {
46 if (self->entered) {
47 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
48 return NULL;
48 return NULL;
49 }
49 }
50
50
51 self->entered = 1;
51 self->entered = 1;
52
52
53 Py_INCREF(self);
53 Py_INCREF(self);
54 return self;
54 return self;
55 }
55 }
56
56
57 static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
57 static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
58 PyObject* exc_type;
58 PyObject* exc_type;
59 PyObject* exc_value;
59 PyObject* exc_value;
60 PyObject* exc_tb;
60 PyObject* exc_tb;
61
61
62 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
62 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
63 return NULL;
63 return NULL;
64 }
64 }
65
65
66 self->entered = 0;
66 self->entered = 0;
67 self->closed = 1;
67 self->closed = 1;
68
68
69 /* Release resources associated with source. */
69 /* Release resources associated with source. */
70 Py_CLEAR(self->reader);
70 Py_CLEAR(self->reader);
71 if (self->buffer.buf) {
71 if (self->buffer.buf) {
72 PyBuffer_Release(&self->buffer);
72 PyBuffer_Release(&self->buffer);
73 memset(&self->buffer, 0, sizeof(self->buffer));
73 memset(&self->buffer, 0, sizeof(self->buffer));
74 }
74 }
75
75
76 Py_CLEAR(self->compressor);
76 Py_CLEAR(self->compressor);
77
77
78 Py_RETURN_FALSE;
78 Py_RETURN_FALSE;
79 }
79 }
80
80
81 static PyObject* reader_readable(ZstdCompressionReader* self) {
81 static PyObject* reader_readable(ZstdCompressionReader* self) {
82 Py_RETURN_TRUE;
82 Py_RETURN_TRUE;
83 }
83 }
84
84
85 static PyObject* reader_writable(ZstdCompressionReader* self) {
85 static PyObject* reader_writable(ZstdCompressionReader* self) {
86 Py_RETURN_FALSE;
86 Py_RETURN_FALSE;
87 }
87 }
88
88
89 static PyObject* reader_seekable(ZstdCompressionReader* self) {
89 static PyObject* reader_seekable(ZstdCompressionReader* self) {
90 Py_RETURN_FALSE;
90 Py_RETURN_FALSE;
91 }
91 }
92
92
93 static PyObject* reader_readline(PyObject* self, PyObject* args) {
93 static PyObject* reader_readline(PyObject* self, PyObject* args) {
94 set_unsupported_operation();
94 set_unsupported_operation();
95 return NULL;
95 return NULL;
96 }
96 }
97
97
98 static PyObject* reader_readlines(PyObject* self, PyObject* args) {
98 static PyObject* reader_readlines(PyObject* self, PyObject* args) {
99 set_unsupported_operation();
99 set_unsupported_operation();
100 return NULL;
100 return NULL;
101 }
101 }
102
102
103 static PyObject* reader_write(PyObject* self, PyObject* args) {
103 static PyObject* reader_write(PyObject* self, PyObject* args) {
104 PyErr_SetString(PyExc_OSError, "stream is not writable");
104 PyErr_SetString(PyExc_OSError, "stream is not writable");
105 return NULL;
105 return NULL;
106 }
106 }
107
107
108 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
108 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
109 PyErr_SetString(PyExc_OSError, "stream is not writable");
109 PyErr_SetString(PyExc_OSError, "stream is not writable");
110 return NULL;
110 return NULL;
111 }
111 }
112
112
113 static PyObject* reader_isatty(PyObject* self) {
113 static PyObject* reader_isatty(PyObject* self) {
114 Py_RETURN_FALSE;
114 Py_RETURN_FALSE;
115 }
115 }
116
116
117 static PyObject* reader_flush(PyObject* self) {
117 static PyObject* reader_flush(PyObject* self) {
118 Py_RETURN_NONE;
118 Py_RETURN_NONE;
119 }
119 }
120
120
121 static PyObject* reader_close(ZstdCompressionReader* self) {
121 static PyObject* reader_close(ZstdCompressionReader* self) {
122 self->closed = 1;
122 self->closed = 1;
123 Py_RETURN_NONE;
123 Py_RETURN_NONE;
124 }
124 }
125
125
126 static PyObject* reader_tell(ZstdCompressionReader* self) {
126 static PyObject* reader_tell(ZstdCompressionReader* self) {
127 /* TODO should this raise OSError since stream isn't seekable? */
127 /* TODO should this raise OSError since stream isn't seekable? */
128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
129 }
129 }
130
130
131 int read_compressor_input(ZstdCompressionReader* self) {
131 int read_compressor_input(ZstdCompressionReader* self) {
132 if (self->finishedInput) {
132 if (self->finishedInput) {
133 return 0;
133 return 0;
134 }
134 }
135
135
136 if (self->input.pos != self->input.size) {
136 if (self->input.pos != self->input.size) {
137 return 0;
137 return 0;
138 }
138 }
139
139
140 if (self->reader) {
140 if (self->reader) {
141 Py_buffer buffer;
141 Py_buffer buffer;
142
142
143 assert(self->readResult == NULL);
143 assert(self->readResult == NULL);
144
144
145 self->readResult = PyObject_CallMethod(self->reader, "read",
145 self->readResult = PyObject_CallMethod(self->reader, "read",
146 "k", self->readSize);
146 "k", self->readSize);
147
147
148 if (NULL == self->readResult) {
148 if (NULL == self->readResult) {
149 return -1;
149 return -1;
150 }
150 }
151
151
152 memset(&buffer, 0, sizeof(buffer));
152 memset(&buffer, 0, sizeof(buffer));
153
153
154 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
154 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
155 return -1;
155 return -1;
156 }
156 }
157
157
158 /* EOF */
158 /* EOF */
159 if (0 == buffer.len) {
159 if (0 == buffer.len) {
160 self->finishedInput = 1;
160 self->finishedInput = 1;
161 Py_CLEAR(self->readResult);
161 Py_CLEAR(self->readResult);
162 }
162 }
163 else {
163 else {
164 self->input.src = buffer.buf;
164 self->input.src = buffer.buf;
165 self->input.size = buffer.len;
165 self->input.size = buffer.len;
166 self->input.pos = 0;
166 self->input.pos = 0;
167 }
167 }
168
168
169 PyBuffer_Release(&buffer);
169 PyBuffer_Release(&buffer);
170 }
170 }
171 else {
171 else {
172 assert(self->buffer.buf);
172 assert(self->buffer.buf);
173
173
174 self->input.src = self->buffer.buf;
174 self->input.src = self->buffer.buf;
175 self->input.size = self->buffer.len;
175 self->input.size = self->buffer.len;
176 self->input.pos = 0;
176 self->input.pos = 0;
177 }
177 }
178
178
179 return 1;
179 return 1;
180 }
180 }
181
181
182 int compress_input(ZstdCompressionReader* self, ZSTD_outBuffer* output) {
182 int compress_input(ZstdCompressionReader* self, ZSTD_outBuffer* output) {
183 size_t oldPos;
183 size_t oldPos;
184 size_t zresult;
184 size_t zresult;
185
185
186 /* If we have data left over, consume it. */
186 /* If we have data left over, consume it. */
187 if (self->input.pos < self->input.size) {
187 if (self->input.pos < self->input.size) {
188 oldPos = output->pos;
188 oldPos = output->pos;
189
189
190 Py_BEGIN_ALLOW_THREADS
190 Py_BEGIN_ALLOW_THREADS
191 zresult = ZSTD_compressStream2(self->compressor->cctx,
191 zresult = ZSTD_compressStream2(self->compressor->cctx,
192 output, &self->input, ZSTD_e_continue);
192 output, &self->input, ZSTD_e_continue);
193 Py_END_ALLOW_THREADS
193 Py_END_ALLOW_THREADS
194
194
195 self->bytesCompressed += output->pos - oldPos;
195 self->bytesCompressed += output->pos - oldPos;
196
196
197 /* Input exhausted. Clear out state tracking. */
197 /* Input exhausted. Clear out state tracking. */
198 if (self->input.pos == self->input.size) {
198 if (self->input.pos == self->input.size) {
199 memset(&self->input, 0, sizeof(self->input));
199 memset(&self->input, 0, sizeof(self->input));
200 Py_CLEAR(self->readResult);
200 Py_CLEAR(self->readResult);
201
201
202 if (self->buffer.buf) {
202 if (self->buffer.buf) {
203 self->finishedInput = 1;
203 self->finishedInput = 1;
204 }
204 }
205 }
205 }
206
206
207 if (ZSTD_isError(zresult)) {
207 if (ZSTD_isError(zresult)) {
208 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
208 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
209 return -1;
209 return -1;
210 }
210 }
211 }
211 }
212
212
213 if (output->pos && output->pos == output->size) {
213 if (output->pos && output->pos == output->size) {
214 return 1;
214 return 1;
215 }
215 }
216 else {
216 else {
217 return 0;
217 return 0;
218 }
218 }
219 }
219 }
220
220
221 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
221 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
222 static char* kwlist[] = {
222 static char* kwlist[] = {
223 "size",
223 "size",
224 NULL
224 NULL
225 };
225 };
226
226
227 Py_ssize_t size = -1;
227 Py_ssize_t size = -1;
228 PyObject* result = NULL;
228 PyObject* result = NULL;
229 char* resultBuffer;
229 char* resultBuffer;
230 Py_ssize_t resultSize;
230 Py_ssize_t resultSize;
231 size_t zresult;
231 size_t zresult;
232 size_t oldPos;
232 size_t oldPos;
233 int readResult, compressResult;
233 int readResult, compressResult;
234
234
235 if (self->closed) {
235 if (self->closed) {
236 PyErr_SetString(PyExc_ValueError, "stream is closed");
236 PyErr_SetString(PyExc_ValueError, "stream is closed");
237 return NULL;
237 return NULL;
238 }
238 }
239
239
240 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
240 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
241 return NULL;
241 return NULL;
242 }
242 }
243
243
244 if (size < -1) {
244 if (size < -1) {
245 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
245 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
246 return NULL;
246 return NULL;
247 }
247 }
248
248
249 if (size == -1) {
249 if (size == -1) {
250 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
250 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
251 }
251 }
252
252
253 if (self->finishedOutput || size == 0) {
253 if (self->finishedOutput || size == 0) {
254 return PyBytes_FromStringAndSize("", 0);
254 return PyBytes_FromStringAndSize("", 0);
255 }
255 }
256
256
257 result = PyBytes_FromStringAndSize(NULL, size);
257 result = PyBytes_FromStringAndSize(NULL, size);
258 if (NULL == result) {
258 if (NULL == result) {
259 return NULL;
259 return NULL;
260 }
260 }
261
261
262 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
262 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
263
263
264 self->output.dst = resultBuffer;
264 self->output.dst = resultBuffer;
265 self->output.size = resultSize;
265 self->output.size = resultSize;
266 self->output.pos = 0;
266 self->output.pos = 0;
267
267
268 readinput:
268 readinput:
269
269
270 compressResult = compress_input(self, &self->output);
270 compressResult = compress_input(self, &self->output);
271
271
272 if (-1 == compressResult) {
272 if (-1 == compressResult) {
273 Py_XDECREF(result);
273 Py_XDECREF(result);
274 return NULL;
274 return NULL;
275 }
275 }
276 else if (0 == compressResult) {
276 else if (0 == compressResult) {
277 /* There is room in the output. We fall through to below, which will
277 /* There is room in the output. We fall through to below, which will
278 * either get more input for us or will attempt to end the stream.
278 * either get more input for us or will attempt to end the stream.
279 */
279 */
280 }
280 }
281 else if (1 == compressResult) {
281 else if (1 == compressResult) {
282 memset(&self->output, 0, sizeof(self->output));
282 memset(&self->output, 0, sizeof(self->output));
283 return result;
283 return result;
284 }
284 }
285 else {
285 else {
286 assert(0);
286 assert(0);
287 }
287 }
288
288
289 readResult = read_compressor_input(self);
289 readResult = read_compressor_input(self);
290
290
291 if (-1 == readResult) {
291 if (-1 == readResult) {
292 return NULL;
292 return NULL;
293 }
293 }
294 else if (0 == readResult) { }
294 else if (0 == readResult) { }
295 else if (1 == readResult) { }
295 else if (1 == readResult) { }
296 else {
296 else {
297 assert(0);
297 assert(0);
298 }
298 }
299
299
300 if (self->input.size) {
300 if (self->input.size) {
301 goto readinput;
301 goto readinput;
302 }
302 }
303
303
304 /* Else EOF */
304 /* Else EOF */
305 oldPos = self->output.pos;
305 oldPos = self->output.pos;
306
306
307 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
307 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
308 &self->input, ZSTD_e_end);
308 &self->input, ZSTD_e_end);
309
309
310 self->bytesCompressed += self->output.pos - oldPos;
310 self->bytesCompressed += self->output.pos - oldPos;
311
311
312 if (ZSTD_isError(zresult)) {
312 if (ZSTD_isError(zresult)) {
313 PyErr_Format(ZstdError, "error ending compression stream: %s",
313 PyErr_Format(ZstdError, "error ending compression stream: %s",
314 ZSTD_getErrorName(zresult));
314 ZSTD_getErrorName(zresult));
315 Py_XDECREF(result);
315 Py_XDECREF(result);
316 return NULL;
316 return NULL;
317 }
317 }
318
318
319 assert(self->output.pos);
319 assert(self->output.pos);
320
320
321 if (0 == zresult) {
321 if (0 == zresult) {
322 self->finishedOutput = 1;
322 self->finishedOutput = 1;
323 }
323 }
324
324
325 if (safe_pybytes_resize(&result, self->output.pos)) {
325 if (safe_pybytes_resize(&result, self->output.pos)) {
326 Py_XDECREF(result);
326 Py_XDECREF(result);
327 return NULL;
327 return NULL;
328 }
328 }
329
329
330 memset(&self->output, 0, sizeof(self->output));
330 memset(&self->output, 0, sizeof(self->output));
331
331
332 return result;
332 return result;
333 }
333 }
334
334
335 static PyObject* reader_read1(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
335 static PyObject* reader_read1(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
336 static char* kwlist[] = {
336 static char* kwlist[] = {
337 "size",
337 "size",
338 NULL
338 NULL
339 };
339 };
340
340
341 Py_ssize_t size = -1;
341 Py_ssize_t size = -1;
342 PyObject* result = NULL;
342 PyObject* result = NULL;
343 char* resultBuffer;
343 char* resultBuffer;
344 Py_ssize_t resultSize;
344 Py_ssize_t resultSize;
345 ZSTD_outBuffer output;
345 ZSTD_outBuffer output;
346 int compressResult;
346 int compressResult;
347 size_t oldPos;
347 size_t oldPos;
348 size_t zresult;
348 size_t zresult;
349
349
350 if (self->closed) {
350 if (self->closed) {
351 PyErr_SetString(PyExc_ValueError, "stream is closed");
351 PyErr_SetString(PyExc_ValueError, "stream is closed");
352 return NULL;
352 return NULL;
353 }
353 }
354
354
355 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
355 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
356 return NULL;
356 return NULL;
357 }
357 }
358
358
359 if (size < -1) {
359 if (size < -1) {
360 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
360 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
361 return NULL;
361 return NULL;
362 }
362 }
363
363
364 if (self->finishedOutput || size == 0) {
364 if (self->finishedOutput || size == 0) {
365 return PyBytes_FromStringAndSize("", 0);
365 return PyBytes_FromStringAndSize("", 0);
366 }
366 }
367
367
368 if (size == -1) {
368 if (size == -1) {
369 size = ZSTD_CStreamOutSize();
369 size = ZSTD_CStreamOutSize();
370 }
370 }
371
371
372 result = PyBytes_FromStringAndSize(NULL, size);
372 result = PyBytes_FromStringAndSize(NULL, size);
373 if (NULL == result) {
373 if (NULL == result) {
374 return NULL;
374 return NULL;
375 }
375 }
376
376
377 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
377 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
378
378
379 output.dst = resultBuffer;
379 output.dst = resultBuffer;
380 output.size = resultSize;
380 output.size = resultSize;
381 output.pos = 0;
381 output.pos = 0;
382
382
383 /* read1() is supposed to use at most 1 read() from the underlying stream.
383 /* read1() is supposed to use at most 1 read() from the underlying stream.
384 However, we can't satisfy this requirement with compression because
384 However, we can't satisfy this requirement with compression because
385 not every input will generate output. We /could/ flush the compressor,
385 not every input will generate output. We /could/ flush the compressor,
386 but this may not be desirable. We allow multiple read() from the
386 but this may not be desirable. We allow multiple read() from the
387 underlying stream. But unlike read(), we return as soon as output data
387 underlying stream. But unlike read(), we return as soon as output data
388 is available.
388 is available.
389 */
389 */
390
390
391 compressResult = compress_input(self, &output);
391 compressResult = compress_input(self, &output);
392
392
393 if (-1 == compressResult) {
393 if (-1 == compressResult) {
394 Py_XDECREF(result);
394 Py_XDECREF(result);
395 return NULL;
395 return NULL;
396 }
396 }
397 else if (0 == compressResult || 1 == compressResult) { }
397 else if (0 == compressResult || 1 == compressResult) { }
398 else {
398 else {
399 assert(0);
399 assert(0);
400 }
400 }
401
401
402 if (output.pos) {
402 if (output.pos) {
403 goto finally;
403 goto finally;
404 }
404 }
405
405
406 while (!self->finishedInput) {
406 while (!self->finishedInput) {
407 int readResult = read_compressor_input(self);
407 int readResult = read_compressor_input(self);
408
408
409 if (-1 == readResult) {
409 if (-1 == readResult) {
410 Py_XDECREF(result);
410 Py_XDECREF(result);
411 return NULL;
411 return NULL;
412 }
412 }
413 else if (0 == readResult || 1 == readResult) { }
413 else if (0 == readResult || 1 == readResult) { }
414 else {
414 else {
415 assert(0);
415 assert(0);
416 }
416 }
417
417
418 compressResult = compress_input(self, &output);
418 compressResult = compress_input(self, &output);
419
419
420 if (-1 == compressResult) {
420 if (-1 == compressResult) {
421 Py_XDECREF(result);
421 Py_XDECREF(result);
422 return NULL;
422 return NULL;
423 }
423 }
424 else if (0 == compressResult || 1 == compressResult) { }
424 else if (0 == compressResult || 1 == compressResult) { }
425 else {
425 else {
426 assert(0);
426 assert(0);
427 }
427 }
428
428
429 if (output.pos) {
429 if (output.pos) {
430 goto finally;
430 goto finally;
431 }
431 }
432 }
432 }
433
433
434 /* EOF */
434 /* EOF */
435 oldPos = output.pos;
435 oldPos = output.pos;
436
436
437 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
437 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
438 ZSTD_e_end);
438 ZSTD_e_end);
439
439
440 self->bytesCompressed += output.pos - oldPos;
440 self->bytesCompressed += output.pos - oldPos;
441
441
442 if (ZSTD_isError(zresult)) {
442 if (ZSTD_isError(zresult)) {
443 PyErr_Format(ZstdError, "error ending compression stream: %s",
443 PyErr_Format(ZstdError, "error ending compression stream: %s",
444 ZSTD_getErrorName(zresult));
444 ZSTD_getErrorName(zresult));
445 Py_XDECREF(result);
445 Py_XDECREF(result);
446 return NULL;
446 return NULL;
447 }
447 }
448
448
449 if (zresult == 0) {
449 if (zresult == 0) {
450 self->finishedOutput = 1;
450 self->finishedOutput = 1;
451 }
451 }
452
452
453 finally:
453 finally:
454 if (result) {
454 if (result) {
455 if (safe_pybytes_resize(&result, output.pos)) {
455 if (safe_pybytes_resize(&result, output.pos)) {
456 Py_XDECREF(result);
456 Py_XDECREF(result);
457 return NULL;
457 return NULL;
458 }
458 }
459 }
459 }
460
460
461 return result;
461 return result;
462 }
462 }
463
463
464 static PyObject* reader_readall(PyObject* self) {
464 static PyObject* reader_readall(PyObject* self) {
465 PyObject* chunks = NULL;
465 PyObject* chunks = NULL;
466 PyObject* empty = NULL;
466 PyObject* empty = NULL;
467 PyObject* result = NULL;
467 PyObject* result = NULL;
468
468
469 /* Our strategy is to collect chunks into a list then join all the
469 /* Our strategy is to collect chunks into a list then join all the
470 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
470 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
471 * this feels simple enough to implement and avoids potentially expensive
471 * this feels simple enough to implement and avoids potentially expensive
472 * reallocations of large buffers.
472 * reallocations of large buffers.
473 */
473 */
474 chunks = PyList_New(0);
474 chunks = PyList_New(0);
475 if (NULL == chunks) {
475 if (NULL == chunks) {
476 return NULL;
476 return NULL;
477 }
477 }
478
478
479 while (1) {
479 while (1) {
480 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
480 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
481 if (NULL == chunk) {
481 if (NULL == chunk) {
482 Py_DECREF(chunks);
482 Py_DECREF(chunks);
483 return NULL;
483 return NULL;
484 }
484 }
485
485
486 if (!PyBytes_Size(chunk)) {
486 if (!PyBytes_Size(chunk)) {
487 Py_DECREF(chunk);
487 Py_DECREF(chunk);
488 break;
488 break;
489 }
489 }
490
490
491 if (PyList_Append(chunks, chunk)) {
491 if (PyList_Append(chunks, chunk)) {
492 Py_DECREF(chunk);
492 Py_DECREF(chunk);
493 Py_DECREF(chunks);
493 Py_DECREF(chunks);
494 return NULL;
494 return NULL;
495 }
495 }
496
496
497 Py_DECREF(chunk);
497 Py_DECREF(chunk);
498 }
498 }
499
499
500 empty = PyBytes_FromStringAndSize("", 0);
500 empty = PyBytes_FromStringAndSize("", 0);
501 if (NULL == empty) {
501 if (NULL == empty) {
502 Py_DECREF(chunks);
502 Py_DECREF(chunks);
503 return NULL;
503 return NULL;
504 }
504 }
505
505
506 result = PyObject_CallMethod(empty, "join", "O", chunks);
506 result = PyObject_CallMethod(empty, "join", "O", chunks);
507
507
508 Py_DECREF(empty);
508 Py_DECREF(empty);
509 Py_DECREF(chunks);
509 Py_DECREF(chunks);
510
510
511 return result;
511 return result;
512 }
512 }
513
513
514 static PyObject* reader_readinto(ZstdCompressionReader* self, PyObject* args) {
514 static PyObject* reader_readinto(ZstdCompressionReader* self, PyObject* args) {
515 Py_buffer dest;
515 Py_buffer dest;
516 ZSTD_outBuffer output;
516 ZSTD_outBuffer output;
517 int readResult, compressResult;
517 int readResult, compressResult;
518 PyObject* result = NULL;
518 PyObject* result = NULL;
519 size_t zresult;
519 size_t zresult;
520 size_t oldPos;
520 size_t oldPos;
521
521
522 if (self->closed) {
522 if (self->closed) {
523 PyErr_SetString(PyExc_ValueError, "stream is closed");
523 PyErr_SetString(PyExc_ValueError, "stream is closed");
524 return NULL;
524 return NULL;
525 }
525 }
526
526
527 if (self->finishedOutput) {
527 if (self->finishedOutput) {
528 return PyLong_FromLong(0);
528 return PyLong_FromLong(0);
529 }
529 }
530
530
531 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
531 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
532 return NULL;
532 return NULL;
533 }
533 }
534
534
535 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
535 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
536 PyErr_SetString(PyExc_ValueError,
536 PyErr_SetString(PyExc_ValueError,
537 "destination buffer should be contiguous and have at most one dimension");
537 "destination buffer should be contiguous and have at most one dimension");
538 goto finally;
538 goto finally;
539 }
539 }
540
540
541 output.dst = dest.buf;
541 output.dst = dest.buf;
542 output.size = dest.len;
542 output.size = dest.len;
543 output.pos = 0;
543 output.pos = 0;
544
544
545 compressResult = compress_input(self, &output);
545 compressResult = compress_input(self, &output);
546
546
547 if (-1 == compressResult) {
547 if (-1 == compressResult) {
548 goto finally;
548 goto finally;
549 }
549 }
550 else if (0 == compressResult) { }
550 else if (0 == compressResult) { }
551 else if (1 == compressResult) {
551 else if (1 == compressResult) {
552 result = PyLong_FromSize_t(output.pos);
552 result = PyLong_FromSize_t(output.pos);
553 goto finally;
553 goto finally;
554 }
554 }
555 else {
555 else {
556 assert(0);
556 assert(0);
557 }
557 }
558
558
559 while (!self->finishedInput) {
559 while (!self->finishedInput) {
560 readResult = read_compressor_input(self);
560 readResult = read_compressor_input(self);
561
561
562 if (-1 == readResult) {
562 if (-1 == readResult) {
563 goto finally;
563 goto finally;
564 }
564 }
565 else if (0 == readResult || 1 == readResult) {}
565 else if (0 == readResult || 1 == readResult) {}
566 else {
566 else {
567 assert(0);
567 assert(0);
568 }
568 }
569
569
570 compressResult = compress_input(self, &output);
570 compressResult = compress_input(self, &output);
571
571
572 if (-1 == compressResult) {
572 if (-1 == compressResult) {
573 goto finally;
573 goto finally;
574 }
574 }
575 else if (0 == compressResult) { }
575 else if (0 == compressResult) { }
576 else if (1 == compressResult) {
576 else if (1 == compressResult) {
577 result = PyLong_FromSize_t(output.pos);
577 result = PyLong_FromSize_t(output.pos);
578 goto finally;
578 goto finally;
579 }
579 }
580 else {
580 else {
581 assert(0);
581 assert(0);
582 }
582 }
583 }
583 }
584
584
585 /* EOF */
585 /* EOF */
586 oldPos = output.pos;
586 oldPos = output.pos;
587
587
588 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
588 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
589 ZSTD_e_end);
589 ZSTD_e_end);
590
590
591 self->bytesCompressed += self->output.pos - oldPos;
591 self->bytesCompressed += self->output.pos - oldPos;
592
592
593 if (ZSTD_isError(zresult)) {
593 if (ZSTD_isError(zresult)) {
594 PyErr_Format(ZstdError, "error ending compression stream: %s",
594 PyErr_Format(ZstdError, "error ending compression stream: %s",
595 ZSTD_getErrorName(zresult));
595 ZSTD_getErrorName(zresult));
596 goto finally;
596 goto finally;
597 }
597 }
598
598
599 assert(output.pos);
599 assert(output.pos);
600
600
601 if (0 == zresult) {
601 if (0 == zresult) {
602 self->finishedOutput = 1;
602 self->finishedOutput = 1;
603 }
603 }
604
604
605 result = PyLong_FromSize_t(output.pos);
605 result = PyLong_FromSize_t(output.pos);
606
606
607 finally:
607 finally:
608 PyBuffer_Release(&dest);
608 PyBuffer_Release(&dest);
609
609
610 return result;
610 return result;
611 }
611 }
612
612
613 static PyObject* reader_readinto1(ZstdCompressionReader* self, PyObject* args) {
613 static PyObject* reader_readinto1(ZstdCompressionReader* self, PyObject* args) {
614 Py_buffer dest;
614 Py_buffer dest;
615 PyObject* result = NULL;
615 PyObject* result = NULL;
616 ZSTD_outBuffer output;
616 ZSTD_outBuffer output;
617 int compressResult;
617 int compressResult;
618 size_t oldPos;
618 size_t oldPos;
619 size_t zresult;
619 size_t zresult;
620
620
621 if (self->closed) {
621 if (self->closed) {
622 PyErr_SetString(PyExc_ValueError, "stream is closed");
622 PyErr_SetString(PyExc_ValueError, "stream is closed");
623 return NULL;
623 return NULL;
624 }
624 }
625
625
626 if (self->finishedOutput) {
626 if (self->finishedOutput) {
627 return PyLong_FromLong(0);
627 return PyLong_FromLong(0);
628 }
628 }
629
629
630 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
630 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
631 return NULL;
631 return NULL;
632 }
632 }
633
633
634 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
634 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
635 PyErr_SetString(PyExc_ValueError,
635 PyErr_SetString(PyExc_ValueError,
636 "destination buffer should be contiguous and have at most one dimension");
636 "destination buffer should be contiguous and have at most one dimension");
637 goto finally;
637 goto finally;
638 }
638 }
639
639
640 output.dst = dest.buf;
640 output.dst = dest.buf;
641 output.size = dest.len;
641 output.size = dest.len;
642 output.pos = 0;
642 output.pos = 0;
643
643
644 compressResult = compress_input(self, &output);
644 compressResult = compress_input(self, &output);
645
645
646 if (-1 == compressResult) {
646 if (-1 == compressResult) {
647 goto finally;
647 goto finally;
648 }
648 }
649 else if (0 == compressResult || 1 == compressResult) { }
649 else if (0 == compressResult || 1 == compressResult) { }
650 else {
650 else {
651 assert(0);
651 assert(0);
652 }
652 }
653
653
654 if (output.pos) {
654 if (output.pos) {
655 result = PyLong_FromSize_t(output.pos);
655 result = PyLong_FromSize_t(output.pos);
656 goto finally;
656 goto finally;
657 }
657 }
658
658
659 while (!self->finishedInput) {
659 while (!self->finishedInput) {
660 int readResult = read_compressor_input(self);
660 int readResult = read_compressor_input(self);
661
661
662 if (-1 == readResult) {
662 if (-1 == readResult) {
663 goto finally;
663 goto finally;
664 }
664 }
665 else if (0 == readResult || 1 == readResult) { }
665 else if (0 == readResult || 1 == readResult) { }
666 else {
666 else {
667 assert(0);
667 assert(0);
668 }
668 }
669
669
670 compressResult = compress_input(self, &output);
670 compressResult = compress_input(self, &output);
671
671
672 if (-1 == compressResult) {
672 if (-1 == compressResult) {
673 goto finally;
673 goto finally;
674 }
674 }
675 else if (0 == compressResult) { }
675 else if (0 == compressResult) { }
676 else if (1 == compressResult) {
676 else if (1 == compressResult) {
677 result = PyLong_FromSize_t(output.pos);
677 result = PyLong_FromSize_t(output.pos);
678 goto finally;
678 goto finally;
679 }
679 }
680 else {
680 else {
681 assert(0);
681 assert(0);
682 }
682 }
683
683
684 /* If we produced output and we're not done with input, emit
684 /* If we produced output and we're not done with input, emit
685 * that output now, as we've hit restrictions of read1().
685 * that output now, as we've hit restrictions of read1().
686 */
686 */
687 if (output.pos && !self->finishedInput) {
687 if (output.pos && !self->finishedInput) {
688 result = PyLong_FromSize_t(output.pos);
688 result = PyLong_FromSize_t(output.pos);
689 goto finally;
689 goto finally;
690 }
690 }
691
691
692 /* Otherwise we either have no output or we've exhausted the
692 /* Otherwise we either have no output or we've exhausted the
693 * input. Either we try to get more input or we fall through
693 * input. Either we try to get more input or we fall through
694 * to EOF below */
694 * to EOF below */
695 }
695 }
696
696
697 /* EOF */
697 /* EOF */
698 oldPos = output.pos;
698 oldPos = output.pos;
699
699
700 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
700 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
701 ZSTD_e_end);
701 ZSTD_e_end);
702
702
703 self->bytesCompressed += self->output.pos - oldPos;
703 self->bytesCompressed += self->output.pos - oldPos;
704
704
705 if (ZSTD_isError(zresult)) {
705 if (ZSTD_isError(zresult)) {
706 PyErr_Format(ZstdError, "error ending compression stream: %s",
706 PyErr_Format(ZstdError, "error ending compression stream: %s",
707 ZSTD_getErrorName(zresult));
707 ZSTD_getErrorName(zresult));
708 goto finally;
708 goto finally;
709 }
709 }
710
710
711 assert(output.pos);
711 assert(output.pos);
712
712
713 if (0 == zresult) {
713 if (0 == zresult) {
714 self->finishedOutput = 1;
714 self->finishedOutput = 1;
715 }
715 }
716
716
717 result = PyLong_FromSize_t(output.pos);
717 result = PyLong_FromSize_t(output.pos);
718
718
719 finally:
719 finally:
720 PyBuffer_Release(&dest);
720 PyBuffer_Release(&dest);
721
721
722 return result;
722 return result;
723 }
723 }
724
724
725 static PyObject* reader_iter(PyObject* self) {
725 static PyObject* reader_iter(PyObject* self) {
726 set_unsupported_operation();
726 set_unsupported_operation();
727 return NULL;
727 return NULL;
728 }
728 }
729
729
730 static PyObject* reader_iternext(PyObject* self) {
730 static PyObject* reader_iternext(PyObject* self) {
731 set_unsupported_operation();
731 set_unsupported_operation();
732 return NULL;
732 return NULL;
733 }
733 }
734
734
735 static PyMethodDef reader_methods[] = {
735 static PyMethodDef reader_methods[] = {
736 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
736 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
737 PyDoc_STR("Enter a compression context") },
737 PyDoc_STR("Enter a compression context") },
738 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
738 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
739 PyDoc_STR("Exit a compression context") },
739 PyDoc_STR("Exit a compression context") },
740 { "close", (PyCFunction)reader_close, METH_NOARGS,
740 { "close", (PyCFunction)reader_close, METH_NOARGS,
741 PyDoc_STR("Close the stream so it cannot perform any more operations") },
741 PyDoc_STR("Close the stream so it cannot perform any more operations") },
742 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
742 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
743 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
743 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
744 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
744 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
745 PyDoc_STR("Returns True") },
745 PyDoc_STR("Returns True") },
746 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
746 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
747 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL },
747 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL },
748 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
748 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
749 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
749 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
750 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
750 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
751 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
751 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
752 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
752 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
753 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
753 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
754 PyDoc_STR("Returns False") },
754 PyDoc_STR("Returns False") },
755 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
755 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
756 PyDoc_STR("Returns current number of bytes compressed") },
756 PyDoc_STR("Returns current number of bytes compressed") },
757 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
757 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
758 PyDoc_STR("Returns False") },
758 PyDoc_STR("Returns False") },
759 { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
759 { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
760 { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
760 { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
761 { NULL, NULL }
761 { NULL, NULL }
762 };
762 };
763
763
764 static PyMemberDef reader_members[] = {
764 static PyMemberDef reader_members[] = {
765 { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
765 { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
766 READONLY, "whether stream is closed" },
766 READONLY, "whether stream is closed" },
767 { NULL }
767 { NULL }
768 };
768 };
769
769
770 PyTypeObject ZstdCompressionReaderType = {
770 PyTypeObject ZstdCompressionReaderType = {
771 PyVarObject_HEAD_INIT(NULL, 0)
771 PyVarObject_HEAD_INIT(NULL, 0)
772 "zstd.ZstdCompressionReader", /* tp_name */
772 "zstd.ZstdCompressionReader", /* tp_name */
773 sizeof(ZstdCompressionReader), /* tp_basicsize */
773 sizeof(ZstdCompressionReader), /* tp_basicsize */
774 0, /* tp_itemsize */
774 0, /* tp_itemsize */
775 (destructor)reader_dealloc, /* tp_dealloc */
775 (destructor)reader_dealloc, /* tp_dealloc */
776 0, /* tp_print */
776 0, /* tp_print */
777 0, /* tp_getattr */
777 0, /* tp_getattr */
778 0, /* tp_setattr */
778 0, /* tp_setattr */
779 0, /* tp_compare */
779 0, /* tp_compare */
780 0, /* tp_repr */
780 0, /* tp_repr */
781 0, /* tp_as_number */
781 0, /* tp_as_number */
782 0, /* tp_as_sequence */
782 0, /* tp_as_sequence */
783 0, /* tp_as_mapping */
783 0, /* tp_as_mapping */
784 0, /* tp_hash */
784 0, /* tp_hash */
785 0, /* tp_call */
785 0, /* tp_call */
786 0, /* tp_str */
786 0, /* tp_str */
787 0, /* tp_getattro */
787 0, /* tp_getattro */
788 0, /* tp_setattro */
788 0, /* tp_setattro */
789 0, /* tp_as_buffer */
789 0, /* tp_as_buffer */
790 Py_TPFLAGS_DEFAULT, /* tp_flags */
790 Py_TPFLAGS_DEFAULT, /* tp_flags */
791 0, /* tp_doc */
791 0, /* tp_doc */
792 0, /* tp_traverse */
792 0, /* tp_traverse */
793 0, /* tp_clear */
793 0, /* tp_clear */
794 0, /* tp_richcompare */
794 0, /* tp_richcompare */
795 0, /* tp_weaklistoffset */
795 0, /* tp_weaklistoffset */
796 reader_iter, /* tp_iter */
796 reader_iter, /* tp_iter */
797 reader_iternext, /* tp_iternext */
797 reader_iternext, /* tp_iternext */
798 reader_methods, /* tp_methods */
798 reader_methods, /* tp_methods */
799 reader_members, /* tp_members */
799 reader_members, /* tp_members */
800 0, /* tp_getset */
800 0, /* tp_getset */
801 0, /* tp_base */
801 0, /* tp_base */
802 0, /* tp_dict */
802 0, /* tp_dict */
803 0, /* tp_descr_get */
803 0, /* tp_descr_get */
804 0, /* tp_descr_set */
804 0, /* tp_descr_set */
805 0, /* tp_dictoffset */
805 0, /* tp_dictoffset */
806 0, /* tp_init */
806 0, /* tp_init */
807 0, /* tp_alloc */
807 0, /* tp_alloc */
808 PyType_GenericNew, /* tp_new */
808 PyType_GenericNew, /* tp_new */
809 };
809 };
810
810
811 void compressionreader_module_init(PyObject* mod) {
811 void compressionreader_module_init(PyObject* mod) {
812 /* TODO make reader a sub-class of io.RawIOBase */
812 /* TODO make reader a sub-class of io.RawIOBase */
813
813
814 Py_TYPE(&ZstdCompressionReaderType) = &PyType_Type;
814 Py_SET_TYPE(&ZstdCompressionReaderType, &PyType_Type);
815 if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
815 if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
816 return;
816 return;
817 }
817 }
818 }
818 }
@@ -1,372 +1,372 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
14 """A context manager used for writing compressed output to a writer.\n"
14 """A context manager used for writing compressed output to a writer.\n"
15 );
15 );
16
16
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
18 Py_XDECREF(self->compressor);
18 Py_XDECREF(self->compressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 PyMem_Free(self->output.dst);
21 PyMem_Free(self->output.dst);
22 self->output.dst = NULL;
22 self->output.dst = NULL;
23
23
24 PyObject_Del(self);
24 PyObject_Del(self);
25 }
25 }
26
26
27 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
27 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
28 if (self->closed) {
28 if (self->closed) {
29 PyErr_SetString(PyExc_ValueError, "stream is closed");
29 PyErr_SetString(PyExc_ValueError, "stream is closed");
30 return NULL;
30 return NULL;
31 }
31 }
32
32
33 if (self->entered) {
33 if (self->entered) {
34 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
34 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
35 return NULL;
35 return NULL;
36 }
36 }
37
37
38 self->entered = 1;
38 self->entered = 1;
39
39
40 Py_INCREF(self);
40 Py_INCREF(self);
41 return (PyObject*)self;
41 return (PyObject*)self;
42 }
42 }
43
43
44 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
44 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
45 PyObject* exc_type;
45 PyObject* exc_type;
46 PyObject* exc_value;
46 PyObject* exc_value;
47 PyObject* exc_tb;
47 PyObject* exc_tb;
48
48
49 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
49 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
50 return NULL;
50 return NULL;
51 }
51 }
52
52
53 self->entered = 0;
53 self->entered = 0;
54
54
55 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
55 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
56 PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL);
56 PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL);
57
57
58 if (NULL == result) {
58 if (NULL == result) {
59 return NULL;
59 return NULL;
60 }
60 }
61 }
61 }
62
62
63 Py_RETURN_FALSE;
63 Py_RETURN_FALSE;
64 }
64 }
65
65
66 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
66 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
67 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
67 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
68 }
68 }
69
69
70 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
70 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
71 static char* kwlist[] = {
71 static char* kwlist[] = {
72 "data",
72 "data",
73 NULL
73 NULL
74 };
74 };
75
75
76 PyObject* result = NULL;
76 PyObject* result = NULL;
77 Py_buffer source;
77 Py_buffer source;
78 size_t zresult;
78 size_t zresult;
79 ZSTD_inBuffer input;
79 ZSTD_inBuffer input;
80 PyObject* res;
80 PyObject* res;
81 Py_ssize_t totalWrite = 0;
81 Py_ssize_t totalWrite = 0;
82
82
83 #if PY_MAJOR_VERSION >= 3
83 #if PY_MAJOR_VERSION >= 3
84 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
84 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
85 #else
85 #else
86 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
86 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
87 #endif
87 #endif
88 kwlist, &source)) {
88 kwlist, &source)) {
89 return NULL;
89 return NULL;
90 }
90 }
91
91
92 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
92 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
93 PyErr_SetString(PyExc_ValueError,
93 PyErr_SetString(PyExc_ValueError,
94 "data buffer should be contiguous and have at most one dimension");
94 "data buffer should be contiguous and have at most one dimension");
95 goto finally;
95 goto finally;
96 }
96 }
97
97
98 if (self->closed) {
98 if (self->closed) {
99 PyErr_SetString(PyExc_ValueError, "stream is closed");
99 PyErr_SetString(PyExc_ValueError, "stream is closed");
100 return NULL;
100 return NULL;
101 }
101 }
102
102
103 self->output.pos = 0;
103 self->output.pos = 0;
104
104
105 input.src = source.buf;
105 input.src = source.buf;
106 input.size = source.len;
106 input.size = source.len;
107 input.pos = 0;
107 input.pos = 0;
108
108
109 while (input.pos < (size_t)source.len) {
109 while (input.pos < (size_t)source.len) {
110 Py_BEGIN_ALLOW_THREADS
110 Py_BEGIN_ALLOW_THREADS
111 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
111 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
112 Py_END_ALLOW_THREADS
112 Py_END_ALLOW_THREADS
113
113
114 if (ZSTD_isError(zresult)) {
114 if (ZSTD_isError(zresult)) {
115 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
115 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
116 goto finally;
116 goto finally;
117 }
117 }
118
118
119 /* Copy data from output buffer to writer. */
119 /* Copy data from output buffer to writer. */
120 if (self->output.pos) {
120 if (self->output.pos) {
121 #if PY_MAJOR_VERSION >= 3
121 #if PY_MAJOR_VERSION >= 3
122 res = PyObject_CallMethod(self->writer, "write", "y#",
122 res = PyObject_CallMethod(self->writer, "write", "y#",
123 #else
123 #else
124 res = PyObject_CallMethod(self->writer, "write", "s#",
124 res = PyObject_CallMethod(self->writer, "write", "s#",
125 #endif
125 #endif
126 self->output.dst, self->output.pos);
126 self->output.dst, self->output.pos);
127 Py_XDECREF(res);
127 Py_XDECREF(res);
128 totalWrite += self->output.pos;
128 totalWrite += self->output.pos;
129 self->bytesCompressed += self->output.pos;
129 self->bytesCompressed += self->output.pos;
130 }
130 }
131 self->output.pos = 0;
131 self->output.pos = 0;
132 }
132 }
133
133
134 if (self->writeReturnRead) {
134 if (self->writeReturnRead) {
135 result = PyLong_FromSize_t(input.pos);
135 result = PyLong_FromSize_t(input.pos);
136 }
136 }
137 else {
137 else {
138 result = PyLong_FromSsize_t(totalWrite);
138 result = PyLong_FromSsize_t(totalWrite);
139 }
139 }
140
140
141 finally:
141 finally:
142 PyBuffer_Release(&source);
142 PyBuffer_Release(&source);
143 return result;
143 return result;
144 }
144 }
145
145
146 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
146 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
147 static char* kwlist[] = {
147 static char* kwlist[] = {
148 "flush_mode",
148 "flush_mode",
149 NULL
149 NULL
150 };
150 };
151
151
152 size_t zresult;
152 size_t zresult;
153 ZSTD_inBuffer input;
153 ZSTD_inBuffer input;
154 PyObject* res;
154 PyObject* res;
155 Py_ssize_t totalWrite = 0;
155 Py_ssize_t totalWrite = 0;
156 unsigned flush_mode = 0;
156 unsigned flush_mode = 0;
157 ZSTD_EndDirective flush;
157 ZSTD_EndDirective flush;
158
158
159 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush",
159 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush",
160 kwlist, &flush_mode)) {
160 kwlist, &flush_mode)) {
161 return NULL;
161 return NULL;
162 }
162 }
163
163
164 switch (flush_mode) {
164 switch (flush_mode) {
165 case 0:
165 case 0:
166 flush = ZSTD_e_flush;
166 flush = ZSTD_e_flush;
167 break;
167 break;
168 case 1:
168 case 1:
169 flush = ZSTD_e_end;
169 flush = ZSTD_e_end;
170 break;
170 break;
171 default:
171 default:
172 PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
172 PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
173 return NULL;
173 return NULL;
174 }
174 }
175
175
176 if (self->closed) {
176 if (self->closed) {
177 PyErr_SetString(PyExc_ValueError, "stream is closed");
177 PyErr_SetString(PyExc_ValueError, "stream is closed");
178 return NULL;
178 return NULL;
179 }
179 }
180
180
181 self->output.pos = 0;
181 self->output.pos = 0;
182
182
183 input.src = NULL;
183 input.src = NULL;
184 input.size = 0;
184 input.size = 0;
185 input.pos = 0;
185 input.pos = 0;
186
186
187 while (1) {
187 while (1) {
188 Py_BEGIN_ALLOW_THREADS
188 Py_BEGIN_ALLOW_THREADS
189 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush);
189 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush);
190 Py_END_ALLOW_THREADS
190 Py_END_ALLOW_THREADS
191
191
192 if (ZSTD_isError(zresult)) {
192 if (ZSTD_isError(zresult)) {
193 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
193 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
194 return NULL;
194 return NULL;
195 }
195 }
196
196
197 /* Copy data from output buffer to writer. */
197 /* Copy data from output buffer to writer. */
198 if (self->output.pos) {
198 if (self->output.pos) {
199 #if PY_MAJOR_VERSION >= 3
199 #if PY_MAJOR_VERSION >= 3
200 res = PyObject_CallMethod(self->writer, "write", "y#",
200 res = PyObject_CallMethod(self->writer, "write", "y#",
201 #else
201 #else
202 res = PyObject_CallMethod(self->writer, "write", "s#",
202 res = PyObject_CallMethod(self->writer, "write", "s#",
203 #endif
203 #endif
204 self->output.dst, self->output.pos);
204 self->output.dst, self->output.pos);
205 Py_XDECREF(res);
205 Py_XDECREF(res);
206 totalWrite += self->output.pos;
206 totalWrite += self->output.pos;
207 self->bytesCompressed += self->output.pos;
207 self->bytesCompressed += self->output.pos;
208 }
208 }
209
209
210 self->output.pos = 0;
210 self->output.pos = 0;
211
211
212 if (!zresult) {
212 if (!zresult) {
213 break;
213 break;
214 }
214 }
215 }
215 }
216
216
217 return PyLong_FromSsize_t(totalWrite);
217 return PyLong_FromSsize_t(totalWrite);
218 }
218 }
219
219
220 static PyObject* ZstdCompressionWriter_close(ZstdCompressionWriter* self) {
220 static PyObject* ZstdCompressionWriter_close(ZstdCompressionWriter* self) {
221 PyObject* result;
221 PyObject* result;
222
222
223 if (self->closed) {
223 if (self->closed) {
224 Py_RETURN_NONE;
224 Py_RETURN_NONE;
225 }
225 }
226
226
227 result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1);
227 result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1);
228 self->closed = 1;
228 self->closed = 1;
229
229
230 if (NULL == result) {
230 if (NULL == result) {
231 return NULL;
231 return NULL;
232 }
232 }
233
233
234 /* Call close on underlying stream as well. */
234 /* Call close on underlying stream as well. */
235 if (PyObject_HasAttrString(self->writer, "close")) {
235 if (PyObject_HasAttrString(self->writer, "close")) {
236 return PyObject_CallMethod(self->writer, "close", NULL);
236 return PyObject_CallMethod(self->writer, "close", NULL);
237 }
237 }
238
238
239 Py_RETURN_NONE;
239 Py_RETURN_NONE;
240 }
240 }
241
241
242 static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) {
242 static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) {
243 if (PyObject_HasAttrString(self->writer, "fileno")) {
243 if (PyObject_HasAttrString(self->writer, "fileno")) {
244 return PyObject_CallMethod(self->writer, "fileno", NULL);
244 return PyObject_CallMethod(self->writer, "fileno", NULL);
245 }
245 }
246 else {
246 else {
247 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
247 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
248 return NULL;
248 return NULL;
249 }
249 }
250 }
250 }
251
251
252 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
252 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
253 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
253 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
254 }
254 }
255
255
256 static PyObject* ZstdCompressionWriter_writelines(PyObject* self, PyObject* args) {
256 static PyObject* ZstdCompressionWriter_writelines(PyObject* self, PyObject* args) {
257 PyErr_SetNone(PyExc_NotImplementedError);
257 PyErr_SetNone(PyExc_NotImplementedError);
258 return NULL;
258 return NULL;
259 }
259 }
260
260
261 static PyObject* ZstdCompressionWriter_false(PyObject* self, PyObject* args) {
261 static PyObject* ZstdCompressionWriter_false(PyObject* self, PyObject* args) {
262 Py_RETURN_FALSE;
262 Py_RETURN_FALSE;
263 }
263 }
264
264
265 static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) {
265 static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) {
266 Py_RETURN_TRUE;
266 Py_RETURN_TRUE;
267 }
267 }
268
268
269 static PyObject* ZstdCompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
269 static PyObject* ZstdCompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
270 PyObject* iomod;
270 PyObject* iomod;
271 PyObject* exc;
271 PyObject* exc;
272
272
273 iomod = PyImport_ImportModule("io");
273 iomod = PyImport_ImportModule("io");
274 if (NULL == iomod) {
274 if (NULL == iomod) {
275 return NULL;
275 return NULL;
276 }
276 }
277
277
278 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
278 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
279 if (NULL == exc) {
279 if (NULL == exc) {
280 Py_DECREF(iomod);
280 Py_DECREF(iomod);
281 return NULL;
281 return NULL;
282 }
282 }
283
283
284 PyErr_SetNone(exc);
284 PyErr_SetNone(exc);
285 Py_DECREF(exc);
285 Py_DECREF(exc);
286 Py_DECREF(iomod);
286 Py_DECREF(iomod);
287
287
288 return NULL;
288 return NULL;
289 }
289 }
290
290
291 static PyMethodDef ZstdCompressionWriter_methods[] = {
291 static PyMethodDef ZstdCompressionWriter_methods[] = {
292 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
292 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
293 PyDoc_STR("Enter a compression context.") },
293 PyDoc_STR("Enter a compression context.") },
294 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
294 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
295 PyDoc_STR("Exit a compression context.") },
295 PyDoc_STR("Exit a compression context.") },
296 { "close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL },
296 { "close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL },
297 { "fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL },
297 { "fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL },
298 { "isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
298 { "isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
299 { "readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
299 { "readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
300 { "readline", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
300 { "readline", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
301 { "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
301 { "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
302 { "seek", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
302 { "seek", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
303 { "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL },
303 { "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL },
304 { "truncate", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
304 { "truncate", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
305 { "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL },
305 { "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL },
306 { "writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL },
306 { "writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL },
307 { "read", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
307 { "read", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
308 { "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
308 { "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
309 { "readinto", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
309 { "readinto", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
310 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
310 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
311 PyDoc_STR("Obtain the memory size of the underlying compressor") },
311 PyDoc_STR("Obtain the memory size of the underlying compressor") },
312 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
312 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
313 PyDoc_STR("Compress data") },
313 PyDoc_STR("Compress data") },
314 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS,
314 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS,
315 PyDoc_STR("Flush data and finish a zstd frame") },
315 PyDoc_STR("Flush data and finish a zstd frame") },
316 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
316 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
317 PyDoc_STR("Returns current number of bytes compressed") },
317 PyDoc_STR("Returns current number of bytes compressed") },
318 { NULL, NULL }
318 { NULL, NULL }
319 };
319 };
320
320
321 static PyMemberDef ZstdCompressionWriter_members[] = {
321 static PyMemberDef ZstdCompressionWriter_members[] = {
322 { "closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL },
322 { "closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL },
323 { NULL }
323 { NULL }
324 };
324 };
325
325
326 PyTypeObject ZstdCompressionWriterType = {
326 PyTypeObject ZstdCompressionWriterType = {
327 PyVarObject_HEAD_INIT(NULL, 0)
327 PyVarObject_HEAD_INIT(NULL, 0)
328 "zstd.ZstdCompressionWriter", /* tp_name */
328 "zstd.ZstdCompressionWriter", /* tp_name */
329 sizeof(ZstdCompressionWriter), /* tp_basicsize */
329 sizeof(ZstdCompressionWriter), /* tp_basicsize */
330 0, /* tp_itemsize */
330 0, /* tp_itemsize */
331 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
331 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
332 0, /* tp_print */
332 0, /* tp_print */
333 0, /* tp_getattr */
333 0, /* tp_getattr */
334 0, /* tp_setattr */
334 0, /* tp_setattr */
335 0, /* tp_compare */
335 0, /* tp_compare */
336 0, /* tp_repr */
336 0, /* tp_repr */
337 0, /* tp_as_number */
337 0, /* tp_as_number */
338 0, /* tp_as_sequence */
338 0, /* tp_as_sequence */
339 0, /* tp_as_mapping */
339 0, /* tp_as_mapping */
340 0, /* tp_hash */
340 0, /* tp_hash */
341 0, /* tp_call */
341 0, /* tp_call */
342 0, /* tp_str */
342 0, /* tp_str */
343 0, /* tp_getattro */
343 0, /* tp_getattro */
344 0, /* tp_setattro */
344 0, /* tp_setattro */
345 0, /* tp_as_buffer */
345 0, /* tp_as_buffer */
346 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
346 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
347 ZstdCompresssionWriter__doc__, /* tp_doc */
347 ZstdCompresssionWriter__doc__, /* tp_doc */
348 0, /* tp_traverse */
348 0, /* tp_traverse */
349 0, /* tp_clear */
349 0, /* tp_clear */
350 0, /* tp_richcompare */
350 0, /* tp_richcompare */
351 0, /* tp_weaklistoffset */
351 0, /* tp_weaklistoffset */
352 0, /* tp_iter */
352 0, /* tp_iter */
353 0, /* tp_iternext */
353 0, /* tp_iternext */
354 ZstdCompressionWriter_methods, /* tp_methods */
354 ZstdCompressionWriter_methods, /* tp_methods */
355 ZstdCompressionWriter_members, /* tp_members */
355 ZstdCompressionWriter_members, /* tp_members */
356 0, /* tp_getset */
356 0, /* tp_getset */
357 0, /* tp_base */
357 0, /* tp_base */
358 0, /* tp_dict */
358 0, /* tp_dict */
359 0, /* tp_descr_get */
359 0, /* tp_descr_get */
360 0, /* tp_descr_set */
360 0, /* tp_descr_set */
361 0, /* tp_dictoffset */
361 0, /* tp_dictoffset */
362 0, /* tp_init */
362 0, /* tp_init */
363 0, /* tp_alloc */
363 0, /* tp_alloc */
364 PyType_GenericNew, /* tp_new */
364 PyType_GenericNew, /* tp_new */
365 };
365 };
366
366
367 void compressionwriter_module_init(PyObject* mod) {
367 void compressionwriter_module_init(PyObject* mod) {
368 Py_TYPE(&ZstdCompressionWriterType) = &PyType_Type;
368 Py_SET_TYPE(&ZstdCompressionWriterType, &PyType_Type);
369 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
369 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
370 return;
370 return;
371 }
371 }
372 }
372 }
@@ -1,256 +1,256 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
14 "Perform compression using a standard library compatible API.\n"
14 "Perform compression using a standard library compatible API.\n"
15 );
15 );
16
16
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
18 PyMem_Free(self->output.dst);
18 PyMem_Free(self->output.dst);
19 self->output.dst = NULL;
19 self->output.dst = NULL;
20
20
21 Py_XDECREF(self->compressor);
21 Py_XDECREF(self->compressor);
22
22
23 PyObject_Del(self);
23 PyObject_Del(self);
24 }
24 }
25
25
26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
27 static char* kwlist[] = {
27 static char* kwlist[] = {
28 "data",
28 "data",
29 NULL
29 NULL
30 };
30 };
31
31
32 Py_buffer source;
32 Py_buffer source;
33 ZSTD_inBuffer input;
33 ZSTD_inBuffer input;
34 size_t zresult;
34 size_t zresult;
35 PyObject* result = NULL;
35 PyObject* result = NULL;
36 Py_ssize_t resultSize = 0;
36 Py_ssize_t resultSize = 0;
37
37
38 if (self->finished) {
38 if (self->finished) {
39 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
39 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
40 return NULL;
40 return NULL;
41 }
41 }
42
42
43 #if PY_MAJOR_VERSION >= 3
43 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
45 #else
45 #else
46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
47 #endif
47 #endif
48 kwlist, &source)) {
48 kwlist, &source)) {
49 return NULL;
49 return NULL;
50 }
50 }
51
51
52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
53 PyErr_SetString(PyExc_ValueError,
53 PyErr_SetString(PyExc_ValueError,
54 "data buffer should be contiguous and have at most one dimension");
54 "data buffer should be contiguous and have at most one dimension");
55 goto finally;
55 goto finally;
56 }
56 }
57
57
58 input.src = source.buf;
58 input.src = source.buf;
59 input.size = source.len;
59 input.size = source.len;
60 input.pos = 0;
60 input.pos = 0;
61
61
62 while (input.pos < (size_t)source.len) {
62 while (input.pos < (size_t)source.len) {
63 Py_BEGIN_ALLOW_THREADS
63 Py_BEGIN_ALLOW_THREADS
64 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
64 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
65 &input, ZSTD_e_continue);
65 &input, ZSTD_e_continue);
66 Py_END_ALLOW_THREADS
66 Py_END_ALLOW_THREADS
67
67
68 if (ZSTD_isError(zresult)) {
68 if (ZSTD_isError(zresult)) {
69 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
69 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
70 Py_CLEAR(result);
70 Py_CLEAR(result);
71 goto finally;
71 goto finally;
72 }
72 }
73
73
74 if (self->output.pos) {
74 if (self->output.pos) {
75 if (result) {
75 if (result) {
76 resultSize = PyBytes_GET_SIZE(result);
76 resultSize = PyBytes_GET_SIZE(result);
77
77
78 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
78 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
79 Py_CLEAR(result);
79 Py_CLEAR(result);
80 goto finally;
80 goto finally;
81 }
81 }
82
82
83 memcpy(PyBytes_AS_STRING(result) + resultSize,
83 memcpy(PyBytes_AS_STRING(result) + resultSize,
84 self->output.dst, self->output.pos);
84 self->output.dst, self->output.pos);
85 }
85 }
86 else {
86 else {
87 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
87 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
88 if (!result) {
88 if (!result) {
89 goto finally;
89 goto finally;
90 }
90 }
91 }
91 }
92
92
93 self->output.pos = 0;
93 self->output.pos = 0;
94 }
94 }
95 }
95 }
96
96
97 if (NULL == result) {
97 if (NULL == result) {
98 result = PyBytes_FromString("");
98 result = PyBytes_FromString("");
99 }
99 }
100
100
101 finally:
101 finally:
102 PyBuffer_Release(&source);
102 PyBuffer_Release(&source);
103
103
104 return result;
104 return result;
105 }
105 }
106
106
107 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
107 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
108 static char* kwlist[] = {
108 static char* kwlist[] = {
109 "flush_mode",
109 "flush_mode",
110 NULL
110 NULL
111 };
111 };
112
112
113 int flushMode = compressorobj_flush_finish;
113 int flushMode = compressorobj_flush_finish;
114 size_t zresult;
114 size_t zresult;
115 PyObject* result = NULL;
115 PyObject* result = NULL;
116 Py_ssize_t resultSize = 0;
116 Py_ssize_t resultSize = 0;
117 ZSTD_inBuffer input;
117 ZSTD_inBuffer input;
118 ZSTD_EndDirective zFlushMode;
118 ZSTD_EndDirective zFlushMode;
119
119
120 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
120 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
121 return NULL;
121 return NULL;
122 }
122 }
123
123
124 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
124 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
125 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
125 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
126 return NULL;
126 return NULL;
127 }
127 }
128
128
129 if (self->finished) {
129 if (self->finished) {
130 PyErr_SetString(ZstdError, "compressor object already finished");
130 PyErr_SetString(ZstdError, "compressor object already finished");
131 return NULL;
131 return NULL;
132 }
132 }
133
133
134 switch (flushMode) {
134 switch (flushMode) {
135 case compressorobj_flush_block:
135 case compressorobj_flush_block:
136 zFlushMode = ZSTD_e_flush;
136 zFlushMode = ZSTD_e_flush;
137 break;
137 break;
138
138
139 case compressorobj_flush_finish:
139 case compressorobj_flush_finish:
140 zFlushMode = ZSTD_e_end;
140 zFlushMode = ZSTD_e_end;
141 self->finished = 1;
141 self->finished = 1;
142 break;
142 break;
143
143
144 default:
144 default:
145 PyErr_SetString(ZstdError, "unhandled flush mode");
145 PyErr_SetString(ZstdError, "unhandled flush mode");
146 return NULL;
146 return NULL;
147 }
147 }
148
148
149 assert(self->output.pos == 0);
149 assert(self->output.pos == 0);
150
150
151 input.src = NULL;
151 input.src = NULL;
152 input.size = 0;
152 input.size = 0;
153 input.pos = 0;
153 input.pos = 0;
154
154
155 while (1) {
155 while (1) {
156 Py_BEGIN_ALLOW_THREADS
156 Py_BEGIN_ALLOW_THREADS
157 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
157 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
158 &input, zFlushMode);
158 &input, zFlushMode);
159 Py_END_ALLOW_THREADS
159 Py_END_ALLOW_THREADS
160
160
161 if (ZSTD_isError(zresult)) {
161 if (ZSTD_isError(zresult)) {
162 PyErr_Format(ZstdError, "error ending compression stream: %s",
162 PyErr_Format(ZstdError, "error ending compression stream: %s",
163 ZSTD_getErrorName(zresult));
163 ZSTD_getErrorName(zresult));
164 return NULL;
164 return NULL;
165 }
165 }
166
166
167 if (self->output.pos) {
167 if (self->output.pos) {
168 if (result) {
168 if (result) {
169 resultSize = PyBytes_GET_SIZE(result);
169 resultSize = PyBytes_GET_SIZE(result);
170
170
171 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
171 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
172 Py_XDECREF(result);
172 Py_XDECREF(result);
173 return NULL;
173 return NULL;
174 }
174 }
175
175
176 memcpy(PyBytes_AS_STRING(result) + resultSize,
176 memcpy(PyBytes_AS_STRING(result) + resultSize,
177 self->output.dst, self->output.pos);
177 self->output.dst, self->output.pos);
178 }
178 }
179 else {
179 else {
180 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
180 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
181 if (!result) {
181 if (!result) {
182 return NULL;
182 return NULL;
183 }
183 }
184 }
184 }
185
185
186 self->output.pos = 0;
186 self->output.pos = 0;
187 }
187 }
188
188
189 if (!zresult) {
189 if (!zresult) {
190 break;
190 break;
191 }
191 }
192 }
192 }
193
193
194 if (result) {
194 if (result) {
195 return result;
195 return result;
196 }
196 }
197 else {
197 else {
198 return PyBytes_FromString("");
198 return PyBytes_FromString("");
199 }
199 }
200 }
200 }
201
201
202 static PyMethodDef ZstdCompressionObj_methods[] = {
202 static PyMethodDef ZstdCompressionObj_methods[] = {
203 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
203 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
204 PyDoc_STR("compress data") },
204 PyDoc_STR("compress data") },
205 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
205 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
206 PyDoc_STR("finish compression operation") },
206 PyDoc_STR("finish compression operation") },
207 { NULL, NULL }
207 { NULL, NULL }
208 };
208 };
209
209
210 PyTypeObject ZstdCompressionObjType = {
210 PyTypeObject ZstdCompressionObjType = {
211 PyVarObject_HEAD_INIT(NULL, 0)
211 PyVarObject_HEAD_INIT(NULL, 0)
212 "zstd.ZstdCompressionObj", /* tp_name */
212 "zstd.ZstdCompressionObj", /* tp_name */
213 sizeof(ZstdCompressionObj), /* tp_basicsize */
213 sizeof(ZstdCompressionObj), /* tp_basicsize */
214 0, /* tp_itemsize */
214 0, /* tp_itemsize */
215 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
215 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
216 0, /* tp_print */
216 0, /* tp_print */
217 0, /* tp_getattr */
217 0, /* tp_getattr */
218 0, /* tp_setattr */
218 0, /* tp_setattr */
219 0, /* tp_compare */
219 0, /* tp_compare */
220 0, /* tp_repr */
220 0, /* tp_repr */
221 0, /* tp_as_number */
221 0, /* tp_as_number */
222 0, /* tp_as_sequence */
222 0, /* tp_as_sequence */
223 0, /* tp_as_mapping */
223 0, /* tp_as_mapping */
224 0, /* tp_hash */
224 0, /* tp_hash */
225 0, /* tp_call */
225 0, /* tp_call */
226 0, /* tp_str */
226 0, /* tp_str */
227 0, /* tp_getattro */
227 0, /* tp_getattro */
228 0, /* tp_setattro */
228 0, /* tp_setattro */
229 0, /* tp_as_buffer */
229 0, /* tp_as_buffer */
230 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
230 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
231 ZstdCompressionObj__doc__, /* tp_doc */
231 ZstdCompressionObj__doc__, /* tp_doc */
232 0, /* tp_traverse */
232 0, /* tp_traverse */
233 0, /* tp_clear */
233 0, /* tp_clear */
234 0, /* tp_richcompare */
234 0, /* tp_richcompare */
235 0, /* tp_weaklistoffset */
235 0, /* tp_weaklistoffset */
236 0, /* tp_iter */
236 0, /* tp_iter */
237 0, /* tp_iternext */
237 0, /* tp_iternext */
238 ZstdCompressionObj_methods, /* tp_methods */
238 ZstdCompressionObj_methods, /* tp_methods */
239 0, /* tp_members */
239 0, /* tp_members */
240 0, /* tp_getset */
240 0, /* tp_getset */
241 0, /* tp_base */
241 0, /* tp_base */
242 0, /* tp_dict */
242 0, /* tp_dict */
243 0, /* tp_descr_get */
243 0, /* tp_descr_get */
244 0, /* tp_descr_set */
244 0, /* tp_descr_set */
245 0, /* tp_dictoffset */
245 0, /* tp_dictoffset */
246 0, /* tp_init */
246 0, /* tp_init */
247 0, /* tp_alloc */
247 0, /* tp_alloc */
248 PyType_GenericNew, /* tp_new */
248 PyType_GenericNew, /* tp_new */
249 };
249 };
250
250
251 void compressobj_module_init(PyObject* module) {
251 void compressobj_module_init(PyObject* module) {
252 Py_TYPE(&ZstdCompressionObjType) = &PyType_Type;
252 Py_SET_TYPE(&ZstdCompressionObjType, &PyType_Type);
253 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
253 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
254 return;
254 return;
255 }
255 }
256 }
256 }
@@ -1,1670 +1,1670 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10 #include "pool.h"
10 #include "pool.h"
11
11
12 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
13
13
14 int setup_cctx(ZstdCompressor* compressor) {
14 int setup_cctx(ZstdCompressor* compressor) {
15 size_t zresult;
15 size_t zresult;
16
16
17 assert(compressor);
17 assert(compressor);
18 assert(compressor->cctx);
18 assert(compressor->cctx);
19 assert(compressor->params);
19 assert(compressor->params);
20
20
21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
22 if (ZSTD_isError(zresult)) {
22 if (ZSTD_isError(zresult)) {
23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
24 ZSTD_getErrorName(zresult));
24 ZSTD_getErrorName(zresult));
25 return 1;
25 return 1;
26 }
26 }
27
27
28 if (compressor->dict) {
28 if (compressor->dict) {
29 if (compressor->dict->cdict) {
29 if (compressor->dict->cdict) {
30 zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
30 zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
31 }
31 }
32 else {
32 else {
33 zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
33 zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
34 compressor->dict->dictData, compressor->dict->dictSize,
34 compressor->dict->dictData, compressor->dict->dictSize,
35 ZSTD_dlm_byRef, compressor->dict->dictType);
35 ZSTD_dlm_byRef, compressor->dict->dictType);
36 }
36 }
37 if (ZSTD_isError(zresult)) {
37 if (ZSTD_isError(zresult)) {
38 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
38 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
39 ZSTD_getErrorName(zresult));
39 ZSTD_getErrorName(zresult));
40 return 1;
40 return 1;
41 }
41 }
42 }
42 }
43
43
44 return 0;
44 return 0;
45 }
45 }
46
46
47 static PyObject* frame_progression(ZSTD_CCtx* cctx) {
47 static PyObject* frame_progression(ZSTD_CCtx* cctx) {
48 PyObject* result = NULL;
48 PyObject* result = NULL;
49 PyObject* value;
49 PyObject* value;
50 ZSTD_frameProgression progression;
50 ZSTD_frameProgression progression;
51
51
52 result = PyTuple_New(3);
52 result = PyTuple_New(3);
53 if (!result) {
53 if (!result) {
54 return NULL;
54 return NULL;
55 }
55 }
56
56
57 progression = ZSTD_getFrameProgression(cctx);
57 progression = ZSTD_getFrameProgression(cctx);
58
58
59 value = PyLong_FromUnsignedLongLong(progression.ingested);
59 value = PyLong_FromUnsignedLongLong(progression.ingested);
60 if (!value) {
60 if (!value) {
61 Py_DECREF(result);
61 Py_DECREF(result);
62 return NULL;
62 return NULL;
63 }
63 }
64
64
65 PyTuple_SET_ITEM(result, 0, value);
65 PyTuple_SET_ITEM(result, 0, value);
66
66
67 value = PyLong_FromUnsignedLongLong(progression.consumed);
67 value = PyLong_FromUnsignedLongLong(progression.consumed);
68 if (!value) {
68 if (!value) {
69 Py_DECREF(result);
69 Py_DECREF(result);
70 return NULL;
70 return NULL;
71 }
71 }
72
72
73 PyTuple_SET_ITEM(result, 1, value);
73 PyTuple_SET_ITEM(result, 1, value);
74
74
75 value = PyLong_FromUnsignedLongLong(progression.produced);
75 value = PyLong_FromUnsignedLongLong(progression.produced);
76 if (!value) {
76 if (!value) {
77 Py_DECREF(result);
77 Py_DECREF(result);
78 return NULL;
78 return NULL;
79 }
79 }
80
80
81 PyTuple_SET_ITEM(result, 2, value);
81 PyTuple_SET_ITEM(result, 2, value);
82
82
83 return result;
83 return result;
84 }
84 }
85
85
86 PyDoc_STRVAR(ZstdCompressor__doc__,
86 PyDoc_STRVAR(ZstdCompressor__doc__,
87 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
87 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
88 "\n"
88 "\n"
89 "Create an object used to perform Zstandard compression.\n"
89 "Create an object used to perform Zstandard compression.\n"
90 "\n"
90 "\n"
91 "An instance can compress data various ways. Instances can be used multiple\n"
91 "An instance can compress data various ways. Instances can be used multiple\n"
92 "times. Each compression operation will use the compression parameters\n"
92 "times. Each compression operation will use the compression parameters\n"
93 "defined at construction time.\n"
93 "defined at construction time.\n"
94 "\n"
94 "\n"
95 "Compression can be configured via the following names arguments:\n"
95 "Compression can be configured via the following names arguments:\n"
96 "\n"
96 "\n"
97 "level\n"
97 "level\n"
98 " Integer compression level.\n"
98 " Integer compression level.\n"
99 "dict_data\n"
99 "dict_data\n"
100 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
100 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
101 "compression_params\n"
101 "compression_params\n"
102 " A ``CompressionParameters`` instance defining low-level compression"
102 " A ``CompressionParameters`` instance defining low-level compression"
103 " parameters. If defined, this will overwrite the ``level`` argument.\n"
103 " parameters. If defined, this will overwrite the ``level`` argument.\n"
104 "write_checksum\n"
104 "write_checksum\n"
105 " If True, a 4 byte content checksum will be written with the compressed\n"
105 " If True, a 4 byte content checksum will be written with the compressed\n"
106 " data, allowing the decompressor to perform content verification.\n"
106 " data, allowing the decompressor to perform content verification.\n"
107 "write_content_size\n"
107 "write_content_size\n"
108 " If True (the default), the decompressed content size will be included in\n"
108 " If True (the default), the decompressed content size will be included in\n"
109 " the header of the compressed data. This data will only be written if the\n"
109 " the header of the compressed data. This data will only be written if the\n"
110 " compressor knows the size of the input data.\n"
110 " compressor knows the size of the input data.\n"
111 "write_dict_id\n"
111 "write_dict_id\n"
112 " Determines whether the dictionary ID will be written into the compressed\n"
112 " Determines whether the dictionary ID will be written into the compressed\n"
113 " data. Defaults to True. Only adds content to the compressed data if\n"
113 " data. Defaults to True. Only adds content to the compressed data if\n"
114 " a dictionary is being used.\n"
114 " a dictionary is being used.\n"
115 "threads\n"
115 "threads\n"
116 " Number of threads to use to compress data concurrently. When set,\n"
116 " Number of threads to use to compress data concurrently. When set,\n"
117 " compression operations are performed on multiple threads. The default\n"
117 " compression operations are performed on multiple threads. The default\n"
118 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
118 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
119 " set the number of threads to the number of detected logical CPUs.\n"
119 " set the number of threads to the number of detected logical CPUs.\n"
120 );
120 );
121
121
122 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
122 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
123 static char* kwlist[] = {
123 static char* kwlist[] = {
124 "level",
124 "level",
125 "dict_data",
125 "dict_data",
126 "compression_params",
126 "compression_params",
127 "write_checksum",
127 "write_checksum",
128 "write_content_size",
128 "write_content_size",
129 "write_dict_id",
129 "write_dict_id",
130 "threads",
130 "threads",
131 NULL
131 NULL
132 };
132 };
133
133
134 int level = 3;
134 int level = 3;
135 ZstdCompressionDict* dict = NULL;
135 ZstdCompressionDict* dict = NULL;
136 ZstdCompressionParametersObject* params = NULL;
136 ZstdCompressionParametersObject* params = NULL;
137 PyObject* writeChecksum = NULL;
137 PyObject* writeChecksum = NULL;
138 PyObject* writeContentSize = NULL;
138 PyObject* writeContentSize = NULL;
139 PyObject* writeDictID = NULL;
139 PyObject* writeDictID = NULL;
140 int threads = 0;
140 int threads = 0;
141
141
142 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
142 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
143 kwlist, &level, &ZstdCompressionDictType, &dict,
143 kwlist, &level, &ZstdCompressionDictType, &dict,
144 &ZstdCompressionParametersType, &params,
144 &ZstdCompressionParametersType, &params,
145 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
145 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
146 return -1;
146 return -1;
147 }
147 }
148
148
149 if (level > ZSTD_maxCLevel()) {
149 if (level > ZSTD_maxCLevel()) {
150 PyErr_Format(PyExc_ValueError, "level must be less than %d",
150 PyErr_Format(PyExc_ValueError, "level must be less than %d",
151 ZSTD_maxCLevel() + 1);
151 ZSTD_maxCLevel() + 1);
152 return -1;
152 return -1;
153 }
153 }
154
154
155 if (threads < 0) {
155 if (threads < 0) {
156 threads = cpu_count();
156 threads = cpu_count();
157 }
157 }
158
158
159 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
159 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
160 overhead of each compression operation. */
160 overhead of each compression operation. */
161 self->cctx = ZSTD_createCCtx();
161 self->cctx = ZSTD_createCCtx();
162 if (!self->cctx) {
162 if (!self->cctx) {
163 PyErr_NoMemory();
163 PyErr_NoMemory();
164 return -1;
164 return -1;
165 }
165 }
166
166
167 /* TODO stuff the original parameters away somewhere so we can reset later. This
167 /* TODO stuff the original parameters away somewhere so we can reset later. This
168 will allow us to do things like automatically adjust cparams based on input
168 will allow us to do things like automatically adjust cparams based on input
169 size (assuming zstd isn't doing that internally). */
169 size (assuming zstd isn't doing that internally). */
170
170
171 self->params = ZSTD_createCCtxParams();
171 self->params = ZSTD_createCCtxParams();
172 if (!self->params) {
172 if (!self->params) {
173 PyErr_NoMemory();
173 PyErr_NoMemory();
174 return -1;
174 return -1;
175 }
175 }
176
176
177 if (params && writeChecksum) {
177 if (params && writeChecksum) {
178 PyErr_SetString(PyExc_ValueError,
178 PyErr_SetString(PyExc_ValueError,
179 "cannot define compression_params and write_checksum");
179 "cannot define compression_params and write_checksum");
180 return -1;
180 return -1;
181 }
181 }
182
182
183 if (params && writeContentSize) {
183 if (params && writeContentSize) {
184 PyErr_SetString(PyExc_ValueError,
184 PyErr_SetString(PyExc_ValueError,
185 "cannot define compression_params and write_content_size");
185 "cannot define compression_params and write_content_size");
186 return -1;
186 return -1;
187 }
187 }
188
188
189 if (params && writeDictID) {
189 if (params && writeDictID) {
190 PyErr_SetString(PyExc_ValueError,
190 PyErr_SetString(PyExc_ValueError,
191 "cannot define compression_params and write_dict_id");
191 "cannot define compression_params and write_dict_id");
192 return -1;
192 return -1;
193 }
193 }
194
194
195 if (params && threads) {
195 if (params && threads) {
196 PyErr_SetString(PyExc_ValueError,
196 PyErr_SetString(PyExc_ValueError,
197 "cannot define compression_params and threads");
197 "cannot define compression_params and threads");
198 return -1;
198 return -1;
199 }
199 }
200
200
201 if (params) {
201 if (params) {
202 if (set_parameters(self->params, params)) {
202 if (set_parameters(self->params, params)) {
203 return -1;
203 return -1;
204 }
204 }
205 }
205 }
206 else {
206 else {
207 if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) {
207 if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) {
208 return -1;
208 return -1;
209 }
209 }
210
210
211 if (set_parameter(self->params, ZSTD_c_contentSizeFlag,
211 if (set_parameter(self->params, ZSTD_c_contentSizeFlag,
212 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
212 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
213 return -1;
213 return -1;
214 }
214 }
215
215
216 if (set_parameter(self->params, ZSTD_c_checksumFlag,
216 if (set_parameter(self->params, ZSTD_c_checksumFlag,
217 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
217 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
218 return -1;
218 return -1;
219 }
219 }
220
220
221 if (set_parameter(self->params, ZSTD_c_dictIDFlag,
221 if (set_parameter(self->params, ZSTD_c_dictIDFlag,
222 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
222 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
223 return -1;
223 return -1;
224 }
224 }
225
225
226 if (threads) {
226 if (threads) {
227 if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) {
227 if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) {
228 return -1;
228 return -1;
229 }
229 }
230 }
230 }
231 }
231 }
232
232
233 if (dict) {
233 if (dict) {
234 self->dict = dict;
234 self->dict = dict;
235 Py_INCREF(dict);
235 Py_INCREF(dict);
236 }
236 }
237
237
238 if (setup_cctx(self)) {
238 if (setup_cctx(self)) {
239 return -1;
239 return -1;
240 }
240 }
241
241
242 return 0;
242 return 0;
243 }
243 }
244
244
245 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
245 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
246 if (self->cctx) {
246 if (self->cctx) {
247 ZSTD_freeCCtx(self->cctx);
247 ZSTD_freeCCtx(self->cctx);
248 self->cctx = NULL;
248 self->cctx = NULL;
249 }
249 }
250
250
251 if (self->params) {
251 if (self->params) {
252 ZSTD_freeCCtxParams(self->params);
252 ZSTD_freeCCtxParams(self->params);
253 self->params = NULL;
253 self->params = NULL;
254 }
254 }
255
255
256 Py_XDECREF(self->dict);
256 Py_XDECREF(self->dict);
257 PyObject_Del(self);
257 PyObject_Del(self);
258 }
258 }
259
259
260 PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
260 PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
261 "memory_size()\n"
261 "memory_size()\n"
262 "\n"
262 "\n"
263 "Obtain the memory usage of this compressor, in bytes.\n"
263 "Obtain the memory usage of this compressor, in bytes.\n"
264 );
264 );
265
265
266 static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
266 static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
267 if (self->cctx) {
267 if (self->cctx) {
268 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
268 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
269 }
269 }
270 else {
270 else {
271 PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
271 PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
272 return NULL;
272 return NULL;
273 }
273 }
274 }
274 }
275
275
276 PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
276 PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
277 "frame_progression()\n"
277 "frame_progression()\n"
278 "\n"
278 "\n"
279 "Return information on how much work the compressor has done.\n"
279 "Return information on how much work the compressor has done.\n"
280 "\n"
280 "\n"
281 "Returns a 3-tuple of (ingested, consumed, produced).\n"
281 "Returns a 3-tuple of (ingested, consumed, produced).\n"
282 );
282 );
283
283
284 static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
284 static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
285 return frame_progression(self->cctx);
285 return frame_progression(self->cctx);
286 }
286 }
287
287
288 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
288 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
289 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
289 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
290 "compress data between streams\n"
290 "compress data between streams\n"
291 "\n"
291 "\n"
292 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
292 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
293 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
293 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
294 "method.\n"
294 "method.\n"
295 "\n"
295 "\n"
296 "An optional ``size`` argument specifies the size of the source stream.\n"
296 "An optional ``size`` argument specifies the size of the source stream.\n"
297 "If defined, compression parameters will be tuned based on the size.\n"
297 "If defined, compression parameters will be tuned based on the size.\n"
298 "\n"
298 "\n"
299 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
299 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
300 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
300 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
301 "the default compression stream input and output sizes, respectively.\n"
301 "the default compression stream input and output sizes, respectively.\n"
302 );
302 );
303
303
304 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
304 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
305 static char* kwlist[] = {
305 static char* kwlist[] = {
306 "ifh",
306 "ifh",
307 "ofh",
307 "ofh",
308 "size",
308 "size",
309 "read_size",
309 "read_size",
310 "write_size",
310 "write_size",
311 NULL
311 NULL
312 };
312 };
313
313
314 PyObject* source;
314 PyObject* source;
315 PyObject* dest;
315 PyObject* dest;
316 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
316 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
317 size_t inSize = ZSTD_CStreamInSize();
317 size_t inSize = ZSTD_CStreamInSize();
318 size_t outSize = ZSTD_CStreamOutSize();
318 size_t outSize = ZSTD_CStreamOutSize();
319 ZSTD_inBuffer input;
319 ZSTD_inBuffer input;
320 ZSTD_outBuffer output;
320 ZSTD_outBuffer output;
321 Py_ssize_t totalRead = 0;
321 Py_ssize_t totalRead = 0;
322 Py_ssize_t totalWrite = 0;
322 Py_ssize_t totalWrite = 0;
323 char* readBuffer;
323 char* readBuffer;
324 Py_ssize_t readSize;
324 Py_ssize_t readSize;
325 PyObject* readResult = NULL;
325 PyObject* readResult = NULL;
326 PyObject* res = NULL;
326 PyObject* res = NULL;
327 size_t zresult;
327 size_t zresult;
328 PyObject* writeResult;
328 PyObject* writeResult;
329 PyObject* totalReadPy;
329 PyObject* totalReadPy;
330 PyObject* totalWritePy;
330 PyObject* totalWritePy;
331
331
332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
333 &source, &dest, &sourceSize, &inSize, &outSize)) {
333 &source, &dest, &sourceSize, &inSize, &outSize)) {
334 return NULL;
334 return NULL;
335 }
335 }
336
336
337 if (!PyObject_HasAttrString(source, "read")) {
337 if (!PyObject_HasAttrString(source, "read")) {
338 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
338 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
339 return NULL;
339 return NULL;
340 }
340 }
341
341
342 if (!PyObject_HasAttrString(dest, "write")) {
342 if (!PyObject_HasAttrString(dest, "write")) {
343 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
343 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
344 return NULL;
344 return NULL;
345 }
345 }
346
346
347 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
347 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
348
348
349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
350 if (ZSTD_isError(zresult)) {
350 if (ZSTD_isError(zresult)) {
351 PyErr_Format(ZstdError, "error setting source size: %s",
351 PyErr_Format(ZstdError, "error setting source size: %s",
352 ZSTD_getErrorName(zresult));
352 ZSTD_getErrorName(zresult));
353 return NULL;
353 return NULL;
354 }
354 }
355
355
356 /* Prevent free on uninitialized memory in finally. */
356 /* Prevent free on uninitialized memory in finally. */
357 output.dst = PyMem_Malloc(outSize);
357 output.dst = PyMem_Malloc(outSize);
358 if (!output.dst) {
358 if (!output.dst) {
359 PyErr_NoMemory();
359 PyErr_NoMemory();
360 res = NULL;
360 res = NULL;
361 goto finally;
361 goto finally;
362 }
362 }
363 output.size = outSize;
363 output.size = outSize;
364 output.pos = 0;
364 output.pos = 0;
365
365
366 input.src = NULL;
366 input.src = NULL;
367 input.size = 0;
367 input.size = 0;
368 input.pos = 0;
368 input.pos = 0;
369
369
370 while (1) {
370 while (1) {
371 /* Try to read from source stream. */
371 /* Try to read from source stream. */
372 readResult = PyObject_CallMethod(source, "read", "n", inSize);
372 readResult = PyObject_CallMethod(source, "read", "n", inSize);
373 if (!readResult) {
373 if (!readResult) {
374 PyErr_SetString(ZstdError, "could not read() from source");
374 PyErr_SetString(ZstdError, "could not read() from source");
375 goto finally;
375 goto finally;
376 }
376 }
377
377
378 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
378 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
379
379
380 /* If no data was read, we're at EOF. */
380 /* If no data was read, we're at EOF. */
381 if (0 == readSize) {
381 if (0 == readSize) {
382 break;
382 break;
383 }
383 }
384
384
385 totalRead += readSize;
385 totalRead += readSize;
386
386
387 /* Send data to compressor */
387 /* Send data to compressor */
388 input.src = readBuffer;
388 input.src = readBuffer;
389 input.size = readSize;
389 input.size = readSize;
390 input.pos = 0;
390 input.pos = 0;
391
391
392 while (input.pos < input.size) {
392 while (input.pos < input.size) {
393 Py_BEGIN_ALLOW_THREADS
393 Py_BEGIN_ALLOW_THREADS
394 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_continue);
394 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_continue);
395 Py_END_ALLOW_THREADS
395 Py_END_ALLOW_THREADS
396
396
397 if (ZSTD_isError(zresult)) {
397 if (ZSTD_isError(zresult)) {
398 res = NULL;
398 res = NULL;
399 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
399 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
400 goto finally;
400 goto finally;
401 }
401 }
402
402
403 if (output.pos) {
403 if (output.pos) {
404 #if PY_MAJOR_VERSION >= 3
404 #if PY_MAJOR_VERSION >= 3
405 writeResult = PyObject_CallMethod(dest, "write", "y#",
405 writeResult = PyObject_CallMethod(dest, "write", "y#",
406 #else
406 #else
407 writeResult = PyObject_CallMethod(dest, "write", "s#",
407 writeResult = PyObject_CallMethod(dest, "write", "s#",
408 #endif
408 #endif
409 output.dst, output.pos);
409 output.dst, output.pos);
410 Py_XDECREF(writeResult);
410 Py_XDECREF(writeResult);
411 totalWrite += output.pos;
411 totalWrite += output.pos;
412 output.pos = 0;
412 output.pos = 0;
413 }
413 }
414 }
414 }
415
415
416 Py_CLEAR(readResult);
416 Py_CLEAR(readResult);
417 }
417 }
418
418
419 /* We've finished reading. Now flush the compressor stream. */
419 /* We've finished reading. Now flush the compressor stream. */
420 assert(input.pos == input.size);
420 assert(input.pos == input.size);
421
421
422 while (1) {
422 while (1) {
423 Py_BEGIN_ALLOW_THREADS
423 Py_BEGIN_ALLOW_THREADS
424 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end);
424 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end);
425 Py_END_ALLOW_THREADS
425 Py_END_ALLOW_THREADS
426
426
427 if (ZSTD_isError(zresult)) {
427 if (ZSTD_isError(zresult)) {
428 PyErr_Format(ZstdError, "error ending compression stream: %s",
428 PyErr_Format(ZstdError, "error ending compression stream: %s",
429 ZSTD_getErrorName(zresult));
429 ZSTD_getErrorName(zresult));
430 res = NULL;
430 res = NULL;
431 goto finally;
431 goto finally;
432 }
432 }
433
433
434 if (output.pos) {
434 if (output.pos) {
435 #if PY_MAJOR_VERSION >= 3
435 #if PY_MAJOR_VERSION >= 3
436 writeResult = PyObject_CallMethod(dest, "write", "y#",
436 writeResult = PyObject_CallMethod(dest, "write", "y#",
437 #else
437 #else
438 writeResult = PyObject_CallMethod(dest, "write", "s#",
438 writeResult = PyObject_CallMethod(dest, "write", "s#",
439 #endif
439 #endif
440 output.dst, output.pos);
440 output.dst, output.pos);
441 totalWrite += output.pos;
441 totalWrite += output.pos;
442 Py_XDECREF(writeResult);
442 Py_XDECREF(writeResult);
443 output.pos = 0;
443 output.pos = 0;
444 }
444 }
445
445
446 if (!zresult) {
446 if (!zresult) {
447 break;
447 break;
448 }
448 }
449 }
449 }
450
450
451 totalReadPy = PyLong_FromSsize_t(totalRead);
451 totalReadPy = PyLong_FromSsize_t(totalRead);
452 totalWritePy = PyLong_FromSsize_t(totalWrite);
452 totalWritePy = PyLong_FromSsize_t(totalWrite);
453 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
453 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
454 Py_DECREF(totalReadPy);
454 Py_DECREF(totalReadPy);
455 Py_DECREF(totalWritePy);
455 Py_DECREF(totalWritePy);
456
456
457 finally:
457 finally:
458 if (output.dst) {
458 if (output.dst) {
459 PyMem_Free(output.dst);
459 PyMem_Free(output.dst);
460 }
460 }
461
461
462 Py_XDECREF(readResult);
462 Py_XDECREF(readResult);
463
463
464 return res;
464 return res;
465 }
465 }
466
466
467 PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
467 PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
468 "stream_reader(source, [size=0])\n"
468 "stream_reader(source, [size=0])\n"
469 "\n"
469 "\n"
470 "Obtain an object that behaves like an I/O stream.\n"
470 "Obtain an object that behaves like an I/O stream.\n"
471 "\n"
471 "\n"
472 "The source object can be any object with a ``read(size)`` method\n"
472 "The source object can be any object with a ``read(size)`` method\n"
473 "or an object that conforms to the buffer protocol.\n"
473 "or an object that conforms to the buffer protocol.\n"
474 );
474 );
475
475
476 static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
476 static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
477 static char* kwlist[] = {
477 static char* kwlist[] = {
478 "source",
478 "source",
479 "size",
479 "size",
480 "read_size",
480 "read_size",
481 NULL
481 NULL
482 };
482 };
483
483
484 PyObject* source;
484 PyObject* source;
485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
486 size_t readSize = ZSTD_CStreamInSize();
486 size_t readSize = ZSTD_CStreamInSize();
487 ZstdCompressionReader* result = NULL;
487 ZstdCompressionReader* result = NULL;
488 size_t zresult;
488 size_t zresult;
489
489
490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
491 &source, &sourceSize, &readSize)) {
491 &source, &sourceSize, &readSize)) {
492 return NULL;
492 return NULL;
493 }
493 }
494
494
495 result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
495 result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
496 if (!result) {
496 if (!result) {
497 return NULL;
497 return NULL;
498 }
498 }
499
499
500 if (PyObject_HasAttrString(source, "read")) {
500 if (PyObject_HasAttrString(source, "read")) {
501 result->reader = source;
501 result->reader = source;
502 Py_INCREF(source);
502 Py_INCREF(source);
503 result->readSize = readSize;
503 result->readSize = readSize;
504 }
504 }
505 else if (1 == PyObject_CheckBuffer(source)) {
505 else if (1 == PyObject_CheckBuffer(source)) {
506 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
506 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
507 goto except;
507 goto except;
508 }
508 }
509
509
510 assert(result->buffer.len >= 0);
510 assert(result->buffer.len >= 0);
511
511
512 sourceSize = result->buffer.len;
512 sourceSize = result->buffer.len;
513 }
513 }
514 else {
514 else {
515 PyErr_SetString(PyExc_TypeError,
515 PyErr_SetString(PyExc_TypeError,
516 "must pass an object with a read() method or that conforms to the buffer protocol");
516 "must pass an object with a read() method or that conforms to the buffer protocol");
517 goto except;
517 goto except;
518 }
518 }
519
519
520 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
520 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
521
521
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 if (ZSTD_isError(zresult)) {
523 if (ZSTD_isError(zresult)) {
524 PyErr_Format(ZstdError, "error setting source source: %s",
524 PyErr_Format(ZstdError, "error setting source source: %s",
525 ZSTD_getErrorName(zresult));
525 ZSTD_getErrorName(zresult));
526 goto except;
526 goto except;
527 }
527 }
528
528
529 result->compressor = self;
529 result->compressor = self;
530 Py_INCREF(self);
530 Py_INCREF(self);
531
531
532 return result;
532 return result;
533
533
534 except:
534 except:
535 Py_CLEAR(result);
535 Py_CLEAR(result);
536
536
537 return NULL;
537 return NULL;
538 }
538 }
539
539
540 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
540 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
541 "compress(data)\n"
541 "compress(data)\n"
542 "\n"
542 "\n"
543 "Compress data in a single operation.\n"
543 "Compress data in a single operation.\n"
544 "\n"
544 "\n"
545 "This is the simplest mechanism to perform compression: simply pass in a\n"
545 "This is the simplest mechanism to perform compression: simply pass in a\n"
546 "value and get a compressed value back. It is almost the most prone to abuse.\n"
546 "value and get a compressed value back. It is almost the most prone to abuse.\n"
547 "The input and output values must fit in memory, so passing in very large\n"
547 "The input and output values must fit in memory, so passing in very large\n"
548 "values can result in excessive memory usage. For this reason, one of the\n"
548 "values can result in excessive memory usage. For this reason, one of the\n"
549 "streaming based APIs is preferred for larger values.\n"
549 "streaming based APIs is preferred for larger values.\n"
550 );
550 );
551
551
552 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
552 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
553 static char* kwlist[] = {
553 static char* kwlist[] = {
554 "data",
554 "data",
555 NULL
555 NULL
556 };
556 };
557
557
558 Py_buffer source;
558 Py_buffer source;
559 size_t destSize;
559 size_t destSize;
560 PyObject* output = NULL;
560 PyObject* output = NULL;
561 size_t zresult;
561 size_t zresult;
562 ZSTD_outBuffer outBuffer;
562 ZSTD_outBuffer outBuffer;
563 ZSTD_inBuffer inBuffer;
563 ZSTD_inBuffer inBuffer;
564
564
565 #if PY_MAJOR_VERSION >= 3
565 #if PY_MAJOR_VERSION >= 3
566 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress",
566 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress",
567 #else
567 #else
568 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress",
568 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress",
569 #endif
569 #endif
570 kwlist, &source)) {
570 kwlist, &source)) {
571 return NULL;
571 return NULL;
572 }
572 }
573
573
574 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
574 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
575 PyErr_SetString(PyExc_ValueError,
575 PyErr_SetString(PyExc_ValueError,
576 "data buffer should be contiguous and have at most one dimension");
576 "data buffer should be contiguous and have at most one dimension");
577 goto finally;
577 goto finally;
578 }
578 }
579
579
580 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
580 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
581
581
582 destSize = ZSTD_compressBound(source.len);
582 destSize = ZSTD_compressBound(source.len);
583 output = PyBytes_FromStringAndSize(NULL, destSize);
583 output = PyBytes_FromStringAndSize(NULL, destSize);
584 if (!output) {
584 if (!output) {
585 goto finally;
585 goto finally;
586 }
586 }
587
587
588 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
588 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
589 if (ZSTD_isError(zresult)) {
589 if (ZSTD_isError(zresult)) {
590 PyErr_Format(ZstdError, "error setting source size: %s",
590 PyErr_Format(ZstdError, "error setting source size: %s",
591 ZSTD_getErrorName(zresult));
591 ZSTD_getErrorName(zresult));
592 Py_CLEAR(output);
592 Py_CLEAR(output);
593 goto finally;
593 goto finally;
594 }
594 }
595
595
596 inBuffer.src = source.buf;
596 inBuffer.src = source.buf;
597 inBuffer.size = source.len;
597 inBuffer.size = source.len;
598 inBuffer.pos = 0;
598 inBuffer.pos = 0;
599
599
600 outBuffer.dst = PyBytes_AsString(output);
600 outBuffer.dst = PyBytes_AsString(output);
601 outBuffer.size = destSize;
601 outBuffer.size = destSize;
602 outBuffer.pos = 0;
602 outBuffer.pos = 0;
603
603
604 Py_BEGIN_ALLOW_THREADS
604 Py_BEGIN_ALLOW_THREADS
605 /* By avoiding ZSTD_compress(), we don't necessarily write out content
605 /* By avoiding ZSTD_compress(), we don't necessarily write out content
606 size. This means the argument to ZstdCompressor to control frame
606 size. This means the argument to ZstdCompressor to control frame
607 parameters is honored. */
607 parameters is honored. */
608 zresult = ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
608 zresult = ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
609 Py_END_ALLOW_THREADS
609 Py_END_ALLOW_THREADS
610
610
611 if (ZSTD_isError(zresult)) {
611 if (ZSTD_isError(zresult)) {
612 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
612 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
613 Py_CLEAR(output);
613 Py_CLEAR(output);
614 goto finally;
614 goto finally;
615 }
615 }
616 else if (zresult) {
616 else if (zresult) {
617 PyErr_SetString(ZstdError, "unexpected partial frame flush");
617 PyErr_SetString(ZstdError, "unexpected partial frame flush");
618 Py_CLEAR(output);
618 Py_CLEAR(output);
619 goto finally;
619 goto finally;
620 }
620 }
621
621
622 Py_SIZE(output) = outBuffer.pos;
622 Py_SET_SIZE(output, outBuffer.pos);
623
623
624 finally:
624 finally:
625 PyBuffer_Release(&source);
625 PyBuffer_Release(&source);
626 return output;
626 return output;
627 }
627 }
628
628
629 PyDoc_STRVAR(ZstdCompressionObj__doc__,
629 PyDoc_STRVAR(ZstdCompressionObj__doc__,
630 "compressobj()\n"
630 "compressobj()\n"
631 "\n"
631 "\n"
632 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
632 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
633 "\n"
633 "\n"
634 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
634 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
635 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
635 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
636 "without changing how compression is performed.\n"
636 "without changing how compression is performed.\n"
637 );
637 );
638
638
639 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
639 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
640 static char* kwlist[] = {
640 static char* kwlist[] = {
641 "size",
641 "size",
642 NULL
642 NULL
643 };
643 };
644
644
645 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
645 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
646 size_t outSize = ZSTD_CStreamOutSize();
646 size_t outSize = ZSTD_CStreamOutSize();
647 ZstdCompressionObj* result = NULL;
647 ZstdCompressionObj* result = NULL;
648 size_t zresult;
648 size_t zresult;
649
649
650 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
650 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
651 return NULL;
651 return NULL;
652 }
652 }
653
653
654 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
654 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
655
655
656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
657 if (ZSTD_isError(zresult)) {
657 if (ZSTD_isError(zresult)) {
658 PyErr_Format(ZstdError, "error setting source size: %s",
658 PyErr_Format(ZstdError, "error setting source size: %s",
659 ZSTD_getErrorName(zresult));
659 ZSTD_getErrorName(zresult));
660 return NULL;
660 return NULL;
661 }
661 }
662
662
663 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
663 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
664 if (!result) {
664 if (!result) {
665 return NULL;
665 return NULL;
666 }
666 }
667
667
668 result->output.dst = PyMem_Malloc(outSize);
668 result->output.dst = PyMem_Malloc(outSize);
669 if (!result->output.dst) {
669 if (!result->output.dst) {
670 PyErr_NoMemory();
670 PyErr_NoMemory();
671 Py_DECREF(result);
671 Py_DECREF(result);
672 return NULL;
672 return NULL;
673 }
673 }
674 result->output.size = outSize;
674 result->output.size = outSize;
675 result->compressor = self;
675 result->compressor = self;
676 Py_INCREF(result->compressor);
676 Py_INCREF(result->compressor);
677
677
678 return result;
678 return result;
679 }
679 }
680
680
681 PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__,
681 PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__,
682 "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n"
682 "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n"
683 "Read uncompressed data from a reader and return an iterator\n"
683 "Read uncompressed data from a reader and return an iterator\n"
684 "\n"
684 "\n"
685 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
685 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
686 "\n"
686 "\n"
687 "Uncompressed data will be obtained from ``reader`` by calling the\n"
687 "Uncompressed data will be obtained from ``reader`` by calling the\n"
688 "``read(size)`` method of it. The source data will be streamed into a\n"
688 "``read(size)`` method of it. The source data will be streamed into a\n"
689 "compressor. As compressed data is available, it will be exposed to the\n"
689 "compressor. As compressed data is available, it will be exposed to the\n"
690 "iterator.\n"
690 "iterator.\n"
691 "\n"
691 "\n"
692 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
692 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
693 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
693 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
694 "and output defaults, respectively.\n"
694 "and output defaults, respectively.\n"
695 "\n"
695 "\n"
696 "The caller is partially in control of how fast data is fed into the\n"
696 "The caller is partially in control of how fast data is fed into the\n"
697 "compressor by how it consumes the returned iterator. The compressor will\n"
697 "compressor by how it consumes the returned iterator. The compressor will\n"
698 "not consume from the reader unless the caller consumes from the iterator.\n"
698 "not consume from the reader unless the caller consumes from the iterator.\n"
699 );
699 );
700
700
701 static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
701 static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
702 static char* kwlist[] = {
702 static char* kwlist[] = {
703 "reader",
703 "reader",
704 "size",
704 "size",
705 "read_size",
705 "read_size",
706 "write_size",
706 "write_size",
707 NULL
707 NULL
708 };
708 };
709
709
710 PyObject* reader;
710 PyObject* reader;
711 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
711 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
712 size_t inSize = ZSTD_CStreamInSize();
712 size_t inSize = ZSTD_CStreamInSize();
713 size_t outSize = ZSTD_CStreamOutSize();
713 size_t outSize = ZSTD_CStreamOutSize();
714 ZstdCompressorIterator* result;
714 ZstdCompressorIterator* result;
715 size_t zresult;
715 size_t zresult;
716
716
717 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
717 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
718 &reader, &sourceSize, &inSize, &outSize)) {
718 &reader, &sourceSize, &inSize, &outSize)) {
719 return NULL;
719 return NULL;
720 }
720 }
721
721
722 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
722 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
723 if (!result) {
723 if (!result) {
724 return NULL;
724 return NULL;
725 }
725 }
726 if (PyObject_HasAttrString(reader, "read")) {
726 if (PyObject_HasAttrString(reader, "read")) {
727 result->reader = reader;
727 result->reader = reader;
728 Py_INCREF(result->reader);
728 Py_INCREF(result->reader);
729 }
729 }
730 else if (1 == PyObject_CheckBuffer(reader)) {
730 else if (1 == PyObject_CheckBuffer(reader)) {
731 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
731 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
732 goto except;
732 goto except;
733 }
733 }
734
734
735 sourceSize = result->buffer.len;
735 sourceSize = result->buffer.len;
736 }
736 }
737 else {
737 else {
738 PyErr_SetString(PyExc_ValueError,
738 PyErr_SetString(PyExc_ValueError,
739 "must pass an object with a read() method or conforms to buffer protocol");
739 "must pass an object with a read() method or conforms to buffer protocol");
740 goto except;
740 goto except;
741 }
741 }
742
742
743 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
743 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
744
744
745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
746 if (ZSTD_isError(zresult)) {
746 if (ZSTD_isError(zresult)) {
747 PyErr_Format(ZstdError, "error setting source size: %s",
747 PyErr_Format(ZstdError, "error setting source size: %s",
748 ZSTD_getErrorName(zresult));
748 ZSTD_getErrorName(zresult));
749 return NULL;
749 return NULL;
750 }
750 }
751
751
752 result->compressor = self;
752 result->compressor = self;
753 Py_INCREF(result->compressor);
753 Py_INCREF(result->compressor);
754
754
755 result->inSize = inSize;
755 result->inSize = inSize;
756 result->outSize = outSize;
756 result->outSize = outSize;
757
757
758 result->output.dst = PyMem_Malloc(outSize);
758 result->output.dst = PyMem_Malloc(outSize);
759 if (!result->output.dst) {
759 if (!result->output.dst) {
760 PyErr_NoMemory();
760 PyErr_NoMemory();
761 goto except;
761 goto except;
762 }
762 }
763 result->output.size = outSize;
763 result->output.size = outSize;
764
764
765 goto finally;
765 goto finally;
766
766
767 except:
767 except:
768 Py_CLEAR(result);
768 Py_CLEAR(result);
769
769
770 finally:
770 finally:
771 return result;
771 return result;
772 }
772 }
773
773
774 PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__,
774 PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__,
775 "Create a context manager to write compressed data to an object.\n"
775 "Create a context manager to write compressed data to an object.\n"
776 "\n"
776 "\n"
777 "The passed object must have a ``write()`` method.\n"
777 "The passed object must have a ``write()`` method.\n"
778 "\n"
778 "\n"
779 "The caller feeds input data to the object by calling ``compress(data)``.\n"
779 "The caller feeds input data to the object by calling ``compress(data)``.\n"
780 "Compressed data is written to the argument given to this function.\n"
780 "Compressed data is written to the argument given to this function.\n"
781 "\n"
781 "\n"
782 "The function takes an optional ``size`` argument indicating the total size\n"
782 "The function takes an optional ``size`` argument indicating the total size\n"
783 "of the eventual input. If specified, the size will influence compression\n"
783 "of the eventual input. If specified, the size will influence compression\n"
784 "parameter tuning and could result in the size being written into the\n"
784 "parameter tuning and could result in the size being written into the\n"
785 "header of the compressed data.\n"
785 "header of the compressed data.\n"
786 "\n"
786 "\n"
787 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
787 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
788 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
788 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
789 "for a compressor output stream.\n"
789 "for a compressor output stream.\n"
790 );
790 );
791
791
792 static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
792 static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
793 static char* kwlist[] = {
793 static char* kwlist[] = {
794 "writer",
794 "writer",
795 "size",
795 "size",
796 "write_size",
796 "write_size",
797 "write_return_read",
797 "write_return_read",
798 NULL
798 NULL
799 };
799 };
800
800
801 PyObject* writer;
801 PyObject* writer;
802 ZstdCompressionWriter* result;
802 ZstdCompressionWriter* result;
803 size_t zresult;
803 size_t zresult;
804 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
804 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
805 size_t outSize = ZSTD_CStreamOutSize();
805 size_t outSize = ZSTD_CStreamOutSize();
806 PyObject* writeReturnRead = NULL;
806 PyObject* writeReturnRead = NULL;
807
807
808 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_writer", kwlist,
808 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_writer", kwlist,
809 &writer, &sourceSize, &outSize, &writeReturnRead)) {
809 &writer, &sourceSize, &outSize, &writeReturnRead)) {
810 return NULL;
810 return NULL;
811 }
811 }
812
812
813 if (!PyObject_HasAttrString(writer, "write")) {
813 if (!PyObject_HasAttrString(writer, "write")) {
814 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
814 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
815 return NULL;
815 return NULL;
816 }
816 }
817
817
818 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
818 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
819
819
820 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
820 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
821 if (ZSTD_isError(zresult)) {
821 if (ZSTD_isError(zresult)) {
822 PyErr_Format(ZstdError, "error setting source size: %s",
822 PyErr_Format(ZstdError, "error setting source size: %s",
823 ZSTD_getErrorName(zresult));
823 ZSTD_getErrorName(zresult));
824 return NULL;
824 return NULL;
825 }
825 }
826
826
827 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
827 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
828 if (!result) {
828 if (!result) {
829 return NULL;
829 return NULL;
830 }
830 }
831
831
832 result->output.dst = PyMem_Malloc(outSize);
832 result->output.dst = PyMem_Malloc(outSize);
833 if (!result->output.dst) {
833 if (!result->output.dst) {
834 Py_DECREF(result);
834 Py_DECREF(result);
835 return (ZstdCompressionWriter*)PyErr_NoMemory();
835 return (ZstdCompressionWriter*)PyErr_NoMemory();
836 }
836 }
837
837
838 result->output.pos = 0;
838 result->output.pos = 0;
839 result->output.size = outSize;
839 result->output.size = outSize;
840
840
841 result->compressor = self;
841 result->compressor = self;
842 Py_INCREF(result->compressor);
842 Py_INCREF(result->compressor);
843
843
844 result->writer = writer;
844 result->writer = writer;
845 Py_INCREF(result->writer);
845 Py_INCREF(result->writer);
846
846
847 result->outSize = outSize;
847 result->outSize = outSize;
848 result->bytesCompressed = 0;
848 result->bytesCompressed = 0;
849 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
849 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
850
850
851 return result;
851 return result;
852 }
852 }
853
853
854 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
854 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
855 "Create an object for iterative compressing to same-sized chunks.\n"
855 "Create an object for iterative compressing to same-sized chunks.\n"
856 );
856 );
857
857
858 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
858 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
859 static char* kwlist[] = {
859 static char* kwlist[] = {
860 "size",
860 "size",
861 "chunk_size",
861 "chunk_size",
862 NULL
862 NULL
863 };
863 };
864
864
865 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
865 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
866 size_t chunkSize = ZSTD_CStreamOutSize();
866 size_t chunkSize = ZSTD_CStreamOutSize();
867 ZstdCompressionChunker* chunker;
867 ZstdCompressionChunker* chunker;
868 size_t zresult;
868 size_t zresult;
869
869
870 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
870 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
871 &sourceSize, &chunkSize)) {
871 &sourceSize, &chunkSize)) {
872 return NULL;
872 return NULL;
873 }
873 }
874
874
875 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
875 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
876
876
877 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
877 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
878 if (ZSTD_isError(zresult)) {
878 if (ZSTD_isError(zresult)) {
879 PyErr_Format(ZstdError, "error setting source size: %s",
879 PyErr_Format(ZstdError, "error setting source size: %s",
880 ZSTD_getErrorName(zresult));
880 ZSTD_getErrorName(zresult));
881 return NULL;
881 return NULL;
882 }
882 }
883
883
884 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
884 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
885 if (!chunker) {
885 if (!chunker) {
886 return NULL;
886 return NULL;
887 }
887 }
888
888
889 chunker->output.dst = PyMem_Malloc(chunkSize);
889 chunker->output.dst = PyMem_Malloc(chunkSize);
890 if (!chunker->output.dst) {
890 if (!chunker->output.dst) {
891 PyErr_NoMemory();
891 PyErr_NoMemory();
892 Py_DECREF(chunker);
892 Py_DECREF(chunker);
893 return NULL;
893 return NULL;
894 }
894 }
895 chunker->output.size = chunkSize;
895 chunker->output.size = chunkSize;
896 chunker->output.pos = 0;
896 chunker->output.pos = 0;
897
897
898 chunker->compressor = self;
898 chunker->compressor = self;
899 Py_INCREF(chunker->compressor);
899 Py_INCREF(chunker->compressor);
900
900
901 chunker->chunkSize = chunkSize;
901 chunker->chunkSize = chunkSize;
902
902
903 return chunker;
903 return chunker;
904 }
904 }
905
905
/**
 * One input for a compression operation: a pointer to the bytes and their
 * length.
 */
typedef struct {
    void* sourceData;   /* start of the input bytes */
    size_t sourceSize;  /* number of bytes at sourceData */
} DataSource;
910
910
911 typedef struct {
911 typedef struct {
912 DataSource* sources;
912 DataSource* sources;
913 Py_ssize_t sourcesSize;
913 Py_ssize_t sourcesSize;
914 unsigned long long totalSourceSize;
914 unsigned long long totalSourceSize;
915 } DataSources;
915 } DataSources;
916
916
917 typedef struct {
917 typedef struct {
918 void* dest;
918 void* dest;
919 Py_ssize_t destSize;
919 Py_ssize_t destSize;
920 BufferSegment* segments;
920 BufferSegment* segments;
921 Py_ssize_t segmentsSize;
921 Py_ssize_t segmentsSize;
922 } DestBuffer;
922 } DestBuffer;
923
923
/**
 * Failure category recorded by a compression worker in WorkerState.error.
 */
typedef enum {
    WorkerError_none = 0,       /* no error recorded */
    WorkerError_zstd = 1,
    WorkerError_no_memory = 2,  /* an allocation failed */
    WorkerError_nospace = 3
} WorkerError;
930
930
931 /**
931 /**
932 * Holds state for an individual worker performing multi_compress_to_buffer work.
932 * Holds state for an individual worker performing multi_compress_to_buffer work.
933 */
933 */
934 typedef struct {
934 typedef struct {
935 /* Used for compression. */
935 /* Used for compression. */
936 ZSTD_CCtx* cctx;
936 ZSTD_CCtx* cctx;
937
937
938 /* What to compress. */
938 /* What to compress. */
939 DataSource* sources;
939 DataSource* sources;
940 Py_ssize_t sourcesSize;
940 Py_ssize_t sourcesSize;
941 Py_ssize_t startOffset;
941 Py_ssize_t startOffset;
942 Py_ssize_t endOffset;
942 Py_ssize_t endOffset;
943 unsigned long long totalSourceSize;
943 unsigned long long totalSourceSize;
944
944
945 /* Result storage. */
945 /* Result storage. */
946 DestBuffer* destBuffers;
946 DestBuffer* destBuffers;
947 Py_ssize_t destCount;
947 Py_ssize_t destCount;
948
948
949 /* Error tracking. */
949 /* Error tracking. */
950 WorkerError error;
950 WorkerError error;
951 size_t zresult;
951 size_t zresult;
952 Py_ssize_t errorOffset;
952 Py_ssize_t errorOffset;
953 } WorkerState;
953 } WorkerState;
954
954
955 static void compress_worker(WorkerState* state) {
955 static void compress_worker(WorkerState* state) {
956 Py_ssize_t inputOffset = state->startOffset;
956 Py_ssize_t inputOffset = state->startOffset;
957 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
957 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
958 Py_ssize_t currentBufferStartOffset = state->startOffset;
958 Py_ssize_t currentBufferStartOffset = state->startOffset;
959 size_t zresult;
959 size_t zresult;
960 void* newDest;
960 void* newDest;
961 size_t allocationSize;
961 size_t allocationSize;
962 size_t boundSize;
962 size_t boundSize;
963 Py_ssize_t destOffset = 0;
963 Py_ssize_t destOffset = 0;
964 DataSource* sources = state->sources;
964 DataSource* sources = state->sources;
965 DestBuffer* destBuffer;
965 DestBuffer* destBuffer;
966
966
967 assert(!state->destBuffers);
967 assert(!state->destBuffers);
968 assert(0 == state->destCount);
968 assert(0 == state->destCount);
969
969
970 /*
970 /*
971 * The total size of the compressed data is unknown until we actually
971 * The total size of the compressed data is unknown until we actually
972 * compress data. That means we can't pre-allocate the exact size we need.
972 * compress data. That means we can't pre-allocate the exact size we need.
973 *
973 *
974 * There is a cost to every allocation and reallocation. So, it is in our
974 * There is a cost to every allocation and reallocation. So, it is in our
975 * interest to minimize the number of allocations.
975 * interest to minimize the number of allocations.
976 *
976 *
977 * There is also a cost to too few allocations. If allocations are too
977 * There is also a cost to too few allocations. If allocations are too
978 * large they may fail. If buffers are shared and all inputs become
978 * large they may fail. If buffers are shared and all inputs become
979 * irrelevant at different lifetimes, then a reference to one segment
979 * irrelevant at different lifetimes, then a reference to one segment
980 * in the buffer will keep the entire buffer alive. This leads to excessive
980 * in the buffer will keep the entire buffer alive. This leads to excessive
981 * memory usage.
981 * memory usage.
982 *
982 *
983 * Our current strategy is to assume a compression ratio of 16:1 and
983 * Our current strategy is to assume a compression ratio of 16:1 and
984 * allocate buffers of that size, rounded up to the nearest power of 2
984 * allocate buffers of that size, rounded up to the nearest power of 2
985 * (because computers like round numbers). That ratio is greater than what
985 * (because computers like round numbers). That ratio is greater than what
986 * most inputs achieve. This is by design: we don't want to over-allocate.
986 * most inputs achieve. This is by design: we don't want to over-allocate.
987 * But we don't want to under-allocate and lead to too many buffers either.
987 * But we don't want to under-allocate and lead to too many buffers either.
988 */
988 */
989
989
990 state->destCount = 1;
990 state->destCount = 1;
991
991
992 state->destBuffers = calloc(1, sizeof(DestBuffer));
992 state->destBuffers = calloc(1, sizeof(DestBuffer));
993 if (NULL == state->destBuffers) {
993 if (NULL == state->destBuffers) {
994 state->error = WorkerError_no_memory;
994 state->error = WorkerError_no_memory;
995 return;
995 return;
996 }
996 }
997
997
998 destBuffer = &state->destBuffers[state->destCount - 1];
998 destBuffer = &state->destBuffers[state->destCount - 1];
999
999
1000 /*
1000 /*
1001 * Rather than track bounds and grow the segments buffer, allocate space
1001 * Rather than track bounds and grow the segments buffer, allocate space
1002 * to hold remaining items then truncate when we're done with it.
1002 * to hold remaining items then truncate when we're done with it.
1003 */
1003 */
1004 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1004 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1005 if (NULL == destBuffer->segments) {
1005 if (NULL == destBuffer->segments) {
1006 state->error = WorkerError_no_memory;
1006 state->error = WorkerError_no_memory;
1007 return;
1007 return;
1008 }
1008 }
1009
1009
1010 destBuffer->segmentsSize = remainingItems;
1010 destBuffer->segmentsSize = remainingItems;
1011
1011
1012 assert(state->totalSourceSize <= SIZE_MAX);
1012 assert(state->totalSourceSize <= SIZE_MAX);
1013 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1013 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1014
1014
1015 /* If the maximum size of the output is larger than that, round up. */
1015 /* If the maximum size of the output is larger than that, round up. */
1016 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
1016 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
1017
1017
1018 if (boundSize > allocationSize) {
1018 if (boundSize > allocationSize) {
1019 allocationSize = roundpow2(boundSize);
1019 allocationSize = roundpow2(boundSize);
1020 }
1020 }
1021
1021
1022 destBuffer->dest = malloc(allocationSize);
1022 destBuffer->dest = malloc(allocationSize);
1023 if (NULL == destBuffer->dest) {
1023 if (NULL == destBuffer->dest) {
1024 state->error = WorkerError_no_memory;
1024 state->error = WorkerError_no_memory;
1025 return;
1025 return;
1026 }
1026 }
1027
1027
1028 destBuffer->destSize = allocationSize;
1028 destBuffer->destSize = allocationSize;
1029
1029
1030 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
1030 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
1031 void* source = sources[inputOffset].sourceData;
1031 void* source = sources[inputOffset].sourceData;
1032 size_t sourceSize = sources[inputOffset].sourceSize;
1032 size_t sourceSize = sources[inputOffset].sourceSize;
1033 size_t destAvailable;
1033 size_t destAvailable;
1034 void* dest;
1034 void* dest;
1035 ZSTD_outBuffer opOutBuffer;
1035 ZSTD_outBuffer opOutBuffer;
1036 ZSTD_inBuffer opInBuffer;
1036 ZSTD_inBuffer opInBuffer;
1037
1037
1038 destAvailable = destBuffer->destSize - destOffset;
1038 destAvailable = destBuffer->destSize - destOffset;
1039 boundSize = ZSTD_compressBound(sourceSize);
1039 boundSize = ZSTD_compressBound(sourceSize);
1040
1040
1041 /*
1041 /*
1042 * Not enough space in current buffer to hold largest compressed output.
1042 * Not enough space in current buffer to hold largest compressed output.
1043 * So allocate and switch to a new output buffer.
1043 * So allocate and switch to a new output buffer.
1044 */
1044 */
1045 if (boundSize > destAvailable) {
1045 if (boundSize > destAvailable) {
1046 /*
1046 /*
1047 * The downsizing of the existing buffer is optional. It should be cheap
1047 * The downsizing of the existing buffer is optional. It should be cheap
1048 * (unlike growing). So we just do it.
1048 * (unlike growing). So we just do it.
1049 */
1049 */
1050 if (destAvailable) {
1050 if (destAvailable) {
1051 newDest = realloc(destBuffer->dest, destOffset);
1051 newDest = realloc(destBuffer->dest, destOffset);
1052 if (NULL == newDest) {
1052 if (NULL == newDest) {
1053 state->error = WorkerError_no_memory;
1053 state->error = WorkerError_no_memory;
1054 return;
1054 return;
1055 }
1055 }
1056
1056
1057 destBuffer->dest = newDest;
1057 destBuffer->dest = newDest;
1058 destBuffer->destSize = destOffset;
1058 destBuffer->destSize = destOffset;
1059 }
1059 }
1060
1060
1061 /* Truncate segments buffer. */
1061 /* Truncate segments buffer. */
1062 newDest = realloc(destBuffer->segments,
1062 newDest = realloc(destBuffer->segments,
1063 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
1063 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
1064 if (NULL == newDest) {
1064 if (NULL == newDest) {
1065 state->error = WorkerError_no_memory;
1065 state->error = WorkerError_no_memory;
1066 return;
1066 return;
1067 }
1067 }
1068
1068
1069 destBuffer->segments = newDest;
1069 destBuffer->segments = newDest;
1070 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
1070 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
1071
1071
1072 /* Grow space for new struct. */
1072 /* Grow space for new struct. */
1073 /* TODO consider over-allocating so we don't do this every time. */
1073 /* TODO consider over-allocating so we don't do this every time. */
1074 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1074 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1075 if (NULL == newDest) {
1075 if (NULL == newDest) {
1076 state->error = WorkerError_no_memory;
1076 state->error = WorkerError_no_memory;
1077 return;
1077 return;
1078 }
1078 }
1079
1079
1080 state->destBuffers = newDest;
1080 state->destBuffers = newDest;
1081 state->destCount++;
1081 state->destCount++;
1082
1082
1083 destBuffer = &state->destBuffers[state->destCount - 1];
1083 destBuffer = &state->destBuffers[state->destCount - 1];
1084
1084
1085 /* Don't take any chances with non-NULL pointers. */
1085 /* Don't take any chances with non-NULL pointers. */
1086 memset(destBuffer, 0, sizeof(DestBuffer));
1086 memset(destBuffer, 0, sizeof(DestBuffer));
1087
1087
1088 /**
1088 /**
1089 * We could dynamically update allocation size based on work done so far.
1089 * We could dynamically update allocation size based on work done so far.
1090 * For now, keep is simple.
1090 * For now, keep is simple.
1091 */
1091 */
1092 assert(state->totalSourceSize <= SIZE_MAX);
1092 assert(state->totalSourceSize <= SIZE_MAX);
1093 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1093 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1094
1094
1095 if (boundSize > allocationSize) {
1095 if (boundSize > allocationSize) {
1096 allocationSize = roundpow2(boundSize);
1096 allocationSize = roundpow2(boundSize);
1097 }
1097 }
1098
1098
1099 destBuffer->dest = malloc(allocationSize);
1099 destBuffer->dest = malloc(allocationSize);
1100 if (NULL == destBuffer->dest) {
1100 if (NULL == destBuffer->dest) {
1101 state->error = WorkerError_no_memory;
1101 state->error = WorkerError_no_memory;
1102 return;
1102 return;
1103 }
1103 }
1104
1104
1105 destBuffer->destSize = allocationSize;
1105 destBuffer->destSize = allocationSize;
1106 destAvailable = allocationSize;
1106 destAvailable = allocationSize;
1107 destOffset = 0;
1107 destOffset = 0;
1108
1108
1109 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1109 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1110 if (NULL == destBuffer->segments) {
1110 if (NULL == destBuffer->segments) {
1111 state->error = WorkerError_no_memory;
1111 state->error = WorkerError_no_memory;
1112 return;
1112 return;
1113 }
1113 }
1114
1114
1115 destBuffer->segmentsSize = remainingItems;
1115 destBuffer->segmentsSize = remainingItems;
1116 currentBufferStartOffset = inputOffset;
1116 currentBufferStartOffset = inputOffset;
1117 }
1117 }
1118
1118
1119 dest = (char*)destBuffer->dest + destOffset;
1119 dest = (char*)destBuffer->dest + destOffset;
1120
1120
1121 opInBuffer.src = source;
1121 opInBuffer.src = source;
1122 opInBuffer.size = sourceSize;
1122 opInBuffer.size = sourceSize;
1123 opInBuffer.pos = 0;
1123 opInBuffer.pos = 0;
1124
1124
1125 opOutBuffer.dst = dest;
1125 opOutBuffer.dst = dest;
1126 opOutBuffer.size = destAvailable;
1126 opOutBuffer.size = destAvailable;
1127 opOutBuffer.pos = 0;
1127 opOutBuffer.pos = 0;
1128
1128
1129 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
1129 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
1130 if (ZSTD_isError(zresult)) {
1130 if (ZSTD_isError(zresult)) {
1131 state->error = WorkerError_zstd;
1131 state->error = WorkerError_zstd;
1132 state->zresult = zresult;
1132 state->zresult = zresult;
1133 state->errorOffset = inputOffset;
1133 state->errorOffset = inputOffset;
1134 break;
1134 break;
1135 }
1135 }
1136
1136
1137 zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1137 zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1138 if (ZSTD_isError(zresult)) {
1138 if (ZSTD_isError(zresult)) {
1139 state->error = WorkerError_zstd;
1139 state->error = WorkerError_zstd;
1140 state->zresult = zresult;
1140 state->zresult = zresult;
1141 state->errorOffset = inputOffset;
1141 state->errorOffset = inputOffset;
1142 break;
1142 break;
1143 }
1143 }
1144 else if (zresult) {
1144 else if (zresult) {
1145 state->error = WorkerError_nospace;
1145 state->error = WorkerError_nospace;
1146 state->errorOffset = inputOffset;
1146 state->errorOffset = inputOffset;
1147 break;
1147 break;
1148 }
1148 }
1149
1149
1150 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1150 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1151 destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
1151 destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
1152
1152
1153 destOffset += opOutBuffer.pos;
1153 destOffset += opOutBuffer.pos;
1154 remainingItems--;
1154 remainingItems--;
1155 }
1155 }
1156
1156
1157 if (destBuffer->destSize > destOffset) {
1157 if (destBuffer->destSize > destOffset) {
1158 newDest = realloc(destBuffer->dest, destOffset);
1158 newDest = realloc(destBuffer->dest, destOffset);
1159 if (NULL == newDest) {
1159 if (NULL == newDest) {
1160 state->error = WorkerError_no_memory;
1160 state->error = WorkerError_no_memory;
1161 return;
1161 return;
1162 }
1162 }
1163
1163
1164 destBuffer->dest = newDest;
1164 destBuffer->dest = newDest;
1165 destBuffer->destSize = destOffset;
1165 destBuffer->destSize = destOffset;
1166 }
1166 }
1167 }
1167 }
1168
1168
1169 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1169 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1170 DataSources* sources, Py_ssize_t threadCount) {
1170 DataSources* sources, Py_ssize_t threadCount) {
1171 unsigned long long bytesPerWorker;
1171 unsigned long long bytesPerWorker;
1172 POOL_ctx* pool = NULL;
1172 POOL_ctx* pool = NULL;
1173 WorkerState* workerStates = NULL;
1173 WorkerState* workerStates = NULL;
1174 Py_ssize_t i;
1174 Py_ssize_t i;
1175 unsigned long long workerBytes = 0;
1175 unsigned long long workerBytes = 0;
1176 Py_ssize_t workerStartOffset = 0;
1176 Py_ssize_t workerStartOffset = 0;
1177 Py_ssize_t currentThread = 0;
1177 Py_ssize_t currentThread = 0;
1178 int errored = 0;
1178 int errored = 0;
1179 Py_ssize_t segmentsCount = 0;
1179 Py_ssize_t segmentsCount = 0;
1180 Py_ssize_t segmentIndex;
1180 Py_ssize_t segmentIndex;
1181 PyObject* segmentsArg = NULL;
1181 PyObject* segmentsArg = NULL;
1182 ZstdBufferWithSegments* buffer;
1182 ZstdBufferWithSegments* buffer;
1183 ZstdBufferWithSegmentsCollection* result = NULL;
1183 ZstdBufferWithSegmentsCollection* result = NULL;
1184
1184
1185 assert(sources->sourcesSize > 0);
1185 assert(sources->sourcesSize > 0);
1186 assert(sources->totalSourceSize > 0);
1186 assert(sources->totalSourceSize > 0);
1187 assert(threadCount >= 1);
1187 assert(threadCount >= 1);
1188
1188
1189 /* More threads than inputs makes no sense. */
1189 /* More threads than inputs makes no sense. */
1190 threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize
1190 threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize
1191 : threadCount;
1191 : threadCount;
1192
1192
1193 /* TODO lower thread count when input size is too small and threads would add
1193 /* TODO lower thread count when input size is too small and threads would add
1194 overhead. */
1194 overhead. */
1195
1195
1196 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1196 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1197 if (NULL == workerStates) {
1197 if (NULL == workerStates) {
1198 PyErr_NoMemory();
1198 PyErr_NoMemory();
1199 goto finally;
1199 goto finally;
1200 }
1200 }
1201
1201
1202 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1202 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1203
1203
1204 if (threadCount > 1) {
1204 if (threadCount > 1) {
1205 pool = POOL_create(threadCount, 1);
1205 pool = POOL_create(threadCount, 1);
1206 if (NULL == pool) {
1206 if (NULL == pool) {
1207 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1207 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1208 goto finally;
1208 goto finally;
1209 }
1209 }
1210 }
1210 }
1211
1211
1212 bytesPerWorker = sources->totalSourceSize / threadCount;
1212 bytesPerWorker = sources->totalSourceSize / threadCount;
1213
1213
1214 for (i = 0; i < threadCount; i++) {
1214 for (i = 0; i < threadCount; i++) {
1215 size_t zresult;
1215 size_t zresult;
1216
1216
1217 workerStates[i].cctx = ZSTD_createCCtx();
1217 workerStates[i].cctx = ZSTD_createCCtx();
1218 if (!workerStates[i].cctx) {
1218 if (!workerStates[i].cctx) {
1219 PyErr_NoMemory();
1219 PyErr_NoMemory();
1220 goto finally;
1220 goto finally;
1221 }
1221 }
1222
1222
1223 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
1223 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
1224 compressor->params);
1224 compressor->params);
1225 if (ZSTD_isError(zresult)) {
1225 if (ZSTD_isError(zresult)) {
1226 PyErr_Format(ZstdError, "could not set compression parameters: %s",
1226 PyErr_Format(ZstdError, "could not set compression parameters: %s",
1227 ZSTD_getErrorName(zresult));
1227 ZSTD_getErrorName(zresult));
1228 goto finally;
1228 goto finally;
1229 }
1229 }
1230
1230
1231 if (compressor->dict) {
1231 if (compressor->dict) {
1232 if (compressor->dict->cdict) {
1232 if (compressor->dict->cdict) {
1233 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
1233 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
1234 }
1234 }
1235 else {
1235 else {
1236 zresult = ZSTD_CCtx_loadDictionary_advanced(
1236 zresult = ZSTD_CCtx_loadDictionary_advanced(
1237 workerStates[i].cctx,
1237 workerStates[i].cctx,
1238 compressor->dict->dictData,
1238 compressor->dict->dictData,
1239 compressor->dict->dictSize,
1239 compressor->dict->dictSize,
1240 ZSTD_dlm_byRef,
1240 ZSTD_dlm_byRef,
1241 compressor->dict->dictType);
1241 compressor->dict->dictType);
1242 }
1242 }
1243
1243
1244 if (ZSTD_isError(zresult)) {
1244 if (ZSTD_isError(zresult)) {
1245 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
1245 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
1246 ZSTD_getErrorName(zresult));
1246 ZSTD_getErrorName(zresult));
1247 goto finally;
1247 goto finally;
1248 }
1248 }
1249
1249
1250 }
1250 }
1251
1251
1252 workerStates[i].sources = sources->sources;
1252 workerStates[i].sources = sources->sources;
1253 workerStates[i].sourcesSize = sources->sourcesSize;
1253 workerStates[i].sourcesSize = sources->sourcesSize;
1254 }
1254 }
1255
1255
1256 Py_BEGIN_ALLOW_THREADS
1256 Py_BEGIN_ALLOW_THREADS
1257 for (i = 0; i < sources->sourcesSize; i++) {
1257 for (i = 0; i < sources->sourcesSize; i++) {
1258 workerBytes += sources->sources[i].sourceSize;
1258 workerBytes += sources->sources[i].sourceSize;
1259
1259
1260 /*
1260 /*
1261 * The last worker/thread needs to handle all remaining work. Don't
1261 * The last worker/thread needs to handle all remaining work. Don't
1262 * trigger it prematurely. Defer to the block outside of the loop
1262 * trigger it prematurely. Defer to the block outside of the loop
1263 * to run the last worker/thread. But do still process this loop
1263 * to run the last worker/thread. But do still process this loop
1264 * so workerBytes is correct.
1264 * so workerBytes is correct.
1265 */
1265 */
1266 if (currentThread == threadCount - 1) {
1266 if (currentThread == threadCount - 1) {
1267 continue;
1267 continue;
1268 }
1268 }
1269
1269
1270 if (workerBytes >= bytesPerWorker) {
1270 if (workerBytes >= bytesPerWorker) {
1271 assert(currentThread < threadCount);
1271 assert(currentThread < threadCount);
1272 workerStates[currentThread].totalSourceSize = workerBytes;
1272 workerStates[currentThread].totalSourceSize = workerBytes;
1273 workerStates[currentThread].startOffset = workerStartOffset;
1273 workerStates[currentThread].startOffset = workerStartOffset;
1274 workerStates[currentThread].endOffset = i;
1274 workerStates[currentThread].endOffset = i;
1275
1275
1276 if (threadCount > 1) {
1276 if (threadCount > 1) {
1277 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1277 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1278 }
1278 }
1279 else {
1279 else {
1280 compress_worker(&workerStates[currentThread]);
1280 compress_worker(&workerStates[currentThread]);
1281 }
1281 }
1282
1282
1283 currentThread++;
1283 currentThread++;
1284 workerStartOffset = i + 1;
1284 workerStartOffset = i + 1;
1285 workerBytes = 0;
1285 workerBytes = 0;
1286 }
1286 }
1287 }
1287 }
1288
1288
1289 if (workerBytes) {
1289 if (workerBytes) {
1290 assert(currentThread < threadCount);
1290 assert(currentThread < threadCount);
1291 workerStates[currentThread].totalSourceSize = workerBytes;
1291 workerStates[currentThread].totalSourceSize = workerBytes;
1292 workerStates[currentThread].startOffset = workerStartOffset;
1292 workerStates[currentThread].startOffset = workerStartOffset;
1293 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1293 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1294
1294
1295 if (threadCount > 1) {
1295 if (threadCount > 1) {
1296 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1296 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1297 }
1297 }
1298 else {
1298 else {
1299 compress_worker(&workerStates[currentThread]);
1299 compress_worker(&workerStates[currentThread]);
1300 }
1300 }
1301 }
1301 }
1302
1302
1303 if (threadCount > 1) {
1303 if (threadCount > 1) {
1304 POOL_free(pool);
1304 POOL_free(pool);
1305 pool = NULL;
1305 pool = NULL;
1306 }
1306 }
1307
1307
1308 Py_END_ALLOW_THREADS
1308 Py_END_ALLOW_THREADS
1309
1309
1310 for (i = 0; i < threadCount; i++) {
1310 for (i = 0; i < threadCount; i++) {
1311 switch (workerStates[i].error) {
1311 switch (workerStates[i].error) {
1312 case WorkerError_no_memory:
1312 case WorkerError_no_memory:
1313 PyErr_NoMemory();
1313 PyErr_NoMemory();
1314 errored = 1;
1314 errored = 1;
1315 break;
1315 break;
1316
1316
1317 case WorkerError_zstd:
1317 case WorkerError_zstd:
1318 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1318 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1319 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1319 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1320 errored = 1;
1320 errored = 1;
1321 break;
1321 break;
1322
1322
1323 case WorkerError_nospace:
1323 case WorkerError_nospace:
1324 PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output",
1324 PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output",
1325 workerStates[i].errorOffset);
1325 workerStates[i].errorOffset);
1326 errored = 1;
1326 errored = 1;
1327 break;
1327 break;
1328
1328
1329 default:
1329 default:
1330 ;
1330 ;
1331 }
1331 }
1332
1332
1333 if (errored) {
1333 if (errored) {
1334 break;
1334 break;
1335 }
1335 }
1336
1336
1337 }
1337 }
1338
1338
1339 if (errored) {
1339 if (errored) {
1340 goto finally;
1340 goto finally;
1341 }
1341 }
1342
1342
1343 segmentsCount = 0;
1343 segmentsCount = 0;
1344 for (i = 0; i < threadCount; i++) {
1344 for (i = 0; i < threadCount; i++) {
1345 WorkerState* state = &workerStates[i];
1345 WorkerState* state = &workerStates[i];
1346 segmentsCount += state->destCount;
1346 segmentsCount += state->destCount;
1347 }
1347 }
1348
1348
1349 segmentsArg = PyTuple_New(segmentsCount);
1349 segmentsArg = PyTuple_New(segmentsCount);
1350 if (NULL == segmentsArg) {
1350 if (NULL == segmentsArg) {
1351 goto finally;
1351 goto finally;
1352 }
1352 }
1353
1353
1354 segmentIndex = 0;
1354 segmentIndex = 0;
1355
1355
1356 for (i = 0; i < threadCount; i++) {
1356 for (i = 0; i < threadCount; i++) {
1357 Py_ssize_t j;
1357 Py_ssize_t j;
1358 WorkerState* state = &workerStates[i];
1358 WorkerState* state = &workerStates[i];
1359
1359
1360 for (j = 0; j < state->destCount; j++) {
1360 for (j = 0; j < state->destCount; j++) {
1361 DestBuffer* destBuffer = &state->destBuffers[j];
1361 DestBuffer* destBuffer = &state->destBuffers[j];
1362 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1362 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1363 destBuffer->segments, destBuffer->segmentsSize);
1363 destBuffer->segments, destBuffer->segmentsSize);
1364
1364
1365 if (NULL == buffer) {
1365 if (NULL == buffer) {
1366 goto finally;
1366 goto finally;
1367 }
1367 }
1368
1368
1369 /* Tell instance to use free() instsead of PyMem_Free(). */
1369 /* Tell instance to use free() instsead of PyMem_Free(). */
1370 buffer->useFree = 1;
1370 buffer->useFree = 1;
1371
1371
1372 /*
1372 /*
1373 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1373 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1374 * Unset it here so it doesn't get freed below.
1374 * Unset it here so it doesn't get freed below.
1375 */
1375 */
1376 destBuffer->dest = NULL;
1376 destBuffer->dest = NULL;
1377 destBuffer->segments = NULL;
1377 destBuffer->segments = NULL;
1378
1378
1379 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1379 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1380 }
1380 }
1381 }
1381 }
1382
1382
1383 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1383 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1384 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1384 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1385
1385
1386 finally:
1386 finally:
1387 Py_CLEAR(segmentsArg);
1387 Py_CLEAR(segmentsArg);
1388
1388
1389 if (pool) {
1389 if (pool) {
1390 POOL_free(pool);
1390 POOL_free(pool);
1391 }
1391 }
1392
1392
1393 if (workerStates) {
1393 if (workerStates) {
1394 Py_ssize_t j;
1394 Py_ssize_t j;
1395
1395
1396 for (i = 0; i < threadCount; i++) {
1396 for (i = 0; i < threadCount; i++) {
1397 WorkerState state = workerStates[i];
1397 WorkerState state = workerStates[i];
1398
1398
1399 if (state.cctx) {
1399 if (state.cctx) {
1400 ZSTD_freeCCtx(state.cctx);
1400 ZSTD_freeCCtx(state.cctx);
1401 }
1401 }
1402
1402
1403 /* malloc() is used in worker thread. */
1403 /* malloc() is used in worker thread. */
1404
1404
1405 for (j = 0; j < state.destCount; j++) {
1405 for (j = 0; j < state.destCount; j++) {
1406 if (state.destBuffers) {
1406 if (state.destBuffers) {
1407 free(state.destBuffers[j].dest);
1407 free(state.destBuffers[j].dest);
1408 free(state.destBuffers[j].segments);
1408 free(state.destBuffers[j].segments);
1409 }
1409 }
1410 }
1410 }
1411
1411
1412
1412
1413 free(state.destBuffers);
1413 free(state.destBuffers);
1414 }
1414 }
1415
1415
1416 PyMem_Free(workerStates);
1416 PyMem_Free(workerStates);
1417 }
1417 }
1418
1418
1419 return result;
1419 return result;
1420 }
1420 }
1421
1421
1422 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1422 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1423 "Compress multiple pieces of data as a single operation\n"
1423 "Compress multiple pieces of data as a single operation\n"
1424 "\n"
1424 "\n"
1425 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1425 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1426 "a list of bytes like objects holding data to compress.\n"
1426 "a list of bytes like objects holding data to compress.\n"
1427 "\n"
1427 "\n"
1428 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1428 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1429 "\n"
1429 "\n"
1430 "This function is optimized to perform multiple compression operations as\n"
1430 "This function is optimized to perform multiple compression operations as\n"
1431 "as possible with as little overhead as possbile.\n"
1431 "as possible with as little overhead as possbile.\n"
1432 );
1432 );
1433
1433
1434 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1434 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1435 static char* kwlist[] = {
1435 static char* kwlist[] = {
1436 "data",
1436 "data",
1437 "threads",
1437 "threads",
1438 NULL
1438 NULL
1439 };
1439 };
1440
1440
1441 PyObject* data;
1441 PyObject* data;
1442 int threads = 0;
1442 int threads = 0;
1443 Py_buffer* dataBuffers = NULL;
1443 Py_buffer* dataBuffers = NULL;
1444 DataSources sources;
1444 DataSources sources;
1445 Py_ssize_t i;
1445 Py_ssize_t i;
1446 Py_ssize_t sourceCount = 0;
1446 Py_ssize_t sourceCount = 0;
1447 ZstdBufferWithSegmentsCollection* result = NULL;
1447 ZstdBufferWithSegmentsCollection* result = NULL;
1448
1448
1449 memset(&sources, 0, sizeof(sources));
1449 memset(&sources, 0, sizeof(sources));
1450
1450
1451 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1451 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1452 &data, &threads)) {
1452 &data, &threads)) {
1453 return NULL;
1453 return NULL;
1454 }
1454 }
1455
1455
1456 if (threads < 0) {
1456 if (threads < 0) {
1457 threads = cpu_count();
1457 threads = cpu_count();
1458 }
1458 }
1459
1459
1460 if (threads < 2) {
1460 if (threads < 2) {
1461 threads = 1;
1461 threads = 1;
1462 }
1462 }
1463
1463
1464 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1464 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1465 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1465 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1466
1466
1467 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1467 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1468 if (NULL == sources.sources) {
1468 if (NULL == sources.sources) {
1469 PyErr_NoMemory();
1469 PyErr_NoMemory();
1470 goto finally;
1470 goto finally;
1471 }
1471 }
1472
1472
1473 for (i = 0; i < buffer->segmentCount; i++) {
1473 for (i = 0; i < buffer->segmentCount; i++) {
1474 if (buffer->segments[i].length > SIZE_MAX) {
1474 if (buffer->segments[i].length > SIZE_MAX) {
1475 PyErr_Format(PyExc_ValueError,
1475 PyErr_Format(PyExc_ValueError,
1476 "buffer segment %zd is too large for this platform", i);
1476 "buffer segment %zd is too large for this platform", i);
1477 goto finally;
1477 goto finally;
1478 }
1478 }
1479
1479
1480 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1480 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1481 sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
1481 sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
1482 sources.totalSourceSize += buffer->segments[i].length;
1482 sources.totalSourceSize += buffer->segments[i].length;
1483 }
1483 }
1484
1484
1485 sources.sourcesSize = buffer->segmentCount;
1485 sources.sourcesSize = buffer->segmentCount;
1486 }
1486 }
1487 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1487 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1488 Py_ssize_t j;
1488 Py_ssize_t j;
1489 Py_ssize_t offset = 0;
1489 Py_ssize_t offset = 0;
1490 ZstdBufferWithSegments* buffer;
1490 ZstdBufferWithSegments* buffer;
1491 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1491 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1492
1492
1493 sourceCount = BufferWithSegmentsCollection_length(collection);
1493 sourceCount = BufferWithSegmentsCollection_length(collection);
1494
1494
1495 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1495 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1496 if (NULL == sources.sources) {
1496 if (NULL == sources.sources) {
1497 PyErr_NoMemory();
1497 PyErr_NoMemory();
1498 goto finally;
1498 goto finally;
1499 }
1499 }
1500
1500
1501 for (i = 0; i < collection->bufferCount; i++) {
1501 for (i = 0; i < collection->bufferCount; i++) {
1502 buffer = collection->buffers[i];
1502 buffer = collection->buffers[i];
1503
1503
1504 for (j = 0; j < buffer->segmentCount; j++) {
1504 for (j = 0; j < buffer->segmentCount; j++) {
1505 if (buffer->segments[j].length > SIZE_MAX) {
1505 if (buffer->segments[j].length > SIZE_MAX) {
1506 PyErr_Format(PyExc_ValueError,
1506 PyErr_Format(PyExc_ValueError,
1507 "buffer segment %zd in buffer %zd is too large for this platform",
1507 "buffer segment %zd in buffer %zd is too large for this platform",
1508 j, i);
1508 j, i);
1509 goto finally;
1509 goto finally;
1510 }
1510 }
1511
1511
1512 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1512 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1513 sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length;
1513 sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length;
1514 sources.totalSourceSize += buffer->segments[j].length;
1514 sources.totalSourceSize += buffer->segments[j].length;
1515
1515
1516 offset++;
1516 offset++;
1517 }
1517 }
1518 }
1518 }
1519
1519
1520 sources.sourcesSize = sourceCount;
1520 sources.sourcesSize = sourceCount;
1521 }
1521 }
1522 else if (PyList_Check(data)) {
1522 else if (PyList_Check(data)) {
1523 sourceCount = PyList_GET_SIZE(data);
1523 sourceCount = PyList_GET_SIZE(data);
1524
1524
1525 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1525 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1526 if (NULL == sources.sources) {
1526 if (NULL == sources.sources) {
1527 PyErr_NoMemory();
1527 PyErr_NoMemory();
1528 goto finally;
1528 goto finally;
1529 }
1529 }
1530
1530
1531 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1531 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1532 if (NULL == dataBuffers) {
1532 if (NULL == dataBuffers) {
1533 PyErr_NoMemory();
1533 PyErr_NoMemory();
1534 goto finally;
1534 goto finally;
1535 }
1535 }
1536
1536
1537 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1537 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1538
1538
1539 for (i = 0; i < sourceCount; i++) {
1539 for (i = 0; i < sourceCount; i++) {
1540 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1540 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1541 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1541 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1542 PyErr_Clear();
1542 PyErr_Clear();
1543 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1543 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1544 goto finally;
1544 goto finally;
1545 }
1545 }
1546
1546
1547 sources.sources[i].sourceData = dataBuffers[i].buf;
1547 sources.sources[i].sourceData = dataBuffers[i].buf;
1548 sources.sources[i].sourceSize = dataBuffers[i].len;
1548 sources.sources[i].sourceSize = dataBuffers[i].len;
1549 sources.totalSourceSize += dataBuffers[i].len;
1549 sources.totalSourceSize += dataBuffers[i].len;
1550 }
1550 }
1551
1551
1552 sources.sourcesSize = sourceCount;
1552 sources.sourcesSize = sourceCount;
1553 }
1553 }
1554 else {
1554 else {
1555 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1555 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1556 goto finally;
1556 goto finally;
1557 }
1557 }
1558
1558
1559 if (0 == sources.sourcesSize) {
1559 if (0 == sources.sourcesSize) {
1560 PyErr_SetString(PyExc_ValueError, "no source elements found");
1560 PyErr_SetString(PyExc_ValueError, "no source elements found");
1561 goto finally;
1561 goto finally;
1562 }
1562 }
1563
1563
1564 if (0 == sources.totalSourceSize) {
1564 if (0 == sources.totalSourceSize) {
1565 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1565 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1566 goto finally;
1566 goto finally;
1567 }
1567 }
1568
1568
1569 if (sources.totalSourceSize > SIZE_MAX) {
1569 if (sources.totalSourceSize > SIZE_MAX) {
1570 PyErr_SetString(PyExc_ValueError, "sources are too large for this platform");
1570 PyErr_SetString(PyExc_ValueError, "sources are too large for this platform");
1571 goto finally;
1571 goto finally;
1572 }
1572 }
1573
1573
1574 result = compress_from_datasources(self, &sources, threads);
1574 result = compress_from_datasources(self, &sources, threads);
1575
1575
1576 finally:
1576 finally:
1577 PyMem_Free(sources.sources);
1577 PyMem_Free(sources.sources);
1578
1578
1579 if (dataBuffers) {
1579 if (dataBuffers) {
1580 for (i = 0; i < sourceCount; i++) {
1580 for (i = 0; i < sourceCount; i++) {
1581 PyBuffer_Release(&dataBuffers[i]);
1581 PyBuffer_Release(&dataBuffers[i]);
1582 }
1582 }
1583
1583
1584 PyMem_Free(dataBuffers);
1584 PyMem_Free(dataBuffers);
1585 }
1585 }
1586
1586
1587 return result;
1587 return result;
1588 }
1588 }
1589
1589
1590 static PyMethodDef ZstdCompressor_methods[] = {
1590 static PyMethodDef ZstdCompressor_methods[] = {
1591 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1591 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1592 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1592 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1593 { "compress", (PyCFunction)ZstdCompressor_compress,
1593 { "compress", (PyCFunction)ZstdCompressor_compress,
1594 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1594 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1595 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1595 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1596 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1596 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1597 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1597 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1598 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1598 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1599 { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
1599 { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
1600 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
1600 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
1601 { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
1601 { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
1602 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1602 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1603 { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
1603 { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
1604 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1604 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1605 /* TODO Remove deprecated API */
1605 /* TODO Remove deprecated API */
1606 { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
1606 { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
1607 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1607 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1608 /* TODO remove deprecated API */
1608 /* TODO remove deprecated API */
1609 { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
1609 { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
1610 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1610 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1611 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1611 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1612 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
1612 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
1613 { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
1613 { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
1614 METH_NOARGS, ZstdCompressor_memory_size__doc__ },
1614 METH_NOARGS, ZstdCompressor_memory_size__doc__ },
1615 { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
1615 { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
1616 METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
1616 METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
1617 { NULL, NULL }
1617 { NULL, NULL }
1618 };
1618 };
1619
1619
1620 PyTypeObject ZstdCompressorType = {
1620 PyTypeObject ZstdCompressorType = {
1621 PyVarObject_HEAD_INIT(NULL, 0)
1621 PyVarObject_HEAD_INIT(NULL, 0)
1622 "zstd.ZstdCompressor", /* tp_name */
1622 "zstd.ZstdCompressor", /* tp_name */
1623 sizeof(ZstdCompressor), /* tp_basicsize */
1623 sizeof(ZstdCompressor), /* tp_basicsize */
1624 0, /* tp_itemsize */
1624 0, /* tp_itemsize */
1625 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
1625 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
1626 0, /* tp_print */
1626 0, /* tp_print */
1627 0, /* tp_getattr */
1627 0, /* tp_getattr */
1628 0, /* tp_setattr */
1628 0, /* tp_setattr */
1629 0, /* tp_compare */
1629 0, /* tp_compare */
1630 0, /* tp_repr */
1630 0, /* tp_repr */
1631 0, /* tp_as_number */
1631 0, /* tp_as_number */
1632 0, /* tp_as_sequence */
1632 0, /* tp_as_sequence */
1633 0, /* tp_as_mapping */
1633 0, /* tp_as_mapping */
1634 0, /* tp_hash */
1634 0, /* tp_hash */
1635 0, /* tp_call */
1635 0, /* tp_call */
1636 0, /* tp_str */
1636 0, /* tp_str */
1637 0, /* tp_getattro */
1637 0, /* tp_getattro */
1638 0, /* tp_setattro */
1638 0, /* tp_setattro */
1639 0, /* tp_as_buffer */
1639 0, /* tp_as_buffer */
1640 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1640 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1641 ZstdCompressor__doc__, /* tp_doc */
1641 ZstdCompressor__doc__, /* tp_doc */
1642 0, /* tp_traverse */
1642 0, /* tp_traverse */
1643 0, /* tp_clear */
1643 0, /* tp_clear */
1644 0, /* tp_richcompare */
1644 0, /* tp_richcompare */
1645 0, /* tp_weaklistoffset */
1645 0, /* tp_weaklistoffset */
1646 0, /* tp_iter */
1646 0, /* tp_iter */
1647 0, /* tp_iternext */
1647 0, /* tp_iternext */
1648 ZstdCompressor_methods, /* tp_methods */
1648 ZstdCompressor_methods, /* tp_methods */
1649 0, /* tp_members */
1649 0, /* tp_members */
1650 0, /* tp_getset */
1650 0, /* tp_getset */
1651 0, /* tp_base */
1651 0, /* tp_base */
1652 0, /* tp_dict */
1652 0, /* tp_dict */
1653 0, /* tp_descr_get */
1653 0, /* tp_descr_get */
1654 0, /* tp_descr_set */
1654 0, /* tp_descr_set */
1655 0, /* tp_dictoffset */
1655 0, /* tp_dictoffset */
1656 (initproc)ZstdCompressor_init, /* tp_init */
1656 (initproc)ZstdCompressor_init, /* tp_init */
1657 0, /* tp_alloc */
1657 0, /* tp_alloc */
1658 PyType_GenericNew, /* tp_new */
1658 PyType_GenericNew, /* tp_new */
1659 };
1659 };
1660
1660
1661 void compressor_module_init(PyObject* mod) {
1661 void compressor_module_init(PyObject* mod) {
1662 Py_TYPE(&ZstdCompressorType) = &PyType_Type;
1662 Py_SET_TYPE(&ZstdCompressorType, &PyType_Type);
1663 if (PyType_Ready(&ZstdCompressorType) < 0) {
1663 if (PyType_Ready(&ZstdCompressorType) < 0) {
1664 return;
1664 return;
1665 }
1665 }
1666
1666
1667 Py_INCREF((PyObject*)&ZstdCompressorType);
1667 Py_INCREF((PyObject*)&ZstdCompressorType);
1668 PyModule_AddObject(mod, "ZstdCompressor",
1668 PyModule_AddObject(mod, "ZstdCompressor",
1669 (PyObject*)&ZstdCompressorType);
1669 (PyObject*)&ZstdCompressorType);
1670 }
1670 }
@@ -1,235 +1,235 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12
12
13 extern PyObject* ZstdError;
13 extern PyObject* ZstdError;
14
14
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
16 "Represents an iterator of compressed data.\n"
16 "Represents an iterator of compressed data.\n"
17 );
17 );
18
18
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
20 Py_XDECREF(self->readResult);
20 Py_XDECREF(self->readResult);
21 Py_XDECREF(self->compressor);
21 Py_XDECREF(self->compressor);
22 Py_XDECREF(self->reader);
22 Py_XDECREF(self->reader);
23
23
24 if (self->buffer.buf) {
24 if (self->buffer.buf) {
25 PyBuffer_Release(&self->buffer);
25 PyBuffer_Release(&self->buffer);
26 memset(&self->buffer, 0, sizeof(self->buffer));
26 memset(&self->buffer, 0, sizeof(self->buffer));
27 }
27 }
28
28
29 if (self->output.dst) {
29 if (self->output.dst) {
30 PyMem_Free(self->output.dst);
30 PyMem_Free(self->output.dst);
31 self->output.dst = NULL;
31 self->output.dst = NULL;
32 }
32 }
33
33
34 PyObject_Del(self);
34 PyObject_Del(self);
35 }
35 }
36
36
37 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
37 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
38 Py_INCREF(self);
38 Py_INCREF(self);
39 return self;
39 return self;
40 }
40 }
41
41
42 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
42 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
43 size_t zresult;
43 size_t zresult;
44 PyObject* readResult = NULL;
44 PyObject* readResult = NULL;
45 PyObject* chunk;
45 PyObject* chunk;
46 char* readBuffer;
46 char* readBuffer;
47 Py_ssize_t readSize = 0;
47 Py_ssize_t readSize = 0;
48 Py_ssize_t bufferRemaining;
48 Py_ssize_t bufferRemaining;
49
49
50 if (self->finishedOutput) {
50 if (self->finishedOutput) {
51 PyErr_SetString(PyExc_StopIteration, "output flushed");
51 PyErr_SetString(PyExc_StopIteration, "output flushed");
52 return NULL;
52 return NULL;
53 }
53 }
54
54
55 feedcompressor:
55 feedcompressor:
56
56
57 /* If we have data left in the input, consume it. */
57 /* If we have data left in the input, consume it. */
58 if (self->input.pos < self->input.size) {
58 if (self->input.pos < self->input.size) {
59 Py_BEGIN_ALLOW_THREADS
59 Py_BEGIN_ALLOW_THREADS
60 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
60 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
61 &self->input, ZSTD_e_continue);
61 &self->input, ZSTD_e_continue);
62 Py_END_ALLOW_THREADS
62 Py_END_ALLOW_THREADS
63
63
64 /* Release the Python object holding the input buffer. */
64 /* Release the Python object holding the input buffer. */
65 if (self->input.pos == self->input.size) {
65 if (self->input.pos == self->input.size) {
66 self->input.src = NULL;
66 self->input.src = NULL;
67 self->input.pos = 0;
67 self->input.pos = 0;
68 self->input.size = 0;
68 self->input.size = 0;
69 Py_DECREF(self->readResult);
69 Py_DECREF(self->readResult);
70 self->readResult = NULL;
70 self->readResult = NULL;
71 }
71 }
72
72
73 if (ZSTD_isError(zresult)) {
73 if (ZSTD_isError(zresult)) {
74 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
74 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
75 return NULL;
75 return NULL;
76 }
76 }
77
77
78 /* If it produced output data, emit it. */
78 /* If it produced output data, emit it. */
79 if (self->output.pos) {
79 if (self->output.pos) {
80 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
80 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
81 self->output.pos = 0;
81 self->output.pos = 0;
82 return chunk;
82 return chunk;
83 }
83 }
84 }
84 }
85
85
86 /* We should never have output data sitting around after a previous call. */
86 /* We should never have output data sitting around after a previous call. */
87 assert(self->output.pos == 0);
87 assert(self->output.pos == 0);
88
88
89 /* The code above should have either emitted a chunk and returned or consumed
89 /* The code above should have either emitted a chunk and returned or consumed
90 the entire input buffer. So the state of the input buffer is not
90 the entire input buffer. So the state of the input buffer is not
91 relevant. */
91 relevant. */
92 if (!self->finishedInput) {
92 if (!self->finishedInput) {
93 if (self->reader) {
93 if (self->reader) {
94 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
94 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
95 if (!readResult) {
95 if (!readResult) {
96 PyErr_SetString(ZstdError, "could not read() from source");
96 PyErr_SetString(ZstdError, "could not read() from source");
97 return NULL;
97 return NULL;
98 }
98 }
99
99
100 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
100 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
101 }
101 }
102 else {
102 else {
103 assert(self->buffer.buf);
103 assert(self->buffer.buf);
104
104
105 /* Only support contiguous C arrays. */
105 /* Only support contiguous C arrays. */
106 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
106 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
107 assert(self->buffer.itemsize == 1);
107 assert(self->buffer.itemsize == 1);
108
108
109 readBuffer = (char*)self->buffer.buf + self->bufferOffset;
109 readBuffer = (char*)self->buffer.buf + self->bufferOffset;
110 bufferRemaining = self->buffer.len - self->bufferOffset;
110 bufferRemaining = self->buffer.len - self->bufferOffset;
111 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
111 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
112 self->bufferOffset += readSize;
112 self->bufferOffset += readSize;
113 }
113 }
114
114
115 if (0 == readSize) {
115 if (0 == readSize) {
116 Py_XDECREF(readResult);
116 Py_XDECREF(readResult);
117 self->finishedInput = 1;
117 self->finishedInput = 1;
118 }
118 }
119 else {
119 else {
120 self->readResult = readResult;
120 self->readResult = readResult;
121 }
121 }
122 }
122 }
123
123
124 /* EOF */
124 /* EOF */
125 if (0 == readSize) {
125 if (0 == readSize) {
126 self->input.src = NULL;
126 self->input.src = NULL;
127 self->input.size = 0;
127 self->input.size = 0;
128 self->input.pos = 0;
128 self->input.pos = 0;
129
129
130 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
130 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
131 &self->input, ZSTD_e_end);
131 &self->input, ZSTD_e_end);
132 if (ZSTD_isError(zresult)) {
132 if (ZSTD_isError(zresult)) {
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
134 ZSTD_getErrorName(zresult));
134 ZSTD_getErrorName(zresult));
135 return NULL;
135 return NULL;
136 }
136 }
137
137
138 assert(self->output.pos);
138 assert(self->output.pos);
139
139
140 if (0 == zresult) {
140 if (0 == zresult) {
141 self->finishedOutput = 1;
141 self->finishedOutput = 1;
142 }
142 }
143
143
144 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
144 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
145 self->output.pos = 0;
145 self->output.pos = 0;
146 return chunk;
146 return chunk;
147 }
147 }
148
148
149 /* New data from reader. Feed into compressor. */
149 /* New data from reader. Feed into compressor. */
150 self->input.src = readBuffer;
150 self->input.src = readBuffer;
151 self->input.size = readSize;
151 self->input.size = readSize;
152 self->input.pos = 0;
152 self->input.pos = 0;
153
153
154 Py_BEGIN_ALLOW_THREADS
154 Py_BEGIN_ALLOW_THREADS
155 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
155 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
156 &self->input, ZSTD_e_continue);
156 &self->input, ZSTD_e_continue);
157 Py_END_ALLOW_THREADS
157 Py_END_ALLOW_THREADS
158
158
159 /* The input buffer currently points to memory managed by Python
159 /* The input buffer currently points to memory managed by Python
160 (readBuffer). This object was allocated by this function. If it wasn't
160 (readBuffer). This object was allocated by this function. If it wasn't
161 fully consumed, we need to release it in a subsequent function call.
161 fully consumed, we need to release it in a subsequent function call.
162 If it is fully consumed, do that now.
162 If it is fully consumed, do that now.
163 */
163 */
164 if (self->input.pos == self->input.size) {
164 if (self->input.pos == self->input.size) {
165 self->input.src = NULL;
165 self->input.src = NULL;
166 self->input.pos = 0;
166 self->input.pos = 0;
167 self->input.size = 0;
167 self->input.size = 0;
168 Py_XDECREF(self->readResult);
168 Py_XDECREF(self->readResult);
169 self->readResult = NULL;
169 self->readResult = NULL;
170 }
170 }
171
171
172 if (ZSTD_isError(zresult)) {
172 if (ZSTD_isError(zresult)) {
173 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
173 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
174 return NULL;
174 return NULL;
175 }
175 }
176
176
177 assert(self->input.pos <= self->input.size);
177 assert(self->input.pos <= self->input.size);
178
178
179 /* If we didn't write anything, start the process over. */
179 /* If we didn't write anything, start the process over. */
180 if (0 == self->output.pos) {
180 if (0 == self->output.pos) {
181 goto feedcompressor;
181 goto feedcompressor;
182 }
182 }
183
183
184 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
184 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
185 self->output.pos = 0;
185 self->output.pos = 0;
186 return chunk;
186 return chunk;
187 }
187 }
188
188
189 PyTypeObject ZstdCompressorIteratorType = {
189 PyTypeObject ZstdCompressorIteratorType = {
190 PyVarObject_HEAD_INIT(NULL, 0)
190 PyVarObject_HEAD_INIT(NULL, 0)
191 "zstd.ZstdCompressorIterator", /* tp_name */
191 "zstd.ZstdCompressorIterator", /* tp_name */
192 sizeof(ZstdCompressorIterator), /* tp_basicsize */
192 sizeof(ZstdCompressorIterator), /* tp_basicsize */
193 0, /* tp_itemsize */
193 0, /* tp_itemsize */
194 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
194 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
195 0, /* tp_print */
195 0, /* tp_print */
196 0, /* tp_getattr */
196 0, /* tp_getattr */
197 0, /* tp_setattr */
197 0, /* tp_setattr */
198 0, /* tp_compare */
198 0, /* tp_compare */
199 0, /* tp_repr */
199 0, /* tp_repr */
200 0, /* tp_as_number */
200 0, /* tp_as_number */
201 0, /* tp_as_sequence */
201 0, /* tp_as_sequence */
202 0, /* tp_as_mapping */
202 0, /* tp_as_mapping */
203 0, /* tp_hash */
203 0, /* tp_hash */
204 0, /* tp_call */
204 0, /* tp_call */
205 0, /* tp_str */
205 0, /* tp_str */
206 0, /* tp_getattro */
206 0, /* tp_getattro */
207 0, /* tp_setattro */
207 0, /* tp_setattro */
208 0, /* tp_as_buffer */
208 0, /* tp_as_buffer */
209 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
209 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
210 ZstdCompressorIterator__doc__, /* tp_doc */
210 ZstdCompressorIterator__doc__, /* tp_doc */
211 0, /* tp_traverse */
211 0, /* tp_traverse */
212 0, /* tp_clear */
212 0, /* tp_clear */
213 0, /* tp_richcompare */
213 0, /* tp_richcompare */
214 0, /* tp_weaklistoffset */
214 0, /* tp_weaklistoffset */
215 ZstdCompressorIterator_iter, /* tp_iter */
215 ZstdCompressorIterator_iter, /* tp_iter */
216 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
216 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
217 0, /* tp_methods */
217 0, /* tp_methods */
218 0, /* tp_members */
218 0, /* tp_members */
219 0, /* tp_getset */
219 0, /* tp_getset */
220 0, /* tp_base */
220 0, /* tp_base */
221 0, /* tp_dict */
221 0, /* tp_dict */
222 0, /* tp_descr_get */
222 0, /* tp_descr_get */
223 0, /* tp_descr_set */
223 0, /* tp_descr_set */
224 0, /* tp_dictoffset */
224 0, /* tp_dictoffset */
225 0, /* tp_init */
225 0, /* tp_init */
226 0, /* tp_alloc */
226 0, /* tp_alloc */
227 PyType_GenericNew, /* tp_new */
227 PyType_GenericNew, /* tp_new */
228 };
228 };
229
229
230 void compressoriterator_module_init(PyObject* mod) {
230 void compressoriterator_module_init(PyObject* mod) {
231 Py_TYPE(&ZstdCompressorIteratorType) = &PyType_Type;
231 Py_SET_TYPE(&ZstdCompressorIteratorType, &PyType_Type);
232 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
232 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
233 return;
233 return;
234 }
234 }
235 }
235 }
@@ -1,781 +1,781 b''
1 /**
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 static void set_unsupported_operation(void) {
13 static void set_unsupported_operation(void) {
14 PyObject* iomod;
14 PyObject* iomod;
15 PyObject* exc;
15 PyObject* exc;
16
16
17 iomod = PyImport_ImportModule("io");
17 iomod = PyImport_ImportModule("io");
18 if (NULL == iomod) {
18 if (NULL == iomod) {
19 return;
19 return;
20 }
20 }
21
21
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 if (NULL == exc) {
23 if (NULL == exc) {
24 Py_DECREF(iomod);
24 Py_DECREF(iomod);
25 return;
25 return;
26 }
26 }
27
27
28 PyErr_SetNone(exc);
28 PyErr_SetNone(exc);
29 Py_DECREF(exc);
29 Py_DECREF(exc);
30 Py_DECREF(iomod);
30 Py_DECREF(iomod);
31 }
31 }
32
32
33 static void reader_dealloc(ZstdDecompressionReader* self) {
33 static void reader_dealloc(ZstdDecompressionReader* self) {
34 Py_XDECREF(self->decompressor);
34 Py_XDECREF(self->decompressor);
35 Py_XDECREF(self->reader);
35 Py_XDECREF(self->reader);
36
36
37 if (self->buffer.buf) {
37 if (self->buffer.buf) {
38 PyBuffer_Release(&self->buffer);
38 PyBuffer_Release(&self->buffer);
39 }
39 }
40
40
41 PyObject_Del(self);
41 PyObject_Del(self);
42 }
42 }
43
43
44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
45 if (self->entered) {
45 if (self->entered) {
46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 return NULL;
47 return NULL;
48 }
48 }
49
49
50 self->entered = 1;
50 self->entered = 1;
51
51
52 Py_INCREF(self);
52 Py_INCREF(self);
53 return self;
53 return self;
54 }
54 }
55
55
56 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
56 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
57 PyObject* exc_type;
57 PyObject* exc_type;
58 PyObject* exc_value;
58 PyObject* exc_value;
59 PyObject* exc_tb;
59 PyObject* exc_tb;
60
60
61 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
61 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
62 return NULL;
62 return NULL;
63 }
63 }
64
64
65 self->entered = 0;
65 self->entered = 0;
66 self->closed = 1;
66 self->closed = 1;
67
67
68 /* Release resources. */
68 /* Release resources. */
69 Py_CLEAR(self->reader);
69 Py_CLEAR(self->reader);
70 if (self->buffer.buf) {
70 if (self->buffer.buf) {
71 PyBuffer_Release(&self->buffer);
71 PyBuffer_Release(&self->buffer);
72 memset(&self->buffer, 0, sizeof(self->buffer));
72 memset(&self->buffer, 0, sizeof(self->buffer));
73 }
73 }
74
74
75 Py_CLEAR(self->decompressor);
75 Py_CLEAR(self->decompressor);
76
76
77 Py_RETURN_FALSE;
77 Py_RETURN_FALSE;
78 }
78 }
79
79
80 static PyObject* reader_readable(PyObject* self) {
80 static PyObject* reader_readable(PyObject* self) {
81 Py_RETURN_TRUE;
81 Py_RETURN_TRUE;
82 }
82 }
83
83
84 static PyObject* reader_writable(PyObject* self) {
84 static PyObject* reader_writable(PyObject* self) {
85 Py_RETURN_FALSE;
85 Py_RETURN_FALSE;
86 }
86 }
87
87
88 static PyObject* reader_seekable(PyObject* self) {
88 static PyObject* reader_seekable(PyObject* self) {
89 Py_RETURN_TRUE;
89 Py_RETURN_TRUE;
90 }
90 }
91
91
92 static PyObject* reader_close(ZstdDecompressionReader* self) {
92 static PyObject* reader_close(ZstdDecompressionReader* self) {
93 self->closed = 1;
93 self->closed = 1;
94 Py_RETURN_NONE;
94 Py_RETURN_NONE;
95 }
95 }
96
96
97 static PyObject* reader_flush(PyObject* self) {
97 static PyObject* reader_flush(PyObject* self) {
98 Py_RETURN_NONE;
98 Py_RETURN_NONE;
99 }
99 }
100
100
101 static PyObject* reader_isatty(PyObject* self) {
101 static PyObject* reader_isatty(PyObject* self) {
102 Py_RETURN_FALSE;
102 Py_RETURN_FALSE;
103 }
103 }
104
104
105 /**
105 /**
106 * Read available input.
106 * Read available input.
107 *
107 *
108 * Returns 0 if no data was added to input.
108 * Returns 0 if no data was added to input.
109 * Returns 1 if new input data is available.
109 * Returns 1 if new input data is available.
110 * Returns -1 on error and sets a Python exception as a side-effect.
110 * Returns -1 on error and sets a Python exception as a side-effect.
111 */
111 */
112 int read_decompressor_input(ZstdDecompressionReader* self) {
112 int read_decompressor_input(ZstdDecompressionReader* self) {
113 if (self->finishedInput) {
113 if (self->finishedInput) {
114 return 0;
114 return 0;
115 }
115 }
116
116
117 if (self->input.pos != self->input.size) {
117 if (self->input.pos != self->input.size) {
118 return 0;
118 return 0;
119 }
119 }
120
120
121 if (self->reader) {
121 if (self->reader) {
122 Py_buffer buffer;
122 Py_buffer buffer;
123
123
124 assert(self->readResult == NULL);
124 assert(self->readResult == NULL);
125 self->readResult = PyObject_CallMethod(self->reader, "read",
125 self->readResult = PyObject_CallMethod(self->reader, "read",
126 "k", self->readSize);
126 "k", self->readSize);
127 if (NULL == self->readResult) {
127 if (NULL == self->readResult) {
128 return -1;
128 return -1;
129 }
129 }
130
130
131 memset(&buffer, 0, sizeof(buffer));
131 memset(&buffer, 0, sizeof(buffer));
132
132
133 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
133 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
134 return -1;
134 return -1;
135 }
135 }
136
136
137 /* EOF */
137 /* EOF */
138 if (0 == buffer.len) {
138 if (0 == buffer.len) {
139 self->finishedInput = 1;
139 self->finishedInput = 1;
140 Py_CLEAR(self->readResult);
140 Py_CLEAR(self->readResult);
141 }
141 }
142 else {
142 else {
143 self->input.src = buffer.buf;
143 self->input.src = buffer.buf;
144 self->input.size = buffer.len;
144 self->input.size = buffer.len;
145 self->input.pos = 0;
145 self->input.pos = 0;
146 }
146 }
147
147
148 PyBuffer_Release(&buffer);
148 PyBuffer_Release(&buffer);
149 }
149 }
150 else {
150 else {
151 assert(self->buffer.buf);
151 assert(self->buffer.buf);
152 /*
152 /*
153 * We should only get here once since expectation is we always
153 * We should only get here once since expectation is we always
154 * exhaust input buffer before reading again.
154 * exhaust input buffer before reading again.
155 */
155 */
156 assert(self->input.src == NULL);
156 assert(self->input.src == NULL);
157
157
158 self->input.src = self->buffer.buf;
158 self->input.src = self->buffer.buf;
159 self->input.size = self->buffer.len;
159 self->input.size = self->buffer.len;
160 self->input.pos = 0;
160 self->input.pos = 0;
161 }
161 }
162
162
163 return 1;
163 return 1;
164 }
164 }
165
165
166 /**
166 /**
167 * Decompresses available input into an output buffer.
167 * Decompresses available input into an output buffer.
168 *
168 *
169 * Returns 0 if we need more input.
169 * Returns 0 if we need more input.
170 * Returns 1 if output buffer should be emitted.
170 * Returns 1 if output buffer should be emitted.
171 * Returns -1 on error and sets a Python exception.
171 * Returns -1 on error and sets a Python exception.
172 */
172 */
173 int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
173 int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
174 size_t zresult;
174 size_t zresult;
175
175
176 if (self->input.pos >= self->input.size) {
176 if (self->input.pos >= self->input.size) {
177 return 0;
177 return 0;
178 }
178 }
179
179
180 Py_BEGIN_ALLOW_THREADS
180 Py_BEGIN_ALLOW_THREADS
181 zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
181 zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
182 Py_END_ALLOW_THREADS
182 Py_END_ALLOW_THREADS
183
183
184 /* Input exhausted. Clear our state tracking. */
184 /* Input exhausted. Clear our state tracking. */
185 if (self->input.pos == self->input.size) {
185 if (self->input.pos == self->input.size) {
186 memset(&self->input, 0, sizeof(self->input));
186 memset(&self->input, 0, sizeof(self->input));
187 Py_CLEAR(self->readResult);
187 Py_CLEAR(self->readResult);
188
188
189 if (self->buffer.buf) {
189 if (self->buffer.buf) {
190 self->finishedInput = 1;
190 self->finishedInput = 1;
191 }
191 }
192 }
192 }
193
193
194 if (ZSTD_isError(zresult)) {
194 if (ZSTD_isError(zresult)) {
195 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
195 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
196 return -1;
196 return -1;
197 }
197 }
198
198
199 /* We fulfilled the full read request. Signal to emit. */
199 /* We fulfilled the full read request. Signal to emit. */
200 if (output->pos && output->pos == output->size) {
200 if (output->pos && output->pos == output->size) {
201 return 1;
201 return 1;
202 }
202 }
203 /* We're at the end of a frame and we aren't allowed to return data
203 /* We're at the end of a frame and we aren't allowed to return data
204 spanning frames. */
204 spanning frames. */
205 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
205 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
206 return 1;
206 return 1;
207 }
207 }
208
208
209 /* There is more room in the output. Signal to collect more data. */
209 /* There is more room in the output. Signal to collect more data. */
210 return 0;
210 return 0;
211 }
211 }
212
212
213 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
213 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
214 static char* kwlist[] = {
214 static char* kwlist[] = {
215 "size",
215 "size",
216 NULL
216 NULL
217 };
217 };
218
218
219 Py_ssize_t size = -1;
219 Py_ssize_t size = -1;
220 PyObject* result = NULL;
220 PyObject* result = NULL;
221 char* resultBuffer;
221 char* resultBuffer;
222 Py_ssize_t resultSize;
222 Py_ssize_t resultSize;
223 ZSTD_outBuffer output;
223 ZSTD_outBuffer output;
224 int decompressResult, readResult;
224 int decompressResult, readResult;
225
225
226 if (self->closed) {
226 if (self->closed) {
227 PyErr_SetString(PyExc_ValueError, "stream is closed");
227 PyErr_SetString(PyExc_ValueError, "stream is closed");
228 return NULL;
228 return NULL;
229 }
229 }
230
230
231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
232 return NULL;
232 return NULL;
233 }
233 }
234
234
235 if (size < -1) {
235 if (size < -1) {
236 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
236 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
237 return NULL;
237 return NULL;
238 }
238 }
239
239
240 if (size == -1) {
240 if (size == -1) {
241 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
241 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
242 }
242 }
243
243
244 if (self->finishedOutput || size == 0) {
244 if (self->finishedOutput || size == 0) {
245 return PyBytes_FromStringAndSize("", 0);
245 return PyBytes_FromStringAndSize("", 0);
246 }
246 }
247
247
248 result = PyBytes_FromStringAndSize(NULL, size);
248 result = PyBytes_FromStringAndSize(NULL, size);
249 if (NULL == result) {
249 if (NULL == result) {
250 return NULL;
250 return NULL;
251 }
251 }
252
252
253 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
253 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
254
254
255 output.dst = resultBuffer;
255 output.dst = resultBuffer;
256 output.size = resultSize;
256 output.size = resultSize;
257 output.pos = 0;
257 output.pos = 0;
258
258
259 readinput:
259 readinput:
260
260
261 decompressResult = decompress_input(self, &output);
261 decompressResult = decompress_input(self, &output);
262
262
263 if (-1 == decompressResult) {
263 if (-1 == decompressResult) {
264 Py_XDECREF(result);
264 Py_XDECREF(result);
265 return NULL;
265 return NULL;
266 }
266 }
267 else if (0 == decompressResult) { }
267 else if (0 == decompressResult) { }
268 else if (1 == decompressResult) {
268 else if (1 == decompressResult) {
269 self->bytesDecompressed += output.pos;
269 self->bytesDecompressed += output.pos;
270
270
271 if (output.pos != output.size) {
271 if (output.pos != output.size) {
272 if (safe_pybytes_resize(&result, output.pos)) {
272 if (safe_pybytes_resize(&result, output.pos)) {
273 Py_XDECREF(result);
273 Py_XDECREF(result);
274 return NULL;
274 return NULL;
275 }
275 }
276 }
276 }
277 return result;
277 return result;
278 }
278 }
279 else {
279 else {
280 assert(0);
280 assert(0);
281 }
281 }
282
282
283 readResult = read_decompressor_input(self);
283 readResult = read_decompressor_input(self);
284
284
285 if (-1 == readResult) {
285 if (-1 == readResult) {
286 Py_XDECREF(result);
286 Py_XDECREF(result);
287 return NULL;
287 return NULL;
288 }
288 }
289 else if (0 == readResult) {}
289 else if (0 == readResult) {}
290 else if (1 == readResult) {}
290 else if (1 == readResult) {}
291 else {
291 else {
292 assert(0);
292 assert(0);
293 }
293 }
294
294
295 if (self->input.size) {
295 if (self->input.size) {
296 goto readinput;
296 goto readinput;
297 }
297 }
298
298
299 /* EOF */
299 /* EOF */
300 self->bytesDecompressed += output.pos;
300 self->bytesDecompressed += output.pos;
301
301
302 if (safe_pybytes_resize(&result, output.pos)) {
302 if (safe_pybytes_resize(&result, output.pos)) {
303 Py_XDECREF(result);
303 Py_XDECREF(result);
304 return NULL;
304 return NULL;
305 }
305 }
306
306
307 return result;
307 return result;
308 }
308 }
309
309
310 static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
310 static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
311 static char* kwlist[] = {
311 static char* kwlist[] = {
312 "size",
312 "size",
313 NULL
313 NULL
314 };
314 };
315
315
316 Py_ssize_t size = -1;
316 Py_ssize_t size = -1;
317 PyObject* result = NULL;
317 PyObject* result = NULL;
318 char* resultBuffer;
318 char* resultBuffer;
319 Py_ssize_t resultSize;
319 Py_ssize_t resultSize;
320 ZSTD_outBuffer output;
320 ZSTD_outBuffer output;
321
321
322 if (self->closed) {
322 if (self->closed) {
323 PyErr_SetString(PyExc_ValueError, "stream is closed");
323 PyErr_SetString(PyExc_ValueError, "stream is closed");
324 return NULL;
324 return NULL;
325 }
325 }
326
326
327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
328 return NULL;
328 return NULL;
329 }
329 }
330
330
331 if (size < -1) {
331 if (size < -1) {
332 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
332 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
333 return NULL;
333 return NULL;
334 }
334 }
335
335
336 if (self->finishedOutput || size == 0) {
336 if (self->finishedOutput || size == 0) {
337 return PyBytes_FromStringAndSize("", 0);
337 return PyBytes_FromStringAndSize("", 0);
338 }
338 }
339
339
340 if (size == -1) {
340 if (size == -1) {
341 size = ZSTD_DStreamOutSize();
341 size = ZSTD_DStreamOutSize();
342 }
342 }
343
343
344 result = PyBytes_FromStringAndSize(NULL, size);
344 result = PyBytes_FromStringAndSize(NULL, size);
345 if (NULL == result) {
345 if (NULL == result) {
346 return NULL;
346 return NULL;
347 }
347 }
348
348
349 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
349 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
350
350
351 output.dst = resultBuffer;
351 output.dst = resultBuffer;
352 output.size = resultSize;
352 output.size = resultSize;
353 output.pos = 0;
353 output.pos = 0;
354
354
355 /* read1() is supposed to use at most 1 read() from the underlying stream.
355 /* read1() is supposed to use at most 1 read() from the underlying stream.
356 * However, we can't satisfy this requirement with decompression due to the
356 * However, we can't satisfy this requirement with decompression due to the
357 * nature of how decompression works. Our strategy is to read + decompress
357 * nature of how decompression works. Our strategy is to read + decompress
358 * until we get any output, at which point we return. This satisfies the
358 * until we get any output, at which point we return. This satisfies the
359 * intent of the read1() API to limit read operations.
359 * intent of the read1() API to limit read operations.
360 */
360 */
361 while (!self->finishedInput) {
361 while (!self->finishedInput) {
362 int readResult, decompressResult;
362 int readResult, decompressResult;
363
363
364 readResult = read_decompressor_input(self);
364 readResult = read_decompressor_input(self);
365 if (-1 == readResult) {
365 if (-1 == readResult) {
366 Py_XDECREF(result);
366 Py_XDECREF(result);
367 return NULL;
367 return NULL;
368 }
368 }
369 else if (0 == readResult || 1 == readResult) { }
369 else if (0 == readResult || 1 == readResult) { }
370 else {
370 else {
371 assert(0);
371 assert(0);
372 }
372 }
373
373
374 decompressResult = decompress_input(self, &output);
374 decompressResult = decompress_input(self, &output);
375
375
376 if (-1 == decompressResult) {
376 if (-1 == decompressResult) {
377 Py_XDECREF(result);
377 Py_XDECREF(result);
378 return NULL;
378 return NULL;
379 }
379 }
380 else if (0 == decompressResult || 1 == decompressResult) { }
380 else if (0 == decompressResult || 1 == decompressResult) { }
381 else {
381 else {
382 assert(0);
382 assert(0);
383 }
383 }
384
384
385 if (output.pos) {
385 if (output.pos) {
386 break;
386 break;
387 }
387 }
388 }
388 }
389
389
390 self->bytesDecompressed += output.pos;
390 self->bytesDecompressed += output.pos;
391 if (safe_pybytes_resize(&result, output.pos)) {
391 if (safe_pybytes_resize(&result, output.pos)) {
392 Py_XDECREF(result);
392 Py_XDECREF(result);
393 return NULL;
393 return NULL;
394 }
394 }
395
395
396 return result;
396 return result;
397 }
397 }
398
398
399 static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
399 static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
400 Py_buffer dest;
400 Py_buffer dest;
401 ZSTD_outBuffer output;
401 ZSTD_outBuffer output;
402 int decompressResult, readResult;
402 int decompressResult, readResult;
403 PyObject* result = NULL;
403 PyObject* result = NULL;
404
404
405 if (self->closed) {
405 if (self->closed) {
406 PyErr_SetString(PyExc_ValueError, "stream is closed");
406 PyErr_SetString(PyExc_ValueError, "stream is closed");
407 return NULL;
407 return NULL;
408 }
408 }
409
409
410 if (self->finishedOutput) {
410 if (self->finishedOutput) {
411 return PyLong_FromLong(0);
411 return PyLong_FromLong(0);
412 }
412 }
413
413
414 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
414 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
415 return NULL;
415 return NULL;
416 }
416 }
417
417
418 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
418 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
419 PyErr_SetString(PyExc_ValueError,
419 PyErr_SetString(PyExc_ValueError,
420 "destination buffer should be contiguous and have at most one dimension");
420 "destination buffer should be contiguous and have at most one dimension");
421 goto finally;
421 goto finally;
422 }
422 }
423
423
424 output.dst = dest.buf;
424 output.dst = dest.buf;
425 output.size = dest.len;
425 output.size = dest.len;
426 output.pos = 0;
426 output.pos = 0;
427
427
428 readinput:
428 readinput:
429
429
430 decompressResult = decompress_input(self, &output);
430 decompressResult = decompress_input(self, &output);
431
431
432 if (-1 == decompressResult) {
432 if (-1 == decompressResult) {
433 goto finally;
433 goto finally;
434 }
434 }
435 else if (0 == decompressResult) { }
435 else if (0 == decompressResult) { }
436 else if (1 == decompressResult) {
436 else if (1 == decompressResult) {
437 self->bytesDecompressed += output.pos;
437 self->bytesDecompressed += output.pos;
438 result = PyLong_FromSize_t(output.pos);
438 result = PyLong_FromSize_t(output.pos);
439 goto finally;
439 goto finally;
440 }
440 }
441 else {
441 else {
442 assert(0);
442 assert(0);
443 }
443 }
444
444
445 readResult = read_decompressor_input(self);
445 readResult = read_decompressor_input(self);
446
446
447 if (-1 == readResult) {
447 if (-1 == readResult) {
448 goto finally;
448 goto finally;
449 }
449 }
450 else if (0 == readResult) {}
450 else if (0 == readResult) {}
451 else if (1 == readResult) {}
451 else if (1 == readResult) {}
452 else {
452 else {
453 assert(0);
453 assert(0);
454 }
454 }
455
455
456 if (self->input.size) {
456 if (self->input.size) {
457 goto readinput;
457 goto readinput;
458 }
458 }
459
459
460 /* EOF */
460 /* EOF */
461 self->bytesDecompressed += output.pos;
461 self->bytesDecompressed += output.pos;
462 result = PyLong_FromSize_t(output.pos);
462 result = PyLong_FromSize_t(output.pos);
463
463
464 finally:
464 finally:
465 PyBuffer_Release(&dest);
465 PyBuffer_Release(&dest);
466
466
467 return result;
467 return result;
468 }
468 }
469
469
470 static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
470 static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
471 Py_buffer dest;
471 Py_buffer dest;
472 ZSTD_outBuffer output;
472 ZSTD_outBuffer output;
473 PyObject* result = NULL;
473 PyObject* result = NULL;
474
474
475 if (self->closed) {
475 if (self->closed) {
476 PyErr_SetString(PyExc_ValueError, "stream is closed");
476 PyErr_SetString(PyExc_ValueError, "stream is closed");
477 return NULL;
477 return NULL;
478 }
478 }
479
479
480 if (self->finishedOutput) {
480 if (self->finishedOutput) {
481 return PyLong_FromLong(0);
481 return PyLong_FromLong(0);
482 }
482 }
483
483
484 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
484 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
485 return NULL;
485 return NULL;
486 }
486 }
487
487
488 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
488 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
489 PyErr_SetString(PyExc_ValueError,
489 PyErr_SetString(PyExc_ValueError,
490 "destination buffer should be contiguous and have at most one dimension");
490 "destination buffer should be contiguous and have at most one dimension");
491 goto finally;
491 goto finally;
492 }
492 }
493
493
494 output.dst = dest.buf;
494 output.dst = dest.buf;
495 output.size = dest.len;
495 output.size = dest.len;
496 output.pos = 0;
496 output.pos = 0;
497
497
498 while (!self->finishedInput && !self->finishedOutput) {
498 while (!self->finishedInput && !self->finishedOutput) {
499 int decompressResult, readResult;
499 int decompressResult, readResult;
500
500
501 readResult = read_decompressor_input(self);
501 readResult = read_decompressor_input(self);
502
502
503 if (-1 == readResult) {
503 if (-1 == readResult) {
504 goto finally;
504 goto finally;
505 }
505 }
506 else if (0 == readResult || 1 == readResult) {}
506 else if (0 == readResult || 1 == readResult) {}
507 else {
507 else {
508 assert(0);
508 assert(0);
509 }
509 }
510
510
511 decompressResult = decompress_input(self, &output);
511 decompressResult = decompress_input(self, &output);
512
512
513 if (-1 == decompressResult) {
513 if (-1 == decompressResult) {
514 goto finally;
514 goto finally;
515 }
515 }
516 else if (0 == decompressResult || 1 == decompressResult) {}
516 else if (0 == decompressResult || 1 == decompressResult) {}
517 else {
517 else {
518 assert(0);
518 assert(0);
519 }
519 }
520
520
521 if (output.pos) {
521 if (output.pos) {
522 break;
522 break;
523 }
523 }
524 }
524 }
525
525
526 self->bytesDecompressed += output.pos;
526 self->bytesDecompressed += output.pos;
527 result = PyLong_FromSize_t(output.pos);
527 result = PyLong_FromSize_t(output.pos);
528
528
529 finally:
529 finally:
530 PyBuffer_Release(&dest);
530 PyBuffer_Release(&dest);
531
531
532 return result;
532 return result;
533 }
533 }
534
534
535 static PyObject* reader_readall(PyObject* self) {
535 static PyObject* reader_readall(PyObject* self) {
536 PyObject* chunks = NULL;
536 PyObject* chunks = NULL;
537 PyObject* empty = NULL;
537 PyObject* empty = NULL;
538 PyObject* result = NULL;
538 PyObject* result = NULL;
539
539
540 /* Our strategy is to collect chunks into a list then join all the
540 /* Our strategy is to collect chunks into a list then join all the
541 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
541 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
542 * this feels simple enough to implement and avoids potentially expensive
542 * this feels simple enough to implement and avoids potentially expensive
543 * reallocations of large buffers.
543 * reallocations of large buffers.
544 */
544 */
545 chunks = PyList_New(0);
545 chunks = PyList_New(0);
546 if (NULL == chunks) {
546 if (NULL == chunks) {
547 return NULL;
547 return NULL;
548 }
548 }
549
549
550 while (1) {
550 while (1) {
551 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
551 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
552 if (NULL == chunk) {
552 if (NULL == chunk) {
553 Py_DECREF(chunks);
553 Py_DECREF(chunks);
554 return NULL;
554 return NULL;
555 }
555 }
556
556
557 if (!PyBytes_Size(chunk)) {
557 if (!PyBytes_Size(chunk)) {
558 Py_DECREF(chunk);
558 Py_DECREF(chunk);
559 break;
559 break;
560 }
560 }
561
561
562 if (PyList_Append(chunks, chunk)) {
562 if (PyList_Append(chunks, chunk)) {
563 Py_DECREF(chunk);
563 Py_DECREF(chunk);
564 Py_DECREF(chunks);
564 Py_DECREF(chunks);
565 return NULL;
565 return NULL;
566 }
566 }
567
567
568 Py_DECREF(chunk);
568 Py_DECREF(chunk);
569 }
569 }
570
570
571 empty = PyBytes_FromStringAndSize("", 0);
571 empty = PyBytes_FromStringAndSize("", 0);
572 if (NULL == empty) {
572 if (NULL == empty) {
573 Py_DECREF(chunks);
573 Py_DECREF(chunks);
574 return NULL;
574 return NULL;
575 }
575 }
576
576
577 result = PyObject_CallMethod(empty, "join", "O", chunks);
577 result = PyObject_CallMethod(empty, "join", "O", chunks);
578
578
579 Py_DECREF(empty);
579 Py_DECREF(empty);
580 Py_DECREF(chunks);
580 Py_DECREF(chunks);
581
581
582 return result;
582 return result;
583 }
583 }
584
584
585 static PyObject* reader_readline(PyObject* self) {
585 static PyObject* reader_readline(PyObject* self) {
586 set_unsupported_operation();
586 set_unsupported_operation();
587 return NULL;
587 return NULL;
588 }
588 }
589
589
590 static PyObject* reader_readlines(PyObject* self) {
590 static PyObject* reader_readlines(PyObject* self) {
591 set_unsupported_operation();
591 set_unsupported_operation();
592 return NULL;
592 return NULL;
593 }
593 }
594
594
595 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
595 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
596 Py_ssize_t pos;
596 Py_ssize_t pos;
597 int whence = 0;
597 int whence = 0;
598 unsigned long long readAmount = 0;
598 unsigned long long readAmount = 0;
599 size_t defaultOutSize = ZSTD_DStreamOutSize();
599 size_t defaultOutSize = ZSTD_DStreamOutSize();
600
600
601 if (self->closed) {
601 if (self->closed) {
602 PyErr_SetString(PyExc_ValueError, "stream is closed");
602 PyErr_SetString(PyExc_ValueError, "stream is closed");
603 return NULL;
603 return NULL;
604 }
604 }
605
605
606 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
606 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
607 return NULL;
607 return NULL;
608 }
608 }
609
609
610 if (whence == SEEK_SET) {
610 if (whence == SEEK_SET) {
611 if (pos < 0) {
611 if (pos < 0) {
612 PyErr_SetString(PyExc_ValueError,
612 PyErr_SetString(PyExc_ValueError,
613 "cannot seek to negative position with SEEK_SET");
613 "cannot seek to negative position with SEEK_SET");
614 return NULL;
614 return NULL;
615 }
615 }
616
616
617 if ((unsigned long long)pos < self->bytesDecompressed) {
617 if ((unsigned long long)pos < self->bytesDecompressed) {
618 PyErr_SetString(PyExc_ValueError,
618 PyErr_SetString(PyExc_ValueError,
619 "cannot seek zstd decompression stream backwards");
619 "cannot seek zstd decompression stream backwards");
620 return NULL;
620 return NULL;
621 }
621 }
622
622
623 readAmount = pos - self->bytesDecompressed;
623 readAmount = pos - self->bytesDecompressed;
624 }
624 }
625 else if (whence == SEEK_CUR) {
625 else if (whence == SEEK_CUR) {
626 if (pos < 0) {
626 if (pos < 0) {
627 PyErr_SetString(PyExc_ValueError,
627 PyErr_SetString(PyExc_ValueError,
628 "cannot seek zstd decompression stream backwards");
628 "cannot seek zstd decompression stream backwards");
629 return NULL;
629 return NULL;
630 }
630 }
631
631
632 readAmount = pos;
632 readAmount = pos;
633 }
633 }
634 else if (whence == SEEK_END) {
634 else if (whence == SEEK_END) {
635 /* We /could/ support this with pos==0. But let's not do that until someone
635 /* We /could/ support this with pos==0. But let's not do that until someone
636 needs it. */
636 needs it. */
637 PyErr_SetString(PyExc_ValueError,
637 PyErr_SetString(PyExc_ValueError,
638 "zstd decompression streams cannot be seeked with SEEK_END");
638 "zstd decompression streams cannot be seeked with SEEK_END");
639 return NULL;
639 return NULL;
640 }
640 }
641
641
642 /* It is a bit inefficient to do this via the Python API. But since there
642 /* It is a bit inefficient to do this via the Python API. But since there
643 is a bit of state tracking involved to read from this type, it is the
643 is a bit of state tracking involved to read from this type, it is the
644 easiest to implement. */
644 easiest to implement. */
645 while (readAmount) {
645 while (readAmount) {
646 Py_ssize_t readSize;
646 Py_ssize_t readSize;
647 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
647 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
648 readAmount < defaultOutSize ? readAmount : defaultOutSize);
648 readAmount < defaultOutSize ? readAmount : defaultOutSize);
649
649
650 if (!readResult) {
650 if (!readResult) {
651 return NULL;
651 return NULL;
652 }
652 }
653
653
654 readSize = PyBytes_GET_SIZE(readResult);
654 readSize = PyBytes_GET_SIZE(readResult);
655
655
656 Py_CLEAR(readResult);
656 Py_CLEAR(readResult);
657
657
658 /* Empty read means EOF. */
658 /* Empty read means EOF. */
659 if (!readSize) {
659 if (!readSize) {
660 break;
660 break;
661 }
661 }
662
662
663 readAmount -= readSize;
663 readAmount -= readSize;
664 }
664 }
665
665
666 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
666 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
667 }
667 }
668
668
669 static PyObject* reader_tell(ZstdDecompressionReader* self) {
669 static PyObject* reader_tell(ZstdDecompressionReader* self) {
670 /* TODO should this raise OSError since stream isn't seekable? */
670 /* TODO should this raise OSError since stream isn't seekable? */
671 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
671 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
672 }
672 }
673
673
674 static PyObject* reader_write(PyObject* self, PyObject* args) {
674 static PyObject* reader_write(PyObject* self, PyObject* args) {
675 set_unsupported_operation();
675 set_unsupported_operation();
676 return NULL;
676 return NULL;
677 }
677 }
678
678
679 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
679 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
680 set_unsupported_operation();
680 set_unsupported_operation();
681 return NULL;
681 return NULL;
682 }
682 }
683
683
684 static PyObject* reader_iter(PyObject* self) {
684 static PyObject* reader_iter(PyObject* self) {
685 set_unsupported_operation();
685 set_unsupported_operation();
686 return NULL;
686 return NULL;
687 }
687 }
688
688
689 static PyObject* reader_iternext(PyObject* self) {
689 static PyObject* reader_iternext(PyObject* self) {
690 set_unsupported_operation();
690 set_unsupported_operation();
691 return NULL;
691 return NULL;
692 }
692 }
693
693
694 static PyMethodDef reader_methods[] = {
694 static PyMethodDef reader_methods[] = {
695 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
695 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
696 PyDoc_STR("Enter a compression context") },
696 PyDoc_STR("Enter a compression context") },
697 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
697 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
698 PyDoc_STR("Exit a compression context") },
698 PyDoc_STR("Exit a compression context") },
699 { "close", (PyCFunction)reader_close, METH_NOARGS,
699 { "close", (PyCFunction)reader_close, METH_NOARGS,
700 PyDoc_STR("Close the stream so it cannot perform any more operations") },
700 PyDoc_STR("Close the stream so it cannot perform any more operations") },
701 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
701 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
702 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
702 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
703 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
703 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
704 PyDoc_STR("Returns True") },
704 PyDoc_STR("Returns True") },
705 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
705 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
706 PyDoc_STR("read compressed data") },
706 PyDoc_STR("read compressed data") },
707 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
707 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
708 PyDoc_STR("read compressed data") },
708 PyDoc_STR("read compressed data") },
709 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
709 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
710 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
710 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
711 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
711 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
712 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
712 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
713 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
713 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
714 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
714 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
715 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
715 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
716 PyDoc_STR("Returns True") },
716 PyDoc_STR("Returns True") },
717 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
717 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
718 PyDoc_STR("Returns current number of bytes compressed") },
718 PyDoc_STR("Returns current number of bytes compressed") },
719 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
719 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
720 PyDoc_STR("Returns False") },
720 PyDoc_STR("Returns False") },
721 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
721 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
722 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
722 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
723 { NULL, NULL }
723 { NULL, NULL }
724 };
724 };
725
725
726 static PyMemberDef reader_members[] = {
726 static PyMemberDef reader_members[] = {
727 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
727 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
728 READONLY, "whether stream is closed" },
728 READONLY, "whether stream is closed" },
729 { NULL }
729 { NULL }
730 };
730 };
731
731
732 PyTypeObject ZstdDecompressionReaderType = {
732 PyTypeObject ZstdDecompressionReaderType = {
733 PyVarObject_HEAD_INIT(NULL, 0)
733 PyVarObject_HEAD_INIT(NULL, 0)
734 "zstd.ZstdDecompressionReader", /* tp_name */
734 "zstd.ZstdDecompressionReader", /* tp_name */
735 sizeof(ZstdDecompressionReader), /* tp_basicsize */
735 sizeof(ZstdDecompressionReader), /* tp_basicsize */
736 0, /* tp_itemsize */
736 0, /* tp_itemsize */
737 (destructor)reader_dealloc, /* tp_dealloc */
737 (destructor)reader_dealloc, /* tp_dealloc */
738 0, /* tp_print */
738 0, /* tp_print */
739 0, /* tp_getattr */
739 0, /* tp_getattr */
740 0, /* tp_setattr */
740 0, /* tp_setattr */
741 0, /* tp_compare */
741 0, /* tp_compare */
742 0, /* tp_repr */
742 0, /* tp_repr */
743 0, /* tp_as_number */
743 0, /* tp_as_number */
744 0, /* tp_as_sequence */
744 0, /* tp_as_sequence */
745 0, /* tp_as_mapping */
745 0, /* tp_as_mapping */
746 0, /* tp_hash */
746 0, /* tp_hash */
747 0, /* tp_call */
747 0, /* tp_call */
748 0, /* tp_str */
748 0, /* tp_str */
749 0, /* tp_getattro */
749 0, /* tp_getattro */
750 0, /* tp_setattro */
750 0, /* tp_setattro */
751 0, /* tp_as_buffer */
751 0, /* tp_as_buffer */
752 Py_TPFLAGS_DEFAULT, /* tp_flags */
752 Py_TPFLAGS_DEFAULT, /* tp_flags */
753 0, /* tp_doc */
753 0, /* tp_doc */
754 0, /* tp_traverse */
754 0, /* tp_traverse */
755 0, /* tp_clear */
755 0, /* tp_clear */
756 0, /* tp_richcompare */
756 0, /* tp_richcompare */
757 0, /* tp_weaklistoffset */
757 0, /* tp_weaklistoffset */
758 reader_iter, /* tp_iter */
758 reader_iter, /* tp_iter */
759 reader_iternext, /* tp_iternext */
759 reader_iternext, /* tp_iternext */
760 reader_methods, /* tp_methods */
760 reader_methods, /* tp_methods */
761 reader_members, /* tp_members */
761 reader_members, /* tp_members */
762 0, /* tp_getset */
762 0, /* tp_getset */
763 0, /* tp_base */
763 0, /* tp_base */
764 0, /* tp_dict */
764 0, /* tp_dict */
765 0, /* tp_descr_get */
765 0, /* tp_descr_get */
766 0, /* tp_descr_set */
766 0, /* tp_descr_set */
767 0, /* tp_dictoffset */
767 0, /* tp_dictoffset */
768 0, /* tp_init */
768 0, /* tp_init */
769 0, /* tp_alloc */
769 0, /* tp_alloc */
770 PyType_GenericNew, /* tp_new */
770 PyType_GenericNew, /* tp_new */
771 };
771 };
772
772
773
773
774 void decompressionreader_module_init(PyObject* mod) {
774 void decompressionreader_module_init(PyObject* mod) {
775 /* TODO make reader a sub-class of io.RawIOBase */
775 /* TODO make reader a sub-class of io.RawIOBase */
776
776
777 Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
777 Py_SET_TYPE(&ZstdDecompressionReaderType, &PyType_Type);
778 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
778 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
779 return;
779 return;
780 }
780 }
781 }
781 }
@@ -1,295 +1,295 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
14 """A context manager used for writing decompressed output.\n"
14 """A context manager used for writing decompressed output.\n"
15 );
15 );
16
16
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
18 Py_XDECREF(self->decompressor);
18 Py_XDECREF(self->decompressor);
19 Py_XDECREF(self->writer);
19 Py_XDECREF(self->writer);
20
20
21 PyObject_Del(self);
21 PyObject_Del(self);
22 }
22 }
23
23
24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
25 if (self->closed) {
25 if (self->closed) {
26 PyErr_SetString(PyExc_ValueError, "stream is closed");
26 PyErr_SetString(PyExc_ValueError, "stream is closed");
27 return NULL;
27 return NULL;
28 }
28 }
29
29
30 if (self->entered) {
30 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
32 return NULL;
33 }
33 }
34
34
35 self->entered = 1;
35 self->entered = 1;
36
36
37 Py_INCREF(self);
37 Py_INCREF(self);
38 return (PyObject*)self;
38 return (PyObject*)self;
39 }
39 }
40
40
41 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
41 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
42 self->entered = 0;
42 self->entered = 0;
43
43
44 if (NULL == PyObject_CallMethod((PyObject*)self, "close", NULL)) {
44 if (NULL == PyObject_CallMethod((PyObject*)self, "close", NULL)) {
45 return NULL;
45 return NULL;
46 }
46 }
47
47
48 Py_RETURN_FALSE;
48 Py_RETURN_FALSE;
49 }
49 }
50
50
51 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
51 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
52 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx));
52 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx));
53 }
53 }
54
54
55 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) {
55 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) {
56 static char* kwlist[] = {
56 static char* kwlist[] = {
57 "data",
57 "data",
58 NULL
58 NULL
59 };
59 };
60
60
61 PyObject* result = NULL;
61 PyObject* result = NULL;
62 Py_buffer source;
62 Py_buffer source;
63 size_t zresult = 0;
63 size_t zresult = 0;
64 ZSTD_inBuffer input;
64 ZSTD_inBuffer input;
65 ZSTD_outBuffer output;
65 ZSTD_outBuffer output;
66 PyObject* res;
66 PyObject* res;
67 Py_ssize_t totalWrite = 0;
67 Py_ssize_t totalWrite = 0;
68
68
69 #if PY_MAJOR_VERSION >= 3
69 #if PY_MAJOR_VERSION >= 3
70 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
70 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
71 #else
71 #else
72 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
72 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
73 #endif
73 #endif
74 kwlist, &source)) {
74 kwlist, &source)) {
75 return NULL;
75 return NULL;
76 }
76 }
77
77
78 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
78 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
79 PyErr_SetString(PyExc_ValueError,
79 PyErr_SetString(PyExc_ValueError,
80 "data buffer should be contiguous and have at most one dimension");
80 "data buffer should be contiguous and have at most one dimension");
81 goto finally;
81 goto finally;
82 }
82 }
83
83
84 if (self->closed) {
84 if (self->closed) {
85 PyErr_SetString(PyExc_ValueError, "stream is closed");
85 PyErr_SetString(PyExc_ValueError, "stream is closed");
86 return NULL;
86 return NULL;
87 }
87 }
88
88
89 output.dst = PyMem_Malloc(self->outSize);
89 output.dst = PyMem_Malloc(self->outSize);
90 if (!output.dst) {
90 if (!output.dst) {
91 PyErr_NoMemory();
91 PyErr_NoMemory();
92 goto finally;
92 goto finally;
93 }
93 }
94 output.size = self->outSize;
94 output.size = self->outSize;
95 output.pos = 0;
95 output.pos = 0;
96
96
97 input.src = source.buf;
97 input.src = source.buf;
98 input.size = source.len;
98 input.size = source.len;
99 input.pos = 0;
99 input.pos = 0;
100
100
101 while (input.pos < (size_t)source.len) {
101 while (input.pos < (size_t)source.len) {
102 Py_BEGIN_ALLOW_THREADS
102 Py_BEGIN_ALLOW_THREADS
103 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
103 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
104 Py_END_ALLOW_THREADS
104 Py_END_ALLOW_THREADS
105
105
106 if (ZSTD_isError(zresult)) {
106 if (ZSTD_isError(zresult)) {
107 PyMem_Free(output.dst);
107 PyMem_Free(output.dst);
108 PyErr_Format(ZstdError, "zstd decompress error: %s",
108 PyErr_Format(ZstdError, "zstd decompress error: %s",
109 ZSTD_getErrorName(zresult));
109 ZSTD_getErrorName(zresult));
110 goto finally;
110 goto finally;
111 }
111 }
112
112
113 if (output.pos) {
113 if (output.pos) {
114 #if PY_MAJOR_VERSION >= 3
114 #if PY_MAJOR_VERSION >= 3
115 res = PyObject_CallMethod(self->writer, "write", "y#",
115 res = PyObject_CallMethod(self->writer, "write", "y#",
116 #else
116 #else
117 res = PyObject_CallMethod(self->writer, "write", "s#",
117 res = PyObject_CallMethod(self->writer, "write", "s#",
118 #endif
118 #endif
119 output.dst, output.pos);
119 output.dst, output.pos);
120 Py_XDECREF(res);
120 Py_XDECREF(res);
121 totalWrite += output.pos;
121 totalWrite += output.pos;
122 output.pos = 0;
122 output.pos = 0;
123 }
123 }
124 }
124 }
125
125
126 PyMem_Free(output.dst);
126 PyMem_Free(output.dst);
127
127
128 if (self->writeReturnRead) {
128 if (self->writeReturnRead) {
129 result = PyLong_FromSize_t(input.pos);
129 result = PyLong_FromSize_t(input.pos);
130 }
130 }
131 else {
131 else {
132 result = PyLong_FromSsize_t(totalWrite);
132 result = PyLong_FromSsize_t(totalWrite);
133 }
133 }
134
134
135 finally:
135 finally:
136 PyBuffer_Release(&source);
136 PyBuffer_Release(&source);
137 return result;
137 return result;
138 }
138 }
139
139
140 static PyObject* ZstdDecompressionWriter_close(ZstdDecompressionWriter* self) {
140 static PyObject* ZstdDecompressionWriter_close(ZstdDecompressionWriter* self) {
141 PyObject* result;
141 PyObject* result;
142
142
143 if (self->closed) {
143 if (self->closed) {
144 Py_RETURN_NONE;
144 Py_RETURN_NONE;
145 }
145 }
146
146
147 result = PyObject_CallMethod((PyObject*)self, "flush", NULL);
147 result = PyObject_CallMethod((PyObject*)self, "flush", NULL);
148 self->closed = 1;
148 self->closed = 1;
149
149
150 if (NULL == result) {
150 if (NULL == result) {
151 return NULL;
151 return NULL;
152 }
152 }
153
153
154 /* Call close on underlying stream as well. */
154 /* Call close on underlying stream as well. */
155 if (PyObject_HasAttrString(self->writer, "close")) {
155 if (PyObject_HasAttrString(self->writer, "close")) {
156 return PyObject_CallMethod(self->writer, "close", NULL);
156 return PyObject_CallMethod(self->writer, "close", NULL);
157 }
157 }
158
158
159 Py_RETURN_NONE;
159 Py_RETURN_NONE;
160 }
160 }
161
161
162 static PyObject* ZstdDecompressionWriter_fileno(ZstdDecompressionWriter* self) {
162 static PyObject* ZstdDecompressionWriter_fileno(ZstdDecompressionWriter* self) {
163 if (PyObject_HasAttrString(self->writer, "fileno")) {
163 if (PyObject_HasAttrString(self->writer, "fileno")) {
164 return PyObject_CallMethod(self->writer, "fileno", NULL);
164 return PyObject_CallMethod(self->writer, "fileno", NULL);
165 }
165 }
166 else {
166 else {
167 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
167 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
168 return NULL;
168 return NULL;
169 }
169 }
170 }
170 }
171
171
172 static PyObject* ZstdDecompressionWriter_flush(ZstdDecompressionWriter* self) {
172 static PyObject* ZstdDecompressionWriter_flush(ZstdDecompressionWriter* self) {
173 if (self->closed) {
173 if (self->closed) {
174 PyErr_SetString(PyExc_ValueError, "stream is closed");
174 PyErr_SetString(PyExc_ValueError, "stream is closed");
175 return NULL;
175 return NULL;
176 }
176 }
177
177
178 if (PyObject_HasAttrString(self->writer, "flush")) {
178 if (PyObject_HasAttrString(self->writer, "flush")) {
179 return PyObject_CallMethod(self->writer, "flush", NULL);
179 return PyObject_CallMethod(self->writer, "flush", NULL);
180 }
180 }
181 else {
181 else {
182 Py_RETURN_NONE;
182 Py_RETURN_NONE;
183 }
183 }
184 }
184 }
185
185
186 static PyObject* ZstdDecompressionWriter_false(PyObject* self, PyObject* args) {
186 static PyObject* ZstdDecompressionWriter_false(PyObject* self, PyObject* args) {
187 Py_RETURN_FALSE;
187 Py_RETURN_FALSE;
188 }
188 }
189
189
190 static PyObject* ZstdDecompressionWriter_true(PyObject* self, PyObject* args) {
190 static PyObject* ZstdDecompressionWriter_true(PyObject* self, PyObject* args) {
191 Py_RETURN_TRUE;
191 Py_RETURN_TRUE;
192 }
192 }
193
193
194 static PyObject* ZstdDecompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
194 static PyObject* ZstdDecompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
195 PyObject* iomod;
195 PyObject* iomod;
196 PyObject* exc;
196 PyObject* exc;
197
197
198 iomod = PyImport_ImportModule("io");
198 iomod = PyImport_ImportModule("io");
199 if (NULL == iomod) {
199 if (NULL == iomod) {
200 return NULL;
200 return NULL;
201 }
201 }
202
202
203 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
203 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
204 if (NULL == exc) {
204 if (NULL == exc) {
205 Py_DECREF(iomod);
205 Py_DECREF(iomod);
206 return NULL;
206 return NULL;
207 }
207 }
208
208
209 PyErr_SetNone(exc);
209 PyErr_SetNone(exc);
210 Py_DECREF(exc);
210 Py_DECREF(exc);
211 Py_DECREF(iomod);
211 Py_DECREF(iomod);
212
212
213 return NULL;
213 return NULL;
214 }
214 }
215
215
216 static PyMethodDef ZstdDecompressionWriter_methods[] = {
216 static PyMethodDef ZstdDecompressionWriter_methods[] = {
217 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
217 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
218 PyDoc_STR("Enter a decompression context.") },
218 PyDoc_STR("Enter a decompression context.") },
219 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
219 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
220 PyDoc_STR("Exit a decompression context.") },
220 PyDoc_STR("Exit a decompression context.") },
221 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
221 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
222 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
222 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
223 { "close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL },
223 { "close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL },
224 { "fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL },
224 { "fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL },
225 { "flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL },
225 { "flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL },
226 { "isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
226 { "isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
227 { "readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
227 { "readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
228 { "readline", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
228 { "readline", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
229 { "readlines", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
229 { "readlines", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
230 { "seek", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
230 { "seek", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
231 { "seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
231 { "seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
232 { "tell", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
232 { "tell", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
233 { "truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
233 { "truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
234 { "writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL },
234 { "writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL },
235 { "writelines" , (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
235 { "writelines" , (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
236 { "read", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
236 { "read", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
237 { "readall", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
237 { "readall", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
238 { "readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
238 { "readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
239 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
239 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
240 PyDoc_STR("Compress data") },
240 PyDoc_STR("Compress data") },
241 { NULL, NULL }
241 { NULL, NULL }
242 };
242 };
243
243
244 static PyMemberDef ZstdDecompressionWriter_members[] = {
244 static PyMemberDef ZstdDecompressionWriter_members[] = {
245 { "closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY, NULL },
245 { "closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY, NULL },
246 { NULL }
246 { NULL }
247 };
247 };
248
248
249 PyTypeObject ZstdDecompressionWriterType = {
249 PyTypeObject ZstdDecompressionWriterType = {
250 PyVarObject_HEAD_INIT(NULL, 0)
250 PyVarObject_HEAD_INIT(NULL, 0)
251 "zstd.ZstdDecompressionWriter", /* tp_name */
251 "zstd.ZstdDecompressionWriter", /* tp_name */
252 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
252 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
253 0, /* tp_itemsize */
253 0, /* tp_itemsize */
254 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
254 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
255 0, /* tp_print */
255 0, /* tp_print */
256 0, /* tp_getattr */
256 0, /* tp_getattr */
257 0, /* tp_setattr */
257 0, /* tp_setattr */
258 0, /* tp_compare */
258 0, /* tp_compare */
259 0, /* tp_repr */
259 0, /* tp_repr */
260 0, /* tp_as_number */
260 0, /* tp_as_number */
261 0, /* tp_as_sequence */
261 0, /* tp_as_sequence */
262 0, /* tp_as_mapping */
262 0, /* tp_as_mapping */
263 0, /* tp_hash */
263 0, /* tp_hash */
264 0, /* tp_call */
264 0, /* tp_call */
265 0, /* tp_str */
265 0, /* tp_str */
266 0, /* tp_getattro */
266 0, /* tp_getattro */
267 0, /* tp_setattro */
267 0, /* tp_setattro */
268 0, /* tp_as_buffer */
268 0, /* tp_as_buffer */
269 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
269 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
270 ZstdDecompressionWriter__doc, /* tp_doc */
270 ZstdDecompressionWriter__doc, /* tp_doc */
271 0, /* tp_traverse */
271 0, /* tp_traverse */
272 0, /* tp_clear */
272 0, /* tp_clear */
273 0, /* tp_richcompare */
273 0, /* tp_richcompare */
274 0, /* tp_weaklistoffset */
274 0, /* tp_weaklistoffset */
275 0, /* tp_iter */
275 0, /* tp_iter */
276 0, /* tp_iternext */
276 0, /* tp_iternext */
277 ZstdDecompressionWriter_methods,/* tp_methods */
277 ZstdDecompressionWriter_methods,/* tp_methods */
278 ZstdDecompressionWriter_members,/* tp_members */
278 ZstdDecompressionWriter_members,/* tp_members */
279 0, /* tp_getset */
279 0, /* tp_getset */
280 0, /* tp_base */
280 0, /* tp_base */
281 0, /* tp_dict */
281 0, /* tp_dict */
282 0, /* tp_descr_get */
282 0, /* tp_descr_get */
283 0, /* tp_descr_set */
283 0, /* tp_descr_set */
284 0, /* tp_dictoffset */
284 0, /* tp_dictoffset */
285 0, /* tp_init */
285 0, /* tp_init */
286 0, /* tp_alloc */
286 0, /* tp_alloc */
287 PyType_GenericNew, /* tp_new */
287 PyType_GenericNew, /* tp_new */
288 };
288 };
289
289
290 void decompressionwriter_module_init(PyObject* mod) {
290 void decompressionwriter_module_init(PyObject* mod) {
291 Py_TYPE(&ZstdDecompressionWriterType) = &PyType_Type;
291 Py_SET_TYPE(&ZstdDecompressionWriterType, &PyType_Type);
292 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
292 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
293 return;
293 return;
294 }
294 }
295 }
295 }
@@ -1,202 +1,202 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(DecompressionObj__doc__,
13 PyDoc_STRVAR(DecompressionObj__doc__,
14 "Perform decompression using a standard library compatible API.\n"
14 "Perform decompression using a standard library compatible API.\n"
15 );
15 );
16
16
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
17 static void DecompressionObj_dealloc(ZstdDecompressionObj* self) {
18 Py_XDECREF(self->decompressor);
18 Py_XDECREF(self->decompressor);
19
19
20 PyObject_Del(self);
20 PyObject_Del(self);
21 }
21 }
22
22
23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
23 static PyObject* DecompressionObj_decompress(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
24 static char* kwlist[] = {
24 static char* kwlist[] = {
25 "data",
25 "data",
26 NULL
26 NULL
27 };
27 };
28
28
29 Py_buffer source;
29 Py_buffer source;
30 size_t zresult;
30 size_t zresult;
31 ZSTD_inBuffer input;
31 ZSTD_inBuffer input;
32 ZSTD_outBuffer output;
32 ZSTD_outBuffer output;
33 PyObject* result = NULL;
33 PyObject* result = NULL;
34 Py_ssize_t resultSize = 0;
34 Py_ssize_t resultSize = 0;
35
35
36 output.dst = NULL;
36 output.dst = NULL;
37
37
38 if (self->finished) {
38 if (self->finished) {
39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
40 return NULL;
40 return NULL;
41 }
41 }
42
42
43 #if PY_MAJOR_VERSION >= 3
43 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress",
44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:decompress",
45 #else
45 #else
46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress",
46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:decompress",
47 #endif
47 #endif
48 kwlist, &source)) {
48 kwlist, &source)) {
49 return NULL;
49 return NULL;
50 }
50 }
51
51
52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
53 PyErr_SetString(PyExc_ValueError,
53 PyErr_SetString(PyExc_ValueError,
54 "data buffer should be contiguous and have at most one dimension");
54 "data buffer should be contiguous and have at most one dimension");
55 goto finally;
55 goto finally;
56 }
56 }
57
57
58 /* Special case of empty input. Output will always be empty. */
58 /* Special case of empty input. Output will always be empty. */
59 if (source.len == 0) {
59 if (source.len == 0) {
60 result = PyBytes_FromString("");
60 result = PyBytes_FromString("");
61 goto finally;
61 goto finally;
62 }
62 }
63
63
64 input.src = source.buf;
64 input.src = source.buf;
65 input.size = source.len;
65 input.size = source.len;
66 input.pos = 0;
66 input.pos = 0;
67
67
68 output.dst = PyMem_Malloc(self->outSize);
68 output.dst = PyMem_Malloc(self->outSize);
69 if (!output.dst) {
69 if (!output.dst) {
70 PyErr_NoMemory();
70 PyErr_NoMemory();
71 goto except;
71 goto except;
72 }
72 }
73 output.size = self->outSize;
73 output.size = self->outSize;
74 output.pos = 0;
74 output.pos = 0;
75
75
76 while (1) {
76 while (1) {
77 Py_BEGIN_ALLOW_THREADS
77 Py_BEGIN_ALLOW_THREADS
78 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
78 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
79 Py_END_ALLOW_THREADS
79 Py_END_ALLOW_THREADS
80
80
81 if (ZSTD_isError(zresult)) {
81 if (ZSTD_isError(zresult)) {
82 PyErr_Format(ZstdError, "zstd decompressor error: %s",
82 PyErr_Format(ZstdError, "zstd decompressor error: %s",
83 ZSTD_getErrorName(zresult));
83 ZSTD_getErrorName(zresult));
84 goto except;
84 goto except;
85 }
85 }
86
86
87 if (0 == zresult) {
87 if (0 == zresult) {
88 self->finished = 1;
88 self->finished = 1;
89 }
89 }
90
90
91 if (output.pos) {
91 if (output.pos) {
92 if (result) {
92 if (result) {
93 resultSize = PyBytes_GET_SIZE(result);
93 resultSize = PyBytes_GET_SIZE(result);
94 if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) {
94 if (-1 == safe_pybytes_resize(&result, resultSize + output.pos)) {
95 Py_XDECREF(result);
95 Py_XDECREF(result);
96 goto except;
96 goto except;
97 }
97 }
98
98
99 memcpy(PyBytes_AS_STRING(result) + resultSize,
99 memcpy(PyBytes_AS_STRING(result) + resultSize,
100 output.dst, output.pos);
100 output.dst, output.pos);
101 }
101 }
102 else {
102 else {
103 result = PyBytes_FromStringAndSize(output.dst, output.pos);
103 result = PyBytes_FromStringAndSize(output.dst, output.pos);
104 if (!result) {
104 if (!result) {
105 goto except;
105 goto except;
106 }
106 }
107 }
107 }
108 }
108 }
109
109
110 if (zresult == 0 || (input.pos == input.size && output.pos == 0)) {
110 if (zresult == 0 || (input.pos == input.size && output.pos == 0)) {
111 break;
111 break;
112 }
112 }
113
113
114 output.pos = 0;
114 output.pos = 0;
115 }
115 }
116
116
117 if (!result) {
117 if (!result) {
118 result = PyBytes_FromString("");
118 result = PyBytes_FromString("");
119 }
119 }
120
120
121 goto finally;
121 goto finally;
122
122
123 except:
123 except:
124 Py_CLEAR(result);
124 Py_CLEAR(result);
125
125
126 finally:
126 finally:
127 PyMem_Free(output.dst);
127 PyMem_Free(output.dst);
128 PyBuffer_Release(&source);
128 PyBuffer_Release(&source);
129
129
130 return result;
130 return result;
131 }
131 }
132
132
133 static PyObject* DecompressionObj_flush(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
133 static PyObject* DecompressionObj_flush(ZstdDecompressionObj* self, PyObject* args, PyObject* kwargs) {
134 static char* kwlist[] = {
134 static char* kwlist[] = {
135 "length",
135 "length",
136 NULL
136 NULL
137 };
137 };
138
138
139 PyObject* length = NULL;
139 PyObject* length = NULL;
140
140
141 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:flush", kwlist, &length)) {
141 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O:flush", kwlist, &length)) {
142 return NULL;
142 return NULL;
143 }
143 }
144
144
145 Py_RETURN_NONE;
145 Py_RETURN_NONE;
146 }
146 }
147
147
148 static PyMethodDef DecompressionObj_methods[] = {
148 static PyMethodDef DecompressionObj_methods[] = {
149 { "decompress", (PyCFunction)DecompressionObj_decompress,
149 { "decompress", (PyCFunction)DecompressionObj_decompress,
150 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
150 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("decompress data") },
151 { "flush", (PyCFunction)DecompressionObj_flush,
151 { "flush", (PyCFunction)DecompressionObj_flush,
152 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("no-op") },
152 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("no-op") },
153 { NULL, NULL }
153 { NULL, NULL }
154 };
154 };
155
155
156 PyTypeObject ZstdDecompressionObjType = {
156 PyTypeObject ZstdDecompressionObjType = {
157 PyVarObject_HEAD_INIT(NULL, 0)
157 PyVarObject_HEAD_INIT(NULL, 0)
158 "zstd.ZstdDecompressionObj", /* tp_name */
158 "zstd.ZstdDecompressionObj", /* tp_name */
159 sizeof(ZstdDecompressionObj), /* tp_basicsize */
159 sizeof(ZstdDecompressionObj), /* tp_basicsize */
160 0, /* tp_itemsize */
160 0, /* tp_itemsize */
161 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
161 (destructor)DecompressionObj_dealloc, /* tp_dealloc */
162 0, /* tp_print */
162 0, /* tp_print */
163 0, /* tp_getattr */
163 0, /* tp_getattr */
164 0, /* tp_setattr */
164 0, /* tp_setattr */
165 0, /* tp_compare */
165 0, /* tp_compare */
166 0, /* tp_repr */
166 0, /* tp_repr */
167 0, /* tp_as_number */
167 0, /* tp_as_number */
168 0, /* tp_as_sequence */
168 0, /* tp_as_sequence */
169 0, /* tp_as_mapping */
169 0, /* tp_as_mapping */
170 0, /* tp_hash */
170 0, /* tp_hash */
171 0, /* tp_call */
171 0, /* tp_call */
172 0, /* tp_str */
172 0, /* tp_str */
173 0, /* tp_getattro */
173 0, /* tp_getattro */
174 0, /* tp_setattro */
174 0, /* tp_setattro */
175 0, /* tp_as_buffer */
175 0, /* tp_as_buffer */
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
176 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
177 DecompressionObj__doc__, /* tp_doc */
177 DecompressionObj__doc__, /* tp_doc */
178 0, /* tp_traverse */
178 0, /* tp_traverse */
179 0, /* tp_clear */
179 0, /* tp_clear */
180 0, /* tp_richcompare */
180 0, /* tp_richcompare */
181 0, /* tp_weaklistoffset */
181 0, /* tp_weaklistoffset */
182 0, /* tp_iter */
182 0, /* tp_iter */
183 0, /* tp_iternext */
183 0, /* tp_iternext */
184 DecompressionObj_methods, /* tp_methods */
184 DecompressionObj_methods, /* tp_methods */
185 0, /* tp_members */
185 0, /* tp_members */
186 0, /* tp_getset */
186 0, /* tp_getset */
187 0, /* tp_base */
187 0, /* tp_base */
188 0, /* tp_dict */
188 0, /* tp_dict */
189 0, /* tp_descr_get */
189 0, /* tp_descr_get */
190 0, /* tp_descr_set */
190 0, /* tp_descr_set */
191 0, /* tp_dictoffset */
191 0, /* tp_dictoffset */
192 0, /* tp_init */
192 0, /* tp_init */
193 0, /* tp_alloc */
193 0, /* tp_alloc */
194 PyType_GenericNew, /* tp_new */
194 PyType_GenericNew, /* tp_new */
195 };
195 };
196
196
197 void decompressobj_module_init(PyObject* module) {
197 void decompressobj_module_init(PyObject* module) {
198 Py_TYPE(&ZstdDecompressionObjType) = &PyType_Type;
198 Py_SET_TYPE(&ZstdDecompressionObjType, &PyType_Type);
199 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
199 if (PyType_Ready(&ZstdDecompressionObjType) < 0) {
200 return;
200 return;
201 }
201 }
202 }
202 }
@@ -1,1822 +1,1822 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10 #include "pool.h"
10 #include "pool.h"
11
11
12 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
13
13
14 /**
14 /**
15 * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
15 * Ensure the ZSTD_DCtx on a decompressor is initiated and ready for a new operation.
16 */
16 */
17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
17 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict) {
18 size_t zresult;
18 size_t zresult;
19
19
20 ZSTD_DCtx_reset(decompressor->dctx, ZSTD_reset_session_only);
20 ZSTD_DCtx_reset(decompressor->dctx, ZSTD_reset_session_only);
21
21
22 if (decompressor->maxWindowSize) {
22 if (decompressor->maxWindowSize) {
23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
23 zresult = ZSTD_DCtx_setMaxWindowSize(decompressor->dctx, decompressor->maxWindowSize);
24 if (ZSTD_isError(zresult)) {
24 if (ZSTD_isError(zresult)) {
25 PyErr_Format(ZstdError, "unable to set max window size: %s",
25 PyErr_Format(ZstdError, "unable to set max window size: %s",
26 ZSTD_getErrorName(zresult));
26 ZSTD_getErrorName(zresult));
27 return 1;
27 return 1;
28 }
28 }
29 }
29 }
30
30
31 zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
31 zresult = ZSTD_DCtx_setFormat(decompressor->dctx, decompressor->format);
32 if (ZSTD_isError(zresult)) {
32 if (ZSTD_isError(zresult)) {
33 PyErr_Format(ZstdError, "unable to set decoding format: %s",
33 PyErr_Format(ZstdError, "unable to set decoding format: %s",
34 ZSTD_getErrorName(zresult));
34 ZSTD_getErrorName(zresult));
35 return 1;
35 return 1;
36 }
36 }
37
37
38 if (loadDict && decompressor->dict) {
38 if (loadDict && decompressor->dict) {
39 if (ensure_ddict(decompressor->dict)) {
39 if (ensure_ddict(decompressor->dict)) {
40 return 1;
40 return 1;
41 }
41 }
42
42
43 zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
43 zresult = ZSTD_DCtx_refDDict(decompressor->dctx, decompressor->dict->ddict);
44 if (ZSTD_isError(zresult)) {
44 if (ZSTD_isError(zresult)) {
45 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
45 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
46 ZSTD_getErrorName(zresult));
46 ZSTD_getErrorName(zresult));
47 return 1;
47 return 1;
48 }
48 }
49 }
49 }
50
50
51 return 0;
51 return 0;
52 }
52 }
53
53
54 PyDoc_STRVAR(Decompressor__doc__,
54 PyDoc_STRVAR(Decompressor__doc__,
55 "ZstdDecompressor(dict_data=None)\n"
55 "ZstdDecompressor(dict_data=None)\n"
56 "\n"
56 "\n"
57 "Create an object used to perform Zstandard decompression.\n"
57 "Create an object used to perform Zstandard decompression.\n"
58 "\n"
58 "\n"
59 "An instance can perform multiple decompression operations."
59 "An instance can perform multiple decompression operations."
60 );
60 );
61
61
62 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
62 static int Decompressor_init(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
63 static char* kwlist[] = {
63 static char* kwlist[] = {
64 "dict_data",
64 "dict_data",
65 "max_window_size",
65 "max_window_size",
66 "format",
66 "format",
67 NULL
67 NULL
68 };
68 };
69
69
70 ZstdCompressionDict* dict = NULL;
70 ZstdCompressionDict* dict = NULL;
71 Py_ssize_t maxWindowSize = 0;
71 Py_ssize_t maxWindowSize = 0;
72 ZSTD_format_e format = ZSTD_f_zstd1;
72 ZSTD_format_e format = ZSTD_f_zstd1;
73
73
74 self->dctx = NULL;
74 self->dctx = NULL;
75 self->dict = NULL;
75 self->dict = NULL;
76
76
77 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!nI:ZstdDecompressor", kwlist,
77 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!nI:ZstdDecompressor", kwlist,
78 &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) {
78 &ZstdCompressionDictType, &dict, &maxWindowSize, &format)) {
79 return -1;
79 return -1;
80 }
80 }
81
81
82 self->dctx = ZSTD_createDCtx();
82 self->dctx = ZSTD_createDCtx();
83 if (!self->dctx) {
83 if (!self->dctx) {
84 PyErr_NoMemory();
84 PyErr_NoMemory();
85 goto except;
85 goto except;
86 }
86 }
87
87
88 self->maxWindowSize = maxWindowSize;
88 self->maxWindowSize = maxWindowSize;
89 self->format = format;
89 self->format = format;
90
90
91 if (dict) {
91 if (dict) {
92 self->dict = dict;
92 self->dict = dict;
93 Py_INCREF(dict);
93 Py_INCREF(dict);
94 }
94 }
95
95
96 if (ensure_dctx(self, 1)) {
96 if (ensure_dctx(self, 1)) {
97 goto except;
97 goto except;
98 }
98 }
99
99
100 return 0;
100 return 0;
101
101
102 except:
102 except:
103 Py_CLEAR(self->dict);
103 Py_CLEAR(self->dict);
104
104
105 if (self->dctx) {
105 if (self->dctx) {
106 ZSTD_freeDCtx(self->dctx);
106 ZSTD_freeDCtx(self->dctx);
107 self->dctx = NULL;
107 self->dctx = NULL;
108 }
108 }
109
109
110 return -1;
110 return -1;
111 }
111 }
112
112
113 static void Decompressor_dealloc(ZstdDecompressor* self) {
113 static void Decompressor_dealloc(ZstdDecompressor* self) {
114 Py_CLEAR(self->dict);
114 Py_CLEAR(self->dict);
115
115
116 if (self->dctx) {
116 if (self->dctx) {
117 ZSTD_freeDCtx(self->dctx);
117 ZSTD_freeDCtx(self->dctx);
118 self->dctx = NULL;
118 self->dctx = NULL;
119 }
119 }
120
120
121 PyObject_Del(self);
121 PyObject_Del(self);
122 }
122 }
123
123
124 PyDoc_STRVAR(Decompressor_memory_size__doc__,
124 PyDoc_STRVAR(Decompressor_memory_size__doc__,
125 "memory_size() -- Size of decompression context, in bytes\n"
125 "memory_size() -- Size of decompression context, in bytes\n"
126 );
126 );
127
127
128 static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
128 static PyObject* Decompressor_memory_size(ZstdDecompressor* self) {
129 if (self->dctx) {
129 if (self->dctx) {
130 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
130 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->dctx));
131 }
131 }
132 else {
132 else {
133 PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
133 PyErr_SetString(ZstdError, "no decompressor context found; this should never happen");
134 return NULL;
134 return NULL;
135 }
135 }
136 }
136 }
137
137
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
138 PyDoc_STRVAR(Decompressor_copy_stream__doc__,
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
139 "copy_stream(ifh, ofh[, read_size=default, write_size=default]) -- decompress data between streams\n"
140 "\n"
140 "\n"
141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
141 "Compressed data will be read from ``ifh``, decompressed, and written to\n"
142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
142 "``ofh``. ``ifh`` must have a ``read(size)`` method. ``ofh`` must have a\n"
143 "``write(data)`` method.\n"
143 "``write(data)`` method.\n"
144 "\n"
144 "\n"
145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
145 "The optional ``read_size`` and ``write_size`` arguments control the chunk\n"
146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
146 "size of data that is ``read()`` and ``write()`` between streams. They default\n"
147 "to the default input and output sizes of zstd decompressor streams.\n"
147 "to the default input and output sizes of zstd decompressor streams.\n"
148 );
148 );
149
149
150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
150 static PyObject* Decompressor_copy_stream(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
151 static char* kwlist[] = {
151 static char* kwlist[] = {
152 "ifh",
152 "ifh",
153 "ofh",
153 "ofh",
154 "read_size",
154 "read_size",
155 "write_size",
155 "write_size",
156 NULL
156 NULL
157 };
157 };
158
158
159 PyObject* source;
159 PyObject* source;
160 PyObject* dest;
160 PyObject* dest;
161 size_t inSize = ZSTD_DStreamInSize();
161 size_t inSize = ZSTD_DStreamInSize();
162 size_t outSize = ZSTD_DStreamOutSize();
162 size_t outSize = ZSTD_DStreamOutSize();
163 ZSTD_inBuffer input;
163 ZSTD_inBuffer input;
164 ZSTD_outBuffer output;
164 ZSTD_outBuffer output;
165 Py_ssize_t totalRead = 0;
165 Py_ssize_t totalRead = 0;
166 Py_ssize_t totalWrite = 0;
166 Py_ssize_t totalWrite = 0;
167 char* readBuffer;
167 char* readBuffer;
168 Py_ssize_t readSize;
168 Py_ssize_t readSize;
169 PyObject* readResult = NULL;
169 PyObject* readResult = NULL;
170 PyObject* res = NULL;
170 PyObject* res = NULL;
171 size_t zresult = 0;
171 size_t zresult = 0;
172 PyObject* writeResult;
172 PyObject* writeResult;
173 PyObject* totalReadPy;
173 PyObject* totalReadPy;
174 PyObject* totalWritePy;
174 PyObject* totalWritePy;
175
175
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
176 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|kk:copy_stream", kwlist,
177 &source, &dest, &inSize, &outSize)) {
177 &source, &dest, &inSize, &outSize)) {
178 return NULL;
178 return NULL;
179 }
179 }
180
180
181 if (!PyObject_HasAttrString(source, "read")) {
181 if (!PyObject_HasAttrString(source, "read")) {
182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
182 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
183 return NULL;
183 return NULL;
184 }
184 }
185
185
186 if (!PyObject_HasAttrString(dest, "write")) {
186 if (!PyObject_HasAttrString(dest, "write")) {
187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
187 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
188 return NULL;
188 return NULL;
189 }
189 }
190
190
191 /* Prevent free on uninitialized memory in finally. */
191 /* Prevent free on uninitialized memory in finally. */
192 output.dst = NULL;
192 output.dst = NULL;
193
193
194 if (ensure_dctx(self, 1)) {
194 if (ensure_dctx(self, 1)) {
195 res = NULL;
195 res = NULL;
196 goto finally;
196 goto finally;
197 }
197 }
198
198
199 output.dst = PyMem_Malloc(outSize);
199 output.dst = PyMem_Malloc(outSize);
200 if (!output.dst) {
200 if (!output.dst) {
201 PyErr_NoMemory();
201 PyErr_NoMemory();
202 res = NULL;
202 res = NULL;
203 goto finally;
203 goto finally;
204 }
204 }
205 output.size = outSize;
205 output.size = outSize;
206 output.pos = 0;
206 output.pos = 0;
207
207
208 /* Read source stream until EOF */
208 /* Read source stream until EOF */
209 while (1) {
209 while (1) {
210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
210 readResult = PyObject_CallMethod(source, "read", "n", inSize);
211 if (!readResult) {
211 if (!readResult) {
212 PyErr_SetString(ZstdError, "could not read() from source");
212 PyErr_SetString(ZstdError, "could not read() from source");
213 goto finally;
213 goto finally;
214 }
214 }
215
215
216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
216 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
217
217
218 /* If no data was read, we're at EOF. */
218 /* If no data was read, we're at EOF. */
219 if (0 == readSize) {
219 if (0 == readSize) {
220 break;
220 break;
221 }
221 }
222
222
223 totalRead += readSize;
223 totalRead += readSize;
224
224
225 /* Send data to decompressor */
225 /* Send data to decompressor */
226 input.src = readBuffer;
226 input.src = readBuffer;
227 input.size = readSize;
227 input.size = readSize;
228 input.pos = 0;
228 input.pos = 0;
229
229
230 while (input.pos < input.size) {
230 while (input.pos < input.size) {
231 Py_BEGIN_ALLOW_THREADS
231 Py_BEGIN_ALLOW_THREADS
232 zresult = ZSTD_decompressStream(self->dctx, &output, &input);
232 zresult = ZSTD_decompressStream(self->dctx, &output, &input);
233 Py_END_ALLOW_THREADS
233 Py_END_ALLOW_THREADS
234
234
235 if (ZSTD_isError(zresult)) {
235 if (ZSTD_isError(zresult)) {
236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
236 PyErr_Format(ZstdError, "zstd decompressor error: %s",
237 ZSTD_getErrorName(zresult));
237 ZSTD_getErrorName(zresult));
238 res = NULL;
238 res = NULL;
239 goto finally;
239 goto finally;
240 }
240 }
241
241
242 if (output.pos) {
242 if (output.pos) {
243 #if PY_MAJOR_VERSION >= 3
243 #if PY_MAJOR_VERSION >= 3
244 writeResult = PyObject_CallMethod(dest, "write", "y#",
244 writeResult = PyObject_CallMethod(dest, "write", "y#",
245 #else
245 #else
246 writeResult = PyObject_CallMethod(dest, "write", "s#",
246 writeResult = PyObject_CallMethod(dest, "write", "s#",
247 #endif
247 #endif
248 output.dst, output.pos);
248 output.dst, output.pos);
249
249
250 Py_XDECREF(writeResult);
250 Py_XDECREF(writeResult);
251 totalWrite += output.pos;
251 totalWrite += output.pos;
252 output.pos = 0;
252 output.pos = 0;
253 }
253 }
254 }
254 }
255
255
256 Py_CLEAR(readResult);
256 Py_CLEAR(readResult);
257 }
257 }
258
258
259 /* Source stream is exhausted. Finish up. */
259 /* Source stream is exhausted. Finish up. */
260
260
261 totalReadPy = PyLong_FromSsize_t(totalRead);
261 totalReadPy = PyLong_FromSsize_t(totalRead);
262 totalWritePy = PyLong_FromSsize_t(totalWrite);
262 totalWritePy = PyLong_FromSsize_t(totalWrite);
263 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
263 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
264 Py_DECREF(totalReadPy);
264 Py_DECREF(totalReadPy);
265 Py_DECREF(totalWritePy);
265 Py_DECREF(totalWritePy);
266
266
267 finally:
267 finally:
268 if (output.dst) {
268 if (output.dst) {
269 PyMem_Free(output.dst);
269 PyMem_Free(output.dst);
270 }
270 }
271
271
272 Py_XDECREF(readResult);
272 Py_XDECREF(readResult);
273
273
274 return res;
274 return res;
275 }
275 }
276
276
277 PyDoc_STRVAR(Decompressor_decompress__doc__,
277 PyDoc_STRVAR(Decompressor_decompress__doc__,
278 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
278 "decompress(data[, max_output_size=None]) -- Decompress data in its entirety\n"
279 "\n"
279 "\n"
280 "This method will decompress the entirety of the argument and return the\n"
280 "This method will decompress the entirety of the argument and return the\n"
281 "result.\n"
281 "result.\n"
282 "\n"
282 "\n"
283 "The input bytes are expected to contain a full Zstandard frame (something\n"
283 "The input bytes are expected to contain a full Zstandard frame (something\n"
284 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
284 "compressed with ``ZstdCompressor.compress()`` or similar). If the input does\n"
285 "not contain a full frame, an exception will be raised.\n"
285 "not contain a full frame, an exception will be raised.\n"
286 "\n"
286 "\n"
287 "If the frame header of the compressed data does not contain the content size\n"
287 "If the frame header of the compressed data does not contain the content size\n"
288 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
288 "``max_output_size`` must be specified or ``ZstdError`` will be raised. An\n"
289 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
289 "allocation of size ``max_output_size`` will be performed and an attempt will\n"
290 "be made to perform decompression into that buffer. If the buffer is too\n"
290 "be made to perform decompression into that buffer. If the buffer is too\n"
291 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
291 "small or cannot be allocated, ``ZstdError`` will be raised. The buffer will\n"
292 "be resized if it is too large.\n"
292 "be resized if it is too large.\n"
293 "\n"
293 "\n"
294 "Uncompressed data could be much larger than compressed data. As a result,\n"
294 "Uncompressed data could be much larger than compressed data. As a result,\n"
295 "calling this function could result in a very large memory allocation being\n"
295 "calling this function could result in a very large memory allocation being\n"
296 "performed to hold the uncompressed data. Therefore it is **highly**\n"
296 "performed to hold the uncompressed data. Therefore it is **highly**\n"
297 "recommended to use a streaming decompression method instead of this one.\n"
297 "recommended to use a streaming decompression method instead of this one.\n"
298 );
298 );
299
299
300 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
300 PyObject* Decompressor_decompress(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
301 static char* kwlist[] = {
301 static char* kwlist[] = {
302 "data",
302 "data",
303 "max_output_size",
303 "max_output_size",
304 NULL
304 NULL
305 };
305 };
306
306
307 Py_buffer source;
307 Py_buffer source;
308 Py_ssize_t maxOutputSize = 0;
308 Py_ssize_t maxOutputSize = 0;
309 unsigned long long decompressedSize;
309 unsigned long long decompressedSize;
310 size_t destCapacity;
310 size_t destCapacity;
311 PyObject* result = NULL;
311 PyObject* result = NULL;
312 size_t zresult;
312 size_t zresult;
313 ZSTD_outBuffer outBuffer;
313 ZSTD_outBuffer outBuffer;
314 ZSTD_inBuffer inBuffer;
314 ZSTD_inBuffer inBuffer;
315
315
316 #if PY_MAJOR_VERSION >= 3
316 #if PY_MAJOR_VERSION >= 3
317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
317 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|n:decompress",
318 #else
318 #else
319 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
319 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|n:decompress",
320 #endif
320 #endif
321 kwlist, &source, &maxOutputSize)) {
321 kwlist, &source, &maxOutputSize)) {
322 return NULL;
322 return NULL;
323 }
323 }
324
324
325 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
325 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
326 PyErr_SetString(PyExc_ValueError,
326 PyErr_SetString(PyExc_ValueError,
327 "data buffer should be contiguous and have at most one dimension");
327 "data buffer should be contiguous and have at most one dimension");
328 goto finally;
328 goto finally;
329 }
329 }
330
330
331 if (ensure_dctx(self, 1)) {
331 if (ensure_dctx(self, 1)) {
332 goto finally;
332 goto finally;
333 }
333 }
334
334
335 decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
335 decompressedSize = ZSTD_getFrameContentSize(source.buf, source.len);
336
336
337 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
337 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
338 PyErr_SetString(ZstdError, "error determining content size from frame header");
338 PyErr_SetString(ZstdError, "error determining content size from frame header");
339 goto finally;
339 goto finally;
340 }
340 }
341 /* Special case of empty frame. */
341 /* Special case of empty frame. */
342 else if (0 == decompressedSize) {
342 else if (0 == decompressedSize) {
343 result = PyBytes_FromStringAndSize("", 0);
343 result = PyBytes_FromStringAndSize("", 0);
344 goto finally;
344 goto finally;
345 }
345 }
346 /* Missing content size in frame header. */
346 /* Missing content size in frame header. */
347 if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
347 if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
348 if (0 == maxOutputSize) {
348 if (0 == maxOutputSize) {
349 PyErr_SetString(ZstdError, "could not determine content size in frame header");
349 PyErr_SetString(ZstdError, "could not determine content size in frame header");
350 goto finally;
350 goto finally;
351 }
351 }
352
352
353 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
353 result = PyBytes_FromStringAndSize(NULL, maxOutputSize);
354 destCapacity = maxOutputSize;
354 destCapacity = maxOutputSize;
355 decompressedSize = 0;
355 decompressedSize = 0;
356 }
356 }
357 /* Size is recorded in frame header. */
357 /* Size is recorded in frame header. */
358 else {
358 else {
359 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
359 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
360 if (decompressedSize > PY_SSIZE_T_MAX) {
360 if (decompressedSize > PY_SSIZE_T_MAX) {
361 PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
361 PyErr_SetString(ZstdError, "frame is too large to decompress on this platform");
362 goto finally;
362 goto finally;
363 }
363 }
364
364
365 result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
365 result = PyBytes_FromStringAndSize(NULL, (Py_ssize_t)decompressedSize);
366 destCapacity = (size_t)decompressedSize;
366 destCapacity = (size_t)decompressedSize;
367 }
367 }
368
368
369 if (!result) {
369 if (!result) {
370 goto finally;
370 goto finally;
371 }
371 }
372
372
373 outBuffer.dst = PyBytes_AsString(result);
373 outBuffer.dst = PyBytes_AsString(result);
374 outBuffer.size = destCapacity;
374 outBuffer.size = destCapacity;
375 outBuffer.pos = 0;
375 outBuffer.pos = 0;
376
376
377 inBuffer.src = source.buf;
377 inBuffer.src = source.buf;
378 inBuffer.size = source.len;
378 inBuffer.size = source.len;
379 inBuffer.pos = 0;
379 inBuffer.pos = 0;
380
380
381 Py_BEGIN_ALLOW_THREADS
381 Py_BEGIN_ALLOW_THREADS
382 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
382 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
383 Py_END_ALLOW_THREADS
383 Py_END_ALLOW_THREADS
384
384
385 if (ZSTD_isError(zresult)) {
385 if (ZSTD_isError(zresult)) {
386 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
386 PyErr_Format(ZstdError, "decompression error: %s", ZSTD_getErrorName(zresult));
387 Py_CLEAR(result);
387 Py_CLEAR(result);
388 goto finally;
388 goto finally;
389 }
389 }
390 else if (zresult) {
390 else if (zresult) {
391 PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
391 PyErr_Format(ZstdError, "decompression error: did not decompress full frame");
392 Py_CLEAR(result);
392 Py_CLEAR(result);
393 goto finally;
393 goto finally;
394 }
394 }
395 else if (decompressedSize && outBuffer.pos != decompressedSize) {
395 else if (decompressedSize && outBuffer.pos != decompressedSize) {
396 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
396 PyErr_Format(ZstdError, "decompression error: decompressed %zu bytes; expected %llu",
397 zresult, decompressedSize);
397 zresult, decompressedSize);
398 Py_CLEAR(result);
398 Py_CLEAR(result);
399 goto finally;
399 goto finally;
400 }
400 }
401 else if (outBuffer.pos < destCapacity) {
401 else if (outBuffer.pos < destCapacity) {
402 if (safe_pybytes_resize(&result, outBuffer.pos)) {
402 if (safe_pybytes_resize(&result, outBuffer.pos)) {
403 Py_CLEAR(result);
403 Py_CLEAR(result);
404 goto finally;
404 goto finally;
405 }
405 }
406 }
406 }
407
407
408 finally:
408 finally:
409 PyBuffer_Release(&source);
409 PyBuffer_Release(&source);
410 return result;
410 return result;
411 }
411 }
412
412
413 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
413 PyDoc_STRVAR(Decompressor_decompressobj__doc__,
414 "decompressobj([write_size=default])\n"
414 "decompressobj([write_size=default])\n"
415 "\n"
415 "\n"
416 "Incrementally feed data into a decompressor.\n"
416 "Incrementally feed data into a decompressor.\n"
417 "\n"
417 "\n"
418 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
418 "The returned object exposes a ``decompress(data)`` method. This makes it\n"
419 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
419 "compatible with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor`` so that\n"
420 "callers can swap in the zstd decompressor while using the same API.\n"
420 "callers can swap in the zstd decompressor while using the same API.\n"
421 );
421 );
422
422
423 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
423 static ZstdDecompressionObj* Decompressor_decompressobj(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
424 static char* kwlist[] = {
424 static char* kwlist[] = {
425 "write_size",
425 "write_size",
426 NULL
426 NULL
427 };
427 };
428
428
429 ZstdDecompressionObj* result = NULL;
429 ZstdDecompressionObj* result = NULL;
430 size_t outSize = ZSTD_DStreamOutSize();
430 size_t outSize = ZSTD_DStreamOutSize();
431
431
432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) {
432 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|k:decompressobj", kwlist, &outSize)) {
433 return NULL;
433 return NULL;
434 }
434 }
435
435
436 if (!outSize) {
436 if (!outSize) {
437 PyErr_SetString(PyExc_ValueError, "write_size must be positive");
437 PyErr_SetString(PyExc_ValueError, "write_size must be positive");
438 return NULL;
438 return NULL;
439 }
439 }
440
440
441 result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
441 result = (ZstdDecompressionObj*)PyObject_CallObject((PyObject*)&ZstdDecompressionObjType, NULL);
442 if (!result) {
442 if (!result) {
443 return NULL;
443 return NULL;
444 }
444 }
445
445
446 if (ensure_dctx(self, 1)) {
446 if (ensure_dctx(self, 1)) {
447 Py_DECREF(result);
447 Py_DECREF(result);
448 return NULL;
448 return NULL;
449 }
449 }
450
450
451 result->decompressor = self;
451 result->decompressor = self;
452 Py_INCREF(result->decompressor);
452 Py_INCREF(result->decompressor);
453 result->outSize = outSize;
453 result->outSize = outSize;
454
454
455 return result;
455 return result;
456 }
456 }
457
457
458 PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
458 PyDoc_STRVAR(Decompressor_read_to_iter__doc__,
459 "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
459 "read_to_iter(reader[, read_size=default, write_size=default, skip_bytes=0])\n"
460 "Read compressed data and return an iterator\n"
460 "Read compressed data and return an iterator\n"
461 "\n"
461 "\n"
462 "Returns an iterator of decompressed data chunks produced from reading from\n"
462 "Returns an iterator of decompressed data chunks produced from reading from\n"
463 "the ``reader``.\n"
463 "the ``reader``.\n"
464 "\n"
464 "\n"
465 "Compressed data will be obtained from ``reader`` by calling the\n"
465 "Compressed data will be obtained from ``reader`` by calling the\n"
466 "``read(size)`` method of it. The source data will be streamed into a\n"
466 "``read(size)`` method of it. The source data will be streamed into a\n"
467 "decompressor. As decompressed data is available, it will be exposed to the\n"
467 "decompressor. As decompressed data is available, it will be exposed to the\n"
468 "returned iterator.\n"
468 "returned iterator.\n"
469 "\n"
469 "\n"
470 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
470 "Data is ``read()`` in chunks of size ``read_size`` and exposed to the\n"
471 "iterator in chunks of size ``write_size``. The default values are the input\n"
471 "iterator in chunks of size ``write_size``. The default values are the input\n"
472 "and output sizes for a zstd streaming decompressor.\n"
472 "and output sizes for a zstd streaming decompressor.\n"
473 "\n"
473 "\n"
474 "There is also support for skipping the first ``skip_bytes`` of data from\n"
474 "There is also support for skipping the first ``skip_bytes`` of data from\n"
475 "the source.\n"
475 "the source.\n"
476 );
476 );
477
477
478 static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
478 static ZstdDecompressorIterator* Decompressor_read_to_iter(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
479 static char* kwlist[] = {
479 static char* kwlist[] = {
480 "reader",
480 "reader",
481 "read_size",
481 "read_size",
482 "write_size",
482 "write_size",
483 "skip_bytes",
483 "skip_bytes",
484 NULL
484 NULL
485 };
485 };
486
486
487 PyObject* reader;
487 PyObject* reader;
488 size_t inSize = ZSTD_DStreamInSize();
488 size_t inSize = ZSTD_DStreamInSize();
489 size_t outSize = ZSTD_DStreamOutSize();
489 size_t outSize = ZSTD_DStreamOutSize();
490 ZstdDecompressorIterator* result;
490 ZstdDecompressorIterator* result;
491 size_t skipBytes = 0;
491 size_t skipBytes = 0;
492
492
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist,
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kkk:read_to_iter", kwlist,
494 &reader, &inSize, &outSize, &skipBytes)) {
494 &reader, &inSize, &outSize, &skipBytes)) {
495 return NULL;
495 return NULL;
496 }
496 }
497
497
498 if (skipBytes >= inSize) {
498 if (skipBytes >= inSize) {
499 PyErr_SetString(PyExc_ValueError,
499 PyErr_SetString(PyExc_ValueError,
500 "skip_bytes must be smaller than read_size");
500 "skip_bytes must be smaller than read_size");
501 return NULL;
501 return NULL;
502 }
502 }
503
503
504 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
504 result = (ZstdDecompressorIterator*)PyObject_CallObject((PyObject*)&ZstdDecompressorIteratorType, NULL);
505 if (!result) {
505 if (!result) {
506 return NULL;
506 return NULL;
507 }
507 }
508
508
509 if (PyObject_HasAttrString(reader, "read")) {
509 if (PyObject_HasAttrString(reader, "read")) {
510 result->reader = reader;
510 result->reader = reader;
511 Py_INCREF(result->reader);
511 Py_INCREF(result->reader);
512 }
512 }
513 else if (1 == PyObject_CheckBuffer(reader)) {
513 else if (1 == PyObject_CheckBuffer(reader)) {
514 /* Object claims it is a buffer. Try to get a handle to it. */
514 /* Object claims it is a buffer. Try to get a handle to it. */
515 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
515 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
516 goto except;
516 goto except;
517 }
517 }
518 }
518 }
519 else {
519 else {
520 PyErr_SetString(PyExc_ValueError,
520 PyErr_SetString(PyExc_ValueError,
521 "must pass an object with a read() method or conforms to buffer protocol");
521 "must pass an object with a read() method or conforms to buffer protocol");
522 goto except;
522 goto except;
523 }
523 }
524
524
525 result->decompressor = self;
525 result->decompressor = self;
526 Py_INCREF(result->decompressor);
526 Py_INCREF(result->decompressor);
527
527
528 result->inSize = inSize;
528 result->inSize = inSize;
529 result->outSize = outSize;
529 result->outSize = outSize;
530 result->skipBytes = skipBytes;
530 result->skipBytes = skipBytes;
531
531
532 if (ensure_dctx(self, 1)) {
532 if (ensure_dctx(self, 1)) {
533 goto except;
533 goto except;
534 }
534 }
535
535
536 result->input.src = PyMem_Malloc(inSize);
536 result->input.src = PyMem_Malloc(inSize);
537 if (!result->input.src) {
537 if (!result->input.src) {
538 PyErr_NoMemory();
538 PyErr_NoMemory();
539 goto except;
539 goto except;
540 }
540 }
541
541
542 goto finally;
542 goto finally;
543
543
544 except:
544 except:
545 Py_CLEAR(result);
545 Py_CLEAR(result);
546
546
547 finally:
547 finally:
548
548
549 return result;
549 return result;
550 }
550 }
551
551
552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
552 PyDoc_STRVAR(Decompressor_stream_reader__doc__,
553 "stream_reader(source, [read_size=default, [read_across_frames=False]])\n"
553 "stream_reader(source, [read_size=default, [read_across_frames=False]])\n"
554 "\n"
554 "\n"
555 "Obtain an object that behaves like an I/O stream that can be used for\n"
555 "Obtain an object that behaves like an I/O stream that can be used for\n"
556 "reading decompressed output from an object.\n"
556 "reading decompressed output from an object.\n"
557 "\n"
557 "\n"
558 "The source object can be any object with a ``read(size)`` method or that\n"
558 "The source object can be any object with a ``read(size)`` method or that\n"
559 "conforms to the buffer protocol.\n"
559 "conforms to the buffer protocol.\n"
560 "\n"
560 "\n"
561 "``read_across_frames`` controls the behavior of ``read()`` when the end\n"
561 "``read_across_frames`` controls the behavior of ``read()`` when the end\n"
562 "of a zstd frame is reached. When ``True``, ``read()`` can potentially\n"
562 "of a zstd frame is reached. When ``True``, ``read()`` can potentially\n"
563 "return data belonging to multiple zstd frames. When ``False``, ``read()``\n"
563 "return data belonging to multiple zstd frames. When ``False``, ``read()``\n"
564 "will return when the end of a frame is reached.\n"
564 "will return when the end of a frame is reached.\n"
565 );
565 );
566
566
567 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
567 static ZstdDecompressionReader* Decompressor_stream_reader(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
568 static char* kwlist[] = {
568 static char* kwlist[] = {
569 "source",
569 "source",
570 "read_size",
570 "read_size",
571 "read_across_frames",
571 "read_across_frames",
572 NULL
572 NULL
573 };
573 };
574
574
575 PyObject* source;
575 PyObject* source;
576 size_t readSize = ZSTD_DStreamInSize();
576 size_t readSize = ZSTD_DStreamInSize();
577 PyObject* readAcrossFrames = NULL;
577 PyObject* readAcrossFrames = NULL;
578 ZstdDecompressionReader* result;
578 ZstdDecompressionReader* result;
579
579
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_reader", kwlist,
580 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_reader", kwlist,
581 &source, &readSize, &readAcrossFrames)) {
581 &source, &readSize, &readAcrossFrames)) {
582 return NULL;
582 return NULL;
583 }
583 }
584
584
585 if (ensure_dctx(self, 1)) {
585 if (ensure_dctx(self, 1)) {
586 return NULL;
586 return NULL;
587 }
587 }
588
588
589 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
589 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
590 if (NULL == result) {
590 if (NULL == result) {
591 return NULL;
591 return NULL;
592 }
592 }
593
593
594 if (PyObject_HasAttrString(source, "read")) {
594 if (PyObject_HasAttrString(source, "read")) {
595 result->reader = source;
595 result->reader = source;
596 Py_INCREF(source);
596 Py_INCREF(source);
597 result->readSize = readSize;
597 result->readSize = readSize;
598 }
598 }
599 else if (1 == PyObject_CheckBuffer(source)) {
599 else if (1 == PyObject_CheckBuffer(source)) {
600 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
600 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
601 Py_CLEAR(result);
601 Py_CLEAR(result);
602 return NULL;
602 return NULL;
603 }
603 }
604 }
604 }
605 else {
605 else {
606 PyErr_SetString(PyExc_TypeError,
606 PyErr_SetString(PyExc_TypeError,
607 "must pass an object with a read() method or that conforms to the buffer protocol");
607 "must pass an object with a read() method or that conforms to the buffer protocol");
608 Py_CLEAR(result);
608 Py_CLEAR(result);
609 return NULL;
609 return NULL;
610 }
610 }
611
611
612 result->decompressor = self;
612 result->decompressor = self;
613 Py_INCREF(self);
613 Py_INCREF(self);
614 result->readAcrossFrames = readAcrossFrames ? PyObject_IsTrue(readAcrossFrames) : 0;
614 result->readAcrossFrames = readAcrossFrames ? PyObject_IsTrue(readAcrossFrames) : 0;
615
615
616 return result;
616 return result;
617 }
617 }
618
618
619 PyDoc_STRVAR(Decompressor_stream_writer__doc__,
619 PyDoc_STRVAR(Decompressor_stream_writer__doc__,
620 "Create a context manager to write decompressed data to an object.\n"
620 "Create a context manager to write decompressed data to an object.\n"
621 "\n"
621 "\n"
622 "The passed object must have a ``write()`` method.\n"
622 "The passed object must have a ``write()`` method.\n"
623 "\n"
623 "\n"
624 "The caller feeds intput data to the object by calling ``write(data)``.\n"
624 "The caller feeds intput data to the object by calling ``write(data)``.\n"
625 "Decompressed data is written to the argument given as it is decompressed.\n"
625 "Decompressed data is written to the argument given as it is decompressed.\n"
626 "\n"
626 "\n"
627 "An optional ``write_size`` argument defines the size of chunks to\n"
627 "An optional ``write_size`` argument defines the size of chunks to\n"
628 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
628 "``write()`` to the writer. It defaults to the default output size for a zstd\n"
629 "streaming decompressor.\n"
629 "streaming decompressor.\n"
630 );
630 );
631
631
632 static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
632 static ZstdDecompressionWriter* Decompressor_stream_writer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
633 static char* kwlist[] = {
633 static char* kwlist[] = {
634 "writer",
634 "writer",
635 "write_size",
635 "write_size",
636 "write_return_read",
636 "write_return_read",
637 NULL
637 NULL
638 };
638 };
639
639
640 PyObject* writer;
640 PyObject* writer;
641 size_t outSize = ZSTD_DStreamOutSize();
641 size_t outSize = ZSTD_DStreamOutSize();
642 PyObject* writeReturnRead = NULL;
642 PyObject* writeReturnRead = NULL;
643 ZstdDecompressionWriter* result;
643 ZstdDecompressionWriter* result;
644
644
645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_writer", kwlist,
645 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|kO:stream_writer", kwlist,
646 &writer, &outSize, &writeReturnRead)) {
646 &writer, &outSize, &writeReturnRead)) {
647 return NULL;
647 return NULL;
648 }
648 }
649
649
650 if (!PyObject_HasAttrString(writer, "write")) {
650 if (!PyObject_HasAttrString(writer, "write")) {
651 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
651 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
652 return NULL;
652 return NULL;
653 }
653 }
654
654
655 if (ensure_dctx(self, 1)) {
655 if (ensure_dctx(self, 1)) {
656 return NULL;
656 return NULL;
657 }
657 }
658
658
659 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
659 result = (ZstdDecompressionWriter*)PyObject_CallObject((PyObject*)&ZstdDecompressionWriterType, NULL);
660 if (!result) {
660 if (!result) {
661 return NULL;
661 return NULL;
662 }
662 }
663
663
664 result->decompressor = self;
664 result->decompressor = self;
665 Py_INCREF(result->decompressor);
665 Py_INCREF(result->decompressor);
666
666
667 result->writer = writer;
667 result->writer = writer;
668 Py_INCREF(result->writer);
668 Py_INCREF(result->writer);
669
669
670 result->outSize = outSize;
670 result->outSize = outSize;
671 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
671 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
672
672
673 return result;
673 return result;
674 }
674 }
675
675
676 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
676 PyDoc_STRVAR(Decompressor_decompress_content_dict_chain__doc__,
677 "Decompress a series of chunks using the content dictionary chaining technique\n"
677 "Decompress a series of chunks using the content dictionary chaining technique\n"
678 );
678 );
679
679
680 static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
680 static PyObject* Decompressor_decompress_content_dict_chain(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
681 static char* kwlist[] = {
681 static char* kwlist[] = {
682 "frames",
682 "frames",
683 NULL
683 NULL
684 };
684 };
685
685
686 PyObject* chunks;
686 PyObject* chunks;
687 Py_ssize_t chunksLen;
687 Py_ssize_t chunksLen;
688 Py_ssize_t chunkIndex;
688 Py_ssize_t chunkIndex;
689 char parity = 0;
689 char parity = 0;
690 PyObject* chunk;
690 PyObject* chunk;
691 char* chunkData;
691 char* chunkData;
692 Py_ssize_t chunkSize;
692 Py_ssize_t chunkSize;
693 size_t zresult;
693 size_t zresult;
694 ZSTD_frameHeader frameHeader;
694 ZSTD_frameHeader frameHeader;
695 void* buffer1 = NULL;
695 void* buffer1 = NULL;
696 size_t buffer1Size = 0;
696 size_t buffer1Size = 0;
697 size_t buffer1ContentSize = 0;
697 size_t buffer1ContentSize = 0;
698 void* buffer2 = NULL;
698 void* buffer2 = NULL;
699 size_t buffer2Size = 0;
699 size_t buffer2Size = 0;
700 size_t buffer2ContentSize = 0;
700 size_t buffer2ContentSize = 0;
701 void* destBuffer = NULL;
701 void* destBuffer = NULL;
702 PyObject* result = NULL;
702 PyObject* result = NULL;
703 ZSTD_outBuffer outBuffer;
703 ZSTD_outBuffer outBuffer;
704 ZSTD_inBuffer inBuffer;
704 ZSTD_inBuffer inBuffer;
705
705
706 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
706 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O!:decompress_content_dict_chain",
707 kwlist, &PyList_Type, &chunks)) {
707 kwlist, &PyList_Type, &chunks)) {
708 return NULL;
708 return NULL;
709 }
709 }
710
710
711 chunksLen = PyList_Size(chunks);
711 chunksLen = PyList_Size(chunks);
712 if (!chunksLen) {
712 if (!chunksLen) {
713 PyErr_SetString(PyExc_ValueError, "empty input chain");
713 PyErr_SetString(PyExc_ValueError, "empty input chain");
714 return NULL;
714 return NULL;
715 }
715 }
716
716
717 /* The first chunk should not be using a dictionary. We handle it specially. */
717 /* The first chunk should not be using a dictionary. We handle it specially. */
718 chunk = PyList_GetItem(chunks, 0);
718 chunk = PyList_GetItem(chunks, 0);
719 if (!PyBytes_Check(chunk)) {
719 if (!PyBytes_Check(chunk)) {
720 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
720 PyErr_SetString(PyExc_ValueError, "chunk 0 must be bytes");
721 return NULL;
721 return NULL;
722 }
722 }
723
723
724 /* We require that all chunks be zstd frames and that they have content size set. */
724 /* We require that all chunks be zstd frames and that they have content size set. */
725 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
725 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
726 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
726 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
727 if (ZSTD_isError(zresult)) {
727 if (ZSTD_isError(zresult)) {
728 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
728 PyErr_SetString(PyExc_ValueError, "chunk 0 is not a valid zstd frame");
729 return NULL;
729 return NULL;
730 }
730 }
731 else if (zresult) {
731 else if (zresult) {
732 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
732 PyErr_SetString(PyExc_ValueError, "chunk 0 is too small to contain a zstd frame");
733 return NULL;
733 return NULL;
734 }
734 }
735
735
736 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
736 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
737 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
737 PyErr_SetString(PyExc_ValueError, "chunk 0 missing content size in frame");
738 return NULL;
738 return NULL;
739 }
739 }
740
740
741 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
741 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
742
742
743 /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
743 /* We check against PY_SSIZE_T_MAX here because we ultimately cast the
744 * result to a Python object and it's length can be no greater than
744 * result to a Python object and it's length can be no greater than
745 * Py_ssize_t. In theory, we could have an intermediate frame that is
745 * Py_ssize_t. In theory, we could have an intermediate frame that is
746 * larger. But a) why would this API be used for frames that large b)
746 * larger. But a) why would this API be used for frames that large b)
747 * it isn't worth the complexity to support. */
747 * it isn't worth the complexity to support. */
748 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
748 assert(SIZE_MAX >= PY_SSIZE_T_MAX);
749 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
749 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
750 PyErr_SetString(PyExc_ValueError,
750 PyErr_SetString(PyExc_ValueError,
751 "chunk 0 is too large to decompress on this platform");
751 "chunk 0 is too large to decompress on this platform");
752 return NULL;
752 return NULL;
753 }
753 }
754
754
755 if (ensure_dctx(self, 0)) {
755 if (ensure_dctx(self, 0)) {
756 goto finally;
756 goto finally;
757 }
757 }
758
758
759 buffer1Size = (size_t)frameHeader.frameContentSize;
759 buffer1Size = (size_t)frameHeader.frameContentSize;
760 buffer1 = PyMem_Malloc(buffer1Size);
760 buffer1 = PyMem_Malloc(buffer1Size);
761 if (!buffer1) {
761 if (!buffer1) {
762 goto finally;
762 goto finally;
763 }
763 }
764
764
765 outBuffer.dst = buffer1;
765 outBuffer.dst = buffer1;
766 outBuffer.size = buffer1Size;
766 outBuffer.size = buffer1Size;
767 outBuffer.pos = 0;
767 outBuffer.pos = 0;
768
768
769 inBuffer.src = chunkData;
769 inBuffer.src = chunkData;
770 inBuffer.size = chunkSize;
770 inBuffer.size = chunkSize;
771 inBuffer.pos = 0;
771 inBuffer.pos = 0;
772
772
773 Py_BEGIN_ALLOW_THREADS
773 Py_BEGIN_ALLOW_THREADS
774 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
774 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
775 Py_END_ALLOW_THREADS
775 Py_END_ALLOW_THREADS
776 if (ZSTD_isError(zresult)) {
776 if (ZSTD_isError(zresult)) {
777 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
777 PyErr_Format(ZstdError, "could not decompress chunk 0: %s", ZSTD_getErrorName(zresult));
778 goto finally;
778 goto finally;
779 }
779 }
780 else if (zresult) {
780 else if (zresult) {
781 PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
781 PyErr_Format(ZstdError, "chunk 0 did not decompress full frame");
782 goto finally;
782 goto finally;
783 }
783 }
784
784
785 buffer1ContentSize = outBuffer.pos;
785 buffer1ContentSize = outBuffer.pos;
786
786
787 /* Special case of a simple chain. */
787 /* Special case of a simple chain. */
788 if (1 == chunksLen) {
788 if (1 == chunksLen) {
789 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
789 result = PyBytes_FromStringAndSize(buffer1, buffer1Size);
790 goto finally;
790 goto finally;
791 }
791 }
792
792
793 /* This should ideally look at next chunk. But this is slightly simpler. */
793 /* This should ideally look at next chunk. But this is slightly simpler. */
794 buffer2Size = (size_t)frameHeader.frameContentSize;
794 buffer2Size = (size_t)frameHeader.frameContentSize;
795 buffer2 = PyMem_Malloc(buffer2Size);
795 buffer2 = PyMem_Malloc(buffer2Size);
796 if (!buffer2) {
796 if (!buffer2) {
797 goto finally;
797 goto finally;
798 }
798 }
799
799
800 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
800 /* For each subsequent chunk, use the previous fulltext as a content dictionary.
801 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
801 Our strategy is to have 2 buffers. One holds the previous fulltext (to be
802 used as a content dictionary) and the other holds the new fulltext. The
802 used as a content dictionary) and the other holds the new fulltext. The
803 buffers grow when needed but never decrease in size. This limits the
803 buffers grow when needed but never decrease in size. This limits the
804 memory allocator overhead.
804 memory allocator overhead.
805 */
805 */
806 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
806 for (chunkIndex = 1; chunkIndex < chunksLen; chunkIndex++) {
807 chunk = PyList_GetItem(chunks, chunkIndex);
807 chunk = PyList_GetItem(chunks, chunkIndex);
808 if (!PyBytes_Check(chunk)) {
808 if (!PyBytes_Check(chunk)) {
809 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
809 PyErr_Format(PyExc_ValueError, "chunk %zd must be bytes", chunkIndex);
810 goto finally;
810 goto finally;
811 }
811 }
812
812
813 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
813 PyBytes_AsStringAndSize(chunk, &chunkData, &chunkSize);
814 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
814 zresult = ZSTD_getFrameHeader(&frameHeader, (void*)chunkData, chunkSize);
815 if (ZSTD_isError(zresult)) {
815 if (ZSTD_isError(zresult)) {
816 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
816 PyErr_Format(PyExc_ValueError, "chunk %zd is not a valid zstd frame", chunkIndex);
817 goto finally;
817 goto finally;
818 }
818 }
819 else if (zresult) {
819 else if (zresult) {
820 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
820 PyErr_Format(PyExc_ValueError, "chunk %zd is too small to contain a zstd frame", chunkIndex);
821 goto finally;
821 goto finally;
822 }
822 }
823
823
824 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
824 if (ZSTD_CONTENTSIZE_UNKNOWN == frameHeader.frameContentSize) {
825 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
825 PyErr_Format(PyExc_ValueError, "chunk %zd missing content size in frame", chunkIndex);
826 goto finally;
826 goto finally;
827 }
827 }
828
828
829 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
829 assert(ZSTD_CONTENTSIZE_ERROR != frameHeader.frameContentSize);
830
830
831 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
831 if (frameHeader.frameContentSize > PY_SSIZE_T_MAX) {
832 PyErr_Format(PyExc_ValueError,
832 PyErr_Format(PyExc_ValueError,
833 "chunk %zd is too large to decompress on this platform", chunkIndex);
833 "chunk %zd is too large to decompress on this platform", chunkIndex);
834 goto finally;
834 goto finally;
835 }
835 }
836
836
837 inBuffer.src = chunkData;
837 inBuffer.src = chunkData;
838 inBuffer.size = chunkSize;
838 inBuffer.size = chunkSize;
839 inBuffer.pos = 0;
839 inBuffer.pos = 0;
840
840
841 parity = chunkIndex % 2;
841 parity = chunkIndex % 2;
842
842
843 /* This could definitely be abstracted to reduce code duplication. */
843 /* This could definitely be abstracted to reduce code duplication. */
844 if (parity) {
844 if (parity) {
845 /* Resize destination buffer to hold larger content. */
845 /* Resize destination buffer to hold larger content. */
846 if (buffer2Size < frameHeader.frameContentSize) {
846 if (buffer2Size < frameHeader.frameContentSize) {
847 buffer2Size = (size_t)frameHeader.frameContentSize;
847 buffer2Size = (size_t)frameHeader.frameContentSize;
848 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
848 destBuffer = PyMem_Realloc(buffer2, buffer2Size);
849 if (!destBuffer) {
849 if (!destBuffer) {
850 goto finally;
850 goto finally;
851 }
851 }
852 buffer2 = destBuffer;
852 buffer2 = destBuffer;
853 }
853 }
854
854
855 Py_BEGIN_ALLOW_THREADS
855 Py_BEGIN_ALLOW_THREADS
856 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
856 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
857 buffer1, buffer1ContentSize, ZSTD_dct_rawContent);
857 buffer1, buffer1ContentSize, ZSTD_dct_rawContent);
858 Py_END_ALLOW_THREADS
858 Py_END_ALLOW_THREADS
859 if (ZSTD_isError(zresult)) {
859 if (ZSTD_isError(zresult)) {
860 PyErr_Format(ZstdError,
860 PyErr_Format(ZstdError,
861 "failed to load prefix dictionary at chunk %zd", chunkIndex);
861 "failed to load prefix dictionary at chunk %zd", chunkIndex);
862 goto finally;
862 goto finally;
863 }
863 }
864
864
865 outBuffer.dst = buffer2;
865 outBuffer.dst = buffer2;
866 outBuffer.size = buffer2Size;
866 outBuffer.size = buffer2Size;
867 outBuffer.pos = 0;
867 outBuffer.pos = 0;
868
868
869 Py_BEGIN_ALLOW_THREADS
869 Py_BEGIN_ALLOW_THREADS
870 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
870 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
871 Py_END_ALLOW_THREADS
871 Py_END_ALLOW_THREADS
872 if (ZSTD_isError(zresult)) {
872 if (ZSTD_isError(zresult)) {
873 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
873 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
874 chunkIndex, ZSTD_getErrorName(zresult));
874 chunkIndex, ZSTD_getErrorName(zresult));
875 goto finally;
875 goto finally;
876 }
876 }
877 else if (zresult) {
877 else if (zresult) {
878 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
878 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
879 chunkIndex);
879 chunkIndex);
880 goto finally;
880 goto finally;
881 }
881 }
882
882
883 buffer2ContentSize = outBuffer.pos;
883 buffer2ContentSize = outBuffer.pos;
884 }
884 }
885 else {
885 else {
886 if (buffer1Size < frameHeader.frameContentSize) {
886 if (buffer1Size < frameHeader.frameContentSize) {
887 buffer1Size = (size_t)frameHeader.frameContentSize;
887 buffer1Size = (size_t)frameHeader.frameContentSize;
888 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
888 destBuffer = PyMem_Realloc(buffer1, buffer1Size);
889 if (!destBuffer) {
889 if (!destBuffer) {
890 goto finally;
890 goto finally;
891 }
891 }
892 buffer1 = destBuffer;
892 buffer1 = destBuffer;
893 }
893 }
894
894
895 Py_BEGIN_ALLOW_THREADS
895 Py_BEGIN_ALLOW_THREADS
896 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
896 zresult = ZSTD_DCtx_refPrefix_advanced(self->dctx,
897 buffer2, buffer2ContentSize, ZSTD_dct_rawContent);
897 buffer2, buffer2ContentSize, ZSTD_dct_rawContent);
898 Py_END_ALLOW_THREADS
898 Py_END_ALLOW_THREADS
899 if (ZSTD_isError(zresult)) {
899 if (ZSTD_isError(zresult)) {
900 PyErr_Format(ZstdError,
900 PyErr_Format(ZstdError,
901 "failed to load prefix dictionary at chunk %zd", chunkIndex);
901 "failed to load prefix dictionary at chunk %zd", chunkIndex);
902 goto finally;
902 goto finally;
903 }
903 }
904
904
905 outBuffer.dst = buffer1;
905 outBuffer.dst = buffer1;
906 outBuffer.size = buffer1Size;
906 outBuffer.size = buffer1Size;
907 outBuffer.pos = 0;
907 outBuffer.pos = 0;
908
908
909 Py_BEGIN_ALLOW_THREADS
909 Py_BEGIN_ALLOW_THREADS
910 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
910 zresult = ZSTD_decompressStream(self->dctx, &outBuffer, &inBuffer);
911 Py_END_ALLOW_THREADS
911 Py_END_ALLOW_THREADS
912 if (ZSTD_isError(zresult)) {
912 if (ZSTD_isError(zresult)) {
913 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
913 PyErr_Format(ZstdError, "could not decompress chunk %zd: %s",
914 chunkIndex, ZSTD_getErrorName(zresult));
914 chunkIndex, ZSTD_getErrorName(zresult));
915 goto finally;
915 goto finally;
916 }
916 }
917 else if (zresult) {
917 else if (zresult) {
918 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
918 PyErr_Format(ZstdError, "chunk %zd did not decompress full frame",
919 chunkIndex);
919 chunkIndex);
920 goto finally;
920 goto finally;
921 }
921 }
922
922
923 buffer1ContentSize = outBuffer.pos;
923 buffer1ContentSize = outBuffer.pos;
924 }
924 }
925 }
925 }
926
926
927 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
927 result = PyBytes_FromStringAndSize(parity ? buffer2 : buffer1,
928 parity ? buffer2ContentSize : buffer1ContentSize);
928 parity ? buffer2ContentSize : buffer1ContentSize);
929
929
930 finally:
930 finally:
931 if (buffer2) {
931 if (buffer2) {
932 PyMem_Free(buffer2);
932 PyMem_Free(buffer2);
933 }
933 }
934 if (buffer1) {
934 if (buffer1) {
935 PyMem_Free(buffer1);
935 PyMem_Free(buffer1);
936 }
936 }
937
937
938 return result;
938 return result;
939 }
939 }
940
940
/**
 * Describes one compressed zstd frame to be decompressed.
 */
typedef struct {
	/* Start of the compressed frame bytes. */
	void *sourceData;
	/* Number of compressed bytes at sourceData. */
	size_t sourceSize;
	/*
	 * Expected decompressed size in bytes. A value of 0 makes the worker
	 * look the size up in the frame header and store it back here.
	 */
	size_t destSize;
} FramePointer;
946
946
947 typedef struct {
947 typedef struct {
948 FramePointer* frames;
948 FramePointer* frames;
949 Py_ssize_t framesSize;
949 Py_ssize_t framesSize;
950 unsigned long long compressedSize;
950 unsigned long long compressedSize;
951 } FrameSources;
951 } FrameSources;
952
952
953 typedef struct {
953 typedef struct {
954 void* dest;
954 void* dest;
955 Py_ssize_t destSize;
955 Py_ssize_t destSize;
956 BufferSegment* segments;
956 BufferSegment* segments;
957 Py_ssize_t segmentsSize;
957 Py_ssize_t segmentsSize;
958 } DestBuffer;
958 } DestBuffer;
959
959
/**
 * Failure categories a decompression worker can report.
 * Enumerators are numbered consecutively starting at 0.
 */
typedef enum {
	/* 0: no error occurred. */
	WorkerError_none = 0,
	/* 1: zstd reported an error while decompressing a frame. */
	WorkerError_zstd,
	/* 2: an allocation failed or a size does not fit in size_t. */
	WorkerError_memory,
	/* 3: a frame did not fully decompress to its expected size. */
	WorkerError_sizeMismatch,
	/* 4: the decompressed size of a frame could not be determined. */
	WorkerError_unknownSize,
} WorkerError;
967
967
968 typedef struct {
968 typedef struct {
969 /* Source records and length */
969 /* Source records and length */
970 FramePointer* framePointers;
970 FramePointer* framePointers;
971 /* Which records to process. */
971 /* Which records to process. */
972 Py_ssize_t startOffset;
972 Py_ssize_t startOffset;
973 Py_ssize_t endOffset;
973 Py_ssize_t endOffset;
974 unsigned long long totalSourceSize;
974 unsigned long long totalSourceSize;
975
975
976 /* Compression state and settings. */
976 /* Compression state and settings. */
977 ZSTD_DCtx* dctx;
977 ZSTD_DCtx* dctx;
978 int requireOutputSizes;
978 int requireOutputSizes;
979
979
980 /* Output storage. */
980 /* Output storage. */
981 DestBuffer* destBuffers;
981 DestBuffer* destBuffers;
982 Py_ssize_t destCount;
982 Py_ssize_t destCount;
983
983
984 /* Item that error occurred on. */
984 /* Item that error occurred on. */
985 Py_ssize_t errorOffset;
985 Py_ssize_t errorOffset;
986 /* If an error occurred. */
986 /* If an error occurred. */
987 WorkerError error;
987 WorkerError error;
988 /* result from zstd decompression operation */
988 /* result from zstd decompression operation */
989 size_t zresult;
989 size_t zresult;
990 } WorkerState;
990 } WorkerState;
991
991
992 static void decompress_worker(WorkerState* state) {
992 static void decompress_worker(WorkerState* state) {
993 size_t allocationSize;
993 size_t allocationSize;
994 DestBuffer* destBuffer;
994 DestBuffer* destBuffer;
995 Py_ssize_t frameIndex;
995 Py_ssize_t frameIndex;
996 Py_ssize_t localOffset = 0;
996 Py_ssize_t localOffset = 0;
997 Py_ssize_t currentBufferStartIndex = state->startOffset;
997 Py_ssize_t currentBufferStartIndex = state->startOffset;
998 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
998 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
999 void* tmpBuf;
999 void* tmpBuf;
1000 Py_ssize_t destOffset = 0;
1000 Py_ssize_t destOffset = 0;
1001 FramePointer* framePointers = state->framePointers;
1001 FramePointer* framePointers = state->framePointers;
1002 size_t zresult;
1002 size_t zresult;
1003 unsigned long long totalOutputSize = 0;
1003 unsigned long long totalOutputSize = 0;
1004
1004
1005 assert(NULL == state->destBuffers);
1005 assert(NULL == state->destBuffers);
1006 assert(0 == state->destCount);
1006 assert(0 == state->destCount);
1007 assert(state->endOffset - state->startOffset >= 0);
1007 assert(state->endOffset - state->startOffset >= 0);
1008
1008
1009 /* We could get here due to the way work is allocated. Ideally we wouldn't
1009 /* We could get here due to the way work is allocated. Ideally we wouldn't
1010 get here. But that would require a bit of a refactor in the caller. */
1010 get here. But that would require a bit of a refactor in the caller. */
1011 if (state->totalSourceSize > SIZE_MAX) {
1011 if (state->totalSourceSize > SIZE_MAX) {
1012 state->error = WorkerError_memory;
1012 state->error = WorkerError_memory;
1013 state->errorOffset = 0;
1013 state->errorOffset = 0;
1014 return;
1014 return;
1015 }
1015 }
1016
1016
1017 /*
1017 /*
1018 * We need to allocate a buffer to hold decompressed data. How we do this
1018 * We need to allocate a buffer to hold decompressed data. How we do this
1019 * depends on what we know about the output. The following scenarios are
1019 * depends on what we know about the output. The following scenarios are
1020 * possible:
1020 * possible:
1021 *
1021 *
1022 * 1. All structs defining frames declare the output size.
1022 * 1. All structs defining frames declare the output size.
1023 * 2. The decompressed size is embedded within the zstd frame.
1023 * 2. The decompressed size is embedded within the zstd frame.
1024 * 3. The decompressed size is not stored anywhere.
1024 * 3. The decompressed size is not stored anywhere.
1025 *
1025 *
1026 * For now, we only support #1 and #2.
1026 * For now, we only support #1 and #2.
1027 */
1027 */
1028
1028
1029 /* Resolve ouput segments. */
1029 /* Resolve ouput segments. */
1030 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1030 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1031 FramePointer* fp = &framePointers[frameIndex];
1031 FramePointer* fp = &framePointers[frameIndex];
1032 unsigned long long decompressedSize;
1032 unsigned long long decompressedSize;
1033
1033
1034 if (0 == fp->destSize) {
1034 if (0 == fp->destSize) {
1035 decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
1035 decompressedSize = ZSTD_getFrameContentSize(fp->sourceData, fp->sourceSize);
1036
1036
1037 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
1037 if (ZSTD_CONTENTSIZE_ERROR == decompressedSize) {
1038 state->error = WorkerError_unknownSize;
1038 state->error = WorkerError_unknownSize;
1039 state->errorOffset = frameIndex;
1039 state->errorOffset = frameIndex;
1040 return;
1040 return;
1041 }
1041 }
1042 else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
1042 else if (ZSTD_CONTENTSIZE_UNKNOWN == decompressedSize) {
1043 if (state->requireOutputSizes) {
1043 if (state->requireOutputSizes) {
1044 state->error = WorkerError_unknownSize;
1044 state->error = WorkerError_unknownSize;
1045 state->errorOffset = frameIndex;
1045 state->errorOffset = frameIndex;
1046 return;
1046 return;
1047 }
1047 }
1048
1048
1049 /* This will fail the assert for .destSize > 0 below. */
1049 /* This will fail the assert for .destSize > 0 below. */
1050 decompressedSize = 0;
1050 decompressedSize = 0;
1051 }
1051 }
1052
1052
1053 if (decompressedSize > SIZE_MAX) {
1053 if (decompressedSize > SIZE_MAX) {
1054 state->error = WorkerError_memory;
1054 state->error = WorkerError_memory;
1055 state->errorOffset = frameIndex;
1055 state->errorOffset = frameIndex;
1056 return;
1056 return;
1057 }
1057 }
1058
1058
1059 fp->destSize = (size_t)decompressedSize;
1059 fp->destSize = (size_t)decompressedSize;
1060 }
1060 }
1061
1061
1062 totalOutputSize += fp->destSize;
1062 totalOutputSize += fp->destSize;
1063 }
1063 }
1064
1064
1065 state->destBuffers = calloc(1, sizeof(DestBuffer));
1065 state->destBuffers = calloc(1, sizeof(DestBuffer));
1066 if (NULL == state->destBuffers) {
1066 if (NULL == state->destBuffers) {
1067 state->error = WorkerError_memory;
1067 state->error = WorkerError_memory;
1068 return;
1068 return;
1069 }
1069 }
1070
1070
1071 state->destCount = 1;
1071 state->destCount = 1;
1072
1072
1073 destBuffer = &state->destBuffers[state->destCount - 1];
1073 destBuffer = &state->destBuffers[state->destCount - 1];
1074
1074
1075 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
1075 assert(framePointers[state->startOffset].destSize > 0); /* For now. */
1076
1076
1077 allocationSize = roundpow2((size_t)state->totalSourceSize);
1077 allocationSize = roundpow2((size_t)state->totalSourceSize);
1078
1078
1079 if (framePointers[state->startOffset].destSize > allocationSize) {
1079 if (framePointers[state->startOffset].destSize > allocationSize) {
1080 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
1080 allocationSize = roundpow2(framePointers[state->startOffset].destSize);
1081 }
1081 }
1082
1082
1083 destBuffer->dest = malloc(allocationSize);
1083 destBuffer->dest = malloc(allocationSize);
1084 if (NULL == destBuffer->dest) {
1084 if (NULL == destBuffer->dest) {
1085 state->error = WorkerError_memory;
1085 state->error = WorkerError_memory;
1086 return;
1086 return;
1087 }
1087 }
1088
1088
1089 destBuffer->destSize = allocationSize;
1089 destBuffer->destSize = allocationSize;
1090
1090
1091 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1091 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1092 if (NULL == destBuffer->segments) {
1092 if (NULL == destBuffer->segments) {
1093 /* Caller will free state->dest as part of cleanup. */
1093 /* Caller will free state->dest as part of cleanup. */
1094 state->error = WorkerError_memory;
1094 state->error = WorkerError_memory;
1095 return;
1095 return;
1096 }
1096 }
1097
1097
1098 destBuffer->segmentsSize = remainingItems;
1098 destBuffer->segmentsSize = remainingItems;
1099
1099
1100 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1100 for (frameIndex = state->startOffset; frameIndex <= state->endOffset; frameIndex++) {
1101 ZSTD_outBuffer outBuffer;
1101 ZSTD_outBuffer outBuffer;
1102 ZSTD_inBuffer inBuffer;
1102 ZSTD_inBuffer inBuffer;
1103 const void* source = framePointers[frameIndex].sourceData;
1103 const void* source = framePointers[frameIndex].sourceData;
1104 const size_t sourceSize = framePointers[frameIndex].sourceSize;
1104 const size_t sourceSize = framePointers[frameIndex].sourceSize;
1105 void* dest;
1105 void* dest;
1106 const size_t decompressedSize = framePointers[frameIndex].destSize;
1106 const size_t decompressedSize = framePointers[frameIndex].destSize;
1107 size_t destAvailable = destBuffer->destSize - destOffset;
1107 size_t destAvailable = destBuffer->destSize - destOffset;
1108
1108
1109 assert(decompressedSize > 0); /* For now. */
1109 assert(decompressedSize > 0); /* For now. */
1110
1110
1111 /*
1111 /*
1112 * Not enough space in current buffer. Finish current before and allocate and
1112 * Not enough space in current buffer. Finish current before and allocate and
1113 * switch to a new one.
1113 * switch to a new one.
1114 */
1114 */
1115 if (decompressedSize > destAvailable) {
1115 if (decompressedSize > destAvailable) {
1116 /*
1116 /*
1117 * Shrinking the destination buffer is optional. But it should be cheap,
1117 * Shrinking the destination buffer is optional. But it should be cheap,
1118 * so we just do it.
1118 * so we just do it.
1119 */
1119 */
1120 if (destAvailable) {
1120 if (destAvailable) {
1121 tmpBuf = realloc(destBuffer->dest, destOffset);
1121 tmpBuf = realloc(destBuffer->dest, destOffset);
1122 if (NULL == tmpBuf) {
1122 if (NULL == tmpBuf) {
1123 state->error = WorkerError_memory;
1123 state->error = WorkerError_memory;
1124 return;
1124 return;
1125 }
1125 }
1126
1126
1127 destBuffer->dest = tmpBuf;
1127 destBuffer->dest = tmpBuf;
1128 destBuffer->destSize = destOffset;
1128 destBuffer->destSize = destOffset;
1129 }
1129 }
1130
1130
1131 /* Truncate segments buffer. */
1131 /* Truncate segments buffer. */
1132 tmpBuf = realloc(destBuffer->segments,
1132 tmpBuf = realloc(destBuffer->segments,
1133 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
1133 (frameIndex - currentBufferStartIndex) * sizeof(BufferSegment));
1134 if (NULL == tmpBuf) {
1134 if (NULL == tmpBuf) {
1135 state->error = WorkerError_memory;
1135 state->error = WorkerError_memory;
1136 return;
1136 return;
1137 }
1137 }
1138
1138
1139 destBuffer->segments = tmpBuf;
1139 destBuffer->segments = tmpBuf;
1140 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
1140 destBuffer->segmentsSize = frameIndex - currentBufferStartIndex;
1141
1141
1142 /* Grow space for new DestBuffer. */
1142 /* Grow space for new DestBuffer. */
1143 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1143 tmpBuf = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1144 if (NULL == tmpBuf) {
1144 if (NULL == tmpBuf) {
1145 state->error = WorkerError_memory;
1145 state->error = WorkerError_memory;
1146 return;
1146 return;
1147 }
1147 }
1148
1148
1149 state->destBuffers = tmpBuf;
1149 state->destBuffers = tmpBuf;
1150 state->destCount++;
1150 state->destCount++;
1151
1151
1152 destBuffer = &state->destBuffers[state->destCount - 1];
1152 destBuffer = &state->destBuffers[state->destCount - 1];
1153
1153
1154 /* Don't take any chances will non-NULL pointers. */
1154 /* Don't take any chances will non-NULL pointers. */
1155 memset(destBuffer, 0, sizeof(DestBuffer));
1155 memset(destBuffer, 0, sizeof(DestBuffer));
1156
1156
1157 allocationSize = roundpow2((size_t)state->totalSourceSize);
1157 allocationSize = roundpow2((size_t)state->totalSourceSize);
1158
1158
1159 if (decompressedSize > allocationSize) {
1159 if (decompressedSize > allocationSize) {
1160 allocationSize = roundpow2(decompressedSize);
1160 allocationSize = roundpow2(decompressedSize);
1161 }
1161 }
1162
1162
1163 destBuffer->dest = malloc(allocationSize);
1163 destBuffer->dest = malloc(allocationSize);
1164 if (NULL == destBuffer->dest) {
1164 if (NULL == destBuffer->dest) {
1165 state->error = WorkerError_memory;
1165 state->error = WorkerError_memory;
1166 return;
1166 return;
1167 }
1167 }
1168
1168
1169 destBuffer->destSize = allocationSize;
1169 destBuffer->destSize = allocationSize;
1170 destAvailable = allocationSize;
1170 destAvailable = allocationSize;
1171 destOffset = 0;
1171 destOffset = 0;
1172 localOffset = 0;
1172 localOffset = 0;
1173
1173
1174 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1174 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1175 if (NULL == destBuffer->segments) {
1175 if (NULL == destBuffer->segments) {
1176 state->error = WorkerError_memory;
1176 state->error = WorkerError_memory;
1177 return;
1177 return;
1178 }
1178 }
1179
1179
1180 destBuffer->segmentsSize = remainingItems;
1180 destBuffer->segmentsSize = remainingItems;
1181 currentBufferStartIndex = frameIndex;
1181 currentBufferStartIndex = frameIndex;
1182 }
1182 }
1183
1183
1184 dest = (char*)destBuffer->dest + destOffset;
1184 dest = (char*)destBuffer->dest + destOffset;
1185
1185
1186 outBuffer.dst = dest;
1186 outBuffer.dst = dest;
1187 outBuffer.size = decompressedSize;
1187 outBuffer.size = decompressedSize;
1188 outBuffer.pos = 0;
1188 outBuffer.pos = 0;
1189
1189
1190 inBuffer.src = source;
1190 inBuffer.src = source;
1191 inBuffer.size = sourceSize;
1191 inBuffer.size = sourceSize;
1192 inBuffer.pos = 0;
1192 inBuffer.pos = 0;
1193
1193
1194 zresult = ZSTD_decompressStream(state->dctx, &outBuffer, &inBuffer);
1194 zresult = ZSTD_decompressStream(state->dctx, &outBuffer, &inBuffer);
1195 if (ZSTD_isError(zresult)) {
1195 if (ZSTD_isError(zresult)) {
1196 state->error = WorkerError_zstd;
1196 state->error = WorkerError_zstd;
1197 state->zresult = zresult;
1197 state->zresult = zresult;
1198 state->errorOffset = frameIndex;
1198 state->errorOffset = frameIndex;
1199 return;
1199 return;
1200 }
1200 }
1201 else if (zresult || outBuffer.pos != decompressedSize) {
1201 else if (zresult || outBuffer.pos != decompressedSize) {
1202 state->error = WorkerError_sizeMismatch;
1202 state->error = WorkerError_sizeMismatch;
1203 state->zresult = outBuffer.pos;
1203 state->zresult = outBuffer.pos;
1204 state->errorOffset = frameIndex;
1204 state->errorOffset = frameIndex;
1205 return;
1205 return;
1206 }
1206 }
1207
1207
1208 destBuffer->segments[localOffset].offset = destOffset;
1208 destBuffer->segments[localOffset].offset = destOffset;
1209 destBuffer->segments[localOffset].length = outBuffer.pos;
1209 destBuffer->segments[localOffset].length = outBuffer.pos;
1210 destOffset += outBuffer.pos;
1210 destOffset += outBuffer.pos;
1211 localOffset++;
1211 localOffset++;
1212 remainingItems--;
1212 remainingItems--;
1213 }
1213 }
1214
1214
1215 if (destBuffer->destSize > destOffset) {
1215 if (destBuffer->destSize > destOffset) {
1216 tmpBuf = realloc(destBuffer->dest, destOffset);
1216 tmpBuf = realloc(destBuffer->dest, destOffset);
1217 if (NULL == tmpBuf) {
1217 if (NULL == tmpBuf) {
1218 state->error = WorkerError_memory;
1218 state->error = WorkerError_memory;
1219 return;
1219 return;
1220 }
1220 }
1221
1221
1222 destBuffer->dest = tmpBuf;
1222 destBuffer->dest = tmpBuf;
1223 destBuffer->destSize = destOffset;
1223 destBuffer->destSize = destOffset;
1224 }
1224 }
1225 }
1225 }
1226
1226
1227 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1227 ZstdBufferWithSegmentsCollection* decompress_from_framesources(ZstdDecompressor* decompressor, FrameSources* frames,
1228 Py_ssize_t threadCount) {
1228 Py_ssize_t threadCount) {
1229 Py_ssize_t i = 0;
1229 Py_ssize_t i = 0;
1230 int errored = 0;
1230 int errored = 0;
1231 Py_ssize_t segmentsCount;
1231 Py_ssize_t segmentsCount;
1232 ZstdBufferWithSegments* bws = NULL;
1232 ZstdBufferWithSegments* bws = NULL;
1233 PyObject* resultArg = NULL;
1233 PyObject* resultArg = NULL;
1234 Py_ssize_t resultIndex;
1234 Py_ssize_t resultIndex;
1235 ZstdBufferWithSegmentsCollection* result = NULL;
1235 ZstdBufferWithSegmentsCollection* result = NULL;
1236 FramePointer* framePointers = frames->frames;
1236 FramePointer* framePointers = frames->frames;
1237 unsigned long long workerBytes = 0;
1237 unsigned long long workerBytes = 0;
1238 Py_ssize_t currentThread = 0;
1238 Py_ssize_t currentThread = 0;
1239 Py_ssize_t workerStartOffset = 0;
1239 Py_ssize_t workerStartOffset = 0;
1240 POOL_ctx* pool = NULL;
1240 POOL_ctx* pool = NULL;
1241 WorkerState* workerStates = NULL;
1241 WorkerState* workerStates = NULL;
1242 unsigned long long bytesPerWorker;
1242 unsigned long long bytesPerWorker;
1243
1243
1244 /* Caller should normalize 0 and negative values to 1 or larger. */
1244 /* Caller should normalize 0 and negative values to 1 or larger. */
1245 assert(threadCount >= 1);
1245 assert(threadCount >= 1);
1246
1246
1247 /* More threads than inputs makes no sense under any conditions. */
1247 /* More threads than inputs makes no sense under any conditions. */
1248 threadCount = frames->framesSize < threadCount ? frames->framesSize
1248 threadCount = frames->framesSize < threadCount ? frames->framesSize
1249 : threadCount;
1249 : threadCount;
1250
1250
1251 /* TODO lower thread count if input size is too small and threads would just
1251 /* TODO lower thread count if input size is too small and threads would just
1252 add overhead. */
1252 add overhead. */
1253
1253
1254 if (decompressor->dict) {
1254 if (decompressor->dict) {
1255 if (ensure_ddict(decompressor->dict)) {
1255 if (ensure_ddict(decompressor->dict)) {
1256 return NULL;
1256 return NULL;
1257 }
1257 }
1258 }
1258 }
1259
1259
1260 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1260 /* If threadCount==1, we don't start a thread pool. But we do leverage the
1261 same API for dispatching work. */
1261 same API for dispatching work. */
1262 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1262 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1263 if (NULL == workerStates) {
1263 if (NULL == workerStates) {
1264 PyErr_NoMemory();
1264 PyErr_NoMemory();
1265 goto finally;
1265 goto finally;
1266 }
1266 }
1267
1267
1268 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1268 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1269
1269
1270 if (threadCount > 1) {
1270 if (threadCount > 1) {
1271 pool = POOL_create(threadCount, 1);
1271 pool = POOL_create(threadCount, 1);
1272 if (NULL == pool) {
1272 if (NULL == pool) {
1273 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1273 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1274 goto finally;
1274 goto finally;
1275 }
1275 }
1276 }
1276 }
1277
1277
1278 bytesPerWorker = frames->compressedSize / threadCount;
1278 bytesPerWorker = frames->compressedSize / threadCount;
1279
1279
1280 if (bytesPerWorker > SIZE_MAX) {
1280 if (bytesPerWorker > SIZE_MAX) {
1281 PyErr_SetString(ZstdError, "too much data per worker for this platform");
1281 PyErr_SetString(ZstdError, "too much data per worker for this platform");
1282 goto finally;
1282 goto finally;
1283 }
1283 }
1284
1284
1285 for (i = 0; i < threadCount; i++) {
1285 for (i = 0; i < threadCount; i++) {
1286 size_t zresult;
1286 size_t zresult;
1287
1287
1288 workerStates[i].dctx = ZSTD_createDCtx();
1288 workerStates[i].dctx = ZSTD_createDCtx();
1289 if (NULL == workerStates[i].dctx) {
1289 if (NULL == workerStates[i].dctx) {
1290 PyErr_NoMemory();
1290 PyErr_NoMemory();
1291 goto finally;
1291 goto finally;
1292 }
1292 }
1293
1293
1294 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1294 ZSTD_copyDCtx(workerStates[i].dctx, decompressor->dctx);
1295
1295
1296 if (decompressor->dict) {
1296 if (decompressor->dict) {
1297 zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
1297 zresult = ZSTD_DCtx_refDDict(workerStates[i].dctx, decompressor->dict->ddict);
1298 if (zresult) {
1298 if (zresult) {
1299 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
1299 PyErr_Format(ZstdError, "unable to reference prepared dictionary: %s",
1300 ZSTD_getErrorName(zresult));
1300 ZSTD_getErrorName(zresult));
1301 goto finally;
1301 goto finally;
1302 }
1302 }
1303 }
1303 }
1304
1304
1305 workerStates[i].framePointers = framePointers;
1305 workerStates[i].framePointers = framePointers;
1306 workerStates[i].requireOutputSizes = 1;
1306 workerStates[i].requireOutputSizes = 1;
1307 }
1307 }
1308
1308
1309 Py_BEGIN_ALLOW_THREADS
1309 Py_BEGIN_ALLOW_THREADS
1310 /* There are many ways to split work among workers.
1310 /* There are many ways to split work among workers.
1311
1311
1312 For now, we take a simple approach of splitting work so each worker
1312 For now, we take a simple approach of splitting work so each worker
1313 gets roughly the same number of input bytes. This will result in more
1313 gets roughly the same number of input bytes. This will result in more
1314 starvation than running N>threadCount jobs. But it avoids complications
1314 starvation than running N>threadCount jobs. But it avoids complications
1315 around state tracking, which could involve extra locking.
1315 around state tracking, which could involve extra locking.
1316 */
1316 */
1317 for (i = 0; i < frames->framesSize; i++) {
1317 for (i = 0; i < frames->framesSize; i++) {
1318 workerBytes += frames->frames[i].sourceSize;
1318 workerBytes += frames->frames[i].sourceSize;
1319
1319
1320 /*
1320 /*
1321 * The last worker/thread needs to handle all remaining work. Don't
1321 * The last worker/thread needs to handle all remaining work. Don't
1322 * trigger it prematurely. Defer to the block outside of the loop.
1322 * trigger it prematurely. Defer to the block outside of the loop.
1323 * (But still process this loop so workerBytes is correct.
1323 * (But still process this loop so workerBytes is correct.
1324 */
1324 */
1325 if (currentThread == threadCount - 1) {
1325 if (currentThread == threadCount - 1) {
1326 continue;
1326 continue;
1327 }
1327 }
1328
1328
1329 if (workerBytes >= bytesPerWorker) {
1329 if (workerBytes >= bytesPerWorker) {
1330 workerStates[currentThread].startOffset = workerStartOffset;
1330 workerStates[currentThread].startOffset = workerStartOffset;
1331 workerStates[currentThread].endOffset = i;
1331 workerStates[currentThread].endOffset = i;
1332 workerStates[currentThread].totalSourceSize = workerBytes;
1332 workerStates[currentThread].totalSourceSize = workerBytes;
1333
1333
1334 if (threadCount > 1) {
1334 if (threadCount > 1) {
1335 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1335 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1336 }
1336 }
1337 else {
1337 else {
1338 decompress_worker(&workerStates[currentThread]);
1338 decompress_worker(&workerStates[currentThread]);
1339 }
1339 }
1340 currentThread++;
1340 currentThread++;
1341 workerStartOffset = i + 1;
1341 workerStartOffset = i + 1;
1342 workerBytes = 0;
1342 workerBytes = 0;
1343 }
1343 }
1344 }
1344 }
1345
1345
1346 if (workerBytes) {
1346 if (workerBytes) {
1347 workerStates[currentThread].startOffset = workerStartOffset;
1347 workerStates[currentThread].startOffset = workerStartOffset;
1348 workerStates[currentThread].endOffset = frames->framesSize - 1;
1348 workerStates[currentThread].endOffset = frames->framesSize - 1;
1349 workerStates[currentThread].totalSourceSize = workerBytes;
1349 workerStates[currentThread].totalSourceSize = workerBytes;
1350
1350
1351 if (threadCount > 1) {
1351 if (threadCount > 1) {
1352 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1352 POOL_add(pool, (POOL_function)decompress_worker, &workerStates[currentThread]);
1353 }
1353 }
1354 else {
1354 else {
1355 decompress_worker(&workerStates[currentThread]);
1355 decompress_worker(&workerStates[currentThread]);
1356 }
1356 }
1357 }
1357 }
1358
1358
1359 if (threadCount > 1) {
1359 if (threadCount > 1) {
1360 POOL_free(pool);
1360 POOL_free(pool);
1361 pool = NULL;
1361 pool = NULL;
1362 }
1362 }
1363 Py_END_ALLOW_THREADS
1363 Py_END_ALLOW_THREADS
1364
1364
1365 for (i = 0; i < threadCount; i++) {
1365 for (i = 0; i < threadCount; i++) {
1366 switch (workerStates[i].error) {
1366 switch (workerStates[i].error) {
1367 case WorkerError_none:
1367 case WorkerError_none:
1368 break;
1368 break;
1369
1369
1370 case WorkerError_zstd:
1370 case WorkerError_zstd:
1371 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1371 PyErr_Format(ZstdError, "error decompressing item %zd: %s",
1372 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1372 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1373 errored = 1;
1373 errored = 1;
1374 break;
1374 break;
1375
1375
1376 case WorkerError_memory:
1376 case WorkerError_memory:
1377 PyErr_NoMemory();
1377 PyErr_NoMemory();
1378 errored = 1;
1378 errored = 1;
1379 break;
1379 break;
1380
1380
1381 case WorkerError_sizeMismatch:
1381 case WorkerError_sizeMismatch:
1382 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
1382 PyErr_Format(ZstdError, "error decompressing item %zd: decompressed %zu bytes; expected %zu",
1383 workerStates[i].errorOffset, workerStates[i].zresult,
1383 workerStates[i].errorOffset, workerStates[i].zresult,
1384 framePointers[workerStates[i].errorOffset].destSize);
1384 framePointers[workerStates[i].errorOffset].destSize);
1385 errored = 1;
1385 errored = 1;
1386 break;
1386 break;
1387
1387
1388 case WorkerError_unknownSize:
1388 case WorkerError_unknownSize:
1389 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1389 PyErr_Format(PyExc_ValueError, "could not determine decompressed size of item %zd",
1390 workerStates[i].errorOffset);
1390 workerStates[i].errorOffset);
1391 errored = 1;
1391 errored = 1;
1392 break;
1392 break;
1393
1393
1394 default:
1394 default:
1395 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1395 PyErr_Format(ZstdError, "unhandled error type: %d; this is a bug",
1396 workerStates[i].error);
1396 workerStates[i].error);
1397 errored = 1;
1397 errored = 1;
1398 break;
1398 break;
1399 }
1399 }
1400
1400
1401 if (errored) {
1401 if (errored) {
1402 break;
1402 break;
1403 }
1403 }
1404 }
1404 }
1405
1405
1406 if (errored) {
1406 if (errored) {
1407 goto finally;
1407 goto finally;
1408 }
1408 }
1409
1409
1410 segmentsCount = 0;
1410 segmentsCount = 0;
1411 for (i = 0; i < threadCount; i++) {
1411 for (i = 0; i < threadCount; i++) {
1412 segmentsCount += workerStates[i].destCount;
1412 segmentsCount += workerStates[i].destCount;
1413 }
1413 }
1414
1414
1415 resultArg = PyTuple_New(segmentsCount);
1415 resultArg = PyTuple_New(segmentsCount);
1416 if (NULL == resultArg) {
1416 if (NULL == resultArg) {
1417 goto finally;
1417 goto finally;
1418 }
1418 }
1419
1419
1420 resultIndex = 0;
1420 resultIndex = 0;
1421
1421
1422 for (i = 0; i < threadCount; i++) {
1422 for (i = 0; i < threadCount; i++) {
1423 Py_ssize_t bufferIndex;
1423 Py_ssize_t bufferIndex;
1424 WorkerState* state = &workerStates[i];
1424 WorkerState* state = &workerStates[i];
1425
1425
1426 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1426 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1427 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1427 DestBuffer* destBuffer = &state->destBuffers[bufferIndex];
1428
1428
1429 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1429 bws = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1430 destBuffer->segments, destBuffer->segmentsSize);
1430 destBuffer->segments, destBuffer->segmentsSize);
1431 if (NULL == bws) {
1431 if (NULL == bws) {
1432 goto finally;
1432 goto finally;
1433 }
1433 }
1434
1434
1435 /*
1435 /*
1436 * Memory for buffer and segments was allocated using malloc() in worker
1436 * Memory for buffer and segments was allocated using malloc() in worker
1437 * and the memory is transferred to the BufferWithSegments instance. So
1437 * and the memory is transferred to the BufferWithSegments instance. So
1438 * tell instance to use free() and NULL the reference in the state struct
1438 * tell instance to use free() and NULL the reference in the state struct
1439 * so it isn't freed below.
1439 * so it isn't freed below.
1440 */
1440 */
1441 bws->useFree = 1;
1441 bws->useFree = 1;
1442 destBuffer->dest = NULL;
1442 destBuffer->dest = NULL;
1443 destBuffer->segments = NULL;
1443 destBuffer->segments = NULL;
1444
1444
1445 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1445 PyTuple_SET_ITEM(resultArg, resultIndex++, (PyObject*)bws);
1446 }
1446 }
1447 }
1447 }
1448
1448
1449 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1449 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1450 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1450 (PyObject*)&ZstdBufferWithSegmentsCollectionType, resultArg);
1451
1451
1452 finally:
1452 finally:
1453 Py_CLEAR(resultArg);
1453 Py_CLEAR(resultArg);
1454
1454
1455 if (workerStates) {
1455 if (workerStates) {
1456 for (i = 0; i < threadCount; i++) {
1456 for (i = 0; i < threadCount; i++) {
1457 Py_ssize_t bufferIndex;
1457 Py_ssize_t bufferIndex;
1458 WorkerState* state = &workerStates[i];
1458 WorkerState* state = &workerStates[i];
1459
1459
1460 if (state->dctx) {
1460 if (state->dctx) {
1461 ZSTD_freeDCtx(state->dctx);
1461 ZSTD_freeDCtx(state->dctx);
1462 }
1462 }
1463
1463
1464 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1464 for (bufferIndex = 0; bufferIndex < state->destCount; bufferIndex++) {
1465 if (state->destBuffers) {
1465 if (state->destBuffers) {
1466 /*
1466 /*
1467 * Will be NULL if memory transfered to a BufferWithSegments.
1467 * Will be NULL if memory transfered to a BufferWithSegments.
1468 * Otherwise it is left over after an error occurred.
1468 * Otherwise it is left over after an error occurred.
1469 */
1469 */
1470 free(state->destBuffers[bufferIndex].dest);
1470 free(state->destBuffers[bufferIndex].dest);
1471 free(state->destBuffers[bufferIndex].segments);
1471 free(state->destBuffers[bufferIndex].segments);
1472 }
1472 }
1473 }
1473 }
1474
1474
1475 free(state->destBuffers);
1475 free(state->destBuffers);
1476 }
1476 }
1477
1477
1478 PyMem_Free(workerStates);
1478 PyMem_Free(workerStates);
1479 }
1479 }
1480
1480
1481 POOL_free(pool);
1481 POOL_free(pool);
1482
1482
1483 return result;
1483 return result;
1484 }
1484 }
1485
1485
1486 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1486 PyDoc_STRVAR(Decompressor_multi_decompress_to_buffer__doc__,
1487 "Decompress multiple frames to output buffers\n"
1487 "Decompress multiple frames to output buffers\n"
1488 "\n"
1488 "\n"
1489 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1489 "Receives a ``BufferWithSegments``, a ``BufferWithSegmentsCollection`` or a\n"
1490 "list of bytes-like objects. Each item in the passed collection should be a\n"
1490 "list of bytes-like objects. Each item in the passed collection should be a\n"
1491 "compressed zstd frame.\n"
1491 "compressed zstd frame.\n"
1492 "\n"
1492 "\n"
1493 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1493 "Unless ``decompressed_sizes`` is specified, the content size *must* be\n"
1494 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1494 "written into the zstd frame header. If ``decompressed_sizes`` is specified,\n"
1495 "it is an object conforming to the buffer protocol that represents an array\n"
1495 "it is an object conforming to the buffer protocol that represents an array\n"
1496 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1496 "of 64-bit unsigned integers in the machine's native format. Specifying\n"
1497 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1497 "``decompressed_sizes`` avoids a pre-scan of each frame to determine its\n"
1498 "output size.\n"
1498 "output size.\n"
1499 "\n"
1499 "\n"
1500 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1500 "Returns a ``BufferWithSegmentsCollection`` containing the decompressed\n"
1501 "data. All decompressed data is allocated in a single memory buffer. The\n"
1501 "data. All decompressed data is allocated in a single memory buffer. The\n"
1502 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1502 "``BufferWithSegments`` instance tracks which objects are at which offsets\n"
1503 "and their respective lengths.\n"
1503 "and their respective lengths.\n"
1504 "\n"
1504 "\n"
1505 "The ``threads`` argument controls how many threads to use for operations.\n"
1505 "The ``threads`` argument controls how many threads to use for operations.\n"
1506 "Negative values will use the same number of threads as logical CPUs on the\n"
1506 "Negative values will use the same number of threads as logical CPUs on the\n"
1507 "machine.\n"
1507 "machine.\n"
1508 );
1508 );
1509
1509
1510 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1510 static ZstdBufferWithSegmentsCollection* Decompressor_multi_decompress_to_buffer(ZstdDecompressor* self, PyObject* args, PyObject* kwargs) {
1511 static char* kwlist[] = {
1511 static char* kwlist[] = {
1512 "frames",
1512 "frames",
1513 "decompressed_sizes",
1513 "decompressed_sizes",
1514 "threads",
1514 "threads",
1515 NULL
1515 NULL
1516 };
1516 };
1517
1517
1518 PyObject* frames;
1518 PyObject* frames;
1519 Py_buffer frameSizes;
1519 Py_buffer frameSizes;
1520 int threads = 0;
1520 int threads = 0;
1521 Py_ssize_t frameCount;
1521 Py_ssize_t frameCount;
1522 Py_buffer* frameBuffers = NULL;
1522 Py_buffer* frameBuffers = NULL;
1523 FramePointer* framePointers = NULL;
1523 FramePointer* framePointers = NULL;
1524 unsigned long long* frameSizesP = NULL;
1524 unsigned long long* frameSizesP = NULL;
1525 unsigned long long totalInputSize = 0;
1525 unsigned long long totalInputSize = 0;
1526 FrameSources frameSources;
1526 FrameSources frameSources;
1527 ZstdBufferWithSegmentsCollection* result = NULL;
1527 ZstdBufferWithSegmentsCollection* result = NULL;
1528 Py_ssize_t i;
1528 Py_ssize_t i;
1529
1529
1530 memset(&frameSizes, 0, sizeof(frameSizes));
1530 memset(&frameSizes, 0, sizeof(frameSizes));
1531
1531
1532 #if PY_MAJOR_VERSION >= 3
1532 #if PY_MAJOR_VERSION >= 3
1533 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1533 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|y*i:multi_decompress_to_buffer",
1534 #else
1534 #else
1535 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1535 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|s*i:multi_decompress_to_buffer",
1536 #endif
1536 #endif
1537 kwlist, &frames, &frameSizes, &threads)) {
1537 kwlist, &frames, &frameSizes, &threads)) {
1538 return NULL;
1538 return NULL;
1539 }
1539 }
1540
1540
1541 if (frameSizes.buf) {
1541 if (frameSizes.buf) {
1542 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1542 if (!PyBuffer_IsContiguous(&frameSizes, 'C') || frameSizes.ndim > 1) {
1543 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1543 PyErr_SetString(PyExc_ValueError, "decompressed_sizes buffer should be contiguous and have a single dimension");
1544 goto finally;
1544 goto finally;
1545 }
1545 }
1546
1546
1547 frameSizesP = (unsigned long long*)frameSizes.buf;
1547 frameSizesP = (unsigned long long*)frameSizes.buf;
1548 }
1548 }
1549
1549
1550 if (threads < 0) {
1550 if (threads < 0) {
1551 threads = cpu_count();
1551 threads = cpu_count();
1552 }
1552 }
1553
1553
1554 if (threads < 2) {
1554 if (threads < 2) {
1555 threads = 1;
1555 threads = 1;
1556 }
1556 }
1557
1557
1558 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1558 if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsType)) {
1559 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1559 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)frames;
1560 frameCount = buffer->segmentCount;
1560 frameCount = buffer->segmentCount;
1561
1561
1562 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1562 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1563 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1563 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1564 frameCount * sizeof(unsigned long long), frameSizes.len);
1564 frameCount * sizeof(unsigned long long), frameSizes.len);
1565 goto finally;
1565 goto finally;
1566 }
1566 }
1567
1567
1568 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1568 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1569 if (!framePointers) {
1569 if (!framePointers) {
1570 PyErr_NoMemory();
1570 PyErr_NoMemory();
1571 goto finally;
1571 goto finally;
1572 }
1572 }
1573
1573
1574 for (i = 0; i < frameCount; i++) {
1574 for (i = 0; i < frameCount; i++) {
1575 void* sourceData;
1575 void* sourceData;
1576 unsigned long long sourceSize;
1576 unsigned long long sourceSize;
1577 unsigned long long decompressedSize = 0;
1577 unsigned long long decompressedSize = 0;
1578
1578
1579 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1579 if (buffer->segments[i].offset + buffer->segments[i].length > buffer->dataSize) {
1580 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1580 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area", i);
1581 goto finally;
1581 goto finally;
1582 }
1582 }
1583
1583
1584 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1584 sourceData = (char*)buffer->data + buffer->segments[i].offset;
1585 sourceSize = buffer->segments[i].length;
1585 sourceSize = buffer->segments[i].length;
1586 totalInputSize += sourceSize;
1586 totalInputSize += sourceSize;
1587
1587
1588 if (frameSizesP) {
1588 if (frameSizesP) {
1589 decompressedSize = frameSizesP[i];
1589 decompressedSize = frameSizesP[i];
1590 }
1590 }
1591
1591
1592 if (sourceSize > SIZE_MAX) {
1592 if (sourceSize > SIZE_MAX) {
1593 PyErr_Format(PyExc_ValueError,
1593 PyErr_Format(PyExc_ValueError,
1594 "item %zd is too large for this platform", i);
1594 "item %zd is too large for this platform", i);
1595 goto finally;
1595 goto finally;
1596 }
1596 }
1597
1597
1598 if (decompressedSize > SIZE_MAX) {
1598 if (decompressedSize > SIZE_MAX) {
1599 PyErr_Format(PyExc_ValueError,
1599 PyErr_Format(PyExc_ValueError,
1600 "decompressed size of item %zd is too large for this platform", i);
1600 "decompressed size of item %zd is too large for this platform", i);
1601 goto finally;
1601 goto finally;
1602 }
1602 }
1603
1603
1604 framePointers[i].sourceData = sourceData;
1604 framePointers[i].sourceData = sourceData;
1605 framePointers[i].sourceSize = (size_t)sourceSize;
1605 framePointers[i].sourceSize = (size_t)sourceSize;
1606 framePointers[i].destSize = (size_t)decompressedSize;
1606 framePointers[i].destSize = (size_t)decompressedSize;
1607 }
1607 }
1608 }
1608 }
1609 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1609 else if (PyObject_TypeCheck(frames, &ZstdBufferWithSegmentsCollectionType)) {
1610 Py_ssize_t offset = 0;
1610 Py_ssize_t offset = 0;
1611 ZstdBufferWithSegments* buffer;
1611 ZstdBufferWithSegments* buffer;
1612 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1612 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)frames;
1613
1613
1614 frameCount = BufferWithSegmentsCollection_length(collection);
1614 frameCount = BufferWithSegmentsCollection_length(collection);
1615
1615
1616 if (frameSizes.buf && frameSizes.len != frameCount) {
1616 if (frameSizes.buf && frameSizes.len != frameCount) {
1617 PyErr_Format(PyExc_ValueError,
1617 PyErr_Format(PyExc_ValueError,
1618 "decompressed_sizes size mismatch; expected %zd; got %zd",
1618 "decompressed_sizes size mismatch; expected %zd; got %zd",
1619 frameCount * sizeof(unsigned long long), frameSizes.len);
1619 frameCount * sizeof(unsigned long long), frameSizes.len);
1620 goto finally;
1620 goto finally;
1621 }
1621 }
1622
1622
1623 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1623 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1624 if (NULL == framePointers) {
1624 if (NULL == framePointers) {
1625 PyErr_NoMemory();
1625 PyErr_NoMemory();
1626 goto finally;
1626 goto finally;
1627 }
1627 }
1628
1628
1629 /* Iterate the data structure directly because it is faster. */
1629 /* Iterate the data structure directly because it is faster. */
1630 for (i = 0; i < collection->bufferCount; i++) {
1630 for (i = 0; i < collection->bufferCount; i++) {
1631 Py_ssize_t segmentIndex;
1631 Py_ssize_t segmentIndex;
1632 buffer = collection->buffers[i];
1632 buffer = collection->buffers[i];
1633
1633
1634 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1634 for (segmentIndex = 0; segmentIndex < buffer->segmentCount; segmentIndex++) {
1635 unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
1635 unsigned long long decompressedSize = frameSizesP ? frameSizesP[offset] : 0;
1636
1636
1637 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1637 if (buffer->segments[segmentIndex].offset + buffer->segments[segmentIndex].length > buffer->dataSize) {
1638 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1638 PyErr_Format(PyExc_ValueError, "item %zd has offset outside memory area",
1639 offset);
1639 offset);
1640 goto finally;
1640 goto finally;
1641 }
1641 }
1642
1642
1643 if (buffer->segments[segmentIndex].length > SIZE_MAX) {
1643 if (buffer->segments[segmentIndex].length > SIZE_MAX) {
1644 PyErr_Format(PyExc_ValueError,
1644 PyErr_Format(PyExc_ValueError,
1645 "item %zd in buffer %zd is too large for this platform",
1645 "item %zd in buffer %zd is too large for this platform",
1646 segmentIndex, i);
1646 segmentIndex, i);
1647 goto finally;
1647 goto finally;
1648 }
1648 }
1649
1649
1650 if (decompressedSize > SIZE_MAX) {
1650 if (decompressedSize > SIZE_MAX) {
1651 PyErr_Format(PyExc_ValueError,
1651 PyErr_Format(PyExc_ValueError,
1652 "decompressed size of item %zd in buffer %zd is too large for this platform",
1652 "decompressed size of item %zd in buffer %zd is too large for this platform",
1653 segmentIndex, i);
1653 segmentIndex, i);
1654 goto finally;
1654 goto finally;
1655 }
1655 }
1656
1656
1657 totalInputSize += buffer->segments[segmentIndex].length;
1657 totalInputSize += buffer->segments[segmentIndex].length;
1658
1658
1659 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1659 framePointers[offset].sourceData = (char*)buffer->data + buffer->segments[segmentIndex].offset;
1660 framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
1660 framePointers[offset].sourceSize = (size_t)buffer->segments[segmentIndex].length;
1661 framePointers[offset].destSize = (size_t)decompressedSize;
1661 framePointers[offset].destSize = (size_t)decompressedSize;
1662
1662
1663 offset++;
1663 offset++;
1664 }
1664 }
1665 }
1665 }
1666 }
1666 }
1667 else if (PyList_Check(frames)) {
1667 else if (PyList_Check(frames)) {
1668 frameCount = PyList_GET_SIZE(frames);
1668 frameCount = PyList_GET_SIZE(frames);
1669
1669
1670 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1670 if (frameSizes.buf && frameSizes.len != frameCount * (Py_ssize_t)sizeof(unsigned long long)) {
1671 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1671 PyErr_Format(PyExc_ValueError, "decompressed_sizes size mismatch; expected %zd, got %zd",
1672 frameCount * sizeof(unsigned long long), frameSizes.len);
1672 frameCount * sizeof(unsigned long long), frameSizes.len);
1673 goto finally;
1673 goto finally;
1674 }
1674 }
1675
1675
1676 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1676 framePointers = PyMem_Malloc(frameCount * sizeof(FramePointer));
1677 if (!framePointers) {
1677 if (!framePointers) {
1678 PyErr_NoMemory();
1678 PyErr_NoMemory();
1679 goto finally;
1679 goto finally;
1680 }
1680 }
1681
1681
1682 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1682 frameBuffers = PyMem_Malloc(frameCount * sizeof(Py_buffer));
1683 if (NULL == frameBuffers) {
1683 if (NULL == frameBuffers) {
1684 PyErr_NoMemory();
1684 PyErr_NoMemory();
1685 goto finally;
1685 goto finally;
1686 }
1686 }
1687
1687
1688 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1688 memset(frameBuffers, 0, frameCount * sizeof(Py_buffer));
1689
1689
1690 /* Do a pass to assemble info about our input buffers and output sizes. */
1690 /* Do a pass to assemble info about our input buffers and output sizes. */
1691 for (i = 0; i < frameCount; i++) {
1691 for (i = 0; i < frameCount; i++) {
1692 unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
1692 unsigned long long decompressedSize = frameSizesP ? frameSizesP[i] : 0;
1693
1693
1694 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1694 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(frames, i),
1695 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1695 &frameBuffers[i], PyBUF_CONTIG_RO)) {
1696 PyErr_Clear();
1696 PyErr_Clear();
1697 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1697 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1698 goto finally;
1698 goto finally;
1699 }
1699 }
1700
1700
1701 if (decompressedSize > SIZE_MAX) {
1701 if (decompressedSize > SIZE_MAX) {
1702 PyErr_Format(PyExc_ValueError,
1702 PyErr_Format(PyExc_ValueError,
1703 "decompressed size of item %zd is too large for this platform", i);
1703 "decompressed size of item %zd is too large for this platform", i);
1704 goto finally;
1704 goto finally;
1705 }
1705 }
1706
1706
1707 totalInputSize += frameBuffers[i].len;
1707 totalInputSize += frameBuffers[i].len;
1708
1708
1709 framePointers[i].sourceData = frameBuffers[i].buf;
1709 framePointers[i].sourceData = frameBuffers[i].buf;
1710 framePointers[i].sourceSize = frameBuffers[i].len;
1710 framePointers[i].sourceSize = frameBuffers[i].len;
1711 framePointers[i].destSize = (size_t)decompressedSize;
1711 framePointers[i].destSize = (size_t)decompressedSize;
1712 }
1712 }
1713 }
1713 }
1714 else {
1714 else {
1715 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1715 PyErr_SetString(PyExc_TypeError, "argument must be list or BufferWithSegments");
1716 goto finally;
1716 goto finally;
1717 }
1717 }
1718
1718
1719 /* We now have an array with info about our inputs and outputs. Feed it into
1719 /* We now have an array with info about our inputs and outputs. Feed it into
1720 our generic decompression function. */
1720 our generic decompression function. */
1721 frameSources.frames = framePointers;
1721 frameSources.frames = framePointers;
1722 frameSources.framesSize = frameCount;
1722 frameSources.framesSize = frameCount;
1723 frameSources.compressedSize = totalInputSize;
1723 frameSources.compressedSize = totalInputSize;
1724
1724
1725 result = decompress_from_framesources(self, &frameSources, threads);
1725 result = decompress_from_framesources(self, &frameSources, threads);
1726
1726
1727 finally:
1727 finally:
1728 if (frameSizes.buf) {
1728 if (frameSizes.buf) {
1729 PyBuffer_Release(&frameSizes);
1729 PyBuffer_Release(&frameSizes);
1730 }
1730 }
1731 PyMem_Free(framePointers);
1731 PyMem_Free(framePointers);
1732
1732
1733 if (frameBuffers) {
1733 if (frameBuffers) {
1734 for (i = 0; i < frameCount; i++) {
1734 for (i = 0; i < frameCount; i++) {
1735 PyBuffer_Release(&frameBuffers[i]);
1735 PyBuffer_Release(&frameBuffers[i]);
1736 }
1736 }
1737
1737
1738 PyMem_Free(frameBuffers);
1738 PyMem_Free(frameBuffers);
1739 }
1739 }
1740
1740
1741 return result;
1741 return result;
1742 }
1742 }
1743
1743
1744 static PyMethodDef Decompressor_methods[] = {
1744 static PyMethodDef Decompressor_methods[] = {
1745 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1745 { "copy_stream", (PyCFunction)Decompressor_copy_stream, METH_VARARGS | METH_KEYWORDS,
1746 Decompressor_copy_stream__doc__ },
1746 Decompressor_copy_stream__doc__ },
1747 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1747 { "decompress", (PyCFunction)Decompressor_decompress, METH_VARARGS | METH_KEYWORDS,
1748 Decompressor_decompress__doc__ },
1748 Decompressor_decompress__doc__ },
1749 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
1749 { "decompressobj", (PyCFunction)Decompressor_decompressobj, METH_VARARGS | METH_KEYWORDS,
1750 Decompressor_decompressobj__doc__ },
1750 Decompressor_decompressobj__doc__ },
1751 { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1751 { "read_to_iter", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1752 Decompressor_read_to_iter__doc__ },
1752 Decompressor_read_to_iter__doc__ },
1753 /* TODO Remove deprecated API */
1753 /* TODO Remove deprecated API */
1754 { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1754 { "read_from", (PyCFunction)Decompressor_read_to_iter, METH_VARARGS | METH_KEYWORDS,
1755 Decompressor_read_to_iter__doc__ },
1755 Decompressor_read_to_iter__doc__ },
1756 { "stream_reader", (PyCFunction)Decompressor_stream_reader,
1756 { "stream_reader", (PyCFunction)Decompressor_stream_reader,
1757 METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
1757 METH_VARARGS | METH_KEYWORDS, Decompressor_stream_reader__doc__ },
1758 { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1758 { "stream_writer", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1759 Decompressor_stream_writer__doc__ },
1759 Decompressor_stream_writer__doc__ },
1760 /* TODO remove deprecated API */
1760 /* TODO remove deprecated API */
1761 { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1761 { "write_to", (PyCFunction)Decompressor_stream_writer, METH_VARARGS | METH_KEYWORDS,
1762 Decompressor_stream_writer__doc__ },
1762 Decompressor_stream_writer__doc__ },
1763 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1763 { "decompress_content_dict_chain", (PyCFunction)Decompressor_decompress_content_dict_chain,
1764 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1764 METH_VARARGS | METH_KEYWORDS, Decompressor_decompress_content_dict_chain__doc__ },
1765 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1765 { "multi_decompress_to_buffer", (PyCFunction)Decompressor_multi_decompress_to_buffer,
1766 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1766 METH_VARARGS | METH_KEYWORDS, Decompressor_multi_decompress_to_buffer__doc__ },
1767 { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
1767 { "memory_size", (PyCFunction)Decompressor_memory_size, METH_NOARGS,
1768 Decompressor_memory_size__doc__ },
1768 Decompressor_memory_size__doc__ },
1769 { NULL, NULL }
1769 { NULL, NULL }
1770 };
1770 };
1771
1771
1772 PyTypeObject ZstdDecompressorType = {
1772 PyTypeObject ZstdDecompressorType = {
1773 PyVarObject_HEAD_INIT(NULL, 0)
1773 PyVarObject_HEAD_INIT(NULL, 0)
1774 "zstd.ZstdDecompressor", /* tp_name */
1774 "zstd.ZstdDecompressor", /* tp_name */
1775 sizeof(ZstdDecompressor), /* tp_basicsize */
1775 sizeof(ZstdDecompressor), /* tp_basicsize */
1776 0, /* tp_itemsize */
1776 0, /* tp_itemsize */
1777 (destructor)Decompressor_dealloc, /* tp_dealloc */
1777 (destructor)Decompressor_dealloc, /* tp_dealloc */
1778 0, /* tp_print */
1778 0, /* tp_print */
1779 0, /* tp_getattr */
1779 0, /* tp_getattr */
1780 0, /* tp_setattr */
1780 0, /* tp_setattr */
1781 0, /* tp_compare */
1781 0, /* tp_compare */
1782 0, /* tp_repr */
1782 0, /* tp_repr */
1783 0, /* tp_as_number */
1783 0, /* tp_as_number */
1784 0, /* tp_as_sequence */
1784 0, /* tp_as_sequence */
1785 0, /* tp_as_mapping */
1785 0, /* tp_as_mapping */
1786 0, /* tp_hash */
1786 0, /* tp_hash */
1787 0, /* tp_call */
1787 0, /* tp_call */
1788 0, /* tp_str */
1788 0, /* tp_str */
1789 0, /* tp_getattro */
1789 0, /* tp_getattro */
1790 0, /* tp_setattro */
1790 0, /* tp_setattro */
1791 0, /* tp_as_buffer */
1791 0, /* tp_as_buffer */
1792 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1792 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1793 Decompressor__doc__, /* tp_doc */
1793 Decompressor__doc__, /* tp_doc */
1794 0, /* tp_traverse */
1794 0, /* tp_traverse */
1795 0, /* tp_clear */
1795 0, /* tp_clear */
1796 0, /* tp_richcompare */
1796 0, /* tp_richcompare */
1797 0, /* tp_weaklistoffset */
1797 0, /* tp_weaklistoffset */
1798 0, /* tp_iter */
1798 0, /* tp_iter */
1799 0, /* tp_iternext */
1799 0, /* tp_iternext */
1800 Decompressor_methods, /* tp_methods */
1800 Decompressor_methods, /* tp_methods */
1801 0, /* tp_members */
1801 0, /* tp_members */
1802 0, /* tp_getset */
1802 0, /* tp_getset */
1803 0, /* tp_base */
1803 0, /* tp_base */
1804 0, /* tp_dict */
1804 0, /* tp_dict */
1805 0, /* tp_descr_get */
1805 0, /* tp_descr_get */
1806 0, /* tp_descr_set */
1806 0, /* tp_descr_set */
1807 0, /* tp_dictoffset */
1807 0, /* tp_dictoffset */
1808 (initproc)Decompressor_init, /* tp_init */
1808 (initproc)Decompressor_init, /* tp_init */
1809 0, /* tp_alloc */
1809 0, /* tp_alloc */
1810 PyType_GenericNew, /* tp_new */
1810 PyType_GenericNew, /* tp_new */
1811 };
1811 };
1812
1812
1813 void decompressor_module_init(PyObject* mod) {
1813 void decompressor_module_init(PyObject* mod) {
1814 Py_TYPE(&ZstdDecompressorType) = &PyType_Type;
1814 Py_SET_TYPE(&ZstdDecompressorType, &PyType_Type);
1815 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1815 if (PyType_Ready(&ZstdDecompressorType) < 0) {
1816 return;
1816 return;
1817 }
1817 }
1818
1818
1819 Py_INCREF((PyObject*)&ZstdDecompressorType);
1819 Py_INCREF((PyObject*)&ZstdDecompressorType);
1820 PyModule_AddObject(mod, "ZstdDecompressor",
1820 PyModule_AddObject(mod, "ZstdDecompressor",
1821 (PyObject*)&ZstdDecompressorType);
1821 (PyObject*)&ZstdDecompressorType);
1822 }
1822 }
@@ -1,249 +1,249 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12
12
13 extern PyObject* ZstdError;
13 extern PyObject* ZstdError;
14
14
15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
15 PyDoc_STRVAR(ZstdDecompressorIterator__doc__,
16 "Represents an iterator of decompressed data.\n"
16 "Represents an iterator of decompressed data.\n"
17 );
17 );
18
18
19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
19 static void ZstdDecompressorIterator_dealloc(ZstdDecompressorIterator* self) {
20 Py_XDECREF(self->decompressor);
20 Py_XDECREF(self->decompressor);
21 Py_XDECREF(self->reader);
21 Py_XDECREF(self->reader);
22
22
23 if (self->buffer.buf) {
23 if (self->buffer.buf) {
24 PyBuffer_Release(&self->buffer);
24 PyBuffer_Release(&self->buffer);
25 memset(&self->buffer, 0, sizeof(self->buffer));
25 memset(&self->buffer, 0, sizeof(self->buffer));
26 }
26 }
27
27
28 if (self->input.src) {
28 if (self->input.src) {
29 PyMem_Free((void*)self->input.src);
29 PyMem_Free((void*)self->input.src);
30 self->input.src = NULL;
30 self->input.src = NULL;
31 }
31 }
32
32
33 PyObject_Del(self);
33 PyObject_Del(self);
34 }
34 }
35
35
36 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
36 static PyObject* ZstdDecompressorIterator_iter(PyObject* self) {
37 Py_INCREF(self);
37 Py_INCREF(self);
38 return self;
38 return self;
39 }
39 }
40
40
41 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
41 static DecompressorIteratorResult read_decompressor_iterator(ZstdDecompressorIterator* self) {
42 size_t zresult;
42 size_t zresult;
43 PyObject* chunk;
43 PyObject* chunk;
44 DecompressorIteratorResult result;
44 DecompressorIteratorResult result;
45 size_t oldInputPos = self->input.pos;
45 size_t oldInputPos = self->input.pos;
46
46
47 result.chunk = NULL;
47 result.chunk = NULL;
48
48
49 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
49 chunk = PyBytes_FromStringAndSize(NULL, self->outSize);
50 if (!chunk) {
50 if (!chunk) {
51 result.errored = 1;
51 result.errored = 1;
52 return result;
52 return result;
53 }
53 }
54
54
55 self->output.dst = PyBytes_AsString(chunk);
55 self->output.dst = PyBytes_AsString(chunk);
56 self->output.size = self->outSize;
56 self->output.size = self->outSize;
57 self->output.pos = 0;
57 self->output.pos = 0;
58
58
59 Py_BEGIN_ALLOW_THREADS
59 Py_BEGIN_ALLOW_THREADS
60 zresult = ZSTD_decompressStream(self->decompressor->dctx, &self->output, &self->input);
60 zresult = ZSTD_decompressStream(self->decompressor->dctx, &self->output, &self->input);
61 Py_END_ALLOW_THREADS
61 Py_END_ALLOW_THREADS
62
62
63 /* We're done with the pointer. Nullify to prevent anyone from getting a
63 /* We're done with the pointer. Nullify to prevent anyone from getting a
64 handle on a Python object. */
64 handle on a Python object. */
65 self->output.dst = NULL;
65 self->output.dst = NULL;
66
66
67 if (ZSTD_isError(zresult)) {
67 if (ZSTD_isError(zresult)) {
68 Py_DECREF(chunk);
68 Py_DECREF(chunk);
69 PyErr_Format(ZstdError, "zstd decompress error: %s",
69 PyErr_Format(ZstdError, "zstd decompress error: %s",
70 ZSTD_getErrorName(zresult));
70 ZSTD_getErrorName(zresult));
71 result.errored = 1;
71 result.errored = 1;
72 return result;
72 return result;
73 }
73 }
74
74
75 self->readCount += self->input.pos - oldInputPos;
75 self->readCount += self->input.pos - oldInputPos;
76
76
77 /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
77 /* Frame is fully decoded. Input exhausted and output sitting in buffer. */
78 if (0 == zresult) {
78 if (0 == zresult) {
79 self->finishedInput = 1;
79 self->finishedInput = 1;
80 self->finishedOutput = 1;
80 self->finishedOutput = 1;
81 }
81 }
82
82
83 /* If it produced output data, return it. */
83 /* If it produced output data, return it. */
84 if (self->output.pos) {
84 if (self->output.pos) {
85 if (self->output.pos < self->outSize) {
85 if (self->output.pos < self->outSize) {
86 if (safe_pybytes_resize(&chunk, self->output.pos)) {
86 if (safe_pybytes_resize(&chunk, self->output.pos)) {
87 Py_XDECREF(chunk);
87 Py_XDECREF(chunk);
88 result.errored = 1;
88 result.errored = 1;
89 return result;
89 return result;
90 }
90 }
91 }
91 }
92 }
92 }
93 else {
93 else {
94 Py_DECREF(chunk);
94 Py_DECREF(chunk);
95 chunk = NULL;
95 chunk = NULL;
96 }
96 }
97
97
98 result.errored = 0;
98 result.errored = 0;
99 result.chunk = chunk;
99 result.chunk = chunk;
100
100
101 return result;
101 return result;
102 }
102 }
103
103
104 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
104 static PyObject* ZstdDecompressorIterator_iternext(ZstdDecompressorIterator* self) {
105 PyObject* readResult = NULL;
105 PyObject* readResult = NULL;
106 char* readBuffer;
106 char* readBuffer;
107 Py_ssize_t readSize;
107 Py_ssize_t readSize;
108 Py_ssize_t bufferRemaining;
108 Py_ssize_t bufferRemaining;
109 DecompressorIteratorResult result;
109 DecompressorIteratorResult result;
110
110
111 if (self->finishedOutput) {
111 if (self->finishedOutput) {
112 PyErr_SetString(PyExc_StopIteration, "output flushed");
112 PyErr_SetString(PyExc_StopIteration, "output flushed");
113 return NULL;
113 return NULL;
114 }
114 }
115
115
116 /* If we have data left in the input, consume it. */
116 /* If we have data left in the input, consume it. */
117 if (self->input.pos < self->input.size) {
117 if (self->input.pos < self->input.size) {
118 result = read_decompressor_iterator(self);
118 result = read_decompressor_iterator(self);
119 if (result.chunk || result.errored) {
119 if (result.chunk || result.errored) {
120 return result.chunk;
120 return result.chunk;
121 }
121 }
122
122
123 /* Else fall through to get more data from input. */
123 /* Else fall through to get more data from input. */
124 }
124 }
125
125
126 read_from_source:
126 read_from_source:
127
127
128 if (!self->finishedInput) {
128 if (!self->finishedInput) {
129 if (self->reader) {
129 if (self->reader) {
130 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
130 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
131 if (!readResult) {
131 if (!readResult) {
132 return NULL;
132 return NULL;
133 }
133 }
134
134
135 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
135 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
136 }
136 }
137 else {
137 else {
138 assert(self->buffer.buf);
138 assert(self->buffer.buf);
139
139
140 /* Only support contiguous C arrays for now */
140 /* Only support contiguous C arrays for now */
141 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
141 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
142 assert(self->buffer.itemsize == 1);
142 assert(self->buffer.itemsize == 1);
143
143
144 /* TODO avoid memcpy() below */
144 /* TODO avoid memcpy() below */
145 readBuffer = (char *)self->buffer.buf + self->bufferOffset;
145 readBuffer = (char *)self->buffer.buf + self->bufferOffset;
146 bufferRemaining = self->buffer.len - self->bufferOffset;
146 bufferRemaining = self->buffer.len - self->bufferOffset;
147 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
147 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
148 self->bufferOffset += readSize;
148 self->bufferOffset += readSize;
149 }
149 }
150
150
151 if (readSize) {
151 if (readSize) {
152 if (!self->readCount && self->skipBytes) {
152 if (!self->readCount && self->skipBytes) {
153 assert(self->skipBytes < self->inSize);
153 assert(self->skipBytes < self->inSize);
154 if ((Py_ssize_t)self->skipBytes >= readSize) {
154 if ((Py_ssize_t)self->skipBytes >= readSize) {
155 PyErr_SetString(PyExc_ValueError,
155 PyErr_SetString(PyExc_ValueError,
156 "skip_bytes larger than first input chunk; "
156 "skip_bytes larger than first input chunk; "
157 "this scenario is currently unsupported");
157 "this scenario is currently unsupported");
158 Py_XDECREF(readResult);
158 Py_XDECREF(readResult);
159 return NULL;
159 return NULL;
160 }
160 }
161
161
162 readBuffer = readBuffer + self->skipBytes;
162 readBuffer = readBuffer + self->skipBytes;
163 readSize -= self->skipBytes;
163 readSize -= self->skipBytes;
164 }
164 }
165
165
166 /* Copy input into previously allocated buffer because it can live longer
166 /* Copy input into previously allocated buffer because it can live longer
167 than a single function call and we don't want to keep a ref to a Python
167 than a single function call and we don't want to keep a ref to a Python
168 object around. This could be changed... */
168 object around. This could be changed... */
169 memcpy((void*)self->input.src, readBuffer, readSize);
169 memcpy((void*)self->input.src, readBuffer, readSize);
170 self->input.size = readSize;
170 self->input.size = readSize;
171 self->input.pos = 0;
171 self->input.pos = 0;
172 }
172 }
173 /* No bytes on first read must mean an empty input stream. */
173 /* No bytes on first read must mean an empty input stream. */
174 else if (!self->readCount) {
174 else if (!self->readCount) {
175 self->finishedInput = 1;
175 self->finishedInput = 1;
176 self->finishedOutput = 1;
176 self->finishedOutput = 1;
177 Py_XDECREF(readResult);
177 Py_XDECREF(readResult);
178 PyErr_SetString(PyExc_StopIteration, "empty input");
178 PyErr_SetString(PyExc_StopIteration, "empty input");
179 return NULL;
179 return NULL;
180 }
180 }
181 else {
181 else {
182 self->finishedInput = 1;
182 self->finishedInput = 1;
183 }
183 }
184
184
185 /* We've copied the data managed by memory. Discard the Python object. */
185 /* We've copied the data managed by memory. Discard the Python object. */
186 Py_XDECREF(readResult);
186 Py_XDECREF(readResult);
187 }
187 }
188
188
189 result = read_decompressor_iterator(self);
189 result = read_decompressor_iterator(self);
190 if (result.errored || result.chunk) {
190 if (result.errored || result.chunk) {
191 return result.chunk;
191 return result.chunk;
192 }
192 }
193
193
194 /* No new output data. Try again unless we know there is no more data. */
194 /* No new output data. Try again unless we know there is no more data. */
195 if (!self->finishedInput) {
195 if (!self->finishedInput) {
196 goto read_from_source;
196 goto read_from_source;
197 }
197 }
198
198
199 PyErr_SetString(PyExc_StopIteration, "input exhausted");
199 PyErr_SetString(PyExc_StopIteration, "input exhausted");
200 return NULL;
200 return NULL;
201 }
201 }
202
202
203 PyTypeObject ZstdDecompressorIteratorType = {
203 PyTypeObject ZstdDecompressorIteratorType = {
204 PyVarObject_HEAD_INIT(NULL, 0)
204 PyVarObject_HEAD_INIT(NULL, 0)
205 "zstd.ZstdDecompressorIterator", /* tp_name */
205 "zstd.ZstdDecompressorIterator", /* tp_name */
206 sizeof(ZstdDecompressorIterator), /* tp_basicsize */
206 sizeof(ZstdDecompressorIterator), /* tp_basicsize */
207 0, /* tp_itemsize */
207 0, /* tp_itemsize */
208 (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
208 (destructor)ZstdDecompressorIterator_dealloc, /* tp_dealloc */
209 0, /* tp_print */
209 0, /* tp_print */
210 0, /* tp_getattr */
210 0, /* tp_getattr */
211 0, /* tp_setattr */
211 0, /* tp_setattr */
212 0, /* tp_compare */
212 0, /* tp_compare */
213 0, /* tp_repr */
213 0, /* tp_repr */
214 0, /* tp_as_number */
214 0, /* tp_as_number */
215 0, /* tp_as_sequence */
215 0, /* tp_as_sequence */
216 0, /* tp_as_mapping */
216 0, /* tp_as_mapping */
217 0, /* tp_hash */
217 0, /* tp_hash */
218 0, /* tp_call */
218 0, /* tp_call */
219 0, /* tp_str */
219 0, /* tp_str */
220 0, /* tp_getattro */
220 0, /* tp_getattro */
221 0, /* tp_setattro */
221 0, /* tp_setattro */
222 0, /* tp_as_buffer */
222 0, /* tp_as_buffer */
223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
223 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
224 ZstdDecompressorIterator__doc__, /* tp_doc */
224 ZstdDecompressorIterator__doc__, /* tp_doc */
225 0, /* tp_traverse */
225 0, /* tp_traverse */
226 0, /* tp_clear */
226 0, /* tp_clear */
227 0, /* tp_richcompare */
227 0, /* tp_richcompare */
228 0, /* tp_weaklistoffset */
228 0, /* tp_weaklistoffset */
229 ZstdDecompressorIterator_iter, /* tp_iter */
229 ZstdDecompressorIterator_iter, /* tp_iter */
230 (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
230 (iternextfunc)ZstdDecompressorIterator_iternext, /* tp_iternext */
231 0, /* tp_methods */
231 0, /* tp_methods */
232 0, /* tp_members */
232 0, /* tp_members */
233 0, /* tp_getset */
233 0, /* tp_getset */
234 0, /* tp_base */
234 0, /* tp_base */
235 0, /* tp_dict */
235 0, /* tp_dict */
236 0, /* tp_descr_get */
236 0, /* tp_descr_get */
237 0, /* tp_descr_set */
237 0, /* tp_descr_set */
238 0, /* tp_dictoffset */
238 0, /* tp_dictoffset */
239 0, /* tp_init */
239 0, /* tp_init */
240 0, /* tp_alloc */
240 0, /* tp_alloc */
241 PyType_GenericNew, /* tp_new */
241 PyType_GenericNew, /* tp_new */
242 };
242 };
243
243
244 void decompressoriterator_module_init(PyObject* mod) {
244 void decompressoriterator_module_init(PyObject* mod) {
245 Py_TYPE(&ZstdDecompressorIteratorType) = &PyType_Type;
245 Py_SET_TYPE(&ZstdDecompressorIteratorType, &PyType_Type);
246 if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
246 if (PyType_Ready(&ZstdDecompressorIteratorType) < 0) {
247 return;
247 return;
248 }
248 }
249 }
249 }
@@ -1,138 +1,138 b''
1 /**
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 PyDoc_STRVAR(FrameParameters__doc__,
13 PyDoc_STRVAR(FrameParameters__doc__,
14 "FrameParameters: information about a zstd frame");
14 "FrameParameters: information about a zstd frame");
15
15
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs) {
16 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs) {
17 static char* kwlist[] = {
17 static char* kwlist[] = {
18 "data",
18 "data",
19 NULL
19 NULL
20 };
20 };
21
21
22 Py_buffer source;
22 Py_buffer source;
23 ZSTD_frameHeader header;
23 ZSTD_frameHeader header;
24 FrameParametersObject* result = NULL;
24 FrameParametersObject* result = NULL;
25 size_t zresult;
25 size_t zresult;
26
26
27 #if PY_MAJOR_VERSION >= 3
27 #if PY_MAJOR_VERSION >= 3
28 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters",
28 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:get_frame_parameters",
29 #else
29 #else
30 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:get_frame_parameters",
30 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:get_frame_parameters",
31 #endif
31 #endif
32 kwlist, &source)) {
32 kwlist, &source)) {
33 return NULL;
33 return NULL;
34 }
34 }
35
35
36 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
36 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
37 PyErr_SetString(PyExc_ValueError,
37 PyErr_SetString(PyExc_ValueError,
38 "data buffer should be contiguous and have at most one dimension");
38 "data buffer should be contiguous and have at most one dimension");
39 goto finally;
39 goto finally;
40 }
40 }
41
41
42 zresult = ZSTD_getFrameHeader(&header, source.buf, source.len);
42 zresult = ZSTD_getFrameHeader(&header, source.buf, source.len);
43
43
44 if (ZSTD_isError(zresult)) {
44 if (ZSTD_isError(zresult)) {
45 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
45 PyErr_Format(ZstdError, "cannot get frame parameters: %s", ZSTD_getErrorName(zresult));
46 goto finally;
46 goto finally;
47 }
47 }
48
48
49 if (zresult) {
49 if (zresult) {
50 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
50 PyErr_Format(ZstdError, "not enough data for frame parameters; need %zu bytes", zresult);
51 goto finally;
51 goto finally;
52 }
52 }
53
53
54 result = PyObject_New(FrameParametersObject, &FrameParametersType);
54 result = PyObject_New(FrameParametersObject, &FrameParametersType);
55 if (!result) {
55 if (!result) {
56 goto finally;
56 goto finally;
57 }
57 }
58
58
59 result->frameContentSize = header.frameContentSize;
59 result->frameContentSize = header.frameContentSize;
60 result->windowSize = header.windowSize;
60 result->windowSize = header.windowSize;
61 result->dictID = header.dictID;
61 result->dictID = header.dictID;
62 result->checksumFlag = header.checksumFlag ? 1 : 0;
62 result->checksumFlag = header.checksumFlag ? 1 : 0;
63
63
64 finally:
64 finally:
65 PyBuffer_Release(&source);
65 PyBuffer_Release(&source);
66 return result;
66 return result;
67 }
67 }
68
68
69 static void FrameParameters_dealloc(PyObject* self) {
69 static void FrameParameters_dealloc(PyObject* self) {
70 PyObject_Del(self);
70 PyObject_Del(self);
71 }
71 }
72
72
73 static PyMemberDef FrameParameters_members[] = {
73 static PyMemberDef FrameParameters_members[] = {
74 { "content_size", T_ULONGLONG,
74 { "content_size", T_ULONGLONG,
75 offsetof(FrameParametersObject, frameContentSize), READONLY,
75 offsetof(FrameParametersObject, frameContentSize), READONLY,
76 "frame content size" },
76 "frame content size" },
77 { "window_size", T_ULONGLONG,
77 { "window_size", T_ULONGLONG,
78 offsetof(FrameParametersObject, windowSize), READONLY,
78 offsetof(FrameParametersObject, windowSize), READONLY,
79 "window size" },
79 "window size" },
80 { "dict_id", T_UINT,
80 { "dict_id", T_UINT,
81 offsetof(FrameParametersObject, dictID), READONLY,
81 offsetof(FrameParametersObject, dictID), READONLY,
82 "dictionary ID" },
82 "dictionary ID" },
83 { "has_checksum", T_BOOL,
83 { "has_checksum", T_BOOL,
84 offsetof(FrameParametersObject, checksumFlag), READONLY,
84 offsetof(FrameParametersObject, checksumFlag), READONLY,
85 "checksum flag" },
85 "checksum flag" },
86 { NULL }
86 { NULL }
87 };
87 };
88
88
89 PyTypeObject FrameParametersType = {
89 PyTypeObject FrameParametersType = {
90 PyVarObject_HEAD_INIT(NULL, 0)
90 PyVarObject_HEAD_INIT(NULL, 0)
91 "FrameParameters", /* tp_name */
91 "FrameParameters", /* tp_name */
92 sizeof(FrameParametersObject), /* tp_basicsize */
92 sizeof(FrameParametersObject), /* tp_basicsize */
93 0, /* tp_itemsize */
93 0, /* tp_itemsize */
94 (destructor)FrameParameters_dealloc, /* tp_dealloc */
94 (destructor)FrameParameters_dealloc, /* tp_dealloc */
95 0, /* tp_print */
95 0, /* tp_print */
96 0, /* tp_getattr */
96 0, /* tp_getattr */
97 0, /* tp_setattr */
97 0, /* tp_setattr */
98 0, /* tp_compare */
98 0, /* tp_compare */
99 0, /* tp_repr */
99 0, /* tp_repr */
100 0, /* tp_as_number */
100 0, /* tp_as_number */
101 0, /* tp_as_sequence */
101 0, /* tp_as_sequence */
102 0, /* tp_as_mapping */
102 0, /* tp_as_mapping */
103 0, /* tp_hash */
103 0, /* tp_hash */
104 0, /* tp_call */
104 0, /* tp_call */
105 0, /* tp_str */
105 0, /* tp_str */
106 0, /* tp_getattro */
106 0, /* tp_getattro */
107 0, /* tp_setattro */
107 0, /* tp_setattro */
108 0, /* tp_as_buffer */
108 0, /* tp_as_buffer */
109 Py_TPFLAGS_DEFAULT, /* tp_flags */
109 Py_TPFLAGS_DEFAULT, /* tp_flags */
110 FrameParameters__doc__, /* tp_doc */
110 FrameParameters__doc__, /* tp_doc */
111 0, /* tp_traverse */
111 0, /* tp_traverse */
112 0, /* tp_clear */
112 0, /* tp_clear */
113 0, /* tp_richcompare */
113 0, /* tp_richcompare */
114 0, /* tp_weaklistoffset */
114 0, /* tp_weaklistoffset */
115 0, /* tp_iter */
115 0, /* tp_iter */
116 0, /* tp_iternext */
116 0, /* tp_iternext */
117 0, /* tp_methods */
117 0, /* tp_methods */
118 FrameParameters_members, /* tp_members */
118 FrameParameters_members, /* tp_members */
119 0, /* tp_getset */
119 0, /* tp_getset */
120 0, /* tp_base */
120 0, /* tp_base */
121 0, /* tp_dict */
121 0, /* tp_dict */
122 0, /* tp_descr_get */
122 0, /* tp_descr_get */
123 0, /* tp_descr_set */
123 0, /* tp_descr_set */
124 0, /* tp_dictoffset */
124 0, /* tp_dictoffset */
125 0, /* tp_init */
125 0, /* tp_init */
126 0, /* tp_alloc */
126 0, /* tp_alloc */
127 0, /* tp_new */
127 0, /* tp_new */
128 };
128 };
129
129
130 void frameparams_module_init(PyObject* mod) {
130 void frameparams_module_init(PyObject* mod) {
131 Py_TYPE(&FrameParametersType) = &PyType_Type;
131 Py_SET_TYPE(&FrameParametersType, &PyType_Type);
132 if (PyType_Ready(&FrameParametersType) < 0) {
132 if (PyType_Ready(&FrameParametersType) < 0) {
133 return;
133 return;
134 }
134 }
135
135
136 Py_INCREF(&FrameParametersType);
136 Py_INCREF(&FrameParametersType);
137 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
137 PyModule_AddObject(mod, "FrameParameters", (PyObject*)&FrameParametersType);
138 }
138 }
@@ -1,359 +1,360 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #define PY_SSIZE_T_CLEAN
9 #define PY_SSIZE_T_CLEAN
10 #include <Python.h>
10 #include <Python.h>
11 #include "structmember.h"
11 #include "structmember.h"
12 #include <pythoncapi_compat.h>
12
13
13 #define ZSTD_STATIC_LINKING_ONLY
14 #define ZSTD_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
15 #define ZDICT_STATIC_LINKING_ONLY
15 #include <zstd.h>
16 #include <zstd.h>
16 #include <zdict.h>
17 #include <zdict.h>
17
18
18 /* Remember to change the string in zstandard/__init__ as well */
19 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.13.0"
20 #define PYTHON_ZSTANDARD_VERSION "0.13.0"
20
21
21 typedef enum {
22 typedef enum {
22 compressorobj_flush_finish,
23 compressorobj_flush_finish,
23 compressorobj_flush_block,
24 compressorobj_flush_block,
24 } CompressorObj_Flush;
25 } CompressorObj_Flush;
25
26
26 /*
27 /*
27 Represents a ZstdCompressionParameters type.
28 Represents a ZstdCompressionParameters type.
28
29
29 This type holds all the low-level compression parameters that can be set.
30 This type holds all the low-level compression parameters that can be set.
30 */
31 */
31 typedef struct {
32 typedef struct {
32 PyObject_HEAD
33 PyObject_HEAD
33 ZSTD_CCtx_params* params;
34 ZSTD_CCtx_params* params;
34 } ZstdCompressionParametersObject;
35 } ZstdCompressionParametersObject;
35
36
36 extern PyTypeObject ZstdCompressionParametersType;
37 extern PyTypeObject ZstdCompressionParametersType;
37
38
38 /*
39 /*
39 Represents a FrameParameters type.
40 Represents a FrameParameters type.
40
41
41 This type is basically a wrapper around ZSTD_frameParams.
42 This type is basically a wrapper around ZSTD_frameParams.
42 */
43 */
43 typedef struct {
44 typedef struct {
44 PyObject_HEAD
45 PyObject_HEAD
45 unsigned long long frameContentSize;
46 unsigned long long frameContentSize;
46 unsigned long long windowSize;
47 unsigned long long windowSize;
47 unsigned dictID;
48 unsigned dictID;
48 char checksumFlag;
49 char checksumFlag;
49 } FrameParametersObject;
50 } FrameParametersObject;
50
51
51 extern PyTypeObject FrameParametersType;
52 extern PyTypeObject FrameParametersType;
52
53
53 /*
54 /*
54 Represents a ZstdCompressionDict type.
55 Represents a ZstdCompressionDict type.
55
56
56 Instances hold data used for a zstd compression dictionary.
57 Instances hold data used for a zstd compression dictionary.
57 */
58 */
58 typedef struct {
59 typedef struct {
59 PyObject_HEAD
60 PyObject_HEAD
60
61
61 /* Pointer to dictionary data. Owned by self. */
62 /* Pointer to dictionary data. Owned by self. */
62 void* dictData;
63 void* dictData;
63 /* Size of dictionary data. */
64 /* Size of dictionary data. */
64 size_t dictSize;
65 size_t dictSize;
65 ZSTD_dictContentType_e dictType;
66 ZSTD_dictContentType_e dictType;
66 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
67 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
67 unsigned k;
68 unsigned k;
68 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
69 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
69 unsigned d;
70 unsigned d;
70 /* Digested dictionary, suitable for reuse. */
71 /* Digested dictionary, suitable for reuse. */
71 ZSTD_CDict* cdict;
72 ZSTD_CDict* cdict;
72 ZSTD_DDict* ddict;
73 ZSTD_DDict* ddict;
73 } ZstdCompressionDict;
74 } ZstdCompressionDict;
74
75
75 extern PyTypeObject ZstdCompressionDictType;
76 extern PyTypeObject ZstdCompressionDictType;
76
77
77 /*
78 /*
78 Represents a ZstdCompressor type.
79 Represents a ZstdCompressor type.
79 */
80 */
80 typedef struct {
81 typedef struct {
81 PyObject_HEAD
82 PyObject_HEAD
82
83
83 /* Number of threads to use for operations. */
84 /* Number of threads to use for operations. */
84 unsigned int threads;
85 unsigned int threads;
85 /* Pointer to compression dictionary to use. NULL if not using dictionary
86 /* Pointer to compression dictionary to use. NULL if not using dictionary
86 compression. */
87 compression. */
87 ZstdCompressionDict* dict;
88 ZstdCompressionDict* dict;
88 /* Compression context to use. Populated during object construction. */
89 /* Compression context to use. Populated during object construction. */
89 ZSTD_CCtx* cctx;
90 ZSTD_CCtx* cctx;
90 /* Compression parameters in use. */
91 /* Compression parameters in use. */
91 ZSTD_CCtx_params* params;
92 ZSTD_CCtx_params* params;
92 } ZstdCompressor;
93 } ZstdCompressor;
93
94
94 extern PyTypeObject ZstdCompressorType;
95 extern PyTypeObject ZstdCompressorType;
95
96
96 typedef struct {
97 typedef struct {
97 PyObject_HEAD
98 PyObject_HEAD
98
99
99 ZstdCompressor* compressor;
100 ZstdCompressor* compressor;
100 ZSTD_outBuffer output;
101 ZSTD_outBuffer output;
101 int finished;
102 int finished;
102 } ZstdCompressionObj;
103 } ZstdCompressionObj;
103
104
104 extern PyTypeObject ZstdCompressionObjType;
105 extern PyTypeObject ZstdCompressionObjType;
105
106
106 typedef struct {
107 typedef struct {
107 PyObject_HEAD
108 PyObject_HEAD
108
109
109 ZstdCompressor* compressor;
110 ZstdCompressor* compressor;
110 PyObject* writer;
111 PyObject* writer;
111 ZSTD_outBuffer output;
112 ZSTD_outBuffer output;
112 size_t outSize;
113 size_t outSize;
113 int entered;
114 int entered;
114 int closed;
115 int closed;
115 int writeReturnRead;
116 int writeReturnRead;
116 unsigned long long bytesCompressed;
117 unsigned long long bytesCompressed;
117 } ZstdCompressionWriter;
118 } ZstdCompressionWriter;
118
119
119 extern PyTypeObject ZstdCompressionWriterType;
120 extern PyTypeObject ZstdCompressionWriterType;
120
121
121 typedef struct {
122 typedef struct {
122 PyObject_HEAD
123 PyObject_HEAD
123
124
124 ZstdCompressor* compressor;
125 ZstdCompressor* compressor;
125 PyObject* reader;
126 PyObject* reader;
126 Py_buffer buffer;
127 Py_buffer buffer;
127 Py_ssize_t bufferOffset;
128 Py_ssize_t bufferOffset;
128 size_t inSize;
129 size_t inSize;
129 size_t outSize;
130 size_t outSize;
130
131
131 ZSTD_inBuffer input;
132 ZSTD_inBuffer input;
132 ZSTD_outBuffer output;
133 ZSTD_outBuffer output;
133 int finishedOutput;
134 int finishedOutput;
134 int finishedInput;
135 int finishedInput;
135 PyObject* readResult;
136 PyObject* readResult;
136 } ZstdCompressorIterator;
137 } ZstdCompressorIterator;
137
138
138 extern PyTypeObject ZstdCompressorIteratorType;
139 extern PyTypeObject ZstdCompressorIteratorType;
139
140
140 typedef struct {
141 typedef struct {
141 PyObject_HEAD
142 PyObject_HEAD
142
143
143 ZstdCompressor* compressor;
144 ZstdCompressor* compressor;
144 PyObject* reader;
145 PyObject* reader;
145 Py_buffer buffer;
146 Py_buffer buffer;
146 size_t readSize;
147 size_t readSize;
147
148
148 int entered;
149 int entered;
149 int closed;
150 int closed;
150 unsigned long long bytesCompressed;
151 unsigned long long bytesCompressed;
151
152
152 ZSTD_inBuffer input;
153 ZSTD_inBuffer input;
153 ZSTD_outBuffer output;
154 ZSTD_outBuffer output;
154 int finishedInput;
155 int finishedInput;
155 int finishedOutput;
156 int finishedOutput;
156 PyObject* readResult;
157 PyObject* readResult;
157 } ZstdCompressionReader;
158 } ZstdCompressionReader;
158
159
159 extern PyTypeObject ZstdCompressionReaderType;
160 extern PyTypeObject ZstdCompressionReaderType;
160
161
161 typedef struct {
162 typedef struct {
162 PyObject_HEAD
163 PyObject_HEAD
163
164
164 ZstdCompressor* compressor;
165 ZstdCompressor* compressor;
165 ZSTD_inBuffer input;
166 ZSTD_inBuffer input;
166 ZSTD_outBuffer output;
167 ZSTD_outBuffer output;
167 Py_buffer inBuffer;
168 Py_buffer inBuffer;
168 int finished;
169 int finished;
169 size_t chunkSize;
170 size_t chunkSize;
170 } ZstdCompressionChunker;
171 } ZstdCompressionChunker;
171
172
172 extern PyTypeObject ZstdCompressionChunkerType;
173 extern PyTypeObject ZstdCompressionChunkerType;
173
174
174 typedef enum {
175 typedef enum {
175 compressionchunker_mode_normal,
176 compressionchunker_mode_normal,
176 compressionchunker_mode_flush,
177 compressionchunker_mode_flush,
177 compressionchunker_mode_finish,
178 compressionchunker_mode_finish,
178 } CompressionChunkerMode;
179 } CompressionChunkerMode;
179
180
180 typedef struct {
181 typedef struct {
181 PyObject_HEAD
182 PyObject_HEAD
182
183
183 ZstdCompressionChunker* chunker;
184 ZstdCompressionChunker* chunker;
184 CompressionChunkerMode mode;
185 CompressionChunkerMode mode;
185 } ZstdCompressionChunkerIterator;
186 } ZstdCompressionChunkerIterator;
186
187
187 extern PyTypeObject ZstdCompressionChunkerIteratorType;
188 extern PyTypeObject ZstdCompressionChunkerIteratorType;
188
189
189 typedef struct {
190 typedef struct {
190 PyObject_HEAD
191 PyObject_HEAD
191
192
192 ZSTD_DCtx* dctx;
193 ZSTD_DCtx* dctx;
193 ZstdCompressionDict* dict;
194 ZstdCompressionDict* dict;
194 size_t maxWindowSize;
195 size_t maxWindowSize;
195 ZSTD_format_e format;
196 ZSTD_format_e format;
196 } ZstdDecompressor;
197 } ZstdDecompressor;
197
198
198 extern PyTypeObject ZstdDecompressorType;
199 extern PyTypeObject ZstdDecompressorType;
199
200
200 typedef struct {
201 typedef struct {
201 PyObject_HEAD
202 PyObject_HEAD
202
203
203 ZstdDecompressor* decompressor;
204 ZstdDecompressor* decompressor;
204 size_t outSize;
205 size_t outSize;
205 int finished;
206 int finished;
206 } ZstdDecompressionObj;
207 } ZstdDecompressionObj;
207
208
208 extern PyTypeObject ZstdDecompressionObjType;
209 extern PyTypeObject ZstdDecompressionObjType;
209
210
210 typedef struct {
211 typedef struct {
211 PyObject_HEAD
212 PyObject_HEAD
212
213
213 /* Parent decompressor to which this object is associated. */
214 /* Parent decompressor to which this object is associated. */
214 ZstdDecompressor* decompressor;
215 ZstdDecompressor* decompressor;
215 /* Object to read() from (if reading from a stream). */
216 /* Object to read() from (if reading from a stream). */
216 PyObject* reader;
217 PyObject* reader;
217 /* Size for read() operations on reader. */
218 /* Size for read() operations on reader. */
218 size_t readSize;
219 size_t readSize;
219 /* Whether a read() can return data spanning multiple zstd frames. */
220 /* Whether a read() can return data spanning multiple zstd frames. */
220 int readAcrossFrames;
221 int readAcrossFrames;
221 /* Buffer to read from (if reading from a buffer). */
222 /* Buffer to read from (if reading from a buffer). */
222 Py_buffer buffer;
223 Py_buffer buffer;
223
224
224 /* Whether the context manager is active. */
225 /* Whether the context manager is active. */
225 int entered;
226 int entered;
226 /* Whether we've closed the stream. */
227 /* Whether we've closed the stream. */
227 int closed;
228 int closed;
228
229
229 /* Number of bytes decompressed and returned to user. */
230 /* Number of bytes decompressed and returned to user. */
230 unsigned long long bytesDecompressed;
231 unsigned long long bytesDecompressed;
231
232
232 /* Tracks data going into decompressor. */
233 /* Tracks data going into decompressor. */
233 ZSTD_inBuffer input;
234 ZSTD_inBuffer input;
234
235
235 /* Holds output from read() operation on reader. */
236 /* Holds output from read() operation on reader. */
236 PyObject* readResult;
237 PyObject* readResult;
237
238
238 /* Whether all input has been sent to the decompressor. */
239 /* Whether all input has been sent to the decompressor. */
239 int finishedInput;
240 int finishedInput;
240 /* Whether all output has been flushed from the decompressor. */
241 /* Whether all output has been flushed from the decompressor. */
241 int finishedOutput;
242 int finishedOutput;
242 } ZstdDecompressionReader;
243 } ZstdDecompressionReader;
243
244
244 extern PyTypeObject ZstdDecompressionReaderType;
245 extern PyTypeObject ZstdDecompressionReaderType;
245
246
246 typedef struct {
247 typedef struct {
247 PyObject_HEAD
248 PyObject_HEAD
248
249
249 ZstdDecompressor* decompressor;
250 ZstdDecompressor* decompressor;
250 PyObject* writer;
251 PyObject* writer;
251 size_t outSize;
252 size_t outSize;
252 int entered;
253 int entered;
253 int closed;
254 int closed;
254 int writeReturnRead;
255 int writeReturnRead;
255 } ZstdDecompressionWriter;
256 } ZstdDecompressionWriter;
256
257
257 extern PyTypeObject ZstdDecompressionWriterType;
258 extern PyTypeObject ZstdDecompressionWriterType;
258
259
259 typedef struct {
260 typedef struct {
260 PyObject_HEAD
261 PyObject_HEAD
261
262
262 ZstdDecompressor* decompressor;
263 ZstdDecompressor* decompressor;
263 PyObject* reader;
264 PyObject* reader;
264 Py_buffer buffer;
265 Py_buffer buffer;
265 Py_ssize_t bufferOffset;
266 Py_ssize_t bufferOffset;
266 size_t inSize;
267 size_t inSize;
267 size_t outSize;
268 size_t outSize;
268 size_t skipBytes;
269 size_t skipBytes;
269 ZSTD_inBuffer input;
270 ZSTD_inBuffer input;
270 ZSTD_outBuffer output;
271 ZSTD_outBuffer output;
271 Py_ssize_t readCount;
272 Py_ssize_t readCount;
272 int finishedInput;
273 int finishedInput;
273 int finishedOutput;
274 int finishedOutput;
274 } ZstdDecompressorIterator;
275 } ZstdDecompressorIterator;
275
276
276 extern PyTypeObject ZstdDecompressorIteratorType;
277 extern PyTypeObject ZstdDecompressorIteratorType;
277
278
278 typedef struct {
279 typedef struct {
279 int errored;
280 int errored;
280 PyObject* chunk;
281 PyObject* chunk;
281 } DecompressorIteratorResult;
282 } DecompressorIteratorResult;
282
283
283 typedef struct {
284 typedef struct {
284 /* The public API is that these are 64-bit unsigned integers. So these can't
285 /* The public API is that these are 64-bit unsigned integers. So these can't
285 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
286 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
286 * be nonsensical for this platform. */
287 * be nonsensical for this platform. */
287 unsigned long long offset;
288 unsigned long long offset;
288 unsigned long long length;
289 unsigned long long length;
289 } BufferSegment;
290 } BufferSegment;
290
291
291 typedef struct {
292 typedef struct {
292 PyObject_HEAD
293 PyObject_HEAD
293
294
294 PyObject* parent;
295 PyObject* parent;
295 BufferSegment* segments;
296 BufferSegment* segments;
296 Py_ssize_t segmentCount;
297 Py_ssize_t segmentCount;
297 } ZstdBufferSegments;
298 } ZstdBufferSegments;
298
299
299 extern PyTypeObject ZstdBufferSegmentsType;
300 extern PyTypeObject ZstdBufferSegmentsType;
300
301
301 typedef struct {
302 typedef struct {
302 PyObject_HEAD
303 PyObject_HEAD
303
304
304 PyObject* parent;
305 PyObject* parent;
305 void* data;
306 void* data;
306 Py_ssize_t dataSize;
307 Py_ssize_t dataSize;
307 unsigned long long offset;
308 unsigned long long offset;
308 } ZstdBufferSegment;
309 } ZstdBufferSegment;
309
310
310 extern PyTypeObject ZstdBufferSegmentType;
311 extern PyTypeObject ZstdBufferSegmentType;
311
312
312 typedef struct {
313 typedef struct {
313 PyObject_HEAD
314 PyObject_HEAD
314
315
315 Py_buffer parent;
316 Py_buffer parent;
316 void* data;
317 void* data;
317 unsigned long long dataSize;
318 unsigned long long dataSize;
318 BufferSegment* segments;
319 BufferSegment* segments;
319 Py_ssize_t segmentCount;
320 Py_ssize_t segmentCount;
320 int useFree;
321 int useFree;
321 } ZstdBufferWithSegments;
322 } ZstdBufferWithSegments;
322
323
323 extern PyTypeObject ZstdBufferWithSegmentsType;
324 extern PyTypeObject ZstdBufferWithSegmentsType;
324
325
325 /**
326 /**
326 * An ordered collection of BufferWithSegments exposed as a squashed collection.
327 * An ordered collection of BufferWithSegments exposed as a squashed collection.
327 *
328 *
328 * This type provides a virtual view spanning multiple BufferWithSegments
329 * This type provides a virtual view spanning multiple BufferWithSegments
329 * instances. It allows multiple instances to be "chained" together and
330 * instances. It allows multiple instances to be "chained" together and
330 * exposed as a single collection. e.g. if there are 2 buffers holding
331 * exposed as a single collection. e.g. if there are 2 buffers holding
331 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
332 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
332 */
333 */
333 typedef struct {
334 typedef struct {
334 PyObject_HEAD
335 PyObject_HEAD
335
336
336 /* An array of buffers that should be exposed through this instance. */
337 /* An array of buffers that should be exposed through this instance. */
337 ZstdBufferWithSegments** buffers;
338 ZstdBufferWithSegments** buffers;
338 /* Number of elements in buffers array. */
339 /* Number of elements in buffers array. */
339 Py_ssize_t bufferCount;
340 Py_ssize_t bufferCount;
340 /* Array of first offset in each buffer instance. 0th entry corresponds
341 /* Array of first offset in each buffer instance. 0th entry corresponds
341 to number of elements in the 0th buffer. 1st entry corresponds to the
342 to number of elements in the 0th buffer. 1st entry corresponds to the
342 sum of elements in 0th and 1st buffers. */
343 sum of elements in 0th and 1st buffers. */
343 Py_ssize_t* firstElements;
344 Py_ssize_t* firstElements;
344 } ZstdBufferWithSegmentsCollection;
345 } ZstdBufferWithSegmentsCollection;
345
346
346 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
347 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
347
348
348 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
349 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
349 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
350 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
350 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
351 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
351 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
352 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
352 int ensure_ddict(ZstdCompressionDict* dict);
353 int ensure_ddict(ZstdCompressionDict* dict);
353 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
354 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
354 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
355 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
355 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
356 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
356 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
357 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
357 int cpu_count(void);
358 int cpu_count(void);
358 size_t roundpow2(size_t);
359 size_t roundpow2(size_t);
359 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
360 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
@@ -1,1419 +1,1419 b''
1 /*
1 /*
2 osutil.c - native operating system services
2 osutil.c - native operating system services
3
3
4 Copyright 2007 Matt Mackall and others
4 Copyright 2007 Matt Mackall and others
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 #define _ATFILE_SOURCE
10 #define _ATFILE_SOURCE
11 #define PY_SSIZE_T_CLEAN
11 #define PY_SSIZE_T_CLEAN
12 #include <Python.h>
12 #include <Python.h>
13 #include <errno.h>
13 #include <errno.h>
14 #include <fcntl.h>
14 #include <fcntl.h>
15 #include <stdio.h>
15 #include <stdio.h>
16 #include <stdlib.h>
16 #include <stdlib.h>
17 #include <string.h>
17 #include <string.h>
18
18
19 #ifdef _WIN32
19 #ifdef _WIN32
20 #include <io.h>
20 #include <io.h>
21 #include <windows.h>
21 #include <windows.h>
22 #else
22 #else
23 #include <dirent.h>
23 #include <dirent.h>
24 #include <signal.h>
24 #include <signal.h>
25 #include <sys/socket.h>
25 #include <sys/socket.h>
26 #include <sys/stat.h>
26 #include <sys/stat.h>
27 #include <sys/types.h>
27 #include <sys/types.h>
28 #include <unistd.h>
28 #include <unistd.h>
29 #ifdef HAVE_LINUX_STATFS
29 #ifdef HAVE_LINUX_STATFS
30 #include <linux/magic.h>
30 #include <linux/magic.h>
31 #include <sys/vfs.h>
31 #include <sys/vfs.h>
32 #endif
32 #endif
33 #ifdef HAVE_BSD_STATFS
33 #ifdef HAVE_BSD_STATFS
34 #include <sys/mount.h>
34 #include <sys/mount.h>
35 #include <sys/param.h>
35 #include <sys/param.h>
36 #endif
36 #endif
37 #endif
37 #endif
38
38
39 #ifdef __APPLE__
39 #ifdef __APPLE__
40 #include <sys/attr.h>
40 #include <sys/attr.h>
41 #include <sys/vnode.h>
41 #include <sys/vnode.h>
42 #endif
42 #endif
43
43
44 #include "util.h"
44 #include "util.h"
45
45
46 /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
46 /* some platforms lack the PATH_MAX definition (eg. GNU/Hurd) */
47 #ifndef PATH_MAX
47 #ifndef PATH_MAX
48 #define PATH_MAX 4096
48 #define PATH_MAX 4096
49 #endif
49 #endif
50
50
51 #ifdef _WIN32
51 #ifdef _WIN32
52 /*
52 /*
53 stat struct compatible with hg expectations
53 stat struct compatible with hg expectations
54 Mercurial only uses st_mode, st_size and st_mtime
54 Mercurial only uses st_mode, st_size and st_mtime
55 the rest is kept to minimize changes between implementations
55 the rest is kept to minimize changes between implementations
56 */
56 */
57 struct hg_stat {
57 struct hg_stat {
58 int st_dev;
58 int st_dev;
59 int st_mode;
59 int st_mode;
60 int st_nlink;
60 int st_nlink;
61 __int64 st_size;
61 __int64 st_size;
62 int st_mtime;
62 int st_mtime;
63 int st_ctime;
63 int st_ctime;
64 };
64 };
65 struct listdir_stat {
65 struct listdir_stat {
66 PyObject_HEAD
66 PyObject_HEAD
67 struct hg_stat st;
67 struct hg_stat st;
68 };
68 };
69 #else
69 #else
70 struct listdir_stat {
70 struct listdir_stat {
71 PyObject_HEAD
71 PyObject_HEAD
72 struct stat st;
72 struct stat st;
73 };
73 };
74 #endif
74 #endif
75
75
76 #ifdef IS_PY3K
76 #ifdef IS_PY3K
77 #define listdir_slot(name) \
77 #define listdir_slot(name) \
78 static PyObject *listdir_stat_##name(PyObject *self, void *x) \
78 static PyObject *listdir_stat_##name(PyObject *self, void *x) \
79 { \
79 { \
80 return PyLong_FromLong(((struct listdir_stat *)self)->st.name); \
80 return PyLong_FromLong(((struct listdir_stat *)self)->st.name); \
81 }
81 }
82 #else
82 #else
83 #define listdir_slot(name) \
83 #define listdir_slot(name) \
84 static PyObject *listdir_stat_##name(PyObject *self, void *x) \
84 static PyObject *listdir_stat_##name(PyObject *self, void *x) \
85 { \
85 { \
86 return PyInt_FromLong(((struct listdir_stat *)self)->st.name); \
86 return PyInt_FromLong(((struct listdir_stat *)self)->st.name); \
87 }
87 }
88 #endif
88 #endif
89
89
90 listdir_slot(st_dev)
90 listdir_slot(st_dev)
91 listdir_slot(st_mode)
91 listdir_slot(st_mode)
92 listdir_slot(st_nlink)
92 listdir_slot(st_nlink)
93 #ifdef _WIN32
93 #ifdef _WIN32
94 static PyObject *listdir_stat_st_size(PyObject *self, void *x)
94 static PyObject *listdir_stat_st_size(PyObject *self, void *x)
95 {
95 {
96 return PyLong_FromLongLong(
96 return PyLong_FromLongLong(
97 (PY_LONG_LONG)((struct listdir_stat *)self)->st.st_size);
97 (PY_LONG_LONG)((struct listdir_stat *)self)->st.st_size);
98 }
98 }
99 #else
99 #else
100 listdir_slot(st_size)
100 listdir_slot(st_size)
101 #endif
101 #endif
102 listdir_slot(st_mtime)
102 listdir_slot(st_mtime)
103 listdir_slot(st_ctime)
103 listdir_slot(st_ctime)
104
104
105 static struct PyGetSetDef listdir_stat_getsets[] = {
105 static struct PyGetSetDef listdir_stat_getsets[] = {
106 {"st_dev", listdir_stat_st_dev, 0, 0, 0},
106 {"st_dev", listdir_stat_st_dev, 0, 0, 0},
107 {"st_mode", listdir_stat_st_mode, 0, 0, 0},
107 {"st_mode", listdir_stat_st_mode, 0, 0, 0},
108 {"st_nlink", listdir_stat_st_nlink, 0, 0, 0},
108 {"st_nlink", listdir_stat_st_nlink, 0, 0, 0},
109 {"st_size", listdir_stat_st_size, 0, 0, 0},
109 {"st_size", listdir_stat_st_size, 0, 0, 0},
110 {"st_mtime", listdir_stat_st_mtime, 0, 0, 0},
110 {"st_mtime", listdir_stat_st_mtime, 0, 0, 0},
111 {"st_ctime", listdir_stat_st_ctime, 0, 0, 0},
111 {"st_ctime", listdir_stat_st_ctime, 0, 0, 0},
112 {0, 0, 0, 0, 0}
112 {0, 0, 0, 0, 0}
113 };
113 };
114
114
115 static PyObject *listdir_stat_new(PyTypeObject *t, PyObject *a, PyObject *k)
115 static PyObject *listdir_stat_new(PyTypeObject *t, PyObject *a, PyObject *k)
116 {
116 {
117 return t->tp_alloc(t, 0);
117 return t->tp_alloc(t, 0);
118 }
118 }
119
119
120 static void listdir_stat_dealloc(PyObject *o)
120 static void listdir_stat_dealloc(PyObject *o)
121 {
121 {
122 o->ob_type->tp_free(o);
122 Py_TYPE(o)->tp_free(o);
123 }
123 }
124
124
125 static PyObject *listdir_stat_getitem(PyObject *self, PyObject *key)
125 static PyObject *listdir_stat_getitem(PyObject *self, PyObject *key)
126 {
126 {
127 long index = PyLong_AsLong(key);
127 long index = PyLong_AsLong(key);
128 if (index == -1 && PyErr_Occurred()) {
128 if (index == -1 && PyErr_Occurred()) {
129 return NULL;
129 return NULL;
130 }
130 }
131 if (index != 8) {
131 if (index != 8) {
132 PyErr_Format(PyExc_IndexError, "osutil.stat objects only "
132 PyErr_Format(PyExc_IndexError, "osutil.stat objects only "
133 "support stat.ST_MTIME in "
133 "support stat.ST_MTIME in "
134 "__getitem__");
134 "__getitem__");
135 return NULL;
135 return NULL;
136 }
136 }
137 return listdir_stat_st_mtime(self, NULL);
137 return listdir_stat_st_mtime(self, NULL);
138 }
138 }
139
139
140 static PyMappingMethods listdir_stat_type_mapping_methods = {
140 static PyMappingMethods listdir_stat_type_mapping_methods = {
141 (lenfunc)NULL, /* mp_length */
141 (lenfunc)NULL, /* mp_length */
142 (binaryfunc)listdir_stat_getitem, /* mp_subscript */
142 (binaryfunc)listdir_stat_getitem, /* mp_subscript */
143 (objobjargproc)NULL, /* mp_ass_subscript */
143 (objobjargproc)NULL, /* mp_ass_subscript */
144 };
144 };
145
145
146 static PyTypeObject listdir_stat_type = {
146 static PyTypeObject listdir_stat_type = {
147 PyVarObject_HEAD_INIT(NULL, 0) /* header */
147 PyVarObject_HEAD_INIT(NULL, 0) /* header */
148 "osutil.stat", /*tp_name*/
148 "osutil.stat", /*tp_name*/
149 sizeof(struct listdir_stat), /*tp_basicsize*/
149 sizeof(struct listdir_stat), /*tp_basicsize*/
150 0, /*tp_itemsize*/
150 0, /*tp_itemsize*/
151 (destructor)listdir_stat_dealloc, /*tp_dealloc*/
151 (destructor)listdir_stat_dealloc, /*tp_dealloc*/
152 0, /*tp_print*/
152 0, /*tp_print*/
153 0, /*tp_getattr*/
153 0, /*tp_getattr*/
154 0, /*tp_setattr*/
154 0, /*tp_setattr*/
155 0, /*tp_compare*/
155 0, /*tp_compare*/
156 0, /*tp_repr*/
156 0, /*tp_repr*/
157 0, /*tp_as_number*/
157 0, /*tp_as_number*/
158 0, /*tp_as_sequence*/
158 0, /*tp_as_sequence*/
159 &listdir_stat_type_mapping_methods, /*tp_as_mapping*/
159 &listdir_stat_type_mapping_methods, /*tp_as_mapping*/
160 0, /*tp_hash */
160 0, /*tp_hash */
161 0, /*tp_call*/
161 0, /*tp_call*/
162 0, /*tp_str*/
162 0, /*tp_str*/
163 0, /*tp_getattro*/
163 0, /*tp_getattro*/
164 0, /*tp_setattro*/
164 0, /*tp_setattro*/
165 0, /*tp_as_buffer*/
165 0, /*tp_as_buffer*/
166 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
166 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
167 "stat objects", /* tp_doc */
167 "stat objects", /* tp_doc */
168 0, /* tp_traverse */
168 0, /* tp_traverse */
169 0, /* tp_clear */
169 0, /* tp_clear */
170 0, /* tp_richcompare */
170 0, /* tp_richcompare */
171 0, /* tp_weaklistoffset */
171 0, /* tp_weaklistoffset */
172 0, /* tp_iter */
172 0, /* tp_iter */
173 0, /* tp_iternext */
173 0, /* tp_iternext */
174 0, /* tp_methods */
174 0, /* tp_methods */
175 0, /* tp_members */
175 0, /* tp_members */
176 listdir_stat_getsets, /* tp_getset */
176 listdir_stat_getsets, /* tp_getset */
177 0, /* tp_base */
177 0, /* tp_base */
178 0, /* tp_dict */
178 0, /* tp_dict */
179 0, /* tp_descr_get */
179 0, /* tp_descr_get */
180 0, /* tp_descr_set */
180 0, /* tp_descr_set */
181 0, /* tp_dictoffset */
181 0, /* tp_dictoffset */
182 0, /* tp_init */
182 0, /* tp_init */
183 0, /* tp_alloc */
183 0, /* tp_alloc */
184 listdir_stat_new, /* tp_new */
184 listdir_stat_new, /* tp_new */
185 };
185 };
186
186
187 #ifdef _WIN32
187 #ifdef _WIN32
188
188
189 static int to_python_time(const FILETIME *tm)
189 static int to_python_time(const FILETIME *tm)
190 {
190 {
191 /* number of seconds between epoch and January 1 1601 */
191 /* number of seconds between epoch and January 1 1601 */
192 const __int64 a0 = (__int64)134774L * (__int64)24L * (__int64)3600L;
192 const __int64 a0 = (__int64)134774L * (__int64)24L * (__int64)3600L;
193 /* conversion factor from 100ns to 1s */
193 /* conversion factor from 100ns to 1s */
194 const __int64 a1 = 10000000;
194 const __int64 a1 = 10000000;
195 /* explicit (int) cast to suspend compiler warnings */
195 /* explicit (int) cast to suspend compiler warnings */
196 return (int)((((__int64)tm->dwHighDateTime << 32)
196 return (int)((((__int64)tm->dwHighDateTime << 32)
197 + tm->dwLowDateTime) / a1 - a0);
197 + tm->dwLowDateTime) / a1 - a0);
198 }
198 }
199
199
200 static PyObject *make_item(const WIN32_FIND_DATAA *fd, int wantstat)
200 static PyObject *make_item(const WIN32_FIND_DATAA *fd, int wantstat)
201 {
201 {
202 PyObject *py_st;
202 PyObject *py_st;
203 struct hg_stat *stp;
203 struct hg_stat *stp;
204
204
205 int kind = (fd->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
205 int kind = (fd->dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)
206 ? _S_IFDIR : _S_IFREG;
206 ? _S_IFDIR : _S_IFREG;
207
207
208 if (!wantstat)
208 if (!wantstat)
209 return Py_BuildValue(PY23("si", "yi"), fd->cFileName, kind);
209 return Py_BuildValue(PY23("si", "yi"), fd->cFileName, kind);
210
210
211 py_st = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
211 py_st = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
212 if (!py_st)
212 if (!py_st)
213 return NULL;
213 return NULL;
214
214
215 stp = &((struct listdir_stat *)py_st)->st;
215 stp = &((struct listdir_stat *)py_st)->st;
216 /*
216 /*
217 use kind as st_mode
217 use kind as st_mode
218 rwx bits on Win32 are meaningless
218 rwx bits on Win32 are meaningless
219 and Hg does not use them anyway
219 and Hg does not use them anyway
220 */
220 */
221 stp->st_mode = kind;
221 stp->st_mode = kind;
222 stp->st_mtime = to_python_time(&fd->ftLastWriteTime);
222 stp->st_mtime = to_python_time(&fd->ftLastWriteTime);
223 stp->st_ctime = to_python_time(&fd->ftCreationTime);
223 stp->st_ctime = to_python_time(&fd->ftCreationTime);
224 if (kind == _S_IFREG)
224 if (kind == _S_IFREG)
225 stp->st_size = ((__int64)fd->nFileSizeHigh << 32)
225 stp->st_size = ((__int64)fd->nFileSizeHigh << 32)
226 + fd->nFileSizeLow;
226 + fd->nFileSizeLow;
227 return Py_BuildValue(PY23("siN", "yiN"), fd->cFileName,
227 return Py_BuildValue(PY23("siN", "yiN"), fd->cFileName,
228 kind, py_st);
228 kind, py_st);
229 }
229 }
230
230
231 static PyObject *_listdir(char *path, Py_ssize_t plen, int wantstat, char *skip)
231 static PyObject *_listdir(char *path, Py_ssize_t plen, int wantstat, char *skip)
232 {
232 {
233 PyObject *rval = NULL; /* initialize - return value */
233 PyObject *rval = NULL; /* initialize - return value */
234 PyObject *list;
234 PyObject *list;
235 HANDLE fh;
235 HANDLE fh;
236 WIN32_FIND_DATAA fd;
236 WIN32_FIND_DATAA fd;
237 char *pattern;
237 char *pattern;
238
238
239 /* build the path + \* pattern string */
239 /* build the path + \* pattern string */
240 pattern = PyMem_Malloc(plen + 3); /* path + \* + \0 */
240 pattern = PyMem_Malloc(plen + 3); /* path + \* + \0 */
241 if (!pattern) {
241 if (!pattern) {
242 PyErr_NoMemory();
242 PyErr_NoMemory();
243 goto error_nomem;
243 goto error_nomem;
244 }
244 }
245 memcpy(pattern, path, plen);
245 memcpy(pattern, path, plen);
246
246
247 if (plen > 0) {
247 if (plen > 0) {
248 char c = path[plen-1];
248 char c = path[plen-1];
249 if (c != ':' && c != '/' && c != '\\')
249 if (c != ':' && c != '/' && c != '\\')
250 pattern[plen++] = '\\';
250 pattern[plen++] = '\\';
251 }
251 }
252 pattern[plen++] = '*';
252 pattern[plen++] = '*';
253 pattern[plen] = '\0';
253 pattern[plen] = '\0';
254
254
255 fh = FindFirstFileA(pattern, &fd);
255 fh = FindFirstFileA(pattern, &fd);
256 if (fh == INVALID_HANDLE_VALUE) {
256 if (fh == INVALID_HANDLE_VALUE) {
257 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
257 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
258 goto error_file;
258 goto error_file;
259 }
259 }
260
260
261 list = PyList_New(0);
261 list = PyList_New(0);
262 if (!list)
262 if (!list)
263 goto error_list;
263 goto error_list;
264
264
265 do {
265 do {
266 PyObject *item;
266 PyObject *item;
267
267
268 if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
268 if (fd.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
269 if (!strcmp(fd.cFileName, ".")
269 if (!strcmp(fd.cFileName, ".")
270 || !strcmp(fd.cFileName, ".."))
270 || !strcmp(fd.cFileName, ".."))
271 continue;
271 continue;
272
272
273 if (skip && !strcmp(fd.cFileName, skip)) {
273 if (skip && !strcmp(fd.cFileName, skip)) {
274 rval = PyList_New(0);
274 rval = PyList_New(0);
275 goto error;
275 goto error;
276 }
276 }
277 }
277 }
278
278
279 item = make_item(&fd, wantstat);
279 item = make_item(&fd, wantstat);
280 if (!item)
280 if (!item)
281 goto error;
281 goto error;
282
282
283 if (PyList_Append(list, item)) {
283 if (PyList_Append(list, item)) {
284 Py_XDECREF(item);
284 Py_XDECREF(item);
285 goto error;
285 goto error;
286 }
286 }
287
287
288 Py_XDECREF(item);
288 Py_XDECREF(item);
289 } while (FindNextFileA(fh, &fd));
289 } while (FindNextFileA(fh, &fd));
290
290
291 if (GetLastError() != ERROR_NO_MORE_FILES) {
291 if (GetLastError() != ERROR_NO_MORE_FILES) {
292 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
292 PyErr_SetFromWindowsErrWithFilename(GetLastError(), path);
293 goto error;
293 goto error;
294 }
294 }
295
295
296 rval = list;
296 rval = list;
297 Py_XINCREF(rval);
297 Py_XINCREF(rval);
298 error:
298 error:
299 Py_XDECREF(list);
299 Py_XDECREF(list);
300 error_list:
300 error_list:
301 FindClose(fh);
301 FindClose(fh);
302 error_file:
302 error_file:
303 PyMem_Free(pattern);
303 PyMem_Free(pattern);
304 error_nomem:
304 error_nomem:
305 return rval;
305 return rval;
306 }
306 }
307
307
308 #else
308 #else
309
309
310 int entkind(struct dirent *ent)
310 int entkind(struct dirent *ent)
311 {
311 {
312 #ifdef DT_REG
312 #ifdef DT_REG
313 switch (ent->d_type) {
313 switch (ent->d_type) {
314 case DT_REG: return S_IFREG;
314 case DT_REG: return S_IFREG;
315 case DT_DIR: return S_IFDIR;
315 case DT_DIR: return S_IFDIR;
316 case DT_LNK: return S_IFLNK;
316 case DT_LNK: return S_IFLNK;
317 case DT_BLK: return S_IFBLK;
317 case DT_BLK: return S_IFBLK;
318 case DT_CHR: return S_IFCHR;
318 case DT_CHR: return S_IFCHR;
319 case DT_FIFO: return S_IFIFO;
319 case DT_FIFO: return S_IFIFO;
320 case DT_SOCK: return S_IFSOCK;
320 case DT_SOCK: return S_IFSOCK;
321 }
321 }
322 #endif
322 #endif
323 return -1;
323 return -1;
324 }
324 }
325
325
326 static PyObject *makestat(const struct stat *st)
326 static PyObject *makestat(const struct stat *st)
327 {
327 {
328 PyObject *stat;
328 PyObject *stat;
329
329
330 stat = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
330 stat = PyObject_CallObject((PyObject *)&listdir_stat_type, NULL);
331 if (stat)
331 if (stat)
332 memcpy(&((struct listdir_stat *)stat)->st, st, sizeof(*st));
332 memcpy(&((struct listdir_stat *)stat)->st, st, sizeof(*st));
333 return stat;
333 return stat;
334 }
334 }
335
335
336 static PyObject *_listdir_stat(char *path, int pathlen, int keepstat,
336 static PyObject *_listdir_stat(char *path, int pathlen, int keepstat,
337 char *skip)
337 char *skip)
338 {
338 {
339 PyObject *list, *elem, *ret = NULL;
339 PyObject *list, *elem, *ret = NULL;
340 char fullpath[PATH_MAX + 10];
340 char fullpath[PATH_MAX + 10];
341 int kind, err;
341 int kind, err;
342 struct stat st;
342 struct stat st;
343 struct dirent *ent;
343 struct dirent *ent;
344 DIR *dir;
344 DIR *dir;
345 #ifdef AT_SYMLINK_NOFOLLOW
345 #ifdef AT_SYMLINK_NOFOLLOW
346 int dfd = -1;
346 int dfd = -1;
347 #endif
347 #endif
348
348
349 if (pathlen >= PATH_MAX) {
349 if (pathlen >= PATH_MAX) {
350 errno = ENAMETOOLONG;
350 errno = ENAMETOOLONG;
351 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
351 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
352 goto error_value;
352 goto error_value;
353 }
353 }
354 strncpy(fullpath, path, PATH_MAX);
354 strncpy(fullpath, path, PATH_MAX);
355 fullpath[pathlen] = '/';
355 fullpath[pathlen] = '/';
356
356
357 #ifdef AT_SYMLINK_NOFOLLOW
357 #ifdef AT_SYMLINK_NOFOLLOW
358 dfd = open(path, O_RDONLY);
358 dfd = open(path, O_RDONLY);
359 if (dfd == -1) {
359 if (dfd == -1) {
360 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
360 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
361 goto error_value;
361 goto error_value;
362 }
362 }
363 dir = fdopendir(dfd);
363 dir = fdopendir(dfd);
364 #else
364 #else
365 dir = opendir(path);
365 dir = opendir(path);
366 #endif
366 #endif
367 if (!dir) {
367 if (!dir) {
368 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
368 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
369 goto error_dir;
369 goto error_dir;
370 }
370 }
371
371
372 list = PyList_New(0);
372 list = PyList_New(0);
373 if (!list)
373 if (!list)
374 goto error_list;
374 goto error_list;
375
375
376 while ((ent = readdir(dir))) {
376 while ((ent = readdir(dir))) {
377 if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
377 if (!strcmp(ent->d_name, ".") || !strcmp(ent->d_name, ".."))
378 continue;
378 continue;
379
379
380 kind = entkind(ent);
380 kind = entkind(ent);
381 if (kind == -1 || keepstat) {
381 if (kind == -1 || keepstat) {
382 #ifdef AT_SYMLINK_NOFOLLOW
382 #ifdef AT_SYMLINK_NOFOLLOW
383 err = fstatat(dfd, ent->d_name, &st,
383 err = fstatat(dfd, ent->d_name, &st,
384 AT_SYMLINK_NOFOLLOW);
384 AT_SYMLINK_NOFOLLOW);
385 #else
385 #else
386 strncpy(fullpath + pathlen + 1, ent->d_name,
386 strncpy(fullpath + pathlen + 1, ent->d_name,
387 PATH_MAX - pathlen);
387 PATH_MAX - pathlen);
388 fullpath[PATH_MAX] = '\0';
388 fullpath[PATH_MAX] = '\0';
389 err = lstat(fullpath, &st);
389 err = lstat(fullpath, &st);
390 #endif
390 #endif
391 if (err == -1) {
391 if (err == -1) {
392 /* race with file deletion? */
392 /* race with file deletion? */
393 if (errno == ENOENT)
393 if (errno == ENOENT)
394 continue;
394 continue;
395 strncpy(fullpath + pathlen + 1, ent->d_name,
395 strncpy(fullpath + pathlen + 1, ent->d_name,
396 PATH_MAX - pathlen);
396 PATH_MAX - pathlen);
397 fullpath[PATH_MAX] = 0;
397 fullpath[PATH_MAX] = 0;
398 PyErr_SetFromErrnoWithFilename(PyExc_OSError,
398 PyErr_SetFromErrnoWithFilename(PyExc_OSError,
399 fullpath);
399 fullpath);
400 goto error;
400 goto error;
401 }
401 }
402 kind = st.st_mode & S_IFMT;
402 kind = st.st_mode & S_IFMT;
403 }
403 }
404
404
405 /* quit early? */
405 /* quit early? */
406 if (skip && kind == S_IFDIR && !strcmp(ent->d_name, skip)) {
406 if (skip && kind == S_IFDIR && !strcmp(ent->d_name, skip)) {
407 ret = PyList_New(0);
407 ret = PyList_New(0);
408 goto error;
408 goto error;
409 }
409 }
410
410
411 if (keepstat) {
411 if (keepstat) {
412 PyObject *stat = makestat(&st);
412 PyObject *stat = makestat(&st);
413 if (!stat)
413 if (!stat)
414 goto error;
414 goto error;
415 elem = Py_BuildValue(PY23("siN", "yiN"), ent->d_name,
415 elem = Py_BuildValue(PY23("siN", "yiN"), ent->d_name,
416 kind, stat);
416 kind, stat);
417 } else
417 } else
418 elem = Py_BuildValue(PY23("si", "yi"), ent->d_name,
418 elem = Py_BuildValue(PY23("si", "yi"), ent->d_name,
419 kind);
419 kind);
420 if (!elem)
420 if (!elem)
421 goto error;
421 goto error;
422
422
423 PyList_Append(list, elem);
423 PyList_Append(list, elem);
424 Py_DECREF(elem);
424 Py_DECREF(elem);
425 }
425 }
426
426
427 ret = list;
427 ret = list;
428 Py_INCREF(ret);
428 Py_INCREF(ret);
429
429
430 error:
430 error:
431 Py_DECREF(list);
431 Py_DECREF(list);
432 error_list:
432 error_list:
433 closedir(dir);
433 closedir(dir);
434 /* closedir also closes its dirfd */
434 /* closedir also closes its dirfd */
435 goto error_value;
435 goto error_value;
436 error_dir:
436 error_dir:
437 #ifdef AT_SYMLINK_NOFOLLOW
437 #ifdef AT_SYMLINK_NOFOLLOW
438 close(dfd);
438 close(dfd);
439 #endif
439 #endif
440 error_value:
440 error_value:
441 return ret;
441 return ret;
442 }
442 }
443
443
444 #ifdef __APPLE__
444 #ifdef __APPLE__
445
445
446 typedef struct {
446 typedef struct {
447 u_int32_t length;
447 u_int32_t length;
448 attrreference_t name;
448 attrreference_t name;
449 fsobj_type_t obj_type;
449 fsobj_type_t obj_type;
450 struct timespec mtime;
450 struct timespec mtime;
451 #if __LITTLE_ENDIAN__
451 #if __LITTLE_ENDIAN__
452 mode_t access_mask;
452 mode_t access_mask;
453 uint16_t padding;
453 uint16_t padding;
454 #else
454 #else
455 uint16_t padding;
455 uint16_t padding;
456 mode_t access_mask;
456 mode_t access_mask;
457 #endif
457 #endif
458 off_t size;
458 off_t size;
459 } __attribute__((packed)) attrbuf_entry;
459 } __attribute__((packed)) attrbuf_entry;
460
460
461 int attrkind(attrbuf_entry *entry)
461 int attrkind(attrbuf_entry *entry)
462 {
462 {
463 switch (entry->obj_type) {
463 switch (entry->obj_type) {
464 case VREG: return S_IFREG;
464 case VREG: return S_IFREG;
465 case VDIR: return S_IFDIR;
465 case VDIR: return S_IFDIR;
466 case VLNK: return S_IFLNK;
466 case VLNK: return S_IFLNK;
467 case VBLK: return S_IFBLK;
467 case VBLK: return S_IFBLK;
468 case VCHR: return S_IFCHR;
468 case VCHR: return S_IFCHR;
469 case VFIFO: return S_IFIFO;
469 case VFIFO: return S_IFIFO;
470 case VSOCK: return S_IFSOCK;
470 case VSOCK: return S_IFSOCK;
471 }
471 }
472 return -1;
472 return -1;
473 }
473 }
474
474
475 /* get these many entries at a time */
475 /* get these many entries at a time */
476 #define LISTDIR_BATCH_SIZE 50
476 #define LISTDIR_BATCH_SIZE 50
477
477
478 static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
478 static PyObject *_listdir_batch(char *path, int pathlen, int keepstat,
479 char *skip, bool *fallback)
479 char *skip, bool *fallback)
480 {
480 {
481 PyObject *list, *elem, *ret = NULL;
481 PyObject *list, *elem, *ret = NULL;
482 int kind, err;
482 int kind, err;
483 unsigned long index;
483 unsigned long index;
484 unsigned int count, old_state, new_state;
484 unsigned int count, old_state, new_state;
485 bool state_seen = false;
485 bool state_seen = false;
486 attrbuf_entry *entry;
486 attrbuf_entry *entry;
487 /* from the getattrlist(2) man page: a path can be no longer than
487 /* from the getattrlist(2) man page: a path can be no longer than
488 (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
488 (NAME_MAX * 3 + 1) bytes. Also, "The getattrlist() function will
489 silently truncate attribute data if attrBufSize is too small." So
489 silently truncate attribute data if attrBufSize is too small." So
490 pass in a buffer big enough for the worst case. */
490 pass in a buffer big enough for the worst case. */
491 char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
491 char attrbuf[LISTDIR_BATCH_SIZE * (sizeof(attrbuf_entry) + NAME_MAX * 3 + 1)];
492 unsigned int basep_unused;
492 unsigned int basep_unused;
493
493
494 struct stat st;
494 struct stat st;
495 int dfd = -1;
495 int dfd = -1;
496
496
497 /* these must match the attrbuf_entry struct, otherwise you'll end up
497 /* these must match the attrbuf_entry struct, otherwise you'll end up
498 with garbage */
498 with garbage */
499 struct attrlist requested_attr = {0};
499 struct attrlist requested_attr = {0};
500 requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
500 requested_attr.bitmapcount = ATTR_BIT_MAP_COUNT;
501 requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
501 requested_attr.commonattr = (ATTR_CMN_NAME | ATTR_CMN_OBJTYPE |
502 ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
502 ATTR_CMN_MODTIME | ATTR_CMN_ACCESSMASK);
503 requested_attr.fileattr = ATTR_FILE_DATALENGTH;
503 requested_attr.fileattr = ATTR_FILE_DATALENGTH;
504
504
505 *fallback = false;
505 *fallback = false;
506
506
507 if (pathlen >= PATH_MAX) {
507 if (pathlen >= PATH_MAX) {
508 errno = ENAMETOOLONG;
508 errno = ENAMETOOLONG;
509 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
509 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
510 goto error_value;
510 goto error_value;
511 }
511 }
512
512
513 dfd = open(path, O_RDONLY);
513 dfd = open(path, O_RDONLY);
514 if (dfd == -1) {
514 if (dfd == -1) {
515 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
515 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
516 goto error_value;
516 goto error_value;
517 }
517 }
518
518
519 list = PyList_New(0);
519 list = PyList_New(0);
520 if (!list)
520 if (!list)
521 goto error_dir;
521 goto error_dir;
522
522
523 do {
523 do {
524 count = LISTDIR_BATCH_SIZE;
524 count = LISTDIR_BATCH_SIZE;
525 err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
525 err = getdirentriesattr(dfd, &requested_attr, &attrbuf,
526 sizeof(attrbuf), &count, &basep_unused,
526 sizeof(attrbuf), &count, &basep_unused,
527 &new_state, 0);
527 &new_state, 0);
528 if (err < 0) {
528 if (err < 0) {
529 if (errno == ENOTSUP) {
529 if (errno == ENOTSUP) {
530 /* We're on a filesystem that doesn't support
530 /* We're on a filesystem that doesn't support
531 getdirentriesattr. Fall back to the
531 getdirentriesattr. Fall back to the
532 stat-based implementation. */
532 stat-based implementation. */
533 *fallback = true;
533 *fallback = true;
534 } else
534 } else
535 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
535 PyErr_SetFromErrnoWithFilename(PyExc_OSError, path);
536 goto error;
536 goto error;
537 }
537 }
538
538
539 if (!state_seen) {
539 if (!state_seen) {
540 old_state = new_state;
540 old_state = new_state;
541 state_seen = true;
541 state_seen = true;
542 } else if (old_state != new_state) {
542 } else if (old_state != new_state) {
543 /* There's an edge case with getdirentriesattr. Consider
543 /* There's an edge case with getdirentriesattr. Consider
544 the following initial list of files:
544 the following initial list of files:
545
545
546 a
546 a
547 b
547 b
548 <--
548 <--
549 c
549 c
550 d
550 d
551
551
552 If the iteration is paused at the arrow, and b is
552 If the iteration is paused at the arrow, and b is
553 deleted before it is resumed, getdirentriesattr will
553 deleted before it is resumed, getdirentriesattr will
554 not return d at all! Ordinarily we're expected to
554 not return d at all! Ordinarily we're expected to
555 restart the iteration from the beginning. To avoid
555 restart the iteration from the beginning. To avoid
556 getting stuck in a retry loop here, fall back to
556 getting stuck in a retry loop here, fall back to
557 stat. */
557 stat. */
558 *fallback = true;
558 *fallback = true;
559 goto error;
559 goto error;
560 }
560 }
561
561
562 entry = (attrbuf_entry *)attrbuf;
562 entry = (attrbuf_entry *)attrbuf;
563
563
564 for (index = 0; index < count; index++) {
564 for (index = 0; index < count; index++) {
565 char *filename = ((char *)&entry->name) +
565 char *filename = ((char *)&entry->name) +
566 entry->name.attr_dataoffset;
566 entry->name.attr_dataoffset;
567
567
568 if (!strcmp(filename, ".") || !strcmp(filename, ".."))
568 if (!strcmp(filename, ".") || !strcmp(filename, ".."))
569 continue;
569 continue;
570
570
571 kind = attrkind(entry);
571 kind = attrkind(entry);
572 if (kind == -1) {
572 if (kind == -1) {
573 PyErr_Format(PyExc_OSError,
573 PyErr_Format(PyExc_OSError,
574 "unknown object type %u for file "
574 "unknown object type %u for file "
575 "%s%s!",
575 "%s%s!",
576 entry->obj_type, path, filename);
576 entry->obj_type, path, filename);
577 goto error;
577 goto error;
578 }
578 }
579
579
580 /* quit early? */
580 /* quit early? */
581 if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
581 if (skip && kind == S_IFDIR && !strcmp(filename, skip)) {
582 ret = PyList_New(0);
582 ret = PyList_New(0);
583 goto error;
583 goto error;
584 }
584 }
585
585
586 if (keepstat) {
586 if (keepstat) {
587 PyObject *stat = NULL;
587 PyObject *stat = NULL;
588 /* from the getattrlist(2) man page: "Only the
588 /* from the getattrlist(2) man page: "Only the
589 permission bits ... are valid". */
589 permission bits ... are valid". */
590 st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
590 st.st_mode = (entry->access_mask & ~S_IFMT) | kind;
591 st.st_mtime = entry->mtime.tv_sec;
591 st.st_mtime = entry->mtime.tv_sec;
592 st.st_size = entry->size;
592 st.st_size = entry->size;
593 stat = makestat(&st);
593 stat = makestat(&st);
594 if (!stat)
594 if (!stat)
595 goto error;
595 goto error;
596 elem = Py_BuildValue(PY23("siN", "yiN"),
596 elem = Py_BuildValue(PY23("siN", "yiN"),
597 filename, kind, stat);
597 filename, kind, stat);
598 } else
598 } else
599 elem = Py_BuildValue(PY23("si", "yi"),
599 elem = Py_BuildValue(PY23("si", "yi"),
600 filename, kind);
600 filename, kind);
601 if (!elem)
601 if (!elem)
602 goto error;
602 goto error;
603
603
604 PyList_Append(list, elem);
604 PyList_Append(list, elem);
605 Py_DECREF(elem);
605 Py_DECREF(elem);
606
606
607 entry = (attrbuf_entry *)((char *)entry + entry->length);
607 entry = (attrbuf_entry *)((char *)entry + entry->length);
608 }
608 }
609 } while (err == 0);
609 } while (err == 0);
610
610
611 ret = list;
611 ret = list;
612 Py_INCREF(ret);
612 Py_INCREF(ret);
613
613
614 error:
614 error:
615 Py_DECREF(list);
615 Py_DECREF(list);
616 error_dir:
616 error_dir:
617 close(dfd);
617 close(dfd);
618 error_value:
618 error_value:
619 return ret;
619 return ret;
620 }
620 }
621
621
622 #endif /* __APPLE__ */
622 #endif /* __APPLE__ */
623
623
624 static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
624 static PyObject *_listdir(char *path, int pathlen, int keepstat, char *skip)
625 {
625 {
626 #ifdef __APPLE__
626 #ifdef __APPLE__
627 PyObject *ret;
627 PyObject *ret;
628 bool fallback = false;
628 bool fallback = false;
629
629
630 ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
630 ret = _listdir_batch(path, pathlen, keepstat, skip, &fallback);
631 if (ret != NULL || !fallback)
631 if (ret != NULL || !fallback)
632 return ret;
632 return ret;
633 #endif
633 #endif
634 return _listdir_stat(path, pathlen, keepstat, skip);
634 return _listdir_stat(path, pathlen, keepstat, skip);
635 }
635 }
636
636
637 static PyObject *statfiles(PyObject *self, PyObject *args)
637 static PyObject *statfiles(PyObject *self, PyObject *args)
638 {
638 {
639 PyObject *names, *stats;
639 PyObject *names, *stats;
640 Py_ssize_t i, count;
640 Py_ssize_t i, count;
641
641
642 if (!PyArg_ParseTuple(args, "O:statfiles", &names))
642 if (!PyArg_ParseTuple(args, "O:statfiles", &names))
643 return NULL;
643 return NULL;
644
644
645 count = PySequence_Length(names);
645 count = PySequence_Length(names);
646 if (count == -1) {
646 if (count == -1) {
647 PyErr_SetString(PyExc_TypeError, "not a sequence");
647 PyErr_SetString(PyExc_TypeError, "not a sequence");
648 return NULL;
648 return NULL;
649 }
649 }
650
650
651 stats = PyList_New(count);
651 stats = PyList_New(count);
652 if (stats == NULL)
652 if (stats == NULL)
653 return NULL;
653 return NULL;
654
654
655 for (i = 0; i < count; i++) {
655 for (i = 0; i < count; i++) {
656 PyObject *stat, *pypath;
656 PyObject *stat, *pypath;
657 struct stat st;
657 struct stat st;
658 int ret, kind;
658 int ret, kind;
659 char *path;
659 char *path;
660
660
661 /* With a large file count or on a slow filesystem,
661 /* With a large file count or on a slow filesystem,
662 don't block signals for long (issue4878). */
662 don't block signals for long (issue4878). */
663 if ((i % 1000) == 999 && PyErr_CheckSignals() == -1)
663 if ((i % 1000) == 999 && PyErr_CheckSignals() == -1)
664 goto bail;
664 goto bail;
665
665
666 pypath = PySequence_GetItem(names, i);
666 pypath = PySequence_GetItem(names, i);
667 if (!pypath)
667 if (!pypath)
668 goto bail;
668 goto bail;
669 path = PyBytes_AsString(pypath);
669 path = PyBytes_AsString(pypath);
670 if (path == NULL) {
670 if (path == NULL) {
671 Py_DECREF(pypath);
671 Py_DECREF(pypath);
672 PyErr_SetString(PyExc_TypeError, "not a string");
672 PyErr_SetString(PyExc_TypeError, "not a string");
673 goto bail;
673 goto bail;
674 }
674 }
675 ret = lstat(path, &st);
675 ret = lstat(path, &st);
676 Py_DECREF(pypath);
676 Py_DECREF(pypath);
677 kind = st.st_mode & S_IFMT;
677 kind = st.st_mode & S_IFMT;
678 if (ret != -1 && (kind == S_IFREG || kind == S_IFLNK)) {
678 if (ret != -1 && (kind == S_IFREG || kind == S_IFLNK)) {
679 stat = makestat(&st);
679 stat = makestat(&st);
680 if (stat == NULL)
680 if (stat == NULL)
681 goto bail;
681 goto bail;
682 PyList_SET_ITEM(stats, i, stat);
682 PyList_SET_ITEM(stats, i, stat);
683 } else {
683 } else {
684 Py_INCREF(Py_None);
684 Py_INCREF(Py_None);
685 PyList_SET_ITEM(stats, i, Py_None);
685 PyList_SET_ITEM(stats, i, Py_None);
686 }
686 }
687 }
687 }
688
688
689 return stats;
689 return stats;
690
690
691 bail:
691 bail:
692 Py_DECREF(stats);
692 Py_DECREF(stats);
693 return NULL;
693 return NULL;
694 }
694 }
695
695
696 /*
696 /*
697 * recvfds() simply does not release GIL during blocking io operation because
697 * recvfds() simply does not release GIL during blocking io operation because
698 * command server is known to be single-threaded.
698 * command server is known to be single-threaded.
699 *
699 *
700 * Old systems such as Solaris don't provide CMSG_LEN, msg_control, etc.
700 * Old systems such as Solaris don't provide CMSG_LEN, msg_control, etc.
701 * Currently, recvfds() is not supported on these platforms.
701 * Currently, recvfds() is not supported on these platforms.
702 */
702 */
703 #ifdef CMSG_LEN
703 #ifdef CMSG_LEN
704
704
/*
 * Receive one message from sockfd, using cbuf (cbufsize bytes) as the
 * ancillary data buffer, and look for an SCM_RIGHTS control message.
 *
 * On success *rfds points into cbuf at the received descriptors and the
 * number of descriptors is returned; when no SCM_RIGHTS message is present
 * *rfds is set to cbuf and 0 is returned. Returns -1 if recvmsg() fails.
 */
static ssize_t recvfdstobuf(int sockfd, int **rfds, void *cbuf, size_t cbufsize)
{
	char payload[1];
	struct iovec vec = {payload, sizeof(payload)};
	struct msghdr msg = {0};
	struct cmsghdr *hdr;

	msg.msg_iov = &vec;
	msg.msg_iovlen = 1;
	msg.msg_control = cbuf;
	msg.msg_controllen = (socklen_t)cbufsize;
	if (recvmsg(sockfd, &msg, 0) < 0)
		return -1;

	hdr = CMSG_FIRSTHDR(&msg);
	while (hdr) {
		if (hdr->cmsg_level == SOL_SOCKET &&
		    hdr->cmsg_type == SCM_RIGHTS) {
			/* the descriptors are packed as ints in the payload */
			*rfds = (int *)CMSG_DATA(hdr);
			return (hdr->cmsg_len - CMSG_LEN(0)) / sizeof(int);
		}
		hdr = CMSG_NXTHDR(&msg, hdr);
	}

	*rfds = cbuf;
	return 0;
}
731
731
732 static PyObject *recvfds(PyObject *self, PyObject *args)
732 static PyObject *recvfds(PyObject *self, PyObject *args)
733 {
733 {
734 int sockfd;
734 int sockfd;
735 int *rfds = NULL;
735 int *rfds = NULL;
736 ssize_t rfdscount, i;
736 ssize_t rfdscount, i;
737 char cbuf[256];
737 char cbuf[256];
738 PyObject *rfdslist = NULL;
738 PyObject *rfdslist = NULL;
739
739
740 if (!PyArg_ParseTuple(args, "i", &sockfd))
740 if (!PyArg_ParseTuple(args, "i", &sockfd))
741 return NULL;
741 return NULL;
742
742
743 rfdscount = recvfdstobuf(sockfd, &rfds, cbuf, sizeof(cbuf));
743 rfdscount = recvfdstobuf(sockfd, &rfds, cbuf, sizeof(cbuf));
744 if (rfdscount < 0)
744 if (rfdscount < 0)
745 return PyErr_SetFromErrno(PyExc_OSError);
745 return PyErr_SetFromErrno(PyExc_OSError);
746
746
747 rfdslist = PyList_New(rfdscount);
747 rfdslist = PyList_New(rfdscount);
748 if (!rfdslist)
748 if (!rfdslist)
749 goto bail;
749 goto bail;
750 for (i = 0; i < rfdscount; i++) {
750 for (i = 0; i < rfdscount; i++) {
751 PyObject *obj = PyLong_FromLong(rfds[i]);
751 PyObject *obj = PyLong_FromLong(rfds[i]);
752 if (!obj)
752 if (!obj)
753 goto bail;
753 goto bail;
754 PyList_SET_ITEM(rfdslist, i, obj);
754 PyList_SET_ITEM(rfdslist, i, obj);
755 }
755 }
756 return rfdslist;
756 return rfdslist;
757
757
758 bail:
758 bail:
759 Py_XDECREF(rfdslist);
759 Py_XDECREF(rfdslist);
760 return NULL;
760 return NULL;
761 }
761 }
762
762
763 #endif /* CMSG_LEN */
763 #endif /* CMSG_LEN */
764
764
765 /* allow disabling setprocname via compiler flags */
765 /* allow disabling setprocname via compiler flags */
766 #ifndef SETPROCNAME_USE_NONE
766 #ifndef SETPROCNAME_USE_NONE
767 #if defined(HAVE_SETPROCTITLE)
767 #if defined(HAVE_SETPROCTITLE)
768 /* setproctitle is the first choice - available in FreeBSD */
768 /* setproctitle is the first choice - available in FreeBSD */
769 #define SETPROCNAME_USE_SETPROCTITLE
769 #define SETPROCNAME_USE_SETPROCTITLE
770 #elif (defined(__linux__) || defined(__APPLE__)) && PY_MAJOR_VERSION == 2
770 #elif (defined(__linux__) || defined(__APPLE__)) && PY_MAJOR_VERSION == 2
771 /* rewrite the argv buffer in place - works in Linux and OS X. Py_GetArgcArgv
771 /* rewrite the argv buffer in place - works in Linux and OS X. Py_GetArgcArgv
772 * in Python 3 returns the copied wchar_t **argv, thus unsupported. */
772 * in Python 3 returns the copied wchar_t **argv, thus unsupported. */
773 #define SETPROCNAME_USE_ARGVREWRITE
773 #define SETPROCNAME_USE_ARGVREWRITE
774 #else
774 #else
775 #define SETPROCNAME_USE_NONE
775 #define SETPROCNAME_USE_NONE
776 #endif
776 #endif
777 #endif /* ndef SETPROCNAME_USE_NONE */
777 #endif /* ndef SETPROCNAME_USE_NONE */
778
778
779 #ifndef SETPROCNAME_USE_NONE
779 #ifndef SETPROCNAME_USE_NONE
780 static PyObject *setprocname(PyObject *self, PyObject *args)
780 static PyObject *setprocname(PyObject *self, PyObject *args)
781 {
781 {
782 const char *name = NULL;
782 const char *name = NULL;
783 if (!PyArg_ParseTuple(args, PY23("s", "y"), &name))
783 if (!PyArg_ParseTuple(args, PY23("s", "y"), &name))
784 return NULL;
784 return NULL;
785
785
786 #if defined(SETPROCNAME_USE_SETPROCTITLE)
786 #if defined(SETPROCNAME_USE_SETPROCTITLE)
787 setproctitle("%s", name);
787 setproctitle("%s", name);
788 #elif defined(SETPROCNAME_USE_ARGVREWRITE)
788 #elif defined(SETPROCNAME_USE_ARGVREWRITE)
789 {
789 {
790 static char *argvstart = NULL;
790 static char *argvstart = NULL;
791 static size_t argvsize = 0;
791 static size_t argvsize = 0;
792 if (argvstart == NULL) {
792 if (argvstart == NULL) {
793 int argc = 0, i;
793 int argc = 0, i;
794 char **argv = NULL;
794 char **argv = NULL;
795 char *argvend;
795 char *argvend;
796 extern void Py_GetArgcArgv(int *argc, char ***argv);
796 extern void Py_GetArgcArgv(int *argc, char ***argv);
797 Py_GetArgcArgv(&argc, &argv);
797 Py_GetArgcArgv(&argc, &argv);
798 /* Py_GetArgcArgv may not do much if a custom python
798 /* Py_GetArgcArgv may not do much if a custom python
799 * launcher is used that doesn't record the information
799 * launcher is used that doesn't record the information
800 * it needs. Let's handle this gracefully instead of
800 * it needs. Let's handle this gracefully instead of
801 * segfaulting. */
801 * segfaulting. */
802 if (argv != NULL)
802 if (argv != NULL)
803 argvend = argvstart = argv[0];
803 argvend = argvstart = argv[0];
804 else
804 else
805 argvend = argvstart = NULL;
805 argvend = argvstart = NULL;
806
806
807 /* Check the memory we can use. Typically, argv[i] and
807 /* Check the memory we can use. Typically, argv[i] and
808 * argv[i + 1] are continuous. */
808 * argv[i + 1] are continuous. */
809 for (i = 0; i < argc; ++i) {
809 for (i = 0; i < argc; ++i) {
810 size_t len;
810 size_t len;
811 if (argv[i] > argvend || argv[i] < argvstart)
811 if (argv[i] > argvend || argv[i] < argvstart)
812 break; /* not continuous */
812 break; /* not continuous */
813 len = strlen(argv[i]);
813 len = strlen(argv[i]);
814 argvend = argv[i] + len + 1 /* '\0' */;
814 argvend = argv[i] + len + 1 /* '\0' */;
815 }
815 }
816 if (argvend > argvstart) /* sanity check */
816 if (argvend > argvstart) /* sanity check */
817 argvsize = argvend - argvstart;
817 argvsize = argvend - argvstart;
818 }
818 }
819
819
820 if (argvstart && argvsize > 1) {
820 if (argvstart && argvsize > 1) {
821 int n = snprintf(argvstart, argvsize, "%s", name);
821 int n = snprintf(argvstart, argvsize, "%s", name);
822 if (n >= 0 && (size_t)n < argvsize)
822 if (n >= 0 && (size_t)n < argvsize)
823 memset(argvstart + n, 0, argvsize - n);
823 memset(argvstart + n, 0, argvsize - n);
824 }
824 }
825 }
825 }
826 #endif
826 #endif
827
827
828 Py_RETURN_NONE;
828 Py_RETURN_NONE;
829 }
829 }
830 #endif /* ndef SETPROCNAME_USE_NONE */
830 #endif /* ndef SETPROCNAME_USE_NONE */
831
831
832 #if defined(HAVE_BSD_STATFS)
832 #if defined(HAVE_BSD_STATFS)
833 static const char *describefstype(const struct statfs *pbuf)
833 static const char *describefstype(const struct statfs *pbuf)
834 {
834 {
835 /* BSD or OSX provides a f_fstypename field */
835 /* BSD or OSX provides a f_fstypename field */
836 return pbuf->f_fstypename;
836 return pbuf->f_fstypename;
837 }
837 }
838 #elif defined(HAVE_LINUX_STATFS)
838 #elif defined(HAVE_LINUX_STATFS)
839 static const char *describefstype(const struct statfs *pbuf)
839 static const char *describefstype(const struct statfs *pbuf)
840 {
840 {
841 /* Begin of Linux filesystems */
841 /* Begin of Linux filesystems */
842 #ifdef ADFS_SUPER_MAGIC
842 #ifdef ADFS_SUPER_MAGIC
843 if (pbuf->f_type == ADFS_SUPER_MAGIC)
843 if (pbuf->f_type == ADFS_SUPER_MAGIC)
844 return "adfs";
844 return "adfs";
845 #endif
845 #endif
846 #ifdef AFFS_SUPER_MAGIC
846 #ifdef AFFS_SUPER_MAGIC
847 if (pbuf->f_type == AFFS_SUPER_MAGIC)
847 if (pbuf->f_type == AFFS_SUPER_MAGIC)
848 return "affs";
848 return "affs";
849 #endif
849 #endif
850 #ifdef AUTOFS_SUPER_MAGIC
850 #ifdef AUTOFS_SUPER_MAGIC
851 if (pbuf->f_type == AUTOFS_SUPER_MAGIC)
851 if (pbuf->f_type == AUTOFS_SUPER_MAGIC)
852 return "autofs";
852 return "autofs";
853 #endif
853 #endif
854 #ifdef BDEVFS_MAGIC
854 #ifdef BDEVFS_MAGIC
855 if (pbuf->f_type == BDEVFS_MAGIC)
855 if (pbuf->f_type == BDEVFS_MAGIC)
856 return "bdevfs";
856 return "bdevfs";
857 #endif
857 #endif
858 #ifdef BEFS_SUPER_MAGIC
858 #ifdef BEFS_SUPER_MAGIC
859 if (pbuf->f_type == BEFS_SUPER_MAGIC)
859 if (pbuf->f_type == BEFS_SUPER_MAGIC)
860 return "befs";
860 return "befs";
861 #endif
861 #endif
862 #ifdef BFS_MAGIC
862 #ifdef BFS_MAGIC
863 if (pbuf->f_type == BFS_MAGIC)
863 if (pbuf->f_type == BFS_MAGIC)
864 return "bfs";
864 return "bfs";
865 #endif
865 #endif
866 #ifdef BINFMTFS_MAGIC
866 #ifdef BINFMTFS_MAGIC
867 if (pbuf->f_type == BINFMTFS_MAGIC)
867 if (pbuf->f_type == BINFMTFS_MAGIC)
868 return "binfmtfs";
868 return "binfmtfs";
869 #endif
869 #endif
870 #ifdef BTRFS_SUPER_MAGIC
870 #ifdef BTRFS_SUPER_MAGIC
871 if (pbuf->f_type == BTRFS_SUPER_MAGIC)
871 if (pbuf->f_type == BTRFS_SUPER_MAGIC)
872 return "btrfs";
872 return "btrfs";
873 #endif
873 #endif
874 #ifdef CGROUP_SUPER_MAGIC
874 #ifdef CGROUP_SUPER_MAGIC
875 if (pbuf->f_type == CGROUP_SUPER_MAGIC)
875 if (pbuf->f_type == CGROUP_SUPER_MAGIC)
876 return "cgroup";
876 return "cgroup";
877 #endif
877 #endif
878 #ifdef CIFS_MAGIC_NUMBER
878 #ifdef CIFS_MAGIC_NUMBER
879 if (pbuf->f_type == CIFS_MAGIC_NUMBER)
879 if (pbuf->f_type == CIFS_MAGIC_NUMBER)
880 return "cifs";
880 return "cifs";
881 #endif
881 #endif
882 #ifdef CODA_SUPER_MAGIC
882 #ifdef CODA_SUPER_MAGIC
883 if (pbuf->f_type == CODA_SUPER_MAGIC)
883 if (pbuf->f_type == CODA_SUPER_MAGIC)
884 return "coda";
884 return "coda";
885 #endif
885 #endif
886 #ifdef COH_SUPER_MAGIC
886 #ifdef COH_SUPER_MAGIC
887 if (pbuf->f_type == COH_SUPER_MAGIC)
887 if (pbuf->f_type == COH_SUPER_MAGIC)
888 return "coh";
888 return "coh";
889 #endif
889 #endif
890 #ifdef CRAMFS_MAGIC
890 #ifdef CRAMFS_MAGIC
891 if (pbuf->f_type == CRAMFS_MAGIC)
891 if (pbuf->f_type == CRAMFS_MAGIC)
892 return "cramfs";
892 return "cramfs";
893 #endif
893 #endif
894 #ifdef DEBUGFS_MAGIC
894 #ifdef DEBUGFS_MAGIC
895 if (pbuf->f_type == DEBUGFS_MAGIC)
895 if (pbuf->f_type == DEBUGFS_MAGIC)
896 return "debugfs";
896 return "debugfs";
897 #endif
897 #endif
898 #ifdef DEVFS_SUPER_MAGIC
898 #ifdef DEVFS_SUPER_MAGIC
899 if (pbuf->f_type == DEVFS_SUPER_MAGIC)
899 if (pbuf->f_type == DEVFS_SUPER_MAGIC)
900 return "devfs";
900 return "devfs";
901 #endif
901 #endif
902 #ifdef DEVPTS_SUPER_MAGIC
902 #ifdef DEVPTS_SUPER_MAGIC
903 if (pbuf->f_type == DEVPTS_SUPER_MAGIC)
903 if (pbuf->f_type == DEVPTS_SUPER_MAGIC)
904 return "devpts";
904 return "devpts";
905 #endif
905 #endif
906 #ifdef EFIVARFS_MAGIC
906 #ifdef EFIVARFS_MAGIC
907 if (pbuf->f_type == EFIVARFS_MAGIC)
907 if (pbuf->f_type == EFIVARFS_MAGIC)
908 return "efivarfs";
908 return "efivarfs";
909 #endif
909 #endif
910 #ifdef EFS_SUPER_MAGIC
910 #ifdef EFS_SUPER_MAGIC
911 if (pbuf->f_type == EFS_SUPER_MAGIC)
911 if (pbuf->f_type == EFS_SUPER_MAGIC)
912 return "efs";
912 return "efs";
913 #endif
913 #endif
914 #ifdef EXT_SUPER_MAGIC
914 #ifdef EXT_SUPER_MAGIC
915 if (pbuf->f_type == EXT_SUPER_MAGIC)
915 if (pbuf->f_type == EXT_SUPER_MAGIC)
916 return "ext";
916 return "ext";
917 #endif
917 #endif
918 #ifdef EXT2_OLD_SUPER_MAGIC
918 #ifdef EXT2_OLD_SUPER_MAGIC
919 if (pbuf->f_type == EXT2_OLD_SUPER_MAGIC)
919 if (pbuf->f_type == EXT2_OLD_SUPER_MAGIC)
920 return "ext2";
920 return "ext2";
921 #endif
921 #endif
922 #ifdef EXT2_SUPER_MAGIC
922 #ifdef EXT2_SUPER_MAGIC
923 if (pbuf->f_type == EXT2_SUPER_MAGIC)
923 if (pbuf->f_type == EXT2_SUPER_MAGIC)
924 return "ext2";
924 return "ext2";
925 #endif
925 #endif
926 #ifdef EXT3_SUPER_MAGIC
926 #ifdef EXT3_SUPER_MAGIC
927 if (pbuf->f_type == EXT3_SUPER_MAGIC)
927 if (pbuf->f_type == EXT3_SUPER_MAGIC)
928 return "ext3";
928 return "ext3";
929 #endif
929 #endif
930 #ifdef EXT4_SUPER_MAGIC
930 #ifdef EXT4_SUPER_MAGIC
931 if (pbuf->f_type == EXT4_SUPER_MAGIC)
931 if (pbuf->f_type == EXT4_SUPER_MAGIC)
932 return "ext4";
932 return "ext4";
933 #endif
933 #endif
934 #ifdef F2FS_SUPER_MAGIC
934 #ifdef F2FS_SUPER_MAGIC
935 if (pbuf->f_type == F2FS_SUPER_MAGIC)
935 if (pbuf->f_type == F2FS_SUPER_MAGIC)
936 return "f2fs";
936 return "f2fs";
937 #endif
937 #endif
938 #ifdef FUSE_SUPER_MAGIC
938 #ifdef FUSE_SUPER_MAGIC
939 if (pbuf->f_type == FUSE_SUPER_MAGIC)
939 if (pbuf->f_type == FUSE_SUPER_MAGIC)
940 return "fuse";
940 return "fuse";
941 #endif
941 #endif
942 #ifdef FUTEXFS_SUPER_MAGIC
942 #ifdef FUTEXFS_SUPER_MAGIC
943 if (pbuf->f_type == FUTEXFS_SUPER_MAGIC)
943 if (pbuf->f_type == FUTEXFS_SUPER_MAGIC)
944 return "futexfs";
944 return "futexfs";
945 #endif
945 #endif
946 #ifdef HFS_SUPER_MAGIC
946 #ifdef HFS_SUPER_MAGIC
947 if (pbuf->f_type == HFS_SUPER_MAGIC)
947 if (pbuf->f_type == HFS_SUPER_MAGIC)
948 return "hfs";
948 return "hfs";
949 #endif
949 #endif
950 #ifdef HOSTFS_SUPER_MAGIC
950 #ifdef HOSTFS_SUPER_MAGIC
951 if (pbuf->f_type == HOSTFS_SUPER_MAGIC)
951 if (pbuf->f_type == HOSTFS_SUPER_MAGIC)
952 return "hostfs";
952 return "hostfs";
953 #endif
953 #endif
954 #ifdef HPFS_SUPER_MAGIC
954 #ifdef HPFS_SUPER_MAGIC
955 if (pbuf->f_type == HPFS_SUPER_MAGIC)
955 if (pbuf->f_type == HPFS_SUPER_MAGIC)
956 return "hpfs";
956 return "hpfs";
957 #endif
957 #endif
958 #ifdef HUGETLBFS_MAGIC
958 #ifdef HUGETLBFS_MAGIC
959 if (pbuf->f_type == HUGETLBFS_MAGIC)
959 if (pbuf->f_type == HUGETLBFS_MAGIC)
960 return "hugetlbfs";
960 return "hugetlbfs";
961 #endif
961 #endif
962 #ifdef ISOFS_SUPER_MAGIC
962 #ifdef ISOFS_SUPER_MAGIC
963 if (pbuf->f_type == ISOFS_SUPER_MAGIC)
963 if (pbuf->f_type == ISOFS_SUPER_MAGIC)
964 return "isofs";
964 return "isofs";
965 #endif
965 #endif
966 #ifdef JFFS2_SUPER_MAGIC
966 #ifdef JFFS2_SUPER_MAGIC
967 if (pbuf->f_type == JFFS2_SUPER_MAGIC)
967 if (pbuf->f_type == JFFS2_SUPER_MAGIC)
968 return "jffs2";
968 return "jffs2";
969 #endif
969 #endif
970 #ifdef JFS_SUPER_MAGIC
970 #ifdef JFS_SUPER_MAGIC
971 if (pbuf->f_type == JFS_SUPER_MAGIC)
971 if (pbuf->f_type == JFS_SUPER_MAGIC)
972 return "jfs";
972 return "jfs";
973 #endif
973 #endif
974 #ifdef MINIX_SUPER_MAGIC
974 #ifdef MINIX_SUPER_MAGIC
975 if (pbuf->f_type == MINIX_SUPER_MAGIC)
975 if (pbuf->f_type == MINIX_SUPER_MAGIC)
976 return "minix";
976 return "minix";
977 #endif
977 #endif
978 #ifdef MINIX2_SUPER_MAGIC
978 #ifdef MINIX2_SUPER_MAGIC
979 if (pbuf->f_type == MINIX2_SUPER_MAGIC)
979 if (pbuf->f_type == MINIX2_SUPER_MAGIC)
980 return "minix2";
980 return "minix2";
981 #endif
981 #endif
982 #ifdef MINIX3_SUPER_MAGIC
982 #ifdef MINIX3_SUPER_MAGIC
983 if (pbuf->f_type == MINIX3_SUPER_MAGIC)
983 if (pbuf->f_type == MINIX3_SUPER_MAGIC)
984 return "minix3";
984 return "minix3";
985 #endif
985 #endif
986 #ifdef MQUEUE_MAGIC
986 #ifdef MQUEUE_MAGIC
987 if (pbuf->f_type == MQUEUE_MAGIC)
987 if (pbuf->f_type == MQUEUE_MAGIC)
988 return "mqueue";
988 return "mqueue";
989 #endif
989 #endif
990 #ifdef MSDOS_SUPER_MAGIC
990 #ifdef MSDOS_SUPER_MAGIC
991 if (pbuf->f_type == MSDOS_SUPER_MAGIC)
991 if (pbuf->f_type == MSDOS_SUPER_MAGIC)
992 return "msdos";
992 return "msdos";
993 #endif
993 #endif
994 #ifdef NCP_SUPER_MAGIC
994 #ifdef NCP_SUPER_MAGIC
995 if (pbuf->f_type == NCP_SUPER_MAGIC)
995 if (pbuf->f_type == NCP_SUPER_MAGIC)
996 return "ncp";
996 return "ncp";
997 #endif
997 #endif
998 #ifdef NFS_SUPER_MAGIC
998 #ifdef NFS_SUPER_MAGIC
999 if (pbuf->f_type == NFS_SUPER_MAGIC)
999 if (pbuf->f_type == NFS_SUPER_MAGIC)
1000 return "nfs";
1000 return "nfs";
1001 #endif
1001 #endif
1002 #ifdef NILFS_SUPER_MAGIC
1002 #ifdef NILFS_SUPER_MAGIC
1003 if (pbuf->f_type == NILFS_SUPER_MAGIC)
1003 if (pbuf->f_type == NILFS_SUPER_MAGIC)
1004 return "nilfs";
1004 return "nilfs";
1005 #endif
1005 #endif
1006 #ifdef NTFS_SB_MAGIC
1006 #ifdef NTFS_SB_MAGIC
1007 if (pbuf->f_type == NTFS_SB_MAGIC)
1007 if (pbuf->f_type == NTFS_SB_MAGIC)
1008 return "ntfs-sb";
1008 return "ntfs-sb";
1009 #endif
1009 #endif
1010 #ifdef OCFS2_SUPER_MAGIC
1010 #ifdef OCFS2_SUPER_MAGIC
1011 if (pbuf->f_type == OCFS2_SUPER_MAGIC)
1011 if (pbuf->f_type == OCFS2_SUPER_MAGIC)
1012 return "ocfs2";
1012 return "ocfs2";
1013 #endif
1013 #endif
1014 #ifdef OPENPROM_SUPER_MAGIC
1014 #ifdef OPENPROM_SUPER_MAGIC
1015 if (pbuf->f_type == OPENPROM_SUPER_MAGIC)
1015 if (pbuf->f_type == OPENPROM_SUPER_MAGIC)
1016 return "openprom";
1016 return "openprom";
1017 #endif
1017 #endif
1018 #ifdef OVERLAYFS_SUPER_MAGIC
1018 #ifdef OVERLAYFS_SUPER_MAGIC
1019 if (pbuf->f_type == OVERLAYFS_SUPER_MAGIC)
1019 if (pbuf->f_type == OVERLAYFS_SUPER_MAGIC)
1020 return "overlay";
1020 return "overlay";
1021 #endif
1021 #endif
1022 #ifdef PIPEFS_MAGIC
1022 #ifdef PIPEFS_MAGIC
1023 if (pbuf->f_type == PIPEFS_MAGIC)
1023 if (pbuf->f_type == PIPEFS_MAGIC)
1024 return "pipefs";
1024 return "pipefs";
1025 #endif
1025 #endif
1026 #ifdef PROC_SUPER_MAGIC
1026 #ifdef PROC_SUPER_MAGIC
1027 if (pbuf->f_type == PROC_SUPER_MAGIC)
1027 if (pbuf->f_type == PROC_SUPER_MAGIC)
1028 return "proc";
1028 return "proc";
1029 #endif
1029 #endif
1030 #ifdef PSTOREFS_MAGIC
1030 #ifdef PSTOREFS_MAGIC
1031 if (pbuf->f_type == PSTOREFS_MAGIC)
1031 if (pbuf->f_type == PSTOREFS_MAGIC)
1032 return "pstorefs";
1032 return "pstorefs";
1033 #endif
1033 #endif
1034 #ifdef QNX4_SUPER_MAGIC
1034 #ifdef QNX4_SUPER_MAGIC
1035 if (pbuf->f_type == QNX4_SUPER_MAGIC)
1035 if (pbuf->f_type == QNX4_SUPER_MAGIC)
1036 return "qnx4";
1036 return "qnx4";
1037 #endif
1037 #endif
1038 #ifdef QNX6_SUPER_MAGIC
1038 #ifdef QNX6_SUPER_MAGIC
1039 if (pbuf->f_type == QNX6_SUPER_MAGIC)
1039 if (pbuf->f_type == QNX6_SUPER_MAGIC)
1040 return "qnx6";
1040 return "qnx6";
1041 #endif
1041 #endif
1042 #ifdef RAMFS_MAGIC
1042 #ifdef RAMFS_MAGIC
1043 if (pbuf->f_type == RAMFS_MAGIC)
1043 if (pbuf->f_type == RAMFS_MAGIC)
1044 return "ramfs";
1044 return "ramfs";
1045 #endif
1045 #endif
1046 #ifdef REISERFS_SUPER_MAGIC
1046 #ifdef REISERFS_SUPER_MAGIC
1047 if (pbuf->f_type == REISERFS_SUPER_MAGIC)
1047 if (pbuf->f_type == REISERFS_SUPER_MAGIC)
1048 return "reiserfs";
1048 return "reiserfs";
1049 #endif
1049 #endif
1050 #ifdef ROMFS_MAGIC
1050 #ifdef ROMFS_MAGIC
1051 if (pbuf->f_type == ROMFS_MAGIC)
1051 if (pbuf->f_type == ROMFS_MAGIC)
1052 return "romfs";
1052 return "romfs";
1053 #endif
1053 #endif
1054 #ifdef SECURITYFS_MAGIC
1054 #ifdef SECURITYFS_MAGIC
1055 if (pbuf->f_type == SECURITYFS_MAGIC)
1055 if (pbuf->f_type == SECURITYFS_MAGIC)
1056 return "securityfs";
1056 return "securityfs";
1057 #endif
1057 #endif
1058 #ifdef SELINUX_MAGIC
1058 #ifdef SELINUX_MAGIC
1059 if (pbuf->f_type == SELINUX_MAGIC)
1059 if (pbuf->f_type == SELINUX_MAGIC)
1060 return "selinux";
1060 return "selinux";
1061 #endif
1061 #endif
1062 #ifdef SMACK_MAGIC
1062 #ifdef SMACK_MAGIC
1063 if (pbuf->f_type == SMACK_MAGIC)
1063 if (pbuf->f_type == SMACK_MAGIC)
1064 return "smack";
1064 return "smack";
1065 #endif
1065 #endif
1066 #ifdef SMB_SUPER_MAGIC
1066 #ifdef SMB_SUPER_MAGIC
1067 if (pbuf->f_type == SMB_SUPER_MAGIC)
1067 if (pbuf->f_type == SMB_SUPER_MAGIC)
1068 return "smb";
1068 return "smb";
1069 #endif
1069 #endif
1070 #ifdef SOCKFS_MAGIC
1070 #ifdef SOCKFS_MAGIC
1071 if (pbuf->f_type == SOCKFS_MAGIC)
1071 if (pbuf->f_type == SOCKFS_MAGIC)
1072 return "sockfs";
1072 return "sockfs";
1073 #endif
1073 #endif
1074 #ifdef SQUASHFS_MAGIC
1074 #ifdef SQUASHFS_MAGIC
1075 if (pbuf->f_type == SQUASHFS_MAGIC)
1075 if (pbuf->f_type == SQUASHFS_MAGIC)
1076 return "squashfs";
1076 return "squashfs";
1077 #endif
1077 #endif
1078 #ifdef SYSFS_MAGIC
1078 #ifdef SYSFS_MAGIC
1079 if (pbuf->f_type == SYSFS_MAGIC)
1079 if (pbuf->f_type == SYSFS_MAGIC)
1080 return "sysfs";
1080 return "sysfs";
1081 #endif
1081 #endif
1082 #ifdef SYSV2_SUPER_MAGIC
1082 #ifdef SYSV2_SUPER_MAGIC
1083 if (pbuf->f_type == SYSV2_SUPER_MAGIC)
1083 if (pbuf->f_type == SYSV2_SUPER_MAGIC)
1084 return "sysv2";
1084 return "sysv2";
1085 #endif
1085 #endif
1086 #ifdef SYSV4_SUPER_MAGIC
1086 #ifdef SYSV4_SUPER_MAGIC
1087 if (pbuf->f_type == SYSV4_SUPER_MAGIC)
1087 if (pbuf->f_type == SYSV4_SUPER_MAGIC)
1088 return "sysv4";
1088 return "sysv4";
1089 #endif
1089 #endif
1090 #ifdef TMPFS_MAGIC
1090 #ifdef TMPFS_MAGIC
1091 if (pbuf->f_type == TMPFS_MAGIC)
1091 if (pbuf->f_type == TMPFS_MAGIC)
1092 return "tmpfs";
1092 return "tmpfs";
1093 #endif
1093 #endif
1094 #ifdef UDF_SUPER_MAGIC
1094 #ifdef UDF_SUPER_MAGIC
1095 if (pbuf->f_type == UDF_SUPER_MAGIC)
1095 if (pbuf->f_type == UDF_SUPER_MAGIC)
1096 return "udf";
1096 return "udf";
1097 #endif
1097 #endif
1098 #ifdef UFS_MAGIC
1098 #ifdef UFS_MAGIC
1099 if (pbuf->f_type == UFS_MAGIC)
1099 if (pbuf->f_type == UFS_MAGIC)
1100 return "ufs";
1100 return "ufs";
1101 #endif
1101 #endif
1102 #ifdef USBDEVICE_SUPER_MAGIC
1102 #ifdef USBDEVICE_SUPER_MAGIC
1103 if (pbuf->f_type == USBDEVICE_SUPER_MAGIC)
1103 if (pbuf->f_type == USBDEVICE_SUPER_MAGIC)
1104 return "usbdevice";
1104 return "usbdevice";
1105 #endif
1105 #endif
1106 #ifdef V9FS_MAGIC
1106 #ifdef V9FS_MAGIC
1107 if (pbuf->f_type == V9FS_MAGIC)
1107 if (pbuf->f_type == V9FS_MAGIC)
1108 return "v9fs";
1108 return "v9fs";
1109 #endif
1109 #endif
1110 #ifdef VXFS_SUPER_MAGIC
1110 #ifdef VXFS_SUPER_MAGIC
1111 if (pbuf->f_type == VXFS_SUPER_MAGIC)
1111 if (pbuf->f_type == VXFS_SUPER_MAGIC)
1112 return "vxfs";
1112 return "vxfs";
1113 #endif
1113 #endif
1114 #ifdef XENFS_SUPER_MAGIC
1114 #ifdef XENFS_SUPER_MAGIC
1115 if (pbuf->f_type == XENFS_SUPER_MAGIC)
1115 if (pbuf->f_type == XENFS_SUPER_MAGIC)
1116 return "xenfs";
1116 return "xenfs";
1117 #endif
1117 #endif
1118 #ifdef XENIX_SUPER_MAGIC
1118 #ifdef XENIX_SUPER_MAGIC
1119 if (pbuf->f_type == XENIX_SUPER_MAGIC)
1119 if (pbuf->f_type == XENIX_SUPER_MAGIC)
1120 return "xenix";
1120 return "xenix";
1121 #endif
1121 #endif
1122 #ifdef XFS_SUPER_MAGIC
1122 #ifdef XFS_SUPER_MAGIC
1123 if (pbuf->f_type == XFS_SUPER_MAGIC)
1123 if (pbuf->f_type == XFS_SUPER_MAGIC)
1124 return "xfs";
1124 return "xfs";
1125 #endif
1125 #endif
1126 /* End of Linux filesystems */
1126 /* End of Linux filesystems */
1127 return NULL;
1127 return NULL;
1128 }
1128 }
1129 #endif /* def HAVE_LINUX_STATFS */
1129 #endif /* def HAVE_LINUX_STATFS */
1130
1130
1131 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1131 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1132 /* given a directory path, return filesystem type name (best-effort) */
1132 /* given a directory path, return filesystem type name (best-effort) */
1133 static PyObject *getfstype(PyObject *self, PyObject *args)
1133 static PyObject *getfstype(PyObject *self, PyObject *args)
1134 {
1134 {
1135 const char *path = NULL;
1135 const char *path = NULL;
1136 struct statfs buf;
1136 struct statfs buf;
1137 int r;
1137 int r;
1138 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1138 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1139 return NULL;
1139 return NULL;
1140
1140
1141 memset(&buf, 0, sizeof(buf));
1141 memset(&buf, 0, sizeof(buf));
1142 r = statfs(path, &buf);
1142 r = statfs(path, &buf);
1143 if (r != 0)
1143 if (r != 0)
1144 return PyErr_SetFromErrno(PyExc_OSError);
1144 return PyErr_SetFromErrno(PyExc_OSError);
1145 return Py_BuildValue(PY23("s", "y"), describefstype(&buf));
1145 return Py_BuildValue(PY23("s", "y"), describefstype(&buf));
1146 }
1146 }
1147 #endif /* defined(HAVE_LINUX_STATFS) || defined(HAVE_BSD_STATFS) */
1147 #endif /* defined(HAVE_LINUX_STATFS) || defined(HAVE_BSD_STATFS) */
1148
1148
1149 #if defined(HAVE_BSD_STATFS)
1149 #if defined(HAVE_BSD_STATFS)
1150 /* given a directory path, return filesystem mount point (best-effort) */
1150 /* given a directory path, return filesystem mount point (best-effort) */
1151 static PyObject *getfsmountpoint(PyObject *self, PyObject *args)
1151 static PyObject *getfsmountpoint(PyObject *self, PyObject *args)
1152 {
1152 {
1153 const char *path = NULL;
1153 const char *path = NULL;
1154 struct statfs buf;
1154 struct statfs buf;
1155 int r;
1155 int r;
1156 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1156 if (!PyArg_ParseTuple(args, PY23("s", "y"), &path))
1157 return NULL;
1157 return NULL;
1158
1158
1159 memset(&buf, 0, sizeof(buf));
1159 memset(&buf, 0, sizeof(buf));
1160 r = statfs(path, &buf);
1160 r = statfs(path, &buf);
1161 if (r != 0)
1161 if (r != 0)
1162 return PyErr_SetFromErrno(PyExc_OSError);
1162 return PyErr_SetFromErrno(PyExc_OSError);
1163 return Py_BuildValue(PY23("s", "y"), buf.f_mntonname);
1163 return Py_BuildValue(PY23("s", "y"), buf.f_mntonname);
1164 }
1164 }
1165 #endif /* defined(HAVE_BSD_STATFS) */
1165 #endif /* defined(HAVE_BSD_STATFS) */
1166
1166
1167 static PyObject *unblocksignal(PyObject *self, PyObject *args)
1167 static PyObject *unblocksignal(PyObject *self, PyObject *args)
1168 {
1168 {
1169 int sig = 0;
1169 int sig = 0;
1170 sigset_t set;
1170 sigset_t set;
1171 int r;
1171 int r;
1172 if (!PyArg_ParseTuple(args, "i", &sig))
1172 if (!PyArg_ParseTuple(args, "i", &sig))
1173 return NULL;
1173 return NULL;
1174 r = sigemptyset(&set);
1174 r = sigemptyset(&set);
1175 if (r != 0)
1175 if (r != 0)
1176 return PyErr_SetFromErrno(PyExc_OSError);
1176 return PyErr_SetFromErrno(PyExc_OSError);
1177 r = sigaddset(&set, sig);
1177 r = sigaddset(&set, sig);
1178 if (r != 0)
1178 if (r != 0)
1179 return PyErr_SetFromErrno(PyExc_OSError);
1179 return PyErr_SetFromErrno(PyExc_OSError);
1180 r = sigprocmask(SIG_UNBLOCK, &set, NULL);
1180 r = sigprocmask(SIG_UNBLOCK, &set, NULL);
1181 if (r != 0)
1181 if (r != 0)
1182 return PyErr_SetFromErrno(PyExc_OSError);
1182 return PyErr_SetFromErrno(PyExc_OSError);
1183 Py_RETURN_NONE;
1183 Py_RETURN_NONE;
1184 }
1184 }
1185
1185
1186 #endif /* ndef _WIN32 */
1186 #endif /* ndef _WIN32 */
1187
1187
1188 static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs)
1188 static PyObject *listdir(PyObject *self, PyObject *args, PyObject *kwargs)
1189 {
1189 {
1190 PyObject *statobj = NULL; /* initialize - optional arg */
1190 PyObject *statobj = NULL; /* initialize - optional arg */
1191 PyObject *skipobj = NULL; /* initialize - optional arg */
1191 PyObject *skipobj = NULL; /* initialize - optional arg */
1192 char *path, *skip = NULL;
1192 char *path, *skip = NULL;
1193 Py_ssize_t plen;
1193 Py_ssize_t plen;
1194 int wantstat;
1194 int wantstat;
1195
1195
1196 static char *kwlist[] = {"path", "stat", "skip", NULL};
1196 static char *kwlist[] = {"path", "stat", "skip", NULL};
1197
1197
1198 if (!PyArg_ParseTupleAndKeywords(args, kwargs, PY23("s#|OO:listdir",
1198 if (!PyArg_ParseTupleAndKeywords(args, kwargs, PY23("s#|OO:listdir",
1199 "y#|OO:listdir"),
1199 "y#|OO:listdir"),
1200 kwlist, &path, &plen, &statobj, &skipobj))
1200 kwlist, &path, &plen, &statobj, &skipobj))
1201 return NULL;
1201 return NULL;
1202
1202
1203 wantstat = statobj && PyObject_IsTrue(statobj);
1203 wantstat = statobj && PyObject_IsTrue(statobj);
1204
1204
1205 if (skipobj && skipobj != Py_None) {
1205 if (skipobj && skipobj != Py_None) {
1206 skip = PyBytes_AsString(skipobj);
1206 skip = PyBytes_AsString(skipobj);
1207 if (!skip)
1207 if (!skip)
1208 return NULL;
1208 return NULL;
1209 }
1209 }
1210
1210
1211 return _listdir(path, plen, wantstat, skip);
1211 return _listdir(path, plen, wantstat, skip);
1212 }
1212 }
1213
1213
1214 #ifdef _WIN32
1214 #ifdef _WIN32
1215 static PyObject *posixfile(PyObject *self, PyObject *args, PyObject *kwds)
1215 static PyObject *posixfile(PyObject *self, PyObject *args, PyObject *kwds)
1216 {
1216 {
1217 static char *kwlist[] = {"name", "mode", "buffering", NULL};
1217 static char *kwlist[] = {"name", "mode", "buffering", NULL};
1218 PyObject *file_obj = NULL;
1218 PyObject *file_obj = NULL;
1219 char *name = NULL;
1219 char *name = NULL;
1220 char *mode = "rb";
1220 char *mode = "rb";
1221 DWORD access = 0;
1221 DWORD access = 0;
1222 DWORD creation;
1222 DWORD creation;
1223 HANDLE handle;
1223 HANDLE handle;
1224 int fd, flags = 0;
1224 int fd, flags = 0;
1225 int bufsize = -1;
1225 int bufsize = -1;
1226 char m0, m1, m2;
1226 char m0, m1, m2;
1227 char fpmode[4];
1227 char fpmode[4];
1228 int fppos = 0;
1228 int fppos = 0;
1229 int plus;
1229 int plus;
1230 #ifndef IS_PY3K
1230 #ifndef IS_PY3K
1231 FILE *fp;
1231 FILE *fp;
1232 #endif
1232 #endif
1233
1233
1234 if (!PyArg_ParseTupleAndKeywords(args, kwds, PY23("et|si:posixfile",
1234 if (!PyArg_ParseTupleAndKeywords(args, kwds, PY23("et|si:posixfile",
1235 "et|yi:posixfile"),
1235 "et|yi:posixfile"),
1236 kwlist,
1236 kwlist,
1237 Py_FileSystemDefaultEncoding,
1237 Py_FileSystemDefaultEncoding,
1238 &name, &mode, &bufsize))
1238 &name, &mode, &bufsize))
1239 return NULL;
1239 return NULL;
1240
1240
1241 m0 = mode[0];
1241 m0 = mode[0];
1242 m1 = m0 ? mode[1] : '\0';
1242 m1 = m0 ? mode[1] : '\0';
1243 m2 = m1 ? mode[2] : '\0';
1243 m2 = m1 ? mode[2] : '\0';
1244 plus = m1 == '+' || m2 == '+';
1244 plus = m1 == '+' || m2 == '+';
1245
1245
1246 fpmode[fppos++] = m0;
1246 fpmode[fppos++] = m0;
1247 if (m1 == 'b' || m2 == 'b') {
1247 if (m1 == 'b' || m2 == 'b') {
1248 flags = _O_BINARY;
1248 flags = _O_BINARY;
1249 fpmode[fppos++] = 'b';
1249 fpmode[fppos++] = 'b';
1250 }
1250 }
1251 else
1251 else
1252 flags = _O_TEXT;
1252 flags = _O_TEXT;
1253 if (m0 == 'r' && !plus) {
1253 if (m0 == 'r' && !plus) {
1254 flags |= _O_RDONLY;
1254 flags |= _O_RDONLY;
1255 access = GENERIC_READ;
1255 access = GENERIC_READ;
1256 } else {
1256 } else {
1257 /*
1257 /*
1258 work around http://support.microsoft.com/kb/899149 and
1258 work around http://support.microsoft.com/kb/899149 and
1259 set _O_RDWR for 'w' and 'a', even if mode has no '+'
1259 set _O_RDWR for 'w' and 'a', even if mode has no '+'
1260 */
1260 */
1261 flags |= _O_RDWR;
1261 flags |= _O_RDWR;
1262 access = GENERIC_READ | GENERIC_WRITE;
1262 access = GENERIC_READ | GENERIC_WRITE;
1263 fpmode[fppos++] = '+';
1263 fpmode[fppos++] = '+';
1264 }
1264 }
1265 fpmode[fppos++] = '\0';
1265 fpmode[fppos++] = '\0';
1266
1266
1267 switch (m0) {
1267 switch (m0) {
1268 case 'r':
1268 case 'r':
1269 creation = OPEN_EXISTING;
1269 creation = OPEN_EXISTING;
1270 break;
1270 break;
1271 case 'w':
1271 case 'w':
1272 creation = CREATE_ALWAYS;
1272 creation = CREATE_ALWAYS;
1273 break;
1273 break;
1274 case 'a':
1274 case 'a':
1275 creation = OPEN_ALWAYS;
1275 creation = OPEN_ALWAYS;
1276 flags |= _O_APPEND;
1276 flags |= _O_APPEND;
1277 break;
1277 break;
1278 default:
1278 default:
1279 PyErr_Format(PyExc_ValueError,
1279 PyErr_Format(PyExc_ValueError,
1280 "mode string must begin with one of 'r', 'w', "
1280 "mode string must begin with one of 'r', 'w', "
1281 "or 'a', not '%c'", m0);
1281 "or 'a', not '%c'", m0);
1282 goto bail;
1282 goto bail;
1283 }
1283 }
1284
1284
1285 handle = CreateFile(name, access,
1285 handle = CreateFile(name, access,
1286 FILE_SHARE_READ | FILE_SHARE_WRITE |
1286 FILE_SHARE_READ | FILE_SHARE_WRITE |
1287 FILE_SHARE_DELETE,
1287 FILE_SHARE_DELETE,
1288 NULL,
1288 NULL,
1289 creation,
1289 creation,
1290 FILE_ATTRIBUTE_NORMAL,
1290 FILE_ATTRIBUTE_NORMAL,
1291 0);
1291 0);
1292
1292
1293 if (handle == INVALID_HANDLE_VALUE) {
1293 if (handle == INVALID_HANDLE_VALUE) {
1294 PyErr_SetFromWindowsErrWithFilename(GetLastError(), name);
1294 PyErr_SetFromWindowsErrWithFilename(GetLastError(), name);
1295 goto bail;
1295 goto bail;
1296 }
1296 }
1297
1297
1298 fd = _open_osfhandle((intptr_t)handle, flags);
1298 fd = _open_osfhandle((intptr_t)handle, flags);
1299
1299
1300 if (fd == -1) {
1300 if (fd == -1) {
1301 CloseHandle(handle);
1301 CloseHandle(handle);
1302 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1302 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1303 goto bail;
1303 goto bail;
1304 }
1304 }
1305 #ifndef IS_PY3K
1305 #ifndef IS_PY3K
1306 fp = _fdopen(fd, fpmode);
1306 fp = _fdopen(fd, fpmode);
1307 if (fp == NULL) {
1307 if (fp == NULL) {
1308 _close(fd);
1308 _close(fd);
1309 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1309 PyErr_SetFromErrnoWithFilename(PyExc_IOError, name);
1310 goto bail;
1310 goto bail;
1311 }
1311 }
1312
1312
1313 file_obj = PyFile_FromFile(fp, name, mode, fclose);
1313 file_obj = PyFile_FromFile(fp, name, mode, fclose);
1314 if (file_obj == NULL) {
1314 if (file_obj == NULL) {
1315 fclose(fp);
1315 fclose(fp);
1316 goto bail;
1316 goto bail;
1317 }
1317 }
1318
1318
1319 PyFile_SetBufSize(file_obj, bufsize);
1319 PyFile_SetBufSize(file_obj, bufsize);
1320 #else
1320 #else
1321 file_obj = PyFile_FromFd(fd, name, mode, bufsize, NULL, NULL, NULL, 1);
1321 file_obj = PyFile_FromFd(fd, name, mode, bufsize, NULL, NULL, NULL, 1);
1322 if (file_obj == NULL)
1322 if (file_obj == NULL)
1323 goto bail;
1323 goto bail;
1324 #endif
1324 #endif
1325 bail:
1325 bail:
1326 PyMem_Free(name);
1326 PyMem_Free(name);
1327 return file_obj;
1327 return file_obj;
1328 }
1328 }
1329 #endif
1329 #endif
1330
1330
1331 #ifdef __APPLE__
1331 #ifdef __APPLE__
1332 #include <ApplicationServices/ApplicationServices.h>
1332 #include <ApplicationServices/ApplicationServices.h>
1333
1333
1334 static PyObject *isgui(PyObject *self)
1334 static PyObject *isgui(PyObject *self)
1335 {
1335 {
1336 CFDictionaryRef dict = CGSessionCopyCurrentDictionary();
1336 CFDictionaryRef dict = CGSessionCopyCurrentDictionary();
1337
1337
1338 if (dict != NULL) {
1338 if (dict != NULL) {
1339 CFRelease(dict);
1339 CFRelease(dict);
1340 Py_RETURN_TRUE;
1340 Py_RETURN_TRUE;
1341 } else {
1341 } else {
1342 Py_RETURN_FALSE;
1342 Py_RETURN_FALSE;
1343 }
1343 }
1344 }
1344 }
1345 #endif
1345 #endif
1346
1346
1347 static char osutil_doc[] = "Native operating system services.";
1347 static char osutil_doc[] = "Native operating system services.";
1348
1348
1349 static PyMethodDef methods[] = {
1349 static PyMethodDef methods[] = {
1350 {"listdir", (PyCFunction)listdir, METH_VARARGS | METH_KEYWORDS,
1350 {"listdir", (PyCFunction)listdir, METH_VARARGS | METH_KEYWORDS,
1351 "list a directory\n"},
1351 "list a directory\n"},
1352 #ifdef _WIN32
1352 #ifdef _WIN32
1353 {"posixfile", (PyCFunction)posixfile, METH_VARARGS | METH_KEYWORDS,
1353 {"posixfile", (PyCFunction)posixfile, METH_VARARGS | METH_KEYWORDS,
1354 "Open a file with POSIX-like semantics.\n"
1354 "Open a file with POSIX-like semantics.\n"
1355 "On error, this function may raise either a WindowsError or an IOError."},
1355 "On error, this function may raise either a WindowsError or an IOError."},
1356 #else
1356 #else
1357 {"statfiles", (PyCFunction)statfiles, METH_VARARGS | METH_KEYWORDS,
1357 {"statfiles", (PyCFunction)statfiles, METH_VARARGS | METH_KEYWORDS,
1358 "stat a series of files or symlinks\n"
1358 "stat a series of files or symlinks\n"
1359 "Returns None for non-existent entries and entries of other types.\n"},
1359 "Returns None for non-existent entries and entries of other types.\n"},
1360 #ifdef CMSG_LEN
1360 #ifdef CMSG_LEN
1361 {"recvfds", (PyCFunction)recvfds, METH_VARARGS,
1361 {"recvfds", (PyCFunction)recvfds, METH_VARARGS,
1362 "receive list of file descriptors via socket\n"},
1362 "receive list of file descriptors via socket\n"},
1363 #endif
1363 #endif
1364 #ifndef SETPROCNAME_USE_NONE
1364 #ifndef SETPROCNAME_USE_NONE
1365 {"setprocname", (PyCFunction)setprocname, METH_VARARGS,
1365 {"setprocname", (PyCFunction)setprocname, METH_VARARGS,
1366 "set process title (best-effort)\n"},
1366 "set process title (best-effort)\n"},
1367 #endif
1367 #endif
1368 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1368 #if defined(HAVE_BSD_STATFS) || defined(HAVE_LINUX_STATFS)
1369 {"getfstype", (PyCFunction)getfstype, METH_VARARGS,
1369 {"getfstype", (PyCFunction)getfstype, METH_VARARGS,
1370 "get filesystem type (best-effort)\n"},
1370 "get filesystem type (best-effort)\n"},
1371 #endif
1371 #endif
1372 #if defined(HAVE_BSD_STATFS)
1372 #if defined(HAVE_BSD_STATFS)
1373 {"getfsmountpoint", (PyCFunction)getfsmountpoint, METH_VARARGS,
1373 {"getfsmountpoint", (PyCFunction)getfsmountpoint, METH_VARARGS,
1374 "get filesystem mount point (best-effort)\n"},
1374 "get filesystem mount point (best-effort)\n"},
1375 #endif
1375 #endif
1376 {"unblocksignal", (PyCFunction)unblocksignal, METH_VARARGS,
1376 {"unblocksignal", (PyCFunction)unblocksignal, METH_VARARGS,
1377 "change signal mask to unblock a given signal\n"},
1377 "change signal mask to unblock a given signal\n"},
1378 #endif /* ndef _WIN32 */
1378 #endif /* ndef _WIN32 */
1379 #ifdef __APPLE__
1379 #ifdef __APPLE__
1380 {
1380 {
1381 "isgui", (PyCFunction)isgui, METH_NOARGS,
1381 "isgui", (PyCFunction)isgui, METH_NOARGS,
1382 "Is a CoreGraphics session available?"
1382 "Is a CoreGraphics session available?"
1383 },
1383 },
1384 #endif
1384 #endif
1385 {NULL, NULL}
1385 {NULL, NULL}
1386 };
1386 };
1387
1387
1388 static const int version = 4;
1388 static const int version = 4;
1389
1389
1390 #ifdef IS_PY3K
1390 #ifdef IS_PY3K
1391 static struct PyModuleDef osutil_module = {
1391 static struct PyModuleDef osutil_module = {
1392 PyModuleDef_HEAD_INIT,
1392 PyModuleDef_HEAD_INIT,
1393 "osutil",
1393 "osutil",
1394 osutil_doc,
1394 osutil_doc,
1395 -1,
1395 -1,
1396 methods
1396 methods
1397 };
1397 };
1398
1398
1399 PyMODINIT_FUNC PyInit_osutil(void)
1399 PyMODINIT_FUNC PyInit_osutil(void)
1400 {
1400 {
1401 PyObject *m;
1401 PyObject *m;
1402 if (PyType_Ready(&listdir_stat_type) < 0)
1402 if (PyType_Ready(&listdir_stat_type) < 0)
1403 return NULL;
1403 return NULL;
1404
1404
1405 m = PyModule_Create(&osutil_module);
1405 m = PyModule_Create(&osutil_module);
1406 PyModule_AddIntConstant(m, "version", version);
1406 PyModule_AddIntConstant(m, "version", version);
1407 return m;
1407 return m;
1408 }
1408 }
1409 #else
1409 #else
1410 PyMODINIT_FUNC initosutil(void)
1410 PyMODINIT_FUNC initosutil(void)
1411 {
1411 {
1412 PyObject *m;
1412 PyObject *m;
1413 if (PyType_Ready(&listdir_stat_type) == -1)
1413 if (PyType_Ready(&listdir_stat_type) == -1)
1414 return;
1414 return;
1415
1415
1416 m = Py_InitModule3("osutil", methods, osutil_doc);
1416 m = Py_InitModule3("osutil", methods, osutil_doc);
1417 PyModule_AddIntConstant(m, "version", version);
1417 PyModule_AddIntConstant(m, "version", version);
1418 }
1418 }
1419 #endif
1419 #endif
@@ -1,802 +1,803 b''
1 /*
1 /*
2 pathencode.c - efficient path name encoding
2 pathencode.c - efficient path name encoding
3
3
4 Copyright 2012 Facebook
4 Copyright 2012 Facebook
5
5
6 This software may be used and distributed according to the terms of
6 This software may be used and distributed according to the terms of
7 the GNU General Public License, incorporated herein by reference.
7 the GNU General Public License, incorporated herein by reference.
8 */
8 */
9
9
10 /*
10 /*
11 * An implementation of the name encoding scheme used by the fncache
11 * An implementation of the name encoding scheme used by the fncache
12 * store. The common case is of a path < 120 bytes long, which is
12 * store. The common case is of a path < 120 bytes long, which is
13 * handled either in a single pass with no allocations or two passes
13 * handled either in a single pass with no allocations or two passes
14 * with a single allocation. For longer paths, multiple passes are
14 * with a single allocation. For longer paths, multiple passes are
15 * required.
15 * required.
16 */
16 */
17
17
18 #define PY_SSIZE_T_CLEAN
18 #define PY_SSIZE_T_CLEAN
19 #include <Python.h>
19 #include <Python.h>
20 #include <assert.h>
20 #include <assert.h>
21 #include <ctype.h>
21 #include <ctype.h>
22 #include <stdlib.h>
22 #include <stdlib.h>
23 #include <string.h>
23 #include <string.h>
24 #include "pythoncapi_compat.h"
24
25
25 #include "util.h"
26 #include "util.h"
26
27
27 /* state machine for the fast path */
28 /* state machine for the fast path */
28 enum path_state {
29 enum path_state {
29 START, /* first byte of a path component */
30 START, /* first byte of a path component */
30 A, /* "AUX" */
31 A, /* "AUX" */
31 AU,
32 AU,
32 THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
33 THIRD, /* third of a 3-byte sequence, e.g. "AUX", "NUL" */
33 C, /* "CON" or "COMn" */
34 C, /* "CON" or "COMn" */
34 CO,
35 CO,
35 COMLPT, /* "COM" or "LPT" */
36 COMLPT, /* "COM" or "LPT" */
36 COMLPTn,
37 COMLPTn,
37 L,
38 L,
38 LP,
39 LP,
39 N,
40 N,
40 NU,
41 NU,
41 P, /* "PRN" */
42 P, /* "PRN" */
42 PR,
43 PR,
43 LDOT, /* leading '.' */
44 LDOT, /* leading '.' */
44 DOT, /* '.' in a non-leading position */
45 DOT, /* '.' in a non-leading position */
45 H, /* ".h" */
46 H, /* ".h" */
46 HGDI, /* ".hg", ".d", or ".i" */
47 HGDI, /* ".hg", ".d", or ".i" */
47 SPACE,
48 SPACE,
48 DEFAULT, /* byte of a path component after the first */
49 DEFAULT, /* byte of a path component after the first */
49 };
50 };
50
51
51 /* state machine for dir-encoding */
52 /* state machine for dir-encoding */
52 enum dir_state {
53 enum dir_state {
53 DDOT,
54 DDOT,
54 DH,
55 DH,
55 DHGDI,
56 DHGDI,
56 DDEFAULT,
57 DDEFAULT,
57 };
58 };
58
59
59 static inline int inset(const uint32_t bitset[], char c)
60 static inline int inset(const uint32_t bitset[], char c)
60 {
61 {
61 return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31));
62 return bitset[((uint8_t)c) >> 5] & (1 << (((uint8_t)c) & 31));
62 }
63 }
63
64
64 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 static inline void charcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
65 char c)
66 char c)
66 {
67 {
67 if (dest) {
68 if (dest) {
68 assert(*destlen < destsize);
69 assert(*destlen < destsize);
69 dest[*destlen] = c;
70 dest[*destlen] = c;
70 }
71 }
71 (*destlen)++;
72 (*destlen)++;
72 }
73 }
73
74
74 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 static inline void memcopy(char *dest, Py_ssize_t *destlen, size_t destsize,
75 const void *src, Py_ssize_t len)
76 const void *src, Py_ssize_t len)
76 {
77 {
77 if (dest) {
78 if (dest) {
78 assert(*destlen + len < destsize);
79 assert(*destlen + len < destsize);
79 memcpy((void *)&dest[*destlen], src, len);
80 memcpy((void *)&dest[*destlen], src, len);
80 }
81 }
81 *destlen += len;
82 *destlen += len;
82 }
83 }
83
84
84 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 static inline void hexencode(char *dest, Py_ssize_t *destlen, size_t destsize,
85 uint8_t c)
86 uint8_t c)
86 {
87 {
87 static const char hexdigit[] = "0123456789abcdef";
88 static const char hexdigit[] = "0123456789abcdef";
88
89
89 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 charcopy(dest, destlen, destsize, hexdigit[c >> 4]);
90 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 charcopy(dest, destlen, destsize, hexdigit[c & 15]);
91 }
92 }
92
93
93 /* 3-byte escape: tilde followed by two hex digits */
94 /* 3-byte escape: tilde followed by two hex digits */
94 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 static inline void escape3(char *dest, Py_ssize_t *destlen, size_t destsize,
95 char c)
96 char c)
96 {
97 {
97 charcopy(dest, destlen, destsize, '~');
98 charcopy(dest, destlen, destsize, '~');
98 hexencode(dest, destlen, destsize, c);
99 hexencode(dest, destlen, destsize, c);
99 }
100 }
100
101
101 static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src,
102 static Py_ssize_t _encodedir(char *dest, size_t destsize, const char *src,
102 Py_ssize_t len)
103 Py_ssize_t len)
103 {
104 {
104 enum dir_state state = DDEFAULT;
105 enum dir_state state = DDEFAULT;
105 Py_ssize_t i = 0, destlen = 0;
106 Py_ssize_t i = 0, destlen = 0;
106
107
107 while (i < len) {
108 while (i < len) {
108 switch (state) {
109 switch (state) {
109 case DDOT:
110 case DDOT:
110 switch (src[i]) {
111 switch (src[i]) {
111 case 'd':
112 case 'd':
112 case 'i':
113 case 'i':
113 state = DHGDI;
114 state = DHGDI;
114 charcopy(dest, &destlen, destsize, src[i++]);
115 charcopy(dest, &destlen, destsize, src[i++]);
115 break;
116 break;
116 case 'h':
117 case 'h':
117 state = DH;
118 state = DH;
118 charcopy(dest, &destlen, destsize, src[i++]);
119 charcopy(dest, &destlen, destsize, src[i++]);
119 break;
120 break;
120 default:
121 default:
121 state = DDEFAULT;
122 state = DDEFAULT;
122 break;
123 break;
123 }
124 }
124 break;
125 break;
125 case DH:
126 case DH:
126 if (src[i] == 'g') {
127 if (src[i] == 'g') {
127 state = DHGDI;
128 state = DHGDI;
128 charcopy(dest, &destlen, destsize, src[i++]);
129 charcopy(dest, &destlen, destsize, src[i++]);
129 } else {
130 } else {
130 state = DDEFAULT;
131 state = DDEFAULT;
131 }
132 }
132 break;
133 break;
133 case DHGDI:
134 case DHGDI:
134 if (src[i] == '/') {
135 if (src[i] == '/') {
135 memcopy(dest, &destlen, destsize, ".hg", 3);
136 memcopy(dest, &destlen, destsize, ".hg", 3);
136 charcopy(dest, &destlen, destsize, src[i++]);
137 charcopy(dest, &destlen, destsize, src[i++]);
137 }
138 }
138 state = DDEFAULT;
139 state = DDEFAULT;
139 break;
140 break;
140 case DDEFAULT:
141 case DDEFAULT:
141 if (src[i] == '.') {
142 if (src[i] == '.') {
142 state = DDOT;
143 state = DDOT;
143 }
144 }
144 charcopy(dest, &destlen, destsize, src[i++]);
145 charcopy(dest, &destlen, destsize, src[i++]);
145 break;
146 break;
146 }
147 }
147 }
148 }
148
149
149 return destlen;
150 return destlen;
150 }
151 }
151
152
152 PyObject *encodedir(PyObject *self, PyObject *args)
153 PyObject *encodedir(PyObject *self, PyObject *args)
153 {
154 {
154 Py_ssize_t len, newlen;
155 Py_ssize_t len, newlen;
155 PyObject *pathobj, *newobj;
156 PyObject *pathobj, *newobj;
156 char *path;
157 char *path;
157
158
158 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj)) {
159 if (!PyArg_ParseTuple(args, "O:encodedir", &pathobj)) {
159 return NULL;
160 return NULL;
160 }
161 }
161
162
162 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
163 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
163 PyErr_SetString(PyExc_TypeError, "expected a string");
164 PyErr_SetString(PyExc_TypeError, "expected a string");
164 return NULL;
165 return NULL;
165 }
166 }
166
167
167 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
168 newlen = len ? _encodedir(NULL, 0, path, len + 1) : 1;
168
169
169 if (newlen == len + 1) {
170 if (newlen == len + 1) {
170 Py_INCREF(pathobj);
171 Py_INCREF(pathobj);
171 return pathobj;
172 return pathobj;
172 }
173 }
173
174
174 newobj = PyBytes_FromStringAndSize(NULL, newlen);
175 newobj = PyBytes_FromStringAndSize(NULL, newlen);
175
176
176 if (newobj) {
177 if (newobj) {
177 assert(PyBytes_Check(newobj));
178 assert(PyBytes_Check(newobj));
178 Py_SIZE(newobj)--;
179 Py_SIZE(newobj)--;
179 _encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
180 _encodedir(PyBytes_AS_STRING(newobj), newlen, path, len + 1);
180 }
181 }
181
182
182 return newobj;
183 return newobj;
183 }
184 }
184
185
185 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
186 static Py_ssize_t _encode(const uint32_t twobytes[8], const uint32_t onebyte[8],
186 char *dest, Py_ssize_t destlen, size_t destsize,
187 char *dest, Py_ssize_t destlen, size_t destsize,
187 const char *src, Py_ssize_t len, int encodedir)
188 const char *src, Py_ssize_t len, int encodedir)
188 {
189 {
189 enum path_state state = START;
190 enum path_state state = START;
190 Py_ssize_t i = 0;
191 Py_ssize_t i = 0;
191
192
192 /*
193 /*
193 * Python strings end with a zero byte, which we use as a
194 * Python strings end with a zero byte, which we use as a
194 * terminal token as they are not valid inside path names.
195 * terminal token as they are not valid inside path names.
195 */
196 */
196
197
197 while (i < len) {
198 while (i < len) {
198 switch (state) {
199 switch (state) {
199 case START:
200 case START:
200 switch (src[i]) {
201 switch (src[i]) {
201 case '/':
202 case '/':
202 charcopy(dest, &destlen, destsize, src[i++]);
203 charcopy(dest, &destlen, destsize, src[i++]);
203 break;
204 break;
204 case '.':
205 case '.':
205 state = LDOT;
206 state = LDOT;
206 escape3(dest, &destlen, destsize, src[i++]);
207 escape3(dest, &destlen, destsize, src[i++]);
207 break;
208 break;
208 case ' ':
209 case ' ':
209 state = DEFAULT;
210 state = DEFAULT;
210 escape3(dest, &destlen, destsize, src[i++]);
211 escape3(dest, &destlen, destsize, src[i++]);
211 break;
212 break;
212 case 'a':
213 case 'a':
213 state = A;
214 state = A;
214 charcopy(dest, &destlen, destsize, src[i++]);
215 charcopy(dest, &destlen, destsize, src[i++]);
215 break;
216 break;
216 case 'c':
217 case 'c':
217 state = C;
218 state = C;
218 charcopy(dest, &destlen, destsize, src[i++]);
219 charcopy(dest, &destlen, destsize, src[i++]);
219 break;
220 break;
220 case 'l':
221 case 'l':
221 state = L;
222 state = L;
222 charcopy(dest, &destlen, destsize, src[i++]);
223 charcopy(dest, &destlen, destsize, src[i++]);
223 break;
224 break;
224 case 'n':
225 case 'n':
225 state = N;
226 state = N;
226 charcopy(dest, &destlen, destsize, src[i++]);
227 charcopy(dest, &destlen, destsize, src[i++]);
227 break;
228 break;
228 case 'p':
229 case 'p':
229 state = P;
230 state = P;
230 charcopy(dest, &destlen, destsize, src[i++]);
231 charcopy(dest, &destlen, destsize, src[i++]);
231 break;
232 break;
232 default:
233 default:
233 state = DEFAULT;
234 state = DEFAULT;
234 break;
235 break;
235 }
236 }
236 break;
237 break;
237 case A:
238 case A:
238 if (src[i] == 'u') {
239 if (src[i] == 'u') {
239 state = AU;
240 state = AU;
240 charcopy(dest, &destlen, destsize, src[i++]);
241 charcopy(dest, &destlen, destsize, src[i++]);
241 } else {
242 } else {
242 state = DEFAULT;
243 state = DEFAULT;
243 }
244 }
244 break;
245 break;
245 case AU:
246 case AU:
246 if (src[i] == 'x') {
247 if (src[i] == 'x') {
247 state = THIRD;
248 state = THIRD;
248 i++;
249 i++;
249 } else {
250 } else {
250 state = DEFAULT;
251 state = DEFAULT;
251 }
252 }
252 break;
253 break;
253 case THIRD:
254 case THIRD:
254 state = DEFAULT;
255 state = DEFAULT;
255 switch (src[i]) {
256 switch (src[i]) {
256 case '.':
257 case '.':
257 case '/':
258 case '/':
258 case '\0':
259 case '\0':
259 escape3(dest, &destlen, destsize, src[i - 1]);
260 escape3(dest, &destlen, destsize, src[i - 1]);
260 break;
261 break;
261 default:
262 default:
262 i--;
263 i--;
263 break;
264 break;
264 }
265 }
265 break;
266 break;
266 case C:
267 case C:
267 if (src[i] == 'o') {
268 if (src[i] == 'o') {
268 state = CO;
269 state = CO;
269 charcopy(dest, &destlen, destsize, src[i++]);
270 charcopy(dest, &destlen, destsize, src[i++]);
270 } else {
271 } else {
271 state = DEFAULT;
272 state = DEFAULT;
272 }
273 }
273 break;
274 break;
274 case CO:
275 case CO:
275 if (src[i] == 'm') {
276 if (src[i] == 'm') {
276 state = COMLPT;
277 state = COMLPT;
277 i++;
278 i++;
278 } else if (src[i] == 'n') {
279 } else if (src[i] == 'n') {
279 state = THIRD;
280 state = THIRD;
280 i++;
281 i++;
281 } else {
282 } else {
282 state = DEFAULT;
283 state = DEFAULT;
283 }
284 }
284 break;
285 break;
285 case COMLPT:
286 case COMLPT:
286 switch (src[i]) {
287 switch (src[i]) {
287 case '1':
288 case '1':
288 case '2':
289 case '2':
289 case '3':
290 case '3':
290 case '4':
291 case '4':
291 case '5':
292 case '5':
292 case '6':
293 case '6':
293 case '7':
294 case '7':
294 case '8':
295 case '8':
295 case '9':
296 case '9':
296 state = COMLPTn;
297 state = COMLPTn;
297 i++;
298 i++;
298 break;
299 break;
299 default:
300 default:
300 state = DEFAULT;
301 state = DEFAULT;
301 charcopy(dest, &destlen, destsize, src[i - 1]);
302 charcopy(dest, &destlen, destsize, src[i - 1]);
302 break;
303 break;
303 }
304 }
304 break;
305 break;
305 case COMLPTn:
306 case COMLPTn:
306 state = DEFAULT;
307 state = DEFAULT;
307 switch (src[i]) {
308 switch (src[i]) {
308 case '.':
309 case '.':
309 case '/':
310 case '/':
310 case '\0':
311 case '\0':
311 escape3(dest, &destlen, destsize, src[i - 2]);
312 escape3(dest, &destlen, destsize, src[i - 2]);
312 charcopy(dest, &destlen, destsize, src[i - 1]);
313 charcopy(dest, &destlen, destsize, src[i - 1]);
313 break;
314 break;
314 default:
315 default:
315 memcopy(dest, &destlen, destsize, &src[i - 2],
316 memcopy(dest, &destlen, destsize, &src[i - 2],
316 2);
317 2);
317 break;
318 break;
318 }
319 }
319 break;
320 break;
320 case L:
321 case L:
321 if (src[i] == 'p') {
322 if (src[i] == 'p') {
322 state = LP;
323 state = LP;
323 charcopy(dest, &destlen, destsize, src[i++]);
324 charcopy(dest, &destlen, destsize, src[i++]);
324 } else {
325 } else {
325 state = DEFAULT;
326 state = DEFAULT;
326 }
327 }
327 break;
328 break;
328 case LP:
329 case LP:
329 if (src[i] == 't') {
330 if (src[i] == 't') {
330 state = COMLPT;
331 state = COMLPT;
331 i++;
332 i++;
332 } else {
333 } else {
333 state = DEFAULT;
334 state = DEFAULT;
334 }
335 }
335 break;
336 break;
336 case N:
337 case N:
337 if (src[i] == 'u') {
338 if (src[i] == 'u') {
338 state = NU;
339 state = NU;
339 charcopy(dest, &destlen, destsize, src[i++]);
340 charcopy(dest, &destlen, destsize, src[i++]);
340 } else {
341 } else {
341 state = DEFAULT;
342 state = DEFAULT;
342 }
343 }
343 break;
344 break;
344 case NU:
345 case NU:
345 if (src[i] == 'l') {
346 if (src[i] == 'l') {
346 state = THIRD;
347 state = THIRD;
347 i++;
348 i++;
348 } else {
349 } else {
349 state = DEFAULT;
350 state = DEFAULT;
350 }
351 }
351 break;
352 break;
352 case P:
353 case P:
353 if (src[i] == 'r') {
354 if (src[i] == 'r') {
354 state = PR;
355 state = PR;
355 charcopy(dest, &destlen, destsize, src[i++]);
356 charcopy(dest, &destlen, destsize, src[i++]);
356 } else {
357 } else {
357 state = DEFAULT;
358 state = DEFAULT;
358 }
359 }
359 break;
360 break;
360 case PR:
361 case PR:
361 if (src[i] == 'n') {
362 if (src[i] == 'n') {
362 state = THIRD;
363 state = THIRD;
363 i++;
364 i++;
364 } else {
365 } else {
365 state = DEFAULT;
366 state = DEFAULT;
366 }
367 }
367 break;
368 break;
368 case LDOT:
369 case LDOT:
369 switch (src[i]) {
370 switch (src[i]) {
370 case 'd':
371 case 'd':
371 case 'i':
372 case 'i':
372 state = HGDI;
373 state = HGDI;
373 charcopy(dest, &destlen, destsize, src[i++]);
374 charcopy(dest, &destlen, destsize, src[i++]);
374 break;
375 break;
375 case 'h':
376 case 'h':
376 state = H;
377 state = H;
377 charcopy(dest, &destlen, destsize, src[i++]);
378 charcopy(dest, &destlen, destsize, src[i++]);
378 break;
379 break;
379 default:
380 default:
380 state = DEFAULT;
381 state = DEFAULT;
381 break;
382 break;
382 }
383 }
383 break;
384 break;
384 case DOT:
385 case DOT:
385 switch (src[i]) {
386 switch (src[i]) {
386 case '/':
387 case '/':
387 case '\0':
388 case '\0':
388 state = START;
389 state = START;
389 memcopy(dest, &destlen, destsize, "~2e", 3);
390 memcopy(dest, &destlen, destsize, "~2e", 3);
390 charcopy(dest, &destlen, destsize, src[i++]);
391 charcopy(dest, &destlen, destsize, src[i++]);
391 break;
392 break;
392 case 'd':
393 case 'd':
393 case 'i':
394 case 'i':
394 state = HGDI;
395 state = HGDI;
395 charcopy(dest, &destlen, destsize, '.');
396 charcopy(dest, &destlen, destsize, '.');
396 charcopy(dest, &destlen, destsize, src[i++]);
397 charcopy(dest, &destlen, destsize, src[i++]);
397 break;
398 break;
398 case 'h':
399 case 'h':
399 state = H;
400 state = H;
400 memcopy(dest, &destlen, destsize, ".h", 2);
401 memcopy(dest, &destlen, destsize, ".h", 2);
401 i++;
402 i++;
402 break;
403 break;
403 default:
404 default:
404 state = DEFAULT;
405 state = DEFAULT;
405 charcopy(dest, &destlen, destsize, '.');
406 charcopy(dest, &destlen, destsize, '.');
406 break;
407 break;
407 }
408 }
408 break;
409 break;
409 case H:
410 case H:
410 if (src[i] == 'g') {
411 if (src[i] == 'g') {
411 state = HGDI;
412 state = HGDI;
412 charcopy(dest, &destlen, destsize, src[i++]);
413 charcopy(dest, &destlen, destsize, src[i++]);
413 } else {
414 } else {
414 state = DEFAULT;
415 state = DEFAULT;
415 }
416 }
416 break;
417 break;
417 case HGDI:
418 case HGDI:
418 if (src[i] == '/') {
419 if (src[i] == '/') {
419 state = START;
420 state = START;
420 if (encodedir) {
421 if (encodedir) {
421 memcopy(dest, &destlen, destsize, ".hg",
422 memcopy(dest, &destlen, destsize, ".hg",
422 3);
423 3);
423 }
424 }
424 charcopy(dest, &destlen, destsize, src[i++]);
425 charcopy(dest, &destlen, destsize, src[i++]);
425 } else {
426 } else {
426 state = DEFAULT;
427 state = DEFAULT;
427 }
428 }
428 break;
429 break;
429 case SPACE:
430 case SPACE:
430 switch (src[i]) {
431 switch (src[i]) {
431 case '/':
432 case '/':
432 case '\0':
433 case '\0':
433 state = START;
434 state = START;
434 memcopy(dest, &destlen, destsize, "~20", 3);
435 memcopy(dest, &destlen, destsize, "~20", 3);
435 charcopy(dest, &destlen, destsize, src[i++]);
436 charcopy(dest, &destlen, destsize, src[i++]);
436 break;
437 break;
437 default:
438 default:
438 state = DEFAULT;
439 state = DEFAULT;
439 charcopy(dest, &destlen, destsize, ' ');
440 charcopy(dest, &destlen, destsize, ' ');
440 break;
441 break;
441 }
442 }
442 break;
443 break;
443 case DEFAULT:
444 case DEFAULT:
444 while (inset(onebyte, src[i])) {
445 while (inset(onebyte, src[i])) {
445 charcopy(dest, &destlen, destsize, src[i++]);
446 charcopy(dest, &destlen, destsize, src[i++]);
446 if (i == len) {
447 if (i == len) {
447 goto done;
448 goto done;
448 }
449 }
449 }
450 }
450 switch (src[i]) {
451 switch (src[i]) {
451 case '.':
452 case '.':
452 state = DOT;
453 state = DOT;
453 i++;
454 i++;
454 break;
455 break;
455 case ' ':
456 case ' ':
456 state = SPACE;
457 state = SPACE;
457 i++;
458 i++;
458 break;
459 break;
459 case '/':
460 case '/':
460 state = START;
461 state = START;
461 charcopy(dest, &destlen, destsize, '/');
462 charcopy(dest, &destlen, destsize, '/');
462 i++;
463 i++;
463 break;
464 break;
464 default:
465 default:
465 if (inset(onebyte, src[i])) {
466 if (inset(onebyte, src[i])) {
466 do {
467 do {
467 charcopy(dest, &destlen,
468 charcopy(dest, &destlen,
468 destsize, src[i++]);
469 destsize, src[i++]);
469 } while (i < len &&
470 } while (i < len &&
470 inset(onebyte, src[i]));
471 inset(onebyte, src[i]));
471 } else if (inset(twobytes, src[i])) {
472 } else if (inset(twobytes, src[i])) {
472 char c = src[i++];
473 char c = src[i++];
473 charcopy(dest, &destlen, destsize, '_');
474 charcopy(dest, &destlen, destsize, '_');
474 charcopy(dest, &destlen, destsize,
475 charcopy(dest, &destlen, destsize,
475 c == '_' ? '_' : c + 32);
476 c == '_' ? '_' : c + 32);
476 } else {
477 } else {
477 escape3(dest, &destlen, destsize,
478 escape3(dest, &destlen, destsize,
478 src[i++]);
479 src[i++]);
479 }
480 }
480 break;
481 break;
481 }
482 }
482 break;
483 break;
483 }
484 }
484 }
485 }
485 done:
486 done:
486 return destlen;
487 return destlen;
487 }
488 }
488
489
489 static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src,
490 static Py_ssize_t basicencode(char *dest, size_t destsize, const char *src,
490 Py_ssize_t len)
491 Py_ssize_t len)
491 {
492 {
492 static const uint32_t twobytes[8] = {0, 0, 0x87fffffe};
493 static const uint32_t twobytes[8] = {0, 0, 0x87fffffe};
493
494
494 static const uint32_t onebyte[8] = {
495 static const uint32_t onebyte[8] = {
495 1,
496 1,
496 0x2bff3bfa,
497 0x2bff3bfa,
497 0x68000001,
498 0x68000001,
498 0x2fffffff,
499 0x2fffffff,
499 };
500 };
500
501
501 Py_ssize_t destlen = 0;
502 Py_ssize_t destlen = 0;
502
503
503 return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1);
504 return _encode(twobytes, onebyte, dest, destlen, destsize, src, len, 1);
504 }
505 }
505
506
506 static const Py_ssize_t maxstorepathlen = 120;
507 static const Py_ssize_t maxstorepathlen = 120;
507
508
508 static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src,
509 static Py_ssize_t _lowerencode(char *dest, size_t destsize, const char *src,
509 Py_ssize_t len)
510 Py_ssize_t len)
510 {
511 {
511 static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001,
512 static const uint32_t onebyte[8] = {1, 0x2bfffbfb, 0xe8000001,
512 0x2fffffff};
513 0x2fffffff};
513
514
514 static const uint32_t lower[8] = {0, 0, 0x7fffffe};
515 static const uint32_t lower[8] = {0, 0, 0x7fffffe};
515
516
516 Py_ssize_t i, destlen = 0;
517 Py_ssize_t i, destlen = 0;
517
518
518 for (i = 0; i < len; i++) {
519 for (i = 0; i < len; i++) {
519 if (inset(onebyte, src[i])) {
520 if (inset(onebyte, src[i])) {
520 charcopy(dest, &destlen, destsize, src[i]);
521 charcopy(dest, &destlen, destsize, src[i]);
521 } else if (inset(lower, src[i])) {
522 } else if (inset(lower, src[i])) {
522 charcopy(dest, &destlen, destsize, src[i] + 32);
523 charcopy(dest, &destlen, destsize, src[i] + 32);
523 } else {
524 } else {
524 escape3(dest, &destlen, destsize, src[i]);
525 escape3(dest, &destlen, destsize, src[i]);
525 }
526 }
526 }
527 }
527
528
528 return destlen;
529 return destlen;
529 }
530 }
530
531
531 PyObject *lowerencode(PyObject *self, PyObject *args)
532 PyObject *lowerencode(PyObject *self, PyObject *args)
532 {
533 {
533 char *path;
534 char *path;
534 Py_ssize_t len, newlen;
535 Py_ssize_t len, newlen;
535 PyObject *ret;
536 PyObject *ret;
536
537
537 if (!PyArg_ParseTuple(args, PY23("s#:lowerencode", "y#:lowerencode"),
538 if (!PyArg_ParseTuple(args, PY23("s#:lowerencode", "y#:lowerencode"),
538 &path, &len)) {
539 &path, &len)) {
539 return NULL;
540 return NULL;
540 }
541 }
541
542
542 newlen = _lowerencode(NULL, 0, path, len);
543 newlen = _lowerencode(NULL, 0, path, len);
543 ret = PyBytes_FromStringAndSize(NULL, newlen);
544 ret = PyBytes_FromStringAndSize(NULL, newlen);
544 if (ret) {
545 if (ret) {
545 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
546 _lowerencode(PyBytes_AS_STRING(ret), newlen, path, len);
546 }
547 }
547
548
548 return ret;
549 return ret;
549 }
550 }
550
551
551 /* See store.py:_auxencode for a description. */
552 /* See store.py:_auxencode for a description. */
552 static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src,
553 static Py_ssize_t auxencode(char *dest, size_t destsize, const char *src,
553 Py_ssize_t len)
554 Py_ssize_t len)
554 {
555 {
555 static const uint32_t twobytes[8];
556 static const uint32_t twobytes[8];
556
557
557 static const uint32_t onebyte[8] = {
558 static const uint32_t onebyte[8] = {
558 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
559 ~0U, 0xffff3ffe, ~0U, ~0U, ~0U, ~0U, ~0U, ~0U,
559 };
560 };
560
561
561 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
562 return _encode(twobytes, onebyte, dest, 0, destsize, src, len, 0);
562 }
563 }
563
564
564 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
565 static PyObject *hashmangle(const char *src, Py_ssize_t len, const char sha[20])
565 {
566 {
566 static const Py_ssize_t dirprefixlen = 8;
567 static const Py_ssize_t dirprefixlen = 8;
567 static const Py_ssize_t maxshortdirslen = 68;
568 static const Py_ssize_t maxshortdirslen = 68;
568 char *dest;
569 char *dest;
569 PyObject *ret;
570 PyObject *ret;
570
571
571 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
572 Py_ssize_t i, d, p, lastslash = len - 1, lastdot = -1;
572 Py_ssize_t destsize, destlen = 0, slop, used;
573 Py_ssize_t destsize, destlen = 0, slop, used;
573
574
574 while (lastslash >= 0 && src[lastslash] != '/') {
575 while (lastslash >= 0 && src[lastslash] != '/') {
575 if (src[lastslash] == '.' && lastdot == -1) {
576 if (src[lastslash] == '.' && lastdot == -1) {
576 lastdot = lastslash;
577 lastdot = lastslash;
577 }
578 }
578 lastslash--;
579 lastslash--;
579 }
580 }
580
581
581 #if 0
582 #if 0
582 /* All paths should end in a suffix of ".i" or ".d".
583 /* All paths should end in a suffix of ".i" or ".d".
583 Unfortunately, the file names in test-hybridencode.py
584 Unfortunately, the file names in test-hybridencode.py
584 violate this rule. */
585 violate this rule. */
585 if (lastdot != len - 3) {
586 if (lastdot != len - 3) {
586 PyErr_SetString(PyExc_ValueError,
587 PyErr_SetString(PyExc_ValueError,
587 "suffix missing or wrong length");
588 "suffix missing or wrong length");
588 return NULL;
589 return NULL;
589 }
590 }
590 #endif
591 #endif
591
592
592 /* If src contains a suffix, we will append it to the end of
593 /* If src contains a suffix, we will append it to the end of
593 the new string, so make room. */
594 the new string, so make room. */
594 destsize = 120;
595 destsize = 120;
595 if (lastdot >= 0) {
596 if (lastdot >= 0) {
596 destsize += len - lastdot - 1;
597 destsize += len - lastdot - 1;
597 }
598 }
598
599
599 ret = PyBytes_FromStringAndSize(NULL, destsize);
600 ret = PyBytes_FromStringAndSize(NULL, destsize);
600 if (ret == NULL) {
601 if (ret == NULL) {
601 return NULL;
602 return NULL;
602 }
603 }
603
604
604 dest = PyBytes_AS_STRING(ret);
605 dest = PyBytes_AS_STRING(ret);
605 memcopy(dest, &destlen, destsize, "dh/", 3);
606 memcopy(dest, &destlen, destsize, "dh/", 3);
606
607
607 /* Copy up to dirprefixlen bytes of each path component, up to
608 /* Copy up to dirprefixlen bytes of each path component, up to
608 a limit of maxshortdirslen bytes. */
609 a limit of maxshortdirslen bytes. */
609 for (i = d = p = 0; i < lastslash; i++, p++) {
610 for (i = d = p = 0; i < lastslash; i++, p++) {
610 if (src[i] == '/') {
611 if (src[i] == '/') {
611 char d = dest[destlen - 1];
612 char d = dest[destlen - 1];
612 /* After truncation, a directory name may end
613 /* After truncation, a directory name may end
613 in a space or dot, which are unportable. */
614 in a space or dot, which are unportable. */
614 if (d == '.' || d == ' ') {
615 if (d == '.' || d == ' ') {
615 dest[destlen - 1] = '_';
616 dest[destlen - 1] = '_';
616 /* The + 3 is to account for "dh/" in the
617 /* The + 3 is to account for "dh/" in the
617 * beginning */
618 * beginning */
618 }
619 }
619 if (destlen > maxshortdirslen + 3) {
620 if (destlen > maxshortdirslen + 3) {
620 break;
621 break;
621 }
622 }
622 charcopy(dest, &destlen, destsize, src[i]);
623 charcopy(dest, &destlen, destsize, src[i]);
623 p = -1;
624 p = -1;
624 } else if (p < dirprefixlen) {
625 } else if (p < dirprefixlen) {
625 charcopy(dest, &destlen, destsize, src[i]);
626 charcopy(dest, &destlen, destsize, src[i]);
626 }
627 }
627 }
628 }
628
629
629 /* Rewind to just before the last slash copied. */
630 /* Rewind to just before the last slash copied. */
630 if (destlen > maxshortdirslen + 3) {
631 if (destlen > maxshortdirslen + 3) {
631 do {
632 do {
632 destlen--;
633 destlen--;
633 } while (destlen > 0 && dest[destlen] != '/');
634 } while (destlen > 0 && dest[destlen] != '/');
634 }
635 }
635
636
636 if (destlen > 3) {
637 if (destlen > 3) {
637 if (lastslash > 0) {
638 if (lastslash > 0) {
638 char d = dest[destlen - 1];
639 char d = dest[destlen - 1];
639 /* The last directory component may be
640 /* The last directory component may be
640 truncated, so make it safe. */
641 truncated, so make it safe. */
641 if (d == '.' || d == ' ') {
642 if (d == '.' || d == ' ') {
642 dest[destlen - 1] = '_';
643 dest[destlen - 1] = '_';
643 }
644 }
644 }
645 }
645
646
646 charcopy(dest, &destlen, destsize, '/');
647 charcopy(dest, &destlen, destsize, '/');
647 }
648 }
648
649
649 /* Add a prefix of the original file's name. Its length
650 /* Add a prefix of the original file's name. Its length
650 depends on the number of bytes left after accounting for
651 depends on the number of bytes left after accounting for
651 hash and suffix. */
652 hash and suffix. */
652 used = destlen + 40;
653 used = destlen + 40;
653 if (lastdot >= 0) {
654 if (lastdot >= 0) {
654 used += len - lastdot - 1;
655 used += len - lastdot - 1;
655 }
656 }
656 slop = maxstorepathlen - used;
657 slop = maxstorepathlen - used;
657 if (slop > 0) {
658 if (slop > 0) {
658 Py_ssize_t basenamelen =
659 Py_ssize_t basenamelen =
659 lastslash >= 0 ? len - lastslash - 2 : len - 1;
660 lastslash >= 0 ? len - lastslash - 2 : len - 1;
660
661
661 if (basenamelen > slop) {
662 if (basenamelen > slop) {
662 basenamelen = slop;
663 basenamelen = slop;
663 }
664 }
664 if (basenamelen > 0) {
665 if (basenamelen > 0) {
665 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
666 memcopy(dest, &destlen, destsize, &src[lastslash + 1],
666 basenamelen);
667 basenamelen);
667 }
668 }
668 }
669 }
669
670
670 /* Add hash and suffix. */
671 /* Add hash and suffix. */
671 for (i = 0; i < 20; i++) {
672 for (i = 0; i < 20; i++) {
672 hexencode(dest, &destlen, destsize, sha[i]);
673 hexencode(dest, &destlen, destsize, sha[i]);
673 }
674 }
674
675
675 if (lastdot >= 0) {
676 if (lastdot >= 0) {
676 memcopy(dest, &destlen, destsize, &src[lastdot],
677 memcopy(dest, &destlen, destsize, &src[lastdot],
677 len - lastdot - 1);
678 len - lastdot - 1);
678 }
679 }
679
680
680 assert(PyBytes_Check(ret));
681 assert(PyBytes_Check(ret));
681 Py_SIZE(ret) = destlen;
682 Py_SET_SIZE(ret, destlen);
682
683
683 return ret;
684 return ret;
684 }
685 }
685
686
686 /*
687 /*
687 * Avoiding a trip through Python would improve performance by 50%,
688 * Avoiding a trip through Python would improve performance by 50%,
688 * but we don't encounter enough long names to be worth the code.
689 * but we don't encounter enough long names to be worth the code.
689 */
690 */
690 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
691 static int sha1hash(char hash[20], const char *str, Py_ssize_t len)
691 {
692 {
692 static PyObject *shafunc;
693 static PyObject *shafunc;
693 PyObject *shaobj, *hashobj;
694 PyObject *shaobj, *hashobj;
694
695
695 if (shafunc == NULL) {
696 if (shafunc == NULL) {
696 PyObject *hashlib = PyImport_ImportModule("hashlib");
697 PyObject *hashlib = PyImport_ImportModule("hashlib");
697 if (hashlib == NULL) {
698 if (hashlib == NULL) {
698 PyErr_SetString(PyExc_ImportError,
699 PyErr_SetString(PyExc_ImportError,
699 "pathencode failed to find hashlib");
700 "pathencode failed to find hashlib");
700 return -1;
701 return -1;
701 }
702 }
702 shafunc = PyObject_GetAttrString(hashlib, "sha1");
703 shafunc = PyObject_GetAttrString(hashlib, "sha1");
703 Py_DECREF(hashlib);
704 Py_DECREF(hashlib);
704
705
705 if (shafunc == NULL) {
706 if (shafunc == NULL) {
706 PyErr_SetString(PyExc_AttributeError,
707 PyErr_SetString(PyExc_AttributeError,
707 "module 'hashlib' has no "
708 "module 'hashlib' has no "
708 "attribute 'sha1' in pathencode");
709 "attribute 'sha1' in pathencode");
709 return -1;
710 return -1;
710 }
711 }
711 }
712 }
712
713
713 shaobj = PyObject_CallFunction(shafunc, PY23("s#", "y#"), str, len);
714 shaobj = PyObject_CallFunction(shafunc, PY23("s#", "y#"), str, len);
714
715
715 if (shaobj == NULL) {
716 if (shaobj == NULL) {
716 return -1;
717 return -1;
717 }
718 }
718
719
719 hashobj = PyObject_CallMethod(shaobj, "digest", "");
720 hashobj = PyObject_CallMethod(shaobj, "digest", "");
720 Py_DECREF(shaobj);
721 Py_DECREF(shaobj);
721 if (hashobj == NULL) {
722 if (hashobj == NULL) {
722 return -1;
723 return -1;
723 }
724 }
724
725
725 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
726 if (!PyBytes_Check(hashobj) || PyBytes_GET_SIZE(hashobj) != 20) {
726 PyErr_SetString(PyExc_TypeError,
727 PyErr_SetString(PyExc_TypeError,
727 "result of digest is not a 20-byte hash");
728 "result of digest is not a 20-byte hash");
728 Py_DECREF(hashobj);
729 Py_DECREF(hashobj);
729 return -1;
730 return -1;
730 }
731 }
731
732
732 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
733 memcpy(hash, PyBytes_AS_STRING(hashobj), 20);
733 Py_DECREF(hashobj);
734 Py_DECREF(hashobj);
734 return 0;
735 return 0;
735 }
736 }
736
737
737 #define MAXENCODE 4096 * 4
738 #define MAXENCODE 4096 * 4
738
739
739 static PyObject *hashencode(const char *src, Py_ssize_t len)
740 static PyObject *hashencode(const char *src, Py_ssize_t len)
740 {
741 {
741 char dired[MAXENCODE];
742 char dired[MAXENCODE];
742 char lowered[MAXENCODE];
743 char lowered[MAXENCODE];
743 char auxed[MAXENCODE];
744 char auxed[MAXENCODE];
744 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
745 Py_ssize_t dirlen, lowerlen, auxlen, baselen;
745 char sha[20];
746 char sha[20];
746
747
747 baselen = (len - 5) * 3;
748 baselen = (len - 5) * 3;
748 if (baselen >= MAXENCODE) {
749 if (baselen >= MAXENCODE) {
749 PyErr_SetString(PyExc_ValueError, "string too long");
750 PyErr_SetString(PyExc_ValueError, "string too long");
750 return NULL;
751 return NULL;
751 }
752 }
752
753
753 dirlen = _encodedir(dired, baselen, src, len);
754 dirlen = _encodedir(dired, baselen, src, len);
754 if (sha1hash(sha, dired, dirlen - 1) == -1) {
755 if (sha1hash(sha, dired, dirlen - 1) == -1) {
755 return NULL;
756 return NULL;
756 }
757 }
757 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
758 lowerlen = _lowerencode(lowered, baselen, dired + 5, dirlen - 5);
758 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
759 auxlen = auxencode(auxed, baselen, lowered, lowerlen);
759 return hashmangle(auxed, auxlen, sha);
760 return hashmangle(auxed, auxlen, sha);
760 }
761 }
761
762
762 PyObject *pathencode(PyObject *self, PyObject *args)
763 PyObject *pathencode(PyObject *self, PyObject *args)
763 {
764 {
764 Py_ssize_t len, newlen;
765 Py_ssize_t len, newlen;
765 PyObject *pathobj, *newobj;
766 PyObject *pathobj, *newobj;
766 char *path;
767 char *path;
767
768
768 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj)) {
769 if (!PyArg_ParseTuple(args, "O:pathencode", &pathobj)) {
769 return NULL;
770 return NULL;
770 }
771 }
771
772
772 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
773 if (PyBytes_AsStringAndSize(pathobj, &path, &len) == -1) {
773 PyErr_SetString(PyExc_TypeError, "expected a string");
774 PyErr_SetString(PyExc_TypeError, "expected a string");
774 return NULL;
775 return NULL;
775 }
776 }
776
777
777 if (len > maxstorepathlen) {
778 if (len > maxstorepathlen) {
778 newlen = maxstorepathlen + 2;
779 newlen = maxstorepathlen + 2;
779 } else {
780 } else {
780 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
781 newlen = len ? basicencode(NULL, 0, path, len + 1) : 1;
781 }
782 }
782
783
783 if (newlen <= maxstorepathlen + 1) {
784 if (newlen <= maxstorepathlen + 1) {
784 if (newlen == len + 1) {
785 if (newlen == len + 1) {
785 Py_INCREF(pathobj);
786 Py_INCREF(pathobj);
786 return pathobj;
787 return pathobj;
787 }
788 }
788
789
789 newobj = PyBytes_FromStringAndSize(NULL, newlen);
790 newobj = PyBytes_FromStringAndSize(NULL, newlen);
790
791
791 if (newobj) {
792 if (newobj) {
792 assert(PyBytes_Check(newobj));
793 assert(PyBytes_Check(newobj));
793 Py_SIZE(newobj)--;
794 Py_SIZE(newobj)--;
794 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
795 basicencode(PyBytes_AS_STRING(newobj), newlen, path,
795 len + 1);
796 len + 1);
796 }
797 }
797 } else {
798 } else {
798 newobj = hashencode(path, len + 1);
799 newobj = hashencode(path, len + 1);
799 }
800 }
800
801
801 return newobj;
802 return newobj;
802 }
803 }
@@ -1,95 +1,96 b''
1 #require test-repo
1 #require test-repo
2
2
3 $ . "$TESTDIR/helpers-testrepo.sh"
3 $ . "$TESTDIR/helpers-testrepo.sh"
4 $ check_code="$TESTDIR"/../contrib/check-code.py
4 $ check_code="$TESTDIR"/../contrib/check-code.py
5 $ cd "$TESTDIR"/..
5 $ cd "$TESTDIR"/..
6
6
7 New errors are not allowed. Warnings are strongly discouraged.
7 New errors are not allowed. Warnings are strongly discouraged.
8 (The writing "no-che?k-code" is for not skipping this file when checking.)
8 (The writing "no-che?k-code" is for not skipping this file when checking.)
9
9
10 $ testrepohg locate \
10 $ testrepohg locate \
11 > -X contrib/python-zstandard \
11 > -X contrib/python-zstandard \
12 > -X hgext/fsmonitor/pywatchman \
12 > -X hgext/fsmonitor/pywatchman \
13 > -X mercurial/thirdparty \
13 > -X mercurial/thirdparty \
14 > -X mercurial/pythoncapi_compat.h \
14 > | sed 's-\\-/-g' | "$check_code" --warnings --per-file=0 - || false
15 > | sed 's-\\-/-g' | "$check_code" --warnings --per-file=0 - || false
15 Skipping contrib/automation/hgautomation/__init__.py it has no-che?k-code (glob)
16 Skipping contrib/automation/hgautomation/__init__.py it has no-che?k-code (glob)
16 Skipping contrib/automation/hgautomation/aws.py it has no-che?k-code (glob)
17 Skipping contrib/automation/hgautomation/aws.py it has no-che?k-code (glob)
17 Skipping contrib/automation/hgautomation/cli.py it has no-che?k-code (glob)
18 Skipping contrib/automation/hgautomation/cli.py it has no-che?k-code (glob)
18 Skipping contrib/automation/hgautomation/linux.py it has no-che?k-code (glob)
19 Skipping contrib/automation/hgautomation/linux.py it has no-che?k-code (glob)
19 Skipping contrib/automation/hgautomation/pypi.py it has no-che?k-code (glob)
20 Skipping contrib/automation/hgautomation/pypi.py it has no-che?k-code (glob)
20 Skipping contrib/automation/hgautomation/ssh.py it has no-che?k-code (glob)
21 Skipping contrib/automation/hgautomation/ssh.py it has no-che?k-code (glob)
21 Skipping contrib/automation/hgautomation/try_server.py it has no-che?k-code (glob)
22 Skipping contrib/automation/hgautomation/try_server.py it has no-che?k-code (glob)
22 Skipping contrib/automation/hgautomation/windows.py it has no-che?k-code (glob)
23 Skipping contrib/automation/hgautomation/windows.py it has no-che?k-code (glob)
23 Skipping contrib/automation/hgautomation/winrm.py it has no-che?k-code (glob)
24 Skipping contrib/automation/hgautomation/winrm.py it has no-che?k-code (glob)
24 Skipping contrib/fuzz/FuzzedDataProvider.h it has no-che?k-code (glob)
25 Skipping contrib/fuzz/FuzzedDataProvider.h it has no-che?k-code (glob)
25 Skipping contrib/fuzz/standalone_fuzz_target_runner.cc it has no-che?k-code (glob)
26 Skipping contrib/fuzz/standalone_fuzz_target_runner.cc it has no-che?k-code (glob)
26 Skipping contrib/packaging/hgpackaging/cli.py it has no-che?k-code (glob)
27 Skipping contrib/packaging/hgpackaging/cli.py it has no-che?k-code (glob)
27 Skipping contrib/packaging/hgpackaging/downloads.py it has no-che?k-code (glob)
28 Skipping contrib/packaging/hgpackaging/downloads.py it has no-che?k-code (glob)
28 Skipping contrib/packaging/hgpackaging/inno.py it has no-che?k-code (glob)
29 Skipping contrib/packaging/hgpackaging/inno.py it has no-che?k-code (glob)
29 Skipping contrib/packaging/hgpackaging/py2exe.py it has no-che?k-code (glob)
30 Skipping contrib/packaging/hgpackaging/py2exe.py it has no-che?k-code (glob)
30 Skipping contrib/packaging/hgpackaging/pyoxidizer.py it has no-che?k-code (glob)
31 Skipping contrib/packaging/hgpackaging/pyoxidizer.py it has no-che?k-code (glob)
31 Skipping contrib/packaging/hgpackaging/util.py it has no-che?k-code (glob)
32 Skipping contrib/packaging/hgpackaging/util.py it has no-che?k-code (glob)
32 Skipping contrib/packaging/hgpackaging/wix.py it has no-che?k-code (glob)
33 Skipping contrib/packaging/hgpackaging/wix.py it has no-che?k-code (glob)
33 Skipping i18n/polib.py it has no-che?k-code (glob)
34 Skipping i18n/polib.py it has no-che?k-code (glob)
34 Skipping mercurial/statprof.py it has no-che?k-code (glob)
35 Skipping mercurial/statprof.py it has no-che?k-code (glob)
35 Skipping tests/badserverext.py it has no-che?k-code (glob)
36 Skipping tests/badserverext.py it has no-che?k-code (glob)
36
37
37 @commands in debugcommands.py should be in alphabetical order.
38 @commands in debugcommands.py should be in alphabetical order.
38
39
39 >>> import re
40 >>> import re
40 >>> commands = []
41 >>> commands = []
41 >>> with open('mercurial/debugcommands.py', 'rb') as fh:
42 >>> with open('mercurial/debugcommands.py', 'rb') as fh:
42 ... for line in fh:
43 ... for line in fh:
43 ... m = re.match(br"^@command\('([a-z]+)", line)
44 ... m = re.match(br"^@command\('([a-z]+)", line)
44 ... if m:
45 ... if m:
45 ... commands.append(m.group(1))
46 ... commands.append(m.group(1))
46 >>> scommands = list(sorted(commands))
47 >>> scommands = list(sorted(commands))
47 >>> for i, command in enumerate(scommands):
48 >>> for i, command in enumerate(scommands):
48 ... if command != commands[i]:
49 ... if command != commands[i]:
49 ... print('commands in debugcommands.py not sorted; first differing '
50 ... print('commands in debugcommands.py not sorted; first differing '
50 ... 'command is %s; expected %s' % (commands[i], command))
51 ... 'command is %s; expected %s' % (commands[i], command))
51 ... break
52 ... break
52
53
53 Prevent adding new files in the root directory accidentally.
54 Prevent adding new files in the root directory accidentally.
54
55
55 $ testrepohg files 'glob:*'
56 $ testrepohg files 'glob:*'
56 .arcconfig
57 .arcconfig
57 .clang-format
58 .clang-format
58 .editorconfig
59 .editorconfig
59 .hgignore
60 .hgignore
60 .hgsigs
61 .hgsigs
61 .hgtags
62 .hgtags
62 .jshintrc
63 .jshintrc
63 CONTRIBUTING
64 CONTRIBUTING
64 CONTRIBUTORS
65 CONTRIBUTORS
65 COPYING
66 COPYING
66 Makefile
67 Makefile
67 README.rst
68 README.rst
68 black.toml
69 black.toml
69 hg
70 hg
70 hgeditor
71 hgeditor
71 hgweb.cgi
72 hgweb.cgi
72 rustfmt.toml
73 rustfmt.toml
73 setup.py
74 setup.py
74
75
75 Prevent adding modules which could be shadowed by ancient .so/.dylib.
76 Prevent adding modules which could be shadowed by ancient .so/.dylib.
76
77
77 $ testrepohg files \
78 $ testrepohg files \
78 > mercurial/base85.py \
79 > mercurial/base85.py \
79 > mercurial/bdiff.py \
80 > mercurial/bdiff.py \
80 > mercurial/diffhelpers.py \
81 > mercurial/diffhelpers.py \
81 > mercurial/mpatch.py \
82 > mercurial/mpatch.py \
82 > mercurial/osutil.py \
83 > mercurial/osutil.py \
83 > mercurial/parsers.py \
84 > mercurial/parsers.py \
84 > mercurial/zstd.py
85 > mercurial/zstd.py
85 [1]
86 [1]
86
87
87 Keep python3 tests sorted:
88 Keep python3 tests sorted:
88 $ sort < contrib/python3-whitelist > $TESTTMP/py3sorted
89 $ sort < contrib/python3-whitelist > $TESTTMP/py3sorted
89 $ cmp contrib/python3-whitelist $TESTTMP/py3sorted || echo 'Please sort passing tests!'
90 $ cmp contrib/python3-whitelist $TESTTMP/py3sorted || echo 'Please sort passing tests!'
90
91
91 Keep Windows line endings in check
92 Keep Windows line endings in check
92
93
93 $ testrepohg files 'set:eol(dos)'
94 $ testrepohg files 'set:eol(dos)'
94 contrib/win32/hg.bat
95 contrib/win32/hg.bat
95 contrib/win32/mercurial.ini
96 contrib/win32/mercurial.ini
General Comments 0
You need to be logged in to leave comments. Login now