##// END OF EJS Templates
branching: merge stable into default
Raphaël Gomès -
r54034:1b4a024f merge tip default
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

@@ -0,0 +1,412
1 # This file is automatically @generated by Cargo.
2 # It is not intended for manual editing.
3 version = 3
4
5 [[package]]
6 name = "autocfg"
7 version = "1.3.0"
8 source = "registry+https://github.com/rust-lang/crates.io-index"
9 checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0"
10
11 [[package]]
12 name = "bitflags"
13 version = "2.5.0"
14 source = "registry+https://github.com/rust-lang/crates.io-index"
15 checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1"
16
17 [[package]]
18 name = "cc"
19 version = "1.0.98"
20 source = "registry+https://github.com/rust-lang/crates.io-index"
21 checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f"
22 dependencies = [
23 "jobserver",
24 "libc",
25 "once_cell",
26 ]
27
28 [[package]]
29 name = "cfg-if"
30 version = "1.0.0"
31 source = "registry+https://github.com/rust-lang/crates.io-index"
32 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
33
34 [[package]]
35 name = "crossbeam-deque"
36 version = "0.8.5"
37 source = "registry+https://github.com/rust-lang/crates.io-index"
38 checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
39 dependencies = [
40 "crossbeam-epoch",
41 "crossbeam-utils",
42 ]
43
44 [[package]]
45 name = "crossbeam-epoch"
46 version = "0.9.18"
47 source = "registry+https://github.com/rust-lang/crates.io-index"
48 checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
49 dependencies = [
50 "crossbeam-utils",
51 ]
52
53 [[package]]
54 name = "crossbeam-utils"
55 version = "0.8.20"
56 source = "registry+https://github.com/rust-lang/crates.io-index"
57 checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
58
59 [[package]]
60 name = "either"
61 version = "1.12.0"
62 source = "registry+https://github.com/rust-lang/crates.io-index"
63 checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b"
64
65 [[package]]
66 name = "heck"
67 version = "0.4.1"
68 source = "registry+https://github.com/rust-lang/crates.io-index"
69 checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
70
71 [[package]]
72 name = "hermit-abi"
73 version = "0.3.9"
74 source = "registry+https://github.com/rust-lang/crates.io-index"
75 checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
76
77 [[package]]
78 name = "indoc"
79 version = "2.0.5"
80 source = "registry+https://github.com/rust-lang/crates.io-index"
81 checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5"
82
83 [[package]]
84 name = "jobserver"
85 version = "0.1.31"
86 source = "registry+https://github.com/rust-lang/crates.io-index"
87 checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e"
88 dependencies = [
89 "libc",
90 ]
91
92 [[package]]
93 name = "libc"
94 version = "0.2.155"
95 source = "registry+https://github.com/rust-lang/crates.io-index"
96 checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
97
98 [[package]]
99 name = "lock_api"
100 version = "0.4.12"
101 source = "registry+https://github.com/rust-lang/crates.io-index"
102 checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
103 dependencies = [
104 "autocfg",
105 "scopeguard",
106 ]
107
108 [[package]]
109 name = "memoffset"
110 version = "0.9.1"
111 source = "registry+https://github.com/rust-lang/crates.io-index"
112 checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
113 dependencies = [
114 "autocfg",
115 ]
116
117 [[package]]
118 name = "num_cpus"
119 version = "1.16.0"
120 source = "registry+https://github.com/rust-lang/crates.io-index"
121 checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
122 dependencies = [
123 "hermit-abi",
124 "libc",
125 ]
126
127 [[package]]
128 name = "once_cell"
129 version = "1.19.0"
130 source = "registry+https://github.com/rust-lang/crates.io-index"
131 checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92"
132
133 [[package]]
134 name = "parking_lot"
135 version = "0.12.3"
136 source = "registry+https://github.com/rust-lang/crates.io-index"
137 checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
138 dependencies = [
139 "lock_api",
140 "parking_lot_core",
141 ]
142
143 [[package]]
144 name = "parking_lot_core"
145 version = "0.9.10"
146 source = "registry+https://github.com/rust-lang/crates.io-index"
147 checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
148 dependencies = [
149 "cfg-if",
150 "libc",
151 "redox_syscall",
152 "smallvec",
153 "windows-targets",
154 ]
155
156 [[package]]
157 name = "pkg-config"
158 version = "0.3.30"
159 source = "registry+https://github.com/rust-lang/crates.io-index"
160 checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
161
162 [[package]]
163 name = "portable-atomic"
164 version = "1.6.0"
165 source = "registry+https://github.com/rust-lang/crates.io-index"
166 checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0"
167
168 [[package]]
169 name = "proc-macro2"
170 version = "1.0.84"
171 source = "registry+https://github.com/rust-lang/crates.io-index"
172 checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6"
173 dependencies = [
174 "unicode-ident",
175 ]
176
177 [[package]]
178 name = "pyo3"
179 version = "0.21.2"
180 source = "registry+https://github.com/rust-lang/crates.io-index"
181 checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8"
182 dependencies = [
183 "cfg-if",
184 "indoc",
185 "libc",
186 "memoffset",
187 "parking_lot",
188 "portable-atomic",
189 "pyo3-build-config",
190 "pyo3-ffi",
191 "pyo3-macros",
192 "unindent",
193 ]
194
195 [[package]]
196 name = "pyo3-build-config"
197 version = "0.21.2"
198 source = "registry+https://github.com/rust-lang/crates.io-index"
199 checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50"
200 dependencies = [
201 "once_cell",
202 "target-lexicon",
203 ]
204
205 [[package]]
206 name = "pyo3-ffi"
207 version = "0.21.2"
208 source = "registry+https://github.com/rust-lang/crates.io-index"
209 checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403"
210 dependencies = [
211 "libc",
212 "pyo3-build-config",
213 ]
214
215 [[package]]
216 name = "pyo3-macros"
217 version = "0.21.2"
218 source = "registry+https://github.com/rust-lang/crates.io-index"
219 checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c"
220 dependencies = [
221 "proc-macro2",
222 "pyo3-macros-backend",
223 "quote",
224 "syn",
225 ]
226
227 [[package]]
228 name = "pyo3-macros-backend"
229 version = "0.21.2"
230 source = "registry+https://github.com/rust-lang/crates.io-index"
231 checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c"
232 dependencies = [
233 "heck",
234 "proc-macro2",
235 "pyo3-build-config",
236 "quote",
237 "syn",
238 ]
239
[[package]]
name = "python-zstandard"
version = "0.23.0"
dependencies = [
 "libc",
 "num_cpus",
 "pyo3",
 "rayon",
 "zstd-safe",
 "zstd-sys",
]
251
252 [[package]]
253 name = "quote"
254 version = "1.0.36"
255 source = "registry+https://github.com/rust-lang/crates.io-index"
256 checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7"
257 dependencies = [
258 "proc-macro2",
259 ]
260
261 [[package]]
262 name = "rayon"
263 version = "1.10.0"
264 source = "registry+https://github.com/rust-lang/crates.io-index"
265 checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa"
266 dependencies = [
267 "either",
268 "rayon-core",
269 ]
270
271 [[package]]
272 name = "rayon-core"
273 version = "1.12.1"
274 source = "registry+https://github.com/rust-lang/crates.io-index"
275 checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
276 dependencies = [
277 "crossbeam-deque",
278 "crossbeam-utils",
279 ]
280
281 [[package]]
282 name = "redox_syscall"
283 version = "0.5.1"
284 source = "registry+https://github.com/rust-lang/crates.io-index"
285 checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e"
286 dependencies = [
287 "bitflags",
288 ]
289
290 [[package]]
291 name = "scopeguard"
292 version = "1.2.0"
293 source = "registry+https://github.com/rust-lang/crates.io-index"
294 checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
295
296 [[package]]
297 name = "smallvec"
298 version = "1.13.2"
299 source = "registry+https://github.com/rust-lang/crates.io-index"
300 checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
301
302 [[package]]
303 name = "syn"
304 version = "2.0.66"
305 source = "registry+https://github.com/rust-lang/crates.io-index"
306 checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5"
307 dependencies = [
308 "proc-macro2",
309 "quote",
310 "unicode-ident",
311 ]
312
313 [[package]]
314 name = "target-lexicon"
315 version = "0.12.14"
316 source = "registry+https://github.com/rust-lang/crates.io-index"
317 checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f"
318
319 [[package]]
320 name = "unicode-ident"
321 version = "1.0.12"
322 source = "registry+https://github.com/rust-lang/crates.io-index"
323 checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
324
325 [[package]]
326 name = "unindent"
327 version = "0.2.3"
328 source = "registry+https://github.com/rust-lang/crates.io-index"
329 checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce"
330
331 [[package]]
332 name = "windows-targets"
333 version = "0.52.5"
334 source = "registry+https://github.com/rust-lang/crates.io-index"
335 checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb"
336 dependencies = [
337 "windows_aarch64_gnullvm",
338 "windows_aarch64_msvc",
339 "windows_i686_gnu",
340 "windows_i686_gnullvm",
341 "windows_i686_msvc",
342 "windows_x86_64_gnu",
343 "windows_x86_64_gnullvm",
344 "windows_x86_64_msvc",
345 ]
346
347 [[package]]
348 name = "windows_aarch64_gnullvm"
349 version = "0.52.5"
350 source = "registry+https://github.com/rust-lang/crates.io-index"
351 checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263"
352
353 [[package]]
354 name = "windows_aarch64_msvc"
355 version = "0.52.5"
356 source = "registry+https://github.com/rust-lang/crates.io-index"
357 checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6"
358
359 [[package]]
360 name = "windows_i686_gnu"
361 version = "0.52.5"
362 source = "registry+https://github.com/rust-lang/crates.io-index"
363 checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670"
364
365 [[package]]
366 name = "windows_i686_gnullvm"
367 version = "0.52.5"
368 source = "registry+https://github.com/rust-lang/crates.io-index"
369 checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9"
370
371 [[package]]
372 name = "windows_i686_msvc"
373 version = "0.52.5"
374 source = "registry+https://github.com/rust-lang/crates.io-index"
375 checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf"
376
377 [[package]]
378 name = "windows_x86_64_gnu"
379 version = "0.52.5"
380 source = "registry+https://github.com/rust-lang/crates.io-index"
381 checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9"
382
383 [[package]]
384 name = "windows_x86_64_gnullvm"
385 version = "0.52.5"
386 source = "registry+https://github.com/rust-lang/crates.io-index"
387 checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596"
388
389 [[package]]
390 name = "windows_x86_64_msvc"
391 version = "0.52.5"
392 source = "registry+https://github.com/rust-lang/crates.io-index"
393 checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0"
394
395 [[package]]
396 name = "zstd-safe"
397 version = "7.1.0"
398 source = "registry+https://github.com/rust-lang/crates.io-index"
399 checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a"
400 dependencies = [
401 "zstd-sys",
402 ]
403
404 [[package]]
405 name = "zstd-sys"
406 version = "2.0.10+zstd.1.5.6"
407 source = "registry+https://github.com/rust-lang/crates.io-index"
408 checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa"
409 dependencies = [
410 "cc",
411 "pkg-config",
412 ]
@@ -0,0 +1,30
[package]
name = "python-zstandard"
version = "0.23.0"
authors = ["Gregory Szorc <gregory.szorc@gmail.com>"]
edition = "2021"
license = "BSD-3-Clause"
readme = "README.rst"
description = "Python bindings to zstandard compression library"

# The library is built as a C-compatible dynamic library from sources that
# live under rust-ext/ rather than the default src/ directory.
[lib]
name = "backend_rust"
crate-type = ["cdylib"]
path = "rust-ext/src/lib.rs"

[dependencies]
libc = "0.2.155"
num_cpus = "1.16.0"
pyo3 = { version = "0.21.2", features = ["extension-module"] }
rayon = "1.10.0"
# zstd-safe and zstd-sys are configured with the same feature set.
zstd-safe = { version = "7.1.0", features = ["experimental", "legacy", "zstdmt"] }
zstd-sys = { version = "2.0.10+zstd.1.5.6", features = ["experimental", "legacy", "zstdmt"] }
@@ -0,0 +1,353
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 /* A Python C extension for Zstandard. */
10
11 #if defined(_WIN32)
12 #define WIN32_LEAN_AND_MEAN
13 #include <Windows.h>
14 #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \
15 defined(__NetBSD__) || defined(__DragonFly__)
16 #include <sys/types.h>
17
18 #include <sys/sysctl.h>
19
20 #endif
21
22 #include "python-zstandard.h"
23
24 #include "bufferutil.c"
25 #include "compressionchunker.c"
26 #include "compressiondict.c"
27 #include "compressionparams.c"
28 #include "compressionreader.c"
29 #include "compressionwriter.c"
30 #include "compressobj.c"
31 #include "compressor.c"
32 #include "compressoriterator.c"
33 #include "constants.c"
34 #include "decompressionreader.c"
35 #include "decompressionwriter.c"
36 #include "decompressobj.c"
37 #include "decompressor.c"
38 #include "decompressoriterator.c"
39 #include "frameparams.c"
40
41 PyObject *ZstdError;
42
43 static PyObject *estimate_decompression_context_size(PyObject *self) {
44 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
45 }
46
47 static PyObject *frame_content_size(PyObject *self, PyObject *args,
48 PyObject *kwargs) {
49 static char *kwlist[] = {"source", NULL};
50
51 Py_buffer source;
52 PyObject *result = NULL;
53 unsigned long long size;
54
55 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
56 kwlist, &source)) {
57 return NULL;
58 }
59
60 size = ZSTD_getFrameContentSize(source.buf, source.len);
61
62 if (size == ZSTD_CONTENTSIZE_ERROR) {
63 PyErr_SetString(ZstdError, "error when determining content size");
64 }
65 else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
66 result = PyLong_FromLong(-1);
67 }
68 else {
69 result = PyLong_FromUnsignedLongLong(size);
70 }
71
72 PyBuffer_Release(&source);
73
74 return result;
75 }
76
77 static PyObject *frame_header_size(PyObject *self, PyObject *args,
78 PyObject *kwargs) {
79 static char *kwlist[] = {"source", NULL};
80
81 Py_buffer source;
82 PyObject *result = NULL;
83 size_t zresult;
84
85 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
86 kwlist, &source)) {
87 return NULL;
88 }
89
90 zresult = ZSTD_frameHeaderSize(source.buf, source.len);
91 if (ZSTD_isError(zresult)) {
92 PyErr_Format(ZstdError, "could not determine frame header size: %s",
93 ZSTD_getErrorName(zresult));
94 }
95 else {
96 result = PyLong_FromSize_t(zresult);
97 }
98
99 PyBuffer_Release(&source);
100
101 return result;
102 }
103
104 static char zstd_doc[] = "Interface to zstandard";
105
106 static PyMethodDef zstd_methods[] = {
107 {"estimate_decompression_context_size",
108 (PyCFunction)estimate_decompression_context_size, METH_NOARGS, NULL},
109 {"frame_content_size", (PyCFunction)frame_content_size,
110 METH_VARARGS | METH_KEYWORDS, NULL},
111 {"frame_header_size", (PyCFunction)frame_header_size,
112 METH_VARARGS | METH_KEYWORDS, NULL},
113 {"get_frame_parameters", (PyCFunction)get_frame_parameters,
114 METH_VARARGS | METH_KEYWORDS, NULL},
115 {"train_dictionary", (PyCFunction)train_dictionary,
116 METH_VARARGS | METH_KEYWORDS, NULL},
117 {NULL, NULL}};
118
119 void bufferutil_module_init(PyObject *mod);
120 void compressobj_module_init(PyObject *mod);
121 void compressor_module_init(PyObject *mod);
122 void compressionparams_module_init(PyObject *mod);
123 void constants_module_init(PyObject *mod);
124 void compressionchunker_module_init(PyObject *mod);
125 void compressiondict_module_init(PyObject *mod);
126 void compressionreader_module_init(PyObject *mod);
127 void compressionwriter_module_init(PyObject *mod);
128 void compressoriterator_module_init(PyObject *mod);
129 void decompressor_module_init(PyObject *mod);
130 void decompressobj_module_init(PyObject *mod);
131 void decompressionreader_module_init(PyObject *mod);
132 void decompressionwriter_module_init(PyObject *mod);
133 void decompressoriterator_module_init(PyObject *mod);
134 void frameparams_module_init(PyObject *mod);
135
136 void zstd_module_init(PyObject *m) {
137 /* python-zstandard relies on unstable zstd C API features. This means
138 that changes in zstd may break expectations in python-zstandard.
139
140 python-zstandard is distributed with a copy of the zstd sources.
141 python-zstandard is only guaranteed to work with the bundled version
142 of zstd.
143
144 However, downstream redistributors or packagers may unbundle zstd
145 from python-zstandard. This can result in a mismatch between zstd
146 versions and API semantics. This essentially "voids the warranty"
147 of python-zstandard and may cause undefined behavior.
148
149 We detect this mismatch here and refuse to load the module if this
150 scenario is detected.
151 */
152 PyObject *features = NULL;
153 PyObject *feature = NULL;
154 unsigned zstd_ver_no = ZSTD_versionNumber();
155 unsigned our_hardcoded_version = 10506;
156 if (ZSTD_VERSION_NUMBER != our_hardcoded_version ||
157 zstd_ver_no != our_hardcoded_version) {
158 PyErr_Format(
159 PyExc_ImportError,
160 "zstd C API versions mismatch; Python bindings were not "
161 "compiled/linked against expected zstd version (%u returned by the "
162 "lib, %u hardcoded in zstd headers, %u hardcoded in the cext)",
163 zstd_ver_no, ZSTD_VERSION_NUMBER, our_hardcoded_version);
164 return;
165 }
166
167 features = PySet_New(NULL);
168 if (NULL == features) {
169 PyErr_SetString(PyExc_ImportError, "could not create empty set");
170 return;
171 }
172
173 feature = PyUnicode_FromString("buffer_types");
174 if (NULL == feature) {
175 PyErr_SetString(PyExc_ImportError, "could not create feature string");
176 return;
177 }
178
179 if (PySet_Add(features, feature) == -1) {
180 return;
181 }
182
183 Py_DECREF(feature);
184
185 #ifdef HAVE_ZSTD_POOL_APIS
186 feature = PyUnicode_FromString("multi_compress_to_buffer");
187 if (NULL == feature) {
188 PyErr_SetString(PyExc_ImportError, "could not create feature string");
189 return;
190 }
191
192 if (PySet_Add(features, feature) == -1) {
193 return;
194 }
195
196 Py_DECREF(feature);
197 #endif
198
199 #ifdef HAVE_ZSTD_POOL_APIS
200 feature = PyUnicode_FromString("multi_decompress_to_buffer");
201 if (NULL == feature) {
202 PyErr_SetString(PyExc_ImportError, "could not create feature string");
203 return;
204 }
205
206 if (PySet_Add(features, feature) == -1) {
207 return;
208 }
209
210 Py_DECREF(feature);
211 #endif
212
213 if (PyObject_SetAttrString(m, "backend_features", features) == -1) {
214 return;
215 }
216
217 Py_DECREF(features);
218
219 bufferutil_module_init(m);
220 compressionparams_module_init(m);
221 compressiondict_module_init(m);
222 compressobj_module_init(m);
223 compressor_module_init(m);
224 compressionchunker_module_init(m);
225 compressionreader_module_init(m);
226 compressionwriter_module_init(m);
227 compressoriterator_module_init(m);
228 constants_module_init(m);
229 decompressor_module_init(m);
230 decompressobj_module_init(m);
231 decompressionreader_module_init(m);
232 decompressionwriter_module_init(m);
233 decompressoriterator_module_init(m);
234 frameparams_module_init(m);
235 }
236
237 #if defined(__GNUC__) && (__GNUC__ >= 4)
238 #define PYTHON_ZSTD_VISIBILITY __attribute__((visibility("default")))
239 #else
240 #define PYTHON_ZSTD_VISIBILITY
241 #endif
242
243 static struct PyModuleDef zstd_module = {PyModuleDef_HEAD_INIT, "zstd",
244 zstd_doc, -1, zstd_methods};
245
246 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
247 PyObject *m = PyModule_Create(&zstd_module);
248 if (m) {
249 zstd_module_init(m);
250 if (PyErr_Occurred()) {
251 Py_DECREF(m);
252 m = NULL;
253 }
254 }
255 return m;
256 }
257
/* Attempt to resolve the number of CPUs in the system.
 *
 * Returns the count reported by the platform-specific query, or 0 when the
 * platform is not handled below or the query fails.
 */
int cpu_count(void) {
    int count = 0;

#if defined(_WIN32)
    SYSTEM_INFO si;
    si.dwNumberOfProcessors = 0;
    GetSystemInfo(&si);
    count = si.dwNumberOfProcessors;
#elif defined(__APPLE__)
    int num;
    size_t size = sizeof(int);

    if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
        count = num;
    }
#elif defined(__linux__)
    count = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || \
    defined(__DragonFly__)
    int mib[2];
    size_t len = sizeof(count);
    mib[0] = CTL_HW;
    mib[1] = HW_NCPU;
    if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
        count = 0;
    }
#elif defined(__hpux)
    count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
#endif

    /* sysconf() and mpctl() signal failure with -1. Normalize that to 0 so
     * every platform reports "unknown" the same way and callers never see
     * a negative CPU count. */
    if (count < 0) {
        count = 0;
    }

    return count;
}
291
/* Round `i` up to the next power of two.
 *
 * A value that already is a power of two is returned unchanged. An input
 * of 0, or one larger than the biggest power of two representable in
 * size_t, wraps around and yields 0.
 */
size_t roundpow2(size_t i) {
    size_t shift;

    i--;
    /* Smear the highest set bit into every lower position. The shift is
     * doubled until it covers the full width of size_t, so 64-bit values
     * above 2^32 are handled as well as 32-bit ones. */
    for (shift = 1; shift < sizeof(i) * 8; shift <<= 1) {
        i |= i >> shift;
    }
    i++;

    return i;
}
303
304 /* Safer version of _PyBytes_Resize().
305 *
306 * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
307 * we can get an object with a refcount > 1, even if it was just created
308 * with PyBytes_FromStringAndSize()! That's because (at least) CPython
309 * pre-allocates PyBytes instances of size 1 for every possible byte value.
310 *
311 * If non-0 is returned, obj may or may not be NULL.
312 */
313 int safe_pybytes_resize(PyObject **obj, Py_ssize_t size) {
314 PyObject *tmp;
315
316 if ((*obj)->ob_refcnt == 1) {
317 return _PyBytes_Resize(obj, size);
318 }
319
320 tmp = PyBytes_FromStringAndSize(NULL, size);
321 if (!tmp) {
322 return -1;
323 }
324
325 memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj),
326 PyBytes_GET_SIZE(*obj));
327
328 Py_DECREF(*obj);
329 *obj = tmp;
330
331 return 0;
332 }
333
334 // Set/raise an `io.UnsupportedOperation` exception.
335 void set_io_unsupported_operation(void) {
336 PyObject *iomod;
337 PyObject *exc;
338
339 iomod = PyImport_ImportModule("io");
340 if (NULL == iomod) {
341 return;
342 }
343
344 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
345 if (NULL == exc) {
346 Py_DECREF(iomod);
347 return;
348 }
349
350 PyErr_SetNone(exc);
351 Py_DECREF(exc);
352 Py_DECREF(iomod);
353 }
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
@@ -273,6 +273,8 9751b9ccd74d8386687f88fbdfe280877840ec7d
273 273 b964f92261d4fbb64f19aa6af2b072f7730b913a 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAme2VVUZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVn3aDACSMVaJexSgl1UfjBAKjwaF4t9Y2pBKnYibahXmddViwhhIISPzeVtvaM9y/4Cm4SP11S6PQ356aiZ3RjhtQbmRHQJe5cXGkBaxykIxLSC/KgDy9HXHDDATwvo+aF/QVBX8ig/cr0NdVpwtvQq7+rkDNfbObwu8pPIbZGqOoNM1ND2Kz6P+FqbNZfGPwLP/AaCtCl2dXcf/Z774JUsAEZ6InqvP1/m/atAG7phesXhem8cpPb6e8LohuiJpnbV2rUj7SEqk0eF2BRapSukSZC2vxdqsy4hcXO1uwJ3V3GPtegpdMG25OE3ALy/2WKoh4inJV+WfJy1+DEiSdP++Rpadv/By68WIBvWY/rKgWAYPqIE5IKH5CtcZkkFMtfoooFGiz7uvci5+ZaetZnHVPm9FZH3KZsNccsESDkT25I+rwynqt8LKt1qEA+Ur43U6ipG+LZxT7sOGGYYElU6cSoSIcrcMUfsbi0XhgpnZch4QwjoMyzWnXgcjnivnn3arMkw=
274 274 89ab2459f62ac8da3eb4f3ee2120d1814ce805d5 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfIwvYfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCezXwD/9bb5PG/f9WN/7XgzumLCatokHgAGWTNLmEmP414yT5UAf/hyiMbzUwO9TGKbe8s1UPrlMDfp+zoP/wMc6UrT4vXKwOTbfANyUsZ/T67m4BxyTi+fke5a4Ghb9OsV/qMiwmO6jLC4hk24dodgmZwL/H6JFgsPEHTElSCa9sVv6LZ9NZwu5xn3JgjkP34/l7niMIhOSyfHNOpF4rfWcc8bbojxNHru8nzPx8OVBLVHnidh0dM1D3dkbkRKuxJftzVNkgaw+kI3mpAKntInrVZfqCmrni6biY/GF2Lr+sYA/rWXsdEhZG3EtysP0UhmUYRryKJeLPDZsWIaTqJVPLYRe38DNpfNxQYRUo3mZkbbE0kf389jSsRyVKIK2UYBEKGRg98BHq3Urvdn9lu9s725gfgJMeEsZRt/ljTbmdbpxMUVIFaB0Paw+PlzjTklxlFZCxEnBI34yhHxXNXfQgLvNbokPqF4T5Y5pCWplULlaCBosqu1EGHDxv+o+k2/NECE4RGnv9vON6sweADdCgL3yDlFn7PW0eudQlpnf2/mIzCivFL1BKyUJb1XcFwAQFHRJDyBdaWHtYAswkZMihMVysI8GfjePMpVF2Q4O5osYTPtIXAX92HXJwsp1jGaaP6g2Ipy9v04Q8RH33Nk5i/TEy54jJu7e435wrVhyCnHKntw==
275 275 4654407c6df8510f8c84741c61b905d71f6adc8f 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfaBCYfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCe8lFD/411juPs3B0x9OBhEK8EChaieu7q8U8hL6pHQ64p7J2j+OORuvVtemsFZ2jvQ53ERX+eSOy+O1B6vZS9W+4scEoGFrPksNaiKQAlBnnEncePv2RJgBP2BNzlp2VYJgL0u7gbTWLeJcrqEZ+4JDmhrYlTPdZ21/cSUbX+kEShpO+tl10cEdmuZLxyu+/0uiSKYQ076MvNPoWL/75Y67kIRBC9Wufufhec0BneMwBF1vvpOEYrF33tJS2AZk+6YHZc7jOic1yMTadrXz+roFkDBsKLjJRk5W8d8f/IMKev0O4fk3m/s4nOFvWPFxGejCbBC3Aj1nyvUjS5HiIa4zBUw1mJl1Ws55xH+tTyNweSdl2v78xYynThpfe7a80zW9/CtGmcuG7CgMW/aq4TlxtMUd74uQowGHQw9vSJT1dWFASud4JPzrpnSTzUXgd2GBYwvhz0hdZdJfY0+eoNI2yz3T+sGEf0wmRF8iVZ1qXInIe7/qusMG93L9sl1H9SebHu/OlTQJLyGxqeidWZintRT6hzsbTYew6cK+Eevpg0TTMFibovjlINRHwwkgYv9/iU/+vlN176nuyywnNpkqv14Xdfpf26qPl6uDHH3JLRree1y4L/2due8hr9vfrNYq9+Cq6FJgjQsSTiJHUYjRKElMDkIkjQPXyMzW1yOk7n6LBsw==
276 3d34a380ea3aecef2db1a468d487f675ae0b45f5 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmfujfsZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVslIC/4s39YHv+1dmkU1X1b+VU5G1XpnbHwA0sVymbKuDhlDjbacZhwTamLF8bJioiJhNlPEpbURnuh8ndaldrc4w2MdOLBGpKhPC+ixlEYh3eQqidyzy90xQKdOdilq4QJZ0CcIXpJV8SkgV0i+WQHT5zg3SRhder+NYv4Lu+XFmZ/FSp6Q5r6uyc4+/RIvnU7x+G/AMpeWec9EcruZ8cAuEZQ4yWdqsYXrya6JEfEJdmfW5Wb3ZlF6vAbIgoLlZApFM4Ld1C8wlhdjjX2X7CJmFWb2zAmENaF+6JhhTagkSGA1xapZCi5Ldd2XA1w+qBhAAGdmuTCISL4BSeQVg6pEWDd8QHNl1O0d/acY6yy4sRO8iP6hHbngs7Dd0j06canxHKEq4E9DW6hues7kmKzetol8k9VVk5KaAHvW2rjMDm9PTGY4S29mlU69PIbe2GgCVkhk0CXV6w+XKSdXK7N5cuJnjyEcIYw6Sp4bPJ3YfKF4wcvyueXzPC8glNHgb5cGi/I=
276 277 ed68f9a47a090cc6450a7d5617baf123ba5fc42a 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfCIFcfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCeyueD/94hn/kb5U9hvtQlUm0el+rV+DAHcoS5GzF/V3Q3/ibpcym6c/JaFdi2S0+kNpam5DAaZBQpQj43YHJUehnMqpZ8XxZuFrhgVA95AQhjKtXualFcnfAnqrT7zdLEgKLHQ3qW6tU0djflkLnv1MDJmQXRKfibVQetgoyctKWzaTSf/V1eOHv0TutV3SEFnymdvMOi0wOmzOJIU/OLE7kWX2kUab1sDWagenoF9nAwDMLTgodLkNcSSTEv7t3iOKoZ8F+OECbGyRSC1bqwuGdL7ndyzXBPycMANYmT2584smRQsqNwpD+TTRNw3uMjqk1tsTkMUJdfaVYpPO2e2rhOZjFxDFfS95jDYLlG8rOwrmr9pYSwA7LV3ZJP9L3LBdcnIMxRF9iyoGLq5zLQgOzwhyDoqKGtTGKa/JnR+Hs6oCQYe12mQAlY797TQUHFUbNf+va0DaJpZzW0MFsUfr+SgL/ruHnOYa2ijejwxHLP38pvAMBSZKvBkW0gLcPS16kvk4XZtbHXeFbRhFiKOxgrkLQyncoFk7N5XNVucUsIsMd2JFwFTpxGrp+TploJw0AC/xAC816XY2iIulKVyZ3VjOI0+oqZRVr4T7fJCzs6bcqYvShMf96J1wS5crKBeTIiTZYJCakj/ql4GvJmSshxGa3flL695HMcqWUcE5Ql1BHgg==
277 278 cb2b2242428df5be87693b8acaac71deafcff2c5 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfYltMfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCe3evD/0QdpG+TO2caznVLGTXm5dccYKLfSdFymoBoZKkzGHqzZgbeT6uw3NubRNthqd++lby9pfzIi4DBV+LUc2/IC+jWBzNKKFIm6OpXXtVc2TRj0MN3Nbhx761QDmguVmWuyqZu8/uAttCbFvf91LE2Oky2FtLfUFDzcWDf0p+FzkfMZaZktQnSi9O3xFfbdA0j6dVmxuYfOIbGlCznxwXgbOaaAjMl6dCaE3MKjgZsrG6lmwFvCY3GVRp255sLm4bjHoGHyEJRsdp74BVzG6tdNJ5HjD6Wd1ATFUbhpE7CmEPZ6a5joCbxkXoOZxAYWF6IBDQeHaITnRL29xc6I15I5sKoCtixepPlJONucYke1BLSxW3nYHpnmeKeVhwEE03es6A1jDKxXZp5CZ54fBcGD/KlZ6Yd0tMaZGoczUpLU0suGU5xLtCTcS9XSEOsC1yt5kl7/A8r0GPaGq6oOOJNPJBXpZ5hQdaUSmM9ilzbSZBMnjd1TlqQq7tsJv471E+YehO5PAf2vqFtfcNSdDhinL7xGqZH5/vMOH0F+lIArleNIQDXsuXDeKz22vFdiBmJBSZPmAuJ2uyiTEaSbFXpDyM/CVL2sKNziWE3UqqTi5e2REdvUfqmAuc7Q9RHWqbt9ERh0bIIdbDgNkn81beX1AADAcUm8MS7mrddcgMMNPx8A==
278 279 9b285d51e4b6fb01eb9de357092b993311bd1152 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmfjNHcZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVpLhC/9r0GZ2P280J5xao8BH223CXOSNznVgEHOwHqPD172iIV9/4wCfhhuHh5AGoLU46ARp25cy8sobnTXvRojmtu22jJxnq4uBsWkCCrF4bdsHI3GVuJS8oiZLkqdjkEjV1bEbtiBo/C2KPC+m+o5/fW91MgeGng5xjrKULozjQfwNNghQjLDNRqVoMTlQsjNhXyddSEaesfNYz/IUz0zcpfFw7aKlpEZVps0qKu8OcPggHOUEdaTgFBpzT2JxkGukIlVFtQdkDjz5EyNKjFXdTkKmhb6+4u5C+NDXM+d7p5/g3TU5cQa0ScCp1f3NqHl5cMF8axc5FUK+FTq9JOyPSBvFTxPTghc46mMb+4g2CRupigZJnO106saBqKsSZC76T6N0fN0wgUNEsMM0iS5binm6vTcNRHJdjezksWyfuPPmtGVKyGc5zanMY+xNhVASQlXp/nldS1p9iEMxUy4OjC++lrp0efkSnGX/4dJ718cDV78NoRVVG0hXazAgNKIr7nU=
280 010a86744bfc6ede2e3ed3ecef3faee0de804f47 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfySqkfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCe5c+D/4z66Y4f8AGos/u9zBzCy2CFQ5sXyK+b8nMmutbaWSqp3FkgVq7/ULqY76eSDI3EMqW6sGUQNOfW7jWJHzW9TMZc8KC/Z4cLN2Q7fSlx0o0b7to2i6Gq4C6UJwExruNm9zL/LO8zBBsaN24SSLYCes2R6EKy0RYNWhn41a5qCEMkH9CYJHJZzpKUmDLO7yePolspkZdcxjOE8TM2MAJ9+S/BOPZmGEHcwP89XnXurYFXA4YA/jC+7v4vWnPEUYPB+YYiKTAig5CisQ3JYdPJm+HLYbrVhANLrTMUDDXMPSU/q0KgS+RdIfbpzNygiHihcuGw6Nw+/w2sdrR8RH1RbsdyCvQYLm3R3xcUM67ciAH6HKwnfn9/8yezsmJrX29w859rcbA+2A+Wqg9dFxQ7VBh9I4JU6Azf5V8pa1b2fmq7jOm2g58Be5cuCWJ9eoD04KYhcFwpfvHWN+MCW8bOQf93+V8fp/HvC7GolCCsqXZwK4usY4uZoRbuXyJ7TXTNOW2VeqQTLR50rM5HTV9zAHoFBK2aX/yWHFm9XvlqHHle6iVpRCWzASPMO69q8CdkJtzPkFu0ultVrmTTiZo2eP0GO9h3EwmEsijqQ2hyhl3Q2pH/FmRfFD9Z2qgCs+i58iQuTFrZUwljtUt/rFMkXmA2CrM92hxUqtpEDgFjIdFtw==
@@ -289,6 +289,8 9751b9ccd74d8386687f88fbdfe280877840ec7d
289 289 b964f92261d4fbb64f19aa6af2b072f7730b913a 6.9.2
290 290 89ab2459f62ac8da3eb4f3ee2120d1814ce805d5 6.9.3
291 291 4654407c6df8510f8c84741c61b905d71f6adc8f 6.9.4
292 3d34a380ea3aecef2db1a468d487f675ae0b45f5 6.9.5
292 293 ed68f9a47a090cc6450a7d5617baf123ba5fc42a 7.0rc0
293 294 cb2b2242428df5be87693b8acaac71deafcff2c5 7.0rc1
294 295 9b285d51e4b6fb01eb9de357092b993311bd1152 7.0
296 010a86744bfc6ede2e3ed3ecef3faee0de804f47 7.0.1
@@ -591,6 +591,9 build-c-wheel-macos:
591 591 tags:
592 592 - macos
593 593 script:
594 - sh -c 'which "$PYTHON"'
595 - sh -c '"$PYTHON" -V'
596 - sh -c 'which cibuildwheel'
594 597 - PLATFORM=`$PYTHON -c 'import sys; print(sys.platform)'`
595 598 - rm -rf tmp-wheels
596 599 - cibuildwheel --output-dir tmp-wheels/
@@ -1,9 +1,10
1 1 graft c-ext
2 2 graft debian
3 graft rust-ext
3 4 graft zstd
4 5 graft tests
5 6 include make_cffi.py
6 7 include setup_zstd.py
7 include zstd.c
8 include Cargo.lock
9 include Cargo.toml
8 10 include LICENSE
9 include NEWS.rst
This diff has been collapsed as it changes many lines, (1597 lines changed) Show them Hide them
@@ -2,6 +2,8
2 2 python-zstandard
3 3 ================
4 4
5 | |ci-test| |ci-wheel| |ci-typing| |ci-sdist| |ci-anaconda| |ci-sphinx|
6
5 7 This project provides Python bindings for interfacing with the
6 8 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 9 and CFFI interface are provided.
@@ -11,1592 +13,25 underlying C API through a Pythonic inte
11 13 performance. This means exposing most of the features and flexibility
12 14 of the C API while not sacrificing usability or safety that Python provides.
13 15
14 The canonical home for this project lives in a Mercurial repository run by
15 the author. For convenience, that repository is frequently synchronized to
16 The canonical home for this project is
16 17 https://github.com/indygreg/python-zstandard.
17 18
18 | |ci-status|
19
20 Requirements
21 ============
22
23 This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8
24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
26
27 Installing
28 ==========
29
30 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
31 So, to install this package::
32
33 $ pip install zstandard
34
35 Binary wheels are made available for some platforms. If you need to
36 install from a source distribution, all you should need is a working C
37 compiler and the Python development headers/libraries. On many Linux
38 distributions, you can install a ``python-dev`` or ``python-devel``
39 package to provide these dependencies.
40
41 Packages are also uploaded to Anaconda Cloud at
42 https://anaconda.org/indygreg/zstandard. See that URL for how to install
43 this package with ``conda``.
44
45 Performance
46 ===========
47
48 zstandard is a highly tunable compression algorithm. In its default settings
49 (compression level 3), it will be faster at compression and decompression and
50 will have better compression ratios than zlib on most data sets. When tuned
51 for speed, it approaches lz4's speed and ratios. When tuned for compression
52 ratio, it approaches lzma ratios and compression speed, but decompression
53 speed is much faster. See the official zstandard documentation for more.
54
55 zstandard and this library support multi-threaded compression. There is a
56 mechanism to compress large inputs using multiple threads.
57
58 The performance of this library is usually very similar to what the zstandard
59 C API can deliver. Overhead in this library is due to general Python overhead
60 and can't easily be avoided by *any* zstandard Python binding. This library
61 exposes multiple APIs for performing compression and decompression so callers
62 can pick an API suitable for their need. Contrast with the compression
63 modules in Python's standard library (like ``zlib``), which only offer limited
64 mechanisms for performing operations. The API flexibility means consumers can
65 choose to use APIs that facilitate zero copying or minimize Python object
66 creation and garbage collection overhead.
67
68 This library is capable of single-threaded throughputs well over 1 GB/s. For
69 exact numbers, measure yourself. The source code repository has a ``bench.py``
70 script that can be used to measure things.
71
72 API
73 ===
74
75 To interface with Zstandard, simply import the ``zstandard`` module::
76
77 import zstandard
78
79 It is a popular convention to alias the module as a different name for
80 brevity::
81
82 import zstandard as zstd
83
84 This module attempts to import and use either the C extension or CFFI
85 implementation. On Python platforms known to support C extensions (like
86 CPython), it raises an ImportError if the C extension cannot be imported.
87 On Python platforms known to not support C extensions (like PyPy), it only
88 attempts to import the CFFI implementation and raises ImportError if that
89 can't be done. On other platforms, it first tries to import the C extension
90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
91
92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
93 environment variable can be set. The following values are accepted:
94
95 default
96 The behavior described above.
97 cffi_fallback
98 Always try to import the C extension then fall back to CFFI if that
99 fails.
100 cext
101 Only attempt to import the C extension.
102 cffi
103 Only attempt to import the CFFI implementation.
104
105 In addition, the ``zstandard`` module exports a ``backend`` attribute
106 containing the string name of the backend being used. It will be one
107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
108
109 The types, functions, and attributes exposed by the ``zstandard`` module
110 are documented in the sections below.
111
112 .. note::
113
114 The documentation in this section makes references to various zstd
115 concepts and functionality. The source repository contains a
116 ``docs/concepts.rst`` file explaining these in more detail.
117
118 ZstdCompressor
119 --------------
120
121 The ``ZstdCompressor`` class provides an interface for performing
122 compression operations. Each instance is essentially a wrapper around a
123 ``ZSTD_CCtx`` from the C API.
124
125 Each instance is associated with parameters that control compression
126 behavior. These come from the following named arguments (all optional):
127
128 level
129 Integer compression level. Valid values are between 1 and 22.
130 dict_data
131 Compression dictionary to use.
132
133 Note: When using dictionary data and ``compress()`` is called multiple
134 times, the ``ZstdCompressionParameters`` derived from an integer
135 compression ``level`` and the first compressed data's size will be reused
136 for all subsequent operations. This may not be desirable if source data
137 size varies significantly.
138 compression_params
139 A ``ZstdCompressionParameters`` instance defining compression settings.
140 write_checksum
141 Whether a 4 byte checksum should be written with the compressed data.
142 Defaults to False. If True, the decompressor can verify that decompressed
143 data matches the original input data.
144 write_content_size
145 Whether the size of the uncompressed data will be written into the
146 header of compressed data. Defaults to True. The data will only be
147 written if the compressor knows the size of the input data. This is
148 often not true for streaming compression.
149 write_dict_id
150 Whether to write the dictionary ID into the compressed data.
151 Defaults to True. The dictionary ID is only written if a dictionary
152 is being used.
153 threads
154 Enables and sets the number of threads to use for multi-threaded compression
155 operations. Defaults to 0, which means to use single-threaded compression.
156 Negative values will resolve to the number of logical CPUs in the system.
157 Read below for more info on multi-threaded compression. This argument only
158 controls thread count for operations that operate on individual pieces of
159 data. APIs that spawn multiple threads for working on multiple pieces of
160 data have their own ``threads`` argument.
161
162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
163 ``write_content_size``, ``write_dict_id``, and ``threads``.
164
165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
166 instances can be called from multiple Python threads simultaneously. In other
167 words, assume instances are not thread safe unless stated otherwise.
168
169 Utility Methods
170 ^^^^^^^^^^^^^^^
171
172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
173 ingested, consumed, and produced by the current compression operation.
174
175 ``memory_size()`` obtains the memory utilization of the underlying zstd
176 compression context, in bytes.::
177
178 cctx = zstd.ZstdCompressor()
179 memory = cctx.memory_size()
180
181 Simple API
182 ^^^^^^^^^^
183
184 ``compress(data)`` compresses and returns data as a one-shot operation.::
185
186 cctx = zstd.ZstdCompressor()
187 compressed = cctx.compress(b'data to compress')
188
189 The ``data`` argument can be any object that implements the *buffer protocol*.
190
191 Stream Reader API
192 ^^^^^^^^^^^^^^^^^
193
194 ``stream_reader(source)`` can be used to obtain an object conforming to the
195 ``io.RawIOBase`` interface for reading compressed output as a stream::
196
197 with open(path, 'rb') as fh:
198 cctx = zstd.ZstdCompressor()
199 reader = cctx.stream_reader(fh)
200 while True:
201 chunk = reader.read(16384)
202 if not chunk:
203 break
204
205 # Do something with compressed chunk.
206
207 Instances can also be used as context managers::
208
209 with open(path, 'rb') as fh:
210 with cctx.stream_reader(fh) as reader:
211 while True:
212 chunk = reader.read(16384)
213 if not chunk:
214 break
215
216 # Do something with compressed chunk.
217
218 When the context manager exits or ``close()`` is called, the stream is closed,
219 underlying resources are released, and future operations against the compression
220 stream will fail.
221
222 The ``source`` argument to ``stream_reader()`` can be any object with a
223 ``read(size)`` method or any object implementing the *buffer protocol*.
224
225 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
226 stream is. This is used to adjust compression parameters so they are
227 tailored to the source size.::
228
229 with open(path, 'rb') as fh:
230 cctx = zstd.ZstdCompressor()
231 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
232 ...
233
234 If the ``source`` is a stream, you can specify how large ``read()`` requests
235 to that stream should be via the ``read_size`` argument. It defaults to
236 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
237
238 with open(path, 'rb') as fh:
239 cctx = zstd.ZstdCompressor()
240 # Will perform fh.read(8192) when obtaining data to feed into the
241 # compressor.
242 with cctx.stream_reader(fh, read_size=8192) as reader:
243 ...
244
245 The stream returned by ``stream_reader()`` is neither writable nor seekable
246 (even if the underlying source is seekable). ``readline()`` and
247 ``readlines()`` are not implemented because they don't make sense for
248 compressed data. ``tell()`` returns the number of compressed bytes
249 emitted so far.
250
251 Streaming Input API
252 ^^^^^^^^^^^^^^^^^^^
253
254 ``stream_writer(fh)`` allows you to *stream* data into a compressor.
255
256 Returned instances implement the ``io.RawIOBase`` interface. Only methods
257 that involve writing will do useful things.
258
259 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
260 compressed data is available, ``write()`` will be called with the compressed
261 data as its argument. Many common Python types implement ``write()``, including
262 open file handles and ``io.BytesIO``.
263
264 The ``write(data)`` method is used to feed data into the compressor.
265
266 The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
267 data remains within the compressor's internal state into the output object. This
268 may result in 0 or more ``write()`` calls to the output object. This method
269 accepts an optional ``flush_mode`` argument to control the flushing behavior.
270 Its value can be any of the ``FLUSH_*`` constants.
271
272 Both ``write()`` and ``flush()`` return the number of bytes written to the
273 object's ``write()``. In many cases, small inputs do not accumulate enough
274 data to cause a write and ``write()`` will return ``0``.
275
276 Calling ``close()`` will mark the stream as closed and subsequent I/O
277 operations will raise ``ValueError`` (per the documented behavior of
278 ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
279 stream if such a method exists.
280
281 Typical usage is as follows::
282
283 cctx = zstd.ZstdCompressor(level=10)
284 compressor = cctx.stream_writer(fh)
285
286 compressor.write(b'chunk 0\n')
287 compressor.write(b'chunk 1\n')
288 compressor.flush()
289 # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
290 # Receiver is also expecting more data in the zstd *frame*.
291
292 compressor.write(b'chunk 2\n')
293 compressor.flush(zstd.FLUSH_FRAME)
294 # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``.
295 # Receiver is expecting no more data, as the zstd frame is closed.
296 # Any future calls to ``write()`` at this point will construct a new
297 # zstd frame.
298
299 Instances can be used as context managers. Exiting the context manager is
300 the equivalent of calling ``close()``, which is equivalent to calling
301 ``flush(zstd.FLUSH_FRAME)``::
302
303 cctx = zstd.ZstdCompressor(level=10)
304 with cctx.stream_writer(fh) as compressor:
305 compressor.write(b'chunk 0')
306 compressor.write(b'chunk 1')
307 ...
308
309 .. important::
310
311 If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
312 a full zstd *frame* and consumers of this data may complain about malformed
313 input. It is recommended to use instances as a context manager to ensure
314 *frames* are properly finished.
315
316 If the size of the data being fed to this streaming compressor is known,
317 you can declare it before compression begins::
318
319 cctx = zstd.ZstdCompressor()
320 with cctx.stream_writer(fh, size=data_len) as compressor:
321 compressor.write(chunk0)
322 compressor.write(chunk1)
323 ...
324
325 Declaring the size of the source data allows compression parameters to
326 be tuned. And if ``write_content_size`` is used, it also results in the
327 content size being written into the frame header of the output data.
328
329 The size of chunks being ``write()`` to the destination can be specified::
330
331 cctx = zstd.ZstdCompressor()
332 with cctx.stream_writer(fh, write_size=32768) as compressor:
333 ...
334
335 To see how much memory is being used by the streaming compressor::
336
337 cctx = zstd.ZstdCompressor()
338 with cctx.stream_writer(fh) as compressor:
339 ...
340 byte_size = compressor.memory_size()
341
342 The total number of bytes written so far is exposed via ``tell()``::
343
344 cctx = zstd.ZstdCompressor()
345 with cctx.stream_writer(fh) as compressor:
346 ...
347 total_written = compressor.tell()
348
349 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
350 the return value of ``write()``. When ``False`` (the default), ``write()`` returns
351 the number of bytes that were ``write()``en to the underlying object. When
352 ``True``, ``write()`` returns the number of bytes read from the input that
353 were subsequently written to the compressor. ``True`` is the *proper* behavior
354 for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
355 the default value in a future release.
356
357 Streaming Output API
358 ^^^^^^^^^^^^^^^^^^^^
359
360 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
361 compressor as an iterator of data chunks.::
362
363 cctx = zstd.ZstdCompressor()
364 for chunk in cctx.read_to_iter(fh):
365 # Do something with emitted data.
366
367 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
368 conforms to the buffer protocol.
369
370 Uncompressed data is fetched from the source either by calling ``read(size)``
371 or by fetching a slice of data from the object directly (in the case where
372 the buffer protocol is being used). The returned iterator consists of chunks
373 of compressed data.
374
375 If reading from the source via ``read()``, ``read()`` will be called until
376 it raises or returns an empty bytes (``b''``). It is perfectly valid for
377 the source to deliver fewer bytes than were requested by ``read(size)``.
378
379 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
380 declaring the size of the input stream::
381
382 cctx = zstd.ZstdCompressor()
383 for chunk in cctx.read_to_iter(fh, size=some_int):
384 pass
385
386 You can also control the size that data is ``read()`` from the source and
387 the ideal size of output chunks::
388
389 cctx = zstd.ZstdCompressor()
390 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
391 pass
392
393 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
394 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
395 be whatever the object being read from delivers. These will often be of a
396 uniform size.
397
398 Stream Copying API
399 ^^^^^^^^^^^^^^^^^^
400
401 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
402 compressing it.::
403
404 cctx = zstd.ZstdCompressor()
405 cctx.copy_stream(ifh, ofh)
406
407 For example, say you wish to compress a file::
408
409 cctx = zstd.ZstdCompressor()
410 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
411 cctx.copy_stream(ifh, ofh)
19 For usage documentation, see https://python-zstandard.readthedocs.org/.
412 20
413 It is also possible to declare the size of the source stream::
414
415 cctx = zstd.ZstdCompressor()
416 cctx.copy_stream(ifh, ofh, size=len_of_input)
417
418 You can also specify how large the chunks that are ``read()`` and ``write()``
419 from and to the streams::
420
421 cctx = zstd.ZstdCompressor()
422 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
423
424 The stream copier returns a 2-tuple of bytes read and written::
425
426 cctx = zstd.ZstdCompressor()
427 read_count, write_count = cctx.copy_stream(ifh, ofh)
428
429 Compressor API
430 ^^^^^^^^^^^^^^
431
432 ``compressobj()`` returns an object that exposes ``compress(data)`` and
433 ``flush()`` methods. Each returns compressed data or an empty bytes.
434
435 The purpose of ``compressobj()`` is to provide an API-compatible interface
436 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
437 swap in different compressor objects while using the same API.
438
439 ``flush()`` accepts an optional argument indicating how to end the stream.
440 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
441 Once this type of flush is performed, ``compress()`` and ``flush()`` can
442 no longer be called. This type of flush **must** be called to end the
443 compression context. If not called, returned data may be incomplete.
444
445 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
446 zstd block. Flushes of this type can be performed multiple times. The next
447 call to ``compress()`` will begin a new zstd block.
448
449 Here is how this API should be used::
450
451 cctx = zstd.ZstdCompressor()
452 cobj = cctx.compressobj()
453 data = cobj.compress(b'raw input 0')
454 data = cobj.compress(b'raw input 1')
455 data = cobj.flush()
456
457 Or to flush blocks::
458
459 cctx = zstd.ZstdCompressor()
460 cobj = cctx.compressobj()
461 data = cobj.compress(b'chunk in first block')
462 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
463 data = cobj.compress(b'chunk in second block')
464 data = cobj.flush()
465
466 For best performance results, keep input chunks under 256KB. This avoids
467 extra allocations for a large output object.
468
469 It is possible to declare the input size of the data that will be fed into
470 the compressor::
471
472 cctx = zstd.ZstdCompressor()
473 cobj = cctx.compressobj(size=6)
474 data = cobj.compress(b'foobar')
475 data = cobj.flush()
476
477 Chunker API
478 ^^^^^^^^^^^
479
480 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
481 an object that can be used to iteratively feed chunks of data into a compressor
482 and produce output chunks of a uniform size.
483
484 The object returned by ``chunker()`` exposes the following methods:
485
486 ``compress(data)``
487 Feeds new input data into the compressor.
488
489 ``flush()``
490 Flushes all data currently in the compressor.
491
492 ``finish()``
493 Signals the end of input data. No new data can be compressed after this
494 method is called.
495
496 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
497 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
498 MUST iterate through all elements of the returned iterator before performing
499 another operation on the object.
500
501 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
502
503 ``flush()`` and ``finish()`` may return a final chunk smaller than
504 ``chunk_size``.
505
506 Here is how the API should be used::
507
508 cctx = zstd.ZstdCompressor()
509 chunker = cctx.chunker(chunk_size=32768)
510
511 with open(path, 'rb') as fh:
512 while True:
513 in_chunk = fh.read(32768)
514 if not in_chunk:
515 break
516
517 for out_chunk in chunker.compress(in_chunk):
518 # Do something with output chunk of size 32768.
519
520 for out_chunk in chunker.finish():
521 # Do something with output chunks that finalize the zstd frame.
522
523 The ``chunker()`` API is often a better alternative to ``compressobj()``.
524
525 ``compressobj()`` will emit output data as it is available. This results in a
526 *stream* of output chunks of varying sizes. The consistency of the output chunk
527 size with ``chunker()`` is more appropriate for many usages, such as sending
528 compressed data to a socket.
529
530 ``compressobj()`` may also perform extra memory reallocations in order to
531 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
532 chunks are all the same size (except for flushed or final chunks), there is
533 less memory allocation overhead.
534
535 Batch Compression API
536 ^^^^^^^^^^^^^^^^^^^^^
537
538 (Experimental. Not yet supported in CFFI bindings.)
539
540 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
541 inputs as a single operation.
542
543 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
544 ``BufferWithSegments``, or a list containing byte like objects. Each element of
545 the container will be compressed individually using the configured parameters
546 on the ``ZstdCompressor`` instance.
547
548 The ``threads`` argument controls how many threads to use for compression. The
549 default is ``0`` which means to use a single thread. Negative values use the
550 number of logical CPUs in the machine.
551
552 The function returns a ``BufferWithSegmentsCollection``. This type represents
553 N discrete memory allocations, each holding 1 or more compressed frames.
554
555 Output data is written to shared memory buffers. This means that unlike
556 regular Python objects, a reference to *any* object within the collection
557 keeps the shared buffer and therefore memory backing it alive. This can have
558 undesirable effects on process memory usage.
559
560 The API and behavior of this function are experimental and will likely change.
561 Known deficiencies include:
562
563 * If asked to use multiple threads, it will always spawn that many threads,
564 even if the input is too small to use them. It should automatically lower
565 the thread count when the extra threads would just add overhead.
566 * The buffer allocation strategy is fixed. There is room to make it dynamic,
567 perhaps even to allow one output buffer per input, facilitating a variation
568 of the API to return a list without the adverse effects of shared memory
569 buffers.
570
571 ZstdDecompressor
572 ----------------
573
574 The ``ZstdDecompressor`` class provides an interface for performing
575 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
576 the C API.
577
578 Each instance is associated with parameters that control decompression. These
579 come from the following named arguments (all optional):
580
581 dict_data
582 Compression dictionary to use.
583 max_window_size
584 Sets an upper limit on the window size for decompression operations in
585 kibibytes. This setting can be used to prevent large memory allocations
586 for inputs using large compression windows.
587 format
588 Set the format of data for the decoder. By default, this is
589 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
590 allow decoding frames without the 4 byte magic header. Not all decompression
591 APIs support this mode.
592
593 The interface of this class is very similar to ``ZstdCompressor`` (by design).
594
595 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
596 instances can be called from multiple Python threads simultaneously. In other
597 words, assume instances are not thread safe unless stated otherwise.
598
599 Utility Methods
600 ^^^^^^^^^^^^^^^
601
602 ``memory_size()`` obtains the size of the underlying zstd decompression context,
603 in bytes.::
604
605 dctx = zstd.ZstdDecompressor()
606 size = dctx.memory_size()
21 .. |ci-test| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/test.yml/badge.svg
22 :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/test.yml
607 23
608 Simple API
609 ^^^^^^^^^^
610
611 ``decompress(data)`` can be used to decompress an entire compressed zstd
612 frame in a single operation.::
613
614 dctx = zstd.ZstdDecompressor()
615 decompressed = dctx.decompress(data)
616
617 By default, ``decompress(data)`` will only work on data written with the content
618 size encoded in its header (this is the default behavior of
619 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
620 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
621 be raised.
622
623 If the compressed data doesn't have its content size embedded within it,
624 decompression can be attempted by specifying the ``max_output_size``
625 argument.::
626
627 dctx = zstd.ZstdDecompressor()
628 uncompressed = dctx.decompress(data, max_output_size=1048576)
629
630 Ideally, ``max_output_size`` will be identical to the decompressed output
631 size.
632
633 If ``max_output_size`` is too small to hold the decompressed data,
634 ``zstd.ZstdError`` will be raised.
635
636 If ``max_output_size`` is larger than the decompressed data, the allocated
637 output buffer will be resized to only use the space required.
638
639 Please note that an allocation of the requested ``max_output_size`` will be
640 performed every time the method is called. Setting to a very large value could
641 result in a lot of work for the memory allocator and may result in
642 ``MemoryError`` being raised if the allocation fails.
643
644 .. important::
645
646 If the exact size of decompressed data is unknown (not passed in explicitly
647 and not stored in the zstandard frame), for performance reasons it is
648 encouraged to use a streaming API.
649
650 Stream Reader API
651 ^^^^^^^^^^^^^^^^^
652
653 ``stream_reader(source)`` can be used to obtain an object conforming to the
654 ``io.RawIOBase`` interface for reading decompressed output as a stream::
655
656 with open(path, 'rb') as fh:
657 dctx = zstd.ZstdDecompressor()
658 reader = dctx.stream_reader(fh)
659 while True:
660 chunk = reader.read(16384)
661 if not chunk:
662 break
663
664 # Do something with decompressed chunk.
665
666 The stream can also be used as a context manager::
667
668 with open(path, 'rb') as fh:
669 dctx = zstd.ZstdDecompressor()
670 with dctx.stream_reader(fh) as reader:
671 ...
672
673 When used as a context manager, the stream is closed and the underlying
674 resources are released when the context manager exits. Future operations against
675 the stream will fail.
676
677 The ``source`` argument to ``stream_reader()`` can be any object with a
678 ``read(size)`` method or any object implementing the *buffer protocol*.
679
680 If the ``source`` is a stream, you can specify how large ``read()`` requests
681 to that stream should be via the ``read_size`` argument. It defaults to
682 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
683
684 with open(path, 'rb') as fh:
685 dctx = zstd.ZstdDecompressor()
686 # Will perform fh.read(8192) when obtaining data for the decompressor.
687 with dctx.stream_reader(fh, read_size=8192) as reader:
688 ...
689
690 The stream returned by ``stream_reader()`` is not writable.
691
692 The stream returned by ``stream_reader()`` is *partially* seekable.
693 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
694 of the current position are allowed. Offsets behind the current read
695 position and offsets relative to the end of stream are not allowed and
696 will raise ``ValueError`` if attempted.
697
698 ``tell()`` returns the number of decompressed bytes read so far.
699
700 Not all I/O methods are implemented. Notably missing is support for
701 ``readline()``, ``readlines()``, and linewise iteration support. This is
702 because streams operate on binary data - not text data. If you want to
703 convert decompressed output to text, you can chain an ``io.TextIOWrapper``
704 to the stream::
705
706 with open(path, 'rb') as fh:
707 dctx = zstd.ZstdDecompressor()
708 stream_reader = dctx.stream_reader(fh)
709 text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
710
711 for line in text_stream:
712 ...
713
714 The ``read_across_frames`` argument to ``stream_reader()`` controls the
715 behavior of read operations when the end of a zstd *frame* is encountered.
716 When ``False`` (the default), a read will complete when the end of a
717 zstd *frame* is encountered. When ``True``, a read can potentially
718 return data spanning multiple zstd *frames*.
719
720 Streaming Input API
721 ^^^^^^^^^^^^^^^^^^^
722
723 ``stream_writer(fh)`` allows you to *stream* data into a decompressor.
724
725 Returned instances implement the ``io.RawIOBase`` interface. Only methods
726 that involve writing will do useful things.
727
728 The argument to ``stream_writer()`` is typically an object that also implements
729 ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
730 common Python types conform to this interface, including open file handles
731 and ``io.BytesIO``.
732
733 Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
734 is sent to the decompressor by calling ``write(data)`` and decompressed
735 output is written to the underlying stream by calling its ``write(data)``
736 method.::
737
738 dctx = zstd.ZstdDecompressor()
739 decompressor = dctx.stream_writer(fh)
740
741 decompressor.write(compressed_data)
742 ...
743
744
745 Calls to ``write()`` will return the number of bytes written to the output
746 object. Not all inputs will result in bytes being written, so return values
747 of ``0`` are possible.
748
749 Like the ``stream_writer()`` compressor, instances can be used as context
750 managers. However, context managers add no extra special behavior and offer
751 little to no benefit to being used.
752
753 Calling ``close()`` will mark the stream as closed and subsequent I/O operations
754 will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
755 ``close()`` will also call ``close()`` on the underlying stream if such a
756 method exists.
757
758 The size of chunks being ``write()`` to the destination can be specified::
759
760 dctx = zstd.ZstdDecompressor()
761 with dctx.stream_writer(fh, write_size=16384) as decompressor:
762 pass
763
764 You can see how much memory is being used by the decompressor::
765
766 dctx = zstd.ZstdDecompressor()
767 with dctx.stream_writer(fh) as decompressor:
768 byte_size = decompressor.memory_size()
769
770 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
771 the return value of ``write()``. When ``False`` (the default), ``write()``
772 returns the number of bytes that were written to the underlying stream.
773 When ``True``, ``write()`` returns the number of bytes read from the input.
774 ``True`` is the *proper* behavior for ``write()`` as specified by the
775 ``io.RawIOBase`` interface and will become the default in a future release.
776
777 Streaming Output API
778 ^^^^^^^^^^^^^^^^^^^^
779
780 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
781 compressed source as an iterator of data chunks.::
782
783 dctx = zstd.ZstdDecompressor()
784 for chunk in dctx.read_to_iter(fh):
785 # Do something with original data.
786
787 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
788 return compressed bytes or an object conforming to the buffer protocol that
789 can expose its data as a contiguous range of bytes.
790
791 ``read_to_iter()`` returns an iterator whose elements are chunks of the
792 decompressed data.
793
794 The size of requested ``read()`` from the source can be specified::
795
796 dctx = zstd.ZstdDecompressor()
797 for chunk in dctx.read_to_iter(fh, read_size=16384):
798 pass
799
800 It is also possible to skip leading bytes in the input data::
801
802 dctx = zstd.ZstdDecompressor()
803 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
804 pass
805
806 .. tip::
24 .. |ci-wheel| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/wheel.yml/badge.svg
25 :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/wheel.yml
807 26
808 Skipping leading bytes is useful if the source data contains extra
809 *header* data. Traditionally, you would need to create a slice or
810 ``memoryview`` of the data you want to decompress. This would create
811 overhead. It is more efficient to pass the offset into this API.
812
813 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
814 controls when data is decompressed. If the iterator isn't consumed,
815 decompression is put on hold.
816
817 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
818 the behavior may seem similar to what occurs when the simple decompression
819 API is used. However, this API works when the decompressed size is unknown.
820 Furthermore, if feeding large inputs, the decompressor will work in chunks
821 instead of performing a single operation.
822
823 Stream Copying API
824 ^^^^^^^^^^^^^^^^^^
825
826 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
827 performing decompression.::
828
829 dctx = zstd.ZstdDecompressor()
830 dctx.copy_stream(ifh, ofh)
831
832 e.g. to decompress a file to another file::
833
834 dctx = zstd.ZstdDecompressor()
835 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
836 dctx.copy_stream(ifh, ofh)
837
838 The size of chunks being ``read()`` and ``write()`` from and to the streams
839 can be specified::
840
841 dctx = zstd.ZstdDecompressor()
842 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
843
844 Decompressor API
845 ^^^^^^^^^^^^^^^^
846
847 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
848 method. Compressed data chunks are fed into ``decompress(data)`` and
849 uncompressed output (or an empty bytes) is returned. Output from subsequent
850 calls needs to be concatenated to reassemble the full decompressed byte
851 sequence.
852
853 The purpose of ``decompressobj()`` is to provide an API-compatible interface
854 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
855 to swap in different decompressor objects while using the same API.
856
857 Each object is single use: once an input frame is decoded, ``decompress()``
858 can no longer be called.
859
860 Here is how this API should be used::
861
862 dctx = zstd.ZstdDecompressor()
863 dobj = dctx.decompressobj()
864 data = dobj.decompress(compressed_chunk_0)
865 data = dobj.decompress(compressed_chunk_1)
866
867 By default, calls to ``decompress()`` write output data in chunks of size
868 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
869 before being returned to the caller. It is possible to define the size of
870 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
871
872 dctx = zstd.ZstdDecompressor()
873 dobj = dctx.decompressobj(write_size=1048576)
874
875 .. note::
876
877 Because calls to ``decompress()`` may need to perform multiple
878 memory (re)allocations, this streaming decompression API isn't as
879 efficient as other APIs.
880
881 For compatibility with the standard library APIs, instances expose a
882 ``flush([length=None])`` method. This method no-ops and has no meaningful
883 side-effects, making it safe to call any time.
884
885 Batch Decompression API
886 ^^^^^^^^^^^^^^^^^^^^^^^
887
888 (Experimental. Not yet supported in CFFI bindings.)
889
890 ``multi_decompress_to_buffer()`` performs decompression of multiple
891 frames as a single operation and returns a ``BufferWithSegmentsCollection``
892 containing decompressed data for all inputs.
893
894 Compressed frames can be passed to the function as a ``BufferWithSegments``,
895 a ``BufferWithSegmentsCollection``, or as a list containing objects that
896 conform to the buffer protocol. For best performance, pass a
897 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
898 minimal input validation will be done for that type. If calling from
899 Python (as opposed to C), constructing one of these instances may add
900 overhead cancelling out the performance overhead of validation for list
901 inputs.::
902
903 dctx = zstd.ZstdDecompressor()
904 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
905
906 The decompressed size of each frame MUST be discoverable. It can either be
907 embedded within the zstd frame (``write_content_size=True`` argument to
908 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
909
910 The ``decompressed_sizes`` argument is an object conforming to the buffer
911 protocol which holds an array of 64-bit unsigned integers in the machine's
912 native format defining the decompressed sizes of each frame. If this argument
913 is passed, it avoids having to scan each frame for its decompressed size.
914 This frame scanning can add noticeable overhead in some scenarios.::
915
916 frames = [...]
917 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
918
919 dctx = zstd.ZstdDecompressor()
920 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
921
922 The ``threads`` argument controls the number of threads to use to perform
923 decompression operations. The default (``0``) or the value ``1`` means to
924 use a single thread. Negative values use the number of logical CPUs in the
925 machine.
926
927 .. note::
928
929 It is possible to pass a ``mmap.mmap()`` instance into this function by
930 wrapping it with a ``BufferWithSegments`` instance (which will define the
931 offsets of frames within the memory mapped region).
932
933 This function is logically equivalent to performing ``dctx.decompress()``
934 on each input frame and returning the result.
935
936 This function exists to perform decompression on multiple frames as fast
937 as possible by having as little overhead as possible. Since decompression is
938 performed as a single operation and since the decompressed output is stored in
939 a single buffer, extra memory allocations, Python objects, and Python function
940 calls are avoided. This is ideal for scenarios where callers know up front that
941 they need to access data for multiple frames, such as when *delta chains* are
942 being used.
943
944 Currently, the implementation always spawns multiple threads when requested,
945 even if the amount of work to do is small. In the future, it will be smarter
946 about avoiding threads and their associated overhead when the amount of
947 work to do is small.
948
949 Prefix Dictionary Chain Decompression
950 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
951
952 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
953 zstd frames produced using chained *prefix* dictionary compression. Such
954 a list of frames is produced by compressing discrete inputs where each
955 non-initial input is compressed with a *prefix* dictionary consisting of the
956 content of the previous input.
957
958 For example, say you have the following inputs::
959
960 inputs = [b'input 1', b'input 2', b'input 3']
961
962 The zstd frame chain consists of:
963
964 1. ``b'input 1'`` compressed in standalone/discrete mode
965 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
966 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
967
968 Each zstd frame **must** have the content size written.
969
970 The following Python code can be used to produce a *prefix dictionary chain*::
971
972 def make_chain(inputs):
973 frames = []
974
975 # First frame is compressed in standalone/discrete mode.
976 zctx = zstd.ZstdCompressor()
977 frames.append(zctx.compress(inputs[0]))
978
979 # Subsequent frames use the previous fulltext as a prefix dictionary
980 for i, raw in enumerate(inputs[1:]):
981 dict_data = zstd.ZstdCompressionDict(
982 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
983 zctx = zstd.ZstdCompressor(dict_data=dict_data)
984 frames.append(zctx.compress(raw))
985
986 return frames
987
988 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
989 element in the input chain.
990
991
992 .. note::
993
994 It is possible to implement *prefix dictionary chain* decompression
995 on top of other APIs. However, this function will likely be faster -
996 especially for long input chains - as it avoids the overhead of instantiating
997 and passing around intermediate objects between C and Python.
998
999 Multi-Threaded Compression
1000 --------------------------
27 .. |ci-typing| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/typing.yml/badge.svg
28 :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/typing.yml
1001 29
1002 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
1003 of threads to use for compression. The way this works is that input is split
1004 into segments and each segment is fed into a worker pool for compression. Once
1005 a segment is compressed, it is flushed/appended to the output.
1006
1007 .. note::
1008
1009 These threads are created at the C layer and are not Python threads. So they
1010 work outside the GIL. It is therefore possible to CPU saturate multiple cores
1011 from Python.
1012
1013 The segment size for multi-threaded compression is chosen from the window size
1014 of the compressor. This is derived from the ``window_log`` attribute of a
1015 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
1016 range.
1017
1018 If multi-threaded compression is requested and the input is smaller than the
1019 configured segment size, only a single compression thread will be used. If the
1020 input is smaller than the segment size multiplied by the thread pool size or
1021 if data cannot be delivered to the compressor fast enough, not all requested
1022 compressor threads may be active simultaneously.
1023
1024 Compared to non-multi-threaded compression, multi-threaded compression has
1025 higher per-operation overhead. This includes extra memory operations,
1026 thread creation, lock acquisition, etc.
1027
1028 Due to the nature of multi-threaded compression using *N* compression
1029 *states*, the output from multi-threaded compression will likely be larger
1030 than non-multi-threaded compression. The difference is usually small. But
1031 there is a CPU/wall time versus size trade off that may warrant investigation.
1032
1033 Output from multi-threaded compression does not require any special handling
1034 on the decompression side. To the decompressor, data generated with a single
1035 threaded compressor looks the same as data generated by a multi-threaded
1036 compressor and does not require any special handling or additional resource
1037 requirements.
1038
1039 Dictionary Creation and Management
1040 ----------------------------------
1041
1042 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
1043
1044 Instances can be constructed from bytes::
1045
1046 dict_data = zstd.ZstdCompressionDict(data)
1047
1048 It is possible to construct a dictionary from *any* data. If the data doesn't
1049 begin with a magic header, it will be treated as a *prefix* dictionary.
1050 *Prefix* dictionaries allow compression operations to reference raw data
1051 within the dictionary.
1052
1053 It is possible to force the use of *prefix* dictionaries or to require a
1054 dictionary header::
1055
1056 dict_data = zstd.ZstdCompressionDict(data,
1057 dict_type=zstd.DICT_TYPE_RAWCONTENT)
1058
1059 dict_data = zstd.ZstdCompressionDict(data,
1060 dict_type=zstd.DICT_TYPE_FULLDICT)
1061
1062 You can see how many bytes are in the dictionary by calling ``len()``::
1063
1064 dict_data = zstd.train_dictionary(size, samples)
1065 dict_size = len(dict_data) # will not be larger than ``size``
1066
1067 Once you have a dictionary, you can pass it to the objects performing
1068 compression and decompression::
1069
1070 dict_data = zstd.train_dictionary(131072, samples)
1071
1072 cctx = zstd.ZstdCompressor(dict_data=dict_data)
1073 for source_data in input_data:
1074 compressed = cctx.compress(source_data)
1075 # Do something with compressed data.
1076
1077 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
1078 for compressed_data in input_data:
1079 buffer = io.BytesIO()
1080 with dctx.stream_writer(buffer) as decompressor:
1081 decompressor.write(compressed_data)
1082 # Do something with raw data in ``buffer``.
1083
1084 Dictionaries have unique integer IDs. You can retrieve this ID via::
1085
1086 dict_id = zstd.dictionary_id(dict_data)
1087
1088 You can obtain the raw data in the dict (useful for persisting and constructing
1089 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
1090
1091 dict_data = zstd.train_dictionary(size, samples)
1092 raw_data = dict_data.as_bytes()
1093
1094 By default, when a ``ZstdCompressionDict`` is *attached* to a
1095 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
1096 dictionary for use. This is fine if only 1 compression operation is being
1097 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
1098 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
1099 this can add overhead.
1100
1101 It is possible to *precompute* the dictionary so it can readily be consumed
1102 by multiple ``ZstdCompressor`` instances::
1103
1104 d = zstd.ZstdCompressionDict(data)
1105
1106 # Precompute for compression level 3.
1107 d.precompute_compress(level=3)
1108
1109 # Precompute with specific compression parameters.
1110 params = zstd.ZstdCompressionParameters(...)
1111 d.precompute_compress(compression_params=params)
1112
1113 .. note::
1114
1115 When a dictionary is precomputed, the compression parameters used to
1116 precompute the dictionary overwrite some of the compression parameters
1117 specified to ``ZstdCompressor.__init__``.
1118
1119 Training Dictionaries
1120 ^^^^^^^^^^^^^^^^^^^^^
1121
1122 Unless using *prefix* dictionaries, dictionary data is produced by *training*
1123 on existing data::
1124
1125 dict_data = zstd.train_dictionary(size, samples)
1126
1127 This takes a target dictionary size and list of bytes instances and creates and
1128 returns a ``ZstdCompressionDict``.
1129
1130 The dictionary training mechanism is known as *cover*. More details about it are
1131 available in the paper *Effective Construction of Relative Lempel-Ziv
1132 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
1133
1134 The cover algorithm takes parameters ``k`` and ``d``. These are the
1135 *segment size* and *dmer size*, respectively. The returned dictionary
1136 instance created by this function has ``k`` and ``d`` attributes
1137 containing the values for these parameters. If a ``ZstdCompressionDict``
1138 is constructed from raw bytes data (a content-only dictionary), the
1139 ``k`` and ``d`` attributes will be ``0``.
1140
1141 The segment and dmer size parameters to the cover algorithm can either be
1142 specified manually or ``train_dictionary()`` can try multiple values
1143 and pick the best one, where *best* means the smallest compressed data size.
1144 This latter mode is called *optimization* mode.
1145
1146 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
1147 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
1148 struct) are defined, *optimization* mode is used with default parameter
1149 values.
1150
1151 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
1152 with explicit control over those parameters. Specifying ``threads=0`` or
1153 ``threads=1`` can be used to engage *optimization* mode if other parameters
1154 are not defined.
1155
1156 Otherwise, non-*optimization* mode is used with the parameters specified.
1157
1158 This function takes the following arguments:
1159
1160 dict_size
1161 Target size in bytes of the dictionary to generate.
1162 samples
1163 A list of bytes holding samples the dictionary will be trained from.
1164 k
1165 Parameter to cover algorithm defining the segment size. A reasonable range
1166 is [16, 2048+].
1167 d
1168 Parameter to cover algorithm defining the dmer size. A reasonable range is
1169 [6, 16]. ``d`` must be less than or equal to ``k``.
1170 dict_id
1171 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1172 a random value.
1173 steps
1174 Number of steps through ``k`` values to perform when trying parameter
1175 variations.
1176 threads
1177 Number of threads to use when trying parameter variations. Default is 0,
1178 which means to use a single thread. A negative value can be specified to
1179 use as many threads as there are detected logical CPUs.
1180 level
1181 Integer target compression level when trying parameter variations.
1182 notifications
1183 Controls writing of informational messages to ``stderr``. ``0`` (the
1184 default) means to write nothing. ``1`` writes errors. ``2`` writes
1185 progression info. ``3`` writes more details. And ``4`` writes all info.
1186
1187 Explicit Compression Parameters
1188 -------------------------------
1189
1190 Zstandard offers a high-level *compression level* that maps to lower-level
1191 compression parameters. For many consumers, this numeric level is the only
1192 compression setting you'll need to touch.
1193
1194 But for advanced use cases, it might be desirable to tweak these lower-level
1195 settings.
1196
1197 The ``ZstdCompressionParameters`` type represents these low-level compression
1198 settings.
1199
1200 Instances of this type can be constructed from a myriad of keyword arguments
1201 (defined below) for complete low-level control over each adjustable
1202 compression setting.
30 .. |ci-sdist| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/sdist.yml/badge.svg
31 :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/sdist.yml
1203 32
1204 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1205 given a desired compression level and target input and dictionary size
1206 using ``ZstdCompressionParameters.from_level()``. e.g.::
1207
1208 # Derive compression settings for compression level 7.
1209 params = zstd.ZstdCompressionParameters.from_level(7)
1210
1211 # With an input size of 1MB
1212 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1213
1214 Using ``from_level()``, it is also possible to override individual compression
1215 parameters or to define additional settings that aren't automatically derived.
1216 e.g.::
1217
1218 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1219 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1220
1221 Or you can define low-level compression settings directly::
1222
1223 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
1224
1225 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1226 configure a compressor::
1227
1228 cctx = zstd.ZstdCompressor(compression_params=params)
1229
1230 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1231 follows:
1232
1233 * format
1234 * compression_level
1235 * window_log
1236 * hash_log
1237 * chain_log
1238 * search_log
1239 * min_match
1240 * target_length
1241 * strategy
1242 * compression_strategy (deprecated: same as ``strategy``)
1243 * write_content_size
1244 * write_checksum
1245 * write_dict_id
1246 * job_size
1247 * overlap_log
1248 * overlap_size_log (deprecated: same as ``overlap_log``)
1249 * force_max_window
1250 * enable_ldm
1251 * ldm_hash_log
1252 * ldm_min_match
1253 * ldm_bucket_size_log
1254 * ldm_hash_rate_log
1255 * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
1256 * threads
1257
1258 Some of these are very low-level settings. It may help to consult the official
1259 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
1260 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
1261
1262 Frame Inspection
1263 ----------------
1264
1265 Data emitted from zstd compression is encapsulated in a *frame*. This frame
1266 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
1267 the frame in more detail. For more info, see
1268 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
1269
1270 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
1271 instance and returns a ``FrameParameters`` object describing the frame.
1272
1273 Depending on which fields are present in the frame and their values, the
1274 length of the frame parameters varies. If insufficient bytes are passed
1275 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
1276 frame parameters can be parsed, pass in at least 18 bytes.
1277
1278 ``FrameParameters`` instances have the following attributes:
1279
1280 content_size
1281 Integer size of original, uncompressed content. This will be ``0`` if the
1282 original content size isn't written to the frame (controlled with the
1283 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
1284 content size was ``0``.
1285
1286 window_size
1287 Integer size of maximum back-reference distance in compressed data.
1288
1289 dict_id
1290 Integer of dictionary ID used for compression. ``0`` if no dictionary
1291 ID was used or if the dictionary ID was ``0``.
1292
1293 has_checksum
1294 Bool indicating whether a 4 byte content checksum is stored at the end
1295 of the frame.
1296
1297 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1298 header.
1299
1300 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1301 the frame header. ``-1`` means the content size is unknown. ``0`` means
1302 an empty frame. The content size is usually correct. However, it may not
1303 be accurate.
1304
1305 Misc Functionality
1306 ------------------
1307
1308 estimate_decompression_context_size()
1309 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1310
1311 Estimate the memory size requirements for a decompressor instance.
1312
1313 Constants
1314 ---------
1315
1316 The following module constants/attributes are exposed:
1317
1318 ZSTD_VERSION
1319 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1320 ``(1, 0, 0)``
1321 MAX_COMPRESSION_LEVEL
1322 Integer max compression level accepted by compression functions
1323 COMPRESSION_RECOMMENDED_INPUT_SIZE
1324 Recommended chunk size to feed to compressor functions
1325 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1326 Recommended chunk size for compression output
1327 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1328 Recommended chunk size to feed into decompressor functions
1329 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1330 Recommended chunk size for decompression output
1331
1332 FRAME_HEADER
1333 bytes containing header of the Zstandard frame
1334 MAGIC_NUMBER
1335 Frame header as an integer
1336
1337 FLUSH_BLOCK
1338 Flushing behavior that denotes to flush a zstd block. A decompressor will
1339 be able to decode all data fed into the compressor so far.
1340 FLUSH_FRAME
1341 Flushing behavior that denotes to end a zstd frame. Any new data fed
1342 to the compressor will start a new frame.
1343
1344 CONTENTSIZE_UNKNOWN
1345 Value for content size when the content size is unknown.
1346 CONTENTSIZE_ERROR
1347 Value for content size when content size couldn't be determined.
1348
1349 WINDOWLOG_MIN
1350 Minimum value for compression parameter
1351 WINDOWLOG_MAX
1352 Maximum value for compression parameter
1353 CHAINLOG_MIN
1354 Minimum value for compression parameter
1355 CHAINLOG_MAX
1356 Maximum value for compression parameter
1357 HASHLOG_MIN
1358 Minimum value for compression parameter
1359 HASHLOG_MAX
1360 Maximum value for compression parameter
1361 SEARCHLOG_MIN
1362 Minimum value for compression parameter
1363 SEARCHLOG_MAX
1364 Maximum value for compression parameter
1365 MINMATCH_MIN
1366 Minimum value for compression parameter
1367 MINMATCH_MAX
1368 Maximum value for compression parameter
1369 SEARCHLENGTH_MIN
1370 Minimum value for compression parameter
1371
1372 Deprecated: use ``MINMATCH_MIN``
1373 SEARCHLENGTH_MAX
1374 Maximum value for compression parameter
1375
1376 Deprecated: use ``MINMATCH_MAX``
1377 TARGETLENGTH_MIN
1378 Minimum value for compression parameter
1379 STRATEGY_FAST
1380 Compression strategy
1381 STRATEGY_DFAST
1382 Compression strategy
1383 STRATEGY_GREEDY
1384 Compression strategy
1385 STRATEGY_LAZY
1386 Compression strategy
1387 STRATEGY_LAZY2
1388 Compression strategy
1389 STRATEGY_BTLAZY2
1390 Compression strategy
1391 STRATEGY_BTOPT
1392 Compression strategy
1393 STRATEGY_BTULTRA
1394 Compression strategy
1395 STRATEGY_BTULTRA2
1396 Compression strategy
1397
1398 FORMAT_ZSTD1
1399 Zstandard frame format
1400 FORMAT_ZSTD1_MAGICLESS
1401 Zstandard frame format without magic header
33 .. |ci-anaconda| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/anaconda.yml/badge.svg
34 :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/anaconda.yml
1402 35
1403 Performance Considerations
1404 --------------------------
1405
1406 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1407 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1408 or ``ZstdDecompressor`` instance for multiple operations is faster than
1409 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1410 operation. The differences are magnified as the size of data decreases. For
1411 example, the difference between *context* reuse and non-reuse for 100,000
1412 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1413 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1414 time spent doing compression dwarfs time spent creating new *contexts*).
1415
1416 Buffer Types
1417 ------------
1418
1419 The API exposes a handful of custom types for interfacing with memory buffers.
1420 The primary goal of these types is to facilitate efficient multi-object
1421 operations.
1422
1423 The essential idea is to have a single memory allocation provide backing
1424 storage for multiple logical objects. This has 2 main advantages: fewer
1425 allocations and optimal memory access patterns. This avoids having to allocate
1426 a Python object for each logical object and furthermore ensures that access of
1427 data for objects can be sequential (read: fast) in memory.
1428
1429 BufferWithSegments
1430 ^^^^^^^^^^^^^^^^^^
1431
1432 The ``BufferWithSegments`` type represents a memory buffer containing N
1433 discrete items of known lengths (segments). It is essentially a fixed size
1434 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1435 unsigned native endian integers defining the byte offset and length of each
1436 segment within the buffer.
1437
1438 Instances behave like containers.
1439
1440 ``len()`` returns the number of segments within the instance.
1441
1442 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1443 individual segment within the backing buffer. That returned object references
1444 (not copies) memory. This means that iterating all objects doesn't copy
1445 data within the buffer.
1446
1447 The ``.size`` attribute contains the total size in bytes of the backing
1448 buffer.
1449
1450 Instances conform to the buffer protocol. So a reference to the backing bytes
1451 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1452 be obtained via ``.tobytes()``.
1453
1454 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1455 segments within the buffer. It is a ``BufferSegments`` type.
1456
1457 BufferSegment
1458 ^^^^^^^^^^^^^
1459
1460 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1461 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1462
1463 ``len()`` returns the length of the segment in bytes.
1464
1465 ``.offset`` contains the byte offset of this segment within its parent
1466 ``BufferWithSegments`` instance.
1467
1468 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1469 obtain a ``bytes`` instance with a copy of the backing bytes.
1470
1471 BufferSegments
1472 ^^^^^^^^^^^^^^
1473
1474 This type represents an array of ``(offset, length)`` integers defining segments
1475 within a ``BufferWithSegments``.
1476
1477 The array members are 64-bit unsigned integers using host/native byte order.
1478
1479 Instances conform to the buffer protocol.
1480
1481 BufferWithSegmentsCollection
1482 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1483
1484 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1485 of multiple ``BufferWithSegments`` instances.
1486
1487 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1488 resulting object behaves like an ordered sequence whose members are the
1489 segments within each ``BufferWithSegments``.
1490
1491 ``len()`` returns the number of segments within all ``BufferWithSegments``
1492 instances.
1493
1494 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1495 that offset as if all ``BufferWithSegments`` instances were a single
1496 entity.
1497
1498 If the object is composed of 2 ``BufferWithSegments`` instances with the
1499 first having 2 segments and the second having 3 segments, then ``b[0]``
1500 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1501 and ``b[4]`` access segments from the second.
1502
1503 Choosing an API
1504 ===============
1505
1506 There are multiple APIs for performing compression and decompression. This is
1507 because different applications have different needs and the library wants to
1508 facilitate optimal use in as many use cases as possible.
1509
1510 At a high level, APIs are divided into *one-shot* and *streaming*: either you
1511 are operating on all data at once or you operate on it piecemeal.
1512
1513 The *one-shot* APIs are useful for small data, where the input or output
1514 size is known. (The size can come from a buffer length, file size, or
1515 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1516 input and output must fit in memory simultaneously. For, say, a 4 GB input,
1517 this is often not feasible.
1518
1519 The *one-shot* APIs also perform all work as a single operation. So, if you
1520 feed it large input, it could take a long time for the function to return.
1521
1522 The streaming APIs do not have the limitations of the *one-shot* APIs. But the
1523 price you pay for this flexibility is that they are more complex than a
1524 single function call.
1525
1526 The streaming APIs put the caller in control of compression and decompression
1527 behavior by allowing them to directly control either the input or output side
1528 of the operation.
1529
1530 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1531 has full control over the input to the compression or decompression stream.
1532 They can directly choose when new data is operated on.
1533
1534 With the *streaming output* APIs, the caller has full control over the output
1535 of the compression or decompression stream. It can choose when to receive
1536 new data.
1537
1538 When using the *streaming* APIs that operate on file-like or stream objects,
1539 it is important to consider what happens in that object when I/O is requested.
1540 There is potential for long pauses as data is read or written from the
1541 underlying stream (say from interacting with a filesystem or network). This
1542 could add considerable overhead.
1543
1544 Thread Safety
1545 =============
1546
1547 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1548 about thread safety. Do not operate on the same ``ZstdCompressor`` or
1549 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1550 fine to have different threads call into a single instance, just not at the
1551 same time.
1552
1553 Some operations, e.g. streaming operations, require multiple function calls
1554 to complete. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1555 for simultaneously active operations; e.g. you must not start a streaming
1556 operation when another streaming operation is already active.
1557
1558 The C extension releases the GIL during non-trivial calls into the zstd C
1559 API. Non-trivial calls are notably compression and decompression. Trivial
1560 calls are things like parsing frame parameters. Where the GIL is released
1561 is considered an implementation detail and can change in any release.
1562
1563 APIs that accept bytes-like objects don't enforce that the underlying object
1564 is read-only. However, it is assumed that the passed object is read-only for
1565 the duration of the function call. It is possible to pass a mutable object
1566 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1567 released, and mutate the object from another thread. Such a race condition
1568 is a bug in the consumer of python-zstandard. Most Python data types are
1569 immutable, so unless you are doing something fancy, you don't need to
1570 worry about this.
1571
1572 Note on Zstandard's *Experimental* API
1573 ======================================
1574
1575 Many of the Zstandard APIs used by this module are marked as *experimental*
1576 within the Zstandard project.
1577
1578 It is unclear how Zstandard's C API will evolve over time, especially with
1579 regard to this *experimental* functionality. We will try to maintain
1580 backwards compatibility at the Python API level. However, we cannot
1581 guarantee this for things not under our control.
1582
1583 Since a copy of the Zstandard source code is distributed with this
1584 module and since we compile against it, the behavior of a specific
1585 version of this module should be constant for all of time. So if you
1586 pin the version of this module used in your projects (which is a Python
1587 best practice), you should be shielded from unwanted future changes.
1588
1589 Donate
1590 ======
1591
1592 A lot of time has been invested into this project by the author.
1593
1594 If you find this project useful and would like to thank the author for
1595 their work, consider donating some money. Any amount is appreciated.
1596
1597 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1598 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1599 :alt: Donate via PayPal
1600
1601 .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
1602 :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
36 .. |ci-sphinx| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/sphinx.yml/badge.svg
37 :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/sphinx.yml
This diff has been collapsed as it changes many lines, (1142 lines changed) Show them Hide them
@@ -1,128 +1,101
1 1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(BufferWithSegments__doc__,
14 "BufferWithSegments - A memory buffer holding known sub-segments.\n"
15 "\n"
16 "This type represents a contiguous chunk of memory containing N discrete\n"
17 "items within sub-segments of that memory.\n"
18 "\n"
19 "Segments within the buffer are stored as an array of\n"
20 "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n"
21 "integer using the host/native bit order representation.\n"
22 "\n"
23 "The type exists to facilitate operations against N>1 items without the\n"
24 "overhead of Python object creation and management.\n"
25 );
11 extern PyObject *ZstdError;
26 12
27 static void BufferWithSegments_dealloc(ZstdBufferWithSegments* self) {
28 /* Backing memory is either canonically owned by a Py_buffer or by us. */
29 if (self->parent.buf) {
30 PyBuffer_Release(&self->parent);
31 }
32 else if (self->useFree) {
33 free(self->data);
34 }
35 else {
36 PyMem_Free(self->data);
37 }
13 static void BufferWithSegments_dealloc(ZstdBufferWithSegments *self) {
14 /* Backing memory is either canonically owned by a Py_buffer or by us. */
15 if (self->parent.buf) {
16 PyBuffer_Release(&self->parent);
17 }
18 else if (self->useFree) {
19 free(self->data);
20 }
21 else {
22 PyMem_Free(self->data);
23 }
38 24
39 self->data = NULL;
25 self->data = NULL;
40 26
41 if (self->useFree) {
42 free(self->segments);
43 }
44 else {
45 PyMem_Free(self->segments);
46 }
27 if (self->useFree) {
28 free(self->segments);
29 }
30 else {
31 PyMem_Free(self->segments);
32 }
47 33
48 self->segments = NULL;
34 self->segments = NULL;
49 35
50 PyObject_Del(self);
36 PyObject_Del(self);
51 37 }
52 38
53 static int BufferWithSegments_init(ZstdBufferWithSegments* self, PyObject* args, PyObject* kwargs) {
54 static char* kwlist[] = {
55 "data",
56 "segments",
57 NULL
58 };
39 static int BufferWithSegments_init(ZstdBufferWithSegments *self, PyObject *args,
40 PyObject *kwargs) {
41 static char *kwlist[] = {"data", "segments", NULL};
59 42
60 Py_buffer segments;
61 Py_ssize_t segmentCount;
62 Py_ssize_t i;
43 Py_buffer segments;
44 Py_ssize_t segmentCount;
45 Py_ssize_t i;
63 46
64 memset(&self->parent, 0, sizeof(self->parent));
47 memset(&self->parent, 0, sizeof(self->parent));
65 48
66 #if PY_MAJOR_VERSION >= 3
67 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
68 #else
69 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments",
70 #endif
71 kwlist, &self->parent, &segments)) {
72 return -1;
73 }
49 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments",
50 kwlist, &self->parent, &segments)) {
51 return -1;
52 }
74 53
75 if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) {
76 PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension");
77 goto except;
78 }
54 if (segments.len % sizeof(BufferSegment)) {
55 PyErr_Format(PyExc_ValueError,
56 "segments array size is not a multiple of %zu",
57 sizeof(BufferSegment));
58 goto except;
59 }
79 60
80 if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) {
81 PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension");
82 goto except;
83 }
61 segmentCount = segments.len / sizeof(BufferSegment);
84 62
85 if (segments.len % sizeof(BufferSegment)) {
86 PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu",
87 sizeof(BufferSegment));
88 goto except;
89 }
63 /* Validate segments data, as blindly trusting it could lead to arbitrary
64 memory access. */
65 for (i = 0; i < segmentCount; i++) {
66 BufferSegment *segment = &((BufferSegment *)(segments.buf))[i];
90 67
91 segmentCount = segments.len / sizeof(BufferSegment);
92
93 /* Validate segments data, as blindly trusting it could lead to arbitrary
94 memory access. */
95 for (i = 0; i < segmentCount; i++) {
96 BufferSegment* segment = &((BufferSegment*)(segments.buf))[i];
68 if (segment->offset + segment->length >
69 (unsigned long long)self->parent.len) {
70 PyErr_SetString(PyExc_ValueError,
71 "offset within segments array references memory "
72 "outside buffer");
73 goto except;
74 return -1;
75 }
76 }
97 77
98 if (segment->offset + segment->length > (unsigned long long)self->parent.len) {
99 PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer");
100 goto except;
101 return -1;
102 }
103 }
78 /* Make a copy of the segments data. It is cheap to do so and is a guard
79 against caller changing offsets, which has security implications. */
80 self->segments = PyMem_Malloc(segments.len);
81 if (!self->segments) {
82 PyErr_NoMemory();
83 goto except;
84 }
104 85
105 /* Make a copy of the segments data. It is cheap to do so and is a guard
106 against caller changing offsets, which has security implications. */
107 self->segments = PyMem_Malloc(segments.len);
108 if (!self->segments) {
109 PyErr_NoMemory();
110 goto except;
111 }
86 memcpy(self->segments, segments.buf, segments.len);
87 PyBuffer_Release(&segments);
112 88
113 memcpy(self->segments, segments.buf, segments.len);
114 PyBuffer_Release(&segments);
89 self->data = self->parent.buf;
90 self->dataSize = self->parent.len;
91 self->segmentCount = segmentCount;
115 92
116 self->data = self->parent.buf;
117 self->dataSize = self->parent.len;
118 self->segmentCount = segmentCount;
119
120 return 0;
93 return 0;
121 94
122 95 except:
123 PyBuffer_Release(&self->parent);
124 PyBuffer_Release(&segments);
125 return -1;
96 PyBuffer_Release(&self->parent);
97 PyBuffer_Release(&segments);
98 return -1;
126 99 }
127 100
128 101 /**
@@ -131,662 +104,475 except:
131 104 * Ownership of the backing memory and BufferSegments will be transferred to
132 105 * the created object and freed when the BufferWithSegments is destroyed.
133 106 */
134 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize,
135 BufferSegment* segments, Py_ssize_t segmentsSize) {
136 ZstdBufferWithSegments* result = NULL;
137 Py_ssize_t i;
107 ZstdBufferWithSegments *
108 BufferWithSegments_FromMemory(void *data, unsigned long long dataSize,
109 BufferSegment *segments,
110 Py_ssize_t segmentsSize) {
111 ZstdBufferWithSegments *result = NULL;
112 Py_ssize_t i;
138 113
139 if (NULL == data) {
140 PyErr_SetString(PyExc_ValueError, "data is NULL");
141 return NULL;
142 }
114 if (NULL == data) {
115 PyErr_SetString(PyExc_ValueError, "data is NULL");
116 return NULL;
117 }
143 118
144 if (NULL == segments) {
145 PyErr_SetString(PyExc_ValueError, "segments is NULL");
146 return NULL;
147 }
119 if (NULL == segments) {
120 PyErr_SetString(PyExc_ValueError, "segments is NULL");
121 return NULL;
122 }
148 123
149 for (i = 0; i < segmentsSize; i++) {
150 BufferSegment* segment = &segments[i];
124 for (i = 0; i < segmentsSize; i++) {
125 BufferSegment *segment = &segments[i];
151 126
152 if (segment->offset + segment->length > dataSize) {
153 PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size");
154 return NULL;
155 }
156 }
127 if (segment->offset + segment->length > dataSize) {
128 PyErr_SetString(PyExc_ValueError,
129 "offset in segments overflows buffer size");
130 return NULL;
131 }
132 }
157 133
158 result = PyObject_New(ZstdBufferWithSegments, &ZstdBufferWithSegmentsType);
159 if (NULL == result) {
160 return NULL;
161 }
134 result = PyObject_New(ZstdBufferWithSegments, ZstdBufferWithSegmentsType);
135 if (NULL == result) {
136 return NULL;
137 }
162 138
163 result->useFree = 0;
139 result->useFree = 0;
164 140
165 memset(&result->parent, 0, sizeof(result->parent));
166 result->data = data;
167 result->dataSize = dataSize;
168 result->segments = segments;
169 result->segmentCount = segmentsSize;
141 memset(&result->parent, 0, sizeof(result->parent));
142 result->data = data;
143 result->dataSize = dataSize;
144 result->segments = segments;
145 result->segmentCount = segmentsSize;
170 146
171 return result;
147 return result;
172 148 }
173 149
174 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* self) {
175 return self->segmentCount;
150 static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments *self) {
151 return self->segmentCount;
176 152 }
177 153
178 static ZstdBufferSegment* BufferWithSegments_item(ZstdBufferWithSegments* self, Py_ssize_t i) {
179 ZstdBufferSegment* result = NULL;
154 static ZstdBufferSegment *BufferWithSegments_item(ZstdBufferWithSegments *self,
155 Py_ssize_t i) {
156 ZstdBufferSegment *result = NULL;
157
158 if (i < 0) {
159 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
160 return NULL;
161 }
180 162
181 if (i < 0) {
182 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
183 return NULL;
184 }
163 if (i >= self->segmentCount) {
164 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
165 self->segmentCount);
166 return NULL;
167 }
185 168
186 if (i >= self->segmentCount) {
187 PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount);
188 return NULL;
189 }
169 if (self->segments[i].length > PY_SSIZE_T_MAX) {
170 PyErr_Format(PyExc_ValueError,
171 "item at offset %zd is too large for this platform", i);
172 return NULL;
173 }
190 174
191 if (self->segments[i].length > PY_SSIZE_T_MAX) {
192 PyErr_Format(PyExc_ValueError,
193 "item at offset %zd is too large for this platform", i);
194 return NULL;
195 }
175 result = (ZstdBufferSegment *)PyObject_CallObject(
176 (PyObject *)ZstdBufferSegmentType, NULL);
177 if (NULL == result) {
178 return NULL;
179 }
180
181 result->parent = (PyObject *)self;
182 Py_INCREF(self);
183
184 result->data = (char *)self->data + self->segments[i].offset;
185 result->dataSize = (Py_ssize_t)self->segments[i].length;
186 result->offset = self->segments[i].offset;
196 187
197 result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL);
198 if (NULL == result) {
199 return NULL;
200 }
188 return result;
189 }
201 190
202 result->parent = (PyObject*)self;
203 Py_INCREF(self);
191 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments *self,
192 Py_buffer *view, int flags) {
193 if (self->dataSize > PY_SSIZE_T_MAX) {
194 view->obj = NULL;
195 PyErr_SetString(PyExc_BufferError,
196 "buffer is too large for this platform");
197 return -1;
198 }
204 199
205 result->data = (char*)self->data + self->segments[i].offset;
206 result->dataSize = (Py_ssize_t)self->segments[i].length;
207 result->offset = self->segments[i].offset;
208
209 return result;
200 return PyBuffer_FillInfo(view, (PyObject *)self, self->data,
201 (Py_ssize_t)self->dataSize, 1, flags);
210 202 }
211 203
212 #if PY_MAJOR_VERSION >= 3
213 static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) {
214 if (self->dataSize > PY_SSIZE_T_MAX) {
215 view->obj = NULL;
216 PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform");
217 return -1;
218 }
204 static PyObject *BufferWithSegments_tobytes(ZstdBufferWithSegments *self) {
205 if (self->dataSize > PY_SSIZE_T_MAX) {
206 PyErr_SetString(PyExc_ValueError,
207 "buffer is too large for this platform");
208 return NULL;
209 }
219 210
220 return PyBuffer_FillInfo(view, (PyObject*)self, self->data, (Py_ssize_t)self->dataSize, 1, flags);
221 }
222 #else
223 static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) {
224 if (segment != 0) {
225 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
226 return -1;
227 }
228
229 if (self->dataSize > PY_SSIZE_T_MAX) {
230 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
231 return -1;
232 }
233
234 *ptrptr = self->data;
235 return (Py_ssize_t)self->dataSize;
211 return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize);
236 212 }
237 213
238 static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) {
239 if (len) {
240 *len = 1;
241 }
214 static ZstdBufferSegments *
215 BufferWithSegments_segments(ZstdBufferWithSegments *self) {
216 ZstdBufferSegments *result = (ZstdBufferSegments *)PyObject_CallObject(
217 (PyObject *)ZstdBufferSegmentsType, NULL);
218 if (NULL == result) {
219 return NULL;
220 }
242 221
243 return 1;
222 result->parent = (PyObject *)self;
223 Py_INCREF(self);
224 result->segments = self->segments;
225 result->segmentCount = self->segmentCount;
226
227 return result;
244 228 }
229
230 #if PY_VERSION_HEX < 0x03090000
231 static PyBufferProcs BufferWithSegments_as_buffer = {
232 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
233 0 /* bf_releasebuffer */
234 };
245 235 #endif
246 236
247 PyDoc_STRVAR(BufferWithSegments_tobytes__doc__,
248 "Obtain a bytes instance for this buffer.\n"
249 );
237 static PyMethodDef BufferWithSegments_methods[] = {
238 {"segments", (PyCFunction)BufferWithSegments_segments, METH_NOARGS, NULL},
239 {"tobytes", (PyCFunction)BufferWithSegments_tobytes, METH_NOARGS, NULL},
240 {NULL, NULL}};
250 241
251 static PyObject* BufferWithSegments_tobytes(ZstdBufferWithSegments* self) {
252 if (self->dataSize > PY_SSIZE_T_MAX) {
253 PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform");
254 return NULL;
255 }
256
257 return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize);
258 }
259
260 PyDoc_STRVAR(BufferWithSegments_segments__doc__,
261 "Obtain a BufferSegments describing segments in this sintance.\n"
262 );
242 static PyMemberDef BufferWithSegments_members[] = {
243 {"size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize), READONLY,
244 "total size of the buffer in bytes"},
245 {NULL}};
263 246
264 static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) {
265 ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL);
266 if (NULL == result) {
267 return NULL;
268 }
269
270 result->parent = (PyObject*)self;
271 Py_INCREF(self);
272 result->segments = self->segments;
273 result->segmentCount = self->segmentCount;
274
275 return result;
276 }
277
278 static PySequenceMethods BufferWithSegments_sq = {
279 (lenfunc)BufferWithSegments_length, /* sq_length */
280 0, /* sq_concat */
281 0, /* sq_repeat */
282 (ssizeargfunc)BufferWithSegments_item, /* sq_item */
283 0, /* sq_ass_item */
284 0, /* sq_contains */
285 0, /* sq_inplace_concat */
286 0 /* sq_inplace_repeat */
247 PyType_Slot ZstdBufferWithSegmentsSlots[] = {
248 {Py_tp_dealloc, BufferWithSegments_dealloc},
249 {Py_sq_length, BufferWithSegments_length},
250 {Py_sq_item, BufferWithSegments_item},
251 #if PY_VERSION_HEX >= 0x03090000
252 {Py_bf_getbuffer, BufferWithSegments_getbuffer},
253 #endif
254 {Py_tp_methods, BufferWithSegments_methods},
255 {Py_tp_members, BufferWithSegments_members},
256 {Py_tp_init, BufferWithSegments_init},
257 {Py_tp_new, PyType_GenericNew},
258 {0, NULL},
287 259 };
288 260
289 static PyBufferProcs BufferWithSegments_as_buffer = {
290 #if PY_MAJOR_VERSION >= 3
291 (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */
292 0 /* bf_releasebuffer */
293 #else
294 (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */
295 0, /* bf_getwritebuffer */
296 (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */
297 0 /* bf_getcharbuffer */
298 #endif
299 };
300
301 static PyMethodDef BufferWithSegments_methods[] = {
302 { "segments", (PyCFunction)BufferWithSegments_segments,
303 METH_NOARGS, BufferWithSegments_segments__doc__ },
304 { "tobytes", (PyCFunction)BufferWithSegments_tobytes,
305 METH_NOARGS, BufferWithSegments_tobytes__doc__ },
306 { NULL, NULL }
307 };
308
309 static PyMemberDef BufferWithSegments_members[] = {
310 { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize),
311 READONLY, "total size of the buffer in bytes" },
312 { NULL }
261 PyType_Spec ZstdBufferWithSegmentsSpec = {
262 "zstd.BufferWithSegments",
263 sizeof(ZstdBufferWithSegments),
264 0,
265 Py_TPFLAGS_DEFAULT,
266 ZstdBufferWithSegmentsSlots,
313 267 };
314 268
315 PyTypeObject ZstdBufferWithSegmentsType = {
316 PyVarObject_HEAD_INIT(NULL, 0)
317 "zstd.BufferWithSegments", /* tp_name */
318 sizeof(ZstdBufferWithSegments),/* tp_basicsize */
319 0, /* tp_itemsize */
320 (destructor)BufferWithSegments_dealloc, /* tp_dealloc */
321 0, /* tp_print */
322 0, /* tp_getattr */
323 0, /* tp_setattr */
324 0, /* tp_compare */
325 0, /* tp_repr */
326 0, /* tp_as_number */
327 &BufferWithSegments_sq, /* tp_as_sequence */
328 0, /* tp_as_mapping */
329 0, /* tp_hash */
330 0, /* tp_call */
331 0, /* tp_str */
332 0, /* tp_getattro */
333 0, /* tp_setattro */
334 &BufferWithSegments_as_buffer, /* tp_as_buffer */
335 Py_TPFLAGS_DEFAULT, /* tp_flags */
336 BufferWithSegments__doc__, /* tp_doc */
337 0, /* tp_traverse */
338 0, /* tp_clear */
339 0, /* tp_richcompare */
340 0, /* tp_weaklistoffset */
341 0, /* tp_iter */
342 0, /* tp_iternext */
343 BufferWithSegments_methods, /* tp_methods */
344 BufferWithSegments_members, /* tp_members */
345 0, /* tp_getset */
346 0, /* tp_base */
347 0, /* tp_dict */
348 0, /* tp_descr_get */
349 0, /* tp_descr_set */
350 0, /* tp_dictoffset */
351 (initproc)BufferWithSegments_init, /* tp_init */
352 0, /* tp_alloc */
353 PyType_GenericNew, /* tp_new */
354 };
269 PyTypeObject *ZstdBufferWithSegmentsType;
355 270
356 PyDoc_STRVAR(BufferSegments__doc__,
357 "BufferSegments - Represents segments/offsets within a BufferWithSegments\n"
358 );
271 static void BufferSegments_dealloc(ZstdBufferSegments *self) {
272 Py_CLEAR(self->parent);
273 PyObject_Del(self);
274 }
359 275
360 static void BufferSegments_dealloc(ZstdBufferSegments* self) {
361 Py_CLEAR(self->parent);
362 PyObject_Del(self);
276 static int BufferSegments_getbuffer(ZstdBufferSegments *self, Py_buffer *view,
277 int flags) {
278 return PyBuffer_FillInfo(view, (PyObject *)self, (void *)self->segments,
279 self->segmentCount * sizeof(BufferSegment), 1,
280 flags);
363 281 }
364 282
365 #if PY_MAJOR_VERSION >= 3
366 static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) {
367 return PyBuffer_FillInfo(view, (PyObject*)self,
368 (void*)self->segments, self->segmentCount * sizeof(BufferSegment),
369 1, flags);
370 }
371 #else
372 static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) {
373 if (segment != 0) {
374 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
375 return -1;
376 }
377
378 *ptrptr = (void*)self->segments;
379 return self->segmentCount * sizeof(BufferSegment);
380 }
283 PyType_Slot ZstdBufferSegmentsSlots[] = {
284 {Py_tp_dealloc, BufferSegments_dealloc},
285 #if PY_VERSION_HEX >= 0x03090000
286 {Py_bf_getbuffer, BufferSegments_getbuffer},
287 #endif
288 {Py_tp_new, PyType_GenericNew},
289 {0, NULL},
290 };
381 291
382 static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) {
383 if (len) {
384 *len = 1;
385 }
386
387 return 1;
388 }
389 #endif
390
391 static PyBufferProcs BufferSegments_as_buffer = {
392 #if PY_MAJOR_VERSION >= 3
393 (getbufferproc)BufferSegments_getbuffer,
394 0
395 #else
396 (readbufferproc)BufferSegments_getreadbuffer,
397 0,
398 (segcountproc)BufferSegments_getsegcount,
399 0
400 #endif
292 PyType_Spec ZstdBufferSegmentsSpec = {
293 "zstd.BufferSegments",
294 sizeof(ZstdBufferSegments),
295 0,
296 Py_TPFLAGS_DEFAULT,
297 ZstdBufferSegmentsSlots,
401 298 };
402 299
403 PyTypeObject ZstdBufferSegmentsType = {
404 PyVarObject_HEAD_INIT(NULL, 0)
405 "zstd.BufferSegments", /* tp_name */
406 sizeof(ZstdBufferSegments),/* tp_basicsize */
407 0, /* tp_itemsize */
408 (destructor)BufferSegments_dealloc, /* tp_dealloc */
409 0, /* tp_print */
410 0, /* tp_getattr */
411 0, /* tp_setattr */
412 0, /* tp_compare */
413 0, /* tp_repr */
414 0, /* tp_as_number */
415 0, /* tp_as_sequence */
416 0, /* tp_as_mapping */
417 0, /* tp_hash */
418 0, /* tp_call */
419 0, /* tp_str */
420 0, /* tp_getattro */
421 0, /* tp_setattro */
422 &BufferSegments_as_buffer, /* tp_as_buffer */
423 Py_TPFLAGS_DEFAULT, /* tp_flags */
424 BufferSegments__doc__, /* tp_doc */
425 0, /* tp_traverse */
426 0, /* tp_clear */
427 0, /* tp_richcompare */
428 0, /* tp_weaklistoffset */
429 0, /* tp_iter */
430 0, /* tp_iternext */
431 0, /* tp_methods */
432 0, /* tp_members */
433 0, /* tp_getset */
434 0, /* tp_base */
435 0, /* tp_dict */
436 0, /* tp_descr_get */
437 0, /* tp_descr_set */
438 0, /* tp_dictoffset */
439 0, /* tp_init */
440 0, /* tp_alloc */
441 PyType_GenericNew, /* tp_new */
442 };
300 #if PY_VERSION_HEX < 0x03090000
301 static PyBufferProcs BufferSegments_as_buffer = {
302 (getbufferproc)BufferSegments_getbuffer, 0};
303 #endif
443 304
444 PyDoc_STRVAR(BufferSegment__doc__,
445 "BufferSegment - Represents a segment within a BufferWithSegments\n"
446 );
305 PyTypeObject *ZstdBufferSegmentsType;
447 306
448 static void BufferSegment_dealloc(ZstdBufferSegment* self) {
449 Py_CLEAR(self->parent);
450 PyObject_Del(self);
307 static void BufferSegment_dealloc(ZstdBufferSegment *self) {
308 Py_CLEAR(self->parent);
309 PyObject_Del(self);
451 310 }
452 311
453 static Py_ssize_t BufferSegment_length(ZstdBufferSegment* self) {
454 return self->dataSize;
312 static Py_ssize_t BufferSegment_length(ZstdBufferSegment *self) {
313 return self->dataSize;
314 }
315
316 static int BufferSegment_getbuffer(ZstdBufferSegment *self, Py_buffer *view,
317 int flags) {
318 return PyBuffer_FillInfo(view, (PyObject *)self, self->data, self->dataSize,
319 1, flags);
320 }
321
322 static PyObject *BufferSegment_tobytes(ZstdBufferSegment *self) {
323 return PyBytes_FromStringAndSize(self->data, self->dataSize);
455 324 }
456 325
457 #if PY_MAJOR_VERSION >= 3
458 static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) {
459 return PyBuffer_FillInfo(view, (PyObject*)self,
460 self->data, self->dataSize, 1, flags);
461 }
462 #else
463 static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) {
464 if (segment != 0) {
465 PyErr_SetString(PyExc_ValueError, "segment number must be 0");
466 return -1;
467 }
468
469 *ptrptr = self->data;
470 return self->dataSize;
471 }
472
473 static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) {
474 if (len) {
475 *len = 1;
476 }
477
478 return 1;
479 }
326 #if PY_VERSION_HEX < 0x03090000
327 static PyBufferProcs BufferSegment_as_buffer = {
328 (getbufferproc)BufferSegment_getbuffer, 0};
480 329 #endif
481 330
482 PyDoc_STRVAR(BufferSegment_tobytes__doc__,
483 "Obtain a bytes instance for this segment.\n"
484 );
485
486 static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) {
487 return PyBytes_FromStringAndSize(self->data, self->dataSize);
488 }
489
490 static PySequenceMethods BufferSegment_sq = {
491 (lenfunc)BufferSegment_length, /* sq_length */
492 0, /* sq_concat */
493 0, /* sq_repeat */
494 0, /* sq_item */
495 0, /* sq_ass_item */
496 0, /* sq_contains */
497 0, /* sq_inplace_concat */
498 0 /* sq_inplace_repeat */
499 };
500
501 static PyBufferProcs BufferSegment_as_buffer = {
502 #if PY_MAJOR_VERSION >= 3
503 (getbufferproc)BufferSegment_getbuffer,
504 0
505 #else
506 (readbufferproc)BufferSegment_getreadbuffer,
507 0,
508 (segcountproc)BufferSegment_getsegcount,
509 0
510 #endif
511 };
512
513 331 static PyMethodDef BufferSegment_methods[] = {
514 { "tobytes", (PyCFunction)BufferSegment_tobytes,
515 METH_NOARGS, BufferSegment_tobytes__doc__ },
516 { NULL, NULL }
517 };
332 {"tobytes", (PyCFunction)BufferSegment_tobytes, METH_NOARGS, NULL},
333 {NULL, NULL}};
518 334
519 335 static PyMemberDef BufferSegment_members[] = {
520 { "offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
521 "offset of segment within parent buffer" },
522 { NULL }
336 {"offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY,
337 "offset of segment within parent buffer"},
338 {NULL}};
339
340 PyType_Slot ZstdBufferSegmentSlots[] = {
341 {Py_tp_dealloc, BufferSegment_dealloc},
342 {Py_sq_length, BufferSegment_length},
343 #if PY_VERSION_HEX >= 0x03090000
344 {Py_bf_getbuffer, BufferSegment_getbuffer},
345 #endif
346 {Py_tp_methods, BufferSegment_methods},
347 {Py_tp_members, BufferSegment_members},
348 {Py_tp_new, PyType_GenericNew},
349 {0, NULL},
350 };
351
352 PyType_Spec ZstdBufferSegmentSpec = {
353 "zstd.BufferSegment",
354 sizeof(ZstdBufferSegment),
355 0,
356 Py_TPFLAGS_DEFAULT,
357 ZstdBufferSegmentSlots,
523 358 };
524 359
525 PyTypeObject ZstdBufferSegmentType = {
526 PyVarObject_HEAD_INIT(NULL, 0)
527 "zstd.BufferSegment", /* tp_name */
528 sizeof(ZstdBufferSegment),/* tp_basicsize */
529 0, /* tp_itemsize */
530 (destructor)BufferSegment_dealloc, /* tp_dealloc */
531 0, /* tp_print */
532 0, /* tp_getattr */
533 0, /* tp_setattr */
534 0, /* tp_compare */
535 0, /* tp_repr */
536 0, /* tp_as_number */
537 &BufferSegment_sq, /* tp_as_sequence */
538 0, /* tp_as_mapping */
539 0, /* tp_hash */
540 0, /* tp_call */
541 0, /* tp_str */
542 0, /* tp_getattro */
543 0, /* tp_setattro */
544 &BufferSegment_as_buffer, /* tp_as_buffer */
545 Py_TPFLAGS_DEFAULT, /* tp_flags */
546 BufferSegment__doc__, /* tp_doc */
547 0, /* tp_traverse */
548 0, /* tp_clear */
549 0, /* tp_richcompare */
550 0, /* tp_weaklistoffset */
551 0, /* tp_iter */
552 0, /* tp_iternext */
553 BufferSegment_methods, /* tp_methods */
554 BufferSegment_members, /* tp_members */
555 0, /* tp_getset */
556 0, /* tp_base */
557 0, /* tp_dict */
558 0, /* tp_descr_get */
559 0, /* tp_descr_set */
560 0, /* tp_dictoffset */
561 0, /* tp_init */
562 0, /* tp_alloc */
563 PyType_GenericNew, /* tp_new */
564 };
360 PyTypeObject *ZstdBufferSegmentType;
361
362 static void
363 BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection *self) {
364 Py_ssize_t i;
365
366 if (self->firstElements) {
367 PyMem_Free(self->firstElements);
368 self->firstElements = NULL;
369 }
370
371 if (self->buffers) {
372 for (i = 0; i < self->bufferCount; i++) {
373 Py_CLEAR(self->buffers[i]);
374 }
375
376 PyMem_Free(self->buffers);
377 self->buffers = NULL;
378 }
379
380 PyObject_Del(self);
381 }
382
383 static int
384 BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection *self,
385 PyObject *args) {
386 Py_ssize_t size;
387 Py_ssize_t i;
388 Py_ssize_t offset = 0;
389
390 size = PyTuple_Size(args);
391 if (-1 == size) {
392 return -1;
393 }
394
395 if (0 == size) {
396 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
397 return -1;
398 }
565 399
566 PyDoc_STRVAR(BufferWithSegmentsCollection__doc__,
567 "Represents a collection of BufferWithSegments.\n"
568 );
400 for (i = 0; i < size; i++) {
401 PyObject *item = PyTuple_GET_ITEM(args, i);
402 if (!PyObject_TypeCheck(item, ZstdBufferWithSegmentsType)) {
403 PyErr_SetString(PyExc_TypeError,
404 "arguments must be BufferWithSegments instances");
405 return -1;
406 }
569 407
570 static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) {
571 Py_ssize_t i;
408 if (0 == ((ZstdBufferWithSegments *)item)->segmentCount ||
409 0 == ((ZstdBufferWithSegments *)item)->dataSize) {
410 PyErr_SetString(PyExc_ValueError,
411 "ZstdBufferWithSegments cannot be empty");
412 return -1;
413 }
414 }
415
416 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments *));
417 if (NULL == self->buffers) {
418 PyErr_NoMemory();
419 return -1;
420 }
572 421
573 if (self->firstElements) {
574 PyMem_Free(self->firstElements);
575 self->firstElements = NULL;
576 }
422 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
423 if (NULL == self->firstElements) {
424 PyMem_Free(self->buffers);
425 self->buffers = NULL;
426 PyErr_NoMemory();
427 return -1;
428 }
429
430 self->bufferCount = size;
431
432 for (i = 0; i < size; i++) {
433 ZstdBufferWithSegments *item =
434 (ZstdBufferWithSegments *)PyTuple_GET_ITEM(args, i);
577 435
578 if (self->buffers) {
579 for (i = 0; i < self->bufferCount; i++) {
580 Py_CLEAR(self->buffers[i]);
581 }
436 self->buffers[i] = item;
437 Py_INCREF(item);
438
439 if (i > 0) {
440 self->firstElements[i - 1] = offset;
441 }
582 442
583 PyMem_Free(self->buffers);
584 self->buffers = NULL;
585 }
443 offset += item->segmentCount;
444 }
586 445
587 PyObject_Del(self);
446 self->firstElements[size - 1] = offset;
447
448 return 0;
588 449 }
589 450
590 static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) {
591 Py_ssize_t size;
592 Py_ssize_t i;
593 Py_ssize_t offset = 0;
594
595 size = PyTuple_Size(args);
596 if (-1 == size) {
597 return -1;
598 }
599
600 if (0 == size) {
601 PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument");
602 return -1;
603 }
604
605 for (i = 0; i < size; i++) {
606 PyObject* item = PyTuple_GET_ITEM(args, i);
607 if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) {
608 PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances");
609 return -1;
610 }
611
612 if (0 == ((ZstdBufferWithSegments*)item)->segmentCount ||
613 0 == ((ZstdBufferWithSegments*)item)->dataSize) {
614 PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty");
615 return -1;
616 }
617 }
451 static PyObject *
452 BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection *self) {
453 Py_ssize_t i;
454 Py_ssize_t j;
455 unsigned long long size = 0;
618 456
619 self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*));
620 if (NULL == self->buffers) {
621 PyErr_NoMemory();
622 return -1;
623 }
624
625 self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t));
626 if (NULL == self->firstElements) {
627 PyMem_Free(self->buffers);
628 self->buffers = NULL;
629 PyErr_NoMemory();
630 return -1;
631 }
632
633 self->bufferCount = size;
457 for (i = 0; i < self->bufferCount; i++) {
458 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
459 size += self->buffers[i]->segments[j].length;
460 }
461 }
634 462
635 for (i = 0; i < size; i++) {
636 ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i);
637
638 self->buffers[i] = item;
639 Py_INCREF(item);
463 return PyLong_FromUnsignedLongLong(size);
464 }
640 465
641 if (i > 0) {
642 self->firstElements[i - 1] = offset;
643 }
644
645 offset += item->segmentCount;
646 }
647
648 self->firstElements[size - 1] = offset;
649
650 return 0;
466 Py_ssize_t
467 BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection *self) {
468 return self->firstElements[self->bufferCount - 1];
651 469 }
652 470
653 static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) {
654 Py_ssize_t i;
655 Py_ssize_t j;
656 unsigned long long size = 0;
471 static ZstdBufferSegment *
472 BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection *self,
473 Py_ssize_t i) {
474 Py_ssize_t bufferOffset;
475
476 if (i < 0) {
477 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
478 return NULL;
479 }
480
481 if (i >= BufferWithSegmentsCollection_length(self)) {
482 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
483 BufferWithSegmentsCollection_length(self));
484 return NULL;
485 }
657 486
658 for (i = 0; i < self->bufferCount; i++) {
659 for (j = 0; j < self->buffers[i]->segmentCount; j++) {
660 size += self->buffers[i]->segments[j].length;
661 }
662 }
487 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
488 Py_ssize_t offset = 0;
489
490 if (i < self->firstElements[bufferOffset]) {
491 if (bufferOffset > 0) {
492 offset = self->firstElements[bufferOffset - 1];
493 }
663 494
664 return PyLong_FromUnsignedLongLong(size);
665 }
495 return BufferWithSegments_item(self->buffers[bufferOffset],
496 i - offset);
497 }
498 }
666 499
667 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) {
668 return self->firstElements[self->bufferCount - 1];
500 PyErr_SetString(ZstdError,
501 "error resolving segment; this should not happen");
502 return NULL;
669 503 }
670 504
671 static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) {
672 Py_ssize_t bufferOffset;
673
674 if (i < 0) {
675 PyErr_SetString(PyExc_IndexError, "offset must be non-negative");
676 return NULL;
677 }
678
679 if (i >= BufferWithSegmentsCollection_length(self)) {
680 PyErr_Format(PyExc_IndexError, "offset must be less than %zd",
681 BufferWithSegmentsCollection_length(self));
682 return NULL;
683 }
684
685 for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) {
686 Py_ssize_t offset = 0;
505 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
506 {"size", (PyCFunction)BufferWithSegmentsCollection_size, METH_NOARGS,
507 PyDoc_STR("total size in bytes of all segments")},
508 {NULL, NULL}};
687 509
688 if (i < self->firstElements[bufferOffset]) {
689 if (bufferOffset > 0) {
690 offset = self->firstElements[bufferOffset - 1];
691 }
692
693 return BufferWithSegments_item(self->buffers[bufferOffset], i - offset);
694 }
695 }
696
697 PyErr_SetString(ZstdError, "error resolving segment; this should not happen");
698 return NULL;
699 }
700
701 static PySequenceMethods BufferWithSegmentsCollection_sq = {
702 (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */
703 0, /* sq_concat */
704 0, /* sq_repeat */
705 (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */
706 0, /* sq_ass_item */
707 0, /* sq_contains */
708 0, /* sq_inplace_concat */
709 0 /* sq_inplace_repeat */
510 PyType_Slot ZstdBufferWithSegmentsCollectionSlots[] = {
511 {Py_tp_dealloc, BufferWithSegmentsCollection_dealloc},
512 {Py_sq_length, BufferWithSegmentsCollection_length},
513 {Py_sq_item, BufferWithSegmentsCollection_item},
514 {Py_tp_methods, BufferWithSegmentsCollection_methods},
515 {Py_tp_init, BufferWithSegmentsCollection_init},
516 {Py_tp_new, PyType_GenericNew},
517 {0, NULL},
710 518 };
711 519
712 static PyMethodDef BufferWithSegmentsCollection_methods[] = {
713 { "size", (PyCFunction)BufferWithSegmentsCollection_size,
714 METH_NOARGS, PyDoc_STR("total size in bytes of all segments") },
715 { NULL, NULL }
520 PyType_Spec ZstdBufferWithSegmentsCollectionSpec = {
521 "zstd.BufferWithSegmentsCollection",
522 sizeof(ZstdBufferWithSegmentsCollection),
523 0,
524 Py_TPFLAGS_DEFAULT,
525 ZstdBufferWithSegmentsCollectionSlots,
716 526 };
717 527
718 PyTypeObject ZstdBufferWithSegmentsCollectionType = {
719 PyVarObject_HEAD_INIT(NULL, 0)
720 "zstd.BufferWithSegmentsCollection", /* tp_name */
721 sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */
722 0, /* tp_itemsize */
723 (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */
724 0, /* tp_print */
725 0, /* tp_getattr */
726 0, /* tp_setattr */
727 0, /* tp_compare */
728 0, /* tp_repr */
729 0, /* tp_as_number */
730 &BufferWithSegmentsCollection_sq, /* tp_as_sequence */
731 0, /* tp_as_mapping */
732 0, /* tp_hash */
733 0, /* tp_call */
734 0, /* tp_str */
735 0, /* tp_getattro */
736 0, /* tp_setattro */
737 0, /* tp_as_buffer */
738 Py_TPFLAGS_DEFAULT, /* tp_flags */
739 BufferWithSegmentsCollection__doc__, /* tp_doc */
740 0, /* tp_traverse */
741 0, /* tp_clear */
742 0, /* tp_richcompare */
743 0, /* tp_weaklistoffset */
744 /* TODO implement iterator for performance. */
745 0, /* tp_iter */
746 0, /* tp_iternext */
747 BufferWithSegmentsCollection_methods, /* tp_methods */
748 0, /* tp_members */
749 0, /* tp_getset */
750 0, /* tp_base */
751 0, /* tp_dict */
752 0, /* tp_descr_get */
753 0, /* tp_descr_set */
754 0, /* tp_dictoffset */
755 (initproc)BufferWithSegmentsCollection_init, /* tp_init */
756 0, /* tp_alloc */
757 PyType_GenericNew, /* tp_new */
758 };
528 PyTypeObject *ZstdBufferWithSegmentsCollectionType;
529
530 void bufferutil_module_init(PyObject *mod) {
531 ZstdBufferWithSegmentsType =
532 (PyTypeObject *)PyType_FromSpec(&ZstdBufferWithSegmentsSpec);
533 #if PY_VERSION_HEX < 0x03090000
534 ZstdBufferWithSegmentsType->tp_as_buffer = &BufferWithSegments_as_buffer;
535 #endif
536 if (PyType_Ready(ZstdBufferWithSegmentsType) < 0) {
537 return;
538 }
539
540 Py_INCREF(ZstdBufferWithSegmentsType);
541 PyModule_AddObject(mod, "BufferWithSegments",
542 (PyObject *)ZstdBufferWithSegmentsType);
543
544 ZstdBufferSegmentsType =
545 (PyTypeObject *)PyType_FromSpec(&ZstdBufferSegmentsSpec);
546 #if PY_VERSION_HEX < 0x03090000
547 ZstdBufferSegmentsType->tp_as_buffer = &BufferSegments_as_buffer;
548 #endif
549 if (PyType_Ready(ZstdBufferSegmentsType) < 0) {
550 return;
551 }
759 552
760 void bufferutil_module_init(PyObject* mod) {
761 Py_SET_TYPE(&ZstdBufferWithSegmentsType, &PyType_Type);
762 if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) {
763 return;
764 }
553 Py_INCREF(ZstdBufferSegmentsType);
554 PyModule_AddObject(mod, "BufferSegments",
555 (PyObject *)ZstdBufferSegmentsType);
765 556
766 Py_INCREF(&ZstdBufferWithSegmentsType);
767 PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType);
768
769 Py_SET_TYPE(&ZstdBufferSegmentsType, &PyType_Type);
770 if (PyType_Ready(&ZstdBufferSegmentsType) < 0) {
771 return;
772 }
773
774 Py_INCREF(&ZstdBufferSegmentsType);
775 PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType);
557 ZstdBufferSegmentType =
558 (PyTypeObject *)PyType_FromSpec(&ZstdBufferSegmentSpec);
559 #if PY_VERSION_HEX < 0x03090000
560 ZstdBufferSegmentType->tp_as_buffer = &BufferSegment_as_buffer;
561 #endif
562 if (PyType_Ready(ZstdBufferSegmentType) < 0) {
563 return;
564 }
776 565
777 Py_SET_TYPE(&ZstdBufferSegmentType, &PyType_Type);
778 if (PyType_Ready(&ZstdBufferSegmentType) < 0) {
779 return;
780 }
781
782 Py_INCREF(&ZstdBufferSegmentType);
783 PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType);
566 Py_INCREF(ZstdBufferSegmentType);
567 PyModule_AddObject(mod, "BufferSegment", (PyObject *)ZstdBufferSegmentType);
784 568
785 Py_SET_TYPE(&ZstdBufferWithSegmentsCollectionType, &PyType_Type);
786 if (PyType_Ready(&ZstdBufferWithSegmentsCollectionType) < 0) {
787 return;
788 }
569 ZstdBufferWithSegmentsCollectionType =
570 (PyTypeObject *)PyType_FromSpec(&ZstdBufferWithSegmentsCollectionSpec);
571 if (PyType_Ready(ZstdBufferWithSegmentsCollectionType) < 0) {
572 return;
573 }
789 574
790 Py_INCREF(&ZstdBufferWithSegmentsCollectionType);
791 PyModule_AddObject(mod, "BufferWithSegmentsCollection", (PyObject*)&ZstdBufferWithSegmentsCollectionType);
575 Py_INCREF(ZstdBufferWithSegmentsCollectionType);
576 PyModule_AddObject(mod, "BufferWithSegmentsCollection",
577 (PyObject *)ZstdBufferWithSegmentsCollectionType);
792 578 }
This diff has been collapsed as it changes many lines, (528 lines changed) Show them Hide them
@@ -1,360 +1,310
1 1 /**
2 * Copyright (c) 2018-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2018-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
11 extern PyObject *ZstdError;
12 12
13 PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
14 "Iterator of output chunks from ZstdCompressionChunker.\n"
15 );
13 static void
14 ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator *self) {
15 Py_XDECREF(self->chunker);
16 16
17 static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
18 Py_XDECREF(self->chunker);
19
20 PyObject_Del(self);
17 PyObject_Del(self);
21 18 }
22 19
23 static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
24 Py_INCREF(self);
25 return self;
20 static PyObject *ZstdCompressionChunkerIterator_iter(PyObject *self) {
21 Py_INCREF(self);
22 return self;
26 23 }
27 24
28 static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
29 size_t zresult;
30 PyObject* chunk;
31 ZstdCompressionChunker* chunker = self->chunker;
32 ZSTD_EndDirective zFlushMode;
33
34 if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
35 PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
36 return NULL;
37 }
25 static PyObject *
26 ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator *self) {
27 size_t zresult;
28 PyObject *chunk;
29 ZstdCompressionChunker *chunker = self->chunker;
30 ZSTD_EndDirective zFlushMode;
38 31
39 if (chunker->finished) {
40 return NULL;
41 }
32 if (self->mode != compressionchunker_mode_normal &&
33 chunker->input.pos != chunker->input.size) {
34 PyErr_SetString(ZstdError, "input should have been fully consumed "
35 "before calling flush() or finish()");
36 return NULL;
37 }
42 38
43 /* If we have data left in the input, consume it. */
44 while (chunker->input.pos < chunker->input.size) {
45 Py_BEGIN_ALLOW_THREADS
46 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
47 &chunker->input, ZSTD_e_continue);
48 Py_END_ALLOW_THREADS
39 if (chunker->finished) {
40 return NULL;
41 }
42
43 /* If we have data left in the input, consume it. */
44 while (chunker->input.pos < chunker->input.size) {
45 Py_BEGIN_ALLOW_THREADS zresult =
46 ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
47 &chunker->input, ZSTD_e_continue);
48 Py_END_ALLOW_THREADS
49 49
50 /* Input is fully consumed. */
51 if (chunker->input.pos == chunker->input.size) {
52 chunker->input.src = NULL;
53 chunker->input.pos = 0;
54 chunker->input.size = 0;
55 PyBuffer_Release(&chunker->inBuffer);
56 }
50 /* Input is fully consumed. */
51 if (chunker->input.pos == chunker->input.size) {
52 chunker->input.src = NULL;
53 chunker->input.pos = 0;
54 chunker->input.size = 0;
55 PyBuffer_Release(&chunker->inBuffer);
56 }
57 57
58 if (ZSTD_isError(zresult)) {
59 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
60 return NULL;
61 }
58 if (ZSTD_isError(zresult)) {
59 PyErr_Format(ZstdError, "zstd compress error: %s",
60 ZSTD_getErrorName(zresult));
61 return NULL;
62 }
62 63
63 /* If it produced a full output chunk, emit it. */
64 if (chunker->output.pos == chunker->output.size) {
65 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
66 if (!chunk) {
67 return NULL;
68 }
64 /* If it produced a full output chunk, emit it. */
65 if (chunker->output.pos == chunker->output.size) {
66 chunk = PyBytes_FromStringAndSize(chunker->output.dst,
67 chunker->output.pos);
68 if (!chunk) {
69 return NULL;
70 }
69 71
70 chunker->output.pos = 0;
72 chunker->output.pos = 0;
71 73
72 return chunk;
73 }
74 return chunk;
75 }
74 76
75 /* Else continue to compress available input data. */
76 }
77 /* Else continue to compress available input data. */
78 }
77 79
78 /* We also need this here for the special case of an empty input buffer. */
79 if (chunker->input.pos == chunker->input.size) {
80 chunker->input.src = NULL;
81 chunker->input.pos = 0;
82 chunker->input.size = 0;
83 PyBuffer_Release(&chunker->inBuffer);
84 }
80 /* We also need this here for the special case of an empty input buffer. */
81 if (chunker->input.pos == chunker->input.size) {
82 chunker->input.src = NULL;
83 chunker->input.pos = 0;
84 chunker->input.size = 0;
85 PyBuffer_Release(&chunker->inBuffer);
86 }
85 87
86 /* No more input data. A partial chunk may be in chunker->output.
87 * If we're in normal compression mode, we're done. Otherwise if we're in
88 * flush or finish mode, we need to emit what data remains.
89 */
90 if (self->mode == compressionchunker_mode_normal) {
91 /* We don't need to set StopIteration. */
92 return NULL;
93 }
94
95 if (self->mode == compressionchunker_mode_flush) {
96 zFlushMode = ZSTD_e_flush;
97 }
98 else if (self->mode == compressionchunker_mode_finish) {
99 zFlushMode = ZSTD_e_end;
100 }
101 else {
102 PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
103 return NULL;
104 }
88 /* No more input data. A partial chunk may be in chunker->output.
89 * If we're in normal compression mode, we're done. Otherwise if we're in
90 * flush or finish mode, we need to emit what data remains.
91 */
92 if (self->mode == compressionchunker_mode_normal) {
93 /* We don't need to set StopIteration. */
94 return NULL;
95 }
105 96
106 Py_BEGIN_ALLOW_THREADS
107 zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
108 &chunker->input, zFlushMode);
109 Py_END_ALLOW_THREADS
97 if (self->mode == compressionchunker_mode_flush) {
98 zFlushMode = ZSTD_e_flush;
99 }
100 else if (self->mode == compressionchunker_mode_finish) {
101 zFlushMode = ZSTD_e_end;
102 }
103 else {
104 PyErr_SetString(ZstdError,
105 "unhandled compression mode; this should never happen");
106 return NULL;
107 }
110 108
111 if (ZSTD_isError(zresult)) {
112 PyErr_Format(ZstdError, "zstd compress error: %s",
113 ZSTD_getErrorName(zresult));
114 return NULL;
115 }
109 Py_BEGIN_ALLOW_THREADS zresult =
110 ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output,
111 &chunker->input, zFlushMode);
112 Py_END_ALLOW_THREADS
116 113
117 if (!zresult && chunker->output.pos == 0) {
118 return NULL;
119 }
114 if (ZSTD_isError(zresult)) {
115 PyErr_Format(ZstdError, "zstd compress error: %s",
116 ZSTD_getErrorName(zresult));
117 return NULL;
118 }
119
120 if (!zresult && chunker->output.pos == 0) {
121 return NULL;
122 }
120 123
121 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
122 if (!chunk) {
123 return NULL;
124 }
124 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
125 if (!chunk) {
126 return NULL;
127 }
125 128
126 chunker->output.pos = 0;
129 chunker->output.pos = 0;
127 130
128 if (!zresult && self->mode == compressionchunker_mode_finish) {
129 chunker->finished = 1;
130 }
131 if (!zresult && self->mode == compressionchunker_mode_finish) {
132 chunker->finished = 1;
133 }
131 134
132 return chunk;
135 return chunk;
133 136 }
134 137
135 PyTypeObject ZstdCompressionChunkerIteratorType = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "zstd.ZstdCompressionChunkerIterator", /* tp_name */
138 sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
139 0, /* tp_itemsize */
140 (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
141 0, /* tp_print */
142 0, /* tp_getattr */
143 0, /* tp_setattr */
144 0, /* tp_compare */
145 0, /* tp_repr */
146 0, /* tp_as_number */
147 0, /* tp_as_sequence */
148 0, /* tp_as_mapping */
149 0, /* tp_hash */
150 0, /* tp_call */
151 0, /* tp_str */
152 0, /* tp_getattro */
153 0, /* tp_setattro */
154 0, /* tp_as_buffer */
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
156 ZstdCompressionChunkerIterator__doc__, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 ZstdCompressionChunkerIterator_iter, /* tp_iter */
162 (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
163 0, /* tp_methods */
164 0, /* tp_members */
165 0, /* tp_getset */
166 0, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 PyType_GenericNew, /* tp_new */
138 PyType_Slot ZstdCompressionChunkerIteratorSlots[] = {
139 {Py_tp_dealloc, ZstdCompressionChunkerIterator_dealloc},
140 {Py_tp_iter, ZstdCompressionChunkerIterator_iter},
141 {Py_tp_iternext, ZstdCompressionChunkerIterator_iternext},
142 {Py_tp_new, PyType_GenericNew},
143 {0, NULL},
174 144 };
175 145
176 PyDoc_STRVAR(ZstdCompressionChunker__doc__,
177 "Compress chunks iteratively into exact chunk sizes.\n"
178 );
146 PyType_Spec ZstdCompressionChunkerIteratorSpec = {
147 "zstd.ZstdCompressionChunkerIterator",
148 sizeof(ZstdCompressionChunkerIterator),
149 0,
150 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
151 ZstdCompressionChunkerIteratorSlots,
152 };
153
154 PyTypeObject *ZstdCompressionChunkerIteratorType;
179 155
180 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
181 PyBuffer_Release(&self->inBuffer);
182 self->input.src = NULL;
156 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker *self) {
157 PyBuffer_Release(&self->inBuffer);
158 self->input.src = NULL;
183 159
184 PyMem_Free(self->output.dst);
185 self->output.dst = NULL;
160 PyMem_Free(self->output.dst);
161 self->output.dst = NULL;
186 162
187 Py_XDECREF(self->compressor);
163 Py_XDECREF(self->compressor);
188 164
189 PyObject_Del(self);
165 PyObject_Del(self);
190 166 }
191 167
192 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
193 static char* kwlist[] = {
194 "data",
195 NULL
196 };
168 static ZstdCompressionChunkerIterator *
169 ZstdCompressionChunker_compress(ZstdCompressionChunker *self, PyObject *args,
170 PyObject *kwargs) {
171 static char *kwlist[] = {"data", NULL};
197 172
198 ZstdCompressionChunkerIterator* result;
173 ZstdCompressionChunkerIterator *result;
199 174
200 if (self->finished) {
201 PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
202 return NULL;
203 }
175 if (self->finished) {
176 PyErr_SetString(ZstdError,
177 "cannot call compress() after compression finished");
178 return NULL;
179 }
204 180
205 if (self->inBuffer.obj) {
206 PyErr_SetString(ZstdError,
207 "cannot perform operation before consuming output from previous operation");
208 return NULL;
209 }
181 if (self->inBuffer.obj) {
182 PyErr_SetString(ZstdError, "cannot perform operation before consuming "
183 "output from previous operation");
184 return NULL;
185 }
210 186
211 #if PY_MAJOR_VERSION >= 3
212 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
213 #else
214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
215 #endif
216 kwlist, &self->inBuffer)) {
217 return NULL;
218 }
219
220 if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
221 PyErr_SetString(PyExc_ValueError,
222 "data buffer should be contiguous and have at most one dimension");
223 PyBuffer_Release(&self->inBuffer);
224 return NULL;
225 }
187 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", kwlist,
188 &self->inBuffer)) {
189 return NULL;
190 }
226 191
227 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
228 if (!result) {
229 PyBuffer_Release(&self->inBuffer);
230 return NULL;
231 }
192 result = (ZstdCompressionChunkerIterator *)PyObject_CallObject(
193 (PyObject *)ZstdCompressionChunkerIteratorType, NULL);
194 if (!result) {
195 PyBuffer_Release(&self->inBuffer);
196 return NULL;
197 }
232 198
233 self->input.src = self->inBuffer.buf;
234 self->input.size = self->inBuffer.len;
235 self->input.pos = 0;
199 self->input.src = self->inBuffer.buf;
200 self->input.size = self->inBuffer.len;
201 self->input.pos = 0;
236 202
237 result->chunker = self;
238 Py_INCREF(result->chunker);
203 result->chunker = self;
204 Py_INCREF(result->chunker);
239 205
240 result->mode = compressionchunker_mode_normal;
206 result->mode = compressionchunker_mode_normal;
241 207
242 return result;
208 return result;
243 209 }
244 210
245 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
246 ZstdCompressionChunkerIterator* result;
211 static ZstdCompressionChunkerIterator *
212 ZstdCompressionChunker_finish(ZstdCompressionChunker *self) {
213 ZstdCompressionChunkerIterator *result;
247 214
248 if (self->finished) {
249 PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
250 return NULL;
251 }
215 if (self->finished) {
216 PyErr_SetString(ZstdError,
217 "cannot call finish() after compression finished");
218 return NULL;
219 }
252 220
253 if (self->inBuffer.obj) {
254 PyErr_SetString(ZstdError,
255 "cannot call finish() before consuming output from previous operation");
256 return NULL;
257 }
221 if (self->inBuffer.obj) {
222 PyErr_SetString(ZstdError, "cannot call finish() before consuming "
223 "output from previous operation");
224 return NULL;
225 }
258 226
259 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
260 if (!result) {
261 return NULL;
262 }
227 result = (ZstdCompressionChunkerIterator *)PyObject_CallObject(
228 (PyObject *)ZstdCompressionChunkerIteratorType, NULL);
229 if (!result) {
230 return NULL;
231 }
263 232
264 result->chunker = self;
265 Py_INCREF(result->chunker);
233 result->chunker = self;
234 Py_INCREF(result->chunker);
266 235
267 result->mode = compressionchunker_mode_finish;
236 result->mode = compressionchunker_mode_finish;
268 237
269 return result;
238 return result;
270 239 }
271 240
272 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
273 ZstdCompressionChunkerIterator* result;
241 static ZstdCompressionChunkerIterator *
242 ZstdCompressionChunker_flush(ZstdCompressionChunker *self, PyObject *args,
243 PyObject *kwargs) {
244 ZstdCompressionChunkerIterator *result;
274 245
275 if (self->finished) {
276 PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
277 return NULL;
278 }
246 if (self->finished) {
247 PyErr_SetString(ZstdError,
248 "cannot call flush() after compression finished");
249 return NULL;
250 }
279 251
280 if (self->inBuffer.obj) {
281 PyErr_SetString(ZstdError,
282 "cannot call flush() before consuming output from previous operation");
283 return NULL;
284 }
252 if (self->inBuffer.obj) {
253 PyErr_SetString(ZstdError, "cannot call flush() before consuming "
254 "output from previous operation");
255 return NULL;
256 }
285 257
286 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
287 if (!result) {
288 return NULL;
289 }
258 result = (ZstdCompressionChunkerIterator *)PyObject_CallObject(
259 (PyObject *)ZstdCompressionChunkerIteratorType, NULL);
260 if (!result) {
261 return NULL;
262 }
290 263
291 result->chunker = self;
292 Py_INCREF(result->chunker);
264 result->chunker = self;
265 Py_INCREF(result->chunker);
293 266
294 result->mode = compressionchunker_mode_flush;
267 result->mode = compressionchunker_mode_flush;
295 268
296 return result;
269 return result;
297 270 }
298 271
299 272 static PyMethodDef ZstdCompressionChunker_methods[] = {
300 { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
301 PyDoc_STR("compress data") },
302 { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
303 PyDoc_STR("finish compression operation") },
304 { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
305 PyDoc_STR("finish compression operation") },
306 { NULL, NULL }
273 {"compress", (PyCFunction)ZstdCompressionChunker_compress,
274 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("compress data")},
275 {"finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
276 PyDoc_STR("finish compression operation")},
277 {"flush", (PyCFunction)ZstdCompressionChunker_flush,
278 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("finish compression operation")},
279 {NULL, NULL}};
280
281 PyType_Slot ZstdCompressionChunkerSlots[] = {
282 {Py_tp_dealloc, ZstdCompressionChunker_dealloc},
283 {Py_tp_methods, ZstdCompressionChunker_methods},
284 {Py_tp_new, PyType_GenericNew},
285 {0, NULL},
307 286 };
308 287
309 PyTypeObject ZstdCompressionChunkerType = {
310 PyVarObject_HEAD_INIT(NULL, 0)
311 "zstd.ZstdCompressionChunkerType", /* tp_name */
312 sizeof(ZstdCompressionChunker), /* tp_basicsize */
313 0, /* tp_itemsize */
314 (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
315 0, /* tp_print */
316 0, /* tp_getattr */
317 0, /* tp_setattr */
318 0, /* tp_compare */
319 0, /* tp_repr */
320 0, /* tp_as_number */
321 0, /* tp_as_sequence */
322 0, /* tp_as_mapping */
323 0, /* tp_hash */
324 0, /* tp_call */
325 0, /* tp_str */
326 0, /* tp_getattro */
327 0, /* tp_setattro */
328 0, /* tp_as_buffer */
329 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
330 ZstdCompressionChunker__doc__, /* tp_doc */
331 0, /* tp_traverse */
332 0, /* tp_clear */
333 0, /* tp_richcompare */
334 0, /* tp_weaklistoffset */
335 0, /* tp_iter */
336 0, /* tp_iternext */
337 ZstdCompressionChunker_methods, /* tp_methods */
338 0, /* tp_members */
339 0, /* tp_getset */
340 0, /* tp_base */
341 0, /* tp_dict */
342 0, /* tp_descr_get */
343 0, /* tp_descr_set */
344 0, /* tp_dictoffset */
345 0, /* tp_init */
346 0, /* tp_alloc */
347 PyType_GenericNew, /* tp_new */
288 PyType_Spec ZstdCompressionChunkerSpec = {
289 "zstd.ZstdCompressionChunkerType",
290 sizeof(ZstdCompressionChunker),
291 0,
292 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
293 ZstdCompressionChunkerSlots,
348 294 };
349 295
350 void compressionchunker_module_init(PyObject* module) {
351 Py_SET_TYPE(&ZstdCompressionChunkerIteratorType, &PyType_Type);
352 if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
353 return;
354 }
296 PyTypeObject *ZstdCompressionChunkerType;
355 297
356 Py_SET_TYPE(&ZstdCompressionChunkerType, &PyType_Type);
357 if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
358 return;
359 }
298 void compressionchunker_module_init(PyObject *module) {
299 ZstdCompressionChunkerIteratorType =
300 (PyTypeObject *)PyType_FromSpec(&ZstdCompressionChunkerIteratorSpec);
301 if (PyType_Ready(ZstdCompressionChunkerIteratorType) < 0) {
302 return;
303 }
304
305 ZstdCompressionChunkerType =
306 (PyTypeObject *)PyType_FromSpec(&ZstdCompressionChunkerSpec);
307 if (PyType_Ready(ZstdCompressionChunkerType) < 0) {
308 return;
309 }
360 310 }
This diff has been collapsed as it changes many lines, (617 lines changed) Show them Hide them
@@ -1,411 +1,348
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
11 extern PyObject *ZstdError;
12 12
13 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs) {
14 static char* kwlist[] = {
15 "dict_size",
16 "samples",
17 "k",
18 "d",
19 "notifications",
20 "dict_id",
21 "level",
22 "steps",
23 "threads",
24 NULL
25 };
13 ZstdCompressionDict *train_dictionary(PyObject *self, PyObject *args,
14 PyObject *kwargs) {
15 static char *kwlist[] = {
16 "dict_size", "samples", "k", "d",
17 "f", "split_point", "accel", "notifications",
18 "dict_id", "level", "steps", "threads",
19 NULL};
26 20
27 size_t capacity;
28 PyObject* samples;
29 unsigned k = 0;
30 unsigned d = 0;
31 unsigned notifications = 0;
32 unsigned dictID = 0;
33 int level = 0;
34 unsigned steps = 0;
35 int threads = 0;
36 ZDICT_cover_params_t params;
37 Py_ssize_t samplesLen;
38 Py_ssize_t i;
39 size_t samplesSize = 0;
40 void* sampleBuffer = NULL;
41 size_t* sampleSizes = NULL;
42 void* sampleOffset;
43 Py_ssize_t sampleSize;
44 void* dict = NULL;
45 size_t zresult;
46 ZstdCompressionDict* result = NULL;
21 size_t capacity;
22 PyObject *samples;
23 unsigned k = 0;
24 unsigned d = 0;
25 unsigned f = 0;
26 double splitPoint = 0.0;
27 unsigned accel = 0;
28 unsigned notifications = 0;
29 unsigned dictID = 0;
30 int level = 0;
31 unsigned steps = 0;
32 int threads = 0;
33 ZDICT_fastCover_params_t params;
34 Py_ssize_t samplesLen;
35 Py_ssize_t i;
36 size_t samplesSize = 0;
37 void *sampleBuffer = NULL;
38 size_t *sampleSizes = NULL;
39 void *sampleOffset;
40 Py_ssize_t sampleSize;
41 void *dict = NULL;
42 size_t zresult;
43 ZstdCompressionDict *result = NULL;
47 44
48 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary",
49 kwlist, &capacity, &PyList_Type, &samples,
50 &k, &d, &notifications, &dictID, &level, &steps, &threads)) {
51 return NULL;
52 }
45 if (!PyArg_ParseTupleAndKeywords(
46 args, kwargs, "nO!|IIIdIIIiIi:train_dictionary", kwlist, &capacity,
47 &PyList_Type, &samples, &k, &d, &f, &splitPoint, &accel,
48 &notifications, &dictID, &level, &steps, &threads)) {
49 return NULL;
50 }
53 51
54 if (threads < 0) {
55 threads = cpu_count();
56 }
52 if (threads < 0) {
53 threads = cpu_count();
54 }
57 55
58 memset(&params, 0, sizeof(params));
59 params.k = k;
60 params.d = d;
61 params.steps = steps;
62 params.nbThreads = threads;
63 params.zParams.notificationLevel = notifications;
64 params.zParams.dictID = dictID;
65 params.zParams.compressionLevel = level;
56 if (!steps && !threads) {
57 /* Defaults from ZDICT_trainFromBuffer() */
58 d = d ? d : 8;
59 steps = steps ? steps : 4;
60 level = level ? level : 3;
61 }
66 62
67 /* Figure out total size of input samples. */
68 samplesLen = PyList_Size(samples);
69 for (i = 0; i < samplesLen; i++) {
70 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
63 memset(&params, 0, sizeof(params));
64 params.k = k;
65 params.d = d;
66 params.f = f;
67 params.steps = steps;
68 params.nbThreads = threads;
69 params.splitPoint = splitPoint;
70 params.accel = accel;
71
72 params.zParams.compressionLevel = level;
73 params.zParams.dictID = dictID;
74 params.zParams.notificationLevel = notifications;
71 75
72 if (!PyBytes_Check(sampleItem)) {
73 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
74 return NULL;
75 }
76 samplesSize += PyBytes_GET_SIZE(sampleItem);
77 }
76 /* Figure out total size of input samples. */
77 samplesLen = PyList_Size(samples);
78 for (i = 0; i < samplesLen; i++) {
79 PyObject *sampleItem = PyList_GET_ITEM(samples, i);
78 80
79 sampleBuffer = PyMem_Malloc(samplesSize);
80 if (!sampleBuffer) {
81 PyErr_NoMemory();
82 goto finally;
83 }
81 if (!PyBytes_Check(sampleItem)) {
82 PyErr_SetString(PyExc_ValueError, "samples must be bytes");
83 return NULL;
84 }
85 samplesSize += PyBytes_GET_SIZE(sampleItem);
86 }
84 87
85 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
86 if (!sampleSizes) {
87 PyErr_NoMemory();
88 goto finally;
89 }
88 sampleBuffer = PyMem_Malloc(samplesSize);
89 if (!sampleBuffer) {
90 PyErr_NoMemory();
91 goto finally;
92 }
90 93
91 sampleOffset = sampleBuffer;
92 for (i = 0; i < samplesLen; i++) {
93 PyObject* sampleItem = PyList_GET_ITEM(samples, i);
94 sampleSize = PyBytes_GET_SIZE(sampleItem);
95 sampleSizes[i] = sampleSize;
96 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
97 sampleOffset = (char*)sampleOffset + sampleSize;
98 }
99
100 dict = PyMem_Malloc(capacity);
101 if (!dict) {
102 PyErr_NoMemory();
103 goto finally;
104 }
94 sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
95 if (!sampleSizes) {
96 PyErr_NoMemory();
97 goto finally;
98 }
105 99
106 Py_BEGIN_ALLOW_THREADS
107 /* No parameters uses the default function, which will use default params
108 and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */
109 if (!params.k && !params.d && !params.zParams.compressionLevel
110 && !params.zParams.notificationLevel && !params.zParams.dictID) {
111 zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer,
112 sampleSizes, (unsigned)samplesLen);
113 }
114 /* Use optimize mode if user controlled steps or threads explicitly. */
115 else if (params.steps || params.nbThreads) {
116 zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity,
117 sampleBuffer, sampleSizes, (unsigned)samplesLen, &params);
118 }
119 /* Non-optimize mode with explicit control. */
120 else {
121 zresult = ZDICT_trainFromBuffer_cover(dict, capacity,
122 sampleBuffer, sampleSizes, (unsigned)samplesLen, params);
123 }
124 Py_END_ALLOW_THREADS
100 sampleOffset = sampleBuffer;
101 for (i = 0; i < samplesLen; i++) {
102 PyObject *sampleItem = PyList_GET_ITEM(samples, i);
103 sampleSize = PyBytes_GET_SIZE(sampleItem);
104 sampleSizes[i] = sampleSize;
105 memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize);
106 sampleOffset = (char *)sampleOffset + sampleSize;
107 }
108
109 dict = PyMem_Malloc(capacity);
110 if (!dict) {
111 PyErr_NoMemory();
112 goto finally;
113 }
114
115 Py_BEGIN_ALLOW_THREADS zresult = ZDICT_optimizeTrainFromBuffer_fastCover(
116 dict, capacity, sampleBuffer, sampleSizes, (unsigned)samplesLen,
117 &params);
118 Py_END_ALLOW_THREADS
125 119
126 if (ZDICT_isError(zresult)) {
127 PyMem_Free(dict);
128 PyErr_Format(ZstdError, "cannot train dict: %s", ZDICT_getErrorName(zresult));
129 goto finally;
130 }
120 if (ZDICT_isError(zresult)) {
121 PyMem_Free(dict);
122 PyErr_Format(ZstdError, "cannot train dict: %s",
123 ZDICT_getErrorName(zresult));
124 goto finally;
125 }
131 126
132 result = PyObject_New(ZstdCompressionDict, &ZstdCompressionDictType);
133 if (!result) {
134 PyMem_Free(dict);
135 goto finally;
136 }
127 result = PyObject_New(ZstdCompressionDict, ZstdCompressionDictType);
128 if (!result) {
129 PyMem_Free(dict);
130 goto finally;
131 }
137 132
138 result->dictData = dict;
139 result->dictSize = zresult;
140 result->dictType = ZSTD_dct_fullDict;
141 result->d = params.d;
142 result->k = params.k;
143 result->cdict = NULL;
144 result->ddict = NULL;
133 result->dictData = dict;
134 result->dictSize = zresult;
135 result->dictType = ZSTD_dct_fullDict;
136 result->d = params.d;
137 result->k = params.k;
138 result->cdict = NULL;
139 result->ddict = NULL;
145 140
146 141 finally:
147 PyMem_Free(sampleBuffer);
148 PyMem_Free(sampleSizes);
142 PyMem_Free(sampleBuffer);
143 PyMem_Free(sampleSizes);
149 144
150 return result;
145 return result;
151 146 }
152 147
153 int ensure_ddict(ZstdCompressionDict* dict) {
154 if (dict->ddict) {
155 return 0;
156 }
148 int ensure_ddict(ZstdCompressionDict *dict) {
149 if (dict->ddict) {
150 return 0;
151 }
157 152
158 Py_BEGIN_ALLOW_THREADS
159 dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize,
160 ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem);
161 Py_END_ALLOW_THREADS
162 if (!dict->ddict) {
163 PyErr_SetString(ZstdError, "could not create decompression dict");
164 return 1;
165 }
153 Py_BEGIN_ALLOW_THREADS dict->ddict = ZSTD_createDDict_advanced(
154 dict->dictData, dict->dictSize, ZSTD_dlm_byRef, dict->dictType,
155 ZSTD_defaultCMem);
156 Py_END_ALLOW_THREADS if (!dict->ddict) {
157 PyErr_SetString(ZstdError, "could not create decompression dict");
158 return 1;
159 }
166 160
167 return 0;
161 return 0;
168 162 }
169 163
170 PyDoc_STRVAR(ZstdCompressionDict__doc__,
171 "ZstdCompressionDict(data) - Represents a computed compression dictionary\n"
172 "\n"
173 "This type holds the results of a computed Zstandard compression dictionary.\n"
174 "Instances are obtained by calling ``train_dictionary()`` or by passing\n"
175 "bytes obtained from another source into the constructor.\n"
176 );
164 static int ZstdCompressionDict_init(ZstdCompressionDict *self, PyObject *args,
165 PyObject *kwargs) {
166 static char *kwlist[] = {"data", "dict_type", NULL};
167
168 int result = -1;
169 Py_buffer source;
170 unsigned dictType = ZSTD_dct_auto;
177 171
178 static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
179 static char* kwlist[] = {
180 "data",
181 "dict_type",
182 NULL
183 };
172 self->dictData = NULL;
173 self->dictSize = 0;
174 self->cdict = NULL;
175 self->ddict = NULL;
184 176
185 int result = -1;
186 Py_buffer source;
187 unsigned dictType = ZSTD_dct_auto;
188
189 self->dictData = NULL;
190 self->dictSize = 0;
191 self->cdict = NULL;
192 self->ddict = NULL;
177 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict",
178 kwlist, &source, &dictType)) {
179 return -1;
180 }
193 181
194 #if PY_MAJOR_VERSION >= 3
195 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict",
196 #else
197 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict",
198 #endif
199 kwlist, &source, &dictType)) {
200 return -1;
201 }
182 if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent &&
183 dictType != ZSTD_dct_fullDict) {
184 PyErr_Format(
185 PyExc_ValueError,
186 "invalid dictionary load mode: %d; must use DICT_TYPE_* constants",
187 dictType);
188 goto finally;
189 }
202 190
203 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
204 PyErr_SetString(PyExc_ValueError,
205 "data buffer should be contiguous and have at most one dimension");
206 goto finally;
207 }
191 self->dictType = dictType;
208 192
209 if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent
210 && dictType != ZSTD_dct_fullDict) {
211 PyErr_Format(PyExc_ValueError,
212 "invalid dictionary load mode: %d; must use DICT_TYPE_* constants",
213 dictType);
214 goto finally;
215 }
216
217 self->dictType = dictType;
193 self->dictData = PyMem_Malloc(source.len);
194 if (!self->dictData) {
195 PyErr_NoMemory();
196 goto finally;
197 }
218 198
219 self->dictData = PyMem_Malloc(source.len);
220 if (!self->dictData) {
221 PyErr_NoMemory();
222 goto finally;
223 }
199 memcpy(self->dictData, source.buf, source.len);
200 self->dictSize = source.len;
224 201
225 memcpy(self->dictData, source.buf, source.len);
226 self->dictSize = source.len;
227
228 result = 0;
202 result = 0;
229 203
230 204 finally:
231 PyBuffer_Release(&source);
232 return result;
205 PyBuffer_Release(&source);
206 return result;
233 207 }
234 208
235 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
236 if (self->cdict) {
237 ZSTD_freeCDict(self->cdict);
238 self->cdict = NULL;
239 }
209 static void ZstdCompressionDict_dealloc(ZstdCompressionDict *self) {
210 if (self->cdict) {
211 ZSTD_freeCDict(self->cdict);
212 self->cdict = NULL;
213 }
240 214
241 if (self->ddict) {
242 ZSTD_freeDDict(self->ddict);
243 self->ddict = NULL;
244 }
215 if (self->ddict) {
216 ZSTD_freeDDict(self->ddict);
217 self->ddict = NULL;
218 }
245 219
246 if (self->dictData) {
247 PyMem_Free(self->dictData);
248 self->dictData = NULL;
249 }
220 if (self->dictData) {
221 PyMem_Free(self->dictData);
222 self->dictData = NULL;
223 }
250 224
251 PyObject_Del(self);
225 PyObject_Del(self);
252 226 }
253 227
254 PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__,
255 "Precompute a dictionary so it can be used by multiple compressors.\n"
256 );
228 static PyObject *
229 ZstdCompressionDict_precompute_compress(ZstdCompressionDict *self,
230 PyObject *args, PyObject *kwargs) {
231 static char *kwlist[] = {"level", "compression_params", NULL};
257 232
258 static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) {
259 static char* kwlist[] = {
260 "level",
261 "compression_params",
262 NULL
263 };
233 int level = 0;
234 ZstdCompressionParametersObject *compressionParams = NULL;
235 ZSTD_compressionParameters cParams;
236 size_t zresult;
264 237
265 int level = 0;
266 ZstdCompressionParametersObject* compressionParams = NULL;
267 ZSTD_compressionParameters cParams;
268 size_t zresult;
238 if (!PyArg_ParseTupleAndKeywords(
239 args, kwargs, "|iO!:precompute_compress", kwlist, &level,
240 ZstdCompressionParametersType, &compressionParams)) {
241 return NULL;
242 }
269 243
270 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist,
271 &level, &ZstdCompressionParametersType, &compressionParams)) {
272 return NULL;
273 }
244 if (level && compressionParams) {
245 PyErr_SetString(PyExc_ValueError,
246 "must only specify one of level or compression_params");
247 return NULL;
248 }
274 249
275 if (level && compressionParams) {
276 PyErr_SetString(PyExc_ValueError,
277 "must only specify one of level or compression_params");
278 return NULL;
279 }
250 if (!level && !compressionParams) {
251 PyErr_SetString(PyExc_ValueError,
252 "must specify one of level or compression_params");
253 return NULL;
254 }
280 255
281 if (!level && !compressionParams) {
282 PyErr_SetString(PyExc_ValueError,
283 "must specify one of level or compression_params");
284 return NULL;
285 }
286
287 if (self->cdict) {
288 zresult = ZSTD_freeCDict(self->cdict);
289 self->cdict = NULL;
290 if (ZSTD_isError(zresult)) {
291 PyErr_Format(ZstdError, "unable to free CDict: %s",
292 ZSTD_getErrorName(zresult));
293 return NULL;
294 }
295 }
256 if (self->cdict) {
257 zresult = ZSTD_freeCDict(self->cdict);
258 self->cdict = NULL;
259 if (ZSTD_isError(zresult)) {
260 PyErr_Format(ZstdError, "unable to free CDict: %s",
261 ZSTD_getErrorName(zresult));
262 return NULL;
263 }
264 }
296 265
297 if (level) {
298 cParams = ZSTD_getCParams(level, 0, self->dictSize);
299 }
300 else {
301 if (to_cparams(compressionParams, &cParams)) {
302 return NULL;
303 }
304 }
266 if (level) {
267 cParams = ZSTD_getCParams(level, 0, self->dictSize);
268 }
269 else {
270 if (to_cparams(compressionParams, &cParams)) {
271 return NULL;
272 }
273 }
305 274
306 assert(!self->cdict);
307 self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize,
308 ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem);
275 assert(!self->cdict);
276 self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize,
277 ZSTD_dlm_byRef, self->dictType,
278 cParams, ZSTD_defaultCMem);
309 279
310 if (!self->cdict) {
311 PyErr_SetString(ZstdError, "unable to precompute dictionary");
312 return NULL;
313 }
280 if (!self->cdict) {
281 PyErr_SetString(ZstdError, "unable to precompute dictionary");
282 return NULL;
283 }
314 284
315 Py_RETURN_NONE;
285 Py_RETURN_NONE;
316 286 }
317 287
318 static PyObject* ZstdCompressionDict_dict_id(ZstdCompressionDict* self) {
319 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
288 static PyObject *ZstdCompressionDict_dict_id(ZstdCompressionDict *self) {
289 unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize);
320 290
321 return PyLong_FromLong(dictID);
291 return PyLong_FromLong(dictID);
322 292 }
323 293
324 static PyObject* ZstdCompressionDict_as_bytes(ZstdCompressionDict* self) {
325 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
294 static PyObject *ZstdCompressionDict_as_bytes(ZstdCompressionDict *self) {
295 return PyBytes_FromStringAndSize(self->dictData, self->dictSize);
326 296 }
327 297
328 298 static PyMethodDef ZstdCompressionDict_methods[] = {
329 { "dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
330 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID") },
331 { "as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
332 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary data") },
333 { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress,
334 METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ },
335 { NULL, NULL }
336 };
299 {"dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS,
300 PyDoc_STR("dict_id() -- obtain the numeric dictionary ID")},
301 {"as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS,
302 PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary "
303 "data")},
304 {"precompute_compress",
305 (PyCFunction)ZstdCompressionDict_precompute_compress,
306 METH_VARARGS | METH_KEYWORDS, NULL},
307 {NULL, NULL}};
337 308
338 309 static PyMemberDef ZstdCompressionDict_members[] = {
339 { "k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY,
340 "segment size" },
341 { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY,
342 "dmer size" },
343 { NULL }
344 };
310 {"k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY, "segment size"},
311 {"d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY, "dmer size"},
312 {NULL}};
345 313
346 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* self) {
347 return self->dictSize;
314 static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict *self) {
315 return self->dictSize;
348 316 }
349 317
350 static PySequenceMethods ZstdCompressionDict_sq = {
351 (lenfunc)ZstdCompressionDict_length, /* sq_length */
352 0, /* sq_concat */
353 0, /* sq_repeat */
354 0, /* sq_item */
355 0, /* sq_ass_item */
356 0, /* sq_contains */
357 0, /* sq_inplace_concat */
358 0 /* sq_inplace_repeat */
318 PyType_Slot ZstdCompressionDictSlots[] = {
319 {Py_tp_dealloc, ZstdCompressionDict_dealloc},
320 {Py_sq_length, ZstdCompressionDict_length},
321 {Py_tp_methods, ZstdCompressionDict_methods},
322 {Py_tp_members, ZstdCompressionDict_members},
323 {Py_tp_init, ZstdCompressionDict_init},
324 {Py_tp_new, PyType_GenericNew},
325 {0, NULL},
359 326 };
360 327
361 PyTypeObject ZstdCompressionDictType = {
362 PyVarObject_HEAD_INIT(NULL, 0)
363 "zstd.ZstdCompressionDict", /* tp_name */
364 sizeof(ZstdCompressionDict), /* tp_basicsize */
365 0, /* tp_itemsize */
366 (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */
367 0, /* tp_print */
368 0, /* tp_getattr */
369 0, /* tp_setattr */
370 0, /* tp_compare */
371 0, /* tp_repr */
372 0, /* tp_as_number */
373 &ZstdCompressionDict_sq, /* tp_as_sequence */
374 0, /* tp_as_mapping */
375 0, /* tp_hash */
376 0, /* tp_call */
377 0, /* tp_str */
378 0, /* tp_getattro */
379 0, /* tp_setattro */
380 0, /* tp_as_buffer */
381 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
382 ZstdCompressionDict__doc__, /* tp_doc */
383 0, /* tp_traverse */
384 0, /* tp_clear */
385 0, /* tp_richcompare */
386 0, /* tp_weaklistoffset */
387 0, /* tp_iter */
388 0, /* tp_iternext */
389 ZstdCompressionDict_methods, /* tp_methods */
390 ZstdCompressionDict_members, /* tp_members */
391 0, /* tp_getset */
392 0, /* tp_base */
393 0, /* tp_dict */
394 0, /* tp_descr_get */
395 0, /* tp_descr_set */
396 0, /* tp_dictoffset */
397 (initproc)ZstdCompressionDict_init, /* tp_init */
398 0, /* tp_alloc */
399 PyType_GenericNew, /* tp_new */
328 PyType_Spec ZstdCompressionDictSpec = {
329 "zstd.ZstdCompressionDict",
330 sizeof(ZstdCompressionDict),
331 0,
332 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
333 ZstdCompressionDictSlots,
400 334 };
401 335
402 void compressiondict_module_init(PyObject* mod) {
403 Py_SET_TYPE(&ZstdCompressionDictType, &PyType_Type);
404 if (PyType_Ready(&ZstdCompressionDictType) < 0) {
405 return;
406 }
336 PyTypeObject *ZstdCompressionDictType;
407 337
408 Py_INCREF((PyObject*)&ZstdCompressionDictType);
409 PyModule_AddObject(mod, "ZstdCompressionDict",
410 (PyObject*)&ZstdCompressionDictType);
338 void compressiondict_module_init(PyObject *mod) {
339 ZstdCompressionDictType =
340 (PyTypeObject *)PyType_FromSpec(&ZstdCompressionDictSpec);
341 if (PyType_Ready(ZstdCompressionDictType) < 0) {
342 return;
343 }
344
345 Py_INCREF((PyObject *)ZstdCompressionDictType);
346 PyModule_AddObject(mod, "ZstdCompressionDict",
347 (PyObject *)ZstdCompressionDictType);
411 348 }
This diff has been collapsed as it changes many lines, (873 lines changed) Show them Hide them
@@ -1,449 +1,416
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
12
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
14 size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value);
15 if (ZSTD_isError(zresult)) {
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
17 ZSTD_getErrorName(zresult));
18 return 1;
19 }
20
21 return 0;
22 }
11 extern PyObject *ZstdError;
23 12
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
13 int set_parameter(ZSTD_CCtx_params *params, ZSTD_cParameter param, int value) {
14 size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value);
15 if (ZSTD_isError(zresult)) {
16 PyErr_Format(ZstdError,
17 "unable to set compression context parameter: %s",
18 ZSTD_getErrorName(zresult));
19 return 1;
20 }
25 21
26 #define TRY_COPY_PARAMETER(source, dest, param) { \
27 int result; \
28 size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
29 if (ZSTD_isError(zresult)) { \
30 return 1; \
31 } \
32 zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
33 if (ZSTD_isError(zresult)) { \
34 return 1; \
35 } \
22 return 0;
36 23 }
37 24
38 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
39 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
25 #define TRY_SET_PARAMETER(params, param, value) \
26 if (set_parameter(params, param, value)) \
27 return -1;
28
29 #define TRY_COPY_PARAMETER(source, dest, param) \
30 { \
31 int result; \
32 size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
33 if (ZSTD_isError(zresult)) { \
34 return 1; \
35 } \
36 zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
37 if (ZSTD_isError(zresult)) { \
38 return 1; \
39 } \
40 }
41
42 int set_parameters(ZSTD_CCtx_params *params,
43 ZstdCompressionParametersObject *obj) {
44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
40 45
41 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
42 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
43 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
45 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
61 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
62 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
63 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
64 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
65 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
61 66
62 return 0;
67 return 0;
63 68 }
64 69
65 int reset_params(ZstdCompressionParametersObject* params) {
66 if (params->params) {
67 ZSTD_CCtxParams_reset(params->params);
68 }
69 else {
70 params->params = ZSTD_createCCtxParams();
71 if (!params->params) {
72 PyErr_NoMemory();
73 return 1;
74 }
75 }
70 int reset_params(ZstdCompressionParametersObject *params) {
71 if (params->params) {
72 ZSTD_CCtxParams_reset(params->params);
73 }
74 else {
75 params->params = ZSTD_createCCtxParams();
76 if (!params->params) {
77 PyErr_NoMemory();
78 return 1;
79 }
80 }
76 81
77 return set_parameters(params->params, params);
78 }
79
80 #define TRY_GET_PARAMETER(params, param, value) { \
81 size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
82 if (ZSTD_isError(zresult)) { \
83 PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
84 return 1; \
85 } \
82 return set_parameters(params->params, params);
86 83 }
87 84
88 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
89 int value;
90
91 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
92 cparams->windowLog = value;
85 #define TRY_GET_PARAMETER(params, param, value) \
86 { \
87 size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
88 if (ZSTD_isError(zresult)) { \
89 PyErr_Format(ZstdError, "unable to retrieve parameter: %s", \
90 ZSTD_getErrorName(zresult)); \
91 return 1; \
92 } \
93 }
93 94
94 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
95 cparams->chainLog = value;
95 int to_cparams(ZstdCompressionParametersObject *params,
96 ZSTD_compressionParameters *cparams) {
97 int value;
96 98
97 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
98 cparams->hashLog = value;
99 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
100 cparams->windowLog = value;
99 101
100 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
101 cparams->searchLog = value;
102 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
103 cparams->chainLog = value;
102 104
103 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
104 cparams->minMatch = value;
105 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
106 cparams->hashLog = value;
107
108 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
109 cparams->searchLog = value;
105 110
106 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
107 cparams->targetLength = value;
111 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
112 cparams->minMatch = value;
108 113
109 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
110 cparams->strategy = value;
114 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
115 cparams->targetLength = value;
111 116
112 return 0;
117 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
118 cparams->strategy = value;
119
120 return 0;
113 121 }
114 122
115 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
116 static char* kwlist[] = {
117 "format",
118 "compression_level",
119 "window_log",
120 "hash_log",
121 "chain_log",
122 "search_log",
123 "min_match",
124 "target_length",
125 "compression_strategy",
126 "strategy",
127 "write_content_size",
128 "write_checksum",
129 "write_dict_id",
130 "job_size",
131 "overlap_log",
132 "overlap_size_log",
133 "force_max_window",
134 "enable_ldm",
135 "ldm_hash_log",
136 "ldm_min_match",
137 "ldm_bucket_size_log",
138 "ldm_hash_rate_log",
139 "ldm_hash_every_log",
140 "threads",
141 NULL
142 };
123 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject *self,
124 PyObject *args, PyObject *kwargs) {
125 static char *kwlist[] = {"format",
126 "compression_level",
127 "window_log",
128 "hash_log",
129 "chain_log",
130 "search_log",
131 "min_match",
132 "target_length",
133 "strategy",
134 "write_content_size",
135 "write_checksum",
136 "write_dict_id",
137 "job_size",
138 "overlap_log",
139 "force_max_window",
140 "enable_ldm",
141 "ldm_hash_log",
142 "ldm_min_match",
143 "ldm_bucket_size_log",
144 "ldm_hash_rate_log",
145 "threads",
146 NULL};
143 147
144 int format = 0;
145 int compressionLevel = 0;
146 int windowLog = 0;
147 int hashLog = 0;
148 int chainLog = 0;
149 int searchLog = 0;
150 int minMatch = 0;
151 int targetLength = 0;
152 int compressionStrategy = -1;
153 int strategy = -1;
154 int contentSizeFlag = 1;
155 int checksumFlag = 0;
156 int dictIDFlag = 0;
157 int jobSize = 0;
158 int overlapLog = -1;
159 int overlapSizeLog = -1;
160 int forceMaxWindow = 0;
161 int enableLDM = 0;
162 int ldmHashLog = 0;
163 int ldmMinMatch = 0;
164 int ldmBucketSizeLog = 0;
165 int ldmHashRateLog = -1;
166 int ldmHashEveryLog = -1;
167 int threads = 0;
148 int format = 0;
149 int compressionLevel = 0;
150 int windowLog = 0;
151 int hashLog = 0;
152 int chainLog = 0;
153 int searchLog = 0;
154 int minMatch = 0;
155 int targetLength = 0;
156 int strategy = -1;
157 int contentSizeFlag = 1;
158 int checksumFlag = 0;
159 int dictIDFlag = 0;
160 int jobSize = 0;
161 int overlapLog = -1;
162 int forceMaxWindow = 0;
163 int enableLDM = 0;
164 int ldmHashLog = 0;
165 int ldmMinMatch = 0;
166 int ldmBucketSizeLog = 0;
167 int ldmHashRateLog = -1;
168 int threads = 0;
168 169
169 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
170 "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
171 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
172 &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
173 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
174 &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
175 &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
176 return -1;
177 }
170 if (!PyArg_ParseTupleAndKeywords(
171 args, kwargs, "|iiiiiiiiiiiiiiiiiiiii:ZstdCompressionParameters",
172 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
173 &searchLog, &minMatch, &targetLength, &strategy, &contentSizeFlag,
174 &checksumFlag, &dictIDFlag, &jobSize, &overlapLog, &forceMaxWindow,
175 &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog,
176 &ldmHashRateLog, &threads)) {
177 return -1;
178 }
178 179
179 if (reset_params(self)) {
180 return -1;
181 }
180 if (reset_params(self)) {
181 return -1;
182 }
182 183
183 if (threads < 0) {
184 threads = cpu_count();
185 }
184 if (threads < 0) {
185 threads = cpu_count();
186 }
186 187
187 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
188 * because setting ZSTD_c_nbWorkers resets the other parameters. */
189 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
188 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and
189 * ZSTD_c_overlapLog because setting ZSTD_c_nbWorkers resets the other
190 * parameters. */
191 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
190 192
191 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
192 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
193 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
194 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
195 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
196 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
197 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
198 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
193 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
194 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
195 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
196 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
197 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
198 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
199 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
200 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
199 201
200 if (compressionStrategy != -1 && strategy != -1) {
201 PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
202 return -1;
202 if (strategy == -1) {
203 strategy = 0;
203 204 }
204 205
205 if (compressionStrategy != -1) {
206 strategy = compressionStrategy;
207 }
208 else if (strategy == -1) {
209 strategy = 0;
210 }
206 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
207 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
208 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
209 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
210 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
211 211
212 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
213 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
214 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
215 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
216 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
217
218 if (overlapLog != -1 && overlapSizeLog != -1) {
219 PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
220 return -1;
221 }
212 if (overlapLog == -1) {
213 overlapLog = 0;
214 }
222 215
223 if (overlapSizeLog != -1) {
224 overlapLog = overlapSizeLog;
225 }
226 else if (overlapLog == -1) {
227 overlapLog = 0;
228 }
229
230 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
231 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
232 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
233 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
234 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
235 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
216 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
217 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
218 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching,
219 enableLDM);
220 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
221 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
222 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
236 223
237 if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
238 PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
239 return -1;
240 }
224 if (ldmHashRateLog == -1) {
225 ldmHashRateLog = 0;
226 }
241 227
242 if (ldmHashEveryLog != -1) {
243 ldmHashRateLog = ldmHashEveryLog;
244 }
245 else if (ldmHashRateLog == -1) {
246 ldmHashRateLog = 0;
247 }
228 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
248 229
249 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
250
251 return 0;
230 return 0;
252 231 }
253 232
254 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
255 "Create a CompressionParameters from a compression level and target sizes\n"
256 );
233 ZstdCompressionParametersObject *
234 CompressionParameters_from_level(PyObject *undef, PyObject *args,
235 PyObject *kwargs) {
236 int managedKwargs = 0;
237 int level;
238 PyObject *sourceSize = NULL;
239 PyObject *dictSize = NULL;
240 unsigned PY_LONG_LONG iSourceSize = 0;
241 Py_ssize_t iDictSize = 0;
242 PyObject *val;
243 ZSTD_compressionParameters params;
244 ZstdCompressionParametersObject *result = NULL;
245 int res;
257 246
258 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
259 int managedKwargs = 0;
260 int level;
261 PyObject* sourceSize = NULL;
262 PyObject* dictSize = NULL;
263 unsigned PY_LONG_LONG iSourceSize = 0;
264 Py_ssize_t iDictSize = 0;
265 PyObject* val;
266 ZSTD_compressionParameters params;
267 ZstdCompressionParametersObject* result = NULL;
268 int res;
247 if (!PyArg_ParseTuple(args, "i:from_level", &level)) {
248 return NULL;
249 }
269 250
270 if (!PyArg_ParseTuple(args, "i:from_level",
271 &level)) {
272 return NULL;
273 }
251 if (!kwargs) {
252 kwargs = PyDict_New();
253 if (!kwargs) {
254 return NULL;
255 }
256 managedKwargs = 1;
257 }
274 258
275 if (!kwargs) {
276 kwargs = PyDict_New();
277 if (!kwargs) {
278 return NULL;
279 }
280 managedKwargs = 1;
281 }
259 sourceSize = PyDict_GetItemString(kwargs, "source_size");
260 if (sourceSize) {
261 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
262 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
263 goto cleanup;
264 }
282 265
283 sourceSize = PyDict_GetItemString(kwargs, "source_size");
284 if (sourceSize) {
285 #if PY_MAJOR_VERSION >= 3
286 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
287 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
288 goto cleanup;
289 }
290 #else
291 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
292 #endif
266 PyDict_DelItemString(kwargs, "source_size");
267 }
293 268
294 PyDict_DelItemString(kwargs, "source_size");
295 }
269 dictSize = PyDict_GetItemString(kwargs, "dict_size");
270 if (dictSize) {
271 iDictSize = PyLong_AsSsize_t(dictSize);
272 if (iDictSize == -1) {
273 goto cleanup;
274 }
275
276 PyDict_DelItemString(kwargs, "dict_size");
277 }
278
279 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
296 280
297 dictSize = PyDict_GetItemString(kwargs, "dict_size");
298 if (dictSize) {
299 #if PY_MAJOR_VERSION >= 3
300 iDictSize = PyLong_AsSsize_t(dictSize);
301 #else
302 iDictSize = PyInt_AsSsize_t(dictSize);
303 #endif
304 if (iDictSize == -1) {
305 goto cleanup;
306 }
307
308 PyDict_DelItemString(kwargs, "dict_size");
309 }
281 /* Values derived from the input level and sizes are passed along to the
282 constructor. But only if a value doesn't already exist. */
283 val = PyDict_GetItemString(kwargs, "window_log");
284 if (!val) {
285 val = PyLong_FromUnsignedLong(params.windowLog);
286 if (!val) {
287 goto cleanup;
288 }
289 PyDict_SetItemString(kwargs, "window_log", val);
290 Py_DECREF(val);
291 }
310 292
311
312 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
313
314 /* Values derived from the input level and sizes are passed along to the
315 constructor. But only if a value doesn't already exist. */
316 val = PyDict_GetItemString(kwargs, "window_log");
317 if (!val) {
318 val = PyLong_FromUnsignedLong(params.windowLog);
319 if (!val) {
320 goto cleanup;
321 }
322 PyDict_SetItemString(kwargs, "window_log", val);
323 Py_DECREF(val);
324 }
293 val = PyDict_GetItemString(kwargs, "chain_log");
294 if (!val) {
295 val = PyLong_FromUnsignedLong(params.chainLog);
296 if (!val) {
297 goto cleanup;
298 }
299 PyDict_SetItemString(kwargs, "chain_log", val);
300 Py_DECREF(val);
301 }
325 302
326 val = PyDict_GetItemString(kwargs, "chain_log");
327 if (!val) {
328 val = PyLong_FromUnsignedLong(params.chainLog);
329 if (!val) {
330 goto cleanup;
331 }
332 PyDict_SetItemString(kwargs, "chain_log", val);
333 Py_DECREF(val);
334 }
335
336 val = PyDict_GetItemString(kwargs, "hash_log");
337 if (!val) {
338 val = PyLong_FromUnsignedLong(params.hashLog);
339 if (!val) {
340 goto cleanup;
341 }
342 PyDict_SetItemString(kwargs, "hash_log", val);
343 Py_DECREF(val);
344 }
303 val = PyDict_GetItemString(kwargs, "hash_log");
304 if (!val) {
305 val = PyLong_FromUnsignedLong(params.hashLog);
306 if (!val) {
307 goto cleanup;
308 }
309 PyDict_SetItemString(kwargs, "hash_log", val);
310 Py_DECREF(val);
311 }
345 312
346 val = PyDict_GetItemString(kwargs, "search_log");
347 if (!val) {
348 val = PyLong_FromUnsignedLong(params.searchLog);
349 if (!val) {
350 goto cleanup;
351 }
352 PyDict_SetItemString(kwargs, "search_log", val);
353 Py_DECREF(val);
354 }
313 val = PyDict_GetItemString(kwargs, "search_log");
314 if (!val) {
315 val = PyLong_FromUnsignedLong(params.searchLog);
316 if (!val) {
317 goto cleanup;
318 }
319 PyDict_SetItemString(kwargs, "search_log", val);
320 Py_DECREF(val);
321 }
355 322
356 val = PyDict_GetItemString(kwargs, "min_match");
357 if (!val) {
358 val = PyLong_FromUnsignedLong(params.minMatch);
359 if (!val) {
360 goto cleanup;
361 }
362 PyDict_SetItemString(kwargs, "min_match", val);
363 Py_DECREF(val);
364 }
323 val = PyDict_GetItemString(kwargs, "min_match");
324 if (!val) {
325 val = PyLong_FromUnsignedLong(params.minMatch);
326 if (!val) {
327 goto cleanup;
328 }
329 PyDict_SetItemString(kwargs, "min_match", val);
330 Py_DECREF(val);
331 }
365 332
366 val = PyDict_GetItemString(kwargs, "target_length");
367 if (!val) {
368 val = PyLong_FromUnsignedLong(params.targetLength);
369 if (!val) {
370 goto cleanup;
371 }
372 PyDict_SetItemString(kwargs, "target_length", val);
373 Py_DECREF(val);
374 }
333 val = PyDict_GetItemString(kwargs, "target_length");
334 if (!val) {
335 val = PyLong_FromUnsignedLong(params.targetLength);
336 if (!val) {
337 goto cleanup;
338 }
339 PyDict_SetItemString(kwargs, "target_length", val);
340 Py_DECREF(val);
341 }
375 342
376 val = PyDict_GetItemString(kwargs, "compression_strategy");
377 if (!val) {
378 val = PyLong_FromUnsignedLong(params.strategy);
379 if (!val) {
380 goto cleanup;
381 }
382 PyDict_SetItemString(kwargs, "compression_strategy", val);
383 Py_DECREF(val);
384 }
343 val = PyDict_GetItemString(kwargs, "strategy");
344 if (!val) {
345 val = PyLong_FromUnsignedLong(params.strategy);
346 if (!val) {
347 goto cleanup;
348 }
349 PyDict_SetItemString(kwargs, "strategy", val);
350 Py_DECREF(val);
351 }
385 352
386 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
387 if (!result) {
388 goto cleanup;
389 }
353 result = PyObject_New(ZstdCompressionParametersObject,
354 ZstdCompressionParametersType);
355 if (!result) {
356 goto cleanup;
357 }
390 358
391 result->params = NULL;
359 result->params = NULL;
392 360
393 val = PyTuple_New(0);
394 if (!val) {
395 Py_CLEAR(result);
396 goto cleanup;
397 }
361 val = PyTuple_New(0);
362 if (!val) {
363 Py_CLEAR(result);
364 goto cleanup;
365 }
398 366
399 res = ZstdCompressionParameters_init(result, val, kwargs);
400 Py_DECREF(val);
367 res = ZstdCompressionParameters_init(result, val, kwargs);
368 Py_DECREF(val);
401 369
402 if (res) {
403 Py_CLEAR(result);
404 goto cleanup;
405 }
370 if (res) {
371 Py_CLEAR(result);
372 goto cleanup;
373 }
406 374
407 375 cleanup:
408 if (managedKwargs) {
409 Py_DECREF(kwargs);
410 }
376 if (managedKwargs) {
377 Py_DECREF(kwargs);
378 }
411 379
412 return result;
380 return result;
413 381 }
414 382
415 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
416 "Estimate the size in bytes of a compression context for compression parameters\n"
417 );
418
419 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
420 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
383 PyObject *ZstdCompressionParameters_estimated_compression_context_size(
384 ZstdCompressionParametersObject *self) {
385 return PyLong_FromSize_t(
386 ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
421 387 }
422 388
423 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
424 "ZstdCompressionParameters: low-level control over zstd compression");
389 static void
390 ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject *self) {
391 if (self->params) {
392 ZSTD_freeCCtxParams(self->params);
393 self->params = NULL;
394 }
425 395
426 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
427 if (self->params) {
428 ZSTD_freeCCtxParams(self->params);
429 self->params = NULL;
430 }
431
432 PyObject_Del(self);
396 PyObject_Del(self);
433 397 }
434 398
435 #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
436 int result; \
437 size_t zresult; \
438 ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
439 zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
440 if (ZSTD_isError(zresult)) { \
441 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
442 ZSTD_getErrorName(zresult)); \
443 return NULL; \
444 } \
445 return PyLong_FromLong(result); \
446 }
399 #define PARAM_GETTER(name, param) \
400 PyObject *ZstdCompressionParameters_get_##name(PyObject *self, \
401 void *unused) { \
402 int result; \
403 size_t zresult; \
404 ZstdCompressionParametersObject *p = \
405 (ZstdCompressionParametersObject *)(self); \
406 zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
407 if (ZSTD_isError(zresult)) { \
408 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
409 ZSTD_getErrorName(zresult)); \
410 return NULL; \
411 } \
412 return PyLong_FromLong(result); \
413 }
447 414
448 415 PARAM_GETTER(format, ZSTD_c_format)
449 416 PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
@@ -453,7 +420,7 PARAM_GETTER(chain_log, ZSTD_c_chainLog)
453 420 PARAM_GETTER(search_log, ZSTD_c_searchLog)
454 421 PARAM_GETTER(min_match, ZSTD_c_minMatch)
455 422 PARAM_GETTER(target_length, ZSTD_c_targetLength)
456 PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
423 PARAM_GETTER(strategy, ZSTD_c_strategy)
457 424 PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
458 425 PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
459 426 PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
@@ -468,105 +435,67 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_l
468 435 PARAM_GETTER(threads, ZSTD_c_nbWorkers)
469 436
470 437 static PyMethodDef ZstdCompressionParameters_methods[] = {
471 {
472 "from_level",
473 (PyCFunction)CompressionParameters_from_level,
474 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
475 ZstdCompressionParameters_from_level__doc__
476 },
477 {
478 "estimated_compression_context_size",
479 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
480 METH_NOARGS,
481 ZstdCompressionParameters_estimated_compression_context_size__doc__
482 },
483 { NULL, NULL }
484 };
438 {"from_level", (PyCFunction)CompressionParameters_from_level,
439 METH_VARARGS | METH_KEYWORDS | METH_STATIC, NULL},
440 {"estimated_compression_context_size",
441 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
442 METH_NOARGS, NULL},
443 {NULL, NULL}};
485 444
486 #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
445 #define GET_SET_ENTRY(name) \
446 { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
487 447
488 448 static PyGetSetDef ZstdCompressionParameters_getset[] = {
489 GET_SET_ENTRY(format),
490 GET_SET_ENTRY(compression_level),
491 GET_SET_ENTRY(window_log),
492 GET_SET_ENTRY(hash_log),
493 GET_SET_ENTRY(chain_log),
494 GET_SET_ENTRY(search_log),
495 GET_SET_ENTRY(min_match),
496 GET_SET_ENTRY(target_length),
497 GET_SET_ENTRY(compression_strategy),
498 GET_SET_ENTRY(write_content_size),
499 GET_SET_ENTRY(write_checksum),
500 GET_SET_ENTRY(write_dict_id),
501 GET_SET_ENTRY(threads),
502 GET_SET_ENTRY(job_size),
503 GET_SET_ENTRY(overlap_log),
504 /* TODO remove this deprecated attribute */
505 { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
506 GET_SET_ENTRY(force_max_window),
507 GET_SET_ENTRY(enable_ldm),
508 GET_SET_ENTRY(ldm_hash_log),
509 GET_SET_ENTRY(ldm_min_match),
510 GET_SET_ENTRY(ldm_bucket_size_log),
511 GET_SET_ENTRY(ldm_hash_rate_log),
512 /* TODO remove this deprecated attribute */
513 { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
514 { NULL }
449 GET_SET_ENTRY(format),
450 GET_SET_ENTRY(compression_level),
451 GET_SET_ENTRY(window_log),
452 GET_SET_ENTRY(hash_log),
453 GET_SET_ENTRY(chain_log),
454 GET_SET_ENTRY(search_log),
455 GET_SET_ENTRY(min_match),
456 GET_SET_ENTRY(target_length),
457 GET_SET_ENTRY(strategy),
458 GET_SET_ENTRY(write_content_size),
459 GET_SET_ENTRY(write_checksum),
460 GET_SET_ENTRY(write_dict_id),
461 GET_SET_ENTRY(threads),
462 GET_SET_ENTRY(job_size),
463 GET_SET_ENTRY(overlap_log),
464 GET_SET_ENTRY(force_max_window),
465 GET_SET_ENTRY(enable_ldm),
466 GET_SET_ENTRY(ldm_hash_log),
467 GET_SET_ENTRY(ldm_min_match),
468 GET_SET_ENTRY(ldm_bucket_size_log),
469 GET_SET_ENTRY(ldm_hash_rate_log),
470 {NULL}};
471
472 PyType_Slot ZstdCompressionParametersSlots[] = {
473 {Py_tp_dealloc, ZstdCompressionParameters_dealloc},
474 {Py_tp_methods, ZstdCompressionParameters_methods},
475 {Py_tp_getset, ZstdCompressionParameters_getset},
476 {Py_tp_init, ZstdCompressionParameters_init},
477 {Py_tp_new, PyType_GenericNew},
478 {0, NULL},
515 479 };
516 480
517 PyTypeObject ZstdCompressionParametersType = {
518 PyVarObject_HEAD_INIT(NULL, 0)
519 "ZstdCompressionParameters", /* tp_name */
520 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
521 0, /* tp_itemsize */
522 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
523 0, /* tp_print */
524 0, /* tp_getattr */
525 0, /* tp_setattr */
526 0, /* tp_compare */
527 0, /* tp_repr */
528 0, /* tp_as_number */
529 0, /* tp_as_sequence */
530 0, /* tp_as_mapping */
531 0, /* tp_hash */
532 0, /* tp_call */
533 0, /* tp_str */
534 0, /* tp_getattro */
535 0, /* tp_setattro */
536 0, /* tp_as_buffer */
537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
538 ZstdCompressionParameters__doc__, /* tp_doc */
539 0, /* tp_traverse */
540 0, /* tp_clear */
541 0, /* tp_richcompare */
542 0, /* tp_weaklistoffset */
543 0, /* tp_iter */
544 0, /* tp_iternext */
545 ZstdCompressionParameters_methods, /* tp_methods */
546 0, /* tp_members */
547 ZstdCompressionParameters_getset, /* tp_getset */
548 0, /* tp_base */
549 0, /* tp_dict */
550 0, /* tp_descr_get */
551 0, /* tp_descr_set */
552 0, /* tp_dictoffset */
553 (initproc)ZstdCompressionParameters_init, /* tp_init */
554 0, /* tp_alloc */
555 PyType_GenericNew, /* tp_new */
481 PyType_Spec ZstdCompressionParametersSpec = {
482 "zstd.ZstdCompressionParameters",
483 sizeof(ZstdCompressionParametersObject),
484 0,
485 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
486 ZstdCompressionParametersSlots,
556 487 };
557 488
558 void compressionparams_module_init(PyObject* mod) {
559 Py_SET_TYPE(&ZstdCompressionParametersType, &PyType_Type);
560 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
561 return;
562 }
489 PyTypeObject *ZstdCompressionParametersType;
563 490
564 Py_INCREF(&ZstdCompressionParametersType);
565 PyModule_AddObject(mod, "ZstdCompressionParameters",
566 (PyObject*)&ZstdCompressionParametersType);
491 void compressionparams_module_init(PyObject *mod) {
492 ZstdCompressionParametersType =
493 (PyTypeObject *)PyType_FromSpec(&ZstdCompressionParametersSpec);
494 if (PyType_Ready(ZstdCompressionParametersType) < 0) {
495 return;
496 }
567 497
568 /* TODO remove deprecated alias. */
569 Py_INCREF(&ZstdCompressionParametersType);
570 PyModule_AddObject(mod, "CompressionParameters",
571 (PyObject*)&ZstdCompressionParametersType);
498 Py_INCREF(ZstdCompressionParametersType);
499 PyModule_AddObject(mod, "ZstdCompressionParameters",
500 (PyObject *)ZstdCompressionParametersType);
572 501 }
This diff has been collapsed as it changes many lines, (1246 lines changed) Show them Hide them
@@ -1,214 +1,218
1 1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
11 extern PyObject *ZstdError;
12 12
13 static void set_unsupported_operation(void) {
14 PyObject* iomod;
15 PyObject* exc;
13 static void compressionreader_dealloc(ZstdCompressionReader *self) {
14 Py_XDECREF(self->compressor);
15 Py_XDECREF(self->reader);
16 16
17 iomod = PyImport_ImportModule("io");
18 if (NULL == iomod) {
19 return;
20 }
17 if (self->buffer.buf) {
18 PyBuffer_Release(&self->buffer);
19 memset(&self->buffer, 0, sizeof(self->buffer));
20 }
21 21
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 if (NULL == exc) {
24 Py_DECREF(iomod);
25 return;
26 }
27
28 PyErr_SetNone(exc);
29 Py_DECREF(exc);
30 Py_DECREF(iomod);
22 PyObject_Del(self);
31 23 }
32 24
33 static void reader_dealloc(ZstdCompressionReader* self) {
34 Py_XDECREF(self->compressor);
35 Py_XDECREF(self->reader);
25 static ZstdCompressionReader *
26 compressionreader_enter(ZstdCompressionReader *self) {
27 if (self->entered) {
28 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
29 return NULL;
30 }
36 31
37 if (self->buffer.buf) {
38 PyBuffer_Release(&self->buffer);
39 memset(&self->buffer, 0, sizeof(self->buffer));
40 }
32 if (self->closed) {
33 PyErr_SetString(PyExc_ValueError, "stream is closed");
34 return NULL;
35 }
41 36
42 PyObject_Del(self);
37 self->entered = 1;
38
39 Py_INCREF(self);
40 return self;
43 41 }
44 42
45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
46 if (self->entered) {
47 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
48 return NULL;
49 }
43 static PyObject *compressionreader_exit(ZstdCompressionReader *self,
44 PyObject *args) {
45 PyObject *exc_type;
46 PyObject *exc_value;
47 PyObject *exc_tb;
48 PyObject *result;
50 49
51 self->entered = 1;
52
53 Py_INCREF(self);
54 return self;
55 }
50 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value,
51 &exc_tb)) {
52 return NULL;
53 }
56 54
57 static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) {
58 PyObject* exc_type;
59 PyObject* exc_value;
60 PyObject* exc_tb;
55 self->entered = 0;
61 56
62 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
63 return NULL;
64 }
57 result = PyObject_CallMethod((PyObject *)self, "close", NULL);
58 if (NULL == result) {
59 return NULL;
60 }
65 61
66 self->entered = 0;
67 self->closed = 1;
68
69 /* Release resources associated with source. */
70 Py_CLEAR(self->reader);
71 if (self->buffer.buf) {
72 PyBuffer_Release(&self->buffer);
73 memset(&self->buffer, 0, sizeof(self->buffer));
74 }
62 /* Release resources associated with source. */
63 Py_CLEAR(self->reader);
64 if (self->buffer.buf) {
65 PyBuffer_Release(&self->buffer);
66 memset(&self->buffer, 0, sizeof(self->buffer));
67 }
75 68
76 69 Py_CLEAR(self->compressor);
77 70
78 Py_RETURN_FALSE;
71 Py_RETURN_FALSE;
79 72 }
80 73
81 static PyObject* reader_readable(ZstdCompressionReader* self) {
82 Py_RETURN_TRUE;
74 static PyObject *compressionreader_readable(ZstdCompressionReader *self) {
75 Py_RETURN_TRUE;
83 76 }
84 77
85 static PyObject* reader_writable(ZstdCompressionReader* self) {
86 Py_RETURN_FALSE;
78 static PyObject *compressionreader_writable(ZstdCompressionReader *self) {
79 Py_RETURN_FALSE;
87 80 }
88 81
89 static PyObject* reader_seekable(ZstdCompressionReader* self) {
90 Py_RETURN_FALSE;
82 static PyObject *compressionreader_seekable(ZstdCompressionReader *self) {
83 Py_RETURN_FALSE;
84 }
85
86 static PyObject *compressionreader_readline(PyObject *self, PyObject *args) {
87 set_io_unsupported_operation();
88 return NULL;
91 89 }
92 90
93 static PyObject* reader_readline(PyObject* self, PyObject* args) {
94 set_unsupported_operation();
95 return NULL;
91 static PyObject *compressionreader_readlines(PyObject *self, PyObject *args) {
92 set_io_unsupported_operation();
93 return NULL;
96 94 }
97 95
98 static PyObject* reader_readlines(PyObject* self, PyObject* args) {
99 set_unsupported_operation();
100 return NULL;
96 static PyObject *compressionreader_write(PyObject *self, PyObject *args) {
97 PyErr_SetString(PyExc_OSError, "stream is not writable");
98 return NULL;
101 99 }
102 100
103 static PyObject* reader_write(PyObject* self, PyObject* args) {
104 PyErr_SetString(PyExc_OSError, "stream is not writable");
105 return NULL;
101 static PyObject *compressionreader_writelines(PyObject *self, PyObject *args) {
102 PyErr_SetString(PyExc_OSError, "stream is not writable");
103 return NULL;
106 104 }
107 105
108 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
109 PyErr_SetString(PyExc_OSError, "stream is not writable");
110 return NULL;
106 static PyObject *compressionreader_isatty(PyObject *self) {
107 Py_RETURN_FALSE;
108 }
109
110 static PyObject *compressionreader_flush(PyObject *self) {
111 Py_RETURN_NONE;
111 112 }
112 113
113 static PyObject* reader_isatty(PyObject* self) {
114 Py_RETURN_FALSE;
115 }
114 static PyObject *compressionreader_close(ZstdCompressionReader *self) {
115 if (self->closed) {
116 Py_RETURN_NONE;
117 }
118
119 self->closed = 1;
116 120
117 static PyObject* reader_flush(PyObject* self) {
118 Py_RETURN_NONE;
121 if (self->closefd && self->reader != NULL &&
122 PyObject_HasAttrString(self->reader, "close")) {
123 return PyObject_CallMethod(self->reader, "close", NULL);
124 }
125
126 Py_RETURN_NONE;
119 127 }
120 128
121 static PyObject* reader_close(ZstdCompressionReader* self) {
122 self->closed = 1;
123 Py_RETURN_NONE;
124 }
125
126 static PyObject* reader_tell(ZstdCompressionReader* self) {
127 /* TODO should this raise OSError since stream isn't seekable? */
128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
129 static PyObject *compressionreader_tell(ZstdCompressionReader *self) {
130 /* TODO should this raise OSError since stream isn't seekable? */
131 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
129 132 }
130 133
131 int read_compressor_input(ZstdCompressionReader* self) {
132 if (self->finishedInput) {
133 return 0;
134 }
134 int read_compressor_input(ZstdCompressionReader *self) {
135 if (self->finishedInput) {
136 return 0;
137 }
135 138
136 if (self->input.pos != self->input.size) {
137 return 0;
138 }
139 if (self->input.pos != self->input.size) {
140 return 0;
141 }
139 142
140 if (self->reader) {
141 Py_buffer buffer;
143 if (self->reader) {
144 Py_buffer buffer;
142 145
143 assert(self->readResult == NULL);
146 assert(self->readResult == NULL);
144 147
145 self->readResult = PyObject_CallMethod(self->reader, "read",
146 "k", self->readSize);
148 self->readResult =
149 PyObject_CallMethod(self->reader, "read", "k", self->readSize);
147 150
148 if (NULL == self->readResult) {
149 return -1;
150 }
151 if (NULL == self->readResult) {
152 return -1;
153 }
151 154
152 memset(&buffer, 0, sizeof(buffer));
155 memset(&buffer, 0, sizeof(buffer));
153 156
154 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
155 return -1;
156 }
157 if (0 !=
158 PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
159 return -1;
160 }
157 161
158 /* EOF */
159 if (0 == buffer.len) {
160 self->finishedInput = 1;
161 Py_CLEAR(self->readResult);
162 }
163 else {
164 self->input.src = buffer.buf;
165 self->input.size = buffer.len;
166 self->input.pos = 0;
167 }
162 /* EOF */
163 if (0 == buffer.len) {
164 self->finishedInput = 1;
165 Py_CLEAR(self->readResult);
166 }
167 else {
168 self->input.src = buffer.buf;
169 self->input.size = buffer.len;
170 self->input.pos = 0;
171 }
168 172
169 PyBuffer_Release(&buffer);
170 }
171 else {
172 assert(self->buffer.buf);
173 PyBuffer_Release(&buffer);
174 }
175 else {
176 assert(self->buffer.buf);
173 177
174 self->input.src = self->buffer.buf;
175 self->input.size = self->buffer.len;
176 self->input.pos = 0;
177 }
178 self->input.src = self->buffer.buf;
179 self->input.size = self->buffer.len;
180 self->input.pos = 0;
181 }
178 182
179 return 1;
183 return 1;
180 184 }
181 185
182 int compress_input(ZstdCompressionReader* self, ZSTD_outBuffer* output) {
183 size_t oldPos;
184 size_t zresult;
186 int compress_input(ZstdCompressionReader *self, ZSTD_outBuffer *output) {
187 size_t oldPos;
188 size_t zresult;
185 189
186 /* If we have data left over, consume it. */
187 if (self->input.pos < self->input.size) {
188 oldPos = output->pos;
190 /* If we have data left over, consume it. */
191 if (self->input.pos < self->input.size) {
192 oldPos = output->pos;
189 193
190 Py_BEGIN_ALLOW_THREADS
191 zresult = ZSTD_compressStream2(self->compressor->cctx,
192 output, &self->input, ZSTD_e_continue);
193 Py_END_ALLOW_THREADS
194 Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
195 self->compressor->cctx, output, &self->input, ZSTD_e_continue);
196 Py_END_ALLOW_THREADS
194 197
195 self->bytesCompressed += output->pos - oldPos;
198 self->bytesCompressed += output->pos - oldPos;
196 199
197 /* Input exhausted. Clear out state tracking. */
198 if (self->input.pos == self->input.size) {
199 memset(&self->input, 0, sizeof(self->input));
200 Py_CLEAR(self->readResult);
200 /* Input exhausted. Clear out state tracking. */
201 if (self->input.pos == self->input.size) {
202 memset(&self->input, 0, sizeof(self->input));
203 Py_CLEAR(self->readResult);
201 204
202 if (self->buffer.buf) {
203 self->finishedInput = 1;
204 }
205 }
205 if (self->buffer.buf) {
206 self->finishedInput = 1;
207 }
208 }
206 209
207 if (ZSTD_isError(zresult)) {
208 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
209 return -1;
210 }
211 }
210 if (ZSTD_isError(zresult)) {
211 PyErr_Format(ZstdError, "zstd compress error: %s",
212 ZSTD_getErrorName(zresult));
213 return -1;
214 }
215 }
212 216
213 217 if (output->pos && output->pos == output->size) {
214 218 return 1;
@@ -218,601 +222,591 int compress_input(ZstdCompressionReader
218 222 }
219 223 }
220 224
221 static PyObject* reader_read(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
222 static char* kwlist[] = {
223 "size",
224 NULL
225 };
225 static PyObject *compressionreader_read(ZstdCompressionReader *self,
226 PyObject *args, PyObject *kwargs) {
227 static char *kwlist[] = {"size", NULL};
226 228
227 Py_ssize_t size = -1;
228 PyObject* result = NULL;
229 char* resultBuffer;
230 Py_ssize_t resultSize;
231 size_t zresult;
232 size_t oldPos;
233 int readResult, compressResult;
229 Py_ssize_t size = -1;
230 PyObject *result = NULL;
231 char *resultBuffer;
232 Py_ssize_t resultSize;
233 size_t zresult;
234 size_t oldPos;
235 int readResult, compressResult;
234 236
235 if (self->closed) {
236 PyErr_SetString(PyExc_ValueError, "stream is closed");
237 return NULL;
238 }
237 if (self->closed) {
238 PyErr_SetString(PyExc_ValueError, "stream is closed");
239 return NULL;
240 }
239 241
240 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
241 return NULL;
242 }
242 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
243 return NULL;
244 }
243 245
244 if (size < -1) {
245 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
246 return NULL;
247 }
246 if (size < -1) {
247 PyErr_SetString(PyExc_ValueError,
248 "cannot read negative amounts less than -1");
249 return NULL;
250 }
248 251
249 if (size == -1) {
250 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
251 }
252 if (size == -1) {
253 return PyObject_CallMethod((PyObject *)self, "readall", NULL);
254 }
252 255
253 if (self->finishedOutput || size == 0) {
254 return PyBytes_FromStringAndSize("", 0);
255 }
256 if (self->finishedOutput || size == 0) {
257 return PyBytes_FromStringAndSize("", 0);
258 }
256 259
257 result = PyBytes_FromStringAndSize(NULL, size);
258 if (NULL == result) {
259 return NULL;
260 }
260 result = PyBytes_FromStringAndSize(NULL, size);
261 if (NULL == result) {
262 return NULL;
263 }
261 264
262 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
265 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
263 266
264 self->output.dst = resultBuffer;
265 self->output.size = resultSize;
266 self->output.pos = 0;
267 self->output.dst = resultBuffer;
268 self->output.size = resultSize;
269 self->output.pos = 0;
267 270
268 271 readinput:
269 272
270 273 compressResult = compress_input(self, &self->output);
271 274
272 if (-1 == compressResult) {
273 Py_XDECREF(result);
274 return NULL;
275 }
276 else if (0 == compressResult) {
277 /* There is room in the output. We fall through to below, which will
278 * either get more input for us or will attempt to end the stream.
279 */
280 }
281 else if (1 == compressResult) {
282 memset(&self->output, 0, sizeof(self->output));
283 return result;
284 }
285 else {
286 assert(0);
287 }
275 if (-1 == compressResult) {
276 Py_XDECREF(result);
277 return NULL;
278 }
279 else if (0 == compressResult) {
280 /* There is room in the output. We fall through to below, which will
281 * either get more input for us or will attempt to end the stream.
282 */
283 }
284 else if (1 == compressResult) {
285 memset(&self->output, 0, sizeof(self->output));
286 return result;
287 }
288 else {
289 assert(0);
290 }
288 291
289 readResult = read_compressor_input(self);
292 readResult = read_compressor_input(self);
290 293
291 if (-1 == readResult) {
292 return NULL;
293 }
294 else if (0 == readResult) { }
295 else if (1 == readResult) { }
296 else {
297 assert(0);
298 }
294 if (-1 == readResult) {
295 return NULL;
296 }
297 else if (0 == readResult) {
298 }
299 else if (1 == readResult) {
300 }
301 else {
302 assert(0);
303 }
299 304
300 if (self->input.size) {
301 goto readinput;
302 }
305 if (self->input.size) {
306 goto readinput;
307 }
303 308
304 /* Else EOF */
305 oldPos = self->output.pos;
309 /* Else EOF */
310 oldPos = self->output.pos;
306 311
307 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
308 &self->input, ZSTD_e_end);
312 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
313 &self->input, ZSTD_e_end);
309 314
310 self->bytesCompressed += self->output.pos - oldPos;
315 self->bytesCompressed += self->output.pos - oldPos;
311 316
312 if (ZSTD_isError(zresult)) {
313 PyErr_Format(ZstdError, "error ending compression stream: %s",
314 ZSTD_getErrorName(zresult));
315 Py_XDECREF(result);
316 return NULL;
317 }
317 if (ZSTD_isError(zresult)) {
318 PyErr_Format(ZstdError, "error ending compression stream: %s",
319 ZSTD_getErrorName(zresult));
320 Py_XDECREF(result);
321 return NULL;
322 }
318 323
319 assert(self->output.pos);
324 assert(self->output.pos);
320 325
321 if (0 == zresult) {
322 self->finishedOutput = 1;
323 }
326 if (0 == zresult) {
327 self->finishedOutput = 1;
328 }
324 329
325 if (safe_pybytes_resize(&result, self->output.pos)) {
326 Py_XDECREF(result);
327 return NULL;
328 }
330 if (safe_pybytes_resize(&result, self->output.pos)) {
331 Py_XDECREF(result);
332 return NULL;
333 }
329 334
330 memset(&self->output, 0, sizeof(self->output));
335 memset(&self->output, 0, sizeof(self->output));
331 336
332 return result;
337 return result;
333 338 }
334 339
335 static PyObject* reader_read1(ZstdCompressionReader* self, PyObject* args, PyObject* kwargs) {
336 static char* kwlist[] = {
337 "size",
338 NULL
339 };
340 static PyObject *compressionreader_read1(ZstdCompressionReader *self,
341 PyObject *args, PyObject *kwargs) {
342 static char *kwlist[] = {"size", NULL};
343
344 Py_ssize_t size = -1;
345 PyObject *result = NULL;
346 char *resultBuffer;
347 Py_ssize_t resultSize;
348 ZSTD_outBuffer output;
349 int compressResult;
350 size_t oldPos;
351 size_t zresult;
340 352
341 Py_ssize_t size = -1;
342 PyObject* result = NULL;
343 char* resultBuffer;
344 Py_ssize_t resultSize;
345 ZSTD_outBuffer output;
346 int compressResult;
347 size_t oldPos;
348 size_t zresult;
353 if (self->closed) {
354 PyErr_SetString(PyExc_ValueError, "stream is closed");
355 return NULL;
356 }
349 357
350 if (self->closed) {
351 PyErr_SetString(PyExc_ValueError, "stream is closed");
352 return NULL;
353 }
358 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
359 return NULL;
360 }
354 361
355 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) {
356 return NULL;
357 }
358
359 if (size < -1) {
360 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
361 return NULL;
362 }
362 if (size < -1) {
363 PyErr_SetString(PyExc_ValueError,
364 "cannot read negative amounts less than -1");
365 return NULL;
366 }
363 367
364 if (self->finishedOutput || size == 0) {
365 return PyBytes_FromStringAndSize("", 0);
366 }
368 if (self->finishedOutput || size == 0) {
369 return PyBytes_FromStringAndSize("", 0);
370 }
367 371
368 if (size == -1) {
369 size = ZSTD_CStreamOutSize();
370 }
372 if (size == -1) {
373 size = ZSTD_CStreamOutSize();
374 }
371 375
372 result = PyBytes_FromStringAndSize(NULL, size);
373 if (NULL == result) {
374 return NULL;
375 }
376 result = PyBytes_FromStringAndSize(NULL, size);
377 if (NULL == result) {
378 return NULL;
379 }
376 380
377 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
381 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
378 382
379 output.dst = resultBuffer;
380 output.size = resultSize;
381 output.pos = 0;
383 output.dst = resultBuffer;
384 output.size = resultSize;
385 output.pos = 0;
382 386
383 /* read1() is supposed to use at most 1 read() from the underlying stream.
384 However, we can't satisfy this requirement with compression because
385 not every input will generate output. We /could/ flush the compressor,
386 but this may not be desirable. We allow multiple read() from the
387 underlying stream. But unlike read(), we return as soon as output data
388 is available.
389 */
387 /* read1() is supposed to use at most 1 read() from the underlying stream.
388 However, we can't satisfy this requirement with compression because
389 not every input will generate output. We /could/ flush the compressor,
390 but this may not be desirable. We allow multiple read() from the
391 underlying stream. But unlike read(), we return as soon as output data
392 is available.
393 */
390 394
391 compressResult = compress_input(self, &output);
395 compressResult = compress_input(self, &output);
392 396
393 if (-1 == compressResult) {
394 Py_XDECREF(result);
395 return NULL;
396 }
397 else if (0 == compressResult || 1 == compressResult) { }
398 else {
399 assert(0);
400 }
397 if (-1 == compressResult) {
398 Py_XDECREF(result);
399 return NULL;
400 }
401 else if (0 == compressResult || 1 == compressResult) {
402 }
403 else {
404 assert(0);
405 }
401 406
402 if (output.pos) {
403 goto finally;
404 }
407 if (output.pos) {
408 goto finally;
409 }
405 410
406 while (!self->finishedInput) {
407 int readResult = read_compressor_input(self);
411 while (!self->finishedInput) {
412 int readResult = read_compressor_input(self);
408 413
409 if (-1 == readResult) {
410 Py_XDECREF(result);
411 return NULL;
412 }
413 else if (0 == readResult || 1 == readResult) { }
414 else {
415 assert(0);
416 }
414 if (-1 == readResult) {
415 Py_XDECREF(result);
416 return NULL;
417 }
418 else if (0 == readResult || 1 == readResult) {
419 }
420 else {
421 assert(0);
422 }
417 423
418 compressResult = compress_input(self, &output);
424 compressResult = compress_input(self, &output);
419 425
420 if (-1 == compressResult) {
421 Py_XDECREF(result);
422 return NULL;
423 }
424 else if (0 == compressResult || 1 == compressResult) { }
425 else {
426 assert(0);
427 }
426 if (-1 == compressResult) {
427 Py_XDECREF(result);
428 return NULL;
429 }
430 else if (0 == compressResult || 1 == compressResult) {
431 }
432 else {
433 assert(0);
434 }
428 435
429 if (output.pos) {
430 goto finally;
431 }
432 }
436 if (output.pos) {
437 goto finally;
438 }
439 }
433 440
434 /* EOF */
435 oldPos = output.pos;
441 /* EOF */
442 oldPos = output.pos;
436 443
437 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
438 ZSTD_e_end);
444 zresult = ZSTD_compressStream2(self->compressor->cctx, &output,
445 &self->input, ZSTD_e_end);
439 446
440 self->bytesCompressed += output.pos - oldPos;
447 self->bytesCompressed += output.pos - oldPos;
441 448
442 if (ZSTD_isError(zresult)) {
443 PyErr_Format(ZstdError, "error ending compression stream: %s",
444 ZSTD_getErrorName(zresult));
445 Py_XDECREF(result);
446 return NULL;
447 }
449 if (ZSTD_isError(zresult)) {
450 PyErr_Format(ZstdError, "error ending compression stream: %s",
451 ZSTD_getErrorName(zresult));
452 Py_XDECREF(result);
453 return NULL;
454 }
448 455
449 if (zresult == 0) {
450 self->finishedOutput = 1;
451 }
456 if (zresult == 0) {
457 self->finishedOutput = 1;
458 }
452 459
453 460 finally:
454 if (result) {
455 if (safe_pybytes_resize(&result, output.pos)) {
456 Py_XDECREF(result);
457 return NULL;
458 }
459 }
461 if (result) {
462 if (safe_pybytes_resize(&result, output.pos)) {
463 Py_XDECREF(result);
464 return NULL;
465 }
466 }
460 467
461 return result;
468 return result;
462 469 }
463 470
464 static PyObject* reader_readall(PyObject* self) {
465 PyObject* chunks = NULL;
466 PyObject* empty = NULL;
467 PyObject* result = NULL;
471 static PyObject *compressionreader_readall(PyObject *self) {
472 PyObject *chunks = NULL;
473 PyObject *empty = NULL;
474 PyObject *result = NULL;
468 475
469 /* Our strategy is to collect chunks into a list then join all the
470 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
471 * this feels simple enough to implement and avoids potentially expensive
472 * reallocations of large buffers.
473 */
474 chunks = PyList_New(0);
475 if (NULL == chunks) {
476 return NULL;
477 }
476 /* Our strategy is to collect chunks into a list then join all the
477 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
478 * this feels simple enough to implement and avoids potentially expensive
479 * reallocations of large buffers.
480 */
481 chunks = PyList_New(0);
482 if (NULL == chunks) {
483 return NULL;
484 }
478 485
479 while (1) {
480 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
481 if (NULL == chunk) {
482 Py_DECREF(chunks);
483 return NULL;
484 }
486 while (1) {
487 PyObject *chunk = PyObject_CallMethod(self, "read", "i", 1048576);
488 if (NULL == chunk) {
489 Py_DECREF(chunks);
490 return NULL;
491 }
485 492
486 if (!PyBytes_Size(chunk)) {
487 Py_DECREF(chunk);
488 break;
489 }
493 if (!PyBytes_Size(chunk)) {
494 Py_DECREF(chunk);
495 break;
496 }
490 497
491 if (PyList_Append(chunks, chunk)) {
492 Py_DECREF(chunk);
493 Py_DECREF(chunks);
494 return NULL;
495 }
498 if (PyList_Append(chunks, chunk)) {
499 Py_DECREF(chunk);
500 Py_DECREF(chunks);
501 return NULL;
502 }
496 503
497 Py_DECREF(chunk);
498 }
504 Py_DECREF(chunk);
505 }
499 506
500 empty = PyBytes_FromStringAndSize("", 0);
501 if (NULL == empty) {
502 Py_DECREF(chunks);
503 return NULL;
504 }
507 empty = PyBytes_FromStringAndSize("", 0);
508 if (NULL == empty) {
509 Py_DECREF(chunks);
510 return NULL;
511 }
505 512
506 result = PyObject_CallMethod(empty, "join", "O", chunks);
513 result = PyObject_CallMethod(empty, "join", "O", chunks);
507 514
508 Py_DECREF(empty);
509 Py_DECREF(chunks);
515 Py_DECREF(empty);
516 Py_DECREF(chunks);
510 517
511 return result;
518 return result;
512 519 }
513 520
514 static PyObject* reader_readinto(ZstdCompressionReader* self, PyObject* args) {
515 Py_buffer dest;
516 ZSTD_outBuffer output;
517 int readResult, compressResult;
518 PyObject* result = NULL;
519 size_t zresult;
520 size_t oldPos;
521 static PyObject *compressionreader_readinto(ZstdCompressionReader *self,
522 PyObject *args) {
523 Py_buffer dest;
524 ZSTD_outBuffer output;
525 int readResult, compressResult;
526 PyObject *result = NULL;
527 size_t zresult;
528 size_t oldPos;
521 529
522 if (self->closed) {
523 PyErr_SetString(PyExc_ValueError, "stream is closed");
524 return NULL;
525 }
530 if (self->closed) {
531 PyErr_SetString(PyExc_ValueError, "stream is closed");
532 return NULL;
533 }
526 534
527 if (self->finishedOutput) {
528 return PyLong_FromLong(0);
529 }
535 if (self->finishedOutput) {
536 return PyLong_FromLong(0);
537 }
530 538
531 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
532 return NULL;
533 }
539 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
540 return NULL;
541 }
534 542
535 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
536 PyErr_SetString(PyExc_ValueError,
537 "destination buffer should be contiguous and have at most one dimension");
538 goto finally;
539 }
543 output.dst = dest.buf;
544 output.size = dest.len;
545 output.pos = 0;
540 546
541 output.dst = dest.buf;
542 output.size = dest.len;
543 output.pos = 0;
544
545 compressResult = compress_input(self, &output);
547 compressResult = compress_input(self, &output);
546 548
547 if (-1 == compressResult) {
548 goto finally;
549 }
550 else if (0 == compressResult) { }
551 else if (1 == compressResult) {
552 result = PyLong_FromSize_t(output.pos);
553 goto finally;
554 }
555 else {
556 assert(0);
557 }
549 if (-1 == compressResult) {
550 goto finally;
551 }
552 else if (0 == compressResult) {
553 }
554 else if (1 == compressResult) {
555 result = PyLong_FromSize_t(output.pos);
556 goto finally;
557 }
558 else {
559 assert(0);
560 }
561
562 while (!self->finishedInput) {
563 readResult = read_compressor_input(self);
558 564
559 while (!self->finishedInput) {
560 readResult = read_compressor_input(self);
565 if (-1 == readResult) {
566 goto finally;
567 }
568 else if (0 == readResult || 1 == readResult) {
569 }
570 else {
571 assert(0);
572 }
561 573
562 if (-1 == readResult) {
563 goto finally;
564 }
565 else if (0 == readResult || 1 == readResult) {}
566 else {
567 assert(0);
568 }
569
570 compressResult = compress_input(self, &output);
574 compressResult = compress_input(self, &output);
571 575
572 if (-1 == compressResult) {
573 goto finally;
574 }
575 else if (0 == compressResult) { }
576 else if (1 == compressResult) {
577 result = PyLong_FromSize_t(output.pos);
578 goto finally;
579 }
580 else {
581 assert(0);
582 }
583 }
576 if (-1 == compressResult) {
577 goto finally;
578 }
579 else if (0 == compressResult) {
580 }
581 else if (1 == compressResult) {
582 result = PyLong_FromSize_t(output.pos);
583 goto finally;
584 }
585 else {
586 assert(0);
587 }
588 }
584 589
585 /* EOF */
586 oldPos = output.pos;
590 /* EOF */
591 oldPos = output.pos;
587 592
588 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
589 ZSTD_e_end);
593 zresult = ZSTD_compressStream2(self->compressor->cctx, &output,
594 &self->input, ZSTD_e_end);
590 595
591 self->bytesCompressed += self->output.pos - oldPos;
596 self->bytesCompressed += self->output.pos - oldPos;
592 597
593 if (ZSTD_isError(zresult)) {
594 PyErr_Format(ZstdError, "error ending compression stream: %s",
595 ZSTD_getErrorName(zresult));
596 goto finally;
597 }
598 if (ZSTD_isError(zresult)) {
599 PyErr_Format(ZstdError, "error ending compression stream: %s",
600 ZSTD_getErrorName(zresult));
601 goto finally;
602 }
598 603
599 assert(output.pos);
604 assert(output.pos);
600 605
601 if (0 == zresult) {
602 self->finishedOutput = 1;
603 }
606 if (0 == zresult) {
607 self->finishedOutput = 1;
608 }
604 609
605 result = PyLong_FromSize_t(output.pos);
610 result = PyLong_FromSize_t(output.pos);
606 611
607 612 finally:
608 PyBuffer_Release(&dest);
613 PyBuffer_Release(&dest);
609 614
610 return result;
615 return result;
611 616 }
612 617
613 static PyObject* reader_readinto1(ZstdCompressionReader* self, PyObject* args) {
614 Py_buffer dest;
615 PyObject* result = NULL;
616 ZSTD_outBuffer output;
617 int compressResult;
618 size_t oldPos;
619 size_t zresult;
618 static PyObject *compressionreader_readinto1(ZstdCompressionReader *self,
619 PyObject *args) {
620 Py_buffer dest;
621 PyObject *result = NULL;
622 ZSTD_outBuffer output;
623 int compressResult;
624 size_t oldPos;
625 size_t zresult;
620 626
621 if (self->closed) {
622 PyErr_SetString(PyExc_ValueError, "stream is closed");
623 return NULL;
624 }
627 if (self->closed) {
628 PyErr_SetString(PyExc_ValueError, "stream is closed");
629 return NULL;
630 }
625 631
626 if (self->finishedOutput) {
627 return PyLong_FromLong(0);
628 }
632 if (self->finishedOutput) {
633 return PyLong_FromLong(0);
634 }
629 635
630 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
631 return NULL;
632 }
636 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
637 return NULL;
638 }
633 639
634 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
635 PyErr_SetString(PyExc_ValueError,
636 "destination buffer should be contiguous and have at most one dimension");
637 goto finally;
638 }
640 output.dst = dest.buf;
641 output.size = dest.len;
642 output.pos = 0;
639 643
640 output.dst = dest.buf;
641 output.size = dest.len;
642 output.pos = 0;
643
644 compressResult = compress_input(self, &output);
644 compressResult = compress_input(self, &output);
645 645
646 if (-1 == compressResult) {
647 goto finally;
648 }
649 else if (0 == compressResult || 1 == compressResult) { }
650 else {
651 assert(0);
652 }
646 if (-1 == compressResult) {
647 goto finally;
648 }
649 else if (0 == compressResult || 1 == compressResult) {
650 }
651 else {
652 assert(0);
653 }
653 654
654 if (output.pos) {
655 result = PyLong_FromSize_t(output.pos);
656 goto finally;
657 }
655 if (output.pos) {
656 result = PyLong_FromSize_t(output.pos);
657 goto finally;
658 }
658 659
659 while (!self->finishedInput) {
660 int readResult = read_compressor_input(self);
660 while (!self->finishedInput) {
661 int readResult = read_compressor_input(self);
661 662
662 if (-1 == readResult) {
663 goto finally;
664 }
665 else if (0 == readResult || 1 == readResult) { }
666 else {
667 assert(0);
668 }
663 if (-1 == readResult) {
664 goto finally;
665 }
666 else if (0 == readResult || 1 == readResult) {
667 }
668 else {
669 assert(0);
670 }
669 671
670 compressResult = compress_input(self, &output);
672 compressResult = compress_input(self, &output);
671 673
672 if (-1 == compressResult) {
673 goto finally;
674 }
675 else if (0 == compressResult) { }
676 else if (1 == compressResult) {
677 result = PyLong_FromSize_t(output.pos);
678 goto finally;
679 }
680 else {
681 assert(0);
682 }
674 if (-1 == compressResult) {
675 goto finally;
676 }
677 else if (0 == compressResult) {
678 }
679 else if (1 == compressResult) {
680 result = PyLong_FromSize_t(output.pos);
681 goto finally;
682 }
683 else {
684 assert(0);
685 }
683 686
684 /* If we produced output and we're not done with input, emit
685 * that output now, as we've hit restrictions of read1().
686 */
687 if (output.pos && !self->finishedInput) {
688 result = PyLong_FromSize_t(output.pos);
689 goto finally;
690 }
687 /* If we produced output and we're not done with input, emit
688 * that output now, as we've hit restrictions of read1().
689 */
690 if (output.pos && !self->finishedInput) {
691 result = PyLong_FromSize_t(output.pos);
692 goto finally;
693 }
691 694
692 /* Otherwise we either have no output or we've exhausted the
693 * input. Either we try to get more input or we fall through
694 * to EOF below */
695 }
695 /* Otherwise we either have no output or we've exhausted the
696 * input. Either we try to get more input or we fall through
697 * to EOF below */
698 }
696 699
697 /* EOF */
698 oldPos = output.pos;
700 /* EOF */
701 oldPos = output.pos;
699 702
700 zresult = ZSTD_compressStream2(self->compressor->cctx, &output, &self->input,
701 ZSTD_e_end);
703 zresult = ZSTD_compressStream2(self->compressor->cctx, &output,
704 &self->input, ZSTD_e_end);
702 705
703 self->bytesCompressed += self->output.pos - oldPos;
706 self->bytesCompressed += self->output.pos - oldPos;
704 707
705 if (ZSTD_isError(zresult)) {
706 PyErr_Format(ZstdError, "error ending compression stream: %s",
707 ZSTD_getErrorName(zresult));
708 goto finally;
709 }
708 if (ZSTD_isError(zresult)) {
709 PyErr_Format(ZstdError, "error ending compression stream: %s",
710 ZSTD_getErrorName(zresult));
711 goto finally;
712 }
710 713
711 assert(output.pos);
714 assert(output.pos);
712 715
713 if (0 == zresult) {
714 self->finishedOutput = 1;
715 }
716 if (0 == zresult) {
717 self->finishedOutput = 1;
718 }
716 719
717 result = PyLong_FromSize_t(output.pos);
720 result = PyLong_FromSize_t(output.pos);
718 721
719 722 finally:
720 PyBuffer_Release(&dest);
723 PyBuffer_Release(&dest);
721 724
722 return result;
725 return result;
723 726 }
724 727
725 static PyObject* reader_iter(PyObject* self) {
726 set_unsupported_operation();
727 return NULL;
728 static PyObject *compressionreader_iter(PyObject *self) {
729 set_io_unsupported_operation();
730 return NULL;
728 731 }
729 732
730 static PyObject* reader_iternext(PyObject* self) {
731 set_unsupported_operation();
732 return NULL;
733 static PyObject *compressionreader_iternext(PyObject *self) {
734 set_io_unsupported_operation();
735 return NULL;
733 736 }
734 737
735 static PyMethodDef reader_methods[] = {
736 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
737 PyDoc_STR("Enter a compression context") },
738 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
739 PyDoc_STR("Exit a compression context") },
740 { "close", (PyCFunction)reader_close, METH_NOARGS,
741 PyDoc_STR("Close the stream so it cannot perform any more operations") },
742 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
743 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
744 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
745 PyDoc_STR("Returns True") },
746 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") },
747 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL },
748 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
749 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
750 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
751 { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") },
752 { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") },
753 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
754 PyDoc_STR("Returns False") },
755 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
756 PyDoc_STR("Returns current number of bytes compressed") },
757 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
758 PyDoc_STR("Returns False") },
759 { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") },
760 { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") },
761 { NULL, NULL }
762 };
738 static PyMethodDef compressionreader_methods[] = {
739 {"__enter__", (PyCFunction)compressionreader_enter, METH_NOARGS,
740 PyDoc_STR("Enter a compression context")},
741 {"__exit__", (PyCFunction)compressionreader_exit, METH_VARARGS,
742 PyDoc_STR("Exit a compression context")},
743 {"close", (PyCFunction)compressionreader_close, METH_NOARGS,
744 PyDoc_STR("Close the stream so it cannot perform any more operations")},
745 {"flush", (PyCFunction)compressionreader_flush, METH_NOARGS,
746 PyDoc_STR("no-ops")},
747 {"isatty", (PyCFunction)compressionreader_isatty, METH_NOARGS,
748 PyDoc_STR("Returns False")},
749 {"readable", (PyCFunction)compressionreader_readable, METH_NOARGS,
750 PyDoc_STR("Returns True")},
751 {"read", (PyCFunction)compressionreader_read, METH_VARARGS | METH_KEYWORDS,
752 PyDoc_STR("read compressed data")},
753 {"read1", (PyCFunction)compressionreader_read1,
754 METH_VARARGS | METH_KEYWORDS, NULL},
755 {"readall", (PyCFunction)compressionreader_readall, METH_NOARGS,
756 PyDoc_STR("Not implemented")},
757 {"readinto", (PyCFunction)compressionreader_readinto, METH_VARARGS, NULL},
758 {"readinto1", (PyCFunction)compressionreader_readinto1, METH_VARARGS, NULL},
759 {"readline", (PyCFunction)compressionreader_readline, METH_VARARGS,
760 PyDoc_STR("Not implemented")},
761 {"readlines", (PyCFunction)compressionreader_readlines, METH_VARARGS,
762 PyDoc_STR("Not implemented")},
763 {"seekable", (PyCFunction)compressionreader_seekable, METH_NOARGS,
764 PyDoc_STR("Returns False")},
765 {"tell", (PyCFunction)compressionreader_tell, METH_NOARGS,
766 PyDoc_STR("Returns current number of bytes compressed")},
767 {"writable", (PyCFunction)compressionreader_writable, METH_NOARGS,
768 PyDoc_STR("Returns False")},
769 {"write", compressionreader_write, METH_VARARGS,
770 PyDoc_STR("Raises OSError")},
771 {"writelines", compressionreader_writelines, METH_VARARGS,
772 PyDoc_STR("Not implemented")},
773 {NULL, NULL}};
763 774
764 static PyMemberDef reader_members[] = {
765 { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
766 READONLY, "whether stream is closed" },
767 { NULL }
775 static PyMemberDef compressionreader_members[] = {
776 {"closed", T_BOOL, offsetof(ZstdCompressionReader, closed), READONLY,
777 "whether stream is closed"},
778 {NULL}};
779
780 PyType_Slot ZstdCompressionReaderSlots[] = {
781 {Py_tp_dealloc, compressionreader_dealloc},
782 {Py_tp_iter, compressionreader_iter},
783 {Py_tp_iternext, compressionreader_iternext},
784 {Py_tp_methods, compressionreader_methods},
785 {Py_tp_members, compressionreader_members},
786 {Py_tp_new, PyType_GenericNew},
787 {0, NULL},
768 788 };
769 789
770 PyTypeObject ZstdCompressionReaderType = {
771 PyVarObject_HEAD_INIT(NULL, 0)
772 "zstd.ZstdCompressionReader", /* tp_name */
773 sizeof(ZstdCompressionReader), /* tp_basicsize */
774 0, /* tp_itemsize */
775 (destructor)reader_dealloc, /* tp_dealloc */
776 0, /* tp_print */
777 0, /* tp_getattr */
778 0, /* tp_setattr */
779 0, /* tp_compare */
780 0, /* tp_repr */
781 0, /* tp_as_number */
782 0, /* tp_as_sequence */
783 0, /* tp_as_mapping */
784 0, /* tp_hash */
785 0, /* tp_call */
786 0, /* tp_str */
787 0, /* tp_getattro */
788 0, /* tp_setattro */
789 0, /* tp_as_buffer */
790 Py_TPFLAGS_DEFAULT, /* tp_flags */
791 0, /* tp_doc */
792 0, /* tp_traverse */
793 0, /* tp_clear */
794 0, /* tp_richcompare */
795 0, /* tp_weaklistoffset */
796 reader_iter, /* tp_iter */
797 reader_iternext, /* tp_iternext */
798 reader_methods, /* tp_methods */
799 reader_members, /* tp_members */
800 0, /* tp_getset */
801 0, /* tp_base */
802 0, /* tp_dict */
803 0, /* tp_descr_get */
804 0, /* tp_descr_set */
805 0, /* tp_dictoffset */
806 0, /* tp_init */
807 0, /* tp_alloc */
808 PyType_GenericNew, /* tp_new */
790 PyType_Spec ZstdCompressionReaderSpec = {
791 "zstd.ZstdCompressionReader",
792 sizeof(ZstdCompressionReader),
793 0,
794 Py_TPFLAGS_DEFAULT,
795 ZstdCompressionReaderSlots,
809 796 };
810 797
811 void compressionreader_module_init(PyObject* mod) {
812 /* TODO make reader a sub-class of io.RawIOBase */
798 PyTypeObject *ZstdCompressionReaderType;
799
800 void compressionreader_module_init(PyObject *mod) {
801 /* TODO make reader a sub-class of io.RawIOBase */
813 802
814 Py_SET_TYPE(&ZstdCompressionReaderType, &PyType_Type);
815 if (PyType_Ready(&ZstdCompressionReaderType) < 0) {
816 return;
817 }
803 ZstdCompressionReaderType =
804 (PyTypeObject *)PyType_FromSpec(&ZstdCompressionReaderSpec);
805 if (PyType_Ready(ZstdCompressionReaderType) < 0) {
806 return;
807 }
808
809 Py_INCREF((PyObject *)ZstdCompressionReaderType);
810 PyModule_AddObject(mod, "ZstdCompressionReader",
811 (PyObject *)ZstdCompressionReaderType);
818 812 }
This diff has been collapsed as it changes many lines, (575 lines changed) Show them Hide them
@@ -1,372 +1,353
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(ZstdCompresssionWriter__doc__,
14 """A context manager used for writing compressed output to a writer.\n"
15 );
11 extern PyObject *ZstdError;
16 12
17 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* self) {
18 Py_XDECREF(self->compressor);
19 Py_XDECREF(self->writer);
13 static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter *self) {
14 Py_XDECREF(self->compressor);
15 Py_XDECREF(self->writer);
20 16
21 PyMem_Free(self->output.dst);
22 self->output.dst = NULL;
17 PyMem_Free(self->output.dst);
18 self->output.dst = NULL;
23 19
24 PyObject_Del(self);
20 PyObject_Del(self);
25 21 }
26 22
27 static PyObject* ZstdCompressionWriter_enter(ZstdCompressionWriter* self) {
28 if (self->closed) {
29 PyErr_SetString(PyExc_ValueError, "stream is closed");
30 return NULL;
31 }
23 static PyObject *ZstdCompressionWriter_enter(ZstdCompressionWriter *self) {
24 if (self->closed) {
25 PyErr_SetString(PyExc_ValueError, "stream is closed");
26 return NULL;
27 }
32 28
33 if (self->entered) {
34 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
35 return NULL;
36 }
29 if (self->entered) {
30 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
31 return NULL;
32 }
37 33
38 self->entered = 1;
34 self->entered = 1;
39 35
40 Py_INCREF(self);
41 return (PyObject*)self;
36 Py_INCREF(self);
37 return (PyObject *)self;
42 38 }
43 39
44 static PyObject* ZstdCompressionWriter_exit(ZstdCompressionWriter* self, PyObject* args) {
45 PyObject* exc_type;
46 PyObject* exc_value;
47 PyObject* exc_tb;
48
49 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
50 return NULL;
51 }
40 static PyObject *ZstdCompressionWriter_exit(ZstdCompressionWriter *self,
41 PyObject *args) {
42 PyObject *exc_type;
43 PyObject *exc_value;
44 PyObject *exc_tb;
45 PyObject *result;
52 46
53 self->entered = 0;
47 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value,
48 &exc_tb)) {
49 return NULL;
50 }
54 51
55 if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) {
56 PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL);
52 self->entered = 0;
57 53
58 if (NULL == result) {
59 return NULL;
60 }
61 }
54 result = PyObject_CallMethod((PyObject *)self, "close", NULL);
62 55
63 Py_RETURN_FALSE;
56 if (NULL == result) {
57 return NULL;
58 }
59
60 Py_RETURN_FALSE;
64 61 }
65 62
66 static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) {
67 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
63 static PyObject *
64 ZstdCompressionWriter_memory_size(ZstdCompressionWriter *self) {
65 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx));
68 66 }
69 67
70 static PyObject* ZstdCompressionWriter_write(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
71 static char* kwlist[] = {
72 "data",
73 NULL
74 };
68 static PyObject *ZstdCompressionWriter_write(ZstdCompressionWriter *self,
69 PyObject *args, PyObject *kwargs) {
70 static char *kwlist[] = {"data", NULL};
75 71
76 PyObject* result = NULL;
77 Py_buffer source;
78 size_t zresult;
79 ZSTD_inBuffer input;
80 PyObject* res;
81 Py_ssize_t totalWrite = 0;
72 PyObject *result = NULL;
73 Py_buffer source;
74 size_t zresult;
75 ZSTD_inBuffer input;
76 PyObject *res;
77 Py_ssize_t totalWrite = 0;
82 78
83 #if PY_MAJOR_VERSION >= 3
84 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
85 #else
86 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
87 #endif
88 kwlist, &source)) {
89 return NULL;
90 }
79 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", kwlist,
80 &source)) {
81 return NULL;
82 }
91 83
92 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
93 PyErr_SetString(PyExc_ValueError,
94 "data buffer should be contiguous and have at most one dimension");
95 goto finally;
96 }
84 if (self->closed) {
85 PyErr_SetString(PyExc_ValueError, "stream is closed");
86 return NULL;
87 }
97 88
98 if (self->closed) {
99 PyErr_SetString(PyExc_ValueError, "stream is closed");
100 return NULL;
101 }
89 self->output.pos = 0;
102 90
103 self->output.pos = 0;
91 input.src = source.buf;
92 input.size = source.len;
93 input.pos = 0;
104 94
105 input.src = source.buf;
106 input.size = source.len;
107 input.pos = 0;
95 while (input.pos < (size_t)source.len) {
96 Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
97 self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
98 Py_END_ALLOW_THREADS
108 99
109 while (input.pos < (size_t)source.len) {
110 Py_BEGIN_ALLOW_THREADS
111 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
112 Py_END_ALLOW_THREADS
113
114 if (ZSTD_isError(zresult)) {
115 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
116 goto finally;
117 }
100 if (ZSTD_isError(zresult)) {
101 PyErr_Format(ZstdError, "zstd compress error: %s",
102 ZSTD_getErrorName(zresult));
103 goto finally;
104 }
118 105
119 /* Copy data from output buffer to writer. */
120 if (self->output.pos) {
121 #if PY_MAJOR_VERSION >= 3
122 res = PyObject_CallMethod(self->writer, "write", "y#",
123 #else
124 res = PyObject_CallMethod(self->writer, "write", "s#",
125 #endif
126 self->output.dst, self->output.pos);
127 Py_XDECREF(res);
128 totalWrite += self->output.pos;
129 self->bytesCompressed += self->output.pos;
130 }
131 self->output.pos = 0;
132 }
106 /* Copy data from output buffer to writer. */
107 if (self->output.pos) {
108 res = PyObject_CallMethod(self->writer, "write", "y#",
109 self->output.dst, self->output.pos);
110 if (NULL == res) {
111 goto finally;
112 }
113 Py_XDECREF(res);
114 totalWrite += self->output.pos;
115 self->bytesCompressed += self->output.pos;
116 }
117 self->output.pos = 0;
118 }
133 119
134 if (self->writeReturnRead) {
135 result = PyLong_FromSize_t(input.pos);
136 }
137 else {
138 result = PyLong_FromSsize_t(totalWrite);
139 }
120 if (self->writeReturnRead) {
121 result = PyLong_FromSize_t(input.pos);
122 }
123 else {
124 result = PyLong_FromSsize_t(totalWrite);
125 }
140 126
141 127 finally:
142 PyBuffer_Release(&source);
143 return result;
128 PyBuffer_Release(&source);
129 return result;
144 130 }
145 131
146 static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args, PyObject* kwargs) {
147 static char* kwlist[] = {
148 "flush_mode",
149 NULL
150 };
132 static PyObject *ZstdCompressionWriter_flush(ZstdCompressionWriter *self,
133 PyObject *args, PyObject *kwargs) {
134 static char *kwlist[] = {"flush_mode", NULL};
151 135
152 size_t zresult;
153 ZSTD_inBuffer input;
154 PyObject* res;
155 Py_ssize_t totalWrite = 0;
156 unsigned flush_mode = 0;
157 ZSTD_EndDirective flush;
136 size_t zresult;
137 ZSTD_inBuffer input;
138 PyObject *res;
139 Py_ssize_t totalWrite = 0;
140 unsigned flush_mode = 0;
141 ZSTD_EndDirective flush;
142
143 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush", kwlist,
144 &flush_mode)) {
145 return NULL;
146 }
158 147
159 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush",
160 kwlist, &flush_mode)) {
161 return NULL;
162 }
148 switch (flush_mode) {
149 case 0:
150 flush = ZSTD_e_flush;
151 break;
152 case 1:
153 flush = ZSTD_e_end;
154 break;
155 default:
156 PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
157 return NULL;
158 }
163 159
164 switch (flush_mode) {
165 case 0:
166 flush = ZSTD_e_flush;
167 break;
168 case 1:
169 flush = ZSTD_e_end;
170 break;
171 default:
172 PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode);
173 return NULL;
174 }
160 if (self->closed) {
161 PyErr_SetString(PyExc_ValueError, "stream is closed");
162 return NULL;
163 }
175 164
176 if (self->closed) {
177 PyErr_SetString(PyExc_ValueError, "stream is closed");
178 return NULL;
179 }
165 self->output.pos = 0;
166
167 input.src = NULL;
168 input.size = 0;
169 input.pos = 0;
180 170
181 self->output.pos = 0;
182
183 input.src = NULL;
184 input.size = 0;
185 input.pos = 0;
171 while (1) {
172 Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
173 self->compressor->cctx, &self->output, &input, flush);
174 Py_END_ALLOW_THREADS
186 175
187 while (1) {
188 Py_BEGIN_ALLOW_THREADS
189 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush);
190 Py_END_ALLOW_THREADS
191
192 if (ZSTD_isError(zresult)) {
193 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
194 return NULL;
195 }
176 if (ZSTD_isError(zresult)) {
177 PyErr_Format(ZstdError, "zstd compress error: %s",
178 ZSTD_getErrorName(zresult));
179 return NULL;
180 }
196 181
197 /* Copy data from output buffer to writer. */
198 if (self->output.pos) {
199 #if PY_MAJOR_VERSION >= 3
200 res = PyObject_CallMethod(self->writer, "write", "y#",
201 #else
202 res = PyObject_CallMethod(self->writer, "write", "s#",
203 #endif
204 self->output.dst, self->output.pos);
205 Py_XDECREF(res);
206 totalWrite += self->output.pos;
207 self->bytesCompressed += self->output.pos;
208 }
182 /* Copy data from output buffer to writer. */
183 if (self->output.pos) {
184 res = PyObject_CallMethod(self->writer, "write", "y#",
185 self->output.dst, self->output.pos);
186 if (NULL == res) {
187 return NULL;
188 }
189 Py_XDECREF(res);
190 totalWrite += self->output.pos;
191 self->bytesCompressed += self->output.pos;
192 }
193
194 self->output.pos = 0;
209 195
210 self->output.pos = 0;
196 if (!zresult) {
197 break;
198 }
199 }
211 200
212 if (!zresult) {
213 break;
214 }
215 }
201 if (!self->closing && PyObject_HasAttrString(self->writer, "flush")) {
202 res = PyObject_CallMethod(self->writer, "flush", NULL);
203 if (NULL == res) {
204 return NULL;
205 }
206 Py_XDECREF(res);
207 }
216 208
217 return PyLong_FromSsize_t(totalWrite);
209 return PyLong_FromSsize_t(totalWrite);
218 210 }
219 211
220 static PyObject* ZstdCompressionWriter_close(ZstdCompressionWriter* self) {
221 PyObject* result;
212 static PyObject *ZstdCompressionWriter_close(ZstdCompressionWriter *self) {
213 PyObject *result;
222 214
223 if (self->closed) {
224 Py_RETURN_NONE;
225 }
215 if (self->closed) {
216 Py_RETURN_NONE;
217 }
226 218
227 result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1);
228 self->closed = 1;
219 self->closing = 1;
220 result = PyObject_CallMethod((PyObject *)self, "flush", "I", 1);
221 self->closing = 0;
222 self->closed = 1;
229 223
230 if (NULL == result) {
231 return NULL;
232 }
224 if (NULL == result) {
225 return NULL;
226 }
233 227
234 228 /* Call close on underlying stream as well. */
235 if (PyObject_HasAttrString(self->writer, "close")) {
236 return PyObject_CallMethod(self->writer, "close", NULL);
237 }
238
239 Py_RETURN_NONE;
240 }
229 if (self->closefd && PyObject_HasAttrString(self->writer, "close")) {
230 return PyObject_CallMethod(self->writer, "close", NULL);
231 }
241 232
242 static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) {
243 if (PyObject_HasAttrString(self->writer, "fileno")) {
244 return PyObject_CallMethod(self->writer, "fileno", NULL);
245 }
246 else {
247 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
248 return NULL;
249 }
233 Py_RETURN_NONE;
250 234 }
251 235
252 static PyObject* ZstdCompressionWriter_tell(ZstdCompressionWriter* self) {
253 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
236 static PyObject *ZstdCompressionWriter_fileno(ZstdCompressionWriter *self) {
237 if (PyObject_HasAttrString(self->writer, "fileno")) {
238 return PyObject_CallMethod(self->writer, "fileno", NULL);
239 }
240 else {
241 PyErr_SetString(PyExc_OSError,
242 "fileno not available on underlying writer");
243 return NULL;
244 }
254 245 }
255 246
256 static PyObject* ZstdCompressionWriter_writelines(PyObject* self, PyObject* args) {
257 PyErr_SetNone(PyExc_NotImplementedError);
258 return NULL;
247 static PyObject *ZstdCompressionWriter_tell(ZstdCompressionWriter *self) {
248 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
259 249 }
260 250
261 static PyObject* ZstdCompressionWriter_false(PyObject* self, PyObject* args) {
262 Py_RETURN_FALSE;
263 }
264
265 static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) {
266 Py_RETURN_TRUE;
251 static PyObject *ZstdCompressionWriter_writelines(PyObject *self,
252 PyObject *args) {
253 PyErr_SetNone(PyExc_NotImplementedError);
254 return NULL;
267 255 }
268 256
269 static PyObject* ZstdCompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
270 PyObject* iomod;
271 PyObject* exc;
257 static PyObject *ZstdCompressionWriter_iter(PyObject *self) {
258 set_io_unsupported_operation();
259 return NULL;
260 }
272 261
273 iomod = PyImport_ImportModule("io");
274 if (NULL == iomod) {
275 return NULL;
276 }
262 static PyObject *ZstdCompressionWriter_iternext(PyObject *self) {
263 set_io_unsupported_operation();
264 return NULL;
265 }
277 266
278 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
279 if (NULL == exc) {
280 Py_DECREF(iomod);
281 return NULL;
282 }
267 static PyObject *ZstdCompressionWriter_false(PyObject *self, PyObject *args) {
268 Py_RETURN_FALSE;
269 }
283 270
284 PyErr_SetNone(exc);
285 Py_DECREF(exc);
286 Py_DECREF(iomod);
271 static PyObject *ZstdCompressionWriter_true(PyObject *self, PyObject *args) {
272 Py_RETURN_TRUE;
273 }
287 274
288 return NULL;
275 static PyObject *ZstdCompressionWriter_unsupported(PyObject *self,
276 PyObject *args,
277 PyObject *kwargs) {
278 set_io_unsupported_operation();
279 return NULL;
289 280 }
290 281
291 282 static PyMethodDef ZstdCompressionWriter_methods[] = {
292 { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
293 PyDoc_STR("Enter a compression context.") },
294 { "__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
295 PyDoc_STR("Exit a compression context.") },
296 { "close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL },
297 { "fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL },
298 { "isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
299 { "readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL },
300 { "readline", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
301 { "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
302 { "seek", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
303 { "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL },
304 { "truncate", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
305 { "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL },
306 { "writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL },
307 { "read", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
308 { "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
309 { "readinto", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
310 { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
311 PyDoc_STR("Obtain the memory size of the underlying compressor") },
312 { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
313 PyDoc_STR("Compress data") },
314 { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS,
315 PyDoc_STR("Flush data and finish a zstd frame") },
316 { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
317 PyDoc_STR("Returns current number of bytes compressed") },
318 { NULL, NULL }
319 };
283 {"__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
284 PyDoc_STR("Enter a compression context.")},
285 {"__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS,
286 PyDoc_STR("Exit a compression context.")},
287 {"close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL},
288 {"fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL},
289 {"isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL},
290 {"readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL},
291 {"readline", (PyCFunction)ZstdCompressionWriter_unsupported,
292 METH_VARARGS | METH_KEYWORDS, NULL},
293 {"readlines", (PyCFunction)ZstdCompressionWriter_unsupported,
294 METH_VARARGS | METH_KEYWORDS, NULL},
295 {"seek", (PyCFunction)ZstdCompressionWriter_unsupported,
296 METH_VARARGS | METH_KEYWORDS, NULL},
297 {"seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL},
298 {"truncate", (PyCFunction)ZstdCompressionWriter_unsupported,
299 METH_VARARGS | METH_KEYWORDS, NULL},
300 {"writable", ZstdCompressionWriter_true, METH_NOARGS, NULL},
301 {"writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL},
302 {"read", (PyCFunction)ZstdCompressionWriter_unsupported,
303 METH_VARARGS | METH_KEYWORDS, NULL},
304 {"readall", (PyCFunction)ZstdCompressionWriter_unsupported,
305 METH_VARARGS | METH_KEYWORDS, NULL},
306 {"readinto", (PyCFunction)ZstdCompressionWriter_unsupported,
307 METH_VARARGS | METH_KEYWORDS, NULL},
308 {"memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS,
309 PyDoc_STR("Obtain the memory size of the underlying compressor")},
310 {"write", (PyCFunction)ZstdCompressionWriter_write,
311 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Compress data")},
312 {"flush", (PyCFunction)ZstdCompressionWriter_flush,
313 METH_VARARGS | METH_KEYWORDS,
314 PyDoc_STR("Flush data and finish a zstd frame")},
315 {"tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS,
316 PyDoc_STR("Returns current number of bytes compressed")},
317 {NULL, NULL}};
320 318
321 319 static PyMemberDef ZstdCompressionWriter_members[] = {
322 { "closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL },
323 { NULL }
320 {"closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL},
321 {NULL}};
322
323 PyType_Slot ZstdCompressionWriterSlots[] = {
324 {Py_tp_dealloc, ZstdCompressionWriter_dealloc},
325 {Py_tp_iter, ZstdCompressionWriter_iter},
326 {Py_tp_iternext, ZstdCompressionWriter_iternext},
327 {Py_tp_methods, ZstdCompressionWriter_methods},
328 {Py_tp_members, ZstdCompressionWriter_members},
329 {Py_tp_new, PyType_GenericNew},
330 {0, NULL},
324 331 };
325 332
326 PyTypeObject ZstdCompressionWriterType = {
327 PyVarObject_HEAD_INIT(NULL, 0)
328 "zstd.ZstdCompressionWriter", /* tp_name */
329 sizeof(ZstdCompressionWriter), /* tp_basicsize */
330 0, /* tp_itemsize */
331 (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */
332 0, /* tp_print */
333 0, /* tp_getattr */
334 0, /* tp_setattr */
335 0, /* tp_compare */
336 0, /* tp_repr */
337 0, /* tp_as_number */
338 0, /* tp_as_sequence */
339 0, /* tp_as_mapping */
340 0, /* tp_hash */
341 0, /* tp_call */
342 0, /* tp_str */
343 0, /* tp_getattro */
344 0, /* tp_setattro */
345 0, /* tp_as_buffer */
346 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
347 ZstdCompresssionWriter__doc__, /* tp_doc */
348 0, /* tp_traverse */
349 0, /* tp_clear */
350 0, /* tp_richcompare */
351 0, /* tp_weaklistoffset */
352 0, /* tp_iter */
353 0, /* tp_iternext */
354 ZstdCompressionWriter_methods, /* tp_methods */
355 ZstdCompressionWriter_members, /* tp_members */
356 0, /* tp_getset */
357 0, /* tp_base */
358 0, /* tp_dict */
359 0, /* tp_descr_get */
360 0, /* tp_descr_set */
361 0, /* tp_dictoffset */
362 0, /* tp_init */
363 0, /* tp_alloc */
364 PyType_GenericNew, /* tp_new */
333 PyType_Spec ZstdCompressionWriterSpec = {
334 "zstd.ZstdCompressionWriter",
335 sizeof(ZstdCompressionWriter),
336 0,
337 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
338 ZstdCompressionWriterSlots,
365 339 };
366 340
367 void compressionwriter_module_init(PyObject* mod) {
368 Py_SET_TYPE(&ZstdCompressionWriterType, &PyType_Type);
369 if (PyType_Ready(&ZstdCompressionWriterType) < 0) {
370 return;
371 }
341 PyTypeObject *ZstdCompressionWriterType;
342
343 void compressionwriter_module_init(PyObject *mod) {
344 ZstdCompressionWriterType =
345 (PyTypeObject *)PyType_FromSpec(&ZstdCompressionWriterSpec);
346 if (PyType_Ready(ZstdCompressionWriterType) < 0) {
347 return;
348 }
349
350 Py_INCREF((PyObject *)ZstdCompressionWriterType);
351 PyModule_AddObject(mod, "ZstdCompressionWriter",
352 (PyObject *)ZstdCompressionWriterType);
372 353 }
@@ -1,256 +1,220
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(ZstdCompressionObj__doc__,
14 "Perform compression using a standard library compatible API.\n"
15 );
11 extern PyObject *ZstdError;
16 12
17 static void ZstdCompressionObj_dealloc(ZstdCompressionObj* self) {
18 PyMem_Free(self->output.dst);
19 self->output.dst = NULL;
13 static void ZstdCompressionObj_dealloc(ZstdCompressionObj *self) {
14 PyMem_Free(self->output.dst);
15 self->output.dst = NULL;
20 16
21 Py_XDECREF(self->compressor);
17 Py_XDECREF(self->compressor);
22 18
23 PyObject_Del(self);
19 PyObject_Del(self);
24 20 }
25 21
26 static PyObject* ZstdCompressionObj_compress(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
27 static char* kwlist[] = {
28 "data",
29 NULL
30 };
22 static PyObject *ZstdCompressionObj_compress(ZstdCompressionObj *self,
23 PyObject *args, PyObject *kwargs) {
24 static char *kwlist[] = {"data", NULL};
31 25
32 Py_buffer source;
33 ZSTD_inBuffer input;
34 size_t zresult;
35 PyObject* result = NULL;
36 Py_ssize_t resultSize = 0;
37
38 if (self->finished) {
39 PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
40 return NULL;
41 }
26 Py_buffer source;
27 ZSTD_inBuffer input;
28 size_t zresult;
29 PyObject *result = NULL;
30 Py_ssize_t resultSize = 0;
42 31
43 #if PY_MAJOR_VERSION >= 3
44 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
45 #else
46 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
47 #endif
48 kwlist, &source)) {
49 return NULL;
50 }
32 if (self->finished) {
33 PyErr_SetString(ZstdError,
34 "cannot call compress() after compressor finished");
35 return NULL;
36 }
51 37
52 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
53 PyErr_SetString(PyExc_ValueError,
54 "data buffer should be contiguous and have at most one dimension");
55 goto finally;
56 }
38 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", kwlist,
39 &source)) {
40 return NULL;
41 }
57 42
58 input.src = source.buf;
59 input.size = source.len;
60 input.pos = 0;
43 input.src = source.buf;
44 input.size = source.len;
45 input.pos = 0;
46
47 while (input.pos < (size_t)source.len) {
48 Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
49 self->compressor->cctx, &self->output, &input, ZSTD_e_continue);
50 Py_END_ALLOW_THREADS
61 51
62 while (input.pos < (size_t)source.len) {
63 Py_BEGIN_ALLOW_THREADS
64 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
65 &input, ZSTD_e_continue);
66 Py_END_ALLOW_THREADS
52 if (ZSTD_isError(zresult)) {
53 PyErr_Format(ZstdError, "zstd compress error: %s",
54 ZSTD_getErrorName(zresult));
55 Py_CLEAR(result);
56 goto finally;
57 }
67 58
68 if (ZSTD_isError(zresult)) {
69 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
70 Py_CLEAR(result);
71 goto finally;
72 }
59 if (self->output.pos) {
60 if (result) {
61 resultSize = PyBytes_GET_SIZE(result);
73 62
74 if (self->output.pos) {
75 if (result) {
76 resultSize = PyBytes_GET_SIZE(result);
63 if (safe_pybytes_resize(&result,
64 resultSize + self->output.pos)) {
65 Py_CLEAR(result);
66 goto finally;
67 }
77 68
78 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
79 Py_CLEAR(result);
80 goto finally;
81 }
69 memcpy(PyBytes_AS_STRING(result) + resultSize, self->output.dst,
70 self->output.pos);
71 }
72 else {
73 result = PyBytes_FromStringAndSize(self->output.dst,
74 self->output.pos);
75 if (!result) {
76 goto finally;
77 }
78 }
82 79
83 memcpy(PyBytes_AS_STRING(result) + resultSize,
84 self->output.dst, self->output.pos);
85 }
86 else {
87 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
88 if (!result) {
89 goto finally;
90 }
91 }
80 self->output.pos = 0;
81 }
82 }
92 83
93 self->output.pos = 0;
94 }
95 }
96
97 if (NULL == result) {
98 result = PyBytes_FromString("");
99 }
84 if (NULL == result) {
85 result = PyBytes_FromString("");
86 }
100 87
101 88 finally:
102 PyBuffer_Release(&source);
89 PyBuffer_Release(&source);
103 90
104 return result;
91 return result;
105 92 }
106 93
107 static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args, PyObject* kwargs) {
108 static char* kwlist[] = {
109 "flush_mode",
110 NULL
111 };
94 static PyObject *ZstdCompressionObj_flush(ZstdCompressionObj *self,
95 PyObject *args, PyObject *kwargs) {
96 static char *kwlist[] = {"flush_mode", NULL};
97
98 int flushMode = compressorobj_flush_finish;
99 size_t zresult;
100 PyObject *result = NULL;
101 Py_ssize_t resultSize = 0;
102 ZSTD_inBuffer input;
103 ZSTD_EndDirective zFlushMode;
112 104
113 int flushMode = compressorobj_flush_finish;
114 size_t zresult;
115 PyObject* result = NULL;
116 Py_ssize_t resultSize = 0;
117 ZSTD_inBuffer input;
118 ZSTD_EndDirective zFlushMode;
105 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist,
106 &flushMode)) {
107 return NULL;
108 }
119 109
120 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
121 return NULL;
122 }
123
124 if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
125 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
126 return NULL;
127 }
110 if (flushMode != compressorobj_flush_finish &&
111 flushMode != compressorobj_flush_block) {
112 PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
113 return NULL;
114 }
128 115
129 if (self->finished) {
130 PyErr_SetString(ZstdError, "compressor object already finished");
131 return NULL;
132 }
116 if (self->finished) {
117 PyErr_SetString(ZstdError, "compressor object already finished");
118 return NULL;
119 }
133 120
134 switch (flushMode) {
135 case compressorobj_flush_block:
136 zFlushMode = ZSTD_e_flush;
137 break;
121 switch (flushMode) {
122 case compressorobj_flush_block:
123 zFlushMode = ZSTD_e_flush;
124 break;
138 125
139 case compressorobj_flush_finish:
140 zFlushMode = ZSTD_e_end;
141 self->finished = 1;
142 break;
126 case compressorobj_flush_finish:
127 zFlushMode = ZSTD_e_end;
128 self->finished = 1;
129 break;
143 130
144 default:
145 PyErr_SetString(ZstdError, "unhandled flush mode");
146 return NULL;
147 }
131 default:
132 PyErr_SetString(ZstdError, "unhandled flush mode");
133 return NULL;
134 }
148 135
149 assert(self->output.pos == 0);
136 assert(self->output.pos == 0);
150 137
151 input.src = NULL;
152 input.size = 0;
153 input.pos = 0;
138 input.src = NULL;
139 input.size = 0;
140 input.pos = 0;
154 141
155 while (1) {
156 Py_BEGIN_ALLOW_THREADS
157 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
158 &input, zFlushMode);
159 Py_END_ALLOW_THREADS
142 while (1) {
143 Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
144 self->compressor->cctx, &self->output, &input, zFlushMode);
145 Py_END_ALLOW_THREADS
160 146
161 if (ZSTD_isError(zresult)) {
162 PyErr_Format(ZstdError, "error ending compression stream: %s",
163 ZSTD_getErrorName(zresult));
164 return NULL;
165 }
147 if (ZSTD_isError(zresult)) {
148 PyErr_Format(ZstdError, "error ending compression stream: %s",
149 ZSTD_getErrorName(zresult));
150 return NULL;
151 }
166 152
167 if (self->output.pos) {
168 if (result) {
169 resultSize = PyBytes_GET_SIZE(result);
153 if (self->output.pos) {
154 if (result) {
155 resultSize = PyBytes_GET_SIZE(result);
170 156
171 if (safe_pybytes_resize(&result, resultSize + self->output.pos)) {
172 Py_XDECREF(result);
173 return NULL;
174 }
157 if (safe_pybytes_resize(&result,
158 resultSize + self->output.pos)) {
159 Py_XDECREF(result);
160 return NULL;
161 }
175 162
176 memcpy(PyBytes_AS_STRING(result) + resultSize,
177 self->output.dst, self->output.pos);
178 }
179 else {
180 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
181 if (!result) {
182 return NULL;
183 }
184 }
163 memcpy(PyBytes_AS_STRING(result) + resultSize, self->output.dst,
164 self->output.pos);
165 }
166 else {
167 result = PyBytes_FromStringAndSize(self->output.dst,
168 self->output.pos);
169 if (!result) {
170 return NULL;
171 }
172 }
185 173
186 self->output.pos = 0;
187 }
174 self->output.pos = 0;
175 }
188 176
189 if (!zresult) {
190 break;
191 }
192 }
177 if (!zresult) {
178 break;
179 }
180 }
193 181
194 if (result) {
195 return result;
196 }
197 else {
198 return PyBytes_FromString("");
199 }
182 if (result) {
183 return result;
184 }
185 else {
186 return PyBytes_FromString("");
187 }
200 188 }
201 189
202 190 static PyMethodDef ZstdCompressionObj_methods[] = {
203 { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS | METH_KEYWORDS,
204 PyDoc_STR("compress data") },
205 { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS | METH_KEYWORDS,
206 PyDoc_STR("finish compression operation") },
207 { NULL, NULL }
191 {"compress", (PyCFunction)ZstdCompressionObj_compress,
192 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("compress data")},
193 {"flush", (PyCFunction)ZstdCompressionObj_flush,
194 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("finish compression operation")},
195 {NULL, NULL}};
196
197 PyType_Slot ZstdCompressionObjSlots[] = {
198 {Py_tp_dealloc, ZstdCompressionObj_dealloc},
199 {Py_tp_methods, ZstdCompressionObj_methods},
200 {Py_tp_new, PyType_GenericNew},
201 {0, NULL},
208 202 };
209 203
210 PyTypeObject ZstdCompressionObjType = {
211 PyVarObject_HEAD_INIT(NULL, 0)
212 "zstd.ZstdCompressionObj", /* tp_name */
213 sizeof(ZstdCompressionObj), /* tp_basicsize */
214 0, /* tp_itemsize */
215 (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */
216 0, /* tp_print */
217 0, /* tp_getattr */
218 0, /* tp_setattr */
219 0, /* tp_compare */
220 0, /* tp_repr */
221 0, /* tp_as_number */
222 0, /* tp_as_sequence */
223 0, /* tp_as_mapping */
224 0, /* tp_hash */
225 0, /* tp_call */
226 0, /* tp_str */
227 0, /* tp_getattro */
228 0, /* tp_setattro */
229 0, /* tp_as_buffer */
230 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
231 ZstdCompressionObj__doc__, /* tp_doc */
232 0, /* tp_traverse */
233 0, /* tp_clear */
234 0, /* tp_richcompare */
235 0, /* tp_weaklistoffset */
236 0, /* tp_iter */
237 0, /* tp_iternext */
238 ZstdCompressionObj_methods, /* tp_methods */
239 0, /* tp_members */
240 0, /* tp_getset */
241 0, /* tp_base */
242 0, /* tp_dict */
243 0, /* tp_descr_get */
244 0, /* tp_descr_set */
245 0, /* tp_dictoffset */
246 0, /* tp_init */
247 0, /* tp_alloc */
248 PyType_GenericNew, /* tp_new */
204 PyType_Spec ZstdCompressionObjSpec = {
205 "zstd.ZstdCompressionObj",
206 sizeof(ZstdCompressionObj),
207 0,
208 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
209 ZstdCompressionObjSlots,
249 210 };
250 211
251 void compressobj_module_init(PyObject* module) {
252 Py_SET_TYPE(&ZstdCompressionObjType, &PyType_Type);
253 if (PyType_Ready(&ZstdCompressionObjType) < 0) {
254 return;
255 }
212 PyTypeObject *ZstdCompressionObjType;
213
214 void compressobj_module_init(PyObject *module) {
215 ZstdCompressionObjType =
216 (PyTypeObject *)PyType_FromSpec(&ZstdCompressionObjSpec);
217 if (PyType_Ready(ZstdCompressionObjType) < 0) {
218 return;
219 }
256 220 }
This diff has been collapsed as it changes many lines, (2647 lines changed) Show them Hide them
@@ -1,1670 +1,1557
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 #include "pool.h"
10
11 extern PyObject *ZstdError;
11 12
12 extern PyObject* ZstdError;
13
14 int setup_cctx(ZstdCompressor* compressor) {
15 size_t zresult;
13 int setup_cctx(ZstdCompressor *compressor) {
14 size_t zresult;
16 15
17 assert(compressor);
18 assert(compressor->cctx);
19 assert(compressor->params);
16 assert(compressor);
17 assert(compressor->cctx);
18 assert(compressor->params);
20 19
21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
22 if (ZSTD_isError(zresult)) {
23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
24 ZSTD_getErrorName(zresult));
25 return 1;
26 }
20 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx,
21 compressor->params);
22 if (ZSTD_isError(zresult)) {
23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
24 ZSTD_getErrorName(zresult));
25 return 1;
26 }
27 27
28 if (compressor->dict) {
29 if (compressor->dict->cdict) {
30 zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
31 }
32 else {
33 zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx,
34 compressor->dict->dictData, compressor->dict->dictSize,
35 ZSTD_dlm_byRef, compressor->dict->dictType);
36 }
37 if (ZSTD_isError(zresult)) {
38 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
39 ZSTD_getErrorName(zresult));
40 return 1;
41 }
42 }
28 if (compressor->dict) {
29 if (compressor->dict->cdict) {
30 zresult =
31 ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict);
32 }
33 else {
34 zresult = ZSTD_CCtx_loadDictionary_advanced(
35 compressor->cctx, compressor->dict->dictData,
36 compressor->dict->dictSize, ZSTD_dlm_byRef,
37 compressor->dict->dictType);
38 }
39 if (ZSTD_isError(zresult)) {
40 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
41 ZSTD_getErrorName(zresult));
42 return 1;
43 }
44 }
43 45
44 return 0;
46 return 0;
45 47 }
46 48
47 static PyObject* frame_progression(ZSTD_CCtx* cctx) {
48 PyObject* result = NULL;
49 PyObject* value;
50 ZSTD_frameProgression progression;
49 static PyObject *frame_progression(ZSTD_CCtx *cctx) {
50 PyObject *result = NULL;
51 PyObject *value;
52 ZSTD_frameProgression progression;
51 53
52 result = PyTuple_New(3);
53 if (!result) {
54 return NULL;
55 }
54 result = PyTuple_New(3);
55 if (!result) {
56 return NULL;
57 }
56 58
57 progression = ZSTD_getFrameProgression(cctx);
59 progression = ZSTD_getFrameProgression(cctx);
58 60
59 value = PyLong_FromUnsignedLongLong(progression.ingested);
60 if (!value) {
61 Py_DECREF(result);
62 return NULL;
63 }
61 value = PyLong_FromUnsignedLongLong(progression.ingested);
62 if (!value) {
63 Py_DECREF(result);
64 return NULL;
65 }
64 66
65 PyTuple_SET_ITEM(result, 0, value);
67 PyTuple_SET_ITEM(result, 0, value);
66 68
67 value = PyLong_FromUnsignedLongLong(progression.consumed);
68 if (!value) {
69 Py_DECREF(result);
70 return NULL;
71 }
69 value = PyLong_FromUnsignedLongLong(progression.consumed);
70 if (!value) {
71 Py_DECREF(result);
72 return NULL;
73 }
72 74
73 PyTuple_SET_ITEM(result, 1, value);
75 PyTuple_SET_ITEM(result, 1, value);
74 76
75 value = PyLong_FromUnsignedLongLong(progression.produced);
76 if (!value) {
77 Py_DECREF(result);
78 return NULL;
79 }
77 value = PyLong_FromUnsignedLongLong(progression.produced);
78 if (!value) {
79 Py_DECREF(result);
80 return NULL;
81 }
80 82
81 PyTuple_SET_ITEM(result, 2, value);
83 PyTuple_SET_ITEM(result, 2, value);
82 84
83 return result;
85 return result;
84 86 }
85 87
86 PyDoc_STRVAR(ZstdCompressor__doc__,
87 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
88 "\n"
89 "Create an object used to perform Zstandard compression.\n"
90 "\n"
91 "An instance can compress data various ways. Instances can be used multiple\n"
92 "times. Each compression operation will use the compression parameters\n"
93 "defined at construction time.\n"
94 "\n"
95 "Compression can be configured via the following names arguments:\n"
96 "\n"
97 "level\n"
98 " Integer compression level.\n"
99 "dict_data\n"
100 " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n"
101 "compression_params\n"
102 " A ``CompressionParameters`` instance defining low-level compression"
103 " parameters. If defined, this will overwrite the ``level`` argument.\n"
104 "write_checksum\n"
105 " If True, a 4 byte content checksum will be written with the compressed\n"
106 " data, allowing the decompressor to perform content verification.\n"
107 "write_content_size\n"
108 " If True (the default), the decompressed content size will be included in\n"
109 " the header of the compressed data. This data will only be written if the\n"
110 " compressor knows the size of the input data.\n"
111 "write_dict_id\n"
112 " Determines whether the dictionary ID will be written into the compressed\n"
113 " data. Defaults to True. Only adds content to the compressed data if\n"
114 " a dictionary is being used.\n"
115 "threads\n"
116 " Number of threads to use to compress data concurrently. When set,\n"
117 " compression operations are performed on multiple threads. The default\n"
118 " value (0) disables multi-threaded compression. A value of ``-1`` means to\n"
119 " set the number of threads to the number of detected logical CPUs.\n"
120 );
88 static int ZstdCompressor_init(ZstdCompressor *self, PyObject *args,
89 PyObject *kwargs) {
90 static char *kwlist[] = {"level",
91 "dict_data",
92 "compression_params",
93 "write_checksum",
94 "write_content_size",
95 "write_dict_id",
96 "threads",
97 NULL};
98
99 int level = 3;
100 PyObject *dict = NULL;
101 PyObject *params = NULL;
102 PyObject *writeChecksum = NULL;
103 PyObject *writeContentSize = NULL;
104 PyObject *writeDictID = NULL;
105 int threads = 0;
106
107 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iOOOOOi:ZstdCompressor",
108 kwlist, &level, &dict, &params,
109 &writeChecksum, &writeContentSize,
110 &writeDictID, &threads)) {
111 return -1;
112 }
113
114 if (level > ZSTD_maxCLevel()) {
115 PyErr_Format(PyExc_ValueError, "level must be less than %d",
116 ZSTD_maxCLevel() + 1);
117 return -1;
118 }
121 119
122 static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
123 static char* kwlist[] = {
124 "level",
125 "dict_data",
126 "compression_params",
127 "write_checksum",
128 "write_content_size",
129 "write_dict_id",
130 "threads",
131 NULL
132 };
120 if (threads < 0) {
121 threads = cpu_count();
122 }
123
124 if (dict) {
125 if (dict == Py_None) {
126 dict = NULL;
127 }
128 else if (!PyObject_IsInstance(dict,
129 (PyObject *)ZstdCompressionDictType)) {
130 PyErr_Format(PyExc_TypeError,
131 "dict_data must be zstd.ZstdCompressionDict");
132 return -1;
133 }
134 }
133 135
134 int level = 3;
135 ZstdCompressionDict* dict = NULL;
136 ZstdCompressionParametersObject* params = NULL;
137 PyObject* writeChecksum = NULL;
138 PyObject* writeContentSize = NULL;
139 PyObject* writeDictID = NULL;
140 int threads = 0;
136 if (params) {
137 if (params == Py_None) {
138 params = NULL;
139 }
140 else if (!PyObject_IsInstance(
141 params, (PyObject *)ZstdCompressionParametersType)) {
142 PyErr_Format(
143 PyExc_TypeError,
144 "compression_params must be zstd.ZstdCompressionParameters");
145 return -1;
146 }
147 }
141 148
142 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor",
143 kwlist, &level, &ZstdCompressionDictType, &dict,
144 &ZstdCompressionParametersType, &params,
145 &writeChecksum, &writeContentSize, &writeDictID, &threads)) {
146 return -1;
147 }
148
149 if (level > ZSTD_maxCLevel()) {
150 PyErr_Format(PyExc_ValueError, "level must be less than %d",
151 ZSTD_maxCLevel() + 1);
152 return -1;
153 }
154
155 if (threads < 0) {
156 threads = cpu_count();
157 }
149 if (writeChecksum == Py_None) {
150 writeChecksum = NULL;
151 }
152 if (writeContentSize == Py_None) {
153 writeContentSize = NULL;
154 }
155 if (writeDictID == Py_None) {
156 writeDictID = NULL;
157 }
158 158
159 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
160 overhead of each compression operation. */
161 self->cctx = ZSTD_createCCtx();
162 if (!self->cctx) {
163 PyErr_NoMemory();
164 return -1;
165 }
159 /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
160 overhead of each compression operation. */
161 self->cctx = ZSTD_createCCtx();
162 if (!self->cctx) {
163 PyErr_NoMemory();
164 return -1;
165 }
166 166
167 /* TODO stuff the original parameters away somewhere so we can reset later. This
168 will allow us to do things like automatically adjust cparams based on input
169 size (assuming zstd isn't doing that internally). */
167 /* TODO stuff the original parameters away somewhere so we can reset later.
168 This will allow us to do things like automatically adjust cparams based
169 on input size (assuming zstd isn't doing that internally). */
170 170
171 self->params = ZSTD_createCCtxParams();
172 if (!self->params) {
173 PyErr_NoMemory();
174 return -1;
175 }
171 self->params = ZSTD_createCCtxParams();
172 if (!self->params) {
173 PyErr_NoMemory();
174 return -1;
175 }
176 176
177 if (params && writeChecksum) {
178 PyErr_SetString(PyExc_ValueError,
179 "cannot define compression_params and write_checksum");
180 return -1;
181 }
177 if (params && writeChecksum) {
178 PyErr_SetString(PyExc_ValueError,
179 "cannot define compression_params and write_checksum");
180 return -1;
181 }
182 182
183 if (params && writeContentSize) {
184 PyErr_SetString(PyExc_ValueError,
185 "cannot define compression_params and write_content_size");
186 return -1;
187 }
183 if (params && writeContentSize) {
184 PyErr_SetString(
185 PyExc_ValueError,
186 "cannot define compression_params and write_content_size");
187 return -1;
188 }
188 189
189 if (params && writeDictID) {
190 PyErr_SetString(PyExc_ValueError,
191 "cannot define compression_params and write_dict_id");
192 return -1;
193 }
190 if (params && writeDictID) {
191 PyErr_SetString(PyExc_ValueError,
192 "cannot define compression_params and write_dict_id");
193 return -1;
194 }
194 195
195 if (params && threads) {
196 PyErr_SetString(PyExc_ValueError,
197 "cannot define compression_params and threads");
198 return -1;
199 }
196 if (params && threads) {
197 PyErr_SetString(PyExc_ValueError,
198 "cannot define compression_params and threads");
199 return -1;
200 }
200 201
201 if (params) {
202 if (set_parameters(self->params, params)) {
203 return -1;
204 }
205 }
206 else {
207 if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) {
208 return -1;
209 }
210
211 if (set_parameter(self->params, ZSTD_c_contentSizeFlag,
212 writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) {
213 return -1;
214 }
202 if (params) {
203 if (set_parameters(self->params,
204 (ZstdCompressionParametersObject *)params)) {
205 return -1;
206 }
207 }
208 else {
209 if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) {
210 return -1;
211 }
215 212
216 if (set_parameter(self->params, ZSTD_c_checksumFlag,
217 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
218 return -1;
219 }
213 if (set_parameter(self->params, ZSTD_c_contentSizeFlag,
214 writeContentSize ? PyObject_IsTrue(writeContentSize)
215 : 1)) {
216 return -1;
217 }
220 218
221 if (set_parameter(self->params, ZSTD_c_dictIDFlag,
222 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
223 return -1;
224 }
219 if (set_parameter(self->params, ZSTD_c_checksumFlag,
220 writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) {
221 return -1;
222 }
225 223
226 if (threads) {
227 if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) {
228 return -1;
229 }
230 }
231 }
224 if (set_parameter(self->params, ZSTD_c_dictIDFlag,
225 writeDictID ? PyObject_IsTrue(writeDictID) : 1)) {
226 return -1;
227 }
232 228
233 if (dict) {
234 self->dict = dict;
235 Py_INCREF(dict);
236 }
229 if (threads) {
230 if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) {
231 return -1;
232 }
233 }
234 }
235
236 if (dict) {
237 self->dict = (ZstdCompressionDict *)dict;
238 Py_INCREF(dict);
239 }
237 240
238 241 if (setup_cctx(self)) {
239 242 return -1;
240 243 }
241 244
242 return 0;
243 }
244
245 static void ZstdCompressor_dealloc(ZstdCompressor* self) {
246 if (self->cctx) {
247 ZSTD_freeCCtx(self->cctx);
248 self->cctx = NULL;
249 }
250
251 if (self->params) {
252 ZSTD_freeCCtxParams(self->params);
253 self->params = NULL;
254 }
255
256 Py_XDECREF(self->dict);
257 PyObject_Del(self);
245 return 0;
258 246 }
259 247
260 PyDoc_STRVAR(ZstdCompressor_memory_size__doc__,
261 "memory_size()\n"
262 "\n"
263 "Obtain the memory usage of this compressor, in bytes.\n"
264 );
248 static void ZstdCompressor_dealloc(ZstdCompressor *self) {
249 if (self->cctx) {
250 ZSTD_freeCCtx(self->cctx);
251 self->cctx = NULL;
252 }
265 253
266 static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) {
267 if (self->cctx) {
268 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
269 }
270 else {
271 PyErr_SetString(ZstdError, "no compressor context found; this should never happen");
272 return NULL;
273 }
254 if (self->params) {
255 ZSTD_freeCCtxParams(self->params);
256 self->params = NULL;
257 }
258
259 Py_XDECREF(self->dict);
260 PyObject_Del(self);
274 261 }
275 262
276 PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__,
277 "frame_progression()\n"
278 "\n"
279 "Return information on how much work the compressor has done.\n"
280 "\n"
281 "Returns a 3-tuple of (ingested, consumed, produced).\n"
282 );
263 static PyObject *ZstdCompressor_memory_size(ZstdCompressor *self) {
264 if (self->cctx) {
265 return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx));
266 }
267 else {
268 PyErr_SetString(
269 ZstdError, "no compressor context found; this should never happen");
270 return NULL;
271 }
272 }
283 273
284 static PyObject* ZstdCompressor_frame_progression(ZstdCompressor* self) {
285 return frame_progression(self->cctx);
274 static PyObject *ZstdCompressor_frame_progression(ZstdCompressor *self) {
275 return frame_progression(self->cctx);
286 276 }
287 277
288 PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__,
289 "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n"
290 "compress data between streams\n"
291 "\n"
292 "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n"
293 "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n"
294 "method.\n"
295 "\n"
296 "An optional ``size`` argument specifies the size of the source stream.\n"
297 "If defined, compression parameters will be tuned based on the size.\n"
298 "\n"
299 "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n"
300 "of ``read()`` and ``write()`` operations, respectively. By default, they use\n"
301 "the default compression stream input and output sizes, respectively.\n"
302 );
278 static PyObject *ZstdCompressor_copy_stream(ZstdCompressor *self,
279 PyObject *args, PyObject *kwargs) {
280 static char *kwlist[] = {"ifh", "ofh", "size",
281 "read_size", "write_size", NULL};
303 282
304 static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
305 static char* kwlist[] = {
306 "ifh",
307 "ofh",
308 "size",
309 "read_size",
310 "write_size",
311 NULL
312 };
283 PyObject *source;
284 PyObject *dest;
285 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
286 size_t inSize = ZSTD_CStreamInSize();
287 size_t outSize = ZSTD_CStreamOutSize();
288 ZSTD_inBuffer input;
289 ZSTD_outBuffer output;
290 Py_ssize_t totalRead = 0;
291 Py_ssize_t totalWrite = 0;
292 char *readBuffer;
293 Py_ssize_t readSize;
294 PyObject *readResult = NULL;
295 PyObject *res = NULL;
296 size_t zresult;
297 PyObject *writeResult;
298 PyObject *totalReadPy;
299 PyObject *totalWritePy;
300
301 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
302 &source, &dest, &sourceSize, &inSize,
303 &outSize)) {
304 return NULL;
305 }
306
307 if (!PyObject_HasAttrString(source, "read")) {
308 PyErr_SetString(PyExc_ValueError,
309 "first argument must have a read() method");
310 return NULL;
311 }
313 312
314 PyObject* source;
315 PyObject* dest;
316 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
317 size_t inSize = ZSTD_CStreamInSize();
318 size_t outSize = ZSTD_CStreamOutSize();
319 ZSTD_inBuffer input;
320 ZSTD_outBuffer output;
321 Py_ssize_t totalRead = 0;
322 Py_ssize_t totalWrite = 0;
323 char* readBuffer;
324 Py_ssize_t readSize;
325 PyObject* readResult = NULL;
326 PyObject* res = NULL;
327 size_t zresult;
328 PyObject* writeResult;
329 PyObject* totalReadPy;
330 PyObject* totalWritePy;
313 if (!PyObject_HasAttrString(dest, "write")) {
314 PyErr_SetString(PyExc_ValueError,
315 "second argument must have a write() method");
316 return NULL;
317 }
331 318
332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist,
333 &source, &dest, &sourceSize, &inSize, &outSize)) {
334 return NULL;
335 }
319 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
336 320
337 if (!PyObject_HasAttrString(source, "read")) {
338 PyErr_SetString(PyExc_ValueError, "first argument must have a read() method");
339 return NULL;
340 }
321 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
322 if (ZSTD_isError(zresult)) {
323 PyErr_Format(ZstdError, "error setting source size: %s",
324 ZSTD_getErrorName(zresult));
325 return NULL;
326 }
341 327
342 if (!PyObject_HasAttrString(dest, "write")) {
343 PyErr_SetString(PyExc_ValueError, "second argument must have a write() method");
344 return NULL;
345 }
346
347 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
348
349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
350 if (ZSTD_isError(zresult)) {
351 PyErr_Format(ZstdError, "error setting source size: %s",
352 ZSTD_getErrorName(zresult));
353 return NULL;
354 }
328 /* Prevent free on uninitialized memory in finally. */
329 output.dst = PyMem_Malloc(outSize);
330 if (!output.dst) {
331 PyErr_NoMemory();
332 res = NULL;
333 goto finally;
334 }
335 output.size = outSize;
336 output.pos = 0;
355 337
356 /* Prevent free on uninitialized memory in finally. */
357 output.dst = PyMem_Malloc(outSize);
358 if (!output.dst) {
359 PyErr_NoMemory();
360 res = NULL;
361 goto finally;
362 }
363 output.size = outSize;
364 output.pos = 0;
338 input.src = NULL;
339 input.size = 0;
340 input.pos = 0;
365 341
366 input.src = NULL;
367 input.size = 0;
368 input.pos = 0;
342 while (1) {
343 /* Try to read from source stream. */
344 readResult = PyObject_CallMethod(source, "read", "n", inSize);
345 if (!readResult) {
346 goto finally;
347 }
348
349 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
369 350
370 while (1) {
371 /* Try to read from source stream. */
372 readResult = PyObject_CallMethod(source, "read", "n", inSize);
373 if (!readResult) {
374 PyErr_SetString(ZstdError, "could not read() from source");
375 goto finally;
376 }
351 /* If no data was read, we're at EOF. */
352 if (0 == readSize) {
353 break;
354 }
355
356 totalRead += readSize;
377 357
378 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
358 /* Send data to compressor */
359 input.src = readBuffer;
360 input.size = readSize;
361 input.pos = 0;
379 362
380 /* If no data was read, we're at EOF. */
381 if (0 == readSize) {
382 break;
383 }
363 while (input.pos < input.size) {
364 Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
365 self->cctx, &output, &input, ZSTD_e_continue);
366 Py_END_ALLOW_THREADS
384 367
385 totalRead += readSize;
386
387 /* Send data to compressor */
388 input.src = readBuffer;
389 input.size = readSize;
390 input.pos = 0;
368 if (ZSTD_isError(zresult)) {
369 res = NULL;
370 PyErr_Format(ZstdError, "zstd compress error: %s",
371 ZSTD_getErrorName(zresult));
372 goto finally;
373 }
391 374
392 while (input.pos < input.size) {
393 Py_BEGIN_ALLOW_THREADS
394 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_continue);
395 Py_END_ALLOW_THREADS
396
397 if (ZSTD_isError(zresult)) {
398 res = NULL;
399 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
400 goto finally;
401 }
375 if (output.pos) {
376 writeResult = PyObject_CallMethod(dest, "write", "y#",
377 output.dst, output.pos);
378 if (NULL == writeResult) {
379 res = NULL;
380 goto finally;
381 }
382 Py_XDECREF(writeResult);
383 totalWrite += output.pos;
384 output.pos = 0;
385 }
386 }
402 387
403 if (output.pos) {
404 #if PY_MAJOR_VERSION >= 3
405 writeResult = PyObject_CallMethod(dest, "write", "y#",
406 #else
407 writeResult = PyObject_CallMethod(dest, "write", "s#",
408 #endif
409 output.dst, output.pos);
410 Py_XDECREF(writeResult);
411 totalWrite += output.pos;
412 output.pos = 0;
413 }
414 }
388 Py_CLEAR(readResult);
389 }
390
391 /* We've finished reading. Now flush the compressor stream. */
392 assert(input.pos == input.size);
415 393
416 Py_CLEAR(readResult);
417 }
394 while (1) {
395 Py_BEGIN_ALLOW_THREADS zresult =
396 ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end);
397 Py_END_ALLOW_THREADS
418 398
419 /* We've finished reading. Now flush the compressor stream. */
420 assert(input.pos == input.size);
421
422 while (1) {
423 Py_BEGIN_ALLOW_THREADS
424 zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end);
425 Py_END_ALLOW_THREADS
399 if (ZSTD_isError(zresult)) {
400 PyErr_Format(ZstdError, "error ending compression stream: %s",
401 ZSTD_getErrorName(zresult));
402 res = NULL;
403 goto finally;
404 }
426 405
427 if (ZSTD_isError(zresult)) {
428 PyErr_Format(ZstdError, "error ending compression stream: %s",
429 ZSTD_getErrorName(zresult));
430 res = NULL;
431 goto finally;
432 }
406 if (output.pos) {
407 writeResult = PyObject_CallMethod(dest, "write", "y#", output.dst,
408 output.pos);
409 if (NULL == writeResult) {
410 res = NULL;
411 goto finally;
412 }
413 totalWrite += output.pos;
414 Py_XDECREF(writeResult);
415 output.pos = 0;
416 }
433 417
434 if (output.pos) {
435 #if PY_MAJOR_VERSION >= 3
436 writeResult = PyObject_CallMethod(dest, "write", "y#",
437 #else
438 writeResult = PyObject_CallMethod(dest, "write", "s#",
439 #endif
440 output.dst, output.pos);
441 totalWrite += output.pos;
442 Py_XDECREF(writeResult);
443 output.pos = 0;
444 }
418 if (!zresult) {
419 break;
420 }
421 }
445 422
446 if (!zresult) {
447 break;
448 }
449 }
450
451 totalReadPy = PyLong_FromSsize_t(totalRead);
452 totalWritePy = PyLong_FromSsize_t(totalWrite);
453 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
454 Py_DECREF(totalReadPy);
455 Py_DECREF(totalWritePy);
423 totalReadPy = PyLong_FromSsize_t(totalRead);
424 totalWritePy = PyLong_FromSsize_t(totalWrite);
425 res = PyTuple_Pack(2, totalReadPy, totalWritePy);
426 Py_DECREF(totalReadPy);
427 Py_DECREF(totalWritePy);
456 428
457 429 finally:
458 if (output.dst) {
459 PyMem_Free(output.dst);
460 }
430 if (output.dst) {
431 PyMem_Free(output.dst);
432 }
461 433
462 Py_XDECREF(readResult);
434 Py_XDECREF(readResult);
463 435
464 return res;
436 return res;
465 437 }
466 438
467 PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__,
468 "stream_reader(source, [size=0])\n"
469 "\n"
470 "Obtain an object that behaves like an I/O stream.\n"
471 "\n"
472 "The source object can be any object with a ``read(size)`` method\n"
473 "or an object that conforms to the buffer protocol.\n"
474 );
439 static ZstdCompressionReader *ZstdCompressor_stream_reader(ZstdCompressor *self,
440 PyObject *args,
441 PyObject *kwargs) {
442 static char *kwlist[] = {"source", "size", "read_size", "closefd", NULL};
443
444 PyObject *source;
445 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
446 size_t readSize = ZSTD_CStreamInSize();
447 PyObject *closefd = NULL;
448 ZstdCompressionReader *result = NULL;
449 size_t zresult;
475 450
476 static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
477 static char* kwlist[] = {
478 "source",
479 "size",
480 "read_size",
481 NULL
482 };
451 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_reader",
452 kwlist, &source, &sourceSize, &readSize,
453 &closefd)) {
454 return NULL;
455 }
483 456
484 PyObject* source;
485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
486 size_t readSize = ZSTD_CStreamInSize();
487 ZstdCompressionReader* result = NULL;
488 size_t zresult;
457 result = (ZstdCompressionReader *)PyObject_CallObject(
458 (PyObject *)ZstdCompressionReaderType, NULL);
459 if (!result) {
460 return NULL;
461 }
489 462
490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
491 &source, &sourceSize, &readSize)) {
492 return NULL;
493 }
494
495 result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL);
496 if (!result) {
497 return NULL;
498 }
463 result->entered = 0;
464 result->closed = 0;
499 465
500 if (PyObject_HasAttrString(source, "read")) {
501 result->reader = source;
502 Py_INCREF(source);
503 result->readSize = readSize;
504 }
505 else if (1 == PyObject_CheckBuffer(source)) {
506 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
507 goto except;
508 }
466 if (PyObject_HasAttrString(source, "read")) {
467 result->reader = source;
468 Py_INCREF(source);
469 result->readSize = readSize;
470 }
471 else if (1 == PyObject_CheckBuffer(source)) {
472 if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) {
473 goto except;
474 }
509 475
510 assert(result->buffer.len >= 0);
476 assert(result->buffer.len >= 0);
511 477
512 sourceSize = result->buffer.len;
513 }
514 else {
515 PyErr_SetString(PyExc_TypeError,
516 "must pass an object with a read() method or that conforms to the buffer protocol");
517 goto except;
518 }
478 sourceSize = result->buffer.len;
479 }
480 else {
481 PyErr_SetString(PyExc_TypeError,
482 "must pass an object with a read() method or that "
483 "conforms to the buffer protocol");
484 goto except;
485 }
519 486
520 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
487 result->closefd = closefd ? PyObject_IsTrue(closefd) : 1;
521 488
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 if (ZSTD_isError(zresult)) {
524 PyErr_Format(ZstdError, "error setting source source: %s",
525 ZSTD_getErrorName(zresult));
526 goto except;
527 }
489 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
528 490
529 result->compressor = self;
530 Py_INCREF(self);
491 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
492 if (ZSTD_isError(zresult)) {
493 PyErr_Format(ZstdError, "error setting source source: %s",
494 ZSTD_getErrorName(zresult));
495 goto except;
496 }
531 497
532 return result;
498 result->compressor = self;
499 Py_INCREF(self);
500
501 return result;
533 502
534 503 except:
535 Py_CLEAR(result);
504 Py_CLEAR(result);
536 505
537 return NULL;
506 return NULL;
538 507 }
539 508
540 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
541 "compress(data)\n"
542 "\n"
543 "Compress data in a single operation.\n"
544 "\n"
545 "This is the simplest mechanism to perform compression: simply pass in a\n"
546 "value and get a compressed value back. It is almost the most prone to abuse.\n"
547 "The input and output values must fit in memory, so passing in very large\n"
548 "values can result in excessive memory usage. For this reason, one of the\n"
549 "streaming based APIs is preferred for larger values.\n"
550 );
509 static PyObject *ZstdCompressor_compress(ZstdCompressor *self, PyObject *args,
510 PyObject *kwargs) {
511 static char *kwlist[] = {"data", NULL};
551 512
552 static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
553 static char* kwlist[] = {
554 "data",
555 NULL
556 };
513 Py_buffer source;
514 size_t destSize;
515 PyObject *output = NULL;
516 size_t zresult;
517 ZSTD_outBuffer outBuffer;
518 ZSTD_inBuffer inBuffer;
557 519
558 Py_buffer source;
559 size_t destSize;
560 PyObject* output = NULL;
561 size_t zresult;
562 ZSTD_outBuffer outBuffer;
563 ZSTD_inBuffer inBuffer;
520 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress", kwlist,
521 &source)) {
522 return NULL;
523 }
564 524
565 #if PY_MAJOR_VERSION >= 3
566 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress",
567 #else
568 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress",
569 #endif
570 kwlist, &source)) {
571 return NULL;
572 }
525 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
573 526
574 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
575 PyErr_SetString(PyExc_ValueError,
576 "data buffer should be contiguous and have at most one dimension");
577 goto finally;
578 }
579
580 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
527 destSize = ZSTD_compressBound(source.len);
528 output = PyBytes_FromStringAndSize(NULL, destSize);
529 if (!output) {
530 goto finally;
531 }
581 532
582 destSize = ZSTD_compressBound(source.len);
583 output = PyBytes_FromStringAndSize(NULL, destSize);
584 if (!output) {
585 goto finally;
586 }
533 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
534 if (ZSTD_isError(zresult)) {
535 PyErr_Format(ZstdError, "error setting source size: %s",
536 ZSTD_getErrorName(zresult));
537 Py_CLEAR(output);
538 goto finally;
539 }
587 540
588 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len);
589 if (ZSTD_isError(zresult)) {
590 PyErr_Format(ZstdError, "error setting source size: %s",
591 ZSTD_getErrorName(zresult));
592 Py_CLEAR(output);
593 goto finally;
594 }
541 inBuffer.src = source.buf;
542 inBuffer.size = source.len;
543 inBuffer.pos = 0;
595 544
596 inBuffer.src = source.buf;
597 inBuffer.size = source.len;
598 inBuffer.pos = 0;
545 outBuffer.dst = PyBytes_AsString(output);
546 outBuffer.size = destSize;
547 outBuffer.pos = 0;
599 548
600 outBuffer.dst = PyBytes_AsString(output);
601 outBuffer.size = destSize;
602 outBuffer.pos = 0;
603
604 Py_BEGIN_ALLOW_THREADS
605 /* By avoiding ZSTD_compress(), we don't necessarily write out content
606 size. This means the argument to ZstdCompressor to control frame
607 parameters is honored. */
608 zresult = ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
609 Py_END_ALLOW_THREADS
549 Py_BEGIN_ALLOW_THREADS
550 /* By avoiding ZSTD_compress(), we don't necessarily write out content
551 size. This means the argument to ZstdCompressor to control frame
552 parameters is honored. */
553 zresult =
554 ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end);
555 Py_END_ALLOW_THREADS
610 556
611 if (ZSTD_isError(zresult)) {
612 PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
613 Py_CLEAR(output);
614 goto finally;
615 }
616 else if (zresult) {
617 PyErr_SetString(ZstdError, "unexpected partial frame flush");
618 Py_CLEAR(output);
619 goto finally;
620 }
557 if (ZSTD_isError(zresult)) {
558 PyErr_Format(ZstdError, "cannot compress: %s",
559 ZSTD_getErrorName(zresult));
560 Py_CLEAR(output);
561 goto finally;
562 }
563 else if (zresult) {
564 PyErr_SetString(ZstdError, "unexpected partial frame flush");
565 Py_CLEAR(output);
566 goto finally;
567 }
621 568
622 Py_SET_SIZE(output, outBuffer.pos);
569 Py_SET_SIZE(output, outBuffer.pos);
623 570
624 571 finally:
625 PyBuffer_Release(&source);
626 return output;
572 PyBuffer_Release(&source);
573 return output;
627 574 }
628 575
629 PyDoc_STRVAR(ZstdCompressionObj__doc__,
630 "compressobj()\n"
631 "\n"
632 "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n"
633 "\n"
634 "The returned object exposes an API similar to ``zlib.compressobj`` and\n"
635 "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n"
636 "without changing how compression is performed.\n"
637 );
576 static ZstdCompressionObj *ZstdCompressor_compressobj(ZstdCompressor *self,
577 PyObject *args,
578 PyObject *kwargs) {
579 static char *kwlist[] = {"size", NULL};
638 580
639 static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
640 static char* kwlist[] = {
641 "size",
642 NULL
643 };
581 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
582 size_t outSize = ZSTD_CStreamOutSize();
583 ZstdCompressionObj *result = NULL;
584 size_t zresult;
644 585
645 unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN;
646 size_t outSize = ZSTD_CStreamOutSize();
647 ZstdCompressionObj* result = NULL;
648 size_t zresult;
586 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist,
587 &inSize)) {
588 return NULL;
589 }
649 590
650 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) {
651 return NULL;
652 }
591 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
653 592
654 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
655
656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
657 if (ZSTD_isError(zresult)) {
658 PyErr_Format(ZstdError, "error setting source size: %s",
659 ZSTD_getErrorName(zresult));
660 return NULL;
661 }
593 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
594 if (ZSTD_isError(zresult)) {
595 PyErr_Format(ZstdError, "error setting source size: %s",
596 ZSTD_getErrorName(zresult));
597 return NULL;
598 }
662 599
663 result = (ZstdCompressionObj*)PyObject_CallObject((PyObject*)&ZstdCompressionObjType, NULL);
664 if (!result) {
665 return NULL;
666 }
600 result = (ZstdCompressionObj *)PyObject_CallObject(
601 (PyObject *)ZstdCompressionObjType, NULL);
602 if (!result) {
603 return NULL;
604 }
667 605
668 result->output.dst = PyMem_Malloc(outSize);
669 if (!result->output.dst) {
670 PyErr_NoMemory();
671 Py_DECREF(result);
672 return NULL;
673 }
674 result->output.size = outSize;
675 result->compressor = self;
676 Py_INCREF(result->compressor);
606 result->output.dst = PyMem_Malloc(outSize);
607 if (!result->output.dst) {
608 PyErr_NoMemory();
609 Py_DECREF(result);
610 return NULL;
611 }
612 result->output.size = outSize;
613 result->compressor = self;
614 Py_INCREF(result->compressor);
677 615
678 return result;
616 return result;
679 617 }
680 618
681 PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__,
682 "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n"
683 "Read uncompressed data from a reader and return an iterator\n"
684 "\n"
685 "Returns an iterator of compressed data produced from reading from ``reader``.\n"
686 "\n"
687 "Uncompressed data will be obtained from ``reader`` by calling the\n"
688 "``read(size)`` method of it. The source data will be streamed into a\n"
689 "compressor. As compressed data is available, it will be exposed to the\n"
690 "iterator.\n"
691 "\n"
692 "Data is read from the source in chunks of ``read_size``. Compressed chunks\n"
693 "are at most ``write_size`` bytes. Both values default to the zstd input and\n"
694 "and output defaults, respectively.\n"
695 "\n"
696 "The caller is partially in control of how fast data is fed into the\n"
697 "compressor by how it consumes the returned iterator. The compressor will\n"
698 "not consume from the reader unless the caller consumes from the iterator.\n"
699 );
619 static ZstdCompressorIterator *ZstdCompressor_read_to_iter(ZstdCompressor *self,
620 PyObject *args,
621 PyObject *kwargs) {
622 static char *kwlist[] = {"reader", "size", "read_size", "write_size", NULL};
623
624 PyObject *reader;
625 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
626 size_t inSize = ZSTD_CStreamInSize();
627 size_t outSize = ZSTD_CStreamOutSize();
628 ZstdCompressorIterator *result;
629 size_t zresult;
700 630
701 static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
702 static char* kwlist[] = {
703 "reader",
704 "size",
705 "read_size",
706 "write_size",
707 NULL
708 };
631 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
632 &reader, &sourceSize, &inSize, &outSize)) {
633 return NULL;
634 }
709 635
710 PyObject* reader;
711 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
712 size_t inSize = ZSTD_CStreamInSize();
713 size_t outSize = ZSTD_CStreamOutSize();
714 ZstdCompressorIterator* result;
715 size_t zresult;
716
717 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist,
718 &reader, &sourceSize, &inSize, &outSize)) {
719 return NULL;
720 }
636 result = (ZstdCompressorIterator *)PyObject_CallObject(
637 (PyObject *)ZstdCompressorIteratorType, NULL);
638 if (!result) {
639 return NULL;
640 }
641 if (PyObject_HasAttrString(reader, "read")) {
642 result->reader = reader;
643 Py_INCREF(result->reader);
644 }
645 else if (1 == PyObject_CheckBuffer(reader)) {
646 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
647 goto except;
648 }
721 649
722 result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL);
723 if (!result) {
724 return NULL;
725 }
726 if (PyObject_HasAttrString(reader, "read")) {
727 result->reader = reader;
728 Py_INCREF(result->reader);
729 }
730 else if (1 == PyObject_CheckBuffer(reader)) {
731 if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) {
732 goto except;
733 }
650 sourceSize = result->buffer.len;
651 }
652 else {
653 PyErr_SetString(PyExc_ValueError,
654 "must pass an object with a read() method or conforms "
655 "to buffer protocol");
656 goto except;
657 }
734 658
735 sourceSize = result->buffer.len;
736 }
737 else {
738 PyErr_SetString(PyExc_ValueError,
739 "must pass an object with a read() method or conforms to buffer protocol");
740 goto except;
741 }
659 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
742 660
743 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
661 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
662 if (ZSTD_isError(zresult)) {
663 PyErr_Format(ZstdError, "error setting source size: %s",
664 ZSTD_getErrorName(zresult));
665 return NULL;
666 }
744 667
745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
746 if (ZSTD_isError(zresult)) {
747 PyErr_Format(ZstdError, "error setting source size: %s",
748 ZSTD_getErrorName(zresult));
749 return NULL;
750 }
751
752 result->compressor = self;
753 Py_INCREF(result->compressor);
668 result->compressor = self;
669 Py_INCREF(result->compressor);
754 670
755 result->inSize = inSize;
756 result->outSize = outSize;
671 result->inSize = inSize;
672 result->outSize = outSize;
757 673
758 result->output.dst = PyMem_Malloc(outSize);
759 if (!result->output.dst) {
760 PyErr_NoMemory();
761 goto except;
762 }
763 result->output.size = outSize;
674 result->output.dst = PyMem_Malloc(outSize);
675 if (!result->output.dst) {
676 PyErr_NoMemory();
677 goto except;
678 }
679 result->output.size = outSize;
764 680
765 goto finally;
681 goto finally;
766 682
767 683 except:
768 Py_CLEAR(result);
684 Py_CLEAR(result);
769 685
770 686 finally:
771 return result;
687 return result;
772 688 }
773 689
774 PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__,
775 "Create a context manager to write compressed data to an object.\n"
776 "\n"
777 "The passed object must have a ``write()`` method.\n"
778 "\n"
779 "The caller feeds input data to the object by calling ``compress(data)``.\n"
780 "Compressed data is written to the argument given to this function.\n"
781 "\n"
782 "The function takes an optional ``size`` argument indicating the total size\n"
783 "of the eventual input. If specified, the size will influence compression\n"
784 "parameter tuning and could result in the size being written into the\n"
785 "header of the compressed data.\n"
786 "\n"
787 "An optional ``write_size`` argument is also accepted. It defines the maximum\n"
788 "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n"
789 "for a compressor output stream.\n"
790 );
690 static ZstdCompressionWriter *ZstdCompressor_stream_writer(ZstdCompressor *self,
691 PyObject *args,
692 PyObject *kwargs) {
693 static char *kwlist[] = {
694 "writer", "size", "write_size", "write_return_read", "closefd", NULL};
695
696 PyObject *writer;
697 ZstdCompressionWriter *result;
698 size_t zresult;
699 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
700 size_t outSize = ZSTD_CStreamOutSize();
701 PyObject *writeReturnRead = NULL;
702 PyObject *closefd = NULL;
791 703
792 static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
793 static char* kwlist[] = {
794 "writer",
795 "size",
796 "write_size",
797 "write_return_read",
798 NULL
799 };
704 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkOO:stream_writer",
705 kwlist, &writer, &sourceSize, &outSize,
706 &writeReturnRead, &closefd)) {
707 return NULL;
708 }
800 709
801 PyObject* writer;
802 ZstdCompressionWriter* result;
803 size_t zresult;
804 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
805 size_t outSize = ZSTD_CStreamOutSize();
806 PyObject* writeReturnRead = NULL;
710 if (!PyObject_HasAttrString(writer, "write")) {
711 PyErr_SetString(PyExc_ValueError,
712 "must pass an object with a write() method");
713 return NULL;
714 }
807 715
808 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_writer", kwlist,
809 &writer, &sourceSize, &outSize, &writeReturnRead)) {
810 return NULL;
811 }
716 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
812 717
813 if (!PyObject_HasAttrString(writer, "write")) {
814 PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method");
815 return NULL;
816 }
817
818 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
718 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
719 if (ZSTD_isError(zresult)) {
720 PyErr_Format(ZstdError, "error setting source size: %s",
721 ZSTD_getErrorName(zresult));
722 return NULL;
723 }
819 724
820 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
821 if (ZSTD_isError(zresult)) {
822 PyErr_Format(ZstdError, "error setting source size: %s",
823 ZSTD_getErrorName(zresult));
824 return NULL;
825 }
725 result = (ZstdCompressionWriter *)PyObject_CallObject(
726 (PyObject *)ZstdCompressionWriterType, NULL);
727 if (!result) {
728 return NULL;
729 }
826 730
827 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
828 if (!result) {
829 return NULL;
830 }
731 result->entered = 0;
732 result->closing = 0;
733 result->closed = 0;
831 734
832 result->output.dst = PyMem_Malloc(outSize);
833 if (!result->output.dst) {
834 Py_DECREF(result);
835 return (ZstdCompressionWriter*)PyErr_NoMemory();
836 }
735 result->output.dst = PyMem_Malloc(outSize);
736 if (!result->output.dst) {
737 Py_DECREF(result);
738 return (ZstdCompressionWriter *)PyErr_NoMemory();
739 }
837 740
838 result->output.pos = 0;
839 result->output.size = outSize;
741 result->output.pos = 0;
742 result->output.size = outSize;
840 743
841 result->compressor = self;
842 Py_INCREF(result->compressor);
744 result->compressor = self;
745 Py_INCREF(result->compressor);
843 746
844 result->writer = writer;
845 Py_INCREF(result->writer);
747 result->writer = writer;
748 Py_INCREF(result->writer);
846 749
847 result->outSize = outSize;
848 result->bytesCompressed = 0;
849 result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0;
750 result->outSize = outSize;
751 result->bytesCompressed = 0;
752 result->writeReturnRead =
753 writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 1;
754 result->closefd = closefd ? PyObject_IsTrue(closefd) : 1;
850 755
851 return result;
756 return result;
852 757 }
853 758
854 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
855 "Create an object for iterative compressing to same-sized chunks.\n"
856 );
857
858 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
859 static char* kwlist[] = {
860 "size",
861 "chunk_size",
862 NULL
863 };
759 static ZstdCompressionChunker *
760 ZstdCompressor_chunker(ZstdCompressor *self, PyObject *args, PyObject *kwargs) {
761 static char *kwlist[] = {"size", "chunk_size", NULL};
864 762
865 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
866 size_t chunkSize = ZSTD_CStreamOutSize();
867 ZstdCompressionChunker* chunker;
868 size_t zresult;
763 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
764 size_t chunkSize = ZSTD_CStreamOutSize();
765 ZstdCompressionChunker *chunker;
766 size_t zresult;
869 767
870 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
871 &sourceSize, &chunkSize)) {
872 return NULL;
873 }
768 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
769 &sourceSize, &chunkSize)) {
770 return NULL;
771 }
874 772
875 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
773 ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only);
876 774
877 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
878 if (ZSTD_isError(zresult)) {
879 PyErr_Format(ZstdError, "error setting source size: %s",
880 ZSTD_getErrorName(zresult));
881 return NULL;
882 }
775 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
776 if (ZSTD_isError(zresult)) {
777 PyErr_Format(ZstdError, "error setting source size: %s",
778 ZSTD_getErrorName(zresult));
779 return NULL;
780 }
883 781
884 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
885 if (!chunker) {
886 return NULL;
887 }
782 chunker = (ZstdCompressionChunker *)PyObject_CallObject(
783 (PyObject *)ZstdCompressionChunkerType, NULL);
784 if (!chunker) {
785 return NULL;
786 }
888 787
889 chunker->output.dst = PyMem_Malloc(chunkSize);
890 if (!chunker->output.dst) {
891 PyErr_NoMemory();
892 Py_DECREF(chunker);
893 return NULL;
894 }
895 chunker->output.size = chunkSize;
896 chunker->output.pos = 0;
788 chunker->output.dst = PyMem_Malloc(chunkSize);
789 if (!chunker->output.dst) {
790 PyErr_NoMemory();
791 Py_DECREF(chunker);
792 return NULL;
793 }
794 chunker->output.size = chunkSize;
795 chunker->output.pos = 0;
897 796
898 chunker->compressor = self;
899 Py_INCREF(chunker->compressor);
797 chunker->compressor = self;
798 Py_INCREF(chunker->compressor);
900 799
901 chunker->chunkSize = chunkSize;
800 chunker->chunkSize = chunkSize;
902 801
903 return chunker;
802 return chunker;
904 803 }
905 804
906 805 typedef struct {
907 void* sourceData;
908 size_t sourceSize;
806 void *sourceData;
807 size_t sourceSize;
909 808 } DataSource;
910 809
911 810 typedef struct {
912 DataSource* sources;
913 Py_ssize_t sourcesSize;
914 unsigned long long totalSourceSize;
811 DataSource *sources;
812 Py_ssize_t sourcesSize;
813 unsigned long long totalSourceSize;
915 814 } DataSources;
916 815
917 816 typedef struct {
918 void* dest;
919 Py_ssize_t destSize;
920 BufferSegment* segments;
921 Py_ssize_t segmentsSize;
922 } DestBuffer;
817 void *dest;
818 Py_ssize_t destSize;
819 BufferSegment *segments;
820 Py_ssize_t segmentsSize;
821 } CompressorDestBuffer;
923 822
924 823 typedef enum {
925 WorkerError_none = 0,
926 WorkerError_zstd = 1,
927 WorkerError_no_memory = 2,
928 WorkerError_nospace = 3,
929 } WorkerError;
824 CompressorWorkerError_none = 0,
825 CompressorWorkerError_zstd = 1,
826 CompressorWorkerError_no_memory = 2,
827 CompressorWorkerError_nospace = 3,
828 } CompressorWorkerError;
930 829
931 830 /**
932 * Holds state for an individual worker performing multi_compress_to_buffer work.
831 * Holds state for an individual worker performing multi_compress_to_buffer
832 * work.
933 833 */
934 834 typedef struct {
935 /* Used for compression. */
936 ZSTD_CCtx* cctx;
835 /* Used for compression. */
836 ZSTD_CCtx *cctx;
937 837
938 /* What to compress. */
939 DataSource* sources;
940 Py_ssize_t sourcesSize;
941 Py_ssize_t startOffset;
942 Py_ssize_t endOffset;
943 unsigned long long totalSourceSize;
838 /* What to compress. */
839 DataSource *sources;
840 Py_ssize_t sourcesSize;
841 Py_ssize_t startOffset;
842 Py_ssize_t endOffset;
843 unsigned long long totalSourceSize;
944 844
945 /* Result storage. */
946 DestBuffer* destBuffers;
947 Py_ssize_t destCount;
845 /* Result storage. */
846 CompressorDestBuffer *destBuffers;
847 Py_ssize_t destCount;
948 848
949 /* Error tracking. */
950 WorkerError error;
951 size_t zresult;
952 Py_ssize_t errorOffset;
953 } WorkerState;
849 /* Error tracking. */
850 CompressorWorkerError error;
851 size_t zresult;
852 Py_ssize_t errorOffset;
853 } CompressorWorkerState;
954 854
955 static void compress_worker(WorkerState* state) {
956 Py_ssize_t inputOffset = state->startOffset;
957 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
958 Py_ssize_t currentBufferStartOffset = state->startOffset;
959 size_t zresult;
960 void* newDest;
961 size_t allocationSize;
962 size_t boundSize;
963 Py_ssize_t destOffset = 0;
964 DataSource* sources = state->sources;
965 DestBuffer* destBuffer;
855 #ifdef HAVE_ZSTD_POOL_APIS
856 static void compress_worker(CompressorWorkerState *state) {
857 Py_ssize_t inputOffset = state->startOffset;
858 Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1;
859 Py_ssize_t currentBufferStartOffset = state->startOffset;
860 size_t zresult;
861 void *newDest;
862 size_t allocationSize;
863 size_t boundSize;
864 Py_ssize_t destOffset = 0;
865 DataSource *sources = state->sources;
866 CompressorDestBuffer *destBuffer;
966 867
967 assert(!state->destBuffers);
968 assert(0 == state->destCount);
868 assert(!state->destBuffers);
869 assert(0 == state->destCount);
969 870
970 /*
971 * The total size of the compressed data is unknown until we actually
972 * compress data. That means we can't pre-allocate the exact size we need.
973 *
974 * There is a cost to every allocation and reallocation. So, it is in our
975 * interest to minimize the number of allocations.
976 *
977 * There is also a cost to too few allocations. If allocations are too
978 * large they may fail. If buffers are shared and all inputs become
979 * irrelevant at different lifetimes, then a reference to one segment
980 * in the buffer will keep the entire buffer alive. This leads to excessive
981 * memory usage.
982 *
983 * Our current strategy is to assume a compression ratio of 16:1 and
984 * allocate buffers of that size, rounded up to the nearest power of 2
985 * (because computers like round numbers). That ratio is greater than what
986 * most inputs achieve. This is by design: we don't want to over-allocate.
987 * But we don't want to under-allocate and lead to too many buffers either.
988 */
871 /*
872 * The total size of the compressed data is unknown until we actually
873 * compress data. That means we can't pre-allocate the exact size we need.
874 *
875 * There is a cost to every allocation and reallocation. So, it is in our
876 * interest to minimize the number of allocations.
877 *
878 * There is also a cost to too few allocations. If allocations are too
879 * large they may fail. If buffers are shared and all inputs become
880 * irrelevant at different lifetimes, then a reference to one segment
881 * in the buffer will keep the entire buffer alive. This leads to excessive
882 * memory usage.
883 *
884 * Our current strategy is to assume a compression ratio of 16:1 and
885 * allocate buffers of that size, rounded up to the nearest power of 2
886 * (because computers like round numbers). That ratio is greater than what
887 * most inputs achieve. This is by design: we don't want to over-allocate.
888 * But we don't want to under-allocate and lead to too many buffers either.
889 */
989 890
990 state->destCount = 1;
891 state->destCount = 1;
991 892
992 state->destBuffers = calloc(1, sizeof(DestBuffer));
993 if (NULL == state->destBuffers) {
994 state->error = WorkerError_no_memory;
995 return;
996 }
893 state->destBuffers = calloc(1, sizeof(CompressorDestBuffer));
894 if (NULL == state->destBuffers) {
895 state->error = CompressorWorkerError_no_memory;
896 return;
897 }
997 898
998 destBuffer = &state->destBuffers[state->destCount - 1];
899 destBuffer = &state->destBuffers[state->destCount - 1];
999 900
1000 /*
1001 * Rather than track bounds and grow the segments buffer, allocate space
1002 * to hold remaining items then truncate when we're done with it.
1003 */
1004 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1005 if (NULL == destBuffer->segments) {
1006 state->error = WorkerError_no_memory;
1007 return;
1008 }
901 /*
902 * Rather than track bounds and grow the segments buffer, allocate space
903 * to hold remaining items then truncate when we're done with it.
904 */
905 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
906 if (NULL == destBuffer->segments) {
907 state->error = CompressorWorkerError_no_memory;
908 return;
909 }
1009 910
1010 destBuffer->segmentsSize = remainingItems;
911 destBuffer->segmentsSize = remainingItems;
1011 912
1012 assert(state->totalSourceSize <= SIZE_MAX);
1013 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
913 assert(state->totalSourceSize <= SIZE_MAX);
914 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1014 915
1015 /* If the maximum size of the output is larger than that, round up. */
1016 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
916 /* If the maximum size of the output is larger than that, round up. */
917 boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize);
1017 918
1018 if (boundSize > allocationSize) {
1019 allocationSize = roundpow2(boundSize);
1020 }
919 if (boundSize > allocationSize) {
920 allocationSize = roundpow2(boundSize);
921 }
1021 922
1022 destBuffer->dest = malloc(allocationSize);
1023 if (NULL == destBuffer->dest) {
1024 state->error = WorkerError_no_memory;
1025 return;
1026 }
923 destBuffer->dest = malloc(allocationSize);
924 if (NULL == destBuffer->dest) {
925 state->error = CompressorWorkerError_no_memory;
926 return;
927 }
928
929 destBuffer->destSize = allocationSize;
1027 930
1028 destBuffer->destSize = allocationSize;
931 for (inputOffset = state->startOffset; inputOffset <= state->endOffset;
932 inputOffset++) {
933 void *source = sources[inputOffset].sourceData;
934 size_t sourceSize = sources[inputOffset].sourceSize;
935 size_t destAvailable;
936 void *dest;
937 ZSTD_outBuffer opOutBuffer;
938 ZSTD_inBuffer opInBuffer;
1029 939
1030 for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) {
1031 void* source = sources[inputOffset].sourceData;
1032 size_t sourceSize = sources[inputOffset].sourceSize;
1033 size_t destAvailable;
1034 void* dest;
1035 ZSTD_outBuffer opOutBuffer;
1036 ZSTD_inBuffer opInBuffer;
1037
1038 destAvailable = destBuffer->destSize - destOffset;
1039 boundSize = ZSTD_compressBound(sourceSize);
940 destAvailable = destBuffer->destSize - destOffset;
941 boundSize = ZSTD_compressBound(sourceSize);
1040 942
1041 /*
1042 * Not enough space in current buffer to hold largest compressed output.
1043 * So allocate and switch to a new output buffer.
1044 */
1045 if (boundSize > destAvailable) {
1046 /*
1047 * The downsizing of the existing buffer is optional. It should be cheap
1048 * (unlike growing). So we just do it.
1049 */
1050 if (destAvailable) {
1051 newDest = realloc(destBuffer->dest, destOffset);
1052 if (NULL == newDest) {
1053 state->error = WorkerError_no_memory;
1054 return;
1055 }
943 /*
944 * Not enough space in current buffer to hold largest compressed output.
945 * So allocate and switch to a new output buffer.
946 */
947 if (boundSize > destAvailable) {
948 /*
949 * The downsizing of the existing buffer is optional. It should be
950 * cheap (unlike growing). So we just do it.
951 */
952 if (destAvailable) {
953 newDest = realloc(destBuffer->dest, destOffset);
954 if (NULL == newDest) {
955 state->error = CompressorWorkerError_no_memory;
956 return;
957 }
1056 958
1057 destBuffer->dest = newDest;
1058 destBuffer->destSize = destOffset;
1059 }
959 destBuffer->dest = newDest;
960 destBuffer->destSize = destOffset;
961 }
1060 962
1061 /* Truncate segments buffer. */
1062 newDest = realloc(destBuffer->segments,
1063 (inputOffset - currentBufferStartOffset + 1) * sizeof(BufferSegment));
1064 if (NULL == newDest) {
1065 state->error = WorkerError_no_memory;
1066 return;
1067 }
963 /* Truncate segments buffer. */
964 newDest = realloc(destBuffer->segments,
965 (inputOffset - currentBufferStartOffset + 1) *
966 sizeof(BufferSegment));
967 if (NULL == newDest) {
968 state->error = CompressorWorkerError_no_memory;
969 return;
970 }
1068 971
1069 destBuffer->segments = newDest;
1070 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
972 destBuffer->segments = newDest;
973 destBuffer->segmentsSize = inputOffset - currentBufferStartOffset;
1071 974
1072 /* Grow space for new struct. */
1073 /* TODO consider over-allocating so we don't do this every time. */
1074 newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer));
1075 if (NULL == newDest) {
1076 state->error = WorkerError_no_memory;
1077 return;
1078 }
975 /* Grow space for new struct. */
976 /* TODO consider over-allocating so we don't do this every time. */
977 newDest =
978 realloc(state->destBuffers,
979 (state->destCount + 1) * sizeof(CompressorDestBuffer));
980 if (NULL == newDest) {
981 state->error = CompressorWorkerError_no_memory;
982 return;
983 }
1079 984
1080 state->destBuffers = newDest;
1081 state->destCount++;
985 state->destBuffers = newDest;
986 state->destCount++;
1082 987
1083 destBuffer = &state->destBuffers[state->destCount - 1];
988 destBuffer = &state->destBuffers[state->destCount - 1];
1084 989
1085 /* Don't take any chances with non-NULL pointers. */
1086 memset(destBuffer, 0, sizeof(DestBuffer));
990 /* Don't take any chances with non-NULL pointers. */
991 memset(destBuffer, 0, sizeof(CompressorDestBuffer));
1087 992
1088 /**
1089 * We could dynamically update allocation size based on work done so far.
1090 * For now, keep is simple.
1091 */
1092 assert(state->totalSourceSize <= SIZE_MAX);
1093 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
993 /**
994 * We could dynamically update allocation size based on work done so
995 * far. For now, keep is simple.
996 */
997 assert(state->totalSourceSize <= SIZE_MAX);
998 allocationSize = roundpow2((size_t)state->totalSourceSize >> 4);
1094 999
1095 if (boundSize > allocationSize) {
1096 allocationSize = roundpow2(boundSize);
1097 }
1000 if (boundSize > allocationSize) {
1001 allocationSize = roundpow2(boundSize);
1002 }
1098 1003
1099 destBuffer->dest = malloc(allocationSize);
1100 if (NULL == destBuffer->dest) {
1101 state->error = WorkerError_no_memory;
1102 return;
1103 }
1004 destBuffer->dest = malloc(allocationSize);
1005 if (NULL == destBuffer->dest) {
1006 state->error = CompressorWorkerError_no_memory;
1007 return;
1008 }
1104 1009
1105 destBuffer->destSize = allocationSize;
1106 destAvailable = allocationSize;
1107 destOffset = 0;
1010 destBuffer->destSize = allocationSize;
1011 destAvailable = allocationSize;
1012 destOffset = 0;
1108 1013
1109 destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment));
1110 if (NULL == destBuffer->segments) {
1111 state->error = WorkerError_no_memory;
1112 return;
1113 }
1014 destBuffer->segments =
1015 calloc(remainingItems, sizeof(BufferSegment));
1016 if (NULL == destBuffer->segments) {
1017 state->error = CompressorWorkerError_no_memory;
1018 return;
1019 }
1114 1020
1115 destBuffer->segmentsSize = remainingItems;
1116 currentBufferStartOffset = inputOffset;
1117 }
1021 destBuffer->segmentsSize = remainingItems;
1022 currentBufferStartOffset = inputOffset;
1023 }
1024
1025 dest = (char *)destBuffer->dest + destOffset;
1118 1026
1119 dest = (char*)destBuffer->dest + destOffset;
1027 opInBuffer.src = source;
1028 opInBuffer.size = sourceSize;
1029 opInBuffer.pos = 0;
1120 1030
1121 opInBuffer.src = source;
1122 opInBuffer.size = sourceSize;
1123 opInBuffer.pos = 0;
1031 opOutBuffer.dst = dest;
1032 opOutBuffer.size = destAvailable;
1033 opOutBuffer.pos = 0;
1124 1034
1125 opOutBuffer.dst = dest;
1126 opOutBuffer.size = destAvailable;
1127 opOutBuffer.pos = 0;
1128
1129 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
1130 if (ZSTD_isError(zresult)) {
1131 state->error = WorkerError_zstd;
1132 state->zresult = zresult;
1133 state->errorOffset = inputOffset;
1134 break;
1135 }
1035 zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize);
1036 if (ZSTD_isError(zresult)) {
1037 state->error = CompressorWorkerError_zstd;
1038 state->zresult = zresult;
1039 state->errorOffset = inputOffset;
1040 break;
1041 }
1136 1042
1137 zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer, ZSTD_e_end);
1138 if (ZSTD_isError(zresult)) {
1139 state->error = WorkerError_zstd;
1140 state->zresult = zresult;
1141 state->errorOffset = inputOffset;
1142 break;
1143 }
1144 else if (zresult) {
1145 state->error = WorkerError_nospace;
1146 state->errorOffset = inputOffset;
1147 break;
1148 }
1043 zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer,
1044 ZSTD_e_end);
1045 if (ZSTD_isError(zresult)) {
1046 state->error = CompressorWorkerError_zstd;
1047 state->zresult = zresult;
1048 state->errorOffset = inputOffset;
1049 break;
1050 }
1051 else if (zresult) {
1052 state->error = CompressorWorkerError_nospace;
1053 state->errorOffset = inputOffset;
1054 break;
1055 }
1149 1056
1150 destBuffer->segments[inputOffset - currentBufferStartOffset].offset = destOffset;
1151 destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos;
1057 destBuffer->segments[inputOffset - currentBufferStartOffset].offset =
1058 destOffset;
1059 destBuffer->segments[inputOffset - currentBufferStartOffset].length =
1060 opOutBuffer.pos;
1152 1061
1153 destOffset += opOutBuffer.pos;
1154 remainingItems--;
1155 }
1062 destOffset += opOutBuffer.pos;
1063 remainingItems--;
1064 }
1156 1065
1157 if (destBuffer->destSize > destOffset) {
1158 newDest = realloc(destBuffer->dest, destOffset);
1159 if (NULL == newDest) {
1160 state->error = WorkerError_no_memory;
1161 return;
1162 }
1066 if (destBuffer->destSize > destOffset) {
1067 newDest = realloc(destBuffer->dest, destOffset);
1068 if (NULL == newDest) {
1069 state->error = CompressorWorkerError_no_memory;
1070 return;
1071 }
1163 1072
1164 destBuffer->dest = newDest;
1165 destBuffer->destSize = destOffset;
1166 }
1073 destBuffer->dest = newDest;
1074 destBuffer->destSize = destOffset;
1075 }
1167 1076 }
1077 #endif
1078
1079 /* We can only use the pool.h APIs if we provide the full library,
1080 as these are private APIs. */
1081 #ifdef HAVE_ZSTD_POOL_APIS
1168 1082
1169 ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor,
1170 DataSources* sources, Py_ssize_t threadCount) {
1171 unsigned long long bytesPerWorker;
1172 POOL_ctx* pool = NULL;
1173 WorkerState* workerStates = NULL;
1174 Py_ssize_t i;
1175 unsigned long long workerBytes = 0;
1176 Py_ssize_t workerStartOffset = 0;
1177 Py_ssize_t currentThread = 0;
1178 int errored = 0;
1179 Py_ssize_t segmentsCount = 0;
1180 Py_ssize_t segmentIndex;
1181 PyObject* segmentsArg = NULL;
1182 ZstdBufferWithSegments* buffer;
1183 ZstdBufferWithSegmentsCollection* result = NULL;
1184
1185 assert(sources->sourcesSize > 0);
1186 assert(sources->totalSourceSize > 0);
1187 assert(threadCount >= 1);
1083 ZstdBufferWithSegmentsCollection *
1084 compress_from_datasources(ZstdCompressor *compressor, DataSources *sources,
1085 Py_ssize_t threadCount) {
1086 unsigned long long bytesPerWorker;
1087 POOL_ctx *pool = NULL;
1088 CompressorWorkerState *workerStates = NULL;
1089 Py_ssize_t i;
1090 unsigned long long workerBytes = 0;
1091 Py_ssize_t workerStartOffset = 0;
1092 Py_ssize_t currentThread = 0;
1093 int errored = 0;
1094 Py_ssize_t segmentsCount = 0;
1095 Py_ssize_t segmentIndex;
1096 PyObject *segmentsArg = NULL;
1097 ZstdBufferWithSegments *buffer;
1098 ZstdBufferWithSegmentsCollection *result = NULL;
1188 1099
1189 /* More threads than inputs makes no sense. */
1190 threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize
1191 : threadCount;
1100 assert(sources->sourcesSize > 0);
1101 assert(sources->totalSourceSize > 0);
1102 assert(threadCount >= 1);
1192 1103
1193 /* TODO lower thread count when input size is too small and threads would add
1194 overhead. */
1104 /* More threads than inputs makes no sense. */
1105 threadCount =
1106 sources->sourcesSize < threadCount ? sources->sourcesSize : threadCount;
1195 1107
1196 workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState));
1197 if (NULL == workerStates) {
1198 PyErr_NoMemory();
1199 goto finally;
1200 }
1201
1202 memset(workerStates, 0, threadCount * sizeof(WorkerState));
1108 /* TODO lower thread count when input size is too small and threads would
1109 add overhead. */
1203 1110
1204 if (threadCount > 1) {
1205 pool = POOL_create(threadCount, 1);
1206 if (NULL == pool) {
1207 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1208 goto finally;
1209 }
1210 }
1111 workerStates = PyMem_Malloc(threadCount * sizeof(CompressorWorkerState));
1112 if (NULL == workerStates) {
1113 PyErr_NoMemory();
1114 goto finally;
1115 }
1116
1117 memset(workerStates, 0, threadCount * sizeof(CompressorWorkerState));
1211 1118
1212 bytesPerWorker = sources->totalSourceSize / threadCount;
1119 if (threadCount > 1) {
1120 pool = POOL_create(threadCount, 1);
1121 if (NULL == pool) {
1122 PyErr_SetString(ZstdError, "could not initialize zstd thread pool");
1123 goto finally;
1124 }
1125 }
1213 1126
1214 for (i = 0; i < threadCount; i++) {
1215 size_t zresult;
1127 bytesPerWorker = sources->totalSourceSize / threadCount;
1128
1129 for (i = 0; i < threadCount; i++) {
1130 size_t zresult;
1216 1131
1217 workerStates[i].cctx = ZSTD_createCCtx();
1218 if (!workerStates[i].cctx) {
1219 PyErr_NoMemory();
1220 goto finally;
1221 }
1132 workerStates[i].cctx = ZSTD_createCCtx();
1133 if (!workerStates[i].cctx) {
1134 PyErr_NoMemory();
1135 goto finally;
1136 }
1222 1137
1223 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
1224 compressor->params);
1225 if (ZSTD_isError(zresult)) {
1226 PyErr_Format(ZstdError, "could not set compression parameters: %s",
1227 ZSTD_getErrorName(zresult));
1228 goto finally;
1229 }
1138 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx,
1139 compressor->params);
1140 if (ZSTD_isError(zresult)) {
1141 PyErr_Format(ZstdError, "could not set compression parameters: %s",
1142 ZSTD_getErrorName(zresult));
1143 goto finally;
1144 }
1230 1145
1231 if (compressor->dict) {
1232 if (compressor->dict->cdict) {
1233 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, compressor->dict->cdict);
1234 }
1235 else {
1236 zresult = ZSTD_CCtx_loadDictionary_advanced(
1237 workerStates[i].cctx,
1238 compressor->dict->dictData,
1239 compressor->dict->dictSize,
1240 ZSTD_dlm_byRef,
1241 compressor->dict->dictType);
1242 }
1243
1244 if (ZSTD_isError(zresult)) {
1245 PyErr_Format(ZstdError, "could not load compression dictionary: %s",
1246 ZSTD_getErrorName(zresult));
1247 goto finally;
1248 }
1146 if (compressor->dict) {
1147 if (compressor->dict->cdict) {
1148 zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx,
1149 compressor->dict->cdict);
1150 }
1151 else {
1152 zresult = ZSTD_CCtx_loadDictionary_advanced(
1153 workerStates[i].cctx, compressor->dict->dictData,
1154 compressor->dict->dictSize, ZSTD_dlm_byRef,
1155 compressor->dict->dictType);
1156 }
1249 1157
1250 }
1158 if (ZSTD_isError(zresult)) {
1159 PyErr_Format(ZstdError,
1160 "could not load compression dictionary: %s",
1161 ZSTD_getErrorName(zresult));
1162 goto finally;
1163 }
1164 }
1251 1165
1252 workerStates[i].sources = sources->sources;
1253 workerStates[i].sourcesSize = sources->sourcesSize;
1254 }
1255
1256 Py_BEGIN_ALLOW_THREADS
1257 for (i = 0; i < sources->sourcesSize; i++) {
1258 workerBytes += sources->sources[i].sourceSize;
1166 workerStates[i].sources = sources->sources;
1167 workerStates[i].sourcesSize = sources->sourcesSize;
1168 }
1259 1169
1260 /*
1261 * The last worker/thread needs to handle all remaining work. Don't
1262 * trigger it prematurely. Defer to the block outside of the loop
1263 * to run the last worker/thread. But do still process this loop
1264 * so workerBytes is correct.
1265 */
1266 if (currentThread == threadCount - 1) {
1267 continue;
1268 }
1170 Py_BEGIN_ALLOW_THREADS for (i = 0; i < sources->sourcesSize; i++) {
1171 workerBytes += sources->sources[i].sourceSize;
1172
1173 /*
1174 * The last worker/thread needs to handle all remaining work. Don't
1175 * trigger it prematurely. Defer to the block outside of the loop
1176 * to run the last worker/thread. But do still process this loop
1177 * so workerBytes is correct.
1178 */
1179 if (currentThread == threadCount - 1) {
1180 continue;
1181 }
1269 1182
1270 if (workerBytes >= bytesPerWorker) {
1271 assert(currentThread < threadCount);
1272 workerStates[currentThread].totalSourceSize = workerBytes;
1273 workerStates[currentThread].startOffset = workerStartOffset;
1274 workerStates[currentThread].endOffset = i;
1183 if (workerBytes >= bytesPerWorker) {
1184 assert(currentThread < threadCount);
1185 workerStates[currentThread].totalSourceSize = workerBytes;
1186 workerStates[currentThread].startOffset = workerStartOffset;
1187 workerStates[currentThread].endOffset = i;
1275 1188
1276 if (threadCount > 1) {
1277 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1278 }
1279 else {
1280 compress_worker(&workerStates[currentThread]);
1281 }
1189 if (threadCount > 1) {
1190 POOL_add(pool, (POOL_function)compress_worker,
1191 &workerStates[currentThread]);
1192 }
1193 else {
1194 compress_worker(&workerStates[currentThread]);
1195 }
1282 1196
1283 currentThread++;
1284 workerStartOffset = i + 1;
1285 workerBytes = 0;
1286 }
1287 }
1197 currentThread++;
1198 workerStartOffset = i + 1;
1199 workerBytes = 0;
1200 }
1201 }
1288 1202
1289 if (workerBytes) {
1290 assert(currentThread < threadCount);
1291 workerStates[currentThread].totalSourceSize = workerBytes;
1292 workerStates[currentThread].startOffset = workerStartOffset;
1293 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1203 if (workerBytes) {
1204 assert(currentThread < threadCount);
1205 workerStates[currentThread].totalSourceSize = workerBytes;
1206 workerStates[currentThread].startOffset = workerStartOffset;
1207 workerStates[currentThread].endOffset = sources->sourcesSize - 1;
1294 1208
1295 if (threadCount > 1) {
1296 POOL_add(pool, (POOL_function)compress_worker, &workerStates[currentThread]);
1297 }
1298 else {
1299 compress_worker(&workerStates[currentThread]);
1300 }
1301 }
1209 if (threadCount > 1) {
1210 POOL_add(pool, (POOL_function)compress_worker,
1211 &workerStates[currentThread]);
1212 }
1213 else {
1214 compress_worker(&workerStates[currentThread]);
1215 }
1216 }
1302 1217
1303 if (threadCount > 1) {
1304 POOL_free(pool);
1305 pool = NULL;
1306 }
1218 if (threadCount > 1) {
1219 POOL_free(pool);
1220 pool = NULL;
1221 }
1307 1222
1308 Py_END_ALLOW_THREADS
1223 Py_END_ALLOW_THREADS
1309 1224
1310 for (i = 0; i < threadCount; i++) {
1311 switch (workerStates[i].error) {
1312 case WorkerError_no_memory:
1313 PyErr_NoMemory();
1314 errored = 1;
1315 break;
1225 for (i = 0; i < threadCount; i++) {
1226 switch (workerStates[i].error) {
1227 case CompressorWorkerError_no_memory:
1228 PyErr_NoMemory();
1229 errored = 1;
1230 break;
1316 1231
1317 case WorkerError_zstd:
1318 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1319 workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult));
1320 errored = 1;
1321 break;
1232 case CompressorWorkerError_zstd:
1233 PyErr_Format(ZstdError, "error compressing item %zd: %s",
1234 workerStates[i].errorOffset,
1235 ZSTD_getErrorName(workerStates[i].zresult));
1236 errored = 1;
1237 break;
1322 1238
1323 case WorkerError_nospace:
1324 PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output",
1325 workerStates[i].errorOffset);
1326 errored = 1;
1327 break;
1239 case CompressorWorkerError_nospace:
1240 PyErr_Format(
1241 ZstdError,
1242 "error compressing item %zd: not enough space in output",
1243 workerStates[i].errorOffset);
1244 errored = 1;
1245 break;
1328 1246
1329 default:
1330 ;
1331 }
1332
1333 if (errored) {
1334 break;
1335 }
1247 default:;
1248 }
1336 1249
1337 }
1250 if (errored) {
1251 break;
1252 }
1253 }
1338 1254
1339 if (errored) {
1340 goto finally;
1341 }
1255 if (errored) {
1256 goto finally;
1257 }
1342 1258
1343 segmentsCount = 0;
1344 for (i = 0; i < threadCount; i++) {
1345 WorkerState* state = &workerStates[i];
1346 segmentsCount += state->destCount;
1347 }
1259 segmentsCount = 0;
1260 for (i = 0; i < threadCount; i++) {
1261 CompressorWorkerState *state = &workerStates[i];
1262 segmentsCount += state->destCount;
1263 }
1348 1264
1349 segmentsArg = PyTuple_New(segmentsCount);
1350 if (NULL == segmentsArg) {
1351 goto finally;
1352 }
1265 segmentsArg = PyTuple_New(segmentsCount);
1266 if (NULL == segmentsArg) {
1267 goto finally;
1268 }
1353 1269
1354 segmentIndex = 0;
1270 segmentIndex = 0;
1355 1271
1356 for (i = 0; i < threadCount; i++) {
1357 Py_ssize_t j;
1358 WorkerState* state = &workerStates[i];
1272 for (i = 0; i < threadCount; i++) {
1273 Py_ssize_t j;
1274 CompressorWorkerState *state = &workerStates[i];
1359 1275
1360 for (j = 0; j < state->destCount; j++) {
1361 DestBuffer* destBuffer = &state->destBuffers[j];
1362 buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize,
1363 destBuffer->segments, destBuffer->segmentsSize);
1276 for (j = 0; j < state->destCount; j++) {
1277 CompressorDestBuffer *destBuffer = &state->destBuffers[j];
1278 buffer = BufferWithSegments_FromMemory(
1279 destBuffer->dest, destBuffer->destSize, destBuffer->segments,
1280 destBuffer->segmentsSize);
1364 1281
1365 if (NULL == buffer) {
1366 goto finally;
1367 }
1282 if (NULL == buffer) {
1283 goto finally;
1284 }
1368 1285
1369 /* Tell instance to use free() instsead of PyMem_Free(). */
1370 buffer->useFree = 1;
1286 /* Tell instance to use free() instsead of PyMem_Free(). */
1287 buffer->useFree = 1;
1371 1288
1372 /*
1373 * BufferWithSegments_FromMemory takes ownership of the backing memory.
1374 * Unset it here so it doesn't get freed below.
1375 */
1376 destBuffer->dest = NULL;
1377 destBuffer->segments = NULL;
1289 /*
1290 * BufferWithSegments_FromMemory takes ownership of the backing
1291 * memory. Unset it here so it doesn't get freed below.
1292 */
1293 destBuffer->dest = NULL;
1294 destBuffer->segments = NULL;
1378 1295
1379 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject*)buffer);
1380 }
1381 }
1296 PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject *)buffer);
1297 }
1298 }
1382 1299
1383 result = (ZstdBufferWithSegmentsCollection*)PyObject_CallObject(
1384 (PyObject*)&ZstdBufferWithSegmentsCollectionType, segmentsArg);
1300 result = (ZstdBufferWithSegmentsCollection *)PyObject_CallObject(
1301 (PyObject *)ZstdBufferWithSegmentsCollectionType, segmentsArg);
1385 1302
1386 1303 finally:
1387 Py_CLEAR(segmentsArg);
1304 Py_CLEAR(segmentsArg);
1388 1305
1389 if (pool) {
1390 POOL_free(pool);
1391 }
1306 if (pool) {
1307 POOL_free(pool);
1308 }
1309
1310 if (workerStates) {
1311 Py_ssize_t j;
1392 1312
1393 if (workerStates) {
1394 Py_ssize_t j;
1313 for (i = 0; i < threadCount; i++) {
1314 CompressorWorkerState state = workerStates[i];
1395 1315
1396 for (i = 0; i < threadCount; i++) {
1397 WorkerState state = workerStates[i];
1316 if (state.cctx) {
1317 ZSTD_freeCCtx(state.cctx);
1318 }
1398 1319
1399 if (state.cctx) {
1400 ZSTD_freeCCtx(state.cctx);
1401 }
1320 /* malloc() is used in worker thread. */
1402 1321
1403 /* malloc() is used in worker thread. */
1322 for (j = 0; j < state.destCount; j++) {
1323 if (state.destBuffers) {
1324 free(state.destBuffers[j].dest);
1325 free(state.destBuffers[j].segments);
1326 }
1327 }
1404 1328
1405 for (j = 0; j < state.destCount; j++) {
1406 if (state.destBuffers) {
1407 free(state.destBuffers[j].dest);
1408 free(state.destBuffers[j].segments);
1409 }
1410 }
1329 free(state.destBuffers);
1330 }
1411 1331
1412
1413 free(state.destBuffers);
1414 }
1332 PyMem_Free(workerStates);
1333 }
1415 1334
1416 PyMem_Free(workerStates);
1417 }
1335 return result;
1336 }
1337 #endif
1418 1338
1419 return result;
1420 }
1339 #ifdef HAVE_ZSTD_POOL_APIS
1340 static ZstdBufferWithSegmentsCollection *
1341 ZstdCompressor_multi_compress_to_buffer(ZstdCompressor *self, PyObject *args,
1342 PyObject *kwargs) {
1343 static char *kwlist[] = {"data", "threads", NULL};
1421 1344
1422 PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__,
1423 "Compress multiple pieces of data as a single operation\n"
1424 "\n"
1425 "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n"
1426 "a list of bytes like objects holding data to compress.\n"
1427 "\n"
1428 "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n"
1429 "\n"
1430 "This function is optimized to perform multiple compression operations as\n"
1431 "as possible with as little overhead as possbile.\n"
1432 );
1345 PyObject *data;
1346 int threads = 0;
1347 Py_buffer *dataBuffers = NULL;
1348 DataSources sources;
1349 Py_ssize_t i;
1350 Py_ssize_t sourceCount = 0;
1351 ZstdBufferWithSegmentsCollection *result = NULL;
1352
1353 memset(&sources, 0, sizeof(sources));
1433 1354
1434 static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
1435 static char* kwlist[] = {
1436 "data",
1437 "threads",
1438 NULL
1439 };
1355 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
1356 "O|i:multi_compress_to_buffer", kwlist,
1357 &data, &threads)) {
1358 return NULL;
1359 }
1440 1360
1441 PyObject* data;
1442 int threads = 0;
1443 Py_buffer* dataBuffers = NULL;
1444 DataSources sources;
1445 Py_ssize_t i;
1446 Py_ssize_t sourceCount = 0;
1447 ZstdBufferWithSegmentsCollection* result = NULL;
1361 if (threads < 0) {
1362 threads = cpu_count();
1363 }
1448 1364
1449 memset(&sources, 0, sizeof(sources));
1365 if (threads < 2) {
1366 threads = 1;
1367 }
1450 1368
1451 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist,
1452 &data, &threads)) {
1453 return NULL;
1454 }
1369 if (PyObject_TypeCheck(data, ZstdBufferWithSegmentsType)) {
1370 ZstdBufferWithSegments *buffer = (ZstdBufferWithSegments *)data;
1455 1371
1456 if (threads < 0) {
1457 threads = cpu_count();
1458 }
1459
1460 if (threads < 2) {
1461 threads = 1;
1462 }
1372 sources.sources =
1373 PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1374 if (NULL == sources.sources) {
1375 PyErr_NoMemory();
1376 goto finally;
1377 }
1463 1378
1464 if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) {
1465 ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data;
1379 for (i = 0; i < buffer->segmentCount; i++) {
1380 if (buffer->segments[i].length > SIZE_MAX) {
1381 PyErr_Format(
1382 PyExc_ValueError,
1383 "buffer segment %zd is too large for this platform", i);
1384 goto finally;
1385 }
1466 1386
1467 sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource));
1468 if (NULL == sources.sources) {
1469 PyErr_NoMemory();
1470 goto finally;
1471 }
1472
1473 for (i = 0; i < buffer->segmentCount; i++) {
1474 if (buffer->segments[i].length > SIZE_MAX) {
1475 PyErr_Format(PyExc_ValueError,
1476 "buffer segment %zd is too large for this platform", i);
1477 goto finally;
1478 }
1387 sources.sources[i].sourceData =
1388 (char *)buffer->data + buffer->segments[i].offset;
1389 sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
1390 sources.totalSourceSize += buffer->segments[i].length;
1391 }
1479 1392
1480 sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset;
1481 sources.sources[i].sourceSize = (size_t)buffer->segments[i].length;
1482 sources.totalSourceSize += buffer->segments[i].length;
1483 }
1393 sources.sourcesSize = buffer->segmentCount;
1394 }
1395 else if (PyObject_TypeCheck(data, ZstdBufferWithSegmentsCollectionType)) {
1396 Py_ssize_t j;
1397 Py_ssize_t offset = 0;
1398 ZstdBufferWithSegments *buffer;
1399 ZstdBufferWithSegmentsCollection *collection =
1400 (ZstdBufferWithSegmentsCollection *)data;
1484 1401
1485 sources.sourcesSize = buffer->segmentCount;
1486 }
1487 else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) {
1488 Py_ssize_t j;
1489 Py_ssize_t offset = 0;
1490 ZstdBufferWithSegments* buffer;
1491 ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data;
1402 sourceCount = BufferWithSegmentsCollection_length(collection);
1492 1403
1493 sourceCount = BufferWithSegmentsCollection_length(collection);
1404 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1405 if (NULL == sources.sources) {
1406 PyErr_NoMemory();
1407 goto finally;
1408 }
1494 1409
1495 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1496 if (NULL == sources.sources) {
1497 PyErr_NoMemory();
1498 goto finally;
1499 }
1410 for (i = 0; i < collection->bufferCount; i++) {
1411 buffer = collection->buffers[i];
1500 1412
1501 for (i = 0; i < collection->bufferCount; i++) {
1502 buffer = collection->buffers[i];
1503
1504 for (j = 0; j < buffer->segmentCount; j++) {
1505 if (buffer->segments[j].length > SIZE_MAX) {
1506 PyErr_Format(PyExc_ValueError,
1507 "buffer segment %zd in buffer %zd is too large for this platform",
1508 j, i);
1509 goto finally;
1510 }
1413 for (j = 0; j < buffer->segmentCount; j++) {
1414 if (buffer->segments[j].length > SIZE_MAX) {
1415 PyErr_Format(PyExc_ValueError,
1416 "buffer segment %zd in buffer %zd is too "
1417 "large for this platform",
1418 j, i);
1419 goto finally;
1420 }
1511 1421
1512 sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset;
1513 sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length;
1514 sources.totalSourceSize += buffer->segments[j].length;
1422 sources.sources[offset].sourceData =
1423 (char *)buffer->data + buffer->segments[j].offset;
1424 sources.sources[offset].sourceSize =
1425 (size_t)buffer->segments[j].length;
1426 sources.totalSourceSize += buffer->segments[j].length;
1515 1427
1516 offset++;
1517 }
1518 }
1428 offset++;
1429 }
1430 }
1519 1431
1520 sources.sourcesSize = sourceCount;
1521 }
1522 else if (PyList_Check(data)) {
1523 sourceCount = PyList_GET_SIZE(data);
1432 sources.sourcesSize = sourceCount;
1433 }
1434 else if (PyList_Check(data)) {
1435 sourceCount = PyList_GET_SIZE(data);
1524 1436
1525 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1526 if (NULL == sources.sources) {
1527 PyErr_NoMemory();
1528 goto finally;
1529 }
1437 sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource));
1438 if (NULL == sources.sources) {
1439 PyErr_NoMemory();
1440 goto finally;
1441 }
1530 1442
1531 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1532 if (NULL == dataBuffers) {
1533 PyErr_NoMemory();
1534 goto finally;
1535 }
1443 dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer));
1444 if (NULL == dataBuffers) {
1445 PyErr_NoMemory();
1446 goto finally;
1447 }
1536 1448
1537 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1449 memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer));
1538 1450
1539 for (i = 0; i < sourceCount; i++) {
1540 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1541 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1542 PyErr_Clear();
1543 PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i);
1544 goto finally;
1545 }
1451 for (i = 0; i < sourceCount; i++) {
1452 if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i),
1453 &dataBuffers[i], PyBUF_CONTIG_RO)) {
1454 PyErr_Clear();
1455 PyErr_Format(PyExc_TypeError,
1456 "item %zd not a bytes like object", i);
1457 goto finally;
1458 }
1546 1459
1547 sources.sources[i].sourceData = dataBuffers[i].buf;
1548 sources.sources[i].sourceSize = dataBuffers[i].len;
1549 sources.totalSourceSize += dataBuffers[i].len;
1550 }
1460 sources.sources[i].sourceData = dataBuffers[i].buf;
1461 sources.sources[i].sourceSize = dataBuffers[i].len;
1462 sources.totalSourceSize += dataBuffers[i].len;
1463 }
1551 1464
1552 sources.sourcesSize = sourceCount;
1553 }
1554 else {
1555 PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments");
1556 goto finally;
1557 }
1465 sources.sourcesSize = sourceCount;
1466 }
1467 else {
1468 PyErr_SetString(PyExc_TypeError,
1469 "argument must be list of BufferWithSegments");
1470 goto finally;
1471 }
1558 1472
1559 if (0 == sources.sourcesSize) {
1560 PyErr_SetString(PyExc_ValueError, "no source elements found");
1561 goto finally;
1562 }
1473 if (0 == sources.sourcesSize) {
1474 PyErr_SetString(PyExc_ValueError, "no source elements found");
1475 goto finally;
1476 }
1563 1477
1564 if (0 == sources.totalSourceSize) {
1565 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1566 goto finally;
1567 }
1478 if (0 == sources.totalSourceSize) {
1479 PyErr_SetString(PyExc_ValueError, "source elements are empty");
1480 goto finally;
1481 }
1568 1482
1569 if (sources.totalSourceSize > SIZE_MAX) {
1570 PyErr_SetString(PyExc_ValueError, "sources are too large for this platform");
1571 goto finally;
1572 }
1483 if (sources.totalSourceSize > SIZE_MAX) {
1484 PyErr_SetString(PyExc_ValueError,
1485 "sources are too large for this platform");
1486 goto finally;
1487 }
1573 1488
1574 result = compress_from_datasources(self, &sources, threads);
1489 result = compress_from_datasources(self, &sources, threads);
1575 1490
1576 1491 finally:
1577 PyMem_Free(sources.sources);
1492 PyMem_Free(sources.sources);
1493
1494 if (dataBuffers) {
1495 for (i = 0; i < sourceCount; i++) {
1496 PyBuffer_Release(&dataBuffers[i]);
1497 }
1578 1498
1579 if (dataBuffers) {
1580 for (i = 0; i < sourceCount; i++) {
1581 PyBuffer_Release(&dataBuffers[i]);
1582 }
1499 PyMem_Free(dataBuffers);
1500 }
1583 1501
1584 PyMem_Free(dataBuffers);
1585 }
1586
1587 return result;
1502 return result;
1588 1503 }
1504 #endif
1589 1505
1590 1506 static PyMethodDef ZstdCompressor_methods[] = {
1591 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1592 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1593 { "compress", (PyCFunction)ZstdCompressor_compress,
1594 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1595 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1596 METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
1597 { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1598 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ },
1599 { "stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
1600 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ },
1601 { "stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
1602 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1603 { "read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
1604 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1605 /* TODO Remove deprecated API */
1606 { "read_from", (PyCFunction)ZstdCompressor_read_to_iter,
1607 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ },
1608 /* TODO remove deprecated API */
1609 { "write_to", (PyCFunction)ZstdCompressor_stream_writer,
1610 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ },
1611 { "multi_compress_to_buffer", (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1612 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ },
1613 { "memory_size", (PyCFunction)ZstdCompressor_memory_size,
1614 METH_NOARGS, ZstdCompressor_memory_size__doc__ },
1615 { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
1616 METH_NOARGS, ZstdCompressor_frame_progression__doc__ },
1617 { NULL, NULL }
1507 {"chunker", (PyCFunction)ZstdCompressor_chunker,
1508 METH_VARARGS | METH_KEYWORDS, NULL},
1509 {"compress", (PyCFunction)ZstdCompressor_compress,
1510 METH_VARARGS | METH_KEYWORDS, NULL},
1511 {"compressobj", (PyCFunction)ZstdCompressor_compressobj,
1512 METH_VARARGS | METH_KEYWORDS, NULL},
1513 {"copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
1514 METH_VARARGS | METH_KEYWORDS, NULL},
1515 {"stream_reader", (PyCFunction)ZstdCompressor_stream_reader,
1516 METH_VARARGS | METH_KEYWORDS, NULL},
1517 {"stream_writer", (PyCFunction)ZstdCompressor_stream_writer,
1518 METH_VARARGS | METH_KEYWORDS, NULL},
1519 {"read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter,
1520 METH_VARARGS | METH_KEYWORDS, NULL},
1521 #ifdef HAVE_ZSTD_POOL_APIS
1522 {"multi_compress_to_buffer",
1523 (PyCFunction)ZstdCompressor_multi_compress_to_buffer,
1524 METH_VARARGS | METH_KEYWORDS, NULL},
1525 #endif
1526 {"memory_size", (PyCFunction)ZstdCompressor_memory_size, METH_NOARGS, NULL},
1527 {"frame_progression", (PyCFunction)ZstdCompressor_frame_progression,
1528 METH_NOARGS, NULL},
1529 {NULL, NULL}};
1530
1531 PyType_Slot ZstdCompressorSlots[] = {
1532 {Py_tp_dealloc, ZstdCompressor_dealloc},
1533 {Py_tp_methods, ZstdCompressor_methods},
1534 {Py_tp_init, ZstdCompressor_init},
1535 {Py_tp_new, PyType_GenericNew},
1536 {0, NULL},
1618 1537 };
1619 1538
1620 PyTypeObject ZstdCompressorType = {
1621 PyVarObject_HEAD_INIT(NULL, 0)
1622 "zstd.ZstdCompressor", /* tp_name */
1623 sizeof(ZstdCompressor), /* tp_basicsize */
1624 0, /* tp_itemsize */
1625 (destructor)ZstdCompressor_dealloc, /* tp_dealloc */
1626 0, /* tp_print */
1627 0, /* tp_getattr */
1628 0, /* tp_setattr */
1629 0, /* tp_compare */
1630 0, /* tp_repr */
1631 0, /* tp_as_number */
1632 0, /* tp_as_sequence */
1633 0, /* tp_as_mapping */
1634 0, /* tp_hash */
1635 0, /* tp_call */
1636 0, /* tp_str */
1637 0, /* tp_getattro */
1638 0, /* tp_setattro */
1639 0, /* tp_as_buffer */
1640 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
1641 ZstdCompressor__doc__, /* tp_doc */
1642 0, /* tp_traverse */
1643 0, /* tp_clear */
1644 0, /* tp_richcompare */
1645 0, /* tp_weaklistoffset */
1646 0, /* tp_iter */
1647 0, /* tp_iternext */
1648 ZstdCompressor_methods, /* tp_methods */
1649 0, /* tp_members */
1650 0, /* tp_getset */
1651 0, /* tp_base */
1652 0, /* tp_dict */
1653 0, /* tp_descr_get */
1654 0, /* tp_descr_set */
1655 0, /* tp_dictoffset */
1656 (initproc)ZstdCompressor_init, /* tp_init */
1657 0, /* tp_alloc */
1658 PyType_GenericNew, /* tp_new */
1539 PyType_Spec ZstdCompressorSpec = {
1540 "zstd.ZstdCompressor",
1541 sizeof(ZstdCompressor),
1542 0,
1543 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
1544 ZstdCompressorSlots,
1659 1545 };
1660 1546
1661 void compressor_module_init(PyObject* mod) {
1662 Py_SET_TYPE(&ZstdCompressorType, &PyType_Type);
1663 if (PyType_Ready(&ZstdCompressorType) < 0) {
1664 return;
1665 }
1547 PyTypeObject *ZstdCompressorType;
1666 1548
1667 Py_INCREF((PyObject*)&ZstdCompressorType);
1668 PyModule_AddObject(mod, "ZstdCompressor",
1669 (PyObject*)&ZstdCompressorType);
1549 void compressor_module_init(PyObject *mod) {
1550 ZstdCompressorType = (PyTypeObject *)PyType_FromSpec(&ZstdCompressorSpec);
1551 if (PyType_Ready(ZstdCompressorType) < 0) {
1552 return;
1553 }
1554
1555 Py_INCREF((PyObject *)ZstdCompressorType);
1556 PyModule_AddObject(mod, "ZstdCompressor", (PyObject *)ZstdCompressorType);
1670 1557 }
@@ -1,235 +1,212
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 11 #define min(a, b) (((a) < (b)) ? (a) : (b))
12 12
13 extern PyObject* ZstdError;
13 extern PyObject *ZstdError;
14 14
15 PyDoc_STRVAR(ZstdCompressorIterator__doc__,
16 "Represents an iterator of compressed data.\n"
17 );
18
19 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) {
20 Py_XDECREF(self->readResult);
21 Py_XDECREF(self->compressor);
22 Py_XDECREF(self->reader);
15 static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator *self) {
16 Py_XDECREF(self->readResult);
17 Py_XDECREF(self->compressor);
18 Py_XDECREF(self->reader);
23 19
24 if (self->buffer.buf) {
25 PyBuffer_Release(&self->buffer);
26 memset(&self->buffer, 0, sizeof(self->buffer));
27 }
20 if (self->buffer.buf) {
21 PyBuffer_Release(&self->buffer);
22 memset(&self->buffer, 0, sizeof(self->buffer));
23 }
28 24
29 if (self->output.dst) {
30 PyMem_Free(self->output.dst);
31 self->output.dst = NULL;
32 }
25 if (self->output.dst) {
26 PyMem_Free(self->output.dst);
27 self->output.dst = NULL;
28 }
33 29
34 PyObject_Del(self);
30 PyObject_Del(self);
35 31 }
36 32
37 static PyObject* ZstdCompressorIterator_iter(PyObject* self) {
38 Py_INCREF(self);
39 return self;
33 static PyObject *ZstdCompressorIterator_iter(PyObject *self) {
34 Py_INCREF(self);
35 return self;
40 36 }
41 37
42 static PyObject* ZstdCompressorIterator_iternext(ZstdCompressorIterator* self) {
43 size_t zresult;
44 PyObject* readResult = NULL;
45 PyObject* chunk;
46 char* readBuffer;
47 Py_ssize_t readSize = 0;
48 Py_ssize_t bufferRemaining;
38 static PyObject *ZstdCompressorIterator_iternext(ZstdCompressorIterator *self) {
39 size_t zresult;
40 PyObject *readResult = NULL;
41 PyObject *chunk;
42 char *readBuffer;
43 Py_ssize_t readSize = 0;
44 Py_ssize_t bufferRemaining;
49 45
50 if (self->finishedOutput) {
51 PyErr_SetString(PyExc_StopIteration, "output flushed");
52 return NULL;
53 }
46 if (self->finishedOutput) {
47 PyErr_SetString(PyExc_StopIteration, "output flushed");
48 return NULL;
49 }
54 50
55 51 feedcompressor:
56 52
57 /* If we have data left in the input, consume it. */
58 if (self->input.pos < self->input.size) {
59 Py_BEGIN_ALLOW_THREADS
60 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
61 &self->input, ZSTD_e_continue);
62 Py_END_ALLOW_THREADS
53 /* If we have data left in the input, consume it. */
54 if (self->input.pos < self->input.size) {
55 Py_BEGIN_ALLOW_THREADS zresult =
56 ZSTD_compressStream2(self->compressor->cctx, &self->output,
57 &self->input, ZSTD_e_continue);
58 Py_END_ALLOW_THREADS
63 59
64 /* Release the Python object holding the input buffer. */
65 if (self->input.pos == self->input.size) {
66 self->input.src = NULL;
67 self->input.pos = 0;
68 self->input.size = 0;
69 Py_DECREF(self->readResult);
70 self->readResult = NULL;
71 }
60 /* Release the Python object holding the input buffer. */
61 if (self->input.pos == self->input.size) {
62 self->input.src = NULL;
63 self->input.pos = 0;
64 self->input.size = 0;
65 Py_DECREF(self->readResult);
66 self->readResult = NULL;
67 }
72 68
73 if (ZSTD_isError(zresult)) {
74 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
75 return NULL;
76 }
77
78 /* If it produced output data, emit it. */
79 if (self->output.pos) {
80 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
81 self->output.pos = 0;
82 return chunk;
83 }
84 }
69 if (ZSTD_isError(zresult)) {
70 PyErr_Format(ZstdError, "zstd compress error: %s",
71 ZSTD_getErrorName(zresult));
72 return NULL;
73 }
85 74
86 /* We should never have output data sitting around after a previous call. */
87 assert(self->output.pos == 0);
75 /* If it produced output data, emit it. */
76 if (self->output.pos) {
77 chunk =
78 PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
79 self->output.pos = 0;
80 return chunk;
81 }
82 }
88 83
89 /* The code above should have either emitted a chunk and returned or consumed
90 the entire input buffer. So the state of the input buffer is not
91 relevant. */
92 if (!self->finishedInput) {
93 if (self->reader) {
94 readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize);
95 if (!readResult) {
96 PyErr_SetString(ZstdError, "could not read() from source");
97 return NULL;
98 }
84 /* We should never have output data sitting around after a previous call. */
85 assert(self->output.pos == 0);
99 86
100 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
101 }
102 else {
103 assert(self->buffer.buf);
87 /* The code above should have either emitted a chunk and returned or
88 consumed the entire input buffer. So the state of the input buffer is not
89 relevant. */
90 if (!self->finishedInput) {
91 if (self->reader) {
92 readResult =
93 PyObject_CallMethod(self->reader, "read", "I", self->inSize);
94 if (!readResult) {
95 return NULL;
96 }
104 97
105 /* Only support contiguous C arrays. */
106 assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL);
107 assert(self->buffer.itemsize == 1);
98 PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize);
99 }
100 else {
101 assert(self->buffer.buf);
108 102
109 readBuffer = (char*)self->buffer.buf + self->bufferOffset;
110 bufferRemaining = self->buffer.len - self->bufferOffset;
111 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
112 self->bufferOffset += readSize;
113 }
103 /* Only support contiguous C arrays. */
104 assert(self->buffer.strides == NULL &&
105 self->buffer.suboffsets == NULL);
106 assert(self->buffer.itemsize == 1);
107
108 readBuffer = (char *)self->buffer.buf + self->bufferOffset;
109 bufferRemaining = self->buffer.len - self->bufferOffset;
110 readSize = min(bufferRemaining, (Py_ssize_t)self->inSize);
111 self->bufferOffset += readSize;
112 }
114 113
115 if (0 == readSize) {
116 Py_XDECREF(readResult);
117 self->finishedInput = 1;
118 }
119 else {
120 self->readResult = readResult;
121 }
122 }
114 if (0 == readSize) {
115 Py_XDECREF(readResult);
116 self->finishedInput = 1;
117 }
118 else {
119 self->readResult = readResult;
120 }
121 }
123 122
124 /* EOF */
125 if (0 == readSize) {
126 self->input.src = NULL;
127 self->input.size = 0;
128 self->input.pos = 0;
123 /* EOF */
124 if (0 == readSize) {
125 self->input.src = NULL;
126 self->input.size = 0;
127 self->input.pos = 0;
129 128
130 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
131 &self->input, ZSTD_e_end);
132 if (ZSTD_isError(zresult)) {
133 PyErr_Format(ZstdError, "error ending compression stream: %s",
134 ZSTD_getErrorName(zresult));
135 return NULL;
136 }
129 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
130 &self->input, ZSTD_e_end);
131 if (ZSTD_isError(zresult)) {
132 PyErr_Format(ZstdError, "error ending compression stream: %s",
133 ZSTD_getErrorName(zresult));
134 return NULL;
135 }
137 136
138 assert(self->output.pos);
137 assert(self->output.pos);
139 138
140 if (0 == zresult) {
141 self->finishedOutput = 1;
142 }
139 if (0 == zresult) {
140 self->finishedOutput = 1;
141 }
143 142
144 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
145 self->output.pos = 0;
146 return chunk;
147 }
143 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
144 self->output.pos = 0;
145 return chunk;
146 }
148 147
149 /* New data from reader. Feed into compressor. */
150 self->input.src = readBuffer;
151 self->input.size = readSize;
152 self->input.pos = 0;
148 /* New data from reader. Feed into compressor. */
149 self->input.src = readBuffer;
150 self->input.size = readSize;
151 self->input.pos = 0;
153 152
154 Py_BEGIN_ALLOW_THREADS
155 zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output,
156 &self->input, ZSTD_e_continue);
157 Py_END_ALLOW_THREADS
153 Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2(
154 self->compressor->cctx, &self->output, &self->input, ZSTD_e_continue);
155 Py_END_ALLOW_THREADS
158 156
159 /* The input buffer currently points to memory managed by Python
160 (readBuffer). This object was allocated by this function. If it wasn't
161 fully consumed, we need to release it in a subsequent function call.
162 If it is fully consumed, do that now.
163 */
164 if (self->input.pos == self->input.size) {
165 self->input.src = NULL;
166 self->input.pos = 0;
167 self->input.size = 0;
168 Py_XDECREF(self->readResult);
169 self->readResult = NULL;
170 }
157 /* The input buffer currently points to memory managed by Python
158 (readBuffer). This object was allocated by this function. If it wasn't
159 fully consumed, we need to release it in a subsequent function call.
160 If it is fully consumed, do that now.
161 */
162 if (self->input.pos == self->input.size) {
163 self->input.src = NULL;
164 self->input.pos = 0;
165 self->input.size = 0;
166 Py_XDECREF(self->readResult);
167 self->readResult = NULL;
168 }
171 169
172 if (ZSTD_isError(zresult)) {
173 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
174 return NULL;
175 }
170 if (ZSTD_isError(zresult)) {
171 PyErr_Format(ZstdError, "zstd compress error: %s",
172 ZSTD_getErrorName(zresult));
173 return NULL;
174 }
176 175
177 assert(self->input.pos <= self->input.size);
176 assert(self->input.pos <= self->input.size);
178 177
179 /* If we didn't write anything, start the process over. */
180 if (0 == self->output.pos) {
181 goto feedcompressor;
182 }
178 /* If we didn't write anything, start the process over. */
179 if (0 == self->output.pos) {
180 goto feedcompressor;
181 }
183 182
184 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
185 self->output.pos = 0;
186 return chunk;
183 chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
184 self->output.pos = 0;
185 return chunk;
187 186 }
188 187
189 PyTypeObject ZstdCompressorIteratorType = {
190 PyVarObject_HEAD_INIT(NULL, 0)
191 "zstd.ZstdCompressorIterator", /* tp_name */
192 sizeof(ZstdCompressorIterator), /* tp_basicsize */
193 0, /* tp_itemsize */
194 (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */
195 0, /* tp_print */
196 0, /* tp_getattr */
197 0, /* tp_setattr */
198 0, /* tp_compare */
199 0, /* tp_repr */
200 0, /* tp_as_number */
201 0, /* tp_as_sequence */
202 0, /* tp_as_mapping */
203 0, /* tp_hash */
204 0, /* tp_call */
205 0, /* tp_str */
206 0, /* tp_getattro */
207 0, /* tp_setattro */
208 0, /* tp_as_buffer */
209 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
210 ZstdCompressorIterator__doc__, /* tp_doc */
211 0, /* tp_traverse */
212 0, /* tp_clear */
213 0, /* tp_richcompare */
214 0, /* tp_weaklistoffset */
215 ZstdCompressorIterator_iter, /* tp_iter */
216 (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */
217 0, /* tp_methods */
218 0, /* tp_members */
219 0, /* tp_getset */
220 0, /* tp_base */
221 0, /* tp_dict */
222 0, /* tp_descr_get */
223 0, /* tp_descr_set */
224 0, /* tp_dictoffset */
225 0, /* tp_init */
226 0, /* tp_alloc */
227 PyType_GenericNew, /* tp_new */
188 PyType_Slot ZstdCompressorIteratorSlots[] = {
189 {Py_tp_dealloc, ZstdCompressorIterator_dealloc},
190 {Py_tp_iter, ZstdCompressorIterator_iter},
191 {Py_tp_iternext, ZstdCompressorIterator_iternext},
192 {Py_tp_new, PyType_GenericNew},
193 {0, NULL},
228 194 };
229 195
230 void compressoriterator_module_init(PyObject* mod) {
231 Py_SET_TYPE(&ZstdCompressorIteratorType, &PyType_Type);
232 if (PyType_Ready(&ZstdCompressorIteratorType) < 0) {
233 return;
234 }
196 PyType_Spec ZstdCompressorIteratorSpec = {
197 "zstd.ZstdCompressorIterator",
198 sizeof(ZstdCompressorIterator),
199 0,
200 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
201 ZstdCompressorIteratorSlots,
202 };
203
204 PyTypeObject *ZstdCompressorIteratorType;
205
206 void compressoriterator_module_init(PyObject *mod) {
207 ZstdCompressorIteratorType =
208 (PyTypeObject *)PyType_FromSpec(&ZstdCompressorIteratorSpec);
209 if (PyType_Ready(ZstdCompressorIteratorType) < 0) {
210 return;
211 }
235 212 }
@@ -1,110 +1,109
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
11 extern PyObject *ZstdError;
12 12
13 13 static char frame_header[] = {
14 '\x28',
15 '\xb5',
16 '\x2f',
17 '\xfd',
14 '\x28',
15 '\xb5',
16 '\x2f',
17 '\xfd',
18 18 };
19 19
20 void constants_module_init(PyObject* mod) {
21 PyObject* version;
22 PyObject* zstdVersion;
23 PyObject* frameHeader;
20 void constants_module_init(PyObject *mod) {
21 PyObject *version;
22 PyObject *zstdVersion;
23 PyObject *frameHeader;
24
25 version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
26 PyModule_AddObject(mod, "__version__", version);
24 27
25 #if PY_MAJOR_VERSION >= 3
26 version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION);
27 #else
28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
29 #endif
30 PyModule_AddObject(mod, "__version__", version);
28 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
29 PyModule_AddObject(mod, "ZstdError", ZstdError);
31 30
32 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
33 PyModule_AddObject(mod, "ZstdError", ZstdError);
31 PyModule_AddIntConstant(mod, "FLUSH_BLOCK", 0);
32 PyModule_AddIntConstant(mod, "FLUSH_FRAME", 1);
34 33
35 PyModule_AddIntConstant(mod, "FLUSH_BLOCK", 0);
36 PyModule_AddIntConstant(mod, "FLUSH_FRAME", 1);
34 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH",
35 compressorobj_flush_finish);
36 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK",
37 compressorobj_flush_block);
37 38
38 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
39 PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
40
41 /* For now, the version is a simple tuple instead of a dedicated type. */
42 zstdVersion = PyTuple_New(3);
43 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
44 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
45 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
46 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
39 /* For now, the version is a simple tuple instead of a dedicated type. */
40 zstdVersion = PyTuple_New(3);
41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
47 45
48 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
49 if (frameHeader) {
50 PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
51 }
52 else {
53 PyErr_Format(PyExc_ValueError, "could not create frame header object");
54 }
46 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
47 if (frameHeader) {
48 PyModule_AddObject(mod, "FRAME_HEADER", frameHeader);
49 }
50 else {
51 PyErr_Format(PyExc_ValueError, "could not create frame header object");
52 }
55 53
56 PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN",
57 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN));
58 PyModule_AddObject(mod, "CONTENTSIZE_ERROR",
59 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR));
54 PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN",
55 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN));
56 PyModule_AddObject(mod, "CONTENTSIZE_ERROR",
57 PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR));
60 58
61 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
62 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
63 (long)ZSTD_CStreamInSize());
64 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
65 (long)ZSTD_CStreamOutSize());
66 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
67 (long)ZSTD_DStreamInSize());
68 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
69 (long)ZSTD_DStreamOutSize());
59 PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel());
60 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE",
61 (long)ZSTD_CStreamInSize());
62 PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
63 (long)ZSTD_CStreamOutSize());
64 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
65 (long)ZSTD_DStreamInSize());
66 PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
67 (long)ZSTD_DStreamOutSize());
70 68
71 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
72 PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX);
73 PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX);
74 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
75 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
76 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
77 PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
78 PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
79 PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
80 PyModule_AddIntConstant(mod, "HASHLOG3_MAX", ZSTD_HASHLOG3_MAX);
81 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
82 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
83 PyModule_AddIntConstant(mod, "MINMATCH_MIN", ZSTD_MINMATCH_MIN);
84 PyModule_AddIntConstant(mod, "MINMATCH_MAX", ZSTD_MINMATCH_MAX);
85 /* TODO SEARCHLENGTH_* is deprecated. */
86 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_MINMATCH_MIN);
87 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_MINMATCH_MAX);
88 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
89 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
90 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
91 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
92 PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX);
69 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
70 PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX);
71 PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX);
72 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
73 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
74 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
75 PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX);
76 PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN);
77 PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX);
78 PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN);
79 PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX);
80 PyModule_AddIntConstant(mod, "MINMATCH_MIN", ZSTD_MINMATCH_MIN);
81 PyModule_AddIntConstant(mod, "MINMATCH_MAX", ZSTD_MINMATCH_MAX);
82 /* TODO SEARCHLENGTH_* is deprecated. */
83 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_MINMATCH_MIN);
84 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_MINMATCH_MAX);
85 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
86 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
87 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
88 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
89 PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX",
90 ZSTD_LDM_BUCKETSIZELOG_MAX);
93 91
94 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
95 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
96 PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
97 PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
98 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
99 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
100 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
101 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra);
102 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA2", ZSTD_btultra2);
92 PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast);
93 PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast);
94 PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy);
95 PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy);
96 PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2);
97 PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2);
98 PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt);
99 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra);
100 PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA2", ZSTD_btultra2);
103 101
104 PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto);
105 PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent);
106 PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict);
102 PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto);
103 PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent);
104 PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict);
107 105
108 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1);
109 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", ZSTD_f_zstd1_magicless);
106 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1);
107 PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS",
108 ZSTD_f_zstd1_magicless);
110 109 }
This diff has been collapsed as it changes many lines, (1147 lines changed) Show them Hide them
@@ -1,105 +1,108
1 1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
11 extern PyObject *ZstdError;
12 12
13 static void set_unsupported_operation(void) {
14 PyObject* iomod;
15 PyObject* exc;
13 static void decompressionreader_dealloc(ZstdDecompressionReader *self) {
14 Py_XDECREF(self->decompressor);
15 Py_XDECREF(self->reader);
16 16
17 iomod = PyImport_ImportModule("io");
18 if (NULL == iomod) {
19 return;
20 }
17 if (self->buffer.buf) {
18 PyBuffer_Release(&self->buffer);
19 }
21 20
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 if (NULL == exc) {
24 Py_DECREF(iomod);
25 return;
26 }
21 Py_CLEAR(self->readResult);
27 22
28 PyErr_SetNone(exc);
29 Py_DECREF(exc);
30 Py_DECREF(iomod);
23 PyObject_Del(self);
31 24 }
32 25
33 static void reader_dealloc(ZstdDecompressionReader* self) {
34 Py_XDECREF(self->decompressor);
35 Py_XDECREF(self->reader);
36
37 if (self->buffer.buf) {
38 PyBuffer_Release(&self->buffer);
39 }
40
41 PyObject_Del(self);
42 }
26 static ZstdDecompressionReader *
27 decompressionreader_enter(ZstdDecompressionReader *self) {
28 if (self->entered) {
29 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
30 return NULL;
31 }
43 32
44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
45 if (self->entered) {
46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 return NULL;
48 }
33 if (self->closed) {
34 PyErr_SetString(PyExc_ValueError, "stream is closed");
35 return NULL;
36 }
49 37
50 self->entered = 1;
38 self->entered = 1;
51 39
52 Py_INCREF(self);
53 return self;
40 Py_INCREF(self);
41 return self;
54 42 }
55 43
56 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
57 PyObject* exc_type;
58 PyObject* exc_value;
59 PyObject* exc_tb;
44 static PyObject *decompressionreader_exit(ZstdDecompressionReader *self,
45 PyObject *args) {
46 PyObject *exc_type;
47 PyObject *exc_value;
48 PyObject *exc_tb;
60 49
61 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
62 return NULL;
63 }
50 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value,
51 &exc_tb)) {
52 return NULL;
53 }
54
55 self->entered = 0;
64 56
65 self->entered = 0;
66 self->closed = 1;
57 if (NULL == PyObject_CallMethod((PyObject *)self, "close", NULL)) {
58 return NULL;
59 }
67 60
68 /* Release resources. */
69 Py_CLEAR(self->reader);
70 if (self->buffer.buf) {
71 PyBuffer_Release(&self->buffer);
72 memset(&self->buffer, 0, sizeof(self->buffer));
73 }
61 /* Release resources. */
62 Py_CLEAR(self->reader);
63 if (self->buffer.buf) {
64 PyBuffer_Release(&self->buffer);
65 memset(&self->buffer, 0, sizeof(self->buffer));
66 }
74 67
75 Py_CLEAR(self->decompressor);
68 Py_CLEAR(self->decompressor);
76 69
77 Py_RETURN_FALSE;
70 Py_RETURN_FALSE;
78 71 }
79 72
80 static PyObject* reader_readable(PyObject* self) {
81 Py_RETURN_TRUE;
73 static PyObject *decompressionreader_readable(PyObject *self) {
74 Py_RETURN_TRUE;
82 75 }
83 76
84 static PyObject* reader_writable(PyObject* self) {
85 Py_RETURN_FALSE;
77 static PyObject *decompressionreader_writable(PyObject *self) {
78 Py_RETURN_FALSE;
86 79 }
87 80
88 static PyObject* reader_seekable(PyObject* self) {
89 Py_RETURN_TRUE;
81 static PyObject *decompressionreader_seekable(PyObject *self) {
82 Py_RETURN_FALSE;
90 83 }
91 84
92 static PyObject* reader_close(ZstdDecompressionReader* self) {
93 self->closed = 1;
94 Py_RETURN_NONE;
85 static PyObject *decompressionreader_close(ZstdDecompressionReader *self) {
86 if (self->closed) {
87 Py_RETURN_NONE;
88 }
89
90 self->closed = 1;
91
92 if (self->closefd && self->reader != NULL &&
93 PyObject_HasAttrString(self->reader, "close")) {
94 return PyObject_CallMethod(self->reader, "close", NULL);
95 }
96
97 Py_RETURN_NONE;
95 98 }
96 99
97 static PyObject* reader_flush(PyObject* self) {
98 Py_RETURN_NONE;
100 static PyObject *decompressionreader_flush(PyObject *self) {
101 Py_RETURN_NONE;
99 102 }
100 103
101 static PyObject* reader_isatty(PyObject* self) {
102 Py_RETURN_FALSE;
104 static PyObject *decompressionreader_isatty(PyObject *self) {
105 Py_RETURN_FALSE;
103 106 }
104 107
105 108 /**
@@ -109,28 +112,29 static PyObject* reader_isatty(PyObject*
109 112 * Returns 1 if new input data is available.
110 113 * Returns -1 on error and sets a Python exception as a side-effect.
111 114 */
112 int read_decompressor_input(ZstdDecompressionReader* self) {
113 if (self->finishedInput) {
114 return 0;
115 }
115 int read_decompressor_input(ZstdDecompressionReader *self) {
116 if (self->finishedInput) {
117 return 0;
118 }
116 119
117 if (self->input.pos != self->input.size) {
118 return 0;
119 }
120 if (self->input.pos != self->input.size) {
121 return 0;
122 }
120 123
121 if (self->reader) {
124 if (self->reader) {
122 125 Py_buffer buffer;
123 126
124 127 assert(self->readResult == NULL);
125 self->readResult = PyObject_CallMethod(self->reader, "read",
126 "k", self->readSize);
128 self->readResult =
129 PyObject_CallMethod(self->reader, "read", "k", self->readSize);
127 130 if (NULL == self->readResult) {
128 131 return -1;
129 132 }
130 133
131 134 memset(&buffer, 0, sizeof(buffer));
132 135
133 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
136 if (0 !=
137 PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
134 138 return -1;
135 139 }
136 140
@@ -146,21 +150,21 int read_decompressor_input(ZstdDecompre
146 150 }
147 151
148 152 PyBuffer_Release(&buffer);
149 }
150 else {
151 assert(self->buffer.buf);
153 }
154 else {
155 assert(self->buffer.buf);
152 156 /*
153 157 * We should only get here once since expectation is we always
154 158 * exhaust input buffer before reading again.
155 159 */
156 160 assert(self->input.src == NULL);
157 161
158 self->input.src = self->buffer.buf;
162 self->input.src = self->buffer.buf;
159 163 self->input.size = self->buffer.len;
160 164 self->input.pos = 0;
161 }
165 }
162 166
163 return 1;
167 return 1;
164 168 }
165 169
166 170 /**
@@ -170,612 +174,607 int read_decompressor_input(ZstdDecompre
170 174 * Returns 1 if output buffer should be emitted.
171 175 * Returns -1 on error and sets a Python exception.
172 176 */
173 int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
174 size_t zresult;
177 int decompress_input(ZstdDecompressionReader *self, ZSTD_outBuffer *output) {
178 size_t zresult;
175 179
176 if (self->input.pos >= self->input.size) {
177 return 0;
178 }
180 if (self->input.pos >= self->input.size) {
181 return 0;
182 }
179 183
180 Py_BEGIN_ALLOW_THREADS
181 zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
182 Py_END_ALLOW_THREADS
184 Py_BEGIN_ALLOW_THREADS zresult =
185 ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
186 Py_END_ALLOW_THREADS
183 187
184 /* Input exhausted. Clear our state tracking. */
185 if (self->input.pos == self->input.size) {
186 memset(&self->input, 0, sizeof(self->input));
187 Py_CLEAR(self->readResult);
188 /* Input exhausted. Clear our state tracking. */
189 if (self->input.pos == self->input.size) {
190 memset(&self->input, 0, sizeof(self->input));
191 Py_CLEAR(self->readResult);
188 192
189 if (self->buffer.buf) {
190 self->finishedInput = 1;
191 }
192 }
193 if (self->buffer.buf) {
194 self->finishedInput = 1;
195 }
196 }
193 197
194 if (ZSTD_isError(zresult)) {
195 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
196 return -1;
197 }
198 if (ZSTD_isError(zresult)) {
199 PyErr_Format(ZstdError, "zstd decompress error: %s",
200 ZSTD_getErrorName(zresult));
201 return -1;
202 }
198 203
199 /* We fulfilled the full read request. Signal to emit. */
200 if (output->pos && output->pos == output->size) {
201 return 1;
202 }
203 /* We're at the end of a frame and we aren't allowed to return data
204 spanning frames. */
205 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
206 return 1;
207 }
204 /* We fulfilled the full read request. Signal to emit. */
205 if (output->pos && output->pos == output->size) {
206 return 1;
207 }
208 /* We're at the end of a frame and we aren't allowed to return data
209 spanning frames. */
210 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
211 return 1;
212 }
208 213
209 /* There is more room in the output. Signal to collect more data. */
210 return 0;
214 /* There is more room in the output. Signal to collect more data. */
215 return 0;
211 216 }
212 217
213 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
214 static char* kwlist[] = {
215 "size",
216 NULL
217 };
218 static PyObject *decompressionreader_read(ZstdDecompressionReader *self,
219 PyObject *args, PyObject *kwargs) {
220 static char *kwlist[] = {"size", NULL};
218 221
219 Py_ssize_t size = -1;
220 PyObject* result = NULL;
221 char* resultBuffer;
222 Py_ssize_t resultSize;
223 ZSTD_outBuffer output;
224 int decompressResult, readResult;
222 Py_ssize_t size = -1;
223 PyObject *result = NULL;
224 char *resultBuffer;
225 Py_ssize_t resultSize;
226 ZSTD_outBuffer output;
227 int decompressResult, readResult;
225 228
226 if (self->closed) {
227 PyErr_SetString(PyExc_ValueError, "stream is closed");
228 return NULL;
229 }
229 if (self->closed) {
230 PyErr_SetString(PyExc_ValueError, "stream is closed");
231 return NULL;
232 }
230 233
231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
232 return NULL;
233 }
234 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
235 return NULL;
236 }
234 237
235 if (size < -1) {
236 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
237 return NULL;
238 }
238 if (size < -1) {
239 PyErr_SetString(PyExc_ValueError,
240 "cannot read negative amounts less than -1");
241 return NULL;
242 }
239 243
240 if (size == -1) {
241 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
242 }
244 if (size == -1) {
245 return PyObject_CallMethod((PyObject *)self, "readall", NULL);
246 }
243 247
244 if (self->finishedOutput || size == 0) {
245 return PyBytes_FromStringAndSize("", 0);
246 }
248 if (self->finishedOutput || size == 0) {
249 return PyBytes_FromStringAndSize("", 0);
250 }
247 251
248 result = PyBytes_FromStringAndSize(NULL, size);
249 if (NULL == result) {
250 return NULL;
251 }
252 result = PyBytes_FromStringAndSize(NULL, size);
253 if (NULL == result) {
254 return NULL;
255 }
252 256
253 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
257 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
254 258
255 output.dst = resultBuffer;
256 output.size = resultSize;
257 output.pos = 0;
259 output.dst = resultBuffer;
260 output.size = resultSize;
261 output.pos = 0;
258 262
259 263 readinput:
260 264
261 decompressResult = decompress_input(self, &output);
265 decompressResult = decompress_input(self, &output);
262 266
263 if (-1 == decompressResult) {
264 Py_XDECREF(result);
265 return NULL;
266 }
267 else if (0 == decompressResult) { }
268 else if (1 == decompressResult) {
269 self->bytesDecompressed += output.pos;
267 if (-1 == decompressResult) {
268 Py_XDECREF(result);
269 return NULL;
270 }
271 else if (0 == decompressResult) {
272 }
273 else if (1 == decompressResult) {
274 self->bytesDecompressed += output.pos;
270 275
271 if (output.pos != output.size) {
272 if (safe_pybytes_resize(&result, output.pos)) {
273 Py_XDECREF(result);
274 return NULL;
275 }
276 }
277 return result;
278 }
279 else {
280 assert(0);
281 }
276 if (output.pos != output.size) {
277 if (safe_pybytes_resize(&result, output.pos)) {
278 Py_XDECREF(result);
279 return NULL;
280 }
281 }
282 return result;
283 }
284 else {
285 assert(0);
286 }
282 287
283 readResult = read_decompressor_input(self);
288 readResult = read_decompressor_input(self);
284 289
285 if (-1 == readResult) {
286 Py_XDECREF(result);
287 return NULL;
288 }
289 else if (0 == readResult) {}
290 else if (1 == readResult) {}
291 else {
292 assert(0);
293 }
290 if (-1 == readResult) {
291 Py_XDECREF(result);
292 return NULL;
293 }
294 else if (0 == readResult) {
295 }
296 else if (1 == readResult) {
297 }
298 else {
299 assert(0);
300 }
294 301
295 if (self->input.size) {
296 goto readinput;
297 }
302 if (self->input.size) {
303 goto readinput;
304 }
298 305
299 /* EOF */
300 self->bytesDecompressed += output.pos;
306 /* EOF */
307 self->bytesDecompressed += output.pos;
301 308
302 if (safe_pybytes_resize(&result, output.pos)) {
303 Py_XDECREF(result);
304 return NULL;
305 }
309 if (safe_pybytes_resize(&result, output.pos)) {
310 Py_XDECREF(result);
311 return NULL;
312 }
306 313
307 return result;
314 return result;
308 315 }
309 316
310 static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
311 static char* kwlist[] = {
312 "size",
313 NULL
314 };
317 static PyObject *decompressionreader_read1(ZstdDecompressionReader *self,
318 PyObject *args, PyObject *kwargs) {
319 static char *kwlist[] = {"size", NULL};
315 320
316 Py_ssize_t size = -1;
317 PyObject* result = NULL;
318 char* resultBuffer;
319 Py_ssize_t resultSize;
320 ZSTD_outBuffer output;
321 Py_ssize_t size = -1;
322 PyObject *result = NULL;
323 char *resultBuffer;
324 Py_ssize_t resultSize;
325 ZSTD_outBuffer output;
321 326
322 if (self->closed) {
323 PyErr_SetString(PyExc_ValueError, "stream is closed");
324 return NULL;
325 }
327 if (self->closed) {
328 PyErr_SetString(PyExc_ValueError, "stream is closed");
329 return NULL;
330 }
331
332 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
333 return NULL;
334 }
326 335
327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
328 return NULL;
329 }
336 if (size < -1) {
337 PyErr_SetString(PyExc_ValueError,
338 "cannot read negative amounts less than -1");
339 return NULL;
340 }
330 341
331 if (size < -1) {
332 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
333 return NULL;
334 }
342 if (self->finishedOutput || size == 0) {
343 return PyBytes_FromStringAndSize("", 0);
344 }
335 345
336 if (self->finishedOutput || size == 0) {
337 return PyBytes_FromStringAndSize("", 0);
338 }
346 if (size == -1) {
347 size = ZSTD_DStreamOutSize();
348 }
339 349
340 if (size == -1) {
341 size = ZSTD_DStreamOutSize();
342 }
350 result = PyBytes_FromStringAndSize(NULL, size);
351 if (NULL == result) {
352 return NULL;
353 }
343 354
344 result = PyBytes_FromStringAndSize(NULL, size);
345 if (NULL == result) {
346 return NULL;
347 }
355 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
348 356
349 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
357 output.dst = resultBuffer;
358 output.size = resultSize;
359 output.pos = 0;
350 360
351 output.dst = resultBuffer;
352 output.size = resultSize;
353 output.pos = 0;
361 /* read1() is supposed to use at most 1 read() from the underlying stream.
362 * However, we can't satisfy this requirement with decompression due to the
363 * nature of how decompression works. Our strategy is to read + decompress
364 * until we get any output, at which point we return. This satisfies the
365 * intent of the read1() API to limit read operations.
366 */
367 while (!self->finishedInput) {
368 int readResult, decompressResult;
354 369
355 /* read1() is supposed to use at most 1 read() from the underlying stream.
356 * However, we can't satisfy this requirement with decompression due to the
357 * nature of how decompression works. Our strategy is to read + decompress
358 * until we get any output, at which point we return. This satisfies the
359 * intent of the read1() API to limit read operations.
360 */
361 while (!self->finishedInput) {
362 int readResult, decompressResult;
370 readResult = read_decompressor_input(self);
371 if (-1 == readResult) {
372 Py_XDECREF(result);
373 return NULL;
374 }
375 else if (0 == readResult || 1 == readResult) {
376 }
377 else {
378 assert(0);
379 }
363 380
364 readResult = read_decompressor_input(self);
365 if (-1 == readResult) {
366 Py_XDECREF(result);
367 return NULL;
368 }
369 else if (0 == readResult || 1 == readResult) { }
370 else {
371 assert(0);
372 }
381 decompressResult = decompress_input(self, &output);
373 382
374 decompressResult = decompress_input(self, &output);
375
376 if (-1 == decompressResult) {
377 Py_XDECREF(result);
378 return NULL;
379 }
380 else if (0 == decompressResult || 1 == decompressResult) { }
381 else {
382 assert(0);
383 }
383 if (-1 == decompressResult) {
384 Py_XDECREF(result);
385 return NULL;
386 }
387 else if (0 == decompressResult || 1 == decompressResult) {
388 }
389 else {
390 assert(0);
391 }
384 392
385 if (output.pos) {
386 break;
387 }
388 }
393 if (output.pos) {
394 break;
395 }
396 }
389 397
390 self->bytesDecompressed += output.pos;
391 if (safe_pybytes_resize(&result, output.pos)) {
392 Py_XDECREF(result);
393 return NULL;
394 }
398 self->bytesDecompressed += output.pos;
399 if (safe_pybytes_resize(&result, output.pos)) {
400 Py_XDECREF(result);
401 return NULL;
402 }
395 403
396 return result;
404 return result;
397 405 }
398 406
399 static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
400 Py_buffer dest;
401 ZSTD_outBuffer output;
402 int decompressResult, readResult;
403 PyObject* result = NULL;
407 static PyObject *decompressionreader_readinto(ZstdDecompressionReader *self,
408 PyObject *args) {
409 Py_buffer dest;
410 ZSTD_outBuffer output;
411 int decompressResult, readResult;
412 PyObject *result = NULL;
404 413
405 if (self->closed) {
406 PyErr_SetString(PyExc_ValueError, "stream is closed");
407 return NULL;
408 }
414 if (self->closed) {
415 PyErr_SetString(PyExc_ValueError, "stream is closed");
416 return NULL;
417 }
409 418
410 if (self->finishedOutput) {
411 return PyLong_FromLong(0);
412 }
413
414 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
415 return NULL;
416 }
419 if (self->finishedOutput) {
420 return PyLong_FromLong(0);
421 }
417 422
418 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
419 PyErr_SetString(PyExc_ValueError,
420 "destination buffer should be contiguous and have at most one dimension");
421 goto finally;
422 }
423 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
424 return NULL;
425 }
423 426
424 output.dst = dest.buf;
425 output.size = dest.len;
426 output.pos = 0;
427 output.dst = dest.buf;
428 output.size = dest.len;
429 output.pos = 0;
427 430
428 431 readinput:
429 432
430 decompressResult = decompress_input(self, &output);
433 decompressResult = decompress_input(self, &output);
431 434
432 if (-1 == decompressResult) {
433 goto finally;
434 }
435 else if (0 == decompressResult) { }
436 else if (1 == decompressResult) {
437 self->bytesDecompressed += output.pos;
438 result = PyLong_FromSize_t(output.pos);
439 goto finally;
440 }
441 else {
442 assert(0);
443 }
435 if (-1 == decompressResult) {
436 goto finally;
437 }
438 else if (0 == decompressResult) {
439 }
440 else if (1 == decompressResult) {
441 self->bytesDecompressed += output.pos;
442 result = PyLong_FromSize_t(output.pos);
443 goto finally;
444 }
445 else {
446 assert(0);
447 }
444 448
445 readResult = read_decompressor_input(self);
449 readResult = read_decompressor_input(self);
446 450
447 if (-1 == readResult) {
448 goto finally;
449 }
450 else if (0 == readResult) {}
451 else if (1 == readResult) {}
452 else {
453 assert(0);
454 }
451 if (-1 == readResult) {
452 goto finally;
453 }
454 else if (0 == readResult) {
455 }
456 else if (1 == readResult) {
457 }
458 else {
459 assert(0);
460 }
455 461
456 if (self->input.size) {
457 goto readinput;
458 }
462 if (self->input.size) {
463 goto readinput;
464 }
459 465
460 /* EOF */
461 self->bytesDecompressed += output.pos;
462 result = PyLong_FromSize_t(output.pos);
466 /* EOF */
467 self->bytesDecompressed += output.pos;
468 result = PyLong_FromSize_t(output.pos);
463 469
464 470 finally:
465 PyBuffer_Release(&dest);
471 PyBuffer_Release(&dest);
466 472
467 return result;
473 return result;
468 474 }
469 475
470 static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
471 Py_buffer dest;
472 ZSTD_outBuffer output;
473 PyObject* result = NULL;
476 static PyObject *decompressionreader_readinto1(ZstdDecompressionReader *self,
477 PyObject *args) {
478 Py_buffer dest;
479 ZSTD_outBuffer output;
480 PyObject *result = NULL;
474 481
475 if (self->closed) {
476 PyErr_SetString(PyExc_ValueError, "stream is closed");
477 return NULL;
478 }
479
480 if (self->finishedOutput) {
481 return PyLong_FromLong(0);
482 }
482 if (self->closed) {
483 PyErr_SetString(PyExc_ValueError, "stream is closed");
484 return NULL;
485 }
483 486
484 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
485 return NULL;
486 }
487 if (self->finishedOutput) {
488 return PyLong_FromLong(0);
489 }
490
491 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
492 return NULL;
493 }
487 494
488 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
489 PyErr_SetString(PyExc_ValueError,
490 "destination buffer should be contiguous and have at most one dimension");
491 goto finally;
492 }
495 output.dst = dest.buf;
496 output.size = dest.len;
497 output.pos = 0;
493 498
494 output.dst = dest.buf;
495 output.size = dest.len;
496 output.pos = 0;
499 while (!self->finishedInput && !self->finishedOutput) {
500 int decompressResult, readResult;
501
502 readResult = read_decompressor_input(self);
497 503
498 while (!self->finishedInput && !self->finishedOutput) {
499 int decompressResult, readResult;
500
501 readResult = read_decompressor_input(self);
504 if (-1 == readResult) {
505 goto finally;
506 }
507 else if (0 == readResult || 1 == readResult) {
508 }
509 else {
510 assert(0);
511 }
502 512
503 if (-1 == readResult) {
504 goto finally;
505 }
506 else if (0 == readResult || 1 == readResult) {}
507 else {
508 assert(0);
509 }
510
511 decompressResult = decompress_input(self, &output);
513 decompressResult = decompress_input(self, &output);
512 514
513 if (-1 == decompressResult) {
514 goto finally;
515 }
516 else if (0 == decompressResult || 1 == decompressResult) {}
517 else {
518 assert(0);
519 }
515 if (-1 == decompressResult) {
516 goto finally;
517 }
518 else if (0 == decompressResult || 1 == decompressResult) {
519 }
520 else {
521 assert(0);
522 }
520 523
521 if (output.pos) {
522 break;
523 }
524 }
524 if (output.pos) {
525 break;
526 }
527 }
525 528
526 self->bytesDecompressed += output.pos;
527 result = PyLong_FromSize_t(output.pos);
529 self->bytesDecompressed += output.pos;
530 result = PyLong_FromSize_t(output.pos);
528 531
529 532 finally:
530 PyBuffer_Release(&dest);
533 PyBuffer_Release(&dest);
531 534
532 return result;
535 return result;
533 536 }
534 537
535 static PyObject* reader_readall(PyObject* self) {
536 PyObject* chunks = NULL;
537 PyObject* empty = NULL;
538 PyObject* result = NULL;
538 static PyObject *decompressionreader_readall(PyObject *self) {
539 PyObject *chunks = NULL;
540 PyObject *empty = NULL;
541 PyObject *result = NULL;
539 542
540 /* Our strategy is to collect chunks into a list then join all the
541 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
542 * this feels simple enough to implement and avoids potentially expensive
543 * reallocations of large buffers.
544 */
545 chunks = PyList_New(0);
546 if (NULL == chunks) {
547 return NULL;
548 }
543 /* Our strategy is to collect chunks into a list then join all the
544 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
545 * this feels simple enough to implement and avoids potentially expensive
546 * reallocations of large buffers.
547 */
548 chunks = PyList_New(0);
549 if (NULL == chunks) {
550 return NULL;
551 }
549 552
550 while (1) {
551 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
552 if (NULL == chunk) {
553 Py_DECREF(chunks);
554 return NULL;
555 }
553 while (1) {
554 PyObject *chunk = PyObject_CallMethod(self, "read", "i", 1048576);
555 if (NULL == chunk) {
556 Py_DECREF(chunks);
557 return NULL;
558 }
556 559
557 if (!PyBytes_Size(chunk)) {
558 Py_DECREF(chunk);
559 break;
560 }
560 if (!PyBytes_Size(chunk)) {
561 Py_DECREF(chunk);
562 break;
563 }
561 564
562 if (PyList_Append(chunks, chunk)) {
563 Py_DECREF(chunk);
564 Py_DECREF(chunks);
565 return NULL;
566 }
565 if (PyList_Append(chunks, chunk)) {
566 Py_DECREF(chunk);
567 Py_DECREF(chunks);
568 return NULL;
569 }
567 570
568 Py_DECREF(chunk);
569 }
571 Py_DECREF(chunk);
572 }
570 573
571 empty = PyBytes_FromStringAndSize("", 0);
572 if (NULL == empty) {
573 Py_DECREF(chunks);
574 return NULL;
575 }
574 empty = PyBytes_FromStringAndSize("", 0);
575 if (NULL == empty) {
576 Py_DECREF(chunks);
577 return NULL;
578 }
576 579
577 result = PyObject_CallMethod(empty, "join", "O", chunks);
580 result = PyObject_CallMethod(empty, "join", "O", chunks);
578 581
579 Py_DECREF(empty);
580 Py_DECREF(chunks);
582 Py_DECREF(empty);
583 Py_DECREF(chunks);
581 584
582 return result;
585 return result;
583 586 }
584 587
585 static PyObject* reader_readline(PyObject* self) {
586 set_unsupported_operation();
587 return NULL;
588 static PyObject *decompressionreader_readline(PyObject *self, PyObject *args,
589 PyObject *kwargs) {
590 set_io_unsupported_operation();
591 return NULL;
588 592 }
589 593
590 static PyObject* reader_readlines(PyObject* self) {
591 set_unsupported_operation();
592 return NULL;
594 static PyObject *decompressionreader_readlines(PyObject *self, PyObject *args,
595 PyObject *kwargs) {
596 set_io_unsupported_operation();
597 return NULL;
593 598 }
594 599
595 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
596 Py_ssize_t pos;
597 int whence = 0;
598 unsigned long long readAmount = 0;
599 size_t defaultOutSize = ZSTD_DStreamOutSize();
600 static PyObject *decompressionreader_seek(ZstdDecompressionReader *self,
601 PyObject *args) {
602 Py_ssize_t pos;
603 int whence = 0;
604 unsigned long long readAmount = 0;
605 size_t defaultOutSize = ZSTD_DStreamOutSize();
600 606
601 if (self->closed) {
602 PyErr_SetString(PyExc_ValueError, "stream is closed");
603 return NULL;
604 }
607 if (self->closed) {
608 PyErr_SetString(PyExc_ValueError, "stream is closed");
609 return NULL;
610 }
611
612 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
613 return NULL;
614 }
605 615
606 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
607 return NULL;
608 }
616 if (whence == SEEK_SET) {
617 if (pos < 0) {
618 PyErr_SetString(PyExc_OSError,
619 "cannot seek to negative position with SEEK_SET");
620 return NULL;
621 }
609 622
610 if (whence == SEEK_SET) {
611 if (pos < 0) {
612 PyErr_SetString(PyExc_ValueError,
613 "cannot seek to negative position with SEEK_SET");
614 return NULL;
615 }
623 if ((unsigned long long)pos < self->bytesDecompressed) {
624 PyErr_SetString(PyExc_OSError,
625 "cannot seek zstd decompression stream backwards");
626 return NULL;
627 }
616 628
617 if ((unsigned long long)pos < self->bytesDecompressed) {
618 PyErr_SetString(PyExc_ValueError,
619 "cannot seek zstd decompression stream backwards");
620 return NULL;
621 }
629 readAmount = pos - self->bytesDecompressed;
630 }
631 else if (whence == SEEK_CUR) {
632 if (pos < 0) {
633 PyErr_SetString(PyExc_OSError,
634 "cannot seek zstd decompression stream backwards");
635 return NULL;
636 }
622 637
623 readAmount = pos - self->bytesDecompressed;
624 }
625 else if (whence == SEEK_CUR) {
626 if (pos < 0) {
627 PyErr_SetString(PyExc_ValueError,
628 "cannot seek zstd decompression stream backwards");
629 return NULL;
630 }
631
632 readAmount = pos;
633 }
634 else if (whence == SEEK_END) {
635 /* We /could/ support this with pos==0. But let's not do that until someone
636 needs it. */
637 PyErr_SetString(PyExc_ValueError,
638 "zstd decompression streams cannot be seeked with SEEK_END");
639 return NULL;
640 }
638 readAmount = pos;
639 }
640 else if (whence == SEEK_END) {
641 /* We /could/ support this with pos==0. But let's not do that until
642 someone needs it. */
643 PyErr_SetString(
644 PyExc_OSError,
645 "zstd decompression streams cannot be seeked with SEEK_END");
646 return NULL;
647 }
641 648
642 /* It is a bit inefficient to do this via the Python API. But since there
643 is a bit of state tracking involved to read from this type, it is the
644 easiest to implement. */
645 while (readAmount) {
646 Py_ssize_t readSize;
647 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
648 readAmount < defaultOutSize ? readAmount : defaultOutSize);
649 /* It is a bit inefficient to do this via the Python API. But since there
650 is a bit of state tracking involved to read from this type, it is the
651 easiest to implement. */
652 while (readAmount) {
653 Py_ssize_t readSize;
654 PyObject *readResult = PyObject_CallMethod(
655 (PyObject *)self, "read", "K",
656 readAmount < defaultOutSize ? readAmount : defaultOutSize);
649 657
650 if (!readResult) {
651 return NULL;
652 }
658 if (!readResult) {
659 return NULL;
660 }
653 661
654 readSize = PyBytes_GET_SIZE(readResult);
662 readSize = PyBytes_GET_SIZE(readResult);
655 663
656 Py_CLEAR(readResult);
664 Py_CLEAR(readResult);
657 665
658 /* Empty read means EOF. */
659 if (!readSize) {
660 break;
661 }
666 /* Empty read means EOF. */
667 if (!readSize) {
668 break;
669 }
662 670
663 readAmount -= readSize;
664 }
671 readAmount -= readSize;
672 }
665 673
666 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
674 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
667 675 }
668 676
669 static PyObject* reader_tell(ZstdDecompressionReader* self) {
670 /* TODO should this raise OSError since stream isn't seekable? */
671 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
677 static PyObject *decompressionreader_tell(ZstdDecompressionReader *self) {
678 /* TODO should this raise OSError since stream isn't seekable? */
679 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
672 680 }
673 681
674 static PyObject* reader_write(PyObject* self, PyObject* args) {
675 set_unsupported_operation();
676 return NULL;
677 }
678
679 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
680 set_unsupported_operation();
681 return NULL;
682 }
683
684 static PyObject* reader_iter(PyObject* self) {
685 set_unsupported_operation();
686 return NULL;
682 static PyObject *decompressionreader_write(PyObject *self, PyObject *args) {
683 set_io_unsupported_operation();
684 return NULL;
687 685 }
688 686
689 static PyObject* reader_iternext(PyObject* self) {
690 set_unsupported_operation();
691 return NULL;
687 static PyObject *decompressionreader_writelines(PyObject *self,
688 PyObject *args) {
689 set_io_unsupported_operation();
690 return NULL;
691 }
692
693 static PyObject *decompressionreader_iter(PyObject *self) {
694 set_io_unsupported_operation();
695 return NULL;
696 }
697
698 static PyObject *decompressionreader_iternext(PyObject *self) {
699 set_io_unsupported_operation();
700 return NULL;
692 701 }
693 702
694 static PyMethodDef reader_methods[] = {
695 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
696 PyDoc_STR("Enter a compression context") },
697 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
698 PyDoc_STR("Exit a compression context") },
699 { "close", (PyCFunction)reader_close, METH_NOARGS,
700 PyDoc_STR("Close the stream so it cannot perform any more operations") },
701 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
702 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
703 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
704 PyDoc_STR("Returns True") },
705 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
706 PyDoc_STR("read compressed data") },
707 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
708 PyDoc_STR("read compressed data") },
709 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
710 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
711 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
712 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
713 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
714 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
715 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
716 PyDoc_STR("Returns True") },
717 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
718 PyDoc_STR("Returns current number of bytes compressed") },
719 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
720 PyDoc_STR("Returns False") },
721 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
722 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
723 { NULL, NULL }
724 };
703 static PyMethodDef decompressionreader_methods[] = {
704 {"__enter__", (PyCFunction)decompressionreader_enter, METH_NOARGS,
705 PyDoc_STR("Enter a compression context")},
706 {"__exit__", (PyCFunction)decompressionreader_exit, METH_VARARGS,
707 PyDoc_STR("Exit a compression context")},
708 {"close", (PyCFunction)decompressionreader_close, METH_NOARGS,
709 PyDoc_STR("Close the stream so it cannot perform any more operations")},
710 {"flush", (PyCFunction)decompressionreader_flush, METH_NOARGS,
711 PyDoc_STR("no-ops")},
712 {"isatty", (PyCFunction)decompressionreader_isatty, METH_NOARGS,
713 PyDoc_STR("Returns False")},
714 {"readable", (PyCFunction)decompressionreader_readable, METH_NOARGS,
715 PyDoc_STR("Returns True")},
716 {"read", (PyCFunction)decompressionreader_read,
717 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data")},
718 {"read1", (PyCFunction)decompressionreader_read1,
719 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data")},
720 {"readinto", (PyCFunction)decompressionreader_readinto, METH_VARARGS, NULL},
721 {"readinto1", (PyCFunction)decompressionreader_readinto1, METH_VARARGS,
722 NULL},
723 {"readall", (PyCFunction)decompressionreader_readall, METH_NOARGS,
724 PyDoc_STR("Not implemented")},
725 {"readline", (PyCFunction)decompressionreader_readline,
726 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Not implemented")},
727 {"readlines", (PyCFunction)decompressionreader_readlines,
728 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Not implemented")},
729 {"seek", (PyCFunction)decompressionreader_seek, METH_VARARGS,
730 PyDoc_STR("Seek the stream")},
731 {"seekable", (PyCFunction)decompressionreader_seekable, METH_NOARGS,
732 PyDoc_STR("Returns False")},
733 {"tell", (PyCFunction)decompressionreader_tell, METH_NOARGS,
734 PyDoc_STR("Returns current number of bytes compressed")},
735 {"writable", (PyCFunction)decompressionreader_writable, METH_NOARGS,
736 PyDoc_STR("Returns False")},
737 {"write", (PyCFunction)decompressionreader_write, METH_VARARGS,
738 PyDoc_STR("unsupported operation")},
739 {"writelines", (PyCFunction)decompressionreader_writelines, METH_VARARGS,
740 PyDoc_STR("unsupported operation")},
741 {NULL, NULL}};
725 742
726 static PyMemberDef reader_members[] = {
727 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
728 READONLY, "whether stream is closed" },
729 { NULL }
743 static PyMemberDef decompressionreader_members[] = {
744 {"closed", T_BOOL, offsetof(ZstdDecompressionReader, closed), READONLY,
745 "whether stream is closed"},
746 {NULL}};
747
748 PyType_Slot ZstdDecompressionReaderSlots[] = {
749 {Py_tp_dealloc, decompressionreader_dealloc},
750 {Py_tp_iter, decompressionreader_iter},
751 {Py_tp_iternext, decompressionreader_iternext},
752 {Py_tp_methods, decompressionreader_methods},
753 {Py_tp_members, decompressionreader_members},
754 {Py_tp_new, PyType_GenericNew},
755 {0, NULL},
730 756 };
731 757
732 PyTypeObject ZstdDecompressionReaderType = {
733 PyVarObject_HEAD_INIT(NULL, 0)
734 "zstd.ZstdDecompressionReader", /* tp_name */
735 sizeof(ZstdDecompressionReader), /* tp_basicsize */
736 0, /* tp_itemsize */
737 (destructor)reader_dealloc, /* tp_dealloc */
738 0, /* tp_print */
739 0, /* tp_getattr */
740 0, /* tp_setattr */
741 0, /* tp_compare */
742 0, /* tp_repr */
743 0, /* tp_as_number */
744 0, /* tp_as_sequence */
745 0, /* tp_as_mapping */
746 0, /* tp_hash */
747 0, /* tp_call */
748 0, /* tp_str */
749 0, /* tp_getattro */
750 0, /* tp_setattro */
751 0, /* tp_as_buffer */
752 Py_TPFLAGS_DEFAULT, /* tp_flags */
753 0, /* tp_doc */
754 0, /* tp_traverse */
755 0, /* tp_clear */
756 0, /* tp_richcompare */
757 0, /* tp_weaklistoffset */
758 reader_iter, /* tp_iter */
759 reader_iternext, /* tp_iternext */
760 reader_methods, /* tp_methods */
761 reader_members, /* tp_members */
762 0, /* tp_getset */
763 0, /* tp_base */
764 0, /* tp_dict */
765 0, /* tp_descr_get */
766 0, /* tp_descr_set */
767 0, /* tp_dictoffset */
768 0, /* tp_init */
769 0, /* tp_alloc */
770 PyType_GenericNew, /* tp_new */
758 PyType_Spec ZstdDecompressionReaderSpec = {
759 "zstd.ZstdDecompressionReader",
760 sizeof(ZstdDecompressionReader),
761 0,
762 Py_TPFLAGS_DEFAULT,
763 ZstdDecompressionReaderSlots,
771 764 };
772 765
766 PyTypeObject *ZstdDecompressionReaderType;
773 767
774 void decompressionreader_module_init(PyObject* mod) {
775 /* TODO make reader a sub-class of io.RawIOBase */
768 void decompressionreader_module_init(PyObject *mod) {
769 /* TODO make reader a sub-class of io.RawIOBase */
776 770
777 Py_SET_TYPE(&ZstdDecompressionReaderType, &PyType_Type);
778 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
779 return;
780 }
771 ZstdDecompressionReaderType =
772 (PyTypeObject *)PyType_FromSpec(&ZstdDecompressionReaderSpec);
773 if (PyType_Ready(ZstdDecompressionReaderType) < 0) {
774 return;
775 }
776
777 Py_INCREF((PyObject *)ZstdDecompressionReaderType);
778 PyModule_AddObject(mod, "ZstdDecompressionReader",
779 (PyObject *)ZstdDecompressionReaderType);
781 780 }
@@ -1,295 +1,273
1 1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8 8
9 9 #include "python-zstandard.h"
10 10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(ZstdDecompressionWriter__doc,
14 """A context manager used for writing decompressed output.\n"
15 );
11 extern PyObject *ZstdError;
16 12
17 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* self) {
18 Py_XDECREF(self->decompressor);
19 Py_XDECREF(self->writer);
13 static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter *self) {
14 Py_XDECREF(self->decompressor);
15 Py_XDECREF(self->writer);
20 16
21 PyObject_Del(self);
17 PyObject_Del(self);
22 18 }
23 19
24 static PyObject* ZstdDecompressionWriter_enter(ZstdDecompressionWriter* self) {
25 if (self->closed) {
26 PyErr_SetString(PyExc_ValueError, "stream is closed");
27 return NULL;
28 }
20 static PyObject *ZstdDecompressionWriter_enter(ZstdDecompressionWriter *self) {
21 if (self->closed) {
22 PyErr_SetString(PyExc_ValueError, "stream is closed");
23 return NULL;
24 }
29 25
30 if (self->entered) {
31 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
32 return NULL;
33 }
26 if (self->entered) {
27 PyErr_SetString(ZstdError, "cannot __enter__ multiple times");
28 return NULL;
29 }
34 30
35 self->entered = 1;
31 self->entered = 1;
36 32
37 Py_INCREF(self);
38 return (PyObject*)self;
33 Py_INCREF(self);
34 return (PyObject *)self;
39 35 }
40 36
41 static PyObject* ZstdDecompressionWriter_exit(ZstdDecompressionWriter* self, PyObject* args) {
42 self->entered = 0;
37 static PyObject *ZstdDecompressionWriter_exit(ZstdDecompressionWriter *self,
38 PyObject *args) {
39 self->entered = 0;
43 40
44 if (NULL == PyObject_CallMethod((PyObject*)self, "close", NULL)) {
45 return NULL;
46 }
41 if (NULL == PyObject_CallMethod((PyObject *)self, "close", NULL)) {
42 return NULL;
43 }
47 44
48 Py_RETURN_FALSE;
45 Py_RETURN_FALSE;
49 46 }
50 47
51 static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) {
52 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx));
48 static PyObject *
49 ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter *self) {
50 return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx));
53 51 }
54 52
55 static PyObject* ZstdDecompressionWriter_write(ZstdDecompressionWriter* self, PyObject* args, PyObject* kwargs) {
56 static char* kwlist[] = {
57 "data",
58 NULL
59 };
53 static PyObject *ZstdDecompressionWriter_write(ZstdDecompressionWriter *self,
54 PyObject *args,
55 PyObject *kwargs) {
56 static char *kwlist[] = {"data", NULL};
60 57
61 PyObject* result = NULL;
62 Py_buffer source;
63 size_t zresult = 0;
64 ZSTD_inBuffer input;
65 ZSTD_outBuffer output;
66 PyObject* res;
67 Py_ssize_t totalWrite = 0;
58 PyObject *result = NULL;
59 Py_buffer source;
60 size_t zresult = 0;
61 ZSTD_inBuffer input;
62 ZSTD_outBuffer output;
63 PyObject *res;
64 Py_ssize_t totalWrite = 0;
68 65
69 #if PY_MAJOR_VERSION >= 3
70 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write",
71 #else
72 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write",
73 #endif
74 kwlist, &source)) {
75 return NULL;
76 }
66 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", kwlist,
67 &source)) {
68 return NULL;
69 }
77 70
78 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
79 PyErr_SetString(PyExc_ValueError,
80 "data buffer should be contiguous and have at most one dimension");
81 goto finally;
82 }
71 if (self->closed) {
72 PyErr_SetString(PyExc_ValueError, "stream is closed");
73 return NULL;
74 }
83 75
84 if (self->closed) {
85 PyErr_SetString(PyExc_ValueError, "stream is closed");
86 return NULL;
87 }
76 output.dst = PyMem_Malloc(self->outSize);
77 if (!output.dst) {
78 PyErr_NoMemory();
79 goto finally;
80 }
81 output.size = self->outSize;
82 output.pos = 0;
88 83
89 output.dst = PyMem_Malloc(self->outSize);
90 if (!output.dst) {
91 PyErr_NoMemory();
92 goto finally;
93 }
94 output.size = self->outSize;
95 output.pos = 0;
84 input.src = source.buf;
85 input.size = source.len;
86 input.pos = 0;
96 87
97 input.src = source.buf;
98 input.size = source.len;
99 input.pos = 0;
88 while (input.pos < (size_t)source.len) {
89 Py_BEGIN_ALLOW_THREADS zresult =
90 ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
91 Py_END_ALLOW_THREADS
100 92
101 while (input.pos < (size_t)source.len) {
102 Py_BEGIN_ALLOW_THREADS
103 zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input);
104 Py_END_ALLOW_THREADS
93 if (ZSTD_isError(zresult)) {
94 PyMem_Free(output.dst);
95 PyErr_Format(ZstdError, "zstd decompress error: %s",
96 ZSTD_getErrorName(zresult));
97 goto finally;
98 }
105 99
106 if (ZSTD_isError(zresult)) {
107 PyMem_Free(output.dst);
108 PyErr_Format(ZstdError, "zstd decompress error: %s",
109 ZSTD_getErrorName(zresult));
110 goto finally;
111 }
100 if (output.pos) {
101 res = PyObject_CallMethod(self->writer, "write", "y#", output.dst,
102 output.pos);
103 if (NULL == res) {
104 goto finally;
105 }
106 Py_XDECREF(res);
107 totalWrite += output.pos;
108 output.pos = 0;
109 }
110 }
112 111
113 if (output.pos) {
114 #if PY_MAJOR_VERSION >= 3
115 res = PyObject_CallMethod(self->writer, "write", "y#",
116 #else
117 res = PyObject_CallMethod(self->writer, "write", "s#",
118 #endif
119 output.dst, output.pos);
120 Py_XDECREF(res);
121 totalWrite += output.pos;
122 output.pos = 0;
123 }
124 }
112 PyMem_Free(output.dst);
125 113
126 PyMem_Free(output.dst);
127
128 if (self->writeReturnRead) {
129 result = PyLong_FromSize_t(input.pos);
130 }
131 else {
132 result = PyLong_FromSsize_t(totalWrite);
133 }
114 if (self->writeReturnRead) {
115 result = PyLong_FromSize_t(input.pos);
116 }
117 else {
118 result = PyLong_FromSsize_t(totalWrite);
119 }
134 120
135 121 finally:
136 PyBuffer_Release(&source);
137 return result;
122 PyBuffer_Release(&source);
123 return result;
138 124 }
139 125
140 static PyObject* ZstdDecompressionWriter_close(ZstdDecompressionWriter* self) {
141 PyObject* result;
142
143 if (self->closed) {
144 Py_RETURN_NONE;
145 }
126 static PyObject *ZstdDecompressionWriter_close(ZstdDecompressionWriter *self) {
127 PyObject *result;
146 128
147 result = PyObject_CallMethod((PyObject*)self, "flush", NULL);
148 self->closed = 1;
149
150 if (NULL == result) {
151 return NULL;
152 }
129 if (self->closed) {
130 Py_RETURN_NONE;
131 }
153 132
154 /* Call close on underlying stream as well. */
155 if (PyObject_HasAttrString(self->writer, "close")) {
156 return PyObject_CallMethod(self->writer, "close", NULL);
157 }
158
159 Py_RETURN_NONE;
160 }
133 self->closing = 1;
134 result = PyObject_CallMethod((PyObject *)self, "flush", NULL);
135 self->closing = 0;
136 self->closed = 1;
161 137
162 static PyObject* ZstdDecompressionWriter_fileno(ZstdDecompressionWriter* self) {
163 if (PyObject_HasAttrString(self->writer, "fileno")) {
164 return PyObject_CallMethod(self->writer, "fileno", NULL);
165 }
166 else {
167 PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer");
168 return NULL;
169 }
138 if (NULL == result) {
139 return NULL;
140 }
141
142 /* Call close on underlying stream as well. */
143 if (self->closefd && PyObject_HasAttrString(self->writer, "close")) {
144 return PyObject_CallMethod(self->writer, "close", NULL);
145 }
146
147 Py_RETURN_NONE;
170 148 }
171 149
172 static PyObject* ZstdDecompressionWriter_flush(ZstdDecompressionWriter* self) {
173 if (self->closed) {
174 PyErr_SetString(PyExc_ValueError, "stream is closed");
175 return NULL;
176 }
177
178 if (PyObject_HasAttrString(self->writer, "flush")) {
179 return PyObject_CallMethod(self->writer, "flush", NULL);
180 }
181 else {
182 Py_RETURN_NONE;
183 }
184 }
185
186 static PyObject* ZstdDecompressionWriter_false(PyObject* self, PyObject* args) {
187 Py_RETURN_FALSE;
150 static PyObject *ZstdDecompressionWriter_fileno(ZstdDecompressionWriter *self) {
151 if (PyObject_HasAttrString(self->writer, "fileno")) {
152 return PyObject_CallMethod(self->writer, "fileno", NULL);
153 }
154 else {
155 PyErr_SetString(PyExc_OSError,
156 "fileno not available on underlying writer");
157 return NULL;
158 }
188 159 }
189 160
190 static PyObject* ZstdDecompressionWriter_true(PyObject* self, PyObject* args) {
191 Py_RETURN_TRUE;
161 static PyObject *ZstdDecompressionWriter_flush(ZstdDecompressionWriter *self) {
162 if (self->closed) {
163 PyErr_SetString(PyExc_ValueError, "stream is closed");
164 return NULL;
165 }
166
167 if (!self->closing && PyObject_HasAttrString(self->writer, "flush")) {
168 return PyObject_CallMethod(self->writer, "flush", NULL);
169 }
170 else {
171 Py_RETURN_NONE;
172 }
173 }
174
175 static PyObject *ZstdDecompressionWriter_iter(PyObject *self) {
176 set_io_unsupported_operation();
177 return NULL;
192 178 }
193 179
194 static PyObject* ZstdDecompressionWriter_unsupported(PyObject* self, PyObject* args, PyObject* kwargs) {
195 PyObject* iomod;
196 PyObject* exc;
180 static PyObject *ZstdDecompressionWriter_iternext(PyObject *self) {
181 set_io_unsupported_operation();
182 return NULL;
183 }
197 184
198 iomod = PyImport_ImportModule("io");
199 if (NULL == iomod) {
200 return NULL;
201 }
185 static PyObject *ZstdDecompressionWriter_false(PyObject *self, PyObject *args) {
186 Py_RETURN_FALSE;
187 }
202 188
203 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
204 if (NULL == exc) {
205 Py_DECREF(iomod);
206 return NULL;
207 }
189 static PyObject *ZstdDecompressionWriter_true(PyObject *self, PyObject *args) {
190 Py_RETURN_TRUE;
191 }
208 192
209 PyErr_SetNone(exc);
210 Py_DECREF(exc);
211 Py_DECREF(iomod);
212
213 return NULL;
193 static PyObject *ZstdDecompressionWriter_unsupported(PyObject *self,
194 PyObject *args,
195 PyObject *kwargs) {
196 set_io_unsupported_operation();
197 return NULL;
214 198 }
215 199
216 200 static PyMethodDef ZstdDecompressionWriter_methods[] = {
217 { "__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
218 PyDoc_STR("Enter a decompression context.") },
219 { "__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
220 PyDoc_STR("Exit a decompression context.") },
221 { "memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, METH_NOARGS,
222 PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") },
223 { "close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL },
224 { "fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL },
225 { "flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL },
226 { "isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
227 { "readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
228 { "readline", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
229 { "readlines", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
230 { "seek", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
231 { "seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL },
232 { "tell", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
233 { "truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
234 { "writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL },
235 { "writelines" , (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
236 { "read", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
237 { "readall", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
238 { "readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL },
239 { "write", (PyCFunction)ZstdDecompressionWriter_write, METH_VARARGS | METH_KEYWORDS,
240 PyDoc_STR("Compress data") },
241 { NULL, NULL }
242 };
201 {"__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS,
202 PyDoc_STR("Enter a decompression context.")},
203 {"__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS,
204 PyDoc_STR("Exit a decompression context.")},
205 {"memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size,
206 METH_NOARGS,
207 PyDoc_STR(
208 "Obtain the memory size in bytes of the underlying decompressor.")},
209 {"close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL},
210 {"fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL},
211 {"flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL},
212 {"isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL},
213 {"readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL},
214 {"readline", (PyCFunction)ZstdDecompressionWriter_unsupported,
215 METH_VARARGS | METH_KEYWORDS, NULL},
216 {"readlines", (PyCFunction)ZstdDecompressionWriter_unsupported,
217 METH_VARARGS | METH_KEYWORDS, NULL},
218 {"seek", (PyCFunction)ZstdDecompressionWriter_unsupported,
219 METH_VARARGS | METH_KEYWORDS, NULL},
220 {"seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL},
221 {"tell", (PyCFunction)ZstdDecompressionWriter_unsupported,
222 METH_VARARGS | METH_KEYWORDS, NULL},
223 {"truncate", (PyCFunction)ZstdDecompressionWriter_unsupported,
224 METH_VARARGS | METH_KEYWORDS, NULL},
225 {"writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL},
226 {"writelines", (PyCFunction)ZstdDecompressionWriter_unsupported,
227 METH_VARARGS | METH_KEYWORDS, NULL},
228 {"read", (PyCFunction)ZstdDecompressionWriter_unsupported,
229 METH_VARARGS | METH_KEYWORDS, NULL},
230 {"readall", (PyCFunction)ZstdDecompressionWriter_unsupported,
231 METH_VARARGS | METH_KEYWORDS, NULL},
232 {"readinto", (PyCFunction)ZstdDecompressionWriter_unsupported,
233 METH_VARARGS | METH_KEYWORDS, NULL},
234 {"write", (PyCFunction)ZstdDecompressionWriter_write,
235 METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Compress data")},
236 {NULL, NULL}};
243 237
244 238 static PyMemberDef ZstdDecompressionWriter_members[] = {
245 { "closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY, NULL },
246 { NULL }
239 {"closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY,
240 NULL},
241 {NULL}};
242
243 PyType_Slot ZstdDecompressionWriterSlots[] = {
244 {Py_tp_dealloc, ZstdDecompressionWriter_dealloc},
245 {Py_tp_iter, ZstdDecompressionWriter_iter},
246 {Py_tp_iternext, ZstdDecompressionWriter_iternext},
247 {Py_tp_methods, ZstdDecompressionWriter_methods},
248 {Py_tp_members, ZstdDecompressionWriter_members},
249 {Py_tp_new, PyType_GenericNew},
250 {0, NULL},
247 251 };
248 252
249 PyTypeObject ZstdDecompressionWriterType = {
250 PyVarObject_HEAD_INIT(NULL, 0)
251 "zstd.ZstdDecompressionWriter", /* tp_name */
252 sizeof(ZstdDecompressionWriter),/* tp_basicsize */
253 0, /* tp_itemsize */
254 (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */
255 0, /* tp_print */
256 0, /* tp_getattr */
257 0, /* tp_setattr */
258 0, /* tp_compare */
259 0, /* tp_repr */
260 0, /* tp_as_number */
261 0, /* tp_as_sequence */
262 0, /* tp_as_mapping */
263 0, /* tp_hash */
264 0, /* tp_call */
265 0, /* tp_str */
266 0, /* tp_getattro */
267 0, /* tp_setattro */
268 0, /* tp_as_buffer */
269 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
270 ZstdDecompressionWriter__doc, /* tp_doc */
271 0, /* tp_traverse */
272 0, /* tp_clear */
273 0, /* tp_richcompare */
274 0, /* tp_weaklistoffset */
275 0, /* tp_iter */
276 0, /* tp_iternext */
277 ZstdDecompressionWriter_methods,/* tp_methods */
278 ZstdDecompressionWriter_members,/* tp_members */
279 0, /* tp_getset */
280 0, /* tp_base */
281 0, /* tp_dict */
282 0, /* tp_descr_get */
283 0, /* tp_descr_set */
284 0, /* tp_dictoffset */
285 0, /* tp_init */
286 0, /* tp_alloc */
287 PyType_GenericNew, /* tp_new */
253 PyType_Spec ZstdDecompressionWriterSpec = {
254 "zstd.ZstdDecompressionWriter",
255 sizeof(ZstdDecompressionWriter),
256 0,
257 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
258 ZstdDecompressionWriterSlots,
288 259 };
289 260
290 void decompressionwriter_module_init(PyObject* mod) {
291 Py_SET_TYPE(&ZstdDecompressionWriterType, &PyType_Type);
292 if (PyType_Ready(&ZstdDecompressionWriterType) < 0) {
293 return;
294 }
261 PyTypeObject *ZstdDecompressionWriterType;
262
263 void decompressionwriter_module_init(PyObject *mod) {
264 ZstdDecompressionWriterType =
265 (PyTypeObject *)PyType_FromSpec(&ZstdDecompressionWriterSpec);
266 if (PyType_Ready(ZstdDecompressionWriterType) < 0) {
267 return;
268 }
269
270 Py_INCREF((PyObject *)ZstdDecompressionWriterType);
271 PyModule_AddObject(mod, "ZstdDecompressionWriter",
272 (PyObject *)ZstdDecompressionWriterType);
295 273 }
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
This diff has been collapsed as it changes many lines, (721 lines changed) Show them Hide them
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: file was removed
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now