Show More
The requested changes are too big and content was truncated. Show full diff
@@ -0,0 +1,412 | |||
|
1 | # This file is automatically @generated by Cargo. | |
|
2 | # It is not intended for manual editing. | |
|
3 | version = 3 | |
|
4 | ||
|
5 | [[package]] | |
|
6 | name = "autocfg" | |
|
7 | version = "1.3.0" | |
|
8 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
9 | checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" | |
|
10 | ||
|
11 | [[package]] | |
|
12 | name = "bitflags" | |
|
13 | version = "2.5.0" | |
|
14 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
15 | checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" | |
|
16 | ||
|
17 | [[package]] | |
|
18 | name = "cc" | |
|
19 | version = "1.0.98" | |
|
20 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
21 | checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" | |
|
22 | dependencies = [ | |
|
23 | "jobserver", | |
|
24 | "libc", | |
|
25 | "once_cell", | |
|
26 | ] | |
|
27 | ||
|
28 | [[package]] | |
|
29 | name = "cfg-if" | |
|
30 | version = "1.0.0" | |
|
31 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
32 | checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | |
|
33 | ||
|
34 | [[package]] | |
|
35 | name = "crossbeam-deque" | |
|
36 | version = "0.8.5" | |
|
37 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
38 | checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d" | |
|
39 | dependencies = [ | |
|
40 | "crossbeam-epoch", | |
|
41 | "crossbeam-utils", | |
|
42 | ] | |
|
43 | ||
|
44 | [[package]] | |
|
45 | name = "crossbeam-epoch" | |
|
46 | version = "0.9.18" | |
|
47 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
48 | checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" | |
|
49 | dependencies = [ | |
|
50 | "crossbeam-utils", | |
|
51 | ] | |
|
52 | ||
|
53 | [[package]] | |
|
54 | name = "crossbeam-utils" | |
|
55 | version = "0.8.20" | |
|
56 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
57 | checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" | |
|
58 | ||
|
59 | [[package]] | |
|
60 | name = "either" | |
|
61 | version = "1.12.0" | |
|
62 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
63 | checksum = "3dca9240753cf90908d7e4aac30f630662b02aebaa1b58a3cadabdb23385b58b" | |
|
64 | ||
|
65 | [[package]] | |
|
66 | name = "heck" | |
|
67 | version = "0.4.1" | |
|
68 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
69 | checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" | |
|
70 | ||
|
71 | [[package]] | |
|
72 | name = "hermit-abi" | |
|
73 | version = "0.3.9" | |
|
74 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
75 | checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" | |
|
76 | ||
|
77 | [[package]] | |
|
78 | name = "indoc" | |
|
79 | version = "2.0.5" | |
|
80 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
81 | checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" | |
|
82 | ||
|
83 | [[package]] | |
|
84 | name = "jobserver" | |
|
85 | version = "0.1.31" | |
|
86 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
87 | checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" | |
|
88 | dependencies = [ | |
|
89 | "libc", | |
|
90 | ] | |
|
91 | ||
|
92 | [[package]] | |
|
93 | name = "libc" | |
|
94 | version = "0.2.155" | |
|
95 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
96 | checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" | |
|
97 | ||
|
98 | [[package]] | |
|
99 | name = "lock_api" | |
|
100 | version = "0.4.12" | |
|
101 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
102 | checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" | |
|
103 | dependencies = [ | |
|
104 | "autocfg", | |
|
105 | "scopeguard", | |
|
106 | ] | |
|
107 | ||
|
108 | [[package]] | |
|
109 | name = "memoffset" | |
|
110 | version = "0.9.1" | |
|
111 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
112 | checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a" | |
|
113 | dependencies = [ | |
|
114 | "autocfg", | |
|
115 | ] | |
|
116 | ||
|
117 | [[package]] | |
|
118 | name = "num_cpus" | |
|
119 | version = "1.16.0" | |
|
120 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
121 | checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" | |
|
122 | dependencies = [ | |
|
123 | "hermit-abi", | |
|
124 | "libc", | |
|
125 | ] | |
|
126 | ||
|
127 | [[package]] | |
|
128 | name = "once_cell" | |
|
129 | version = "1.19.0" | |
|
130 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
131 | checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" | |
|
132 | ||
|
133 | [[package]] | |
|
134 | name = "parking_lot" | |
|
135 | version = "0.12.3" | |
|
136 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
137 | checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" | |
|
138 | dependencies = [ | |
|
139 | "lock_api", | |
|
140 | "parking_lot_core", | |
|
141 | ] | |
|
142 | ||
|
143 | [[package]] | |
|
144 | name = "parking_lot_core" | |
|
145 | version = "0.9.10" | |
|
146 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
147 | checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" | |
|
148 | dependencies = [ | |
|
149 | "cfg-if", | |
|
150 | "libc", | |
|
151 | "redox_syscall", | |
|
152 | "smallvec", | |
|
153 | "windows-targets", | |
|
154 | ] | |
|
155 | ||
|
156 | [[package]] | |
|
157 | name = "pkg-config" | |
|
158 | version = "0.3.30" | |
|
159 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
160 | checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" | |
|
161 | ||
|
162 | [[package]] | |
|
163 | name = "portable-atomic" | |
|
164 | version = "1.6.0" | |
|
165 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
166 | checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" | |
|
167 | ||
|
168 | [[package]] | |
|
169 | name = "proc-macro2" | |
|
170 | version = "1.0.84" | |
|
171 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
172 | checksum = "ec96c6a92621310b51366f1e28d05ef11489516e93be030060e5fc12024a49d6" | |
|
173 | dependencies = [ | |
|
174 | "unicode-ident", | |
|
175 | ] | |
|
176 | ||
|
177 | [[package]] | |
|
178 | name = "pyo3" | |
|
179 | version = "0.21.2" | |
|
180 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
181 | checksum = "a5e00b96a521718e08e03b1a622f01c8a8deb50719335de3f60b3b3950f069d8" | |
|
182 | dependencies = [ | |
|
183 | "cfg-if", | |
|
184 | "indoc", | |
|
185 | "libc", | |
|
186 | "memoffset", | |
|
187 | "parking_lot", | |
|
188 | "portable-atomic", | |
|
189 | "pyo3-build-config", | |
|
190 | "pyo3-ffi", | |
|
191 | "pyo3-macros", | |
|
192 | "unindent", | |
|
193 | ] | |
|
194 | ||
|
195 | [[package]] | |
|
196 | name = "pyo3-build-config" | |
|
197 | version = "0.21.2" | |
|
198 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
199 | checksum = "7883df5835fafdad87c0d888b266c8ec0f4c9ca48a5bed6bbb592e8dedee1b50" | |
|
200 | dependencies = [ | |
|
201 | "once_cell", | |
|
202 | "target-lexicon", | |
|
203 | ] | |
|
204 | ||
|
205 | [[package]] | |
|
206 | name = "pyo3-ffi" | |
|
207 | version = "0.21.2" | |
|
208 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
209 | checksum = "01be5843dc60b916ab4dad1dca6d20b9b4e6ddc8e15f50c47fe6d85f1fb97403" | |
|
210 | dependencies = [ | |
|
211 | "libc", | |
|
212 | "pyo3-build-config", | |
|
213 | ] | |
|
214 | ||
|
215 | [[package]] | |
|
216 | name = "pyo3-macros" | |
|
217 | version = "0.21.2" | |
|
218 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
219 | checksum = "77b34069fc0682e11b31dbd10321cbf94808394c56fd996796ce45217dfac53c" | |
|
220 | dependencies = [ | |
|
221 | "proc-macro2", | |
|
222 | "pyo3-macros-backend", | |
|
223 | "quote", | |
|
224 | "syn", | |
|
225 | ] | |
|
226 | ||
|
227 | [[package]] | |
|
228 | name = "pyo3-macros-backend" | |
|
229 | version = "0.21.2" | |
|
230 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
231 | checksum = "08260721f32db5e1a5beae69a55553f56b99bd0e1c3e6e0a5e8851a9d0f5a85c" | |
|
232 | dependencies = [ | |
|
233 | "heck", | |
|
234 | "proc-macro2", | |
|
235 | "pyo3-build-config", | |
|
236 | "quote", | |
|
237 | "syn", | |
|
238 | ] | |
|
239 | ||
|
240 | [[package]] | |
|
241 | name = "python-zstandard" | |
|
242 | version = "0.23.0-pre" | |
|
243 | dependencies = [ | |
|
244 | "libc", | |
|
245 | "num_cpus", | |
|
246 | "pyo3", | |
|
247 | "rayon", | |
|
248 | "zstd-safe", | |
|
249 | "zstd-sys", | |
|
250 | ] | |
|
251 | ||
|
252 | [[package]] | |
|
253 | name = "quote" | |
|
254 | version = "1.0.36" | |
|
255 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
256 | checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" | |
|
257 | dependencies = [ | |
|
258 | "proc-macro2", | |
|
259 | ] | |
|
260 | ||
|
261 | [[package]] | |
|
262 | name = "rayon" | |
|
263 | version = "1.10.0" | |
|
264 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
265 | checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" | |
|
266 | dependencies = [ | |
|
267 | "either", | |
|
268 | "rayon-core", | |
|
269 | ] | |
|
270 | ||
|
271 | [[package]] | |
|
272 | name = "rayon-core" | |
|
273 | version = "1.12.1" | |
|
274 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
275 | checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2" | |
|
276 | dependencies = [ | |
|
277 | "crossbeam-deque", | |
|
278 | "crossbeam-utils", | |
|
279 | ] | |
|
280 | ||
|
281 | [[package]] | |
|
282 | name = "redox_syscall" | |
|
283 | version = "0.5.1" | |
|
284 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
285 | checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" | |
|
286 | dependencies = [ | |
|
287 | "bitflags", | |
|
288 | ] | |
|
289 | ||
|
290 | [[package]] | |
|
291 | name = "scopeguard" | |
|
292 | version = "1.2.0" | |
|
293 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
294 | checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" | |
|
295 | ||
|
296 | [[package]] | |
|
297 | name = "smallvec" | |
|
298 | version = "1.13.2" | |
|
299 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
300 | checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" | |
|
301 | ||
|
302 | [[package]] | |
|
303 | name = "syn" | |
|
304 | version = "2.0.66" | |
|
305 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
306 | checksum = "c42f3f41a2de00b01c0aaad383c5a45241efc8b2d1eda5661812fda5f3cdcff5" | |
|
307 | dependencies = [ | |
|
308 | "proc-macro2", | |
|
309 | "quote", | |
|
310 | "unicode-ident", | |
|
311 | ] | |
|
312 | ||
|
313 | [[package]] | |
|
314 | name = "target-lexicon" | |
|
315 | version = "0.12.14" | |
|
316 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
317 | checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" | |
|
318 | ||
|
319 | [[package]] | |
|
320 | name = "unicode-ident" | |
|
321 | version = "1.0.12" | |
|
322 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
323 | checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" | |
|
324 | ||
|
325 | [[package]] | |
|
326 | name = "unindent" | |
|
327 | version = "0.2.3" | |
|
328 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
329 | checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" | |
|
330 | ||
|
331 | [[package]] | |
|
332 | name = "windows-targets" | |
|
333 | version = "0.52.5" | |
|
334 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
335 | checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" | |
|
336 | dependencies = [ | |
|
337 | "windows_aarch64_gnullvm", | |
|
338 | "windows_aarch64_msvc", | |
|
339 | "windows_i686_gnu", | |
|
340 | "windows_i686_gnullvm", | |
|
341 | "windows_i686_msvc", | |
|
342 | "windows_x86_64_gnu", | |
|
343 | "windows_x86_64_gnullvm", | |
|
344 | "windows_x86_64_msvc", | |
|
345 | ] | |
|
346 | ||
|
347 | [[package]] | |
|
348 | name = "windows_aarch64_gnullvm" | |
|
349 | version = "0.52.5" | |
|
350 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
351 | checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" | |
|
352 | ||
|
353 | [[package]] | |
|
354 | name = "windows_aarch64_msvc" | |
|
355 | version = "0.52.5" | |
|
356 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
357 | checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" | |
|
358 | ||
|
359 | [[package]] | |
|
360 | name = "windows_i686_gnu" | |
|
361 | version = "0.52.5" | |
|
362 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
363 | checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" | |
|
364 | ||
|
365 | [[package]] | |
|
366 | name = "windows_i686_gnullvm" | |
|
367 | version = "0.52.5" | |
|
368 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
369 | checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" | |
|
370 | ||
|
371 | [[package]] | |
|
372 | name = "windows_i686_msvc" | |
|
373 | version = "0.52.5" | |
|
374 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
375 | checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" | |
|
376 | ||
|
377 | [[package]] | |
|
378 | name = "windows_x86_64_gnu" | |
|
379 | version = "0.52.5" | |
|
380 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
381 | checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" | |
|
382 | ||
|
383 | [[package]] | |
|
384 | name = "windows_x86_64_gnullvm" | |
|
385 | version = "0.52.5" | |
|
386 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
387 | checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" | |
|
388 | ||
|
389 | [[package]] | |
|
390 | name = "windows_x86_64_msvc" | |
|
391 | version = "0.52.5" | |
|
392 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
393 | checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" | |
|
394 | ||
|
395 | [[package]] | |
|
396 | name = "zstd-safe" | |
|
397 | version = "7.1.0" | |
|
398 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
399 | checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" | |
|
400 | dependencies = [ | |
|
401 | "zstd-sys", | |
|
402 | ] | |
|
403 | ||
|
404 | [[package]] | |
|
405 | name = "zstd-sys" | |
|
406 | version = "2.0.10+zstd.1.5.6" | |
|
407 | source = "registry+https://github.com/rust-lang/crates.io-index" | |
|
408 | checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" | |
|
409 | dependencies = [ | |
|
410 | "cc", | |
|
411 | "pkg-config", | |
|
412 | ] |
@@ -0,0 +1,30 | |||
|
1 | [package] | |
|
2 | name = "python-zstandard" | |
|
3 | version = "0.23.0" | |
|
4 | authors = ["Gregory Szorc <gregory.szorc@gmail.com>"] | |
|
5 | edition = "2021" | |
|
6 | license = "BSD-3-Clause" | |
|
7 | description = "Python bindings to zstandard compression library" | |
|
8 | readme = "README.rst" | |
|
9 | ||
|
10 | [lib] | |
|
11 | name = "backend_rust" | |
|
12 | crate-type = ["cdylib"] | |
|
13 | path = "rust-ext/src/lib.rs" | |
|
14 | ||
|
15 | [dependencies] | |
|
16 | libc = "0.2.155" | |
|
17 | num_cpus = "1.16.0" | |
|
18 | rayon = "1.10.0" | |
|
19 | ||
|
20 | [dependencies.zstd-safe] | |
|
21 | version = "7.1.0" | |
|
22 | features = ["experimental", "legacy", "zstdmt"] | |
|
23 | ||
|
24 | [dependencies.zstd-sys] | |
|
25 | version = "2.0.10+zstd.1.5.6" | |
|
26 | features = ["experimental", "legacy", "zstdmt"] | |
|
27 | ||
|
28 | [dependencies.pyo3] | |
|
29 | version = "0.21.2" | |
|
30 | features = ["extension-module"] |
@@ -0,0 +1,353 | |||
|
1 | /** | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | ||
|
9 | /* A Python C extension for Zstandard. */ | |
|
10 | ||
|
11 | #if defined(_WIN32) | |
|
12 | #define WIN32_LEAN_AND_MEAN | |
|
13 | #include <Windows.h> | |
|
14 | #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || \ | |
|
15 | defined(__NetBSD__) || defined(__DragonFly__) | |
|
16 | #include <sys/types.h> | |
|
17 | ||
|
18 | #include <sys/sysctl.h> | |
|
19 | ||
|
20 | #endif | |
|
21 | ||
|
22 | #include "python-zstandard.h" | |
|
23 | ||
|
24 | #include "bufferutil.c" | |
|
25 | #include "compressionchunker.c" | |
|
26 | #include "compressiondict.c" | |
|
27 | #include "compressionparams.c" | |
|
28 | #include "compressionreader.c" | |
|
29 | #include "compressionwriter.c" | |
|
30 | #include "compressobj.c" | |
|
31 | #include "compressor.c" | |
|
32 | #include "compressoriterator.c" | |
|
33 | #include "constants.c" | |
|
34 | #include "decompressionreader.c" | |
|
35 | #include "decompressionwriter.c" | |
|
36 | #include "decompressobj.c" | |
|
37 | #include "decompressor.c" | |
|
38 | #include "decompressoriterator.c" | |
|
39 | #include "frameparams.c" | |
|
40 | ||
|
41 | PyObject *ZstdError; | |
|
42 | ||
|
43 | static PyObject *estimate_decompression_context_size(PyObject *self) { | |
|
44 | return PyLong_FromSize_t(ZSTD_estimateDCtxSize()); | |
|
45 | } | |
|
46 | ||
|
47 | static PyObject *frame_content_size(PyObject *self, PyObject *args, | |
|
48 | PyObject *kwargs) { | |
|
49 | static char *kwlist[] = {"source", NULL}; | |
|
50 | ||
|
51 | Py_buffer source; | |
|
52 | PyObject *result = NULL; | |
|
53 | unsigned long long size; | |
|
54 | ||
|
55 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size", | |
|
56 | kwlist, &source)) { | |
|
57 | return NULL; | |
|
58 | } | |
|
59 | ||
|
60 | size = ZSTD_getFrameContentSize(source.buf, source.len); | |
|
61 | ||
|
62 | if (size == ZSTD_CONTENTSIZE_ERROR) { | |
|
63 | PyErr_SetString(ZstdError, "error when determining content size"); | |
|
64 | } | |
|
65 | else if (size == ZSTD_CONTENTSIZE_UNKNOWN) { | |
|
66 | result = PyLong_FromLong(-1); | |
|
67 | } | |
|
68 | else { | |
|
69 | result = PyLong_FromUnsignedLongLong(size); | |
|
70 | } | |
|
71 | ||
|
72 | PyBuffer_Release(&source); | |
|
73 | ||
|
74 | return result; | |
|
75 | } | |
|
76 | ||
|
77 | static PyObject *frame_header_size(PyObject *self, PyObject *args, | |
|
78 | PyObject *kwargs) { | |
|
79 | static char *kwlist[] = {"source", NULL}; | |
|
80 | ||
|
81 | Py_buffer source; | |
|
82 | PyObject *result = NULL; | |
|
83 | size_t zresult; | |
|
84 | ||
|
85 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size", | |
|
86 | kwlist, &source)) { | |
|
87 | return NULL; | |
|
88 | } | |
|
89 | ||
|
90 | zresult = ZSTD_frameHeaderSize(source.buf, source.len); | |
|
91 | if (ZSTD_isError(zresult)) { | |
|
92 | PyErr_Format(ZstdError, "could not determine frame header size: %s", | |
|
93 | ZSTD_getErrorName(zresult)); | |
|
94 | } | |
|
95 | else { | |
|
96 | result = PyLong_FromSize_t(zresult); | |
|
97 | } | |
|
98 | ||
|
99 | PyBuffer_Release(&source); | |
|
100 | ||
|
101 | return result; | |
|
102 | } | |
|
103 | ||
|
104 | static char zstd_doc[] = "Interface to zstandard"; | |
|
105 | ||
|
106 | static PyMethodDef zstd_methods[] = { | |
|
107 | {"estimate_decompression_context_size", | |
|
108 | (PyCFunction)estimate_decompression_context_size, METH_NOARGS, NULL}, | |
|
109 | {"frame_content_size", (PyCFunction)frame_content_size, | |
|
110 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
111 | {"frame_header_size", (PyCFunction)frame_header_size, | |
|
112 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
113 | {"get_frame_parameters", (PyCFunction)get_frame_parameters, | |
|
114 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
115 | {"train_dictionary", (PyCFunction)train_dictionary, | |
|
116 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
117 | {NULL, NULL}}; | |
|
118 | ||
|
119 | void bufferutil_module_init(PyObject *mod); | |
|
120 | void compressobj_module_init(PyObject *mod); | |
|
121 | void compressor_module_init(PyObject *mod); | |
|
122 | void compressionparams_module_init(PyObject *mod); | |
|
123 | void constants_module_init(PyObject *mod); | |
|
124 | void compressionchunker_module_init(PyObject *mod); | |
|
125 | void compressiondict_module_init(PyObject *mod); | |
|
126 | void compressionreader_module_init(PyObject *mod); | |
|
127 | void compressionwriter_module_init(PyObject *mod); | |
|
128 | void compressoriterator_module_init(PyObject *mod); | |
|
129 | void decompressor_module_init(PyObject *mod); | |
|
130 | void decompressobj_module_init(PyObject *mod); | |
|
131 | void decompressionreader_module_init(PyObject *mod); | |
|
132 | void decompressionwriter_module_init(PyObject *mod); | |
|
133 | void decompressoriterator_module_init(PyObject *mod); | |
|
134 | void frameparams_module_init(PyObject *mod); | |
|
135 | ||
|
136 | void zstd_module_init(PyObject *m) { | |
|
137 | /* python-zstandard relies on unstable zstd C API features. This means | |
|
138 | that changes in zstd may break expectations in python-zstandard. | |
|
139 | ||
|
140 | python-zstandard is distributed with a copy of the zstd sources. | |
|
141 | python-zstandard is only guaranteed to work with the bundled version | |
|
142 | of zstd. | |
|
143 | ||
|
144 | However, downstream redistributors or packagers may unbundle zstd | |
|
145 | from python-zstandard. This can result in a mismatch between zstd | |
|
146 | versions and API semantics. This essentially "voids the warranty" | |
|
147 | of python-zstandard and may cause undefined behavior. | |
|
148 | ||
|
149 | We detect this mismatch here and refuse to load the module if this | |
|
150 | scenario is detected. | |
|
151 | */ | |
|
152 | PyObject *features = NULL; | |
|
153 | PyObject *feature = NULL; | |
|
154 | unsigned zstd_ver_no = ZSTD_versionNumber(); | |
|
155 | unsigned our_hardcoded_version = 10506; | |
|
156 | if (ZSTD_VERSION_NUMBER != our_hardcoded_version || | |
|
157 | zstd_ver_no != our_hardcoded_version) { | |
|
158 | PyErr_Format( | |
|
159 | PyExc_ImportError, | |
|
160 | "zstd C API versions mismatch; Python bindings were not " | |
|
161 | "compiled/linked against expected zstd version (%u returned by the " | |
|
162 | "lib, %u hardcoded in zstd headers, %u hardcoded in the cext)", | |
|
163 | zstd_ver_no, ZSTD_VERSION_NUMBER, our_hardcoded_version); | |
|
164 | return; | |
|
165 | } | |
|
166 | ||
|
167 | features = PySet_New(NULL); | |
|
168 | if (NULL == features) { | |
|
169 | PyErr_SetString(PyExc_ImportError, "could not create empty set"); | |
|
170 | return; | |
|
171 | } | |
|
172 | ||
|
173 | feature = PyUnicode_FromString("buffer_types"); | |
|
174 | if (NULL == feature) { | |
|
175 | PyErr_SetString(PyExc_ImportError, "could not create feature string"); | |
|
176 | return; | |
|
177 | } | |
|
178 | ||
|
179 | if (PySet_Add(features, feature) == -1) { | |
|
180 | return; | |
|
181 | } | |
|
182 | ||
|
183 | Py_DECREF(feature); | |
|
184 | ||
|
185 | #ifdef HAVE_ZSTD_POOL_APIS | |
|
186 | feature = PyUnicode_FromString("multi_compress_to_buffer"); | |
|
187 | if (NULL == feature) { | |
|
188 | PyErr_SetString(PyExc_ImportError, "could not create feature string"); | |
|
189 | return; | |
|
190 | } | |
|
191 | ||
|
192 | if (PySet_Add(features, feature) == -1) { | |
|
193 | return; | |
|
194 | } | |
|
195 | ||
|
196 | Py_DECREF(feature); | |
|
197 | #endif | |
|
198 | ||
|
199 | #ifdef HAVE_ZSTD_POOL_APIS | |
|
200 | feature = PyUnicode_FromString("multi_decompress_to_buffer"); | |
|
201 | if (NULL == feature) { | |
|
202 | PyErr_SetString(PyExc_ImportError, "could not create feature string"); | |
|
203 | return; | |
|
204 | } | |
|
205 | ||
|
206 | if (PySet_Add(features, feature) == -1) { | |
|
207 | return; | |
|
208 | } | |
|
209 | ||
|
210 | Py_DECREF(feature); | |
|
211 | #endif | |
|
212 | ||
|
213 | if (PyObject_SetAttrString(m, "backend_features", features) == -1) { | |
|
214 | return; | |
|
215 | } | |
|
216 | ||
|
217 | Py_DECREF(features); | |
|
218 | ||
|
219 | bufferutil_module_init(m); | |
|
220 | compressionparams_module_init(m); | |
|
221 | compressiondict_module_init(m); | |
|
222 | compressobj_module_init(m); | |
|
223 | compressor_module_init(m); | |
|
224 | compressionchunker_module_init(m); | |
|
225 | compressionreader_module_init(m); | |
|
226 | compressionwriter_module_init(m); | |
|
227 | compressoriterator_module_init(m); | |
|
228 | constants_module_init(m); | |
|
229 | decompressor_module_init(m); | |
|
230 | decompressobj_module_init(m); | |
|
231 | decompressionreader_module_init(m); | |
|
232 | decompressionwriter_module_init(m); | |
|
233 | decompressoriterator_module_init(m); | |
|
234 | frameparams_module_init(m); | |
|
235 | } | |
|
236 | ||
|
237 | #if defined(__GNUC__) && (__GNUC__ >= 4) | |
|
238 | #define PYTHON_ZSTD_VISIBILITY __attribute__((visibility("default"))) | |
|
239 | #else | |
|
240 | #define PYTHON_ZSTD_VISIBILITY | |
|
241 | #endif | |
|
242 | ||
|
243 | static struct PyModuleDef zstd_module = {PyModuleDef_HEAD_INIT, "zstd", | |
|
244 | zstd_doc, -1, zstd_methods}; | |
|
245 | ||
|
246 | PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) { | |
|
247 | PyObject *m = PyModule_Create(&zstd_module); | |
|
248 | if (m) { | |
|
249 | zstd_module_init(m); | |
|
250 | if (PyErr_Occurred()) { | |
|
251 | Py_DECREF(m); | |
|
252 | m = NULL; | |
|
253 | } | |
|
254 | } | |
|
255 | return m; | |
|
256 | } | |
|
257 | ||
|
/* Attempt to resolve the number of CPUs in the system.
 *
 * Returns the count reported by the platform-specific query, or 0 when the
 * platform is not recognized or the query fails. The result is never
 * negative.
 */
int cpu_count(void) {
    int count = 0;

#if defined(_WIN32)
    SYSTEM_INFO si;
    si.dwNumberOfProcessors = 0;
    GetSystemInfo(&si);
    count = si.dwNumberOfProcessors;
#elif defined(__APPLE__)
    int num;
    size_t size = sizeof(int);

    if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
        count = num;
    }
#elif defined(__linux__)
    count = sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || \
    defined(__DragonFly__)
    int mib[2];
    size_t len = sizeof(count);
    mib[0] = CTL_HW;
    mib[1] = HW_NCPU;
    if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
        count = 0;
    }
#elif defined(__hpux)
    count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
#endif

    /* sysconf() signals failure with -1; never hand a negative CPU count
       to callers, report "unknown" (0) instead. */
    if (count < 0) {
        count = 0;
    }

    return count;
}
|
291 | ||
|
/* Round `i` up to the nearest power of two.
 *
 * Values that already are a power of two are returned unchanged. An input
 * of 0 yields 0 (the decrement wraps around and the increment wraps back).
 *
 * The highest set bit of `i - 1` is smeared into every lower bit position
 * by doubling the shift until the full width of size_t is covered, so the
 * result is correct for both 32-bit and 64-bit size_t.
 */
size_t roundpow2(size_t i) {
    size_t shift;

    i--;
    for (shift = 1; shift < sizeof(i) * 8; shift <<= 1) {
        i |= i >> shift;
    }
    i++;

    return i;
}
|
303 | ||
|
304 | /* Safer version of _PyBytes_Resize(). | |
|
305 | * | |
|
306 | * _PyBytes_Resize() only works if the refcount is 1. In some scenarios, | |
|
307 | * we can get an object with a refcount > 1, even if it was just created | |
|
308 | * with PyBytes_FromStringAndSize()! That's because (at least) CPython | |
|
309 | * pre-allocates PyBytes instances of size 1 for every possible byte value. | |
|
310 | * | |
|
311 | * If non-0 is returned, obj may or may not be NULL. | |
|
312 | */ | |
|
313 | int safe_pybytes_resize(PyObject **obj, Py_ssize_t size) { | |
|
314 | PyObject *tmp; | |
|
315 | ||
|
316 | if ((*obj)->ob_refcnt == 1) { | |
|
317 | return _PyBytes_Resize(obj, size); | |
|
318 | } | |
|
319 | ||
|
320 | tmp = PyBytes_FromStringAndSize(NULL, size); | |
|
321 | if (!tmp) { | |
|
322 | return -1; | |
|
323 | } | |
|
324 | ||
|
325 | memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj), | |
|
326 | PyBytes_GET_SIZE(*obj)); | |
|
327 | ||
|
328 | Py_DECREF(*obj); | |
|
329 | *obj = tmp; | |
|
330 | ||
|
331 | return 0; | |
|
332 | } | |
|
333 | ||
|
334 | // Set/raise an `io.UnsupportedOperation` exception. | |
|
335 | void set_io_unsupported_operation(void) { | |
|
336 | PyObject *iomod; | |
|
337 | PyObject *exc; | |
|
338 | ||
|
339 | iomod = PyImport_ImportModule("io"); | |
|
340 | if (NULL == iomod) { | |
|
341 | return; | |
|
342 | } | |
|
343 | ||
|
344 | exc = PyObject_GetAttrString(iomod, "UnsupportedOperation"); | |
|
345 | if (NULL == exc) { | |
|
346 | Py_DECREF(iomod); | |
|
347 | return; | |
|
348 | } | |
|
349 | ||
|
350 | PyErr_SetNone(exc); | |
|
351 | Py_DECREF(exc); | |
|
352 | Py_DECREF(iomod); | |
|
353 | } |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
@@ -273,6 +273,8 9751b9ccd74d8386687f88fbdfe280877840ec7d | |||
|
273 | 273 | b964f92261d4fbb64f19aa6af2b072f7730b913a 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAme2VVUZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVn3aDACSMVaJexSgl1UfjBAKjwaF4t9Y2pBKnYibahXmddViwhhIISPzeVtvaM9y/4Cm4SP11S6PQ356aiZ3RjhtQbmRHQJe5cXGkBaxykIxLSC/KgDy9HXHDDATwvo+aF/QVBX8ig/cr0NdVpwtvQq7+rkDNfbObwu8pPIbZGqOoNM1ND2Kz6P+FqbNZfGPwLP/AaCtCl2dXcf/Z774JUsAEZ6InqvP1/m/atAG7phesXhem8cpPb6e8LohuiJpnbV2rUj7SEqk0eF2BRapSukSZC2vxdqsy4hcXO1uwJ3V3GPtegpdMG25OE3ALy/2WKoh4inJV+WfJy1+DEiSdP++Rpadv/By68WIBvWY/rKgWAYPqIE5IKH5CtcZkkFMtfoooFGiz7uvci5+ZaetZnHVPm9FZH3KZsNccsESDkT25I+rwynqt8LKt1qEA+Ur43U6ipG+LZxT7sOGGYYElU6cSoSIcrcMUfsbi0XhgpnZch4QwjoMyzWnXgcjnivnn3arMkw= |
|
274 | 274 | 89ab2459f62ac8da3eb4f3ee2120d1814ce805d5 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfIwvYfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCezXwD/9bb5PG/f9WN/7XgzumLCatokHgAGWTNLmEmP414yT5UAf/hyiMbzUwO9TGKbe8s1UPrlMDfp+zoP/wMc6UrT4vXKwOTbfANyUsZ/T67m4BxyTi+fke5a4Ghb9OsV/qMiwmO6jLC4hk24dodgmZwL/H6JFgsPEHTElSCa9sVv6LZ9NZwu5xn3JgjkP34/l7niMIhOSyfHNOpF4rfWcc8bbojxNHru8nzPx8OVBLVHnidh0dM1D3dkbkRKuxJftzVNkgaw+kI3mpAKntInrVZfqCmrni6biY/GF2Lr+sYA/rWXsdEhZG3EtysP0UhmUYRryKJeLPDZsWIaTqJVPLYRe38DNpfNxQYRUo3mZkbbE0kf389jSsRyVKIK2UYBEKGRg98BHq3Urvdn9lu9s725gfgJMeEsZRt/ljTbmdbpxMUVIFaB0Paw+PlzjTklxlFZCxEnBI34yhHxXNXfQgLvNbokPqF4T5Y5pCWplULlaCBosqu1EGHDxv+o+k2/NECE4RGnv9vON6sweADdCgL3yDlFn7PW0eudQlpnf2/mIzCivFL1BKyUJb1XcFwAQFHRJDyBdaWHtYAswkZMihMVysI8GfjePMpVF2Q4O5osYTPtIXAX92HXJwsp1jGaaP6g2Ipy9v04Q8RH33Nk5i/TEy54jJu7e435wrVhyCnHKntw== |
|
275 | 275 | 4654407c6df8510f8c84741c61b905d71f6adc8f 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfaBCYfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCe8lFD/411juPs3B0x9OBhEK8EChaieu7q8U8hL6pHQ64p7J2j+OORuvVtemsFZ2jvQ53ERX+eSOy+O1B6vZS9W+4scEoGFrPksNaiKQAlBnnEncePv2RJgBP2BNzlp2VYJgL0u7gbTWLeJcrqEZ+4JDmhrYlTPdZ21/cSUbX+kEShpO+tl10cEdmuZLxyu+/0uiSKYQ076MvNPoWL/75Y67kIRBC9Wufufhec0BneMwBF1vvpOEYrF33tJS2AZk+6YHZc7jOic1yMTadrXz+roFkDBsKLjJRk5W8d8f/IMKev0O4fk3m/s4nOFvWPFxGejCbBC3Aj1nyvUjS5HiIa4zBUw1mJl1Ws55xH+tTyNweSdl2v78xYynThpfe7a80zW9/CtGmcuG7CgMW/aq4TlxtMUd74uQowGHQw9vSJT1dWFASud4JPzrpnSTzUXgd2GBYwvhz0hdZdJfY0+eoNI2yz3T+sGEf0wmRF8iVZ1qXInIe7/qusMG93L9sl1H9SebHu/OlTQJLyGxqeidWZintRT6hzsbTYew6cK+Eevpg0TTMFibovjlINRHwwkgYv9/iU/+vlN176nuyywnNpkqv14Xdfpf26qPl6uDHH3JLRree1y4L/2due8hr9vfrNYq9+Cq6FJgjQsSTiJHUYjRKElMDkIkjQPXyMzW1yOk7n6LBsw== |
|
276 | 3d34a380ea3aecef2db1a468d487f675ae0b45f5 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmfujfsZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVslIC/4s39YHv+1dmkU1X1b+VU5G1XpnbHwA0sVymbKuDhlDjbacZhwTamLF8bJioiJhNlPEpbURnuh8ndaldrc4w2MdOLBGpKhPC+ixlEYh3eQqidyzy90xQKdOdilq4QJZ0CcIXpJV8SkgV0i+WQHT5zg3SRhder+NYv4Lu+XFmZ/FSp6Q5r6uyc4+/RIvnU7x+G/AMpeWec9EcruZ8cAuEZQ4yWdqsYXrya6JEfEJdmfW5Wb3ZlF6vAbIgoLlZApFM4Ld1C8wlhdjjX2X7CJmFWb2zAmENaF+6JhhTagkSGA1xapZCi5Ldd2XA1w+qBhAAGdmuTCISL4BSeQVg6pEWDd8QHNl1O0d/acY6yy4sRO8iP6hHbngs7Dd0j06canxHKEq4E9DW6hues7kmKzetol8k9VVk5KaAHvW2rjMDm9PTGY4S29mlU69PIbe2GgCVkhk0CXV6w+XKSdXK7N5cuJnjyEcIYw6Sp4bPJ3YfKF4wcvyueXzPC8glNHgb5cGi/I= | |
|
276 | 277 | ed68f9a47a090cc6450a7d5617baf123ba5fc42a 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfCIFcfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCeyueD/94hn/kb5U9hvtQlUm0el+rV+DAHcoS5GzF/V3Q3/ibpcym6c/JaFdi2S0+kNpam5DAaZBQpQj43YHJUehnMqpZ8XxZuFrhgVA95AQhjKtXualFcnfAnqrT7zdLEgKLHQ3qW6tU0djflkLnv1MDJmQXRKfibVQetgoyctKWzaTSf/V1eOHv0TutV3SEFnymdvMOi0wOmzOJIU/OLE7kWX2kUab1sDWagenoF9nAwDMLTgodLkNcSSTEv7t3iOKoZ8F+OECbGyRSC1bqwuGdL7ndyzXBPycMANYmT2584smRQsqNwpD+TTRNw3uMjqk1tsTkMUJdfaVYpPO2e2rhOZjFxDFfS95jDYLlG8rOwrmr9pYSwA7LV3ZJP9L3LBdcnIMxRF9iyoGLq5zLQgOzwhyDoqKGtTGKa/JnR+Hs6oCQYe12mQAlY797TQUHFUbNf+va0DaJpZzW0MFsUfr+SgL/ruHnOYa2ijejwxHLP38pvAMBSZKvBkW0gLcPS16kvk4XZtbHXeFbRhFiKOxgrkLQyncoFk7N5XNVucUsIsMd2JFwFTpxGrp+TploJw0AC/xAC816XY2iIulKVyZ3VjOI0+oqZRVr4T7fJCzs6bcqYvShMf96J1wS5crKBeTIiTZYJCakj/ql4GvJmSshxGa3flL695HMcqWUcE5Ql1BHgg== |
|
277 | 278 | cb2b2242428df5be87693b8acaac71deafcff2c5 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfYltMfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCe3evD/0QdpG+TO2caznVLGTXm5dccYKLfSdFymoBoZKkzGHqzZgbeT6uw3NubRNthqd++lby9pfzIi4DBV+LUc2/IC+jWBzNKKFIm6OpXXtVc2TRj0MN3Nbhx761QDmguVmWuyqZu8/uAttCbFvf91LE2Oky2FtLfUFDzcWDf0p+FzkfMZaZktQnSi9O3xFfbdA0j6dVmxuYfOIbGlCznxwXgbOaaAjMl6dCaE3MKjgZsrG6lmwFvCY3GVRp255sLm4bjHoGHyEJRsdp74BVzG6tdNJ5HjD6Wd1ATFUbhpE7CmEPZ6a5joCbxkXoOZxAYWF6IBDQeHaITnRL29xc6I15I5sKoCtixepPlJONucYke1BLSxW3nYHpnmeKeVhwEE03es6A1jDKxXZp5CZ54fBcGD/KlZ6Yd0tMaZGoczUpLU0suGU5xLtCTcS9XSEOsC1yt5kl7/A8r0GPaGq6oOOJNPJBXpZ5hQdaUSmM9ilzbSZBMnjd1TlqQq7tsJv471E+YehO5PAf2vqFtfcNSdDhinL7xGqZH5/vMOH0F+lIArleNIQDXsuXDeKz22vFdiBmJBSZPmAuJ2uyiTEaSbFXpDyM/CVL2sKNziWE3UqqTi5e2REdvUfqmAuc7Q9RHWqbt9ERh0bIIdbDgNkn81beX1AADAcUm8MS7mrddcgMMNPx8A== |
|
278 | 279 | 9b285d51e4b6fb01eb9de357092b993311bd1152 0 iQHNBAABCgA3FiEEH2b4zfZU6QXBHaBhoR4BzQ4F2VYFAmfjNHcZHGFscGhhcmVAcmFwaGFlbGdvbWVzLmRldgAKCRChHgHNDgXZVpLhC/9r0GZ2P280J5xao8BH223CXOSNznVgEHOwHqPD172iIV9/4wCfhhuHh5AGoLU46ARp25cy8sobnTXvRojmtu22jJxnq4uBsWkCCrF4bdsHI3GVuJS8oiZLkqdjkEjV1bEbtiBo/C2KPC+m+o5/fW91MgeGng5xjrKULozjQfwNNghQjLDNRqVoMTlQsjNhXyddSEaesfNYz/IUz0zcpfFw7aKlpEZVps0qKu8OcPggHOUEdaTgFBpzT2JxkGukIlVFtQdkDjz5EyNKjFXdTkKmhb6+4u5C+NDXM+d7p5/g3TU5cQa0ScCp1f3NqHl5cMF8axc5FUK+FTq9JOyPSBvFTxPTghc46mMb+4g2CRupigZJnO106saBqKsSZC76T6N0fN0wgUNEsMM0iS5binm6vTcNRHJdjezksWyfuPPmtGVKyGc5zanMY+xNhVASQlXp/nldS1p9iEMxUy4OjC++lrp0efkSnGX/4dJ718cDV78NoRVVG0hXazAgNKIr7nU= |
|
280 | 010a86744bfc6ede2e3ed3ecef3faee0de804f47 0 iQJTBAABCgA9FiEE7SE+SGsjJJvcEHtZRcqpKnHKAnsFAmfySqkfHHBpZXJyZS15dmVzLmRhdmlkQGVucy1seW9uLm9yZwAKCRBFyqkqccoCe5c+D/4z66Y4f8AGos/u9zBzCy2CFQ5sXyK+b8nMmutbaWSqp3FkgVq7/ULqY76eSDI3EMqW6sGUQNOfW7jWJHzW9TMZc8KC/Z4cLN2Q7fSlx0o0b7to2i6Gq4C6UJwExruNm9zL/LO8zBBsaN24SSLYCes2R6EKy0RYNWhn41a5qCEMkH9CYJHJZzpKUmDLO7yePolspkZdcxjOE8TM2MAJ9+S/BOPZmGEHcwP89XnXurYFXA4YA/jC+7v4vWnPEUYPB+YYiKTAig5CisQ3JYdPJm+HLYbrVhANLrTMUDDXMPSU/q0KgS+RdIfbpzNygiHihcuGw6Nw+/w2sdrR8RH1RbsdyCvQYLm3R3xcUM67ciAH6HKwnfn9/8yezsmJrX29w859rcbA+2A+Wqg9dFxQ7VBh9I4JU6Azf5V8pa1b2fmq7jOm2g58Be5cuCWJ9eoD04KYhcFwpfvHWN+MCW8bOQf93+V8fp/HvC7GolCCsqXZwK4usY4uZoRbuXyJ7TXTNOW2VeqQTLR50rM5HTV9zAHoFBK2aX/yWHFm9XvlqHHle6iVpRCWzASPMO69q8CdkJtzPkFu0ultVrmTTiZo2eP0GO9h3EwmEsijqQ2hyhl3Q2pH/FmRfFD9Z2qgCs+i58iQuTFrZUwljtUt/rFMkXmA2CrM92hxUqtpEDgFjIdFtw== |
@@ -289,6 +289,8 9751b9ccd74d8386687f88fbdfe280877840ec7d | |||
|
289 | 289 | b964f92261d4fbb64f19aa6af2b072f7730b913a 6.9.2 |
|
290 | 290 | 89ab2459f62ac8da3eb4f3ee2120d1814ce805d5 6.9.3 |
|
291 | 291 | 4654407c6df8510f8c84741c61b905d71f6adc8f 6.9.4 |
|
292 | 3d34a380ea3aecef2db1a468d487f675ae0b45f5 6.9.5 | |
|
292 | 293 | ed68f9a47a090cc6450a7d5617baf123ba5fc42a 7.0rc0 |
|
293 | 294 | cb2b2242428df5be87693b8acaac71deafcff2c5 7.0rc1 |
|
294 | 295 | 9b285d51e4b6fb01eb9de357092b993311bd1152 7.0 |
|
296 | 010a86744bfc6ede2e3ed3ecef3faee0de804f47 7.0.1 |
@@ -591,6 +591,9 build-c-wheel-macos: | |||
|
591 | 591 | tags: |
|
592 | 592 | - macos |
|
593 | 593 | script: |
|
594 | - sh -c 'which "$PYTHON"' | |
|
595 | - sh -c '"$PYTHON" -V' | |
|
596 | - sh -c 'which cibuildwheel' | |
|
594 | 597 | - PLATFORM=`$PYTHON -c 'import sys; print(sys.platform)'` |
|
595 | 598 | - rm -rf tmp-wheels |
|
596 | 599 | - cibuildwheel --output-dir tmp-wheels/ |
@@ -1,9 +1,10 | |||
|
1 | 1 | graft c-ext |
|
2 | 2 | graft debian |
|
3 | graft rust-ext | |
|
3 | 4 | graft zstd |
|
4 | 5 | graft tests |
|
5 | 6 | include make_cffi.py |
|
6 | 7 | include setup_zstd.py |
|
7 |
include |
|
|
8 | include Cargo.lock | |
|
9 | include Cargo.toml | |
|
8 | 10 | include LICENSE |
|
9 | include NEWS.rst |
This diff has been collapsed as it changes many lines, (1597 lines changed) Show them Hide them | |||
@@ -2,6 +2,8 | |||
|
2 | 2 | python-zstandard |
|
3 | 3 | ================ |
|
4 | 4 | |
|
5 | | |ci-test| |ci-wheel| |ci-typing| |ci-sdist| |ci-anaconda| |ci-sphinx| | |
|
6 | ||
|
5 | 7 | This project provides Python bindings for interfacing with the |
|
6 | 8 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension |
|
7 | 9 | and CFFI interface are provided. |
@@ -11,1592 +13,25 underlying C API through a Pythonic inte | |||
|
11 | 13 | performance. This means exposing most of the features and flexibility |
|
12 | 14 | of the C API while not sacrificing usability or safety that Python provides. |
|
13 | 15 | |
|
14 |
The canonical home for this project |
|
|
15 | the author. For convenience, that repository is frequently synchronized to | |
|
16 | The canonical home for this project is | |
|
16 | 17 | https://github.com/indygreg/python-zstandard. |
|
17 | 18 | |
|
18 | | |ci-status| | |
|
19 | ||
|
20 | Requirements | |
|
21 | ============ | |
|
22 | ||
|
23 | This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8 | |
|
24 | on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above. | |
|
25 | x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS. | |
|
26 | ||
|
27 | Installing | |
|
28 | ========== | |
|
29 | ||
|
30 | This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard. | |
|
31 | So, to install this package:: | |
|
32 | ||
|
33 | $ pip install zstandard | |
|
34 | ||
|
35 | Binary wheels are made available for some platforms. If you need to | |
|
36 | install from a source distribution, all you should need is a working C | |
|
37 | compiler and the Python development headers/libraries. On many Linux | |
|
38 | distributions, you can install a ``python-dev`` or ``python-devel`` | |
|
39 | package to provide these dependencies. | |
|
40 | ||
|
41 | Packages are also uploaded to Anaconda Cloud at | |
|
42 | https://anaconda.org/indygreg/zstandard. See that URL for how to install | |
|
43 | this package with ``conda``. | |
|
44 | ||
|
45 | Performance | |
|
46 | =========== | |
|
47 | ||
|
48 | zstandard is a highly tunable compression algorithm. In its default settings | |
|
49 | (compression level 3), it will be faster at compression and decompression and | |
|
50 | will have better compression ratios than zlib on most data sets. When tuned | |
|
51 | for speed, it approaches lz4's speed and ratios. When tuned for compression | |
|
52 | ratio, it approaches lzma ratios and compression speed, but decompression | |
|
53 | speed is much faster. See the official zstandard documentation for more. | |
|
54 | ||
|
55 | zstandard and this library support multi-threaded compression. There is a | |
|
56 | mechanism to compress large inputs using multiple threads. | |
|
57 | ||
|
58 | The performance of this library is usually very similar to what the zstandard | |
|
59 | C API can deliver. Overhead in this library is due to general Python overhead | |
|
60 | and can't easily be avoided by *any* zstandard Python binding. This library | |
|
61 | exposes multiple APIs for performing compression and decompression so callers | |
|
62 | can pick an API suitable for their need. Contrast with the compression | |
|
63 | modules in Python's standard library (like ``zlib``), which only offer limited | |
|
64 | mechanisms for performing operations. The API flexibility means consumers can | |
|
65 | choose to use APIs that facilitate zero copying or minimize Python object | |
|
66 | creation and garbage collection overhead. | |
|
67 | ||
|
68 | This library is capable of single-threaded throughputs well over 1 GB/s. For | |
|
69 | exact numbers, measure yourself. The source code repository has a ``bench.py`` | |
|
70 | script that can be used to measure things. | |
|
71 | ||
|
72 | API | |
|
73 | === | |
|
74 | ||
|
75 | To interface with Zstandard, simply import the ``zstandard`` module:: | |
|
76 | ||
|
77 | import zstandard | |
|
78 | ||
|
79 | It is a popular convention to alias the module as a different name for | |
|
80 | brevity:: | |
|
81 | ||
|
82 | import zstandard as zstd | |
|
83 | ||
|
84 | This module attempts to import and use either the C extension or CFFI | |
|
85 | implementation. On Python platforms known to support C extensions (like | |
|
86 | CPython), it raises an ImportError if the C extension cannot be imported. | |
|
87 | On Python platforms known to not support C extensions (like PyPy), it only | |
|
88 | attempts to import the CFFI implementation and raises ImportError if that | |
|
89 | can't be done. On other platforms, it first tries to import the C extension | |
|
90 | then falls back to CFFI if that fails and raises ImportError if CFFI fails. | |
|
91 | ||
|
92 | To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY`` | |
|
93 | environment variable can be set. The following values are accepted: | |
|
94 | ||
|
95 | default | |
|
96 | The behavior described above. | |
|
97 | cffi_fallback | |
|
98 | Always try to import the C extension then fall back to CFFI if that | |
|
99 | fails. | |
|
100 | cext | |
|
101 | Only attempt to import the C extension. | |
|
102 | cffi | |
|
103 | Only attempt to import the CFFI implementation. | |
|
104 | ||
|
105 | In addition, the ``zstandard`` module exports a ``backend`` attribute | |
|
106 | containing the string name of the backend being used. It will be one | |
|
107 | of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively). | |
|
108 | ||
|
109 | The types, functions, and attributes exposed by the ``zstandard`` module | |
|
110 | are documented in the sections below. | |
|
111 | ||
|
112 | .. note:: | |
|
113 | ||
|
114 | The documentation in this section makes references to various zstd | |
|
115 | concepts and functionality. The source repository contains a | |
|
116 | ``docs/concepts.rst`` file explaining these in more detail. | |
|
117 | ||
|
118 | ZstdCompressor | |
|
119 | -------------- | |
|
120 | ||
|
121 | The ``ZstdCompressor`` class provides an interface for performing | |
|
122 | compression operations. Each instance is essentially a wrapper around a | |
|
123 | ``ZSTD_CCtx`` from the C API. | |
|
124 | ||
|
125 | Each instance is associated with parameters that control compression | |
|
126 | behavior. These come from the following named arguments (all optional): | |
|
127 | ||
|
128 | level | |
|
129 | Integer compression level. Valid values are between 1 and 22. | |
|
130 | dict_data | |
|
131 | Compression dictionary to use. | |
|
132 | ||
|
133 | Note: When using dictionary data and ``compress()`` is called multiple | |
|
134 | times, the ``ZstdCompressionParameters`` derived from an integer | |
|
135 | compression ``level`` and the first compressed data's size will be reused | |
|
136 | for all subsequent operations. This may not be desirable if source data | |
|
137 | size varies significantly. | |
|
138 | compression_params | |
|
139 | A ``ZstdCompressionParameters`` instance defining compression settings. | |
|
140 | write_checksum | |
|
141 | Whether a 4 byte checksum should be written with the compressed data. | |
|
142 | Defaults to False. If True, the decompressor can verify that decompressed | |
|
143 | data matches the original input data. | |
|
144 | write_content_size | |
|
145 | Whether the size of the uncompressed data will be written into the | |
|
146 | header of compressed data. Defaults to True. The data will only be | |
|
147 | written if the compressor knows the size of the input data. This is | |
|
148 | often not true for streaming compression. | |
|
149 | write_dict_id | |
|
150 | Whether to write the dictionary ID into the compressed data. | |
|
151 | Defaults to True. The dictionary ID is only written if a dictionary | |
|
152 | is being used. | |
|
153 | threads | |
|
154 | Enables and sets the number of threads to use for multi-threaded compression | |
|
155 | operations. Defaults to 0, which means to use single-threaded compression. | |
|
156 | Negative values will resolve to the number of logical CPUs in the system. | |
|
157 | Read below for more info on multi-threaded compression. This argument only | |
|
158 | controls thread count for operations that operate on individual pieces of | |
|
159 | data. APIs that spawn multiple threads for working on multiple pieces of | |
|
160 | data have their own ``threads`` argument. | |
|
161 | ||
|
162 | ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``, | |
|
163 | ``write_content_size``, ``write_dict_id``, and ``threads``. | |
|
164 | ||
|
165 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` | |
|
166 | instances can be called from multiple Python threads simultaneously. In other | |
|
167 | words, assume instances are not thread safe unless stated otherwise. | |
|
168 | ||
|
169 | Utility Methods | |
|
170 | ^^^^^^^^^^^^^^^ | |
|
171 | ||
|
172 | ``frame_progression()`` returns a 3-tuple containing the number of bytes | |
|
173 | ingested, consumed, and produced by the current compression operation. | |
|
174 | ||
|
175 | ``memory_size()`` obtains the memory utilization of the underlying zstd | |
|
176 | compression context, in bytes.:: | |
|
177 | ||
|
178 | cctx = zstd.ZstdCompressor() | |
|
179 | memory = cctx.memory_size() | |
|
180 | ||
|
181 | Simple API | |
|
182 | ^^^^^^^^^^ | |
|
183 | ||
|
184 | ``compress(data)`` compresses and returns data as a one-shot operation.:: | |
|
185 | ||
|
186 | cctx = zstd.ZstdCompressor() | |
|
187 | compressed = cctx.compress(b'data to compress') | |
|
188 | ||
|
189 | The ``data`` argument can be any object that implements the *buffer protocol*. | |
|
190 | ||
|
191 | Stream Reader API | |
|
192 | ^^^^^^^^^^^^^^^^^ | |
|
193 | ||
|
194 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |
|
195 | ``io.RawIOBase`` interface for reading compressed output as a stream:: | |
|
196 | ||
|
197 | with open(path, 'rb') as fh: | |
|
198 | cctx = zstd.ZstdCompressor() | |
|
199 | reader = cctx.stream_reader(fh) | |
|
200 | while True: | |
|
201 | chunk = reader.read(16384) | |
|
202 | if not chunk: | |
|
203 | break | |
|
204 | ||
|
205 | # Do something with compressed chunk. | |
|
206 | ||
|
207 | Instances can also be used as context managers:: | |
|
208 | ||
|
209 | with open(path, 'rb') as fh: | |
|
210 | with cctx.stream_reader(fh) as reader: | |
|
211 | while True: | |
|
212 | chunk = reader.read(16384) | |
|
213 | if not chunk: | |
|
214 | break | |
|
215 | ||
|
216 | # Do something with compressed chunk. | |
|
217 | ||
|
218 | When the context manager exits or ``close()`` is called, the stream is closed, | |
|
219 | underlying resources are released, and future operations against the compression | |
|
220 | stream will fail. | |
|
221 | ||
|
222 | The ``source`` argument to ``stream_reader()`` can be any object with a | |
|
223 | ``read(size)`` method or any object implementing the *buffer protocol*. | |
|
224 | ||
|
225 | ``stream_reader()`` accepts a ``size`` argument specifying how large the input | |
|
226 | stream is. This is used to adjust compression parameters so they are | |
|
227 | tailored to the source size.:: | |
|
228 | ||
|
229 | with open(path, 'rb') as fh: | |
|
230 | cctx = zstd.ZstdCompressor() | |
|
231 | with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader: | |
|
232 | ... | |
|
233 | ||
|
234 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |
|
235 | to that stream should be via the ``read_size`` argument. It defaults to | |
|
236 | ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |
|
237 | ||
|
238 | with open(path, 'rb') as fh: | |
|
239 | cctx = zstd.ZstdCompressor() | |
|
240 | # Will perform fh.read(8192) when obtaining data to feed into the | |
|
241 | # compressor. | |
|
242 | with cctx.stream_reader(fh, read_size=8192) as reader: | |
|
243 | ... | |
|
244 | ||
|
245 | The stream returned by ``stream_reader()`` is neither writable nor seekable | |
|
246 | (even if the underlying source is seekable). ``readline()`` and | |
|
247 | ``readlines()`` are not implemented because they don't make sense for | |
|
248 | compressed data. ``tell()`` returns the number of compressed bytes | |
|
249 | emitted so far. | |
|
250 | ||
|
251 | Streaming Input API | |
|
252 | ^^^^^^^^^^^^^^^^^^^ | |
|
253 | ||
|
254 | ``stream_writer(fh)`` allows you to *stream* data into a compressor. | |
|
255 | ||
|
256 | Returned instances implement the ``io.RawIOBase`` interface. Only methods | |
|
257 | that involve writing will do useful things. | |
|
258 | ||
|
259 | The argument to ``stream_writer()`` must have a ``write(data)`` method. As | |
|
260 | compressed data is available, ``write()`` will be called with the compressed | |
|
261 | data as its argument. Many common Python types implement ``write()``, including | |
|
262 | open file handles and ``io.BytesIO``. | |
|
263 | ||
|
264 | The ``write(data)`` method is used to feed data into the compressor. | |
|
265 | ||
|
266 | The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever | |
|
267 | data remains within the compressor's internal state into the output object. This | |
|
268 | may result in 0 or more ``write()`` calls to the output object. This method | |
|
269 | accepts an optional ``flush_mode`` argument to control the flushing behavior. | |
|
270 | Its value can be any of the ``FLUSH_*`` constants. | |
|
271 | ||
|
272 | Both ``write()`` and ``flush()`` return the number of bytes written to the | |
|
273 | object's ``write()``. In many cases, small inputs do not accumulate enough | |
|
274 | data to cause a write and ``write()`` will return ``0``. | |
|
275 | ||
|
276 | Calling ``close()`` will mark the stream as closed and subsequent I/O | |
|
277 | operations will raise ``ValueError`` (per the documented behavior of | |
|
278 | ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying | |
|
279 | stream if such a method exists. | |
|
280 | ||
|
281 | Typical usage is as follows:: | |
|
282 | ||
|
283 | cctx = zstd.ZstdCompressor(level=10) | |
|
284 | compressor = cctx.stream_writer(fh) | |
|
285 | ||
|
286 | compressor.write(b'chunk 0\n') | |
|
287 | compressor.write(b'chunk 1\n') | |
|
288 | compressor.flush() | |
|
289 | # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point. | |
|
290 | # Receiver is also expecting more data in the zstd *frame*. | |
|
291 | ||
|
292 | compressor.write(b'chunk 2\n') | |
|
293 | compressor.flush(zstd.FLUSH_FRAME) | |
|
294 | # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2\n``. | |
|
295 | # Receiver is expecting no more data, as the zstd frame is closed. | |
|
296 | # Any future calls to ``write()`` at this point will construct a new | |
|
297 | # zstd frame. | |
|
298 | ||
|
299 | Instances can be used as context managers. Exiting the context manager is | |
|
300 | the equivalent of calling ``close()``, which is equivalent to calling | |
|
301 | ``flush(zstd.FLUSH_FRAME)``:: | |
|
302 | ||
|
303 | cctx = zstd.ZstdCompressor(level=10) | |
|
304 | with cctx.stream_writer(fh) as compressor: | |
|
305 | compressor.write(b'chunk 0') | |
|
306 | compressor.write(b'chunk 1') | |
|
307 | ... | |
|
308 | ||
|
309 | .. important:: | |
|
310 | ||
|
311 | If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute | |
|
312 | a full zstd *frame* and consumers of this data may complain about malformed | |
|
313 | input. It is recommended to use instances as a context manager to ensure | |
|
314 | *frames* are properly finished. | |
|
315 | ||
|
316 | If the size of the data being fed to this streaming compressor is known, | |
|
317 | you can declare it before compression begins:: | |
|
318 | ||
|
319 | cctx = zstd.ZstdCompressor() | |
|
320 | with cctx.stream_writer(fh, size=data_len) as compressor: | |
|
321 | compressor.write(chunk0) | |
|
322 | compressor.write(chunk1) | |
|
323 | ... | |
|
324 | ||
|
325 | Declaring the size of the source data allows compression parameters to | |
|
326 | be tuned. And if ``write_content_size`` is used, it also results in the | |
|
327 | content size being written into the frame header of the output data. | |
|
328 | ||
|
329 | The size of the chunks passed to the destination's ``write()`` can be specified:: | |
|
330 | ||
|
331 | cctx = zstd.ZstdCompressor() | |
|
332 | with cctx.stream_writer(fh, write_size=32768) as compressor: | |
|
333 | ... | |
|
334 | ||
|
335 | To see how much memory is being used by the streaming compressor:: | |
|
336 | ||
|
337 | cctx = zstd.ZstdCompressor() | |
|
338 | with cctx.stream_writer(fh) as compressor: | |
|
339 | ... | |
|
340 | byte_size = compressor.memory_size() | |
|
341 | ||
|
342 | The total number of bytes written so far is exposed via ``tell()``:: | |
|
343 | ||
|
344 | cctx = zstd.ZstdCompressor() | |
|
345 | with cctx.stream_writer(fh) as compressor: | |
|
346 | ... | |
|
347 | total_written = compressor.tell() | |
|
348 | ||
|
349 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control | |
|
350 | the return value of ``write()``. When ``False`` (the default), ``write()`` returns | |
|
351 | the number of bytes that were written via ``write()`` to the underlying object. When | |
|
352 | ``True``, ``write()`` returns the number of bytes read from the input that | |
|
353 | were subsequently written to the compressor. ``True`` is the *proper* behavior | |
|
354 | for ``write()`` as specified by the ``io.RawIOBase`` interface and will become | |
|
355 | the default value in a future release. | |
|
356 | ||
|
357 | Streaming Output API | |
|
358 | ^^^^^^^^^^^^^^^^^^^^ | |
|
359 | ||
|
360 | ``read_to_iter(reader)`` provides a mechanism to stream data out of a | |
|
361 | compressor as an iterator of data chunks.:: | |
|
362 | ||
|
363 | cctx = zstd.ZstdCompressor() | |
|
364 | for chunk in cctx.read_to_iter(fh): | |
|
365 | # Do something with emitted data. | |
|
366 | ||
|
367 | ``read_to_iter()`` accepts an object that has a ``read(size)`` method or | |
|
368 | conforms to the buffer protocol. | |
|
369 | ||
|
370 | Uncompressed data is fetched from the source either by calling ``read(size)`` | |
|
371 | or by fetching a slice of data from the object directly (in the case where | |
|
372 | the buffer protocol is being used). The returned iterator consists of chunks | |
|
373 | of compressed data. | |
|
374 | ||
|
375 | If reading from the source via ``read()``, ``read()`` will be called until | |
|
376 | it raises or returns an empty bytes (``b''``). It is perfectly valid for | |
|
377 | the source to deliver fewer bytes than what was requested by ``read(size)``. | |
|
378 | ||
|
379 | Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument | |
|
380 | declaring the size of the input stream:: | |
|
381 | ||
|
382 | cctx = zstd.ZstdCompressor() | |
|
383 | for chunk in cctx.read_to_iter(fh, size=some_int): | |
|
384 | pass | |
|
385 | ||
|
386 | You can also control the size that data is ``read()`` from the source and | |
|
387 | the ideal size of output chunks:: | |
|
388 | ||
|
389 | cctx = zstd.ZstdCompressor() | |
|
390 | for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192): | |
|
391 | pass | |
|
392 | ||
|
393 | Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control | |
|
394 | over the sizes of chunks fed into the compressor. Instead, chunk sizes will | |
|
395 | be whatever the object being read from delivers. These will often be of a | |
|
396 | uniform size. | |
|
397 | ||
|
398 | Stream Copying API | |
|
399 | ^^^^^^^^^^^^^^^^^^ | |
|
400 | ||
|
401 | ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while | |
|
402 | compressing it.:: | |
|
403 | ||
|
404 | cctx = zstd.ZstdCompressor() | |
|
405 | cctx.copy_stream(ifh, ofh) | |
|
406 | ||
|
407 | For example, say you wish to compress a file:: | |
|
408 | ||
|
409 | cctx = zstd.ZstdCompressor() | |
|
410 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: | |
|
411 | cctx.copy_stream(ifh, ofh) | |
|
19 | For usage documentation, see https://python-zstandard.readthedocs.org/. | |
|
412 | 20 | |
|
413 | It is also possible to declare the size of the source stream:: | |
|
414 | ||
|
415 | cctx = zstd.ZstdCompressor() | |
|
416 | cctx.copy_stream(ifh, ofh, size=len_of_input) | |
|
417 | ||
|
418 | You can also specify how large the chunks that are ``read()`` and ``write()`` | |
|
419 | from and to the streams should be:: | |
|
420 | ||
|
421 | cctx = zstd.ZstdCompressor() | |
|
422 | cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384) | |
|
423 | ||
|
424 | The stream copier returns a 2-tuple of bytes read and written:: | |
|
425 | ||
|
426 | cctx = zstd.ZstdCompressor() | |
|
427 | read_count, write_count = cctx.copy_stream(ifh, ofh) | |
|
428 | ||
|
429 | Compressor API | |
|
430 | ^^^^^^^^^^^^^^ | |
|
431 | ||
|
432 | ``compressobj()`` returns an object that exposes ``compress(data)`` and | |
|
433 | ``flush()`` methods. Each returns compressed data or an empty bytes. | |
|
434 | ||
|
435 | The purpose of ``compressobj()`` is to provide an API-compatible interface | |
|
436 | with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to | |
|
437 | swap in different compressor objects while using the same API. | |
|
438 | ||
|
439 | ``flush()`` accepts an optional argument indicating how to end the stream. | |
|
440 | ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream. | |
|
441 | Once this type of flush is performed, ``compress()`` and ``flush()`` can | |
|
442 | no longer be called. This type of flush **must** be called to end the | |
|
443 | compression context. If not called, returned data may be incomplete. | |
|
444 | ||
|
445 | A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a | |
|
446 | zstd block. Flushes of this type can be performed multiple times. The next | |
|
447 | call to ``compress()`` will begin a new zstd block. | |
|
448 | ||
|
449 | Here is how this API should be used:: | |
|
450 | ||
|
451 | cctx = zstd.ZstdCompressor() | |
|
452 | cobj = cctx.compressobj() | |
|
453 | data = cobj.compress(b'raw input 0') | |
|
454 | data = cobj.compress(b'raw input 1') | |
|
455 | data = cobj.flush() | |
|
456 | ||
|
457 | Or to flush blocks:: | |
|
458 | ||
|
459 | cctx = zstd.ZstdCompressor() | |
|
460 | cobj = cctx.compressobj() | |
|
461 | data = cobj.compress(b'chunk in first block') | |
|
462 | data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) | |
|
463 | data = cobj.compress(b'chunk in second block') | |
|
464 | data = cobj.flush() | |
|
465 | ||
|
466 | For best performance results, keep input chunks under 256KB. This avoids | |
|
467 | extra allocations for a large output object. | |
|
468 | ||
|
469 | It is possible to declare the input size of the data that will be fed into | |
|
470 | the compressor:: | |
|
471 | ||
|
472 | cctx = zstd.ZstdCompressor() | |
|
473 | cobj = cctx.compressobj(size=6) | |
|
474 | data = cobj.compress(b'foobar') | |
|
475 | data = cobj.flush() | |
|
476 | ||
|
477 | Chunker API | |
|
478 | ^^^^^^^^^^^ | |
|
479 | ||
|
480 | ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns | |
|
481 | an object that can be used to iteratively feed chunks of data into a compressor | |
|
482 | and produce output chunks of a uniform size. | |
|
483 | ||
|
484 | The object returned by ``chunker()`` exposes the following methods: | |
|
485 | ||
|
486 | ``compress(data)`` | |
|
487 | Feeds new input data into the compressor. | |
|
488 | ||
|
489 | ``flush()`` | |
|
490 | Flushes all data currently in the compressor. | |
|
491 | ||
|
492 | ``finish()`` | |
|
493 | Signals the end of input data. No new data can be compressed after this | |
|
494 | method is called. | |
|
495 | ||
|
496 | ``compress()``, ``flush()``, and ``finish()`` all return an iterator of | |
|
497 | ``bytes`` instances holding compressed data. The iterator may be empty. Callers | |
|
498 | MUST iterate through all elements of the returned iterator before performing | |
|
499 | another operation on the object. | |
|
500 | ||
|
501 | All chunks emitted by ``compress()`` will have a length of ``chunk_size``. | |
|
502 | ||
|
503 | ``flush()`` and ``finish()`` may return a final chunk smaller than | |
|
504 | ``chunk_size``. | |
|
505 | ||
|
506 | Here is how the API should be used:: | |
|
507 | ||
|
508 | cctx = zstd.ZstdCompressor() | |
|
509 | chunker = cctx.chunker(chunk_size=32768) | |
|
510 | ||
|
511 | with open(path, 'rb') as fh: | |
|
512 | while True: | |
|
513 | in_chunk = fh.read(32768) | |
|
514 | if not in_chunk: | |
|
515 | break | |
|
516 | ||
|
517 | for out_chunk in chunker.compress(in_chunk): | |
|
518 | # Do something with output chunk of size 32768. | |
|
519 | ||
|
520 | for out_chunk in chunker.finish(): | |
|
521 | # Do something with output chunks that finalize the zstd frame. | |
|
522 | ||
|
523 | The ``chunker()`` API is often a better alternative to ``compressobj()``. | |
|
524 | ||
|
525 | ``compressobj()`` will emit output data as it is available. This results in a | |
|
526 | *stream* of output chunks of varying sizes. The consistency of the output chunk | |
|
527 | size with ``chunker()`` is more appropriate for many usages, such as sending | |
|
528 | compressed data to a socket. | |
|
529 | ||
|
530 | ``compressobj()`` may also perform extra memory reallocations in order to | |
|
531 | dynamically adjust the sizes of the output chunks. Since ``chunker()`` output | |
|
532 | chunks are all the same size (except for flushed or final chunks), there is | |
|
533 | less memory allocation overhead. | |
|
534 | ||
|
535 | Batch Compression API | |
|
536 | ^^^^^^^^^^^^^^^^^^^^^ | |
|
537 | ||
|
538 | (Experimental. Not yet supported in CFFI bindings.) | |
|
539 | ||
|
540 | ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple | |
|
541 | inputs as a single operation. | |
|
542 | ||
|
543 | Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a | |
|
544 | ``BufferWithSegments``, or a list containing byte like objects. Each element of | |
|
545 | the container will be compressed individually using the configured parameters | |
|
546 | on the ``ZstdCompressor`` instance. | |
|
547 | ||
|
548 | The ``threads`` argument controls how many threads to use for compression. The | |
|
549 | default is ``0`` which means to use a single thread. Negative values use the | |
|
550 | number of logical CPUs in the machine. | |
|
551 | ||
|
552 | The function returns a ``BufferWithSegmentsCollection``. This type represents | |
|
553 | N discrete memory allocations, each holding 1 or more compressed frames. | |
|
554 | ||
|
555 | Output data is written to shared memory buffers. This means that unlike | |
|
556 | regular Python objects, a reference to *any* object within the collection | |
|
557 | keeps the shared buffer and therefore memory backing it alive. This can have | |
|
558 | undesirable effects on process memory usage. | |
|
559 | ||
|
560 | The API and behavior of this function are experimental and will likely change. | |
|
561 | Known deficiencies include: | |
|
562 | ||
|
563 | * If asked to use multiple threads, it will always spawn that many threads, | |
|
564 | even if the input is too small to use them. It should automatically lower | |
|
565 | the thread count when the extra threads would just add overhead. | |
|
566 | * The buffer allocation strategy is fixed. There is room to make it dynamic, | |
|
567 | perhaps even to allow one output buffer per input, facilitating a variation | |
|
568 | of the API to return a list without the adverse effects of shared memory | |
|
569 | buffers. | |
|
570 | ||
|
571 | ZstdDecompressor | |
|
572 | ---------------- | |
|
573 | ||
|
574 | The ``ZstdDecompressor`` class provides an interface for performing | |
|
575 | decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from | |
|
576 | the C API. | |
|
577 | ||
|
578 | Each instance is associated with parameters that control decompression. These | |
|
579 | come from the following named arguments (all optional): | |
|
580 | ||
|
581 | dict_data | |
|
582 | Compression dictionary to use. | |
|
583 | max_window_size | |
|
584 | Sets an upper limit on the window size for decompression operations in | |
|
585 | kibibytes. This setting can be used to prevent large memory allocations | |
|
586 | for inputs using large compression windows. | |
|
587 | format | |
|
588 | Set the format of data for the decoder. By default, this is | |
|
589 | ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to | |
|
590 | allow decoding frames without the 4 byte magic header. Not all decompression | |
|
591 | APIs support this mode. | |
|
592 | ||
|
593 | The interface of this class is very similar to ``ZstdCompressor`` (by design). | |
|
594 | ||
|
595 | Unless specified otherwise, assume that no two methods of ``ZstdDecompressor`` | |
|
596 | instances can be called from multiple Python threads simultaneously. In other | |
|
597 | words, assume instances are not thread safe unless stated otherwise. | |
|
598 | ||
|
599 | Utility Methods | |
|
600 | ^^^^^^^^^^^^^^^ | |
|
601 | ||
|
602 | ``memory_size()`` obtains the size of the underlying zstd decompression context, | |
|
603 | in bytes.:: | |
|
604 | ||
|
605 | dctx = zstd.ZstdDecompressor() | |
|
606 | size = dctx.memory_size() | |
|
21 | .. |ci-test| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/test.yml/badge.svg | |
|
22 | :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/test.yml | |
|
607 | 23 | |
|
608 | Simple API | |
|
609 | ^^^^^^^^^^ | |
|
610 | ||
|
611 | ``decompress(data)`` can be used to decompress an entire compressed zstd | |
|
612 | frame in a single operation.:: | |
|
613 | ||
|
614 | dctx = zstd.ZstdDecompressor() | |
|
615 | decompressed = dctx.decompress(data) | |
|
616 | ||
|
617 | By default, ``decompress(data)`` will only work on data written with the content | |
|
618 | size encoded in its header (this is the default behavior of | |
|
619 | ``ZstdCompressor().compress()`` but may not be true for streaming compression). If | |
|
620 | compressed data without an embedded content size is seen, ``zstd.ZstdError`` will | |
|
621 | be raised. | |
|
622 | ||
|
623 | If the compressed data doesn't have its content size embedded within it, | |
|
624 | decompression can be attempted by specifying the ``max_output_size`` | |
|
625 | argument.:: | |
|
626 | ||
|
627 | dctx = zstd.ZstdDecompressor() | |
|
628 | uncompressed = dctx.decompress(data, max_output_size=1048576) | |
|
629 | ||
|
630 | Ideally, ``max_output_size`` will be identical to the decompressed output | |
|
631 | size. | |
|
632 | ||
|
633 | If ``max_output_size`` is too small to hold the decompressed data, | |
|
634 | ``zstd.ZstdError`` will be raised. | |
|
635 | ||
|
636 | If ``max_output_size`` is larger than the decompressed data, the allocated | |
|
637 | output buffer will be resized to only use the space required. | |
|
638 | ||
|
639 | Please note that an allocation of the requested ``max_output_size`` will be | |
|
640 | performed every time the method is called. Setting to a very large value could | |
|
641 | result in a lot of work for the memory allocator and may result in | |
|
642 | ``MemoryError`` being raised if the allocation fails. | |
|
643 | ||
|
644 | .. important:: | |
|
645 | ||
|
646 | If the exact size of decompressed data is unknown (not passed in explicitly | |
|
647 | and not stored in the zstandard frame), for performance reasons it is | |
|
648 | encouraged to use a streaming API. | |
|
649 | ||
|
650 | Stream Reader API | |
|
651 | ^^^^^^^^^^^^^^^^^ | |
|
652 | ||
|
653 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |
|
654 | ``io.RawIOBase`` interface for reading decompressed output as a stream:: | |
|
655 | ||
|
656 | with open(path, 'rb') as fh: | |
|
657 | dctx = zstd.ZstdDecompressor() | |
|
658 | reader = dctx.stream_reader(fh) | |
|
659 | while True: | |
|
660 | chunk = reader.read(16384) | |
|
661 | if not chunk: | |
|
662 | break | |
|
663 | ||
|
664 | # Do something with decompressed chunk. | |
|
665 | ||
|
666 | The stream can also be used as a context manager:: | |
|
667 | ||
|
668 | with open(path, 'rb') as fh: | |
|
669 | dctx = zstd.ZstdDecompressor() | |
|
670 | with dctx.stream_reader(fh) as reader: | |
|
671 | ... | |
|
672 | ||
|
673 | When used as a context manager, the stream is closed and the underlying | |
|
674 | resources are released when the context manager exits. Future operations against | |
|
675 | the stream will fail. | |
|
676 | ||
|
677 | The ``source`` argument to ``stream_reader()`` can be any object with a | |
|
678 | ``read(size)`` method or any object implementing the *buffer protocol*. | |
|
679 | ||
|
680 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |
|
681 | to that stream should be via the ``read_size`` argument. It defaults to | |
|
682 | ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |
|
683 | ||
|
684 | with open(path, 'rb') as fh: | |
|
685 | dctx = zstd.ZstdDecompressor() | |
|
686 | # Will perform fh.read(8192) when obtaining data for the decompressor. | |
|
687 | with dctx.stream_reader(fh, read_size=8192) as reader: | |
|
688 | ... | |
|
689 | ||
|
690 | The stream returned by ``stream_reader()`` is not writable. | |
|
691 | ||
|
692 | The stream returned by ``stream_reader()`` is *partially* seekable. | |
|
693 | Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward | |
|
694 | of the current position are allowed. Offsets behind the current read | |
|
695 | position and offsets relative to the end of stream are not allowed and | |
|
696 | will raise ``ValueError`` if attempted. | |
|
697 | ||
|
698 | ``tell()`` returns the number of decompressed bytes read so far. | |
|
699 | ||
|
700 | Not all I/O methods are implemented. Notably missing is support for | |
|
701 | ``readline()``, ``readlines()``, and linewise iteration support. This is | |
|
702 | because streams operate on binary data - not text data. If you want to | |
|
703 | convert decompressed output to text, you can chain an ``io.TextIOWrapper`` | |
|
704 | to the stream:: | |
|
705 | ||
|
706 | with open(path, 'rb') as fh: | |
|
707 | dctx = zstd.ZstdDecompressor() | |
|
708 | stream_reader = dctx.stream_reader(fh) | |
|
709 | text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8') | |
|
710 | ||
|
711 | for line in text_stream: | |
|
712 | ... | |
|
713 | ||
|
714 | The ``read_across_frames`` argument to ``stream_reader()`` controls the | |
|
715 | behavior of read operations when the end of a zstd *frame* is encountered. | |
|
716 | When ``False`` (the default), a read will complete when the end of a | |
|
717 | zstd *frame* is encountered. When ``True``, a read can potentially | |
|
718 | return data spanning multiple zstd *frames*. | |
|
719 | ||
|
720 | Streaming Input API | |
|
721 | ^^^^^^^^^^^^^^^^^^^ | |
|
722 | ||
|
723 | ``stream_writer(fh)`` allows you to *stream* data into a decompressor. | |
|
724 | ||
|
725 | Returned instances implement the ``io.RawIOBase`` interface. Only methods | |
|
726 | that involve writing will do useful things. | |
|
727 | ||
|
728 | The argument to ``stream_writer()`` is typically an object that also implements | |
|
729 | ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many | |
|
730 | common Python types conform to this interface, including open file handles | |
|
731 | and ``io.BytesIO``. | |
|
732 | ||
|
733 | Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data | |
|
734 | is sent to the decompressor by calling ``write(data)`` and decompressed | |
|
735 | output is written to the underlying stream by calling its ``write(data)`` | |
|
736 | method.:: | |
|
737 | ||
|
738 | dctx = zstd.ZstdDecompressor() | |
|
739 | decompressor = dctx.stream_writer(fh) | |
|
740 | ||
|
741 | decompressor.write(compressed_data) | |
|
742 | ... | |
|
743 | ||
|
744 | ||
|
745 | Calls to ``write()`` will return the number of bytes written to the output | |
|
746 | object. Not all inputs will result in bytes being written, so return values | |
|
747 | of ``0`` are possible. | |
|
748 | ||
|
749 | Like the ``stream_writer()`` compressor, instances can be used as context | |
|
750 | managers. However, context managers add no extra special behavior and offer | |
|
751 | little to no benefit to being used. | |
|
752 | ||
|
753 | Calling ``close()`` will mark the stream as closed and subsequent I/O operations | |
|
754 | will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``). | |
|
755 | ``close()`` will also call ``close()`` on the underlying stream if such a | |
|
756 | method exists. | |
|
757 | ||
|
758 | The size of chunks being ``write()`` to the destination can be specified:: | |
|
759 | ||
|
760 | dctx = zstd.ZstdDecompressor() | |
|
761 | with dctx.stream_writer(fh, write_size=16384) as decompressor: | |
|
762 | pass | |
|
763 | ||
|
764 | You can see how much memory is being used by the decompressor:: | |
|
765 | ||
|
766 | dctx = zstd.ZstdDecompressor() | |
|
767 | with dctx.stream_writer(fh) as decompressor: | |
|
768 | byte_size = decompressor.memory_size() | |
|
769 | ||
|
770 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control | |
|
771 | the return value of ``write()``. When ``False`` (the default), ``write()`` | |
|
772 | returns the number of bytes that were written to the underlying stream. | |
|
773 | When ``True``, ``write()`` returns the number of bytes read from the input. | |
|
774 | ``True`` is the *proper* behavior for ``write()`` as specified by the | |
|
775 | ``io.RawIOBase`` interface and will become the default in a future release. | |
|
776 | ||
|
777 | Streaming Output API | |
|
778 | ^^^^^^^^^^^^^^^^^^^^ | |
|
779 | ||
|
780 | ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a | |
|
781 | compressed source as an iterator of data chunks.:: | |
|
782 | ||
|
783 | dctx = zstd.ZstdDecompressor() | |
|
784 | for chunk in dctx.read_to_iter(fh): | |
|
785 | # Do something with original data. | |
|
786 | ||
|
787 | ``read_to_iter()`` accepts an object with a ``read(size)`` method that will | |
|
788 | return compressed bytes or an object conforming to the buffer protocol that | |
|
789 | can expose its data as a contiguous range of bytes. | |
|
790 | ||
|
791 | ``read_to_iter()`` returns an iterator whose elements are chunks of the | |
|
792 | decompressed data. | |
|
793 | ||
|
794 | The size of requested ``read()`` from the source can be specified:: | |
|
795 | ||
|
796 | dctx = zstd.ZstdDecompressor() | |
|
797 | for chunk in dctx.read_to_iter(fh, read_size=16384): | |
|
798 | pass | |
|
799 | ||
|
800 | It is also possible to skip leading bytes in the input data:: | |
|
801 | ||
|
802 | dctx = zstd.ZstdDecompressor() | |
|
803 | for chunk in dctx.read_to_iter(fh, skip_bytes=1): | |
|
804 | pass | |
|
805 | ||
|
806 | .. tip:: | |
|
24 | .. |ci-wheel| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/wheel.yml/badge.svg | |
|
25 | :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/wheel.yml | |
|
807 | 26 | |
|
808 | Skipping leading bytes is useful if the source data contains extra | |
|
809 | *header* data. Traditionally, you would need to create a slice or | |
|
810 | ``memoryview`` of the data you want to decompress. This would create | |
|
811 | overhead. It is more efficient to pass the offset into this API. | |
|
812 | ||
|
813 | Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator | |
|
814 | controls when data is decompressed. If the iterator isn't consumed, | |
|
815 | decompression is put on hold. | |
|
816 | ||
|
817 | When ``read_to_iter()`` is passed an object conforming to the buffer protocol, | |
|
818 | the behavior may seem similar to what occurs when the simple decompression | |
|
819 | API is used. However, this API works when the decompressed size is unknown. | |
|
820 | Furthermore, if feeding large inputs, the decompressor will work in chunks | |
|
821 | instead of performing a single operation. | |
|
822 | ||
|
823 | Stream Copying API | |
|
824 | ^^^^^^^^^^^^^^^^^^ | |
|
825 | ||
|
826 | ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while | |
|
827 | performing decompression.:: | |
|
828 | ||
|
829 | dctx = zstd.ZstdDecompressor() | |
|
830 | dctx.copy_stream(ifh, ofh) | |
|
831 | ||
|
832 | e.g. to decompress a file to another file:: | |
|
833 | ||
|
834 | dctx = zstd.ZstdDecompressor() | |
|
835 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: | |
|
836 | dctx.copy_stream(ifh, ofh) | |
|
837 | ||
|
838 | The size of chunks being ``read()`` and ``write()`` from and to the streams | |
|
839 | can be specified:: | |
|
840 | ||
|
841 | dctx = zstd.ZstdDecompressor() | |
|
842 | dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384) | |
|
843 | ||
|
844 | Decompressor API | |
|
845 | ^^^^^^^^^^^^^^^^ | |
|
846 | ||
|
847 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` | |
|
848 | method. Compressed data chunks are fed into ``decompress(data)`` and | |
|
849 | uncompressed output (or an empty bytes) is returned. Output from subsequent | |
|
850 | calls needs to be concatenated to reassemble the full decompressed byte | |
|
851 | sequence. | |
|
852 | ||
|
853 | The purpose of ``decompressobj()`` is to provide an API-compatible interface | |
|
854 | with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers | |
|
855 | to swap in different decompressor objects while using the same API. | |
|
856 | ||
|
857 | Each object is single use: once an input frame is decoded, ``decompress()`` | |
|
858 | can no longer be called. | |
|
859 | ||
|
860 | Here is how this API should be used:: | |
|
861 | ||
|
862 | dctx = zstd.ZstdDecompressor() | |
|
863 | dobj = dctx.decompressobj() | |
|
864 | data = dobj.decompress(compressed_chunk_0) | |
|
865 | data = dobj.decompress(compressed_chunk_1) | |
|
866 | ||
|
867 | By default, calls to ``decompress()`` write output data in chunks of size | |
|
868 | ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated | |
|
869 | before being returned to the caller. It is possible to define the size of | |
|
870 | these temporary chunks by passing ``write_size`` to ``decompressobj()``:: | |
|
871 | ||
|
872 | dctx = zstd.ZstdDecompressor() | |
|
873 | dobj = dctx.decompressobj(write_size=1048576) | |
|
874 | ||
|
875 | .. note:: | |
|
876 | ||
|
877 | Because calls to ``decompress()`` may need to perform multiple | |
|
878 | memory (re)allocations, this streaming decompression API isn't as | |
|
879 | efficient as other APIs. | |
|
880 | ||
|
881 | For compatibility with the standard library APIs, instances expose a | |
|
882 | ``flush([length=None])`` method. This method no-ops and has no meaningful | |
|
883 | side-effects, making it safe to call any time. | |
|
884 | ||
|
885 | Batch Decompression API | |
|
886 | ^^^^^^^^^^^^^^^^^^^^^^^ | |
|
887 | ||
|
888 | (Experimental. Not yet supported in CFFI bindings.) | |
|
889 | ||
|
890 | ``multi_decompress_to_buffer()`` performs decompression of multiple | |
|
891 | frames as a single operation and returns a ``BufferWithSegmentsCollection`` | |
|
892 | containing decompressed data for all inputs. | |
|
893 | ||
|
894 | Compressed frames can be passed to the function as a ``BufferWithSegments``, | |
|
895 | a ``BufferWithSegmentsCollection``, or as a list containing objects that | |
|
896 | conform to the buffer protocol. For best performance, pass a | |
|
897 | ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as | |
|
898 | minimal input validation will be done for that type. If calling from | |
|
899 | Python (as opposed to C), constructing one of these instances may add | |
|
900 | overhead cancelling out the performance gain from avoiding validation of list | |
|
901 | inputs.:: | |
|
902 | ||
|
903 | dctx = zstd.ZstdDecompressor() | |
|
904 | results = dctx.multi_decompress_to_buffer([b'...', b'...']) | |
|
905 | ||
|
906 | The decompressed size of each frame MUST be discoverable. It can either be | |
|
907 | embedded within the zstd frame (``write_content_size=True`` argument to | |
|
908 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. | |
|
909 | ||
|
910 | The ``decompressed_sizes`` argument is an object conforming to the buffer | |
|
911 | protocol which holds an array of 64-bit unsigned integers in the machine's | |
|
912 | native format defining the decompressed sizes of each frame. If this argument | |
|
913 | is passed, it avoids having to scan each frame for its decompressed size. | |
|
914 | This frame scanning can add noticeable overhead in some scenarios.:: | |
|
915 | ||
|
916 | frames = [...] | |
|
917 | sizes = struct.pack('=QQQQ', len0, len1, len2, len3) | |
|
918 | ||
|
919 | dctx = zstd.ZstdDecompressor() | |
|
920 | results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) | |
|
921 | ||
|
922 | The ``threads`` argument controls the number of threads to use to perform | |
|
923 | decompression operations. The default (``0``) or the value ``1`` means to | |
|
924 | use a single thread. Negative values use the number of logical CPUs in the | |
|
925 | machine. | |
|
926 | ||
|
927 | .. note:: | |
|
928 | ||
|
929 | It is possible to pass a ``mmap.mmap()`` instance into this function by | |
|
930 | wrapping it with a ``BufferWithSegments`` instance (which will define the | |
|
931 | offsets of frames within the memory mapped region). | |
|
932 | ||
|
933 | This function is logically equivalent to performing ``dctx.decompress()`` | |
|
934 | on each input frame and returning the result. | |
|
935 | ||
|
936 | This function exists to perform decompression on multiple frames as fast | |
|
937 | as possible by having as little overhead as possible. Since decompression is | |
|
938 | performed as a single operation and since the decompressed output is stored in | |
|
939 | a single buffer, extra memory allocations, Python objects, and Python function | |
|
940 | calls are avoided. This is ideal for scenarios where callers know up front that | |
|
941 | they need to access data for multiple frames, such as when *delta chains* are | |
|
942 | being used. | |
|
943 | ||
|
944 | Currently, the implementation always spawns multiple threads when requested, | |
|
945 | even if the amount of work to do is small. In the future, it will be smarter | |
|
946 | about avoiding threads and their associated overhead when the amount of | |
|
947 | work to do is small. | |
|
948 | ||
|
949 | Prefix Dictionary Chain Decompression | |
|
950 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
|
951 | ||
|
952 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of | |
|
953 | zstd frames produced using chained *prefix* dictionary compression. Such | |
|
954 | a list of frames is produced by compressing discrete inputs where each | |
|
955 | non-initial input is compressed with a *prefix* dictionary consisting of the | |
|
956 | content of the previous input. | |
|
957 | ||
|
958 | For example, say you have the following inputs:: | |
|
959 | ||
|
960 | inputs = [b'input 1', b'input 2', b'input 3'] | |
|
961 | ||
|
962 | The zstd frame chain consists of: | |
|
963 | ||
|
964 | 1. ``b'input 1'`` compressed in standalone/discrete mode | |
|
965 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary | |
|
966 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary | |
|
967 | ||
|
968 | Each zstd frame **must** have the content size written. | |
|
969 | ||
|
970 | The following Python code can be used to produce a *prefix dictionary chain*:: | |
|
971 | ||
|
972 | def make_chain(inputs): | |
|
973 | frames = [] | |
|
974 | ||
|
975 | # First frame is compressed in standalone/discrete mode. | |
|
976 | zctx = zstd.ZstdCompressor() | |
|
977 | frames.append(zctx.compress(inputs[0])) | |
|
978 | ||
|
979 | # Subsequent frames use the previous fulltext as a prefix dictionary | |
|
980 | for i, raw in enumerate(inputs[1:]): | |
|
981 | dict_data = zstd.ZstdCompressionDict( | |
|
982 | inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT) | |
|
983 | zctx = zstd.ZstdCompressor(dict_data=dict_data) | |
|
984 | frames.append(zctx.compress(raw)) | |
|
985 | ||
|
986 | return frames | |
|
987 | ||
|
988 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last | |
|
989 | element in the input chain. | |
|
990 | ||
|
991 | ||
|
992 | .. note:: | |
|
993 | ||
|
994 | It is possible to implement *prefix dictionary chain* decompression | |
|
995 | on top of other APIs. However, this function will likely be faster - | |
|
996 | especially for long input chains - as it avoids the overhead of instantiating | |
|
997 | and passing around intermediate objects between C and Python. | |
|
998 | ||
|
999 | Multi-Threaded Compression | |
|
1000 | -------------------------- | |
|
27 | .. |ci-typing| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/typing.yml/badge.svg | |
|
28 | :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/typing.yml | |
|
1001 | 29 | |
|
1002 | ``ZstdCompressor`` accepts a ``threads`` argument that controls the number | |
|
1003 | of threads to use for compression. The way this works is that input is split | |
|
1004 | into segments and each segment is fed into a worker pool for compression. Once | |
|
1005 | a segment is compressed, it is flushed/appended to the output. | |
|
1006 | ||
|
1007 | .. note:: | |
|
1008 | ||
|
1009 | These threads are created at the C layer and are not Python threads. So they | |
|
1010 | work outside the GIL. It is therefore possible to CPU saturate multiple cores | |
|
1011 | from Python. | |
|
1012 | ||
|
1013 | The segment size for multi-threaded compression is chosen from the window size | |
|
1014 | of the compressor. This is derived from the ``window_log`` attribute of a | |
|
1015 | ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB | |
|
1016 | range. | |
|
1017 | ||
|
1018 | If multi-threaded compression is requested and the input is smaller than the | |
|
1019 | configured segment size, only a single compression thread will be used. If the | |
|
1020 | input is smaller than the segment size multiplied by the thread pool size or | |
|
1021 | if data cannot be delivered to the compressor fast enough, not all requested | |
|
1022 | compressor threads may be active simultaneously. | |
|
1023 | ||
|
1024 | Compared to non-multi-threaded compression, multi-threaded compression has | |
|
1025 | higher per-operation overhead. This includes extra memory operations, | |
|
1026 | thread creation, lock acquisition, etc. | |
|
1027 | ||
|
1028 | Due to the nature of multi-threaded compression using *N* compression | |
|
1029 | *states*, the output from multi-threaded compression will likely be larger | |
|
1030 | than non-multi-threaded compression. The difference is usually small. But | |
|
1031 | there is a CPU/wall time versus size trade off that may warrant investigation. | |
|
1032 | ||
|
1033 | Output from multi-threaded compression does not require any special handling | |
|
1034 | on the decompression side. To the decompressor, data generated by a | |
|
1035 | single-threaded compressor looks the same as data generated by a multi-threaded | |
|
1036 | compressor and does not require any special handling or additional resource | |
|
1037 | requirements. | |
|
1038 | ||
|
1039 | Dictionary Creation and Management | |
|
1040 | ---------------------------------- | |
|
1041 | ||
|
1042 | Compression dictionaries are represented with the ``ZstdCompressionDict`` type. | |
|
1043 | ||
|
1044 | Instances can be constructed from bytes:: | |
|
1045 | ||
|
1046 | dict_data = zstd.ZstdCompressionDict(data) | |
|
1047 | ||
|
1048 | It is possible to construct a dictionary from *any* data. If the data doesn't | |
|
1049 | begin with a magic header, it will be treated as a *prefix* dictionary. | |
|
1050 | *Prefix* dictionaries allow compression operations to reference raw data | |
|
1051 | within the dictionary. | |
|
1052 | ||
|
1053 | It is possible to force the use of *prefix* dictionaries or to require a | |
|
1054 | dictionary header:: | |
|
1055 | ||
|
1056 | dict_data = zstd.ZstdCompressionDict(data, | |
|
1057 | dict_type=zstd.DICT_TYPE_RAWCONTENT) | |
|
1058 | ||
|
1059 | dict_data = zstd.ZstdCompressionDict(data, | |
|
1060 | dict_type=zstd.DICT_TYPE_FULLDICT) | |
|
1061 | ||
|
1062 | You can see how many bytes are in the dictionary by calling ``len()``:: | |
|
1063 | ||
|
1064 | dict_data = zstd.train_dictionary(size, samples) | |
|
1065 | dict_size = len(dict_data) # will not be larger than ``size`` | |
|
1066 | ||
|
1067 | Once you have a dictionary, you can pass it to the objects performing | |
|
1068 | compression and decompression:: | |
|
1069 | ||
|
1070 | dict_data = zstd.train_dictionary(131072, samples) | |
|
1071 | ||
|
1072 | cctx = zstd.ZstdCompressor(dict_data=dict_data) | |
|
1073 | for source_data in input_data: | |
|
1074 | compressed = cctx.compress(source_data) | |
|
1075 | # Do something with compressed data. | |
|
1076 | ||
|
1077 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) | |
|
1078 | for compressed_data in input_data: | |
|
1079 | buffer = io.BytesIO() | |
|
1080 | with dctx.stream_writer(buffer) as decompressor: | |
|
1081 | decompressor.write(compressed_data) | |
|
1082 | # Do something with raw data in ``buffer``. | |
|
1083 | ||
|
1084 | Dictionaries have unique integer IDs. You can retrieve this ID via:: | |
|
1085 | ||
|
1086 | dict_id = zstd.dictionary_id(dict_data) | |
|
1087 | ||
|
1088 | You can obtain the raw data in the dict (useful for persisting and constructing | |
|
1089 | a ``ZstdCompressionDict`` later) via ``as_bytes()``:: | |
|
1090 | ||
|
1091 | dict_data = zstd.train_dictionary(size, samples) | |
|
1092 | raw_data = dict_data.as_bytes() | |
|
1093 | ||
|
1094 | By default, when a ``ZstdCompressionDict`` is *attached* to a | |
|
1095 | ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the | |
|
1096 | dictionary for use. This is fine if only 1 compression operation is being | |
|
1097 | performed or if the ``ZstdCompressor`` is being reused for multiple operations. | |
|
1098 | But if multiple ``ZstdCompressor`` instances are being used with the dictionary, | |
|
1099 | this can add overhead. | |
|
1100 | ||
|
1101 | It is possible to *precompute* the dictionary so it can readily be consumed | |
|
1102 | by multiple ``ZstdCompressor`` instances:: | |
|
1103 | ||
|
1104 | d = zstd.ZstdCompressionDict(data) | |
|
1105 | ||
|
1106 | # Precompute for compression level 3. | |
|
1107 | d.precompute_compress(level=3) | |
|
1108 | ||
|
1109 | # Precompute with specific compression parameters. | |
|
1110 | params = zstd.ZstdCompressionParameters(...) | |
|
1111 | d.precompute_compress(compression_params=params) | |
|
1112 | ||
|
1113 | .. note:: | |
|
1114 | ||
|
1115 | When a dictionary is precomputed, the compression parameters used to | |
|
1116 | precompute the dictionary overwrite some of the compression parameters | |
|
1117 | specified to ``ZstdCompressor.__init__``. | |
|
1118 | ||
|
1119 | Training Dictionaries | |
|
1120 | ^^^^^^^^^^^^^^^^^^^^^ | |
|
1121 | ||
|
1122 | Unless using *prefix* dictionaries, dictionary data is produced by *training* | |
|
1123 | on existing data:: | |
|
1124 | ||
|
1125 | dict_data = zstd.train_dictionary(size, samples) | |
|
1126 | ||
|
1127 | This takes a target dictionary size and list of bytes instances and creates and | |
|
1128 | returns a ``ZstdCompressionDict``. | |
|
1129 | ||
|
1130 | The dictionary training mechanism is known as *cover*. More details about it are | |
|
1131 | available in the paper *Effective Construction of Relative Lempel-Ziv | |
|
1132 | Dictionaries* (authors: Liao, Petri, Moffat, Wirth). | |
|
1133 | ||
|
1134 | The cover algorithm takes parameters ``k`` and ``d``. These are the | |
|
1135 | *segment size* and *dmer size*, respectively. The returned dictionary | |
|
1136 | instance created by this function has ``k`` and ``d`` attributes | |
|
1137 | containing the values for these parameters. If a ``ZstdCompressionDict`` | |
|
1138 | is constructed from raw bytes data (a content-only dictionary), the | |
|
1139 | ``k`` and ``d`` attributes will be ``0``. | |
|
1140 | ||
|
1141 | The segment and dmer size parameters to the cover algorithm can either be | |
|
1142 | specified manually or ``train_dictionary()`` can try multiple values | |
|
1143 | and pick the best one, where *best* means the smallest compressed data size. | |
|
1144 | This latter mode is called *optimization* mode. | |
|
1145 | ||
|
1146 | If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``, | |
|
1147 | or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t`` | |
|
1148 | struct) are defined, *optimization* mode is used with default parameter | |
|
1149 | values. | |
|
1150 | ||
|
1151 | If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged | |
|
1152 | with explicit control over those parameters. Specifying ``threads=0`` or | |
|
1153 | ``threads=1`` can be used to engage *optimization* mode if other parameters | |
|
1154 | are not defined. | |
|
1155 | ||
|
1156 | Otherwise, non-*optimization* mode is used with the parameters specified. | |
|
1157 | ||
|
1158 | This function takes the following arguments: | |
|
1159 | ||
|
1160 | dict_size | |
|
1161 | Target size in bytes of the dictionary to generate. | |
|
1162 | samples | |
|
1163 | A list of bytes holding samples the dictionary will be trained from. | |
|
1164 | k | |
|
1165 | Parameter to cover algorithm defining the segment size. A reasonable range | |
|
1166 | is [16, 2048+]. | |
|
1167 | d | |
|
1168 | Parameter to cover algorithm defining the dmer size. A reasonable range is | |
|
1169 | [6, 16]. ``d`` must be less than or equal to ``k``. | |
|
1170 | dict_id | |
|
1171 | Integer dictionary ID for the produced dictionary. Default is 0, which uses | |
|
1172 | a random value. | |
|
1173 | steps | |
|
1174 | Number of steps through ``k`` values to perform when trying parameter | |
|
1175 | variations. | |
|
1176 | threads | |
|
1177 | Number of threads to use when trying parameter variations. Default is 0, | |
|
1178 | which means to use a single thread. A negative value can be specified to | |
|
1179 | use as many threads as there are detected logical CPUs. | |
|
1180 | level | |
|
1181 | Integer target compression level when trying parameter variations. | |
|
1182 | notifications | |
|
1183 | Controls writing of informational messages to ``stderr``. ``0`` (the | |
|
1184 | default) means to write nothing. ``1`` writes errors. ``2`` writes | |
|
1185 | progression info. ``3`` writes more details. And ``4`` writes all info. | |
|
1186 | ||
|
1187 | Explicit Compression Parameters | |
|
1188 | ------------------------------- | |
|
1189 | ||
|
1190 | Zstandard offers a high-level *compression level* that maps to lower-level | |
|
1191 | compression parameters. For many consumers, this numeric level is the only | |
|
1192 | compression setting you'll need to touch. | |
|
1193 | ||
|
1194 | But for advanced use cases, it might be desirable to tweak these lower-level | |
|
1195 | settings. | |
|
1196 | ||
|
1197 | The ``ZstdCompressionParameters`` type represents these low-level compression | |
|
1198 | settings. | |
|
1199 | ||
|
1200 | Instances of this type can be constructed from a myriad of keyword arguments | |
|
1201 | (defined below) for complete low-level control over each adjustable | |
|
1202 | compression setting. | |
|
30 | .. |ci-sdist| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/sdist.yml/badge.svg | |
|
31 | :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/sdist.yml | |
|
1203 | 32 | |
|
1204 | From a higher level, one can construct a ``ZstdCompressionParameters`` instance | |
|
1205 | given a desired compression level and target input and dictionary size | |
|
1206 | using ``ZstdCompressionParameters.from_level()``. e.g.:: | |
|
1207 | ||
|
1208 | # Derive compression settings for compression level 7. | |
|
1209 | params = zstd.ZstdCompressionParameters.from_level(7) | |
|
1210 | ||
|
1211 | # With an input size of 1MB | |
|
1212 | params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576) | |
|
1213 | ||
|
1214 | Using ``from_level()``, it is also possible to override individual compression | |
|
1215 | parameters or to define additional settings that aren't automatically derived. | |
|
1216 | e.g.:: | |
|
1217 | ||
|
1218 | params = zstd.ZstdCompressionParameters.from_level(4, window_log=10) | |
|
1219 | params = zstd.ZstdCompressionParameters.from_level(5, threads=4) | |
|
1220 | ||
|
1221 | Or you can define low-level compression settings directly:: | |
|
1222 | ||
|
1223 | params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True) | |
|
1224 | ||
|
1225 | Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to | |
|
1226 | configure a compressor:: | |
|
1227 | ||
|
1228 | cctx = zstd.ZstdCompressor(compression_params=params) | |
|
1229 | ||
|
1230 | The named arguments and attributes of ``ZstdCompressionParameters`` are as | |
|
1231 | follows: | |
|
1232 | ||
|
1233 | * format | |
|
1234 | * compression_level | |
|
1235 | * window_log | |
|
1236 | * hash_log | |
|
1237 | * chain_log | |
|
1238 | * search_log | |
|
1239 | * min_match | |
|
1240 | * target_length | |
|
1241 | * strategy | |
|
1242 | * compression_strategy (deprecated: same as ``strategy``) | |
|
1243 | * write_content_size | |
|
1244 | * write_checksum | |
|
1245 | * write_dict_id | |
|
1246 | * job_size | |
|
1247 | * overlap_log | |
|
1248 | * overlap_size_log (deprecated: same as ``overlap_log``) | |
|
1249 | * force_max_window | |
|
1250 | * enable_ldm | |
|
1251 | * ldm_hash_log | |
|
1252 | * ldm_min_match | |
|
1253 | * ldm_bucket_size_log | |
|
1254 | * ldm_hash_rate_log | |
|
1255 | * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``) | |
|
1256 | * threads | |
|
1257 | ||
|
1258 | Some of these are very low-level settings. It may help to consult the official | |
|
1259 | zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants | |
|
1260 | in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h). | |
|
1261 | ||
|
1262 | Frame Inspection | |
|
1263 | ---------------- | |
|
1264 | ||
|
1265 | Data emitted from zstd compression is encapsulated in a *frame*. This frame | |
|
1266 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing | |
|
1267 | the frame in more detail. For more info, see | |
|
1268 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. | |
|
1269 | ||
|
1270 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes | |
|
1271 | instance and returns a ``FrameParameters`` object describing the frame. | |
|
1272 | ||
|
1273 | Depending on which fields are present in the frame and their values, the | |
|
1274 | length of the frame parameters varies. If insufficient bytes are passed | |
|
1275 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure | |
|
1276 | frame parameters can be parsed, pass in at least 18 bytes. | |
|
1277 | ||
|
1278 | ``FrameParameters`` instances have the following attributes: | |
|
1279 | ||
|
1280 | content_size | |
|
1281 | Integer size of original, uncompressed content. This will be ``0`` if the | |
|
1282 | original content size isn't written to the frame (controlled with the | |
|
1283 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input | |
|
1284 | content size was ``0``. | |
|
1285 | ||
|
1286 | window_size | |
|
1287 | Integer size of maximum back-reference distance in compressed data. | |
|
1288 | ||
|
1289 | dict_id | |
|
1290 | Integer of dictionary ID used for compression. ``0`` if no dictionary | |
|
1291 | ID was used or if the dictionary ID was ``0``. | |
|
1292 | ||
|
1293 | has_checksum | |
|
1294 | Bool indicating whether a 4 byte content checksum is stored at the end | |
|
1295 | of the frame. | |
|
1296 | ||
|
1297 | ``zstd.frame_header_size(data)`` returns the size of the zstandard frame | |
|
1298 | header. | |
|
1299 | ||
|
1300 | ``zstd.frame_content_size(data)`` returns the content size as parsed from | |
|
1301 | the frame header. ``-1`` means the content size is unknown. ``0`` means | |
|
1302 | an empty frame. The content size is usually correct. However, it may not | |
|
1303 | be accurate. | |
|
1304 | ||
|
1305 | Misc Functionality | |
|
1306 | ------------------ | |
|
1307 | ||
|
1308 | estimate_decompression_context_size() | |
|
1309 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
|
1310 | ||
|
1311 | Estimate the memory size requirements for a decompressor instance. | |
|
1312 | ||
|
1313 | Constants | |
|
1314 | --------- | |
|
1315 | ||
|
1316 | The following module constants/attributes are exposed: | |
|
1317 | ||
|
1318 | ZSTD_VERSION | |
|
1319 | This module attribute exposes a 3-tuple of the Zstandard version. e.g. | |
|
1320 | ``(1, 0, 0)`` | |
|
1321 | MAX_COMPRESSION_LEVEL | |
|
1322 | Integer max compression level accepted by compression functions | |
|
1323 | COMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1324 | Recommended chunk size to feed to compressor functions | |
|
1325 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
|
1326 | Recommended chunk size for compression output | |
|
1327 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1328 | Recommended chunk size to feed into decompressor functions | |
|
1329 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
|
1330 | Recommended chunk size for decompression output | |
|
1331 | ||
|
1332 | FRAME_HEADER | |
|
1333 | bytes containing header of the Zstandard frame | |
|
1334 | MAGIC_NUMBER | |
|
1335 | Frame header as an integer | |
|
1336 | ||
|
1337 | FLUSH_BLOCK | |
|
1338 | Flushing behavior that denotes to flush a zstd block. A decompressor will | |
|
1339 | be able to decode all data fed into the compressor so far. | |
|
1340 | FLUSH_FRAME | |
|
1341 | Flushing behavior that denotes to end a zstd frame. Any new data fed | |
|
1342 | to the compressor will start a new frame. | |
|
1343 | ||
|
1344 | CONTENTSIZE_UNKNOWN | |
|
1345 | Value for content size when the content size is unknown. | |
|
1346 | CONTENTSIZE_ERROR | |
|
1347 | Value for content size when content size couldn't be determined. | |
|
1348 | ||
|
1349 | WINDOWLOG_MIN | |
|
1350 | Minimum value for compression parameter | |
|
1351 | WINDOWLOG_MAX | |
|
1352 | Maximum value for compression parameter | |
|
1353 | CHAINLOG_MIN | |
|
1354 | Minimum value for compression parameter | |
|
1355 | CHAINLOG_MAX | |
|
1356 | Maximum value for compression parameter | |
|
1357 | HASHLOG_MIN | |
|
1358 | Minimum value for compression parameter | |
|
1359 | HASHLOG_MAX | |
|
1360 | Maximum value for compression parameter | |
|
1361 | SEARCHLOG_MIN | |
|
1362 | Minimum value for compression parameter | |
|
1363 | SEARCHLOG_MAX | |
|
1364 | Maximum value for compression parameter | |
|
1365 | MINMATCH_MIN | |
|
1366 | Minimum value for compression parameter | |
|
1367 | MINMATCH_MAX | |
|
1368 | Maximum value for compression parameter | |
|
1369 | SEARCHLENGTH_MIN | |
|
1370 | Minimum value for compression parameter | |
|
1371 | ||
|
1372 | Deprecated: use ``MINMATCH_MIN`` | |
|
1373 | SEARCHLENGTH_MAX | |
|
1374 | Maximum value for compression parameter | |
|
1375 | ||
|
1376 | Deprecated: use ``MINMATCH_MAX`` | |
|
1377 | TARGETLENGTH_MIN | |
|
1378 | Minimum value for compression parameter | |
|
1379 | STRATEGY_FAST | |
|
1380 | Compression strategy | |
|
1381 | STRATEGY_DFAST | |
|
1382 | Compression strategy | |
|
1383 | STRATEGY_GREEDY | |
|
1384 | Compression strategy | |
|
1385 | STRATEGY_LAZY | |
|
1386 | Compression strategy | |
|
1387 | STRATEGY_LAZY2 | |
|
1388 | Compression strategy | |
|
1389 | STRATEGY_BTLAZY2 | |
|
1390 | Compression strategy | |
|
1391 | STRATEGY_BTOPT | |
|
1392 | Compression strategy | |
|
1393 | STRATEGY_BTULTRA | |
|
1394 | Compression strategy | |
|
1395 | STRATEGY_BTULTRA2 | |
|
1396 | Compression strategy | |
|
1397 | ||
|
1398 | FORMAT_ZSTD1 | |
|
1399 | Zstandard frame format | |
|
1400 | FORMAT_ZSTD1_MAGICLESS | |
|
1401 | Zstandard frame format without magic header | |
|
33 | .. |ci-anaconda| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/anaconda.yml/badge.svg | |
|
34 | :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/anaconda.yml | |
|
1402 | 35 | |
|
1403 | Performance Considerations | |
|
1404 | -------------------------- | |
|
1405 | ||
|
1406 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a | |
|
1407 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` | |
|
1408 | or ``ZstdDecompressor`` instance for multiple operations is faster than | |
|
1409 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each | |
|
1410 | operation. The differences are magnified as the size of data decreases. For | |
|
1411 | example, the difference between *context* reuse and non-reuse for 100,000 | |
|
1412 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) | |
|
1413 | whereas 10 100,000,000 byte inputs will be more similar in speed (because the | |
|
1414 | time spent doing compression dwarfs time spent creating new *contexts*). | |
|
1415 | ||
|
1416 | Buffer Types | |
|
1417 | ------------ | |
|
1418 | ||
|
1419 | The API exposes a handful of custom types for interfacing with memory buffers. | |
|
1420 | The primary goal of these types is to facilitate efficient multi-object | |
|
1421 | operations. | |
|
1422 | ||
|
1423 | The essential idea is to have a single memory allocation provide backing | |
|
1424 | storage for multiple logical objects. This has 2 main advantages: fewer | |
|
1425 | allocations and optimal memory access patterns. This avoids having to allocate | |
|
1426 | a Python object for each logical object and furthermore ensures that access of | |
|
1427 | data for objects can be sequential (read: fast) in memory. | |
|
1428 | ||
|
1429 | BufferWithSegments | |
|
1430 | ^^^^^^^^^^^^^^^^^^ | |
|
1431 | ||
|
1432 | The ``BufferWithSegments`` type represents a memory buffer containing N | |
|
1433 | discrete items of known lengths (segments). It is essentially a fixed size | |
|
1434 | memory address and an array of 2-tuples of ``(offset, length)`` 64-bit | |
|
1435 | unsigned native endian integers defining the byte offset and length of each | |
|
1436 | segment within the buffer. | |
|
1437 | ||
|
1438 | Instances behave like containers. | |
|
1439 | ||
|
1440 | ``len()`` returns the number of segments within the instance. | |
|
1441 | ||
|
1442 | ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an | |
|
1443 | individual segment within the backing buffer. That returned object references | |
|
1444 | (not copies) memory. This means that iterating all objects doesn't copy | |
|
1445 | data within the buffer. | |
|
1446 | ||
|
1447 | The ``.size`` attribute contains the total size in bytes of the backing | |
|
1448 | buffer. | |
|
1449 | ||
|
1450 | Instances conform to the buffer protocol. So a reference to the backing bytes | |
|
1451 | can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also | |
|
1452 | be obtained via ``.tobytes()``. | |
|
1453 | ||
|
1454 | The ``.segments`` attribute exposes the array of ``(offset, length)`` for | |
|
1455 | segments within the buffer. It is a ``BufferSegments`` type. | |
|
1456 | ||
|
1457 | BufferSegment | |
|
1458 | ^^^^^^^^^^^^^ | |
|
1459 | ||
|
1460 | The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``. | |
|
1461 | It is essentially a reference to N bytes within a ``BufferWithSegments``. | |
|
1462 | ||
|
1463 | ``len()`` returns the length of the segment in bytes. | |
|
1464 | ||
|
1465 | ``.offset`` contains the byte offset of this segment within its parent | |
|
1466 | ``BufferWithSegments`` instance. | |
|
1467 | ||
|
1468 | The object conforms to the buffer protocol. ``.tobytes()`` can be called to | |
|
1469 | obtain a ``bytes`` instance with a copy of the backing bytes. | |
|
1470 | ||
|
1471 | BufferSegments | |
|
1472 | ^^^^^^^^^^^^^^ | |
|
1473 | ||
|
1474 | This type represents an array of ``(offset, length)`` integers defining segments | |
|
1475 | within a ``BufferWithSegments``. | |
|
1476 | ||
|
1477 | The array members are 64-bit unsigned integers using host/native byte order. | |
|
1478 | ||
|
1479 | Instances conform to the buffer protocol. | |
|
1480 | ||
|
1481 | BufferWithSegmentsCollection | |
|
1482 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
|
1483 | ||
|
1484 | The ``BufferWithSegmentsCollection`` type represents a virtual spanning view | |
|
1485 | of multiple ``BufferWithSegments`` instances. | |
|
1486 | ||
|
1487 | Instances are constructed from 1 or more ``BufferWithSegments`` instances. The | |
|
1488 | resulting object behaves like an ordered sequence whose members are the | |
|
1489 | segments within each ``BufferWithSegments``. | |
|
1490 | ||
|
1491 | ``len()`` returns the number of segments within all ``BufferWithSegments`` | |
|
1492 | instances. | |
|
1493 | ||
|
1494 | ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at | |
|
1495 | that offset as if all ``BufferWithSegments`` instances were a single | |
|
1496 | entity. | |
|
1497 | ||
|
1498 | If the object is composed of 2 ``BufferWithSegments`` instances with the | |
|
1499 | first having 2 segments and the second having 3 segments, then ``b[0]`` | |
|
1500 | and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``, | |
|
1501 | and ``b[4]`` access segments from the second. | |
|
1502 | ||
|
1503 | Choosing an API | |
|
1504 | =============== | |
|
1505 | ||
|
1506 | There are multiple APIs for performing compression and decompression. This is | |
|
1507 | because different applications have different needs and the library wants to | |
|
1508 | facilitate optimal use in as many use cases as possible. | |
|
1509 | ||
|
1510 | From a high-level, APIs are divided into *one-shot* and *streaming*: either you | |
|
1511 | are operating on all data at once or you operate on it piecemeal. | |
|
1512 | ||
|
1513 | The *one-shot* APIs are useful for small data, where the input or output | |
|
1514 | size is known. (The size can come from a buffer length, file size, or | |
|
1515 | stored in the zstd frame header.) A limitation of the *one-shot* APIs is that | |
|
1516 | input and output must fit in memory simultaneously. For say a 4 GB input, | |
|
1517 | this is often not feasible. | |
|
1518 | ||
|
1519 | The *one-shot* APIs also perform all work as a single operation. So, if you | |
|
1520 | feed it large input, it could take a long time for the function to return. | |
|
1521 | ||
|
1522 | The streaming APIs do not have the limitations of the simple API. But the | |
|
1523 | price you pay for this flexibility is that they are more complex than a | |
|
1524 | single function call. | |
|
1525 | ||
|
1526 | The streaming APIs put the caller in control of compression and decompression | |
|
1527 | behavior by allowing them to directly control either the input or output side | |
|
1528 | of the operation. | |
|
1529 | ||
|
1530 | With the *streaming input*, *compressor*, and *decompressor* APIs, the caller | |
|
1531 | has full control over the input to the compression or decompression stream. | |
|
1532 | They can directly choose when new data is operated on. | |
|
1533 | ||
|
1534 | With the *streaming output* APIs, the caller has full control over the output | |
|
1535 | of the compression or decompression stream. It can choose when to receive | |
|
1536 | new data. | |
|
1537 | ||
|
1538 | When using the *streaming* APIs that operate on file-like or stream objects, | |
|
1539 | it is important to consider what happens in that object when I/O is requested. | |
|
1540 | There is potential for long pauses as data is read or written from the | |
|
1541 | underlying stream (say from interacting with a filesystem or network). This | |
|
1542 | could add considerable overhead. | |
|
1543 | ||
|
1544 | Thread Safety | |
|
1545 | ============= | |
|
1546 | ||
|
1547 | ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees | |
|
1548 | about thread safety. Do not operate on the same ``ZstdCompressor`` and | |
|
1549 | ``ZstdDecompressor`` instance simultaneously from different threads. It is | |
|
1550 | fine to have different threads call into a single instance, just not at the | |
|
1551 | same time. | |
|
1552 | ||
|
1553 | Some operations require multiple function calls to complete. e.g. streaming | |
|
1554 | operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used | |
|
1555 | for simultaneously active operations. e.g. you must not start a streaming | |
|
1556 | operation when another streaming operation is already active. | |
|
1557 | ||
|
1558 | The C extension releases the GIL during non-trivial calls into the zstd C | |
|
1559 | API. Non-trivial calls are notably compression and decompression. Trivial | |
|
1560 | calls are things like parsing frame parameters. Where the GIL is released | |
|
1561 | is considered an implementation detail and can change in any release. | |
|
1562 | ||
|
1563 | APIs that accept bytes-like objects don't enforce that the underlying object | |
|
1564 | is read-only. However, it is assumed that the passed object is read-only for | |
|
1565 | the duration of the function call. It is possible to pass a mutable object | |
|
1566 | (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL | |
|
1567 | released, and mutate the object from another thread. Such a race condition | |
|
1568 | is a bug in the consumer of python-zstandard. Most Python data types are | |
|
1569 | immutable, so unless you are doing something fancy, you don't need to | |
|
1570 | worry about this. | |
|
1571 | ||
|
1572 | Note on Zstandard's *Experimental* API | |
|
1573 | ====================================== | |
|
1574 | ||
|
1575 | Many of the Zstandard APIs used by this module are marked as *experimental* | |
|
1576 | within the Zstandard project. | |
|
1577 | ||
|
1578 | It is unclear how Zstandard's C API will evolve over time, especially with | |
|
1579 | regard to this *experimental* functionality. We will try to maintain | |
|
1580 | backwards compatibility at the Python API level. However, we cannot | |
|
1581 | guarantee this for things not under our control. | |
|
1582 | ||
|
1583 | Since a copy of the Zstandard source code is distributed with this | |
|
1584 | module and since we compile against it, the behavior of a specific | |
|
1585 | version of this module should be constant for all of time. So if you | |
|
1586 | pin the version of this module used in your projects (which is a Python | |
|
1587 | best practice), you should be shielded from unwanted future changes. | |
|
1588 | ||
|
1589 | Donate | |
|
1590 | ====== | |
|
1591 | ||
|
1592 | A lot of time has been invested into this project by the author. | |
|
1593 | ||
|
1594 | If you find this project useful and would like to thank the author for | |
|
1595 | their work, consider donating some money. Any amount is appreciated. | |
|
1596 | ||
|
1597 | .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif | |
|
1598 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted | |
|
1599 | :alt: Donate via PayPal | |
|
1600 | ||
|
1601 | .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master | |
|
1602 | :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master | |
|
36 | .. |ci-sphinx| image:: https://github.com/indygreg/python-zstandard/workflows/.github/workflows/sphinx.yml/badge.svg | |
|
37 | :target: https://github.com/indygreg/python-zstandard/blob/main/.github/workflows/sphinx.yml |
This diff has been collapsed as it changes many lines, (1142 lines changed) Show them Hide them | |||
@@ -1,128 +1,101 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
12 | ||
|
13 | PyDoc_STRVAR(BufferWithSegments__doc__, | |
|
14 | "BufferWithSegments - A memory buffer holding known sub-segments.\n" | |
|
15 | "\n" | |
|
16 | "This type represents a contiguous chunk of memory containing N discrete\n" | |
|
17 | "items within sub-segments of that memory.\n" | |
|
18 | "\n" | |
|
19 | "Segments within the buffer are stored as an array of\n" | |
|
20 | "``(offset, length)`` pairs, where each element is an unsigned 64-bit\n" | |
|
21 | "integer using the host/native bit order representation.\n" | |
|
22 | "\n" | |
|
23 | "The type exists to facilitate operations against N>1 items without the\n" | |
|
24 | "overhead of Python object creation and management.\n" | |
|
25 | ); | |
|
11 | extern PyObject *ZstdError; | |
|
26 | 12 | |
|
27 |
static void BufferWithSegments_dealloc(ZstdBufferWithSegments* |
|
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
|
31 | } | |
|
32 |
|
|
|
33 |
|
|
|
34 | } | |
|
35 |
|
|
|
36 |
|
|
|
37 | } | |
|
13 | static void BufferWithSegments_dealloc(ZstdBufferWithSegments *self) { | |
|
14 | /* Backing memory is either canonically owned by a Py_buffer or by us. */ | |
|
15 | if (self->parent.buf) { | |
|
16 | PyBuffer_Release(&self->parent); | |
|
17 | } | |
|
18 | else if (self->useFree) { | |
|
19 | free(self->data); | |
|
20 | } | |
|
21 | else { | |
|
22 | PyMem_Free(self->data); | |
|
23 | } | |
|
38 | 24 | |
|
39 |
|
|
|
25 | self->data = NULL; | |
|
40 | 26 | |
|
41 |
|
|
|
42 |
|
|
|
43 | } | |
|
44 |
|
|
|
45 |
|
|
|
46 | } | |
|
27 | if (self->useFree) { | |
|
28 | free(self->segments); | |
|
29 | } | |
|
30 | else { | |
|
31 | PyMem_Free(self->segments); | |
|
32 | } | |
|
47 | 33 | |
|
48 |
|
|
|
34 | self->segments = NULL; | |
|
49 | 35 | |
|
50 |
|
|
|
36 | PyObject_Del(self); | |
|
51 | 37 | } |
|
52 | 38 | |
|
53 |
static int BufferWithSegments_init(ZstdBufferWithSegments* |
|
|
54 | static char* kwlist[] = { | |
|
55 | "data", | |
|
56 | "segments", | |
|
57 | NULL | |
|
58 | }; | |
|
39 | static int BufferWithSegments_init(ZstdBufferWithSegments *self, PyObject *args, | |
|
40 | PyObject *kwargs) { | |
|
41 | static char *kwlist[] = {"data", "segments", NULL}; | |
|
59 | 42 | |
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
43 | Py_buffer segments; | |
|
44 | Py_ssize_t segmentCount; | |
|
45 | Py_ssize_t i; | |
|
63 | 46 | |
|
64 |
|
|
|
47 | memset(&self->parent, 0, sizeof(self->parent)); | |
|
65 | 48 | |
|
66 | #if PY_MAJOR_VERSION >= 3 | |
|
67 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments", | |
|
68 | #else | |
|
69 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*s*:BufferWithSegments", | |
|
70 | #endif | |
|
71 | kwlist, &self->parent, &segments)) { | |
|
72 | return -1; | |
|
73 | } | |
|
49 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*y*:BufferWithSegments", | |
|
50 | kwlist, &self->parent, &segments)) { | |
|
51 | return -1; | |
|
52 | } | |
|
74 | 53 | |
|
75 | if (!PyBuffer_IsContiguous(&self->parent, 'C') || self->parent.ndim > 1) { | |
|
76 | PyErr_SetString(PyExc_ValueError, "data buffer should be contiguous and have a single dimension"); | |
|
77 | goto except; | |
|
78 | } | |
|
54 | if (segments.len % sizeof(BufferSegment)) { | |
|
55 | PyErr_Format(PyExc_ValueError, | |
|
56 | "segments array size is not a multiple of %zu", | |
|
57 | sizeof(BufferSegment)); | |
|
58 | goto except; | |
|
59 | } | |
|
79 | 60 | |
|
80 | if (!PyBuffer_IsContiguous(&segments, 'C') || segments.ndim > 1) { | |
|
81 | PyErr_SetString(PyExc_ValueError, "segments buffer should be contiguous and have a single dimension"); | |
|
82 | goto except; | |
|
83 | } | |
|
61 | segmentCount = segments.len / sizeof(BufferSegment); | |
|
84 | 62 | |
|
85 | if (segments.len % sizeof(BufferSegment)) { | |
|
86 | PyErr_Format(PyExc_ValueError, "segments array size is not a multiple of %zu", | |
|
87 | sizeof(BufferSegment)); | |
|
88 | goto except; | |
|
89 | } | |
|
63 | /* Validate segments data, as blindly trusting it could lead to arbitrary | |
|
64 | memory access. */ | |
|
65 | for (i = 0; i < segmentCount; i++) { | |
|
66 | BufferSegment *segment = &((BufferSegment *)(segments.buf))[i]; | |
|
90 | 67 | |
|
91 | segmentCount = segments.len / sizeof(BufferSegment); | |
|
92 | ||
|
93 | /* Validate segments data, as blindly trusting it could lead to arbitrary | |
|
94 | memory access. */ | |
|
95 | for (i = 0; i < segmentCount; i++) { | |
|
96 | BufferSegment* segment = &((BufferSegment*)(segments.buf))[i]; | |
|
68 | if (segment->offset + segment->length > | |
|
69 | (unsigned long long)self->parent.len) { | |
|
70 | PyErr_SetString(PyExc_ValueError, | |
|
71 | "offset within segments array references memory " | |
|
72 | "outside buffer"); | |
|
73 | goto except; | |
|
74 | return -1; | |
|
75 | } | |
|
76 | } | |
|
97 | 77 | |
|
98 | if (segment->offset + segment->length > (unsigned long long)self->parent.len) { | |
|
99 | PyErr_SetString(PyExc_ValueError, "offset within segments array references memory outside buffer"); | |
|
100 | goto except; | |
|
101 | return -1; | |
|
102 | } | |
|
103 | } | |
|
78 | /* Make a copy of the segments data. It is cheap to do so and is a guard | |
|
79 | against caller changing offsets, which has security implications. */ | |
|
80 | self->segments = PyMem_Malloc(segments.len); | |
|
81 | if (!self->segments) { | |
|
82 | PyErr_NoMemory(); | |
|
83 | goto except; | |
|
84 | } | |
|
104 | 85 | |
|
105 | /* Make a copy of the segments data. It is cheap to do so and is a guard | |
|
106 | against caller changing offsets, which has security implications. */ | |
|
107 | self->segments = PyMem_Malloc(segments.len); | |
|
108 | if (!self->segments) { | |
|
109 | PyErr_NoMemory(); | |
|
110 | goto except; | |
|
111 | } | |
|
86 | memcpy(self->segments, segments.buf, segments.len); | |
|
87 | PyBuffer_Release(&segments); | |
|
112 | 88 | |
|
113 | memcpy(self->segments, segments.buf, segments.len); | |
|
114 | PyBuffer_Release(&segments); | |
|
89 | self->data = self->parent.buf; | |
|
90 | self->dataSize = self->parent.len; | |
|
91 | self->segmentCount = segmentCount; | |
|
115 | 92 | |
|
116 | self->data = self->parent.buf; | |
|
117 | self->dataSize = self->parent.len; | |
|
118 | self->segmentCount = segmentCount; | |
|
119 | ||
|
120 | return 0; | |
|
93 | return 0; | |
|
121 | 94 | |
|
122 | 95 | except: |
|
123 |
|
|
|
124 |
|
|
|
125 |
|
|
|
96 | PyBuffer_Release(&self->parent); | |
|
97 | PyBuffer_Release(&segments); | |
|
98 | return -1; | |
|
126 | 99 | } |
|
127 | 100 | |
|
128 | 101 | /** |
@@ -131,662 +104,475 except: | |||
|
131 | 104 | * Ownership of the backing memory and BufferSegments will be transferred to |
|
132 | 105 | * the created object and freed when the BufferWithSegments is destroyed. |
|
133 | 106 | */ |
|
134 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, | |
|
135 | BufferSegment* segments, Py_ssize_t segmentsSize) { | |
|
136 | ZstdBufferWithSegments* result = NULL; | |
|
137 | Py_ssize_t i; | |
|
107 | ZstdBufferWithSegments * | |
|
108 | BufferWithSegments_FromMemory(void *data, unsigned long long dataSize, | |
|
109 | BufferSegment *segments, | |
|
110 | Py_ssize_t segmentsSize) { | |
|
111 | ZstdBufferWithSegments *result = NULL; | |
|
112 | Py_ssize_t i; | |
|
138 | 113 | |
|
139 |
|
|
|
140 |
|
|
|
141 |
|
|
|
142 | } | |
|
114 | if (NULL == data) { | |
|
115 | PyErr_SetString(PyExc_ValueError, "data is NULL"); | |
|
116 | return NULL; | |
|
117 | } | |
|
143 | 118 | |
|
144 |
|
|
|
145 |
|
|
|
146 |
|
|
|
147 | } | |
|
119 | if (NULL == segments) { | |
|
120 | PyErr_SetString(PyExc_ValueError, "segments is NULL"); | |
|
121 | return NULL; | |
|
122 | } | |
|
148 | 123 | |
|
149 |
|
|
|
150 |
|
|
|
124 | for (i = 0; i < segmentsSize; i++) { | |
|
125 | BufferSegment *segment = &segments[i]; | |
|
151 | 126 | |
|
152 |
|
|
|
153 | PyErr_SetString(PyExc_ValueError, "offset in segments overflows buffer size"); | |
|
154 | return NULL; | |
|
155 | } | |
|
156 | } | |
|
127 | if (segment->offset + segment->length > dataSize) { | |
|
128 | PyErr_SetString(PyExc_ValueError, | |
|
129 | "offset in segments overflows buffer size"); | |
|
130 | return NULL; | |
|
131 | } | |
|
132 | } | |
|
157 | 133 | |
|
158 |
|
|
|
159 |
|
|
|
160 |
|
|
|
161 | } | |
|
134 | result = PyObject_New(ZstdBufferWithSegments, ZstdBufferWithSegmentsType); | |
|
135 | if (NULL == result) { | |
|
136 | return NULL; | |
|
137 | } | |
|
162 | 138 | |
|
163 |
|
|
|
139 | result->useFree = 0; | |
|
164 | 140 | |
|
165 |
|
|
|
166 |
|
|
|
167 |
|
|
|
168 |
|
|
|
169 |
|
|
|
141 | memset(&result->parent, 0, sizeof(result->parent)); | |
|
142 | result->data = data; | |
|
143 | result->dataSize = dataSize; | |
|
144 | result->segments = segments; | |
|
145 | result->segmentCount = segmentsSize; | |
|
170 | 146 | |
|
171 |
|
|
|
147 | return result; | |
|
172 | 148 | } |
|
173 | 149 | |
|
174 |
static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments* |
|
|
175 |
|
|
|
150 | static Py_ssize_t BufferWithSegments_length(ZstdBufferWithSegments *self) { | |
|
151 | return self->segmentCount; | |
|
176 | 152 | } |
|
177 | 153 | |
|
178 |
static ZstdBufferSegment* |
|
|
179 | ZstdBufferSegment* result = NULL; | |
|
154 | static ZstdBufferSegment *BufferWithSegments_item(ZstdBufferWithSegments *self, | |
|
155 | Py_ssize_t i) { | |
|
156 | ZstdBufferSegment *result = NULL; | |
|
157 | ||
|
158 | if (i < 0) { | |
|
159 | PyErr_SetString(PyExc_IndexError, "offset must be non-negative"); | |
|
160 | return NULL; | |
|
161 | } | |
|
180 | 162 | |
|
181 | if (i < 0) { | |
|
182 |
|
|
|
183 | return NULL; | |
|
184 | } | |
|
163 | if (i >= self->segmentCount) { | |
|
164 | PyErr_Format(PyExc_IndexError, "offset must be less than %zd", | |
|
165 | self->segmentCount); | |
|
166 | return NULL; | |
|
167 | } | |
|
185 | 168 | |
|
186 | if (i >= self->segmentCount) { | |
|
187 | PyErr_Format(PyExc_IndexError, "offset must be less than %zd", self->segmentCount); | |
|
188 | return NULL; | |
|
189 | } | |
|
169 | if (self->segments[i].length > PY_SSIZE_T_MAX) { | |
|
170 | PyErr_Format(PyExc_ValueError, | |
|
171 | "item at offset %zd is too large for this platform", i); | |
|
172 | return NULL; | |
|
173 | } | |
|
190 | 174 | |
|
191 | if (self->segments[i].length > PY_SSIZE_T_MAX) { | |
|
192 | PyErr_Format(PyExc_ValueError, | |
|
193 | "item at offset %zd is too large for this platform", i); | |
|
194 |
|
|
|
195 | } | |
|
175 | result = (ZstdBufferSegment *)PyObject_CallObject( | |
|
176 | (PyObject *)ZstdBufferSegmentType, NULL); | |
|
177 | if (NULL == result) { | |
|
178 | return NULL; | |
|
179 | } | |
|
180 | ||
|
181 | result->parent = (PyObject *)self; | |
|
182 | Py_INCREF(self); | |
|
183 | ||
|
184 | result->data = (char *)self->data + self->segments[i].offset; | |
|
185 | result->dataSize = (Py_ssize_t)self->segments[i].length; | |
|
186 | result->offset = self->segments[i].offset; | |
|
196 | 187 | |
|
197 | result = (ZstdBufferSegment*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentType, NULL); | |
|
198 | if (NULL == result) { | |
|
199 | return NULL; | |
|
200 | } | |
|
188 | return result; | |
|
189 | } | |
|
201 | 190 | |
|
202 | result->parent = (PyObject*)self; | |
|
203 | Py_INCREF(self); | |
|
191 | static int BufferWithSegments_getbuffer(ZstdBufferWithSegments *self, | |
|
192 | Py_buffer *view, int flags) { | |
|
193 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
194 | view->obj = NULL; | |
|
195 | PyErr_SetString(PyExc_BufferError, | |
|
196 | "buffer is too large for this platform"); | |
|
197 | return -1; | |
|
198 | } | |
|
204 | 199 | |
|
205 | result->data = (char*)self->data + self->segments[i].offset; | |
|
206 | result->dataSize = (Py_ssize_t)self->segments[i].length; | |
|
207 | result->offset = self->segments[i].offset; | |
|
208 | ||
|
209 | return result; | |
|
200 | return PyBuffer_FillInfo(view, (PyObject *)self, self->data, | |
|
201 | (Py_ssize_t)self->dataSize, 1, flags); | |
|
210 | 202 | } |
|
211 | 203 | |
|
212 | #if PY_MAJOR_VERSION >= 3 | |
|
213 | static int BufferWithSegments_getbuffer(ZstdBufferWithSegments* self, Py_buffer* view, int flags) { | |
|
214 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
215 | view->obj = NULL; | |
|
216 | PyErr_SetString(PyExc_BufferError, "buffer is too large for this platform"); | |
|
217 | return -1; | |
|
218 | } | |
|
204 | static PyObject *BufferWithSegments_tobytes(ZstdBufferWithSegments *self) { | |
|
205 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
206 | PyErr_SetString(PyExc_ValueError, | |
|
207 | "buffer is too large for this platform"); | |
|
208 | return NULL; | |
|
209 | } | |
|
219 | 210 | |
|
220 |
|
|
|
221 | } | |
|
222 | #else | |
|
223 | static Py_ssize_t BufferWithSegments_getreadbuffer(ZstdBufferWithSegments* self, Py_ssize_t segment, void **ptrptr) { | |
|
224 | if (segment != 0) { | |
|
225 | PyErr_SetString(PyExc_ValueError, "segment number must be 0"); | |
|
226 | return -1; | |
|
227 | } | |
|
228 | ||
|
229 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
230 | PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform"); | |
|
231 | return -1; | |
|
232 | } | |
|
233 | ||
|
234 | *ptrptr = self->data; | |
|
235 | return (Py_ssize_t)self->dataSize; | |
|
211 | return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize); | |
|
236 | 212 | } |
|
237 | 213 | |
|
238 | static Py_ssize_t BufferWithSegments_getsegcount(ZstdBufferWithSegments* self, Py_ssize_t* len) { | |
|
239 | if (len) { | |
|
240 | *len = 1; | |
|
241 | } | |
|
214 | static ZstdBufferSegments * | |
|
215 | BufferWithSegments_segments(ZstdBufferWithSegments *self) { | |
|
216 | ZstdBufferSegments *result = (ZstdBufferSegments *)PyObject_CallObject( | |
|
217 | (PyObject *)ZstdBufferSegmentsType, NULL); | |
|
218 | if (NULL == result) { | |
|
219 | return NULL; | |
|
220 | } | |
|
242 | 221 | |
|
243 | return 1; | |
|
222 | result->parent = (PyObject *)self; | |
|
223 | Py_INCREF(self); | |
|
224 | result->segments = self->segments; | |
|
225 | result->segmentCount = self->segmentCount; | |
|
226 | ||
|
227 | return result; | |
|
244 | 228 | } |
|
229 | ||
|
230 | #if PY_VERSION_HEX < 0x03090000 | |
|
231 | static PyBufferProcs BufferWithSegments_as_buffer = { | |
|
232 | (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */ | |
|
233 | 0 /* bf_releasebuffer */ | |
|
234 | }; | |
|
245 | 235 | #endif |
|
246 | 236 | |
|
247 | PyDoc_STRVAR(BufferWithSegments_tobytes__doc__, | |
|
248 | "Obtain a bytes instance for this buffer.\n" | |
|
249 | ); | |
|
237 | static PyMethodDef BufferWithSegments_methods[] = { | |
|
238 | {"segments", (PyCFunction)BufferWithSegments_segments, METH_NOARGS, NULL}, | |
|
239 | {"tobytes", (PyCFunction)BufferWithSegments_tobytes, METH_NOARGS, NULL}, | |
|
240 | {NULL, NULL}}; | |
|
250 | 241 | |
|
251 |
static Py |
|
|
252 | if (self->dataSize > PY_SSIZE_T_MAX) { | |
|
253 | PyErr_SetString(PyExc_ValueError, "buffer is too large for this platform"); | |
|
254 | return NULL; | |
|
255 | } | |
|
256 | ||
|
257 | return PyBytes_FromStringAndSize(self->data, (Py_ssize_t)self->dataSize); | |
|
258 | } | |
|
259 | ||
|
260 | PyDoc_STRVAR(BufferWithSegments_segments__doc__, | |
|
261 | "Obtain a BufferSegments describing segments in this sintance.\n" | |
|
262 | ); | |
|
242 | static PyMemberDef BufferWithSegments_members[] = { | |
|
243 | {"size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize), READONLY, | |
|
244 | "total size of the buffer in bytes"}, | |
|
245 | {NULL}}; | |
|
263 | 246 | |
|
264 | static ZstdBufferSegments* BufferWithSegments_segments(ZstdBufferWithSegments* self) { | |
|
265 | ZstdBufferSegments* result = (ZstdBufferSegments*)PyObject_CallObject((PyObject*)&ZstdBufferSegmentsType, NULL); | |
|
266 | if (NULL == result) { | |
|
267 | return NULL; | |
|
268 | } | |
|
269 | ||
|
270 | result->parent = (PyObject*)self; | |
|
271 | Py_INCREF(self); | |
|
272 | result->segments = self->segments; | |
|
273 | result->segmentCount = self->segmentCount; | |
|
274 | ||
|
275 | return result; | |
|
276 | } | |
|
277 | ||
|
278 | static PySequenceMethods BufferWithSegments_sq = { | |
|
279 | (lenfunc)BufferWithSegments_length, /* sq_length */ | |
|
280 | 0, /* sq_concat */ | |
|
281 | 0, /* sq_repeat */ | |
|
282 | (ssizeargfunc)BufferWithSegments_item, /* sq_item */ | |
|
283 | 0, /* sq_ass_item */ | |
|
284 | 0, /* sq_contains */ | |
|
285 | 0, /* sq_inplace_concat */ | |
|
286 | 0 /* sq_inplace_repeat */ | |
|
247 | PyType_Slot ZstdBufferWithSegmentsSlots[] = { | |
|
248 | {Py_tp_dealloc, BufferWithSegments_dealloc}, | |
|
249 | {Py_sq_length, BufferWithSegments_length}, | |
|
250 | {Py_sq_item, BufferWithSegments_item}, | |
|
251 | #if PY_VERSION_HEX >= 0x03090000 | |
|
252 | {Py_bf_getbuffer, BufferWithSegments_getbuffer}, | |
|
253 | #endif | |
|
254 | {Py_tp_methods, BufferWithSegments_methods}, | |
|
255 | {Py_tp_members, BufferWithSegments_members}, | |
|
256 | {Py_tp_init, BufferWithSegments_init}, | |
|
257 | {Py_tp_new, PyType_GenericNew}, | |
|
258 | {0, NULL}, | |
|
287 | 259 | }; |
|
288 | 260 | |
|
289 | static PyBufferProcs BufferWithSegments_as_buffer = { | |
|
290 | #if PY_MAJOR_VERSION >= 3 | |
|
291 | (getbufferproc)BufferWithSegments_getbuffer, /* bf_getbuffer */ | |
|
292 | 0 /* bf_releasebuffer */ | |
|
293 | #else | |
|
294 | (readbufferproc)BufferWithSegments_getreadbuffer, /* bf_getreadbuffer */ | |
|
295 | 0, /* bf_getwritebuffer */ | |
|
296 | (segcountproc)BufferWithSegments_getsegcount, /* bf_getsegcount */ | |
|
297 | 0 /* bf_getcharbuffer */ | |
|
298 | #endif | |
|
299 | }; | |
|
300 | ||
|
301 | static PyMethodDef BufferWithSegments_methods[] = { | |
|
302 | { "segments", (PyCFunction)BufferWithSegments_segments, | |
|
303 | METH_NOARGS, BufferWithSegments_segments__doc__ }, | |
|
304 | { "tobytes", (PyCFunction)BufferWithSegments_tobytes, | |
|
305 | METH_NOARGS, BufferWithSegments_tobytes__doc__ }, | |
|
306 | { NULL, NULL } | |
|
307 | }; | |
|
308 | ||
|
309 | static PyMemberDef BufferWithSegments_members[] = { | |
|
310 | { "size", T_ULONGLONG, offsetof(ZstdBufferWithSegments, dataSize), | |
|
311 | READONLY, "total size of the buffer in bytes" }, | |
|
312 | { NULL } | |
|
261 | PyType_Spec ZstdBufferWithSegmentsSpec = { | |
|
262 | "zstd.BufferWithSegments", | |
|
263 | sizeof(ZstdBufferWithSegments), | |
|
264 | 0, | |
|
265 | Py_TPFLAGS_DEFAULT, | |
|
266 | ZstdBufferWithSegmentsSlots, | |
|
313 | 267 | }; |
|
314 | 268 | |
|
315 |
PyTypeObject ZstdBufferWithSegmentsType |
|
|
316 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
317 | "zstd.BufferWithSegments", /* tp_name */ | |
|
318 | sizeof(ZstdBufferWithSegments),/* tp_basicsize */ | |
|
319 | 0, /* tp_itemsize */ | |
|
320 | (destructor)BufferWithSegments_dealloc, /* tp_dealloc */ | |
|
321 | 0, /* tp_print */ | |
|
322 | 0, /* tp_getattr */ | |
|
323 | 0, /* tp_setattr */ | |
|
324 | 0, /* tp_compare */ | |
|
325 | 0, /* tp_repr */ | |
|
326 | 0, /* tp_as_number */ | |
|
327 | &BufferWithSegments_sq, /* tp_as_sequence */ | |
|
328 | 0, /* tp_as_mapping */ | |
|
329 | 0, /* tp_hash */ | |
|
330 | 0, /* tp_call */ | |
|
331 | 0, /* tp_str */ | |
|
332 | 0, /* tp_getattro */ | |
|
333 | 0, /* tp_setattro */ | |
|
334 | &BufferWithSegments_as_buffer, /* tp_as_buffer */ | |
|
335 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
|
336 | BufferWithSegments__doc__, /* tp_doc */ | |
|
337 | 0, /* tp_traverse */ | |
|
338 | 0, /* tp_clear */ | |
|
339 | 0, /* tp_richcompare */ | |
|
340 | 0, /* tp_weaklistoffset */ | |
|
341 | 0, /* tp_iter */ | |
|
342 | 0, /* tp_iternext */ | |
|
343 | BufferWithSegments_methods, /* tp_methods */ | |
|
344 | BufferWithSegments_members, /* tp_members */ | |
|
345 | 0, /* tp_getset */ | |
|
346 | 0, /* tp_base */ | |
|
347 | 0, /* tp_dict */ | |
|
348 | 0, /* tp_descr_get */ | |
|
349 | 0, /* tp_descr_set */ | |
|
350 | 0, /* tp_dictoffset */ | |
|
351 | (initproc)BufferWithSegments_init, /* tp_init */ | |
|
352 | 0, /* tp_alloc */ | |
|
353 | PyType_GenericNew, /* tp_new */ | |
|
354 | }; | |
|
269 | PyTypeObject *ZstdBufferWithSegmentsType; | |
|
355 | 270 | |
|
356 | PyDoc_STRVAR(BufferSegments__doc__, | |
|
357 | "BufferSegments - Represents segments/offsets within a BufferWithSegments\n" | |
|
358 | ); | |
|
271 | static void BufferSegments_dealloc(ZstdBufferSegments *self) { | |
|
272 | Py_CLEAR(self->parent); | |
|
273 | PyObject_Del(self); | |
|
274 | } | |
|
359 | 275 | |
|
360 |
static |
|
|
361 | Py_CLEAR(self->parent); | |
|
362 | PyObject_Del(self); | |
|
276 | static int BufferSegments_getbuffer(ZstdBufferSegments *self, Py_buffer *view, | |
|
277 | int flags) { | |
|
278 | return PyBuffer_FillInfo(view, (PyObject *)self, (void *)self->segments, | |
|
279 | self->segmentCount * sizeof(BufferSegment), 1, | |
|
280 | flags); | |
|
363 | 281 | } |
|
364 | 282 | |
|
365 | #if PY_MAJOR_VERSION >= 3 | |
|
366 | static int BufferSegments_getbuffer(ZstdBufferSegments* self, Py_buffer* view, int flags) { | |
|
367 | return PyBuffer_FillInfo(view, (PyObject*)self, | |
|
368 | (void*)self->segments, self->segmentCount * sizeof(BufferSegment), | |
|
369 | 1, flags); | |
|
370 | } | |
|
371 | #else | |
|
372 | static Py_ssize_t BufferSegments_getreadbuffer(ZstdBufferSegments* self, Py_ssize_t segment, void **ptrptr) { | |
|
373 | if (segment != 0) { | |
|
374 | PyErr_SetString(PyExc_ValueError, "segment number must be 0"); | |
|
375 | return -1; | |
|
376 | } | |
|
377 | ||
|
378 | *ptrptr = (void*)self->segments; | |
|
379 | return self->segmentCount * sizeof(BufferSegment); | |
|
380 | } | |
|
283 | PyType_Slot ZstdBufferSegmentsSlots[] = { | |
|
284 | {Py_tp_dealloc, BufferSegments_dealloc}, | |
|
285 | #if PY_VERSION_HEX >= 0x03090000 | |
|
286 | {Py_bf_getbuffer, BufferSegments_getbuffer}, | |
|
287 | #endif | |
|
288 | {Py_tp_new, PyType_GenericNew}, | |
|
289 | {0, NULL}, | |
|
290 | }; | |
|
381 | 291 | |
|
382 | static Py_ssize_t BufferSegments_getsegcount(ZstdBufferSegments* self, Py_ssize_t* len) { | |
|
383 | if (len) { | |
|
384 | *len = 1; | |
|
385 | } | |
|
386 | ||
|
387 | return 1; | |
|
388 | } | |
|
389 | #endif | |
|
390 | ||
|
391 | static PyBufferProcs BufferSegments_as_buffer = { | |
|
392 | #if PY_MAJOR_VERSION >= 3 | |
|
393 | (getbufferproc)BufferSegments_getbuffer, | |
|
394 | 0 | |
|
395 | #else | |
|
396 | (readbufferproc)BufferSegments_getreadbuffer, | |
|
397 | 0, | |
|
398 | (segcountproc)BufferSegments_getsegcount, | |
|
399 | 0 | |
|
400 | #endif | |
|
292 | PyType_Spec ZstdBufferSegmentsSpec = { | |
|
293 | "zstd.BufferSegments", | |
|
294 | sizeof(ZstdBufferSegments), | |
|
295 | 0, | |
|
296 | Py_TPFLAGS_DEFAULT, | |
|
297 | ZstdBufferSegmentsSlots, | |
|
401 | 298 | }; |
|
402 | 299 | |
|
403 | PyTypeObject ZstdBufferSegmentsType = { | |
|
404 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
405 | "zstd.BufferSegments", /* tp_name */ | |
|
406 | sizeof(ZstdBufferSegments),/* tp_basicsize */ | |
|
407 | 0, /* tp_itemsize */ | |
|
408 | (destructor)BufferSegments_dealloc, /* tp_dealloc */ | |
|
409 | 0, /* tp_print */ | |
|
410 | 0, /* tp_getattr */ | |
|
411 | 0, /* tp_setattr */ | |
|
412 | 0, /* tp_compare */ | |
|
413 | 0, /* tp_repr */ | |
|
414 | 0, /* tp_as_number */ | |
|
415 | 0, /* tp_as_sequence */ | |
|
416 | 0, /* tp_as_mapping */ | |
|
417 | 0, /* tp_hash */ | |
|
418 | 0, /* tp_call */ | |
|
419 | 0, /* tp_str */ | |
|
420 | 0, /* tp_getattro */ | |
|
421 | 0, /* tp_setattro */ | |
|
422 | &BufferSegments_as_buffer, /* tp_as_buffer */ | |
|
423 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
|
424 | BufferSegments__doc__, /* tp_doc */ | |
|
425 | 0, /* tp_traverse */ | |
|
426 | 0, /* tp_clear */ | |
|
427 | 0, /* tp_richcompare */ | |
|
428 | 0, /* tp_weaklistoffset */ | |
|
429 | 0, /* tp_iter */ | |
|
430 | 0, /* tp_iternext */ | |
|
431 | 0, /* tp_methods */ | |
|
432 | 0, /* tp_members */ | |
|
433 | 0, /* tp_getset */ | |
|
434 | 0, /* tp_base */ | |
|
435 | 0, /* tp_dict */ | |
|
436 | 0, /* tp_descr_get */ | |
|
437 | 0, /* tp_descr_set */ | |
|
438 | 0, /* tp_dictoffset */ | |
|
439 | 0, /* tp_init */ | |
|
440 | 0, /* tp_alloc */ | |
|
441 | PyType_GenericNew, /* tp_new */ | |
|
442 | }; | |
|
300 | #if PY_VERSION_HEX < 0x03090000 | |
|
301 | static PyBufferProcs BufferSegments_as_buffer = { | |
|
302 | (getbufferproc)BufferSegments_getbuffer, 0}; | |
|
303 | #endif | |
|
443 | 304 | |
|
444 | PyDoc_STRVAR(BufferSegment__doc__, | |
|
445 | "BufferSegment - Represents a segment within a BufferWithSegments\n" | |
|
446 | ); | |
|
305 | PyTypeObject *ZstdBufferSegmentsType; | |
|
447 | 306 | |
|
448 |
static void BufferSegment_dealloc(ZstdBufferSegment* |
|
|
449 |
|
|
|
450 |
|
|
|
307 | static void BufferSegment_dealloc(ZstdBufferSegment *self) { | |
|
308 | Py_CLEAR(self->parent); | |
|
309 | PyObject_Del(self); | |
|
451 | 310 | } |
|
452 | 311 | |
|
453 |
static Py_ssize_t BufferSegment_length(ZstdBufferSegment* |
|
|
454 |
|
|
|
312 | static Py_ssize_t BufferSegment_length(ZstdBufferSegment *self) { | |
|
313 | return self->dataSize; | |
|
314 | } | |
|
315 | ||
|
316 | static int BufferSegment_getbuffer(ZstdBufferSegment *self, Py_buffer *view, | |
|
317 | int flags) { | |
|
318 | return PyBuffer_FillInfo(view, (PyObject *)self, self->data, self->dataSize, | |
|
319 | 1, flags); | |
|
320 | } | |
|
321 | ||
|
322 | static PyObject *BufferSegment_tobytes(ZstdBufferSegment *self) { | |
|
323 | return PyBytes_FromStringAndSize(self->data, self->dataSize); | |
|
455 | 324 | } |
|
456 | 325 | |
|
457 |
#if PY_ |
|
|
458 | static int BufferSegment_getbuffer(ZstdBufferSegment* self, Py_buffer* view, int flags) { | |
|
459 | return PyBuffer_FillInfo(view, (PyObject*)self, | |
|
460 | self->data, self->dataSize, 1, flags); | |
|
461 | } | |
|
462 | #else | |
|
463 | static Py_ssize_t BufferSegment_getreadbuffer(ZstdBufferSegment* self, Py_ssize_t segment, void **ptrptr) { | |
|
464 | if (segment != 0) { | |
|
465 | PyErr_SetString(PyExc_ValueError, "segment number must be 0"); | |
|
466 | return -1; | |
|
467 | } | |
|
468 | ||
|
469 | *ptrptr = self->data; | |
|
470 | return self->dataSize; | |
|
471 | } | |
|
472 | ||
|
473 | static Py_ssize_t BufferSegment_getsegcount(ZstdBufferSegment* self, Py_ssize_t* len) { | |
|
474 | if (len) { | |
|
475 | *len = 1; | |
|
476 | } | |
|
477 | ||
|
478 | return 1; | |
|
479 | } | |
|
326 | #if PY_VERSION_HEX < 0x03090000 | |
|
327 | static PyBufferProcs BufferSegment_as_buffer = { | |
|
328 | (getbufferproc)BufferSegment_getbuffer, 0}; | |
|
480 | 329 | #endif |
|
481 | 330 | |
|
482 | PyDoc_STRVAR(BufferSegment_tobytes__doc__, | |
|
483 | "Obtain a bytes instance for this segment.\n" | |
|
484 | ); | |
|
485 | ||
|
486 | static PyObject* BufferSegment_tobytes(ZstdBufferSegment* self) { | |
|
487 | return PyBytes_FromStringAndSize(self->data, self->dataSize); | |
|
488 | } | |
|
489 | ||
|
490 | static PySequenceMethods BufferSegment_sq = { | |
|
491 | (lenfunc)BufferSegment_length, /* sq_length */ | |
|
492 | 0, /* sq_concat */ | |
|
493 | 0, /* sq_repeat */ | |
|
494 | 0, /* sq_item */ | |
|
495 | 0, /* sq_ass_item */ | |
|
496 | 0, /* sq_contains */ | |
|
497 | 0, /* sq_inplace_concat */ | |
|
498 | 0 /* sq_inplace_repeat */ | |
|
499 | }; | |
|
500 | ||
|
501 | static PyBufferProcs BufferSegment_as_buffer = { | |
|
502 | #if PY_MAJOR_VERSION >= 3 | |
|
503 | (getbufferproc)BufferSegment_getbuffer, | |
|
504 | 0 | |
|
505 | #else | |
|
506 | (readbufferproc)BufferSegment_getreadbuffer, | |
|
507 | 0, | |
|
508 | (segcountproc)BufferSegment_getsegcount, | |
|
509 | 0 | |
|
510 | #endif | |
|
511 | }; | |
|
512 | ||
|
513 | 331 | static PyMethodDef BufferSegment_methods[] = { |
|
514 |
|
|
|
515 | METH_NOARGS, BufferSegment_tobytes__doc__ }, | |
|
516 | { NULL, NULL } | |
|
517 | }; | |
|
332 | {"tobytes", (PyCFunction)BufferSegment_tobytes, METH_NOARGS, NULL}, | |
|
333 | {NULL, NULL}}; | |
|
518 | 334 | |
|
519 | 335 | static PyMemberDef BufferSegment_members[] = { |
|
520 |
|
|
|
521 |
|
|
|
522 |
|
|
|
336 | {"offset", T_ULONGLONG, offsetof(ZstdBufferSegment, offset), READONLY, | |
|
337 | "offset of segment within parent buffer"}, | |
|
338 | {NULL}}; | |
|
339 | ||
|
340 | PyType_Slot ZstdBufferSegmentSlots[] = { | |
|
341 | {Py_tp_dealloc, BufferSegment_dealloc}, | |
|
342 | {Py_sq_length, BufferSegment_length}, | |
|
343 | #if PY_VERSION_HEX >= 0x03090000 | |
|
344 | {Py_bf_getbuffer, BufferSegment_getbuffer}, | |
|
345 | #endif | |
|
346 | {Py_tp_methods, BufferSegment_methods}, | |
|
347 | {Py_tp_members, BufferSegment_members}, | |
|
348 | {Py_tp_new, PyType_GenericNew}, | |
|
349 | {0, NULL}, | |
|
350 | }; | |
|
351 | ||
|
352 | PyType_Spec ZstdBufferSegmentSpec = { | |
|
353 | "zstd.BufferSegment", | |
|
354 | sizeof(ZstdBufferSegment), | |
|
355 | 0, | |
|
356 | Py_TPFLAGS_DEFAULT, | |
|
357 | ZstdBufferSegmentSlots, | |
|
523 | 358 | }; |
|
524 | 359 | |
|
525 |
PyTypeObject ZstdBufferSegmentType |
|
|
526 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
527 | "zstd.BufferSegment", /* tp_name */ | |
|
528 | sizeof(ZstdBufferSegment),/* tp_basicsize */ | |
|
529 | 0, /* tp_itemsize */ | |
|
530 | (destructor)BufferSegment_dealloc, /* tp_dealloc */ | |
|
531 | 0, /* tp_print */ | |
|
532 | 0, /* tp_getattr */ | |
|
533 | 0, /* tp_setattr */ | |
|
534 | 0, /* tp_compare */ | |
|
535 | 0, /* tp_repr */ | |
|
536 | 0, /* tp_as_number */ | |
|
537 | &BufferSegment_sq, /* tp_as_sequence */ | |
|
538 | 0, /* tp_as_mapping */ | |
|
539 | 0, /* tp_hash */ | |
|
540 | 0, /* tp_call */ | |
|
541 | 0, /* tp_str */ | |
|
542 | 0, /* tp_getattro */ | |
|
543 | 0, /* tp_setattro */ | |
|
544 | &BufferSegment_as_buffer, /* tp_as_buffer */ | |
|
545 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
|
546 | BufferSegment__doc__, /* tp_doc */ | |
|
547 | 0, /* tp_traverse */ | |
|
548 | 0, /* tp_clear */ | |
|
549 | 0, /* tp_richcompare */ | |
|
550 | 0, /* tp_weaklistoffset */ | |
|
551 | 0, /* tp_iter */ | |
|
552 | 0, /* tp_iternext */ | |
|
553 | BufferSegment_methods, /* tp_methods */ | |
|
554 | BufferSegment_members, /* tp_members */ | |
|
555 | 0, /* tp_getset */ | |
|
556 | 0, /* tp_base */ | |
|
557 | 0, /* tp_dict */ | |
|
558 | 0, /* tp_descr_get */ | |
|
559 | 0, /* tp_descr_set */ | |
|
560 | 0, /* tp_dictoffset */ | |
|
561 | 0, /* tp_init */ | |
|
562 | 0, /* tp_alloc */ | |
|
563 | PyType_GenericNew, /* tp_new */ | |
|
564 | }; | |
|
360 | PyTypeObject *ZstdBufferSegmentType; | |
|
361 | ||
|
362 | static void | |
|
363 | BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection *self) { | |
|
364 | Py_ssize_t i; | |
|
365 | ||
|
366 | if (self->firstElements) { | |
|
367 | PyMem_Free(self->firstElements); | |
|
368 | self->firstElements = NULL; | |
|
369 | } | |
|
370 | ||
|
371 | if (self->buffers) { | |
|
372 | for (i = 0; i < self->bufferCount; i++) { | |
|
373 | Py_CLEAR(self->buffers[i]); | |
|
374 | } | |
|
375 | ||
|
376 | PyMem_Free(self->buffers); | |
|
377 | self->buffers = NULL; | |
|
378 | } | |
|
379 | ||
|
380 | PyObject_Del(self); | |
|
381 | } | |
|
382 | ||
|
383 | static int | |
|
384 | BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection *self, | |
|
385 | PyObject *args) { | |
|
386 | Py_ssize_t size; | |
|
387 | Py_ssize_t i; | |
|
388 | Py_ssize_t offset = 0; | |
|
389 | ||
|
390 | size = PyTuple_Size(args); | |
|
391 | if (-1 == size) { | |
|
392 | return -1; | |
|
393 | } | |
|
394 | ||
|
395 | if (0 == size) { | |
|
396 | PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument"); | |
|
397 | return -1; | |
|
398 | } | |
|
565 | 399 | |
|
566 | PyDoc_STRVAR(BufferWithSegmentsCollection__doc__, | |
|
567 | "Represents a collection of BufferWithSegments.\n" | |
|
568 | ); | |
|
400 | for (i = 0; i < size; i++) { | |
|
401 | PyObject *item = PyTuple_GET_ITEM(args, i); | |
|
402 | if (!PyObject_TypeCheck(item, ZstdBufferWithSegmentsType)) { | |
|
403 | PyErr_SetString(PyExc_TypeError, | |
|
404 | "arguments must be BufferWithSegments instances"); | |
|
405 | return -1; | |
|
406 | } | |
|
569 | 407 | |
|
570 | static void BufferWithSegmentsCollection_dealloc(ZstdBufferWithSegmentsCollection* self) { | |
|
571 | Py_ssize_t i; | |
|
408 | if (0 == ((ZstdBufferWithSegments *)item)->segmentCount || | |
|
409 | 0 == ((ZstdBufferWithSegments *)item)->dataSize) { | |
|
410 | PyErr_SetString(PyExc_ValueError, | |
|
411 | "ZstdBufferWithSegments cannot be empty"); | |
|
412 | return -1; | |
|
413 | } | |
|
414 | } | |
|
415 | ||
|
416 | self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments *)); | |
|
417 | if (NULL == self->buffers) { | |
|
418 | PyErr_NoMemory(); | |
|
419 | return -1; | |
|
420 | } | |
|
572 | 421 | |
|
573 | if (self->firstElements) { | |
|
574 | PyMem_Free(self->firstElements); | |
|
575 | self->firstElements = NULL; | |
|
576 | } | |
|
422 | self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t)); | |
|
423 | if (NULL == self->firstElements) { | |
|
424 | PyMem_Free(self->buffers); | |
|
425 | self->buffers = NULL; | |
|
426 | PyErr_NoMemory(); | |
|
427 | return -1; | |
|
428 | } | |
|
429 | ||
|
430 | self->bufferCount = size; | |
|
431 | ||
|
432 | for (i = 0; i < size; i++) { | |
|
433 | ZstdBufferWithSegments *item = | |
|
434 | (ZstdBufferWithSegments *)PyTuple_GET_ITEM(args, i); | |
|
577 | 435 | |
|
578 | if (self->buffers) { | |
|
579 | for (i = 0; i < self->bufferCount; i++) { | |
|
580 | Py_CLEAR(self->buffers[i]); | |
|
581 | } | |
|
436 | self->buffers[i] = item; | |
|
437 | Py_INCREF(item); | |
|
438 | ||
|
439 | if (i > 0) { | |
|
440 | self->firstElements[i - 1] = offset; | |
|
441 | } | |
|
582 | 442 | |
|
583 | PyMem_Free(self->buffers); | |
|
584 | self->buffers = NULL; | |
|
585 | } | |
|
443 | offset += item->segmentCount; | |
|
444 | } | |
|
586 | 445 | |
|
587 | PyObject_Del(self); | |
|
446 | self->firstElements[size - 1] = offset; | |
|
447 | ||
|
448 | return 0; | |
|
588 | 449 | } |
|
589 | 450 | |
|
590 | static int BufferWithSegmentsCollection_init(ZstdBufferWithSegmentsCollection* self, PyObject* args) { | |
|
591 | Py_ssize_t size; | |
|
592 |
|
|
|
593 |
|
|
|
594 | ||
|
595 | size = PyTuple_Size(args); | |
|
596 | if (-1 == size) { | |
|
597 | return -1; | |
|
598 | } | |
|
599 | ||
|
600 | if (0 == size) { | |
|
601 | PyErr_SetString(PyExc_ValueError, "must pass at least 1 argument"); | |
|
602 | return -1; | |
|
603 | } | |
|
604 | ||
|
605 | for (i = 0; i < size; i++) { | |
|
606 | PyObject* item = PyTuple_GET_ITEM(args, i); | |
|
607 | if (!PyObject_TypeCheck(item, &ZstdBufferWithSegmentsType)) { | |
|
608 | PyErr_SetString(PyExc_TypeError, "arguments must be BufferWithSegments instances"); | |
|
609 | return -1; | |
|
610 | } | |
|
611 | ||
|
612 | if (0 == ((ZstdBufferWithSegments*)item)->segmentCount || | |
|
613 | 0 == ((ZstdBufferWithSegments*)item)->dataSize) { | |
|
614 | PyErr_SetString(PyExc_ValueError, "ZstdBufferWithSegments cannot be empty"); | |
|
615 | return -1; | |
|
616 | } | |
|
617 | } | |
|
451 | static PyObject * | |
|
452 | BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection *self) { | |
|
453 | Py_ssize_t i; | |
|
454 | Py_ssize_t j; | |
|
455 | unsigned long long size = 0; | |
|
618 | 456 | |
|
619 | self->buffers = PyMem_Malloc(size * sizeof(ZstdBufferWithSegments*)); | |
|
620 | if (NULL == self->buffers) { | |
|
621 | PyErr_NoMemory(); | |
|
622 | return -1; | |
|
623 | } | |
|
624 | ||
|
625 | self->firstElements = PyMem_Malloc(size * sizeof(Py_ssize_t)); | |
|
626 | if (NULL == self->firstElements) { | |
|
627 | PyMem_Free(self->buffers); | |
|
628 | self->buffers = NULL; | |
|
629 | PyErr_NoMemory(); | |
|
630 | return -1; | |
|
631 | } | |
|
632 | ||
|
633 | self->bufferCount = size; | |
|
457 | for (i = 0; i < self->bufferCount; i++) { | |
|
458 | for (j = 0; j < self->buffers[i]->segmentCount; j++) { | |
|
459 | size += self->buffers[i]->segments[j].length; | |
|
460 | } | |
|
461 | } | |
|
634 | 462 | |
|
635 | for (i = 0; i < size; i++) { | |
|
636 | ZstdBufferWithSegments* item = (ZstdBufferWithSegments*)PyTuple_GET_ITEM(args, i); | |
|
637 | ||
|
638 | self->buffers[i] = item; | |
|
639 | Py_INCREF(item); | |
|
463 | return PyLong_FromUnsignedLongLong(size); | |
|
464 | } | |
|
640 | 465 | |
|
641 | if (i > 0) { | |
|
642 | self->firstElements[i - 1] = offset; | |
|
643 | } | |
|
644 | ||
|
645 | offset += item->segmentCount; | |
|
646 | } | |
|
647 | ||
|
648 | self->firstElements[size - 1] = offset; | |
|
649 | ||
|
650 | return 0; | |
|
466 | Py_ssize_t | |
|
467 | BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection *self) { | |
|
468 | return self->firstElements[self->bufferCount - 1]; | |
|
651 | 469 | } |
|
652 | 470 | |
|
653 | static PyObject* BufferWithSegmentsCollection_size(ZstdBufferWithSegmentsCollection* self) { | |
|
654 | Py_ssize_t i; | |
|
655 | Py_ssize_t j; | |
|
656 | unsigned long long size = 0; | |
|
471 | static ZstdBufferSegment * | |
|
472 | BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection *self, | |
|
473 | Py_ssize_t i) { | |
|
474 | Py_ssize_t bufferOffset; | |
|
475 | ||
|
476 | if (i < 0) { | |
|
477 | PyErr_SetString(PyExc_IndexError, "offset must be non-negative"); | |
|
478 | return NULL; | |
|
479 | } | |
|
480 | ||
|
481 | if (i >= BufferWithSegmentsCollection_length(self)) { | |
|
482 | PyErr_Format(PyExc_IndexError, "offset must be less than %zd", | |
|
483 | BufferWithSegmentsCollection_length(self)); | |
|
484 | return NULL; | |
|
485 | } | |
|
657 | 486 | |
|
658 |
|
|
|
659 | for (j = 0; j < self->buffers[i]->segmentCount; j++) { | |
|
660 | size += self->buffers[i]->segments[j].length; | |
|
661 | } | |
|
662 | } | |
|
487 | for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) { | |
|
488 | Py_ssize_t offset = 0; | |
|
489 | ||
|
490 | if (i < self->firstElements[bufferOffset]) { | |
|
491 | if (bufferOffset > 0) { | |
|
492 | offset = self->firstElements[bufferOffset - 1]; | |
|
493 | } | |
|
663 | 494 | |
|
664 | return PyLong_FromUnsignedLongLong(size); | |
|
665 | } | |
|
495 | return BufferWithSegments_item(self->buffers[bufferOffset], | |
|
496 | i - offset); | |
|
497 | } | |
|
498 | } | |
|
666 | 499 | |
|
667 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection* self) { | |
|
668 | return self->firstElements[self->bufferCount - 1]; | |
|
500 | PyErr_SetString(ZstdError, | |
|
501 | "error resolving segment; this should not happen"); | |
|
502 | return NULL; | |
|
669 | 503 | } |
|
670 | 504 | |
|
671 | static ZstdBufferSegment* BufferWithSegmentsCollection_item(ZstdBufferWithSegmentsCollection* self, Py_ssize_t i) { | |
|
672 | Py_ssize_t bufferOffset; | |
|
673 | ||
|
674 | if (i < 0) { | |
|
675 | PyErr_SetString(PyExc_IndexError, "offset must be non-negative"); | |
|
676 | return NULL; | |
|
677 | } | |
|
678 | ||
|
679 | if (i >= BufferWithSegmentsCollection_length(self)) { | |
|
680 | PyErr_Format(PyExc_IndexError, "offset must be less than %zd", | |
|
681 | BufferWithSegmentsCollection_length(self)); | |
|
682 | return NULL; | |
|
683 | } | |
|
684 | ||
|
685 | for (bufferOffset = 0; bufferOffset < self->bufferCount; bufferOffset++) { | |
|
686 | Py_ssize_t offset = 0; | |
|
505 | static PyMethodDef BufferWithSegmentsCollection_methods[] = { | |
|
506 | {"size", (PyCFunction)BufferWithSegmentsCollection_size, METH_NOARGS, | |
|
507 | PyDoc_STR("total size in bytes of all segments")}, | |
|
508 | {NULL, NULL}}; | |
|
687 | 509 | |
|
688 | if (i < self->firstElements[bufferOffset]) { | |
|
689 | if (bufferOffset > 0) { | |
|
690 | offset = self->firstElements[bufferOffset - 1]; | |
|
691 | } | |
|
692 | ||
|
693 | return BufferWithSegments_item(self->buffers[bufferOffset], i - offset); | |
|
694 | } | |
|
695 | } | |
|
696 | ||
|
697 | PyErr_SetString(ZstdError, "error resolving segment; this should not happen"); | |
|
698 | return NULL; | |
|
699 | } | |
|
700 | ||
|
701 | static PySequenceMethods BufferWithSegmentsCollection_sq = { | |
|
702 | (lenfunc)BufferWithSegmentsCollection_length, /* sq_length */ | |
|
703 | 0, /* sq_concat */ | |
|
704 | 0, /* sq_repeat */ | |
|
705 | (ssizeargfunc)BufferWithSegmentsCollection_item, /* sq_item */ | |
|
706 | 0, /* sq_ass_item */ | |
|
707 | 0, /* sq_contains */ | |
|
708 | 0, /* sq_inplace_concat */ | |
|
709 | 0 /* sq_inplace_repeat */ | |
|
510 | PyType_Slot ZstdBufferWithSegmentsCollectionSlots[] = { | |
|
511 | {Py_tp_dealloc, BufferWithSegmentsCollection_dealloc}, | |
|
512 | {Py_sq_length, BufferWithSegmentsCollection_length}, | |
|
513 | {Py_sq_item, BufferWithSegmentsCollection_item}, | |
|
514 | {Py_tp_methods, BufferWithSegmentsCollection_methods}, | |
|
515 | {Py_tp_init, BufferWithSegmentsCollection_init}, | |
|
516 | {Py_tp_new, PyType_GenericNew}, | |
|
517 | {0, NULL}, | |
|
710 | 518 | }; |
|
711 | 519 | |
|
712 |
|
|
|
713 |
|
|
|
714 | METH_NOARGS, PyDoc_STR("total size in bytes of all segments") }, | |
|
715 | { NULL, NULL } | |
|
520 | PyType_Spec ZstdBufferWithSegmentsCollectionSpec = { | |
|
521 | "zstd.BufferWithSegmentsCollection", | |
|
522 | sizeof(ZstdBufferWithSegmentsCollection), | |
|
523 | 0, | |
|
524 | Py_TPFLAGS_DEFAULT, | |
|
525 | ZstdBufferWithSegmentsCollectionSlots, | |
|
716 | 526 | }; |
|
717 | 527 | |
|
718 |
PyTypeObject ZstdBufferWithSegmentsCollectionType |
|
|
719 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
720 | "zstd.BufferWithSegmentsCollection", /* tp_name */ | |
|
721 | sizeof(ZstdBufferWithSegmentsCollection),/* tp_basicsize */ | |
|
722 | 0, /* tp_itemsize */ | |
|
723 | (destructor)BufferWithSegmentsCollection_dealloc, /* tp_dealloc */ | |
|
724 | 0, /* tp_print */ | |
|
725 | 0, /* tp_getattr */ | |
|
726 | 0, /* tp_setattr */ | |
|
727 | 0, /* tp_compare */ | |
|
728 | 0, /* tp_repr */ | |
|
729 | 0, /* tp_as_number */ | |
|
730 | &BufferWithSegmentsCollection_sq, /* tp_as_sequence */ | |
|
731 | 0, /* tp_as_mapping */ | |
|
732 | 0, /* tp_hash */ | |
|
733 | 0, /* tp_call */ | |
|
734 | 0, /* tp_str */ | |
|
735 | 0, /* tp_getattro */ | |
|
736 | 0, /* tp_setattro */ | |
|
737 | 0, /* tp_as_buffer */ | |
|
738 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
|
739 | BufferWithSegmentsCollection__doc__, /* tp_doc */ | |
|
740 | 0, /* tp_traverse */ | |
|
741 | 0, /* tp_clear */ | |
|
742 | 0, /* tp_richcompare */ | |
|
743 | 0, /* tp_weaklistoffset */ | |
|
744 | /* TODO implement iterator for performance. */ | |
|
745 | 0, /* tp_iter */ | |
|
746 | 0, /* tp_iternext */ | |
|
747 | BufferWithSegmentsCollection_methods, /* tp_methods */ | |
|
748 | 0, /* tp_members */ | |
|
749 | 0, /* tp_getset */ | |
|
750 | 0, /* tp_base */ | |
|
751 | 0, /* tp_dict */ | |
|
752 | 0, /* tp_descr_get */ | |
|
753 | 0, /* tp_descr_set */ | |
|
754 | 0, /* tp_dictoffset */ | |
|
755 | (initproc)BufferWithSegmentsCollection_init, /* tp_init */ | |
|
756 | 0, /* tp_alloc */ | |
|
757 | PyType_GenericNew, /* tp_new */ | |
|
758 | }; | |
|
528 | PyTypeObject *ZstdBufferWithSegmentsCollectionType; | |
|
529 | ||
|
530 | void bufferutil_module_init(PyObject *mod) { | |
|
531 | ZstdBufferWithSegmentsType = | |
|
532 | (PyTypeObject *)PyType_FromSpec(&ZstdBufferWithSegmentsSpec); | |
|
533 | #if PY_VERSION_HEX < 0x03090000 | |
|
534 | ZstdBufferWithSegmentsType->tp_as_buffer = &BufferWithSegments_as_buffer; | |
|
535 | #endif | |
|
536 | if (PyType_Ready(ZstdBufferWithSegmentsType) < 0) { | |
|
537 | return; | |
|
538 | } | |
|
539 | ||
|
540 | Py_INCREF(ZstdBufferWithSegmentsType); | |
|
541 | PyModule_AddObject(mod, "BufferWithSegments", | |
|
542 | (PyObject *)ZstdBufferWithSegmentsType); | |
|
543 | ||
|
544 | ZstdBufferSegmentsType = | |
|
545 | (PyTypeObject *)PyType_FromSpec(&ZstdBufferSegmentsSpec); | |
|
546 | #if PY_VERSION_HEX < 0x03090000 | |
|
547 | ZstdBufferSegmentsType->tp_as_buffer = &BufferSegments_as_buffer; | |
|
548 | #endif | |
|
549 | if (PyType_Ready(ZstdBufferSegmentsType) < 0) { | |
|
550 | return; | |
|
551 | } | |
|
759 | 552 | |
|
760 | void bufferutil_module_init(PyObject* mod) { | |
|
761 | Py_SET_TYPE(&ZstdBufferWithSegmentsType, &PyType_Type); | |
|
762 | if (PyType_Ready(&ZstdBufferWithSegmentsType) < 0) { | |
|
763 | return; | |
|
764 | } | |
|
553 | Py_INCREF(ZstdBufferSegmentsType); | |
|
554 | PyModule_AddObject(mod, "BufferSegments", | |
|
555 | (PyObject *)ZstdBufferSegmentsType); | |
|
765 | 556 | |
|
766 |
|
|
|
767 | PyModule_AddObject(mod, "BufferWithSegments", (PyObject*)&ZstdBufferWithSegmentsType); | |
|
768 | ||
|
769 | Py_SET_TYPE(&ZstdBufferSegmentsType, &PyType_Type); | |
|
770 | if (PyType_Ready(&ZstdBufferSegmentsType) < 0) { | |
|
771 | return; | |
|
772 | } | |
|
773 | ||
|
774 | Py_INCREF(&ZstdBufferSegmentsType); | |
|
775 | PyModule_AddObject(mod, "BufferSegments", (PyObject*)&ZstdBufferSegmentsType); | |
|
557 | ZstdBufferSegmentType = | |
|
558 | (PyTypeObject *)PyType_FromSpec(&ZstdBufferSegmentSpec); | |
|
559 | #if PY_VERSION_HEX < 0x03090000 | |
|
560 | ZstdBufferSegmentType->tp_as_buffer = &BufferSegment_as_buffer; | |
|
561 | #endif | |
|
562 | if (PyType_Ready(ZstdBufferSegmentType) < 0) { | |
|
563 | return; | |
|
564 | } | |
|
776 | 565 | |
|
777 |
|
|
|
778 | if (PyType_Ready(&ZstdBufferSegmentType) < 0) { | |
|
779 | return; | |
|
780 | } | |
|
781 | ||
|
782 | Py_INCREF(&ZstdBufferSegmentType); | |
|
783 | PyModule_AddObject(mod, "BufferSegment", (PyObject*)&ZstdBufferSegmentType); | |
|
566 | Py_INCREF(ZstdBufferSegmentType); | |
|
567 | PyModule_AddObject(mod, "BufferSegment", (PyObject *)ZstdBufferSegmentType); | |
|
784 | 568 | |
|
785 |
|
|
|
786 |
|
|
|
787 | return; | |
|
788 | } | |
|
569 | ZstdBufferWithSegmentsCollectionType = | |
|
570 | (PyTypeObject *)PyType_FromSpec(&ZstdBufferWithSegmentsCollectionSpec); | |
|
571 | if (PyType_Ready(ZstdBufferWithSegmentsCollectionType) < 0) { | |
|
572 | return; | |
|
573 | } | |
|
789 | 574 | |
|
790 |
|
|
|
791 |
|
|
|
575 | Py_INCREF(ZstdBufferWithSegmentsCollectionType); | |
|
576 | PyModule_AddObject(mod, "BufferWithSegmentsCollection", | |
|
577 | (PyObject *)ZstdBufferWithSegmentsCollectionType); | |
|
792 | 578 | } |
This diff has been collapsed as it changes many lines, (528 lines changed) Show them Hide them | |||
@@ -1,360 +1,310 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2018-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2018-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
11 | extern PyObject *ZstdError; | |
|
12 | 12 | |
|
13 | PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__, | |
|
14 | "Iterator of output chunks from ZstdCompressionChunker.\n" | |
|
15 | ); | |
|
13 | static void | |
|
14 | ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator *self) { | |
|
15 | Py_XDECREF(self->chunker); | |
|
16 | 16 | |
|
17 | static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) { | |
|
18 | Py_XDECREF(self->chunker); | |
|
19 | ||
|
20 | PyObject_Del(self); | |
|
17 | PyObject_Del(self); | |
|
21 | 18 | } |
|
22 | 19 | |
|
23 |
static PyObject* |
|
|
24 |
|
|
|
25 |
|
|
|
20 | static PyObject *ZstdCompressionChunkerIterator_iter(PyObject *self) { | |
|
21 | Py_INCREF(self); | |
|
22 | return self; | |
|
26 | 23 | } |
|
27 | 24 | |
|
28 | static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) { | |
|
29 | size_t zresult; | |
|
30 | PyObject* chunk; | |
|
31 | ZstdCompressionChunker* chunker = self->chunker; | |
|
32 | ZSTD_EndDirective zFlushMode; | |
|
33 | ||
|
34 | if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) { | |
|
35 | PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()"); | |
|
36 | return NULL; | |
|
37 | } | |
|
25 | static PyObject * | |
|
26 | ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator *self) { | |
|
27 | size_t zresult; | |
|
28 | PyObject *chunk; | |
|
29 | ZstdCompressionChunker *chunker = self->chunker; | |
|
30 | ZSTD_EndDirective zFlushMode; | |
|
38 | 31 | |
|
39 | if (chunker->finished) { | |
|
40 | return NULL; | |
|
41 | } | |
|
32 | if (self->mode != compressionchunker_mode_normal && | |
|
33 | chunker->input.pos != chunker->input.size) { | |
|
34 | PyErr_SetString(ZstdError, "input should have been fully consumed " | |
|
35 | "before calling flush() or finish()"); | |
|
36 | return NULL; | |
|
37 | } | |
|
42 | 38 | |
|
43 | /* If we have data left in the input, consume it. */ | |
|
44 | while (chunker->input.pos < chunker->input.size) { | |
|
45 | Py_BEGIN_ALLOW_THREADS | |
|
46 | zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output, | |
|
47 | &chunker->input, ZSTD_e_continue); | |
|
48 | Py_END_ALLOW_THREADS | |
|
39 | if (chunker->finished) { | |
|
40 | return NULL; | |
|
41 | } | |
|
42 | ||
|
43 | /* If we have data left in the input, consume it. */ | |
|
44 | while (chunker->input.pos < chunker->input.size) { | |
|
45 | Py_BEGIN_ALLOW_THREADS zresult = | |
|
46 | ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output, | |
|
47 | &chunker->input, ZSTD_e_continue); | |
|
48 | Py_END_ALLOW_THREADS | |
|
49 | 49 | |
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 |
|
|
|
56 | } | |
|
50 | /* Input is fully consumed. */ | |
|
51 | if (chunker->input.pos == chunker->input.size) { | |
|
52 | chunker->input.src = NULL; | |
|
53 | chunker->input.pos = 0; | |
|
54 | chunker->input.size = 0; | |
|
55 | PyBuffer_Release(&chunker->inBuffer); | |
|
56 | } | |
|
57 | 57 | |
|
58 |
|
|
|
59 |
|
|
|
60 | return NULL; | |
|
61 | } | |
|
58 | if (ZSTD_isError(zresult)) { | |
|
59 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
60 | ZSTD_getErrorName(zresult)); | |
|
61 | return NULL; | |
|
62 | } | |
|
62 | 63 | |
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
66 | if (!chunk) { | |
|
67 | return NULL; | |
|
68 | } | |
|
64 | /* If it produced a full output chunk, emit it. */ | |
|
65 | if (chunker->output.pos == chunker->output.size) { | |
|
66 | chunk = PyBytes_FromStringAndSize(chunker->output.dst, | |
|
67 | chunker->output.pos); | |
|
68 | if (!chunk) { | |
|
69 | return NULL; | |
|
70 | } | |
|
69 | 71 | |
|
70 |
|
|
|
72 | chunker->output.pos = 0; | |
|
71 | 73 | |
|
72 |
|
|
|
73 | } | |
|
74 | return chunk; | |
|
75 | } | |
|
74 | 76 | |
|
75 |
|
|
|
76 | } | |
|
77 | /* Else continue to compress available input data. */ | |
|
78 | } | |
|
77 | 79 | |
|
78 |
|
|
|
79 |
|
|
|
80 |
|
|
|
81 |
|
|
|
82 |
|
|
|
83 |
|
|
|
84 | } | |
|
80 | /* We also need this here for the special case of an empty input buffer. */ | |
|
81 | if (chunker->input.pos == chunker->input.size) { | |
|
82 | chunker->input.src = NULL; | |
|
83 | chunker->input.pos = 0; | |
|
84 | chunker->input.size = 0; | |
|
85 | PyBuffer_Release(&chunker->inBuffer); | |
|
86 | } | |
|
85 | 87 | |
|
86 |
|
|
|
87 |
|
|
|
88 |
|
|
|
89 | */ | |
|
90 |
|
|
|
91 |
|
|
|
92 |
|
|
|
93 | } | |
|
94 | ||
|
95 | if (self->mode == compressionchunker_mode_flush) { | |
|
96 | zFlushMode = ZSTD_e_flush; | |
|
97 | } | |
|
98 | else if (self->mode == compressionchunker_mode_finish) { | |
|
99 | zFlushMode = ZSTD_e_end; | |
|
100 | } | |
|
101 | else { | |
|
102 | PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen"); | |
|
103 | return NULL; | |
|
104 | } | |
|
88 | /* No more input data. A partial chunk may be in chunker->output. | |
|
89 | * If we're in normal compression mode, we're done. Otherwise if we're in | |
|
90 | * flush or finish mode, we need to emit what data remains. | |
|
91 | */ | |
|
92 | if (self->mode == compressionchunker_mode_normal) { | |
|
93 | /* We don't need to set StopIteration. */ | |
|
94 | return NULL; | |
|
95 | } | |
|
105 | 96 | |
|
106 | Py_BEGIN_ALLOW_THREADS | |
|
107 | zresult = ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output, | |
|
108 | &chunker->input, zFlushMode); | |
|
109 | Py_END_ALLOW_THREADS | |
|
97 | if (self->mode == compressionchunker_mode_flush) { | |
|
98 | zFlushMode = ZSTD_e_flush; | |
|
99 | } | |
|
100 | else if (self->mode == compressionchunker_mode_finish) { | |
|
101 | zFlushMode = ZSTD_e_end; | |
|
102 | } | |
|
103 | else { | |
|
104 | PyErr_SetString(ZstdError, | |
|
105 | "unhandled compression mode; this should never happen"); | |
|
106 | return NULL; | |
|
107 | } | |
|
110 | 108 | |
|
111 | if (ZSTD_isError(zresult)) { | |
|
112 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
113 | ZSTD_getErrorName(zresult)); | |
|
114 | return NULL; | |
|
115 | } | |
|
109 | Py_BEGIN_ALLOW_THREADS zresult = | |
|
110 | ZSTD_compressStream2(chunker->compressor->cctx, &chunker->output, | |
|
111 | &chunker->input, zFlushMode); | |
|
112 | Py_END_ALLOW_THREADS | |
|
116 | 113 | |
|
117 | if (!zresult && chunker->output.pos == 0) { | |
|
118 | return NULL; | |
|
119 | } | |
|
114 | if (ZSTD_isError(zresult)) { | |
|
115 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
116 | ZSTD_getErrorName(zresult)); | |
|
117 | return NULL; | |
|
118 | } | |
|
119 | ||
|
120 | if (!zresult && chunker->output.pos == 0) { | |
|
121 | return NULL; | |
|
122 | } | |
|
120 | 123 | |
|
121 |
|
|
|
122 |
|
|
|
123 |
|
|
|
124 | } | |
|
124 | chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos); | |
|
125 | if (!chunk) { | |
|
126 | return NULL; | |
|
127 | } | |
|
125 | 128 | |
|
126 |
|
|
|
129 | chunker->output.pos = 0; | |
|
127 | 130 | |
|
128 |
|
|
|
129 |
|
|
|
130 | } | |
|
131 | if (!zresult && self->mode == compressionchunker_mode_finish) { | |
|
132 | chunker->finished = 1; | |
|
133 | } | |
|
131 | 134 | |
|
132 |
|
|
|
135 | return chunk; | |
|
133 | 136 | } |
|
134 | 137 | |
|
135 |
PyType |
|
|
136 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
137 | "zstd.ZstdCompressionChunkerIterator", /* tp_name */ | |
|
138 | sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */ | |
|
139 | 0, /* tp_itemsize */ | |
|
140 | (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */ | |
|
141 | 0, /* tp_print */ | |
|
142 | 0, /* tp_getattr */ | |
|
143 | 0, /* tp_setattr */ | |
|
144 | 0, /* tp_compare */ | |
|
145 | 0, /* tp_repr */ | |
|
146 | 0, /* tp_as_number */ | |
|
147 | 0, /* tp_as_sequence */ | |
|
148 | 0, /* tp_as_mapping */ | |
|
149 | 0, /* tp_hash */ | |
|
150 | 0, /* tp_call */ | |
|
151 | 0, /* tp_str */ | |
|
152 | 0, /* tp_getattro */ | |
|
153 | 0, /* tp_setattro */ | |
|
154 | 0, /* tp_as_buffer */ | |
|
155 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
156 | ZstdCompressionChunkerIterator__doc__, /* tp_doc */ | |
|
157 | 0, /* tp_traverse */ | |
|
158 | 0, /* tp_clear */ | |
|
159 | 0, /* tp_richcompare */ | |
|
160 | 0, /* tp_weaklistoffset */ | |
|
161 | ZstdCompressionChunkerIterator_iter, /* tp_iter */ | |
|
162 | (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */ | |
|
163 | 0, /* tp_methods */ | |
|
164 | 0, /* tp_members */ | |
|
165 | 0, /* tp_getset */ | |
|
166 | 0, /* tp_base */ | |
|
167 | 0, /* tp_dict */ | |
|
168 | 0, /* tp_descr_get */ | |
|
169 | 0, /* tp_descr_set */ | |
|
170 | 0, /* tp_dictoffset */ | |
|
171 | 0, /* tp_init */ | |
|
172 | 0, /* tp_alloc */ | |
|
173 | PyType_GenericNew, /* tp_new */ | |
|
138 | PyType_Slot ZstdCompressionChunkerIteratorSlots[] = { | |
|
139 | {Py_tp_dealloc, ZstdCompressionChunkerIterator_dealloc}, | |
|
140 | {Py_tp_iter, ZstdCompressionChunkerIterator_iter}, | |
|
141 | {Py_tp_iternext, ZstdCompressionChunkerIterator_iternext}, | |
|
142 | {Py_tp_new, PyType_GenericNew}, | |
|
143 | {0, NULL}, | |
|
174 | 144 | }; |
|
175 | 145 | |
|
176 |
Py |
|
|
177 | "Compress chunks iteratively into exact chunk sizes.\n" | |
|
178 | ); | |
|
146 | PyType_Spec ZstdCompressionChunkerIteratorSpec = { | |
|
147 | "zstd.ZstdCompressionChunkerIterator", | |
|
148 | sizeof(ZstdCompressionChunkerIterator), | |
|
149 | 0, | |
|
150 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
151 | ZstdCompressionChunkerIteratorSlots, | |
|
152 | }; | |
|
153 | ||
|
154 | PyTypeObject *ZstdCompressionChunkerIteratorType; | |
|
179 | 155 | |
|
180 |
static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* |
|
|
181 |
|
|
|
182 |
|
|
|
156 | static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker *self) { | |
|
157 | PyBuffer_Release(&self->inBuffer); | |
|
158 | self->input.src = NULL; | |
|
183 | 159 | |
|
184 |
|
|
|
185 |
|
|
|
160 | PyMem_Free(self->output.dst); | |
|
161 | self->output.dst = NULL; | |
|
186 | 162 | |
|
187 |
|
|
|
163 | Py_XDECREF(self->compressor); | |
|
188 | 164 | |
|
189 |
|
|
|
165 | PyObject_Del(self); | |
|
190 | 166 | } |
|
191 | 167 | |
|
192 | static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) { | |
|
193 | static char* kwlist[] = { | |
|
194 | "data", | |
|
195 | NULL | |
|
196 | }; | |
|
168 | static ZstdCompressionChunkerIterator * | |
|
169 | ZstdCompressionChunker_compress(ZstdCompressionChunker *self, PyObject *args, | |
|
170 | PyObject *kwargs) { | |
|
171 | static char *kwlist[] = {"data", NULL}; | |
|
197 | 172 | |
|
198 |
|
|
|
173 | ZstdCompressionChunkerIterator *result; | |
|
199 | 174 | |
|
200 |
|
|
|
201 | PyErr_SetString(ZstdError, "cannot call compress() after compression finished"); | |
|
202 | return NULL; | |
|
203 | } | |
|
175 | if (self->finished) { | |
|
176 | PyErr_SetString(ZstdError, | |
|
177 | "cannot call compress() after compression finished"); | |
|
178 | return NULL; | |
|
179 | } | |
|
204 | 180 | |
|
205 |
|
|
|
206 | PyErr_SetString(ZstdError, | |
|
207 | "cannot perform operation before consuming output from previous operation"); | |
|
208 |
|
|
|
209 | } | |
|
181 | if (self->inBuffer.obj) { | |
|
182 | PyErr_SetString(ZstdError, "cannot perform operation before consuming " | |
|
183 | "output from previous operation"); | |
|
184 | return NULL; | |
|
185 | } | |
|
210 | 186 | |
|
211 | #if PY_MAJOR_VERSION >= 3 | |
|
212 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", | |
|
213 | #else | |
|
214 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress", | |
|
215 | #endif | |
|
216 | kwlist, &self->inBuffer)) { | |
|
217 | return NULL; | |
|
218 | } | |
|
219 | ||
|
220 | if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) { | |
|
221 | PyErr_SetString(PyExc_ValueError, | |
|
222 | "data buffer should be contiguous and have at most one dimension"); | |
|
223 | PyBuffer_Release(&self->inBuffer); | |
|
224 | return NULL; | |
|
225 | } | |
|
187 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", kwlist, | |
|
188 | &self->inBuffer)) { | |
|
189 | return NULL; | |
|
190 | } | |
|
226 | 191 | |
|
227 |
|
|
|
228 | if (!result) { | |
|
229 | PyBuffer_Release(&self->inBuffer); | |
|
230 | return NULL; | |
|
231 | } | |
|
192 | result = (ZstdCompressionChunkerIterator *)PyObject_CallObject( | |
|
193 | (PyObject *)ZstdCompressionChunkerIteratorType, NULL); | |
|
194 | if (!result) { | |
|
195 | PyBuffer_Release(&self->inBuffer); | |
|
196 | return NULL; | |
|
197 | } | |
|
232 | 198 | |
|
233 |
|
|
|
234 |
|
|
|
235 |
|
|
|
199 | self->input.src = self->inBuffer.buf; | |
|
200 | self->input.size = self->inBuffer.len; | |
|
201 | self->input.pos = 0; | |
|
236 | 202 | |
|
237 |
|
|
|
238 |
|
|
|
203 | result->chunker = self; | |
|
204 | Py_INCREF(result->chunker); | |
|
239 | 205 | |
|
240 |
|
|
|
206 | result->mode = compressionchunker_mode_normal; | |
|
241 | 207 | |
|
242 |
|
|
|
208 | return result; | |
|
243 | 209 | } |
|
244 | 210 | |
|
245 | static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) { | |
|
246 | ZstdCompressionChunkerIterator* result; | |
|
211 | static ZstdCompressionChunkerIterator * | |
|
212 | ZstdCompressionChunker_finish(ZstdCompressionChunker *self) { | |
|
213 | ZstdCompressionChunkerIterator *result; | |
|
247 | 214 | |
|
248 |
|
|
|
249 | PyErr_SetString(ZstdError, "cannot call finish() after compression finished"); | |
|
250 | return NULL; | |
|
251 | } | |
|
215 | if (self->finished) { | |
|
216 | PyErr_SetString(ZstdError, | |
|
217 | "cannot call finish() after compression finished"); | |
|
218 | return NULL; | |
|
219 | } | |
|
252 | 220 | |
|
253 |
|
|
|
254 | PyErr_SetString(ZstdError, | |
|
255 | "cannot call finish() before consuming output from previous operation"); | |
|
256 |
|
|
|
257 | } | |
|
221 | if (self->inBuffer.obj) { | |
|
222 | PyErr_SetString(ZstdError, "cannot call finish() before consuming " | |
|
223 | "output from previous operation"); | |
|
224 | return NULL; | |
|
225 | } | |
|
258 | 226 | |
|
259 |
|
|
|
260 | if (!result) { | |
|
261 | return NULL; | |
|
262 | } | |
|
227 | result = (ZstdCompressionChunkerIterator *)PyObject_CallObject( | |
|
228 | (PyObject *)ZstdCompressionChunkerIteratorType, NULL); | |
|
229 | if (!result) { | |
|
230 | return NULL; | |
|
231 | } | |
|
263 | 232 | |
|
264 |
|
|
|
265 |
|
|
|
233 | result->chunker = self; | |
|
234 | Py_INCREF(result->chunker); | |
|
266 | 235 | |
|
267 |
|
|
|
236 | result->mode = compressionchunker_mode_finish; | |
|
268 | 237 | |
|
269 |
|
|
|
238 | return result; | |
|
270 | 239 | } |
|
271 | 240 | |
|
272 | static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) { | |
|
273 | ZstdCompressionChunkerIterator* result; | |
|
241 | static ZstdCompressionChunkerIterator * | |
|
242 | ZstdCompressionChunker_flush(ZstdCompressionChunker *self, PyObject *args, | |
|
243 | PyObject *kwargs) { | |
|
244 | ZstdCompressionChunkerIterator *result; | |
|
274 | 245 | |
|
275 |
|
|
|
276 | PyErr_SetString(ZstdError, "cannot call flush() after compression finished"); | |
|
277 | return NULL; | |
|
278 | } | |
|
246 | if (self->finished) { | |
|
247 | PyErr_SetString(ZstdError, | |
|
248 | "cannot call flush() after compression finished"); | |
|
249 | return NULL; | |
|
250 | } | |
|
279 | 251 | |
|
280 |
|
|
|
281 | PyErr_SetString(ZstdError, | |
|
282 | "cannot call flush() before consuming output from previous operation"); | |
|
283 |
|
|
|
284 | } | |
|
252 | if (self->inBuffer.obj) { | |
|
253 | PyErr_SetString(ZstdError, "cannot call flush() before consuming " | |
|
254 | "output from previous operation"); | |
|
255 | return NULL; | |
|
256 | } | |
|
285 | 257 | |
|
286 |
|
|
|
287 | if (!result) { | |
|
288 | return NULL; | |
|
289 | } | |
|
258 | result = (ZstdCompressionChunkerIterator *)PyObject_CallObject( | |
|
259 | (PyObject *)ZstdCompressionChunkerIteratorType, NULL); | |
|
260 | if (!result) { | |
|
261 | return NULL; | |
|
262 | } | |
|
290 | 263 | |
|
291 |
|
|
|
292 |
|
|
|
264 | result->chunker = self; | |
|
265 | Py_INCREF(result->chunker); | |
|
293 | 266 | |
|
294 |
|
|
|
267 | result->mode = compressionchunker_mode_flush; | |
|
295 | 268 | |
|
296 |
|
|
|
269 | return result; | |
|
297 | 270 | } |
|
298 | 271 | |
|
299 | 272 | static PyMethodDef ZstdCompressionChunker_methods[] = { |
|
300 |
|
|
|
301 |
|
|
|
302 |
|
|
|
303 |
|
|
|
304 |
|
|
|
305 |
|
|
|
306 |
|
|
|
273 | {"compress", (PyCFunction)ZstdCompressionChunker_compress, | |
|
274 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("compress data")}, | |
|
275 | {"finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS, | |
|
276 | PyDoc_STR("finish compression operation")}, | |
|
277 | {"flush", (PyCFunction)ZstdCompressionChunker_flush, | |
|
278 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("finish compression operation")}, | |
|
279 | {NULL, NULL}}; | |
|
280 | ||
|
281 | PyType_Slot ZstdCompressionChunkerSlots[] = { | |
|
282 | {Py_tp_dealloc, ZstdCompressionChunker_dealloc}, | |
|
283 | {Py_tp_methods, ZstdCompressionChunker_methods}, | |
|
284 | {Py_tp_new, PyType_GenericNew}, | |
|
285 | {0, NULL}, | |
|
307 | 286 | }; |
|
308 | 287 | |
|
309 |
PyType |
|
|
310 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
311 | "zstd.ZstdCompressionChunkerType", /* tp_name */ | |
|
312 | sizeof(ZstdCompressionChunker), /* tp_basicsize */ | |
|
313 | 0, /* tp_itemsize */ | |
|
314 | (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */ | |
|
315 | 0, /* tp_print */ | |
|
316 | 0, /* tp_getattr */ | |
|
317 | 0, /* tp_setattr */ | |
|
318 | 0, /* tp_compare */ | |
|
319 | 0, /* tp_repr */ | |
|
320 | 0, /* tp_as_number */ | |
|
321 | 0, /* tp_as_sequence */ | |
|
322 | 0, /* tp_as_mapping */ | |
|
323 | 0, /* tp_hash */ | |
|
324 | 0, /* tp_call */ | |
|
325 | 0, /* tp_str */ | |
|
326 | 0, /* tp_getattro */ | |
|
327 | 0, /* tp_setattro */ | |
|
328 | 0, /* tp_as_buffer */ | |
|
329 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
330 | ZstdCompressionChunker__doc__, /* tp_doc */ | |
|
331 | 0, /* tp_traverse */ | |
|
332 | 0, /* tp_clear */ | |
|
333 | 0, /* tp_richcompare */ | |
|
334 | 0, /* tp_weaklistoffset */ | |
|
335 | 0, /* tp_iter */ | |
|
336 | 0, /* tp_iternext */ | |
|
337 | ZstdCompressionChunker_methods, /* tp_methods */ | |
|
338 | 0, /* tp_members */ | |
|
339 | 0, /* tp_getset */ | |
|
340 | 0, /* tp_base */ | |
|
341 | 0, /* tp_dict */ | |
|
342 | 0, /* tp_descr_get */ | |
|
343 | 0, /* tp_descr_set */ | |
|
344 | 0, /* tp_dictoffset */ | |
|
345 | 0, /* tp_init */ | |
|
346 | 0, /* tp_alloc */ | |
|
347 | PyType_GenericNew, /* tp_new */ | |
|
288 | PyType_Spec ZstdCompressionChunkerSpec = { | |
|
289 | "zstd.ZstdCompressionChunkerType", | |
|
290 | sizeof(ZstdCompressionChunker), | |
|
291 | 0, | |
|
292 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
293 | ZstdCompressionChunkerSlots, | |
|
348 | 294 | }; |
|
349 | 295 | |
|
350 | void compressionchunker_module_init(PyObject* module) { | |
|
351 | Py_SET_TYPE(&ZstdCompressionChunkerIteratorType, &PyType_Type); | |
|
352 | if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) { | |
|
353 | return; | |
|
354 | } | |
|
296 | PyTypeObject *ZstdCompressionChunkerType; | |
|
355 | 297 | |
|
356 | Py_SET_TYPE(&ZstdCompressionChunkerType, &PyType_Type); | |
|
357 |
|
|
|
358 | return; | |
|
359 | } | |
|
298 | void compressionchunker_module_init(PyObject *module) { | |
|
299 | ZstdCompressionChunkerIteratorType = | |
|
300 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressionChunkerIteratorSpec); | |
|
301 | if (PyType_Ready(ZstdCompressionChunkerIteratorType) < 0) { | |
|
302 | return; | |
|
303 | } | |
|
304 | ||
|
305 | ZstdCompressionChunkerType = | |
|
306 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressionChunkerSpec); | |
|
307 | if (PyType_Ready(ZstdCompressionChunkerType) < 0) { | |
|
308 | return; | |
|
309 | } | |
|
360 | 310 | } |
This diff has been collapsed as it changes many lines, (617 lines changed) Show them Hide them | |||
@@ -1,411 +1,348 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
11 | extern PyObject *ZstdError; | |
|
12 | 12 | |
|
13 |
ZstdCompressionDict* |
|
|
14 | static char* kwlist[] = { | |
|
15 | "dict_size", | |
|
16 | "samples", | |
|
17 | "k", | |
|
18 | "d", | |
|
19 | "notifications", | |
|
20 | "dict_id", | |
|
21 | "level", | |
|
22 | "steps", | |
|
23 | "threads", | |
|
24 | NULL | |
|
25 | }; | |
|
13 | ZstdCompressionDict *train_dictionary(PyObject *self, PyObject *args, | |
|
14 | PyObject *kwargs) { | |
|
15 | static char *kwlist[] = { | |
|
16 | "dict_size", "samples", "k", "d", | |
|
17 | "f", "split_point", "accel", "notifications", | |
|
18 | "dict_id", "level", "steps", "threads", | |
|
19 | NULL}; | |
|
26 | 20 | |
|
27 |
|
|
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
|
31 |
|
|
|
32 | unsigned dictID = 0; | |
|
33 | int level = 0; | |
|
34 |
|
|
|
35 | int threads = 0; | |
|
36 | ZDICT_cover_params_t params; | |
|
37 | Py_ssize_t samplesLen; | |
|
38 | Py_ssize_t i; | |
|
39 | size_t samplesSize = 0; | |
|
40 | void* sampleBuffer = NULL; | |
|
41 | size_t* sampleSizes = NULL; | |
|
42 | void* sampleOffset; | |
|
43 | Py_ssize_t sampleSize; | |
|
44 | void* dict = NULL; | |
|
45 | size_t zresult; | |
|
46 | ZstdCompressionDict* result = NULL; | |
|
21 | size_t capacity; | |
|
22 | PyObject *samples; | |
|
23 | unsigned k = 0; | |
|
24 | unsigned d = 0; | |
|
25 | unsigned f = 0; | |
|
26 | double splitPoint = 0.0; | |
|
27 | unsigned accel = 0; | |
|
28 | unsigned notifications = 0; | |
|
29 | unsigned dictID = 0; | |
|
30 | int level = 0; | |
|
31 | unsigned steps = 0; | |
|
32 | int threads = 0; | |
|
33 | ZDICT_fastCover_params_t params; | |
|
34 | Py_ssize_t samplesLen; | |
|
35 | Py_ssize_t i; | |
|
36 | size_t samplesSize = 0; | |
|
37 | void *sampleBuffer = NULL; | |
|
38 | size_t *sampleSizes = NULL; | |
|
39 | void *sampleOffset; | |
|
40 | Py_ssize_t sampleSize; | |
|
41 | void *dict = NULL; | |
|
42 | size_t zresult; | |
|
43 | ZstdCompressionDict *result = NULL; | |
|
47 | 44 | |
|
48 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "nO!|IIIIiIi:train_dictionary", | |
|
49 | kwlist, &capacity, &PyList_Type, &samples, | |
|
50 | &k, &d, ¬ifications, &dictID, &level, &steps, &threads)) { | |
|
51 | return NULL; | |
|
52 | } | |
|
45 | if (!PyArg_ParseTupleAndKeywords( | |
|
46 | args, kwargs, "nO!|IIIdIIIiIi:train_dictionary", kwlist, &capacity, | |
|
47 | &PyList_Type, &samples, &k, &d, &f, &splitPoint, &accel, | |
|
48 | ¬ifications, &dictID, &level, &steps, &threads)) { | |
|
49 | return NULL; | |
|
50 | } | |
|
53 | 51 | |
|
54 |
|
|
|
55 |
|
|
|
56 | } | |
|
52 | if (threads < 0) { | |
|
53 | threads = cpu_count(); | |
|
54 | } | |
|
57 | 55 | |
|
58 | memset(¶ms, 0, sizeof(params)); | |
|
59 | params.k = k; | |
|
60 | params.d = d; | |
|
61 | params.steps = steps; | |
|
62 | params.nbThreads = threads; | |
|
63 | params.zParams.notificationLevel = notifications; | |
|
64 | params.zParams.dictID = dictID; | |
|
65 | params.zParams.compressionLevel = level; | |
|
56 | if (!steps && !threads) { | |
|
57 | /* Defaults from ZDICT_trainFromBuffer() */ | |
|
58 | d = d ? d : 8; | |
|
59 | steps = steps ? steps : 4; | |
|
60 | level = level ? level : 3; | |
|
61 | } | |
|
66 | 62 | |
|
67 | /* Figure out total size of input samples. */ | |
|
68 | samplesLen = PyList_Size(samples); | |
|
69 | for (i = 0; i < samplesLen; i++) { | |
|
70 | PyObject* sampleItem = PyList_GET_ITEM(samples, i); | |
|
63 | memset(¶ms, 0, sizeof(params)); | |
|
64 | params.k = k; | |
|
65 | params.d = d; | |
|
66 | params.f = f; | |
|
67 | params.steps = steps; | |
|
68 | params.nbThreads = threads; | |
|
69 | params.splitPoint = splitPoint; | |
|
70 | params.accel = accel; | |
|
71 | ||
|
72 | params.zParams.compressionLevel = level; | |
|
73 | params.zParams.dictID = dictID; | |
|
74 | params.zParams.notificationLevel = notifications; | |
|
71 | 75 | |
|
72 | if (!PyBytes_Check(sampleItem)) { | |
|
73 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); | |
|
74 | return NULL; | |
|
75 | } | |
|
76 | samplesSize += PyBytes_GET_SIZE(sampleItem); | |
|
77 | } | |
|
76 | /* Figure out total size of input samples. */ | |
|
77 | samplesLen = PyList_Size(samples); | |
|
78 | for (i = 0; i < samplesLen; i++) { | |
|
79 | PyObject *sampleItem = PyList_GET_ITEM(samples, i); | |
|
78 | 80 | |
|
79 | sampleBuffer = PyMem_Malloc(samplesSize); | |
|
80 | if (!sampleBuffer) { | |
|
81 | PyErr_NoMemory(); | |
|
82 | goto finally; | |
|
83 | } | |
|
81 | if (!PyBytes_Check(sampleItem)) { | |
|
82 | PyErr_SetString(PyExc_ValueError, "samples must be bytes"); | |
|
83 | return NULL; | |
|
84 | } | |
|
85 | samplesSize += PyBytes_GET_SIZE(sampleItem); | |
|
86 | } | |
|
84 | 87 | |
|
85 |
|
|
|
86 |
|
|
|
87 |
|
|
|
88 |
|
|
|
89 | } | |
|
88 | sampleBuffer = PyMem_Malloc(samplesSize); | |
|
89 | if (!sampleBuffer) { | |
|
90 | PyErr_NoMemory(); | |
|
91 | goto finally; | |
|
92 | } | |
|
90 | 93 | |
|
91 | sampleOffset = sampleBuffer; | |
|
92 | for (i = 0; i < samplesLen; i++) { | |
|
93 | PyObject* sampleItem = PyList_GET_ITEM(samples, i); | |
|
94 | sampleSize = PyBytes_GET_SIZE(sampleItem); | |
|
95 | sampleSizes[i] = sampleSize; | |
|
96 | memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); | |
|
97 | sampleOffset = (char*)sampleOffset + sampleSize; | |
|
98 | } | |
|
99 | ||
|
100 | dict = PyMem_Malloc(capacity); | |
|
101 | if (!dict) { | |
|
102 | PyErr_NoMemory(); | |
|
103 | goto finally; | |
|
104 | } | |
|
94 | sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t)); | |
|
95 | if (!sampleSizes) { | |
|
96 | PyErr_NoMemory(); | |
|
97 | goto finally; | |
|
98 | } | |
|
105 | 99 | |
|
106 | Py_BEGIN_ALLOW_THREADS | |
|
107 | /* No parameters uses the default function, which will use default params | |
|
108 | and call ZDICT_optimizeTrainFromBuffer_cover under the hood. */ | |
|
109 | if (!params.k && !params.d && !params.zParams.compressionLevel | |
|
110 | && !params.zParams.notificationLevel && !params.zParams.dictID) { | |
|
111 | zresult = ZDICT_trainFromBuffer(dict, capacity, sampleBuffer, | |
|
112 | sampleSizes, (unsigned)samplesLen); | |
|
113 | } | |
|
114 | /* Use optimize mode if user controlled steps or threads explicitly. */ | |
|
115 | else if (params.steps || params.nbThreads) { | |
|
116 | zresult = ZDICT_optimizeTrainFromBuffer_cover(dict, capacity, | |
|
117 | sampleBuffer, sampleSizes, (unsigned)samplesLen, ¶ms); | |
|
118 | } | |
|
119 | /* Non-optimize mode with explicit control. */ | |
|
120 | else { | |
|
121 | zresult = ZDICT_trainFromBuffer_cover(dict, capacity, | |
|
122 |
|
|
|
123 | } | |
|
124 |
|
|
|
100 | sampleOffset = sampleBuffer; | |
|
101 | for (i = 0; i < samplesLen; i++) { | |
|
102 | PyObject *sampleItem = PyList_GET_ITEM(samples, i); | |
|
103 | sampleSize = PyBytes_GET_SIZE(sampleItem); | |
|
104 | sampleSizes[i] = sampleSize; | |
|
105 | memcpy(sampleOffset, PyBytes_AS_STRING(sampleItem), sampleSize); | |
|
106 | sampleOffset = (char *)sampleOffset + sampleSize; | |
|
107 | } | |
|
108 | ||
|
109 | dict = PyMem_Malloc(capacity); | |
|
110 | if (!dict) { | |
|
111 | PyErr_NoMemory(); | |
|
112 | goto finally; | |
|
113 | } | |
|
114 | ||
|
115 | Py_BEGIN_ALLOW_THREADS zresult = ZDICT_optimizeTrainFromBuffer_fastCover( | |
|
116 | dict, capacity, sampleBuffer, sampleSizes, (unsigned)samplesLen, | |
|
117 | ¶ms); | |
|
118 | Py_END_ALLOW_THREADS | |
|
125 | 119 | |
|
126 |
|
|
|
127 |
|
|
|
128 |
|
|
|
129 | goto finally; | |
|
130 | } | |
|
120 | if (ZDICT_isError(zresult)) { | |
|
121 | PyMem_Free(dict); | |
|
122 | PyErr_Format(ZstdError, "cannot train dict: %s", | |
|
123 | ZDICT_getErrorName(zresult)); | |
|
124 | goto finally; | |
|
125 | } | |
|
131 | 126 | |
|
132 |
|
|
|
133 |
|
|
|
134 |
|
|
|
135 |
|
|
|
136 | } | |
|
127 | result = PyObject_New(ZstdCompressionDict, ZstdCompressionDictType); | |
|
128 | if (!result) { | |
|
129 | PyMem_Free(dict); | |
|
130 | goto finally; | |
|
131 | } | |
|
137 | 132 | |
|
138 |
|
|
|
139 |
|
|
|
140 |
|
|
|
141 |
|
|
|
142 |
|
|
|
143 |
|
|
|
144 |
|
|
|
133 | result->dictData = dict; | |
|
134 | result->dictSize = zresult; | |
|
135 | result->dictType = ZSTD_dct_fullDict; | |
|
136 | result->d = params.d; | |
|
137 | result->k = params.k; | |
|
138 | result->cdict = NULL; | |
|
139 | result->ddict = NULL; | |
|
145 | 140 | |
|
146 | 141 | finally: |
|
147 |
|
|
|
148 |
|
|
|
142 | PyMem_Free(sampleBuffer); | |
|
143 | PyMem_Free(sampleSizes); | |
|
149 | 144 | |
|
150 |
|
|
|
145 | return result; | |
|
151 | 146 | } |
|
152 | 147 | |
|
153 |
int ensure_ddict(ZstdCompressionDict* |
|
|
154 |
|
|
|
155 |
|
|
|
156 | } | |
|
148 | int ensure_ddict(ZstdCompressionDict *dict) { | |
|
149 | if (dict->ddict) { | |
|
150 | return 0; | |
|
151 | } | |
|
157 | 152 | |
|
158 | Py_BEGIN_ALLOW_THREADS | |
|
159 | dict->ddict = ZSTD_createDDict_advanced(dict->dictData, dict->dictSize, | |
|
160 | ZSTD_dlm_byRef, dict->dictType, ZSTD_defaultCMem); | |
|
161 |
|
|
|
162 | if (!dict->ddict) { | |
|
163 | PyErr_SetString(ZstdError, "could not create decompression dict"); | |
|
164 | return 1; | |
|
165 | } | |
|
153 | Py_BEGIN_ALLOW_THREADS dict->ddict = ZSTD_createDDict_advanced( | |
|
154 | dict->dictData, dict->dictSize, ZSTD_dlm_byRef, dict->dictType, | |
|
155 | ZSTD_defaultCMem); | |
|
156 | Py_END_ALLOW_THREADS if (!dict->ddict) { | |
|
157 | PyErr_SetString(ZstdError, "could not create decompression dict"); | |
|
158 | return 1; | |
|
159 | } | |
|
166 | 160 | |
|
167 |
|
|
|
161 | return 0; | |
|
168 | 162 | } |
|
169 | 163 | |
|
170 | PyDoc_STRVAR(ZstdCompressionDict__doc__, | |
|
171 | "ZstdCompressionDict(data) - Represents a computed compression dictionary\n" | |
|
172 | "\n" | |
|
173 | "This type holds the results of a computed Zstandard compression dictionary.\n" | |
|
174 | "Instances are obtained by calling ``train_dictionary()`` or by passing\n" | |
|
175 | "bytes obtained from another source into the constructor.\n" | |
|
176 | ); | |
|
164 | static int ZstdCompressionDict_init(ZstdCompressionDict *self, PyObject *args, | |
|
165 | PyObject *kwargs) { | |
|
166 | static char *kwlist[] = {"data", "dict_type", NULL}; | |
|
167 | ||
|
168 | int result = -1; | |
|
169 | Py_buffer source; | |
|
170 | unsigned dictType = ZSTD_dct_auto; | |
|
177 | 171 | |
|
178 | static int ZstdCompressionDict_init(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) { | |
|
179 | static char* kwlist[] = { | |
|
180 | "data", | |
|
181 | "dict_type", | |
|
182 | NULL | |
|
183 | }; | |
|
172 | self->dictData = NULL; | |
|
173 | self->dictSize = 0; | |
|
174 | self->cdict = NULL; | |
|
175 | self->ddict = NULL; | |
|
184 | 176 | |
|
185 | int result = -1; | |
|
186 | Py_buffer source; | |
|
187 | unsigned dictType = ZSTD_dct_auto; | |
|
188 | ||
|
189 | self->dictData = NULL; | |
|
190 | self->dictSize = 0; | |
|
191 | self->cdict = NULL; | |
|
192 | self->ddict = NULL; | |
|
177 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict", | |
|
178 | kwlist, &source, &dictType)) { | |
|
179 | return -1; | |
|
180 | } | |
|
193 | 181 | |
|
194 | #if PY_MAJOR_VERSION >= 3 | |
|
195 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|I:ZstdCompressionDict", | |
|
196 | #else | |
|
197 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|I:ZstdCompressionDict", | |
|
198 | #endif | |
|
199 | kwlist, &source, &dictType)) { | |
|
200 | return -1; | |
|
201 | } | |
|
182 | if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent && | |
|
183 | dictType != ZSTD_dct_fullDict) { | |
|
184 | PyErr_Format( | |
|
185 | PyExc_ValueError, | |
|
186 | "invalid dictionary load mode: %d; must use DICT_TYPE_* constants", | |
|
187 | dictType); | |
|
188 | goto finally; | |
|
189 | } | |
|
202 | 190 | |
|
203 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
|
204 | PyErr_SetString(PyExc_ValueError, | |
|
205 | "data buffer should be contiguous and have at most one dimension"); | |
|
206 | goto finally; | |
|
207 | } | |
|
191 | self->dictType = dictType; | |
|
208 | 192 | |
|
209 | if (dictType != ZSTD_dct_auto && dictType != ZSTD_dct_rawContent | |
|
210 | && dictType != ZSTD_dct_fullDict) { | |
|
211 | PyErr_Format(PyExc_ValueError, | |
|
212 | "invalid dictionary load mode: %d; must use DICT_TYPE_* constants", | |
|
213 | dictType); | |
|
214 | goto finally; | |
|
215 | } | |
|
216 | ||
|
217 | self->dictType = dictType; | |
|
193 | self->dictData = PyMem_Malloc(source.len); | |
|
194 | if (!self->dictData) { | |
|
195 | PyErr_NoMemory(); | |
|
196 | goto finally; | |
|
197 | } | |
|
218 | 198 | |
|
219 | self->dictData = PyMem_Malloc(source.len); | |
|
220 | if (!self->dictData) { | |
|
221 | PyErr_NoMemory(); | |
|
222 | goto finally; | |
|
223 | } | |
|
199 | memcpy(self->dictData, source.buf, source.len); | |
|
200 | self->dictSize = source.len; | |
|
224 | 201 | |
|
225 | memcpy(self->dictData, source.buf, source.len); | |
|
226 | self->dictSize = source.len; | |
|
227 | ||
|
228 | result = 0; | |
|
202 | result = 0; | |
|
229 | 203 | |
|
230 | 204 | finally: |
|
231 |
|
|
|
232 |
|
|
|
205 | PyBuffer_Release(&source); | |
|
206 | return result; | |
|
233 | 207 | } |
|
234 | 208 | |
|
235 |
static void ZstdCompressionDict_dealloc(ZstdCompressionDict* |
|
|
236 |
|
|
|
237 |
|
|
|
238 |
|
|
|
239 | } | |
|
209 | static void ZstdCompressionDict_dealloc(ZstdCompressionDict *self) { | |
|
210 | if (self->cdict) { | |
|
211 | ZSTD_freeCDict(self->cdict); | |
|
212 | self->cdict = NULL; | |
|
213 | } | |
|
240 | 214 | |
|
241 |
|
|
|
242 |
|
|
|
243 |
|
|
|
244 | } | |
|
215 | if (self->ddict) { | |
|
216 | ZSTD_freeDDict(self->ddict); | |
|
217 | self->ddict = NULL; | |
|
218 | } | |
|
245 | 219 | |
|
246 |
|
|
|
247 |
|
|
|
248 |
|
|
|
249 | } | |
|
220 | if (self->dictData) { | |
|
221 | PyMem_Free(self->dictData); | |
|
222 | self->dictData = NULL; | |
|
223 | } | |
|
250 | 224 | |
|
251 |
|
|
|
225 | PyObject_Del(self); | |
|
252 | 226 | } |
|
253 | 227 | |
|
254 | PyDoc_STRVAR(ZstdCompressionDict_precompute_compress__doc__, | |
|
255 | "Precompute a dictionary so it can be used by multiple compressors.\n" | |
|
256 | ); | |
|
228 | static PyObject * | |
|
229 | ZstdCompressionDict_precompute_compress(ZstdCompressionDict *self, | |
|
230 | PyObject *args, PyObject *kwargs) { | |
|
231 | static char *kwlist[] = {"level", "compression_params", NULL}; | |
|
257 | 232 | |
|
258 | static PyObject* ZstdCompressionDict_precompute_compress(ZstdCompressionDict* self, PyObject* args, PyObject* kwargs) { | |
|
259 | static char* kwlist[] = { | |
|
260 | "level", | |
|
261 | "compression_params", | |
|
262 | NULL | |
|
263 | }; | |
|
233 | int level = 0; | |
|
234 | ZstdCompressionParametersObject *compressionParams = NULL; | |
|
235 | ZSTD_compressionParameters cParams; | |
|
236 | size_t zresult; | |
|
264 | 237 | |
|
265 | int level = 0; | |
|
266 | ZstdCompressionParametersObject* compressionParams = NULL; | |
|
267 | ZSTD_compressionParameters cParams; | |
|
268 | size_t zresult; | |
|
238 | if (!PyArg_ParseTupleAndKeywords( | |
|
239 | args, kwargs, "|iO!:precompute_compress", kwlist, &level, | |
|
240 | ZstdCompressionParametersType, &compressionParams)) { | |
|
241 | return NULL; | |
|
242 | } | |
|
269 | 243 | |
|
270 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!:precompute_compress", kwlist, | |
|
271 | &level, &ZstdCompressionParametersType, &compressionParams)) { | |
|
272 | return NULL; | |
|
273 | } | |
|
244 | if (level && compressionParams) { | |
|
245 | PyErr_SetString(PyExc_ValueError, | |
|
246 | "must only specify one of level or compression_params"); | |
|
247 | return NULL; | |
|
248 | } | |
|
274 | 249 | |
|
275 |
|
|
|
276 |
|
|
|
277 |
|
|
|
278 |
|
|
|
279 | } | |
|
250 | if (!level && !compressionParams) { | |
|
251 | PyErr_SetString(PyExc_ValueError, | |
|
252 | "must specify one of level or compression_params"); | |
|
253 | return NULL; | |
|
254 | } | |
|
280 | 255 | |
|
281 | if (!level && !compressionParams) { | |
|
282 | PyErr_SetString(PyExc_ValueError, | |
|
283 | "must specify one of level or compression_params"); | |
|
284 | return NULL; | |
|
285 | } | |
|
286 | ||
|
287 | if (self->cdict) { | |
|
288 | zresult = ZSTD_freeCDict(self->cdict); | |
|
289 | self->cdict = NULL; | |
|
290 | if (ZSTD_isError(zresult)) { | |
|
291 | PyErr_Format(ZstdError, "unable to free CDict: %s", | |
|
292 | ZSTD_getErrorName(zresult)); | |
|
293 | return NULL; | |
|
294 | } | |
|
295 | } | |
|
256 | if (self->cdict) { | |
|
257 | zresult = ZSTD_freeCDict(self->cdict); | |
|
258 | self->cdict = NULL; | |
|
259 | if (ZSTD_isError(zresult)) { | |
|
260 | PyErr_Format(ZstdError, "unable to free CDict: %s", | |
|
261 | ZSTD_getErrorName(zresult)); | |
|
262 | return NULL; | |
|
263 | } | |
|
264 | } | |
|
296 | 265 | |
|
297 |
|
|
|
298 |
|
|
|
299 | } | |
|
300 |
|
|
|
301 |
|
|
|
302 |
|
|
|
303 | } | |
|
304 | } | |
|
266 | if (level) { | |
|
267 | cParams = ZSTD_getCParams(level, 0, self->dictSize); | |
|
268 | } | |
|
269 | else { | |
|
270 | if (to_cparams(compressionParams, &cParams)) { | |
|
271 | return NULL; | |
|
272 | } | |
|
273 | } | |
|
305 | 274 | |
|
306 |
|
|
|
307 |
|
|
|
308 | ZSTD_dlm_byRef, self->dictType, cParams, ZSTD_defaultCMem); | |
|
275 | assert(!self->cdict); | |
|
276 | self->cdict = ZSTD_createCDict_advanced(self->dictData, self->dictSize, | |
|
277 | ZSTD_dlm_byRef, self->dictType, | |
|
278 | cParams, ZSTD_defaultCMem); | |
|
309 | 279 | |
|
310 |
|
|
|
311 |
|
|
|
312 |
|
|
|
313 | } | |
|
280 | if (!self->cdict) { | |
|
281 | PyErr_SetString(ZstdError, "unable to precompute dictionary"); | |
|
282 | return NULL; | |
|
283 | } | |
|
314 | 284 | |
|
315 |
|
|
|
285 | Py_RETURN_NONE; | |
|
316 | 286 | } |
|
317 | 287 | |
|
318 |
static PyObject* |
|
|
319 |
|
|
|
288 | static PyObject *ZstdCompressionDict_dict_id(ZstdCompressionDict *self) { | |
|
289 | unsigned dictID = ZDICT_getDictID(self->dictData, self->dictSize); | |
|
320 | 290 | |
|
321 |
|
|
|
291 | return PyLong_FromLong(dictID); | |
|
322 | 292 | } |
|
323 | 293 | |
|
324 |
static PyObject* |
|
|
325 |
|
|
|
294 | static PyObject *ZstdCompressionDict_as_bytes(ZstdCompressionDict *self) { | |
|
295 | return PyBytes_FromStringAndSize(self->dictData, self->dictSize); | |
|
326 | 296 | } |
|
327 | 297 | |
|
328 | 298 | static PyMethodDef ZstdCompressionDict_methods[] = { |
|
329 |
|
|
|
330 |
|
|
|
331 |
|
|
|
332 |
|
|
|
333 | { "precompute_compress", (PyCFunction)ZstdCompressionDict_precompute_compress, | |
|
334 | METH_VARARGS | METH_KEYWORDS, ZstdCompressionDict_precompute_compress__doc__ }, | |
|
335 | { NULL, NULL } | |
|
336 | }; | |
|
299 | {"dict_id", (PyCFunction)ZstdCompressionDict_dict_id, METH_NOARGS, | |
|
300 | PyDoc_STR("dict_id() -- obtain the numeric dictionary ID")}, | |
|
301 | {"as_bytes", (PyCFunction)ZstdCompressionDict_as_bytes, METH_NOARGS, | |
|
302 | PyDoc_STR("as_bytes() -- obtain the raw bytes constituting the dictionary " | |
|
303 | "data")}, | |
|
304 | {"precompute_compress", | |
|
305 | (PyCFunction)ZstdCompressionDict_precompute_compress, | |
|
306 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
307 | {NULL, NULL}}; | |
|
337 | 308 | |
|
338 | 309 | static PyMemberDef ZstdCompressionDict_members[] = { |
|
339 |
|
|
|
340 | "segment size" }, | |
|
341 | { "d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY, | |
|
342 | "dmer size" }, | |
|
343 | { NULL } | |
|
344 | }; | |
|
310 | {"k", T_UINT, offsetof(ZstdCompressionDict, k), READONLY, "segment size"}, | |
|
311 | {"d", T_UINT, offsetof(ZstdCompressionDict, d), READONLY, "dmer size"}, | |
|
312 | {NULL}}; | |
|
345 | 313 | |
|
346 |
static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict* |
|
|
347 |
|
|
|
314 | static Py_ssize_t ZstdCompressionDict_length(ZstdCompressionDict *self) { | |
|
315 | return self->dictSize; | |
|
348 | 316 | } |
|
349 | 317 | |
|
350 |
|
|
|
351 | (lenfunc)ZstdCompressionDict_length, /* sq_length */ | |
|
352 | 0, /* sq_concat */ | |
|
353 | 0, /* sq_repeat */ | |
|
354 | 0, /* sq_item */ | |
|
355 | 0, /* sq_ass_item */ | |
|
356 | 0, /* sq_contains */ | |
|
357 | 0, /* sq_inplace_concat */ | |
|
358 | 0 /* sq_inplace_repeat */ | |
|
318 | PyType_Slot ZstdCompressionDictSlots[] = { | |
|
319 | {Py_tp_dealloc, ZstdCompressionDict_dealloc}, | |
|
320 | {Py_sq_length, ZstdCompressionDict_length}, | |
|
321 | {Py_tp_methods, ZstdCompressionDict_methods}, | |
|
322 | {Py_tp_members, ZstdCompressionDict_members}, | |
|
323 | {Py_tp_init, ZstdCompressionDict_init}, | |
|
324 | {Py_tp_new, PyType_GenericNew}, | |
|
325 | {0, NULL}, | |
|
359 | 326 | }; |
|
360 | 327 | |
|
361 |
PyType |
|
|
362 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
363 | "zstd.ZstdCompressionDict", /* tp_name */ | |
|
364 | sizeof(ZstdCompressionDict), /* tp_basicsize */ | |
|
365 | 0, /* tp_itemsize */ | |
|
366 | (destructor)ZstdCompressionDict_dealloc, /* tp_dealloc */ | |
|
367 | 0, /* tp_print */ | |
|
368 | 0, /* tp_getattr */ | |
|
369 | 0, /* tp_setattr */ | |
|
370 | 0, /* tp_compare */ | |
|
371 | 0, /* tp_repr */ | |
|
372 | 0, /* tp_as_number */ | |
|
373 | &ZstdCompressionDict_sq, /* tp_as_sequence */ | |
|
374 | 0, /* tp_as_mapping */ | |
|
375 | 0, /* tp_hash */ | |
|
376 | 0, /* tp_call */ | |
|
377 | 0, /* tp_str */ | |
|
378 | 0, /* tp_getattro */ | |
|
379 | 0, /* tp_setattro */ | |
|
380 | 0, /* tp_as_buffer */ | |
|
381 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
382 | ZstdCompressionDict__doc__, /* tp_doc */ | |
|
383 | 0, /* tp_traverse */ | |
|
384 | 0, /* tp_clear */ | |
|
385 | 0, /* tp_richcompare */ | |
|
386 | 0, /* tp_weaklistoffset */ | |
|
387 | 0, /* tp_iter */ | |
|
388 | 0, /* tp_iternext */ | |
|
389 | ZstdCompressionDict_methods, /* tp_methods */ | |
|
390 | ZstdCompressionDict_members, /* tp_members */ | |
|
391 | 0, /* tp_getset */ | |
|
392 | 0, /* tp_base */ | |
|
393 | 0, /* tp_dict */ | |
|
394 | 0, /* tp_descr_get */ | |
|
395 | 0, /* tp_descr_set */ | |
|
396 | 0, /* tp_dictoffset */ | |
|
397 | (initproc)ZstdCompressionDict_init, /* tp_init */ | |
|
398 | 0, /* tp_alloc */ | |
|
399 | PyType_GenericNew, /* tp_new */ | |
|
328 | PyType_Spec ZstdCompressionDictSpec = { | |
|
329 | "zstd.ZstdCompressionDict", | |
|
330 | sizeof(ZstdCompressionDict), | |
|
331 | 0, | |
|
332 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
333 | ZstdCompressionDictSlots, | |
|
400 | 334 | }; |
|
401 | 335 | |
|
402 | void compressiondict_module_init(PyObject* mod) { | |
|
403 | Py_SET_TYPE(&ZstdCompressionDictType, &PyType_Type); | |
|
404 | if (PyType_Ready(&ZstdCompressionDictType) < 0) { | |
|
405 | return; | |
|
406 | } | |
|
336 | PyTypeObject *ZstdCompressionDictType; | |
|
407 | 337 | |
|
408 | Py_INCREF((PyObject*)&ZstdCompressionDictType); | |
|
409 | PyModule_AddObject(mod, "ZstdCompressionDict", | |
|
410 |
|
|
|
338 | void compressiondict_module_init(PyObject *mod) { | |
|
339 | ZstdCompressionDictType = | |
|
340 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressionDictSpec); | |
|
341 | if (PyType_Ready(ZstdCompressionDictType) < 0) { | |
|
342 | return; | |
|
343 | } | |
|
344 | ||
|
345 | Py_INCREF((PyObject *)ZstdCompressionDictType); | |
|
346 | PyModule_AddObject(mod, "ZstdCompressionDict", | |
|
347 | (PyObject *)ZstdCompressionDictType); | |
|
411 | 348 | } |
This diff has been collapsed as it changes many lines, (873 lines changed) Show them Hide them | |||
@@ -1,449 +1,416 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
12 | ||
|
13 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) { | |
|
14 | size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value); | |
|
15 | if (ZSTD_isError(zresult)) { | |
|
16 | PyErr_Format(ZstdError, "unable to set compression context parameter: %s", | |
|
17 | ZSTD_getErrorName(zresult)); | |
|
18 | return 1; | |
|
19 | } | |
|
20 | ||
|
21 | return 0; | |
|
22 | } | |
|
11 | extern PyObject *ZstdError; | |
|
23 | 12 | |
|
24 | #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1; | |
|
13 | int set_parameter(ZSTD_CCtx_params *params, ZSTD_cParameter param, int value) { | |
|
14 | size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value); | |
|
15 | if (ZSTD_isError(zresult)) { | |
|
16 | PyErr_Format(ZstdError, | |
|
17 | "unable to set compression context parameter: %s", | |
|
18 | ZSTD_getErrorName(zresult)); | |
|
19 | return 1; | |
|
20 | } | |
|
25 | 21 | |
|
26 | #define TRY_COPY_PARAMETER(source, dest, param) { \ | |
|
27 | int result; \ | |
|
28 | size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \ | |
|
29 | if (ZSTD_isError(zresult)) { \ | |
|
30 | return 1; \ | |
|
31 | } \ | |
|
32 | zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \ | |
|
33 | if (ZSTD_isError(zresult)) { \ | |
|
34 | return 1; \ | |
|
35 | } \ | |
|
22 | return 0; | |
|
36 | 23 | } |
|
37 | 24 | |
|
38 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) { | |
|
39 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers); | |
|
25 | #define TRY_SET_PARAMETER(params, param, value) \ | |
|
26 | if (set_parameter(params, param, value)) \ | |
|
27 | return -1; | |
|
28 | ||
|
29 | #define TRY_COPY_PARAMETER(source, dest, param) \ | |
|
30 | { \ | |
|
31 | int result; \ | |
|
32 | size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \ | |
|
33 | if (ZSTD_isError(zresult)) { \ | |
|
34 | return 1; \ | |
|
35 | } \ | |
|
36 | zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \ | |
|
37 | if (ZSTD_isError(zresult)) { \ | |
|
38 | return 1; \ | |
|
39 | } \ | |
|
40 | } | |
|
41 | ||
|
42 | int set_parameters(ZSTD_CCtx_params *params, | |
|
43 | ZstdCompressionParametersObject *obj) { | |
|
44 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers); | |
|
40 | 45 | |
|
41 |
|
|
|
42 |
|
|
|
43 |
|
|
|
44 |
|
|
|
45 |
|
|
|
46 |
|
|
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 |
|
|
|
56 |
|
|
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
46 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format); | |
|
47 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel); | |
|
48 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog); | |
|
49 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog); | |
|
50 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog); | |
|
51 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog); | |
|
52 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch); | |
|
53 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength); | |
|
54 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy); | |
|
55 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag); | |
|
56 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag); | |
|
57 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag); | |
|
58 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize); | |
|
59 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog); | |
|
60 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow); | |
|
61 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching); | |
|
62 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog); | |
|
63 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch); | |
|
64 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog); | |
|
65 | TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog); | |
|
61 | 66 | |
|
62 |
|
|
|
67 | return 0; | |
|
63 | 68 | } |
|
64 | 69 | |
|
65 |
int reset_params(ZstdCompressionParametersObject* |
|
|
66 |
|
|
|
67 |
|
|
|
68 | } | |
|
69 |
|
|
|
70 |
|
|
|
71 |
|
|
|
72 |
|
|
|
73 | return 1; | |
|
74 | } | |
|
75 | } | |
|
70 | int reset_params(ZstdCompressionParametersObject *params) { | |
|
71 | if (params->params) { | |
|
72 | ZSTD_CCtxParams_reset(params->params); | |
|
73 | } | |
|
74 | else { | |
|
75 | params->params = ZSTD_createCCtxParams(); | |
|
76 | if (!params->params) { | |
|
77 | PyErr_NoMemory(); | |
|
78 | return 1; | |
|
79 | } | |
|
80 | } | |
|
76 | 81 | |
|
77 |
|
|
|
78 | } | |
|
79 | ||
|
80 | #define TRY_GET_PARAMETER(params, param, value) { \ | |
|
81 | size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \ | |
|
82 | if (ZSTD_isError(zresult)) { \ | |
|
83 | PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \ | |
|
84 | return 1; \ | |
|
85 | } \ | |
|
82 | return set_parameters(params->params, params); | |
|
86 | 83 | } |
|
87 | 84 | |
|
88 | int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) { | |
|
89 | int value; | |
|
90 | ||
|
91 | TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value); | |
|
92 | cparams->windowLog = value; | |
|
85 | #define TRY_GET_PARAMETER(params, param, value) \ | |
|
86 | { \ | |
|
87 | size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \ | |
|
88 | if (ZSTD_isError(zresult)) { \ | |
|
89 | PyErr_Format(ZstdError, "unable to retrieve parameter: %s", \ | |
|
90 | ZSTD_getErrorName(zresult)); \ | |
|
91 | return 1; \ | |
|
92 | } \ | |
|
93 | } | |
|
93 | 94 | |
|
94 | TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value); | |
|
95 | cparams->chainLog = value; | |
|
95 | int to_cparams(ZstdCompressionParametersObject *params, | |
|
96 | ZSTD_compressionParameters *cparams) { | |
|
97 | int value; | |
|
96 | 98 | |
|
97 |
|
|
|
98 |
|
|
|
99 | TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value); | |
|
100 | cparams->windowLog = value; | |
|
99 | 101 | |
|
100 |
|
|
|
101 |
|
|
|
102 | TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value); | |
|
103 | cparams->chainLog = value; | |
|
102 | 104 | |
|
103 |
|
|
|
104 |
|
|
|
105 | TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value); | |
|
106 | cparams->hashLog = value; | |
|
107 | ||
|
108 | TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value); | |
|
109 | cparams->searchLog = value; | |
|
105 | 110 | |
|
106 |
|
|
|
107 |
|
|
|
111 | TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value); | |
|
112 | cparams->minMatch = value; | |
|
108 | 113 | |
|
109 |
|
|
|
110 |
|
|
|
114 | TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value); | |
|
115 | cparams->targetLength = value; | |
|
111 | 116 | |
|
112 | return 0; | |
|
117 | TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value); | |
|
118 | cparams->strategy = value; | |
|
119 | ||
|
120 | return 0; | |
|
113 | 121 | } |
|
114 | 122 | |
|
115 |
static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* |
|
|
116 | static char* kwlist[] = { | |
|
117 | "format", | |
|
118 | "compression_level", | |
|
119 | "window_log", | |
|
120 | "hash_log", | |
|
121 | "chain_log", | |
|
122 | "search_log", | |
|
123 | "min_match", | |
|
124 | "target_length", | |
|
125 | "compression_strategy", | |
|
126 | "strategy", | |
|
127 | "write_content_size", | |
|
128 | "write_checksum", | |
|
129 | "write_dict_id", | |
|
130 | "job_size", | |
|
131 | "overlap_log", | |
|
132 | "overlap_size_log", | |
|
133 | "force_max_window", | |
|
134 | "enable_ldm", | |
|
135 | "ldm_hash_log", | |
|
136 | "ldm_min_match", | |
|
137 | "ldm_bucket_size_log", | |
|
138 | "ldm_hash_rate_log", | |
|
139 | "ldm_hash_every_log", | |
|
140 | "threads", | |
|
141 | NULL | |
|
142 | }; | |
|
123 | static int ZstdCompressionParameters_init(ZstdCompressionParametersObject *self, | |
|
124 | PyObject *args, PyObject *kwargs) { | |
|
125 | static char *kwlist[] = {"format", | |
|
126 | "compression_level", | |
|
127 | "window_log", | |
|
128 | "hash_log", | |
|
129 | "chain_log", | |
|
130 | "search_log", | |
|
131 | "min_match", | |
|
132 | "target_length", | |
|
133 | "strategy", | |
|
134 | "write_content_size", | |
|
135 | "write_checksum", | |
|
136 | "write_dict_id", | |
|
137 | "job_size", | |
|
138 | "overlap_log", | |
|
139 | "force_max_window", | |
|
140 | "enable_ldm", | |
|
141 | "ldm_hash_log", | |
|
142 | "ldm_min_match", | |
|
143 | "ldm_bucket_size_log", | |
|
144 | "ldm_hash_rate_log", | |
|
145 | "threads", | |
|
146 | NULL}; | |
|
143 | 147 | |
|
144 |
|
|
|
145 |
|
|
|
146 |
|
|
|
147 |
|
|
|
148 |
|
|
|
149 |
|
|
|
150 |
|
|
|
151 |
|
|
|
152 |
|
|
|
153 | int strategy = -1; | |
|
154 | int contentSizeFlag = 1; | |
|
155 |
|
|
|
156 | int dictIDFlag = 0; | |
|
157 | int jobSize = 0; | |
|
158 | int overlapLog = -1; | |
|
159 | int overlapSizeLog = -1; | |
|
160 | int forceMaxWindow = 0; | |
|
161 | int enableLDM = 0; | |
|
162 |
|
|
|
163 | int ldmMinMatch = 0; | |
|
164 | int ldmBucketSizeLog = 0; | |
|
165 | int ldmHashRateLog = -1; | |
|
166 | int ldmHashEveryLog = -1; | |
|
167 | int threads = 0; | |
|
148 | int format = 0; | |
|
149 | int compressionLevel = 0; | |
|
150 | int windowLog = 0; | |
|
151 | int hashLog = 0; | |
|
152 | int chainLog = 0; | |
|
153 | int searchLog = 0; | |
|
154 | int minMatch = 0; | |
|
155 | int targetLength = 0; | |
|
156 | int strategy = -1; | |
|
157 | int contentSizeFlag = 1; | |
|
158 | int checksumFlag = 0; | |
|
159 | int dictIDFlag = 0; | |
|
160 | int jobSize = 0; | |
|
161 | int overlapLog = -1; | |
|
162 | int forceMaxWindow = 0; | |
|
163 | int enableLDM = 0; | |
|
164 | int ldmHashLog = 0; | |
|
165 | int ldmMinMatch = 0; | |
|
166 | int ldmBucketSizeLog = 0; | |
|
167 | int ldmHashRateLog = -1; | |
|
168 | int threads = 0; | |
|
168 | 169 | |
|
169 |
|
|
|
170 |
|
|
|
171 |
|
|
|
172 |
|
|
|
173 |
|
|
|
174 | &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, | |
|
175 | &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) { | |
|
176 |
|
|
|
177 | } | |
|
170 | if (!PyArg_ParseTupleAndKeywords( | |
|
171 | args, kwargs, "|iiiiiiiiiiiiiiiiiiiii:ZstdCompressionParameters", | |
|
172 | kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog, | |
|
173 | &searchLog, &minMatch, &targetLength, &strategy, &contentSizeFlag, | |
|
174 | &checksumFlag, &dictIDFlag, &jobSize, &overlapLog, &forceMaxWindow, | |
|
175 | &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog, | |
|
176 | &ldmHashRateLog, &threads)) { | |
|
177 | return -1; | |
|
178 | } | |
|
178 | 179 | |
|
179 |
|
|
|
180 |
|
|
|
181 | } | |
|
180 | if (reset_params(self)) { | |
|
181 | return -1; | |
|
182 | } | |
|
182 | 183 | |
|
183 |
|
|
|
184 |
|
|
|
185 | } | |
|
184 | if (threads < 0) { | |
|
185 | threads = cpu_count(); | |
|
186 | } | |
|
186 | 187 | |
|
187 |
|
|
|
188 |
|
|
|
189 | TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads); | |
|
188 | /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and | |
|
189 | * ZSTD_c_overlapLog because setting ZSTD_c_nbWorkers resets the other | |
|
190 | * parameters. */ | |
|
191 | TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads); | |
|
190 | 192 | |
|
191 |
|
|
|
192 |
|
|
|
193 |
|
|
|
194 |
|
|
|
195 |
|
|
|
196 |
|
|
|
197 |
|
|
|
198 |
|
|
|
193 | TRY_SET_PARAMETER(self->params, ZSTD_c_format, format); | |
|
194 | TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel); | |
|
195 | TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog); | |
|
196 | TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog); | |
|
197 | TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog); | |
|
198 | TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog); | |
|
199 | TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch); | |
|
200 | TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength); | |
|
199 | 201 | |
|
200 | if (compressionStrategy != -1 && strategy != -1) { | |
|
201 | PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy"); | |
|
202 | return -1; | |
|
202 | if (strategy == -1) { | |
|
203 | strategy = 0; | |
|
203 | 204 | } |
|
204 | 205 | |
|
205 | if (compressionStrategy != -1) { | |
|
206 | strategy = compressionStrategy; | |
|
207 | } | |
|
208 | else if (strategy == -1) { | |
|
209 | strategy = 0; | |
|
210 | } | |
|
206 | TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy); | |
|
207 | TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag); | |
|
208 | TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag); | |
|
209 | TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag); | |
|
210 | TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize); | |
|
211 | 211 | |
|
212 | TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy); | |
|
213 | TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag); | |
|
214 | TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag); | |
|
215 | TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag); | |
|
216 | TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize); | |
|
217 | ||
|
218 | if (overlapLog != -1 && overlapSizeLog != -1) { | |
|
219 | PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log"); | |
|
220 | return -1; | |
|
221 | } | |
|
212 | if (overlapLog == -1) { | |
|
213 | overlapLog = 0; | |
|
214 | } | |
|
222 | 215 | |
|
223 | if (overlapSizeLog != -1) { | |
|
224 | overlapLog = overlapSizeLog; | |
|
225 | } | |
|
226 | else if (overlapLog == -1) { | |
|
227 | overlapLog = 0; | |
|
228 | } | |
|
229 | ||
|
230 | TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog); | |
|
231 | TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow); | |
|
232 | TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM); | |
|
233 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog); | |
|
234 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch); | |
|
235 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog); | |
|
216 | TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog); | |
|
217 | TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow); | |
|
218 | TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, | |
|
219 | enableLDM); | |
|
220 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog); | |
|
221 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch); | |
|
222 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog); | |
|
236 | 223 | |
|
237 |
|
|
|
238 | PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog"); | |
|
239 | return -1; | |
|
240 | } | |
|
224 | if (ldmHashRateLog == -1) { | |
|
225 | ldmHashRateLog = 0; | |
|
226 | } | |
|
241 | 227 | |
|
242 | if (ldmHashEveryLog != -1) { | |
|
243 | ldmHashRateLog = ldmHashEveryLog; | |
|
244 | } | |
|
245 | else if (ldmHashRateLog == -1) { | |
|
246 | ldmHashRateLog = 0; | |
|
247 | } | |
|
228 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog); | |
|
248 | 229 | |
|
249 | TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog); | |
|
250 | ||
|
251 | return 0; | |
|
230 | return 0; | |
|
252 | 231 | } |
|
253 | 232 | |
|
254 |
|
|
|
255 | "Create a CompressionParameters from a compression level and target sizes\n" | |
|
256 | ); | |
|
233 | ZstdCompressionParametersObject * | |
|
234 | CompressionParameters_from_level(PyObject *undef, PyObject *args, | |
|
235 | PyObject *kwargs) { | |
|
236 | int managedKwargs = 0; | |
|
237 | int level; | |
|
238 | PyObject *sourceSize = NULL; | |
|
239 | PyObject *dictSize = NULL; | |
|
240 | unsigned PY_LONG_LONG iSourceSize = 0; | |
|
241 | Py_ssize_t iDictSize = 0; | |
|
242 | PyObject *val; | |
|
243 | ZSTD_compressionParameters params; | |
|
244 | ZstdCompressionParametersObject *result = NULL; | |
|
245 | int res; | |
|
257 | 246 | |
|
258 | ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) { | |
|
259 | int managedKwargs = 0; | |
|
260 | int level; | |
|
261 | PyObject* sourceSize = NULL; | |
|
262 | PyObject* dictSize = NULL; | |
|
263 | unsigned PY_LONG_LONG iSourceSize = 0; | |
|
264 | Py_ssize_t iDictSize = 0; | |
|
265 | PyObject* val; | |
|
266 | ZSTD_compressionParameters params; | |
|
267 | ZstdCompressionParametersObject* result = NULL; | |
|
268 | int res; | |
|
247 | if (!PyArg_ParseTuple(args, "i:from_level", &level)) { | |
|
248 | return NULL; | |
|
249 | } | |
|
269 | 250 | |
|
270 | if (!PyArg_ParseTuple(args, "i:from_level", | |
|
271 | &level)) { | |
|
272 | return NULL; | |
|
273 | } | |
|
251 | if (!kwargs) { | |
|
252 | kwargs = PyDict_New(); | |
|
253 | if (!kwargs) { | |
|
254 | return NULL; | |
|
255 | } | |
|
256 | managedKwargs = 1; | |
|
257 | } | |
|
274 | 258 | |
|
275 | if (!kwargs) { | |
|
276 | kwargs = PyDict_New(); | |
|
277 | if (!kwargs) { | |
|
278 | return NULL; | |
|
279 | } | |
|
280 | managedKwargs = 1; | |
|
281 | } | |
|
259 | sourceSize = PyDict_GetItemString(kwargs, "source_size"); | |
|
260 | if (sourceSize) { | |
|
261 | iSourceSize = PyLong_AsUnsignedLongLong(sourceSize); | |
|
262 | if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) { | |
|
263 | goto cleanup; | |
|
264 | } | |
|
282 | 265 | |
|
283 |
|
|
|
284 | if (sourceSize) { | |
|
285 | #if PY_MAJOR_VERSION >= 3 | |
|
286 | iSourceSize = PyLong_AsUnsignedLongLong(sourceSize); | |
|
287 | if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) { | |
|
288 | goto cleanup; | |
|
289 | } | |
|
290 | #else | |
|
291 | iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize); | |
|
292 | #endif | |
|
266 | PyDict_DelItemString(kwargs, "source_size"); | |
|
267 | } | |
|
293 | 268 | |
|
294 |
|
|
|
295 | } | |
|
269 | dictSize = PyDict_GetItemString(kwargs, "dict_size"); | |
|
270 | if (dictSize) { | |
|
271 | iDictSize = PyLong_AsSsize_t(dictSize); | |
|
272 | if (iDictSize == -1) { | |
|
273 | goto cleanup; | |
|
274 | } | |
|
275 | ||
|
276 | PyDict_DelItemString(kwargs, "dict_size"); | |
|
277 | } | |
|
278 | ||
|
279 | params = ZSTD_getCParams(level, iSourceSize, iDictSize); | |
|
296 | 280 | |
|
297 | dictSize = PyDict_GetItemString(kwargs, "dict_size"); | |
|
298 | if (dictSize) { | |
|
299 | #if PY_MAJOR_VERSION >= 3 | |
|
300 | iDictSize = PyLong_AsSsize_t(dictSize); | |
|
301 | #else | |
|
302 | iDictSize = PyInt_AsSsize_t(dictSize); | |
|
303 | #endif | |
|
304 | if (iDictSize == -1) { | |
|
305 | goto cleanup; | |
|
306 | } | |
|
307 | ||
|
308 | PyDict_DelItemString(kwargs, "dict_size"); | |
|
309 | } | |
|
281 | /* Values derived from the input level and sizes are passed along to the | |
|
282 | constructor. But only if a value doesn't already exist. */ | |
|
283 | val = PyDict_GetItemString(kwargs, "window_log"); | |
|
284 | if (!val) { | |
|
285 | val = PyLong_FromUnsignedLong(params.windowLog); | |
|
286 | if (!val) { | |
|
287 | goto cleanup; | |
|
288 | } | |
|
289 | PyDict_SetItemString(kwargs, "window_log", val); | |
|
290 | Py_DECREF(val); | |
|
291 | } | |
|
310 | 292 | |
|
311 | ||
|
312 | params = ZSTD_getCParams(level, iSourceSize, iDictSize); | |
|
313 | ||
|
314 | /* Values derived from the input level and sizes are passed along to the | |
|
315 | constructor. But only if a value doesn't already exist. */ | |
|
316 | val = PyDict_GetItemString(kwargs, "window_log"); | |
|
317 | if (!val) { | |
|
318 | val = PyLong_FromUnsignedLong(params.windowLog); | |
|
319 | if (!val) { | |
|
320 | goto cleanup; | |
|
321 | } | |
|
322 | PyDict_SetItemString(kwargs, "window_log", val); | |
|
323 | Py_DECREF(val); | |
|
324 | } | |
|
293 | val = PyDict_GetItemString(kwargs, "chain_log"); | |
|
294 | if (!val) { | |
|
295 | val = PyLong_FromUnsignedLong(params.chainLog); | |
|
296 | if (!val) { | |
|
297 | goto cleanup; | |
|
298 | } | |
|
299 | PyDict_SetItemString(kwargs, "chain_log", val); | |
|
300 | Py_DECREF(val); | |
|
301 | } | |
|
325 | 302 | |
|
326 |
|
|
|
327 |
|
|
|
328 |
|
|
|
329 |
|
|
|
330 |
|
|
|
331 | } | |
|
332 |
|
|
|
333 |
|
|
|
334 | } | |
|
335 | ||
|
336 | val = PyDict_GetItemString(kwargs, "hash_log"); | |
|
337 | if (!val) { | |
|
338 | val = PyLong_FromUnsignedLong(params.hashLog); | |
|
339 | if (!val) { | |
|
340 | goto cleanup; | |
|
341 | } | |
|
342 | PyDict_SetItemString(kwargs, "hash_log", val); | |
|
343 | Py_DECREF(val); | |
|
344 | } | |
|
303 | val = PyDict_GetItemString(kwargs, "hash_log"); | |
|
304 | if (!val) { | |
|
305 | val = PyLong_FromUnsignedLong(params.hashLog); | |
|
306 | if (!val) { | |
|
307 | goto cleanup; | |
|
308 | } | |
|
309 | PyDict_SetItemString(kwargs, "hash_log", val); | |
|
310 | Py_DECREF(val); | |
|
311 | } | |
|
345 | 312 | |
|
346 |
|
|
|
347 |
|
|
|
348 |
|
|
|
349 |
|
|
|
350 |
|
|
|
351 | } | |
|
352 |
|
|
|
353 |
|
|
|
354 | } | |
|
313 | val = PyDict_GetItemString(kwargs, "search_log"); | |
|
314 | if (!val) { | |
|
315 | val = PyLong_FromUnsignedLong(params.searchLog); | |
|
316 | if (!val) { | |
|
317 | goto cleanup; | |
|
318 | } | |
|
319 | PyDict_SetItemString(kwargs, "search_log", val); | |
|
320 | Py_DECREF(val); | |
|
321 | } | |
|
355 | 322 | |
|
356 |
|
|
|
357 |
|
|
|
358 |
|
|
|
359 |
|
|
|
360 |
|
|
|
361 | } | |
|
362 |
|
|
|
363 |
|
|
|
364 | } | |
|
323 | val = PyDict_GetItemString(kwargs, "min_match"); | |
|
324 | if (!val) { | |
|
325 | val = PyLong_FromUnsignedLong(params.minMatch); | |
|
326 | if (!val) { | |
|
327 | goto cleanup; | |
|
328 | } | |
|
329 | PyDict_SetItemString(kwargs, "min_match", val); | |
|
330 | Py_DECREF(val); | |
|
331 | } | |
|
365 | 332 | |
|
366 |
|
|
|
367 |
|
|
|
368 |
|
|
|
369 |
|
|
|
370 |
|
|
|
371 | } | |
|
372 |
|
|
|
373 |
|
|
|
374 | } | |
|
333 | val = PyDict_GetItemString(kwargs, "target_length"); | |
|
334 | if (!val) { | |
|
335 | val = PyLong_FromUnsignedLong(params.targetLength); | |
|
336 | if (!val) { | |
|
337 | goto cleanup; | |
|
338 | } | |
|
339 | PyDict_SetItemString(kwargs, "target_length", val); | |
|
340 | Py_DECREF(val); | |
|
341 | } | |
|
375 | 342 | |
|
376 |
|
|
|
377 |
|
|
|
378 |
|
|
|
379 |
|
|
|
380 |
|
|
|
381 | } | |
|
382 |
|
|
|
383 |
|
|
|
384 | } | |
|
343 | val = PyDict_GetItemString(kwargs, "strategy"); | |
|
344 | if (!val) { | |
|
345 | val = PyLong_FromUnsignedLong(params.strategy); | |
|
346 | if (!val) { | |
|
347 | goto cleanup; | |
|
348 | } | |
|
349 | PyDict_SetItemString(kwargs, "strategy", val); | |
|
350 | Py_DECREF(val); | |
|
351 | } | |
|
385 | 352 | |
|
386 |
|
|
|
387 | if (!result) { | |
|
388 | goto cleanup; | |
|
389 | } | |
|
353 | result = PyObject_New(ZstdCompressionParametersObject, | |
|
354 | ZstdCompressionParametersType); | |
|
355 | if (!result) { | |
|
356 | goto cleanup; | |
|
357 | } | |
|
390 | 358 | |
|
391 |
|
|
|
359 | result->params = NULL; | |
|
392 | 360 | |
|
393 |
|
|
|
394 |
|
|
|
395 |
|
|
|
396 |
|
|
|
397 | } | |
|
361 | val = PyTuple_New(0); | |
|
362 | if (!val) { | |
|
363 | Py_CLEAR(result); | |
|
364 | goto cleanup; | |
|
365 | } | |
|
398 | 366 | |
|
399 |
|
|
|
400 |
|
|
|
367 | res = ZstdCompressionParameters_init(result, val, kwargs); | |
|
368 | Py_DECREF(val); | |
|
401 | 369 | |
|
402 |
|
|
|
403 |
|
|
|
404 |
|
|
|
405 | } | |
|
370 | if (res) { | |
|
371 | Py_CLEAR(result); | |
|
372 | goto cleanup; | |
|
373 | } | |
|
406 | 374 | |
|
407 | 375 | cleanup: |
|
408 |
|
|
|
409 |
|
|
|
410 | } | |
|
376 | if (managedKwargs) { | |
|
377 | Py_DECREF(kwargs); | |
|
378 | } | |
|
411 | 379 | |
|
412 |
|
|
|
380 | return result; | |
|
413 | 381 | } |
|
414 | 382 | |
|
415 |
Py |
|
|
416 | "Estimate the size in bytes of a compression context for compression parameters\n" | |
|
417 | ); | |
|
418 | ||
|
419 | PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) { | |
|
420 | return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params)); | |
|
383 | PyObject *ZstdCompressionParameters_estimated_compression_context_size( | |
|
384 | ZstdCompressionParametersObject *self) { | |
|
385 | return PyLong_FromSize_t( | |
|
386 | ZSTD_estimateCCtxSize_usingCCtxParams(self->params)); | |
|
421 | 387 | } |
|
422 | 388 | |
|
423 | PyDoc_STRVAR(ZstdCompressionParameters__doc__, | |
|
424 | "ZstdCompressionParameters: low-level control over zstd compression"); | |
|
389 | static void | |
|
390 | ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject *self) { | |
|
391 | if (self->params) { | |
|
392 | ZSTD_freeCCtxParams(self->params); | |
|
393 | self->params = NULL; | |
|
394 | } | |
|
425 | 395 | |
|
426 | static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) { | |
|
427 | if (self->params) { | |
|
428 | ZSTD_freeCCtxParams(self->params); | |
|
429 | self->params = NULL; | |
|
430 | } | |
|
431 | ||
|
432 | PyObject_Del(self); | |
|
396 | PyObject_Del(self); | |
|
433 | 397 | } |
|
434 | 398 | |
|
435 | #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \ | |
|
436 | int result; \ | |
|
437 | size_t zresult; \ | |
|
438 | ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \ | |
|
439 | zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \ | |
|
440 | if (ZSTD_isError(zresult)) { \ | |
|
441 | PyErr_Format(ZstdError, "unable to get compression parameter: %s", \ | |
|
442 | ZSTD_getErrorName(zresult)); \ | |
|
443 | return NULL; \ | |
|
444 | } \ | |
|
445 | return PyLong_FromLong(result); \ | |
|
446 | } | |
|
399 | #define PARAM_GETTER(name, param) \ | |
|
400 | PyObject *ZstdCompressionParameters_get_##name(PyObject *self, \ | |
|
401 | void *unused) { \ | |
|
402 | int result; \ | |
|
403 | size_t zresult; \ | |
|
404 | ZstdCompressionParametersObject *p = \ | |
|
405 | (ZstdCompressionParametersObject *)(self); \ | |
|
406 | zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \ | |
|
407 | if (ZSTD_isError(zresult)) { \ | |
|
408 | PyErr_Format(ZstdError, "unable to get compression parameter: %s", \ | |
|
409 | ZSTD_getErrorName(zresult)); \ | |
|
410 | return NULL; \ | |
|
411 | } \ | |
|
412 | return PyLong_FromLong(result); \ | |
|
413 | } | |
|
447 | 414 | |
|
448 | 415 | PARAM_GETTER(format, ZSTD_c_format) |
|
449 | 416 | PARAM_GETTER(compression_level, ZSTD_c_compressionLevel) |
@@ -453,7 +420,7 PARAM_GETTER(chain_log, ZSTD_c_chainLog) | |||
|
453 | 420 | PARAM_GETTER(search_log, ZSTD_c_searchLog) |
|
454 | 421 | PARAM_GETTER(min_match, ZSTD_c_minMatch) |
|
455 | 422 | PARAM_GETTER(target_length, ZSTD_c_targetLength) |
|
456 |
PARAM_GETTER( |
|
|
423 | PARAM_GETTER(strategy, ZSTD_c_strategy) | |
|
457 | 424 | PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag) |
|
458 | 425 | PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag) |
|
459 | 426 | PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag) |
@@ -468,105 +435,67 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_l | |||
|
468 | 435 | PARAM_GETTER(threads, ZSTD_c_nbWorkers) |
|
469 | 436 | |
|
470 | 437 | static PyMethodDef ZstdCompressionParameters_methods[] = { |
|
471 | { | |
|
472 | "from_level", | |
|
473 | (PyCFunction)CompressionParameters_from_level, | |
|
474 | METH_VARARGS | METH_KEYWORDS | METH_STATIC, | |
|
475 | ZstdCompressionParameters_from_level__doc__ | |
|
476 | }, | |
|
477 | { | |
|
478 | "estimated_compression_context_size", | |
|
479 | (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size, | |
|
480 | METH_NOARGS, | |
|
481 | ZstdCompressionParameters_estimated_compression_context_size__doc__ | |
|
482 | }, | |
|
483 | { NULL, NULL } | |
|
484 | }; | |
|
438 | {"from_level", (PyCFunction)CompressionParameters_from_level, | |
|
439 | METH_VARARGS | METH_KEYWORDS | METH_STATIC, NULL}, | |
|
440 | {"estimated_compression_context_size", | |
|
441 | (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size, | |
|
442 | METH_NOARGS, NULL}, | |
|
443 | {NULL, NULL}}; | |
|
485 | 444 | |
|
486 | #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL } | |
|
445 | #define GET_SET_ENTRY(name) \ | |
|
446 | { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL } | |
|
487 | 447 | |
|
488 | 448 | static PyGetSetDef ZstdCompressionParameters_getset[] = { |
|
489 |
|
|
|
490 |
|
|
|
491 |
|
|
|
492 |
|
|
|
493 |
|
|
|
494 |
|
|
|
495 |
|
|
|
496 |
|
|
|
497 |
|
|
|
498 |
|
|
|
499 |
|
|
|
500 |
|
|
|
501 |
|
|
|
502 |
|
|
|
503 |
|
|
|
504 | /* TODO remove this deprecated attribute */ | |
|
505 | { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL }, | |
|
506 | GET_SET_ENTRY(force_max_window), | |
|
507 |
|
|
|
508 |
|
|
|
509 |
|
|
|
510 | GET_SET_ENTRY(ldm_bucket_size_log), | |
|
511 | GET_SET_ENTRY(ldm_hash_rate_log), | |
|
512 | /* TODO remove this deprecated attribute */ | |
|
513 | { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL }, | |
|
514 | { NULL } | |
|
449 | GET_SET_ENTRY(format), | |
|
450 | GET_SET_ENTRY(compression_level), | |
|
451 | GET_SET_ENTRY(window_log), | |
|
452 | GET_SET_ENTRY(hash_log), | |
|
453 | GET_SET_ENTRY(chain_log), | |
|
454 | GET_SET_ENTRY(search_log), | |
|
455 | GET_SET_ENTRY(min_match), | |
|
456 | GET_SET_ENTRY(target_length), | |
|
457 | GET_SET_ENTRY(strategy), | |
|
458 | GET_SET_ENTRY(write_content_size), | |
|
459 | GET_SET_ENTRY(write_checksum), | |
|
460 | GET_SET_ENTRY(write_dict_id), | |
|
461 | GET_SET_ENTRY(threads), | |
|
462 | GET_SET_ENTRY(job_size), | |
|
463 | GET_SET_ENTRY(overlap_log), | |
|
464 | GET_SET_ENTRY(force_max_window), | |
|
465 | GET_SET_ENTRY(enable_ldm), | |
|
466 | GET_SET_ENTRY(ldm_hash_log), | |
|
467 | GET_SET_ENTRY(ldm_min_match), | |
|
468 | GET_SET_ENTRY(ldm_bucket_size_log), | |
|
469 | GET_SET_ENTRY(ldm_hash_rate_log), | |
|
470 | {NULL}}; | |
|
471 | ||
|
472 | PyType_Slot ZstdCompressionParametersSlots[] = { | |
|
473 | {Py_tp_dealloc, ZstdCompressionParameters_dealloc}, | |
|
474 | {Py_tp_methods, ZstdCompressionParameters_methods}, | |
|
475 | {Py_tp_getset, ZstdCompressionParameters_getset}, | |
|
476 | {Py_tp_init, ZstdCompressionParameters_init}, | |
|
477 | {Py_tp_new, PyType_GenericNew}, | |
|
478 | {0, NULL}, | |
|
515 | 479 | }; |
|
516 | 480 | |
|
517 |
PyType |
|
|
518 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
519 | "ZstdCompressionParameters", /* tp_name */ | |
|
520 | sizeof(ZstdCompressionParametersObject), /* tp_basicsize */ | |
|
521 | 0, /* tp_itemsize */ | |
|
522 | (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */ | |
|
523 | 0, /* tp_print */ | |
|
524 | 0, /* tp_getattr */ | |
|
525 | 0, /* tp_setattr */ | |
|
526 | 0, /* tp_compare */ | |
|
527 | 0, /* tp_repr */ | |
|
528 | 0, /* tp_as_number */ | |
|
529 | 0, /* tp_as_sequence */ | |
|
530 | 0, /* tp_as_mapping */ | |
|
531 | 0, /* tp_hash */ | |
|
532 | 0, /* tp_call */ | |
|
533 | 0, /* tp_str */ | |
|
534 | 0, /* tp_getattro */ | |
|
535 | 0, /* tp_setattro */ | |
|
536 | 0, /* tp_as_buffer */ | |
|
537 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
538 | ZstdCompressionParameters__doc__, /* tp_doc */ | |
|
539 | 0, /* tp_traverse */ | |
|
540 | 0, /* tp_clear */ | |
|
541 | 0, /* tp_richcompare */ | |
|
542 | 0, /* tp_weaklistoffset */ | |
|
543 | 0, /* tp_iter */ | |
|
544 | 0, /* tp_iternext */ | |
|
545 | ZstdCompressionParameters_methods, /* tp_methods */ | |
|
546 | 0, /* tp_members */ | |
|
547 | ZstdCompressionParameters_getset, /* tp_getset */ | |
|
548 | 0, /* tp_base */ | |
|
549 | 0, /* tp_dict */ | |
|
550 | 0, /* tp_descr_get */ | |
|
551 | 0, /* tp_descr_set */ | |
|
552 | 0, /* tp_dictoffset */ | |
|
553 | (initproc)ZstdCompressionParameters_init, /* tp_init */ | |
|
554 | 0, /* tp_alloc */ | |
|
555 | PyType_GenericNew, /* tp_new */ | |
|
481 | PyType_Spec ZstdCompressionParametersSpec = { | |
|
482 | "zstd.ZstdCompressionParameters", | |
|
483 | sizeof(ZstdCompressionParametersObject), | |
|
484 | 0, | |
|
485 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
486 | ZstdCompressionParametersSlots, | |
|
556 | 487 | }; |
|
557 | 488 | |
|
558 | void compressionparams_module_init(PyObject* mod) { | |
|
559 | Py_SET_TYPE(&ZstdCompressionParametersType, &PyType_Type); | |
|
560 | if (PyType_Ready(&ZstdCompressionParametersType) < 0) { | |
|
561 | return; | |
|
562 | } | |
|
489 | PyTypeObject *ZstdCompressionParametersType; | |
|
563 | 490 | |
|
564 | Py_INCREF(&ZstdCompressionParametersType); | |
|
565 | PyModule_AddObject(mod, "ZstdCompressionParameters", | |
|
566 |
|
|
|
491 | void compressionparams_module_init(PyObject *mod) { | |
|
492 | ZstdCompressionParametersType = | |
|
493 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressionParametersSpec); | |
|
494 | if (PyType_Ready(ZstdCompressionParametersType) < 0) { | |
|
495 | return; | |
|
496 | } | |
|
567 | 497 | |
|
568 | /* TODO remove deprecated alias. */ | |
|
569 | Py_INCREF(&ZstdCompressionParametersType); | |
|
570 | PyModule_AddObject(mod, "CompressionParameters", | |
|
571 | (PyObject*)&ZstdCompressionParametersType); | |
|
498 | Py_INCREF(ZstdCompressionParametersType); | |
|
499 | PyModule_AddObject(mod, "ZstdCompressionParameters", | |
|
500 | (PyObject *)ZstdCompressionParametersType); | |
|
572 | 501 | } |
This diff has been collapsed as it changes many lines, (1246 lines changed) Show them Hide them | |||
@@ -1,214 +1,218 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
11 | extern PyObject *ZstdError; | |
|
12 | 12 | |
|
13 | static void set_unsupported_operation(void) { | |
|
14 | PyObject* iomod; | |
|
15 | PyObject* exc; | |
|
13 | static void compressionreader_dealloc(ZstdCompressionReader *self) { | |
|
14 | Py_XDECREF(self->compressor); | |
|
15 | Py_XDECREF(self->reader); | |
|
16 | 16 | |
|
17 | iomod = PyImport_ImportModule("io"); | |
|
18 | if (NULL == iomod) { | |
|
19 | return; | |
|
20 | } | |
|
17 | if (self->buffer.buf) { | |
|
18 | PyBuffer_Release(&self->buffer); | |
|
19 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
|
20 | } | |
|
21 | 21 | |
|
22 | exc = PyObject_GetAttrString(iomod, "UnsupportedOperation"); | |
|
23 | if (NULL == exc) { | |
|
24 | Py_DECREF(iomod); | |
|
25 | return; | |
|
26 | } | |
|
27 | ||
|
28 | PyErr_SetNone(exc); | |
|
29 | Py_DECREF(exc); | |
|
30 | Py_DECREF(iomod); | |
|
22 | PyObject_Del(self); | |
|
31 | 23 | } |
|
32 | 24 | |
|
33 |
static |
|
|
34 | Py_XDECREF(self->compressor); | |
|
35 | Py_XDECREF(self->reader); | |
|
25 | static ZstdCompressionReader * | |
|
26 | compressionreader_enter(ZstdCompressionReader *self) { | |
|
27 | if (self->entered) { | |
|
28 | PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times"); | |
|
29 | return NULL; | |
|
30 | } | |
|
36 | 31 | |
|
37 | if (self->buffer.buf) { | |
|
38 | PyBuffer_Release(&self->buffer); | |
|
39 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
|
40 | } | |
|
32 | if (self->closed) { | |
|
33 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
34 | return NULL; | |
|
35 | } | |
|
41 | 36 | |
|
42 | PyObject_Del(self); | |
|
37 | self->entered = 1; | |
|
38 | ||
|
39 | Py_INCREF(self); | |
|
40 | return self; | |
|
43 | 41 | } |
|
44 | 42 | |
|
45 |
static |
|
|
46 | if (self->entered) { | |
|
47 | PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times"); | |
|
48 | return NULL; | |
|
49 | } | |
|
43 | static PyObject *compressionreader_exit(ZstdCompressionReader *self, | |
|
44 | PyObject *args) { | |
|
45 | PyObject *exc_type; | |
|
46 | PyObject *exc_value; | |
|
47 | PyObject *exc_tb; | |
|
48 | PyObject *result; | |
|
50 | 49 | |
|
51 | self->entered = 1; | |
|
52 | ||
|
53 | Py_INCREF(self); | |
|
54 | return self; | |
|
55 | } | |
|
50 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, | |
|
51 | &exc_tb)) { | |
|
52 | return NULL; | |
|
53 | } | |
|
56 | 54 | |
|
57 | static PyObject* reader_exit(ZstdCompressionReader* self, PyObject* args) { | |
|
58 | PyObject* exc_type; | |
|
59 | PyObject* exc_value; | |
|
60 | PyObject* exc_tb; | |
|
55 | self->entered = 0; | |
|
61 | 56 | |
|
62 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { | |
|
63 | return NULL; | |
|
64 | } | |
|
57 | result = PyObject_CallMethod((PyObject *)self, "close", NULL); | |
|
58 | if (NULL == result) { | |
|
59 | return NULL; | |
|
60 | } | |
|
65 | 61 | |
|
66 | self->entered = 0; | |
|
67 | self->closed = 1; | |
|
68 | ||
|
69 | /* Release resources associated with source. */ | |
|
70 | Py_CLEAR(self->reader); | |
|
71 | if (self->buffer.buf) { | |
|
72 | PyBuffer_Release(&self->buffer); | |
|
73 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
|
74 | } | |
|
62 | /* Release resources associated with source. */ | |
|
63 | Py_CLEAR(self->reader); | |
|
64 | if (self->buffer.buf) { | |
|
65 | PyBuffer_Release(&self->buffer); | |
|
66 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
|
67 | } | |
|
75 | 68 | |
|
76 | 69 | Py_CLEAR(self->compressor); |
|
77 | 70 | |
|
78 |
|
|
|
71 | Py_RETURN_FALSE; | |
|
79 | 72 | } |
|
80 | 73 | |
|
81 |
static PyObject* |
|
|
82 |
|
|
|
74 | static PyObject *compressionreader_readable(ZstdCompressionReader *self) { | |
|
75 | Py_RETURN_TRUE; | |
|
83 | 76 | } |
|
84 | 77 | |
|
85 |
static PyObject* |
|
|
86 |
|
|
|
78 | static PyObject *compressionreader_writable(ZstdCompressionReader *self) { | |
|
79 | Py_RETURN_FALSE; | |
|
87 | 80 | } |
|
88 | 81 | |
|
89 |
static PyObject* |
|
|
90 |
|
|
|
82 | static PyObject *compressionreader_seekable(ZstdCompressionReader *self) { | |
|
83 | Py_RETURN_FALSE; | |
|
84 | } | |
|
85 | ||
|
86 | static PyObject *compressionreader_readline(PyObject *self, PyObject *args) { | |
|
87 | set_io_unsupported_operation(); | |
|
88 | return NULL; | |
|
91 | 89 | } |
|
92 | 90 | |
|
93 |
static PyObject* |
|
|
94 |
|
|
|
95 |
|
|
|
91 | static PyObject *compressionreader_readlines(PyObject *self, PyObject *args) { | |
|
92 | set_io_unsupported_operation(); | |
|
93 | return NULL; | |
|
96 | 94 | } |
|
97 | 95 | |
|
98 |
static PyObject* |
|
|
99 | set_unsupported_operation(); | |
|
100 |
|
|
|
96 | static PyObject *compressionreader_write(PyObject *self, PyObject *args) { | |
|
97 | PyErr_SetString(PyExc_OSError, "stream is not writable"); | |
|
98 | return NULL; | |
|
101 | 99 | } |
|
102 | 100 | |
|
103 |
static PyObject* |
|
|
104 |
|
|
|
105 |
|
|
|
101 | static PyObject *compressionreader_writelines(PyObject *self, PyObject *args) { | |
|
102 | PyErr_SetString(PyExc_OSError, "stream is not writable"); | |
|
103 | return NULL; | |
|
106 | 104 | } |
|
107 | 105 | |
|
108 |
static PyObject* |
|
|
109 | PyErr_SetString(PyExc_OSError, "stream is not writable"); | |
|
110 | return NULL; | |
|
106 | static PyObject *compressionreader_isatty(PyObject *self) { | |
|
107 | Py_RETURN_FALSE; | |
|
108 | } | |
|
109 | ||
|
110 | static PyObject *compressionreader_flush(PyObject *self) { | |
|
111 | Py_RETURN_NONE; | |
|
111 | 112 | } |
|
112 | 113 | |
|
113 | static PyObject* reader_isatty(PyObject* self) { | |
|
114 | Py_RETURN_FALSE; | |
|
115 | } | |
|
114 | static PyObject *compressionreader_close(ZstdCompressionReader *self) { | |
|
115 | if (self->closed) { | |
|
116 | Py_RETURN_NONE; | |
|
117 | } | |
|
118 | ||
|
119 | self->closed = 1; | |
|
116 | 120 | |
|
117 | static PyObject* reader_flush(PyObject* self) { | |
|
118 | Py_RETURN_NONE; | |
|
121 | if (self->closefd && self->reader != NULL && | |
|
122 | PyObject_HasAttrString(self->reader, "close")) { | |
|
123 | return PyObject_CallMethod(self->reader, "close", NULL); | |
|
124 | } | |
|
125 | ||
|
126 | Py_RETURN_NONE; | |
|
119 | 127 | } |
|
120 | 128 | |
|
121 |
static PyObject* |
|
|
122 | self->closed = 1; | |
|
123 | Py_RETURN_NONE; | |
|
124 | } | |
|
125 | ||
|
126 | static PyObject* reader_tell(ZstdCompressionReader* self) { | |
|
127 | /* TODO should this raise OSError since stream isn't seekable? */ | |
|
128 | return PyLong_FromUnsignedLongLong(self->bytesCompressed); | |
|
129 | static PyObject *compressionreader_tell(ZstdCompressionReader *self) { | |
|
130 | /* TODO should this raise OSError since stream isn't seekable? */ | |
|
131 | return PyLong_FromUnsignedLongLong(self->bytesCompressed); | |
|
129 | 132 | } |
|
130 | 133 | |
|
131 |
int read_compressor_input(ZstdCompressionReader* |
|
|
132 |
|
|
|
133 |
|
|
|
134 | } | |
|
134 | int read_compressor_input(ZstdCompressionReader *self) { | |
|
135 | if (self->finishedInput) { | |
|
136 | return 0; | |
|
137 | } | |
|
135 | 138 | |
|
136 |
|
|
|
137 |
|
|
|
138 | } | |
|
139 | if (self->input.pos != self->input.size) { | |
|
140 | return 0; | |
|
141 | } | |
|
139 | 142 | |
|
140 |
|
|
|
141 |
|
|
|
143 | if (self->reader) { | |
|
144 | Py_buffer buffer; | |
|
142 | 145 | |
|
143 |
|
|
|
146 | assert(self->readResult == NULL); | |
|
144 | 147 | |
|
145 | self->readResult = PyObject_CallMethod(self->reader, "read", | |
|
146 | "k", self->readSize); | |
|
148 | self->readResult = | |
|
149 | PyObject_CallMethod(self->reader, "read", "k", self->readSize); | |
|
147 | 150 | |
|
148 |
|
|
|
149 | return -1; | |
|
150 | } | |
|
151 | if (NULL == self->readResult) { | |
|
152 | return -1; | |
|
153 | } | |
|
151 | 154 | |
|
152 |
|
|
|
155 | memset(&buffer, 0, sizeof(buffer)); | |
|
153 | 156 | |
|
154 | if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) { | |
|
155 | return -1; | |
|
156 | } | |
|
157 | if (0 != | |
|
158 | PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) { | |
|
159 | return -1; | |
|
160 | } | |
|
157 | 161 | |
|
158 |
|
|
|
159 |
|
|
|
160 |
|
|
|
161 |
|
|
|
162 | } | |
|
163 | else { | |
|
164 |
|
|
|
165 |
|
|
|
166 |
|
|
|
167 | } | |
|
162 | /* EOF */ | |
|
163 | if (0 == buffer.len) { | |
|
164 | self->finishedInput = 1; | |
|
165 | Py_CLEAR(self->readResult); | |
|
166 | } | |
|
167 | else { | |
|
168 | self->input.src = buffer.buf; | |
|
169 | self->input.size = buffer.len; | |
|
170 | self->input.pos = 0; | |
|
171 | } | |
|
168 | 172 | |
|
169 |
|
|
|
170 | } | |
|
171 |
|
|
|
172 |
|
|
|
173 | PyBuffer_Release(&buffer); | |
|
174 | } | |
|
175 | else { | |
|
176 | assert(self->buffer.buf); | |
|
173 | 177 | |
|
174 |
|
|
|
175 |
|
|
|
176 |
|
|
|
177 | } | |
|
178 | self->input.src = self->buffer.buf; | |
|
179 | self->input.size = self->buffer.len; | |
|
180 | self->input.pos = 0; | |
|
181 | } | |
|
178 | 182 | |
|
179 |
|
|
|
183 | return 1; | |
|
180 | 184 | } |
|
181 | 185 | |
|
182 |
int compress_input(ZstdCompressionReader* |
|
|
183 |
|
|
|
184 |
|
|
|
186 | int compress_input(ZstdCompressionReader *self, ZSTD_outBuffer *output) { | |
|
187 | size_t oldPos; | |
|
188 | size_t zresult; | |
|
185 | 189 | |
|
186 |
|
|
|
187 |
|
|
|
188 |
|
|
|
190 | /* If we have data left over, consume it. */ | |
|
191 | if (self->input.pos < self->input.size) { | |
|
192 | oldPos = output->pos; | |
|
189 | 193 | |
|
190 | Py_BEGIN_ALLOW_THREADS | |
|
191 | zresult = ZSTD_compressStream2(self->compressor->cctx, | |
|
192 | output, &self->input, ZSTD_e_continue); | |
|
193 | Py_END_ALLOW_THREADS | |
|
194 | Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2( | |
|
195 | self->compressor->cctx, output, &self->input, ZSTD_e_continue); | |
|
196 | Py_END_ALLOW_THREADS | |
|
194 | 197 | |
|
195 |
|
|
|
198 | self->bytesCompressed += output->pos - oldPos; | |
|
196 | 199 | |
|
197 |
|
|
|
198 |
|
|
|
199 |
|
|
|
200 |
|
|
|
200 | /* Input exhausted. Clear out state tracking. */ | |
|
201 | if (self->input.pos == self->input.size) { | |
|
202 | memset(&self->input, 0, sizeof(self->input)); | |
|
203 | Py_CLEAR(self->readResult); | |
|
201 | 204 | |
|
202 |
|
|
|
203 |
|
|
|
204 | } | |
|
205 | } | |
|
205 | if (self->buffer.buf) { | |
|
206 | self->finishedInput = 1; | |
|
207 | } | |
|
208 | } | |
|
206 | 209 | |
|
207 |
|
|
|
208 |
|
|
|
209 | return -1; | |
|
210 | } | |
|
211 | } | |
|
210 | if (ZSTD_isError(zresult)) { | |
|
211 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
212 | ZSTD_getErrorName(zresult)); | |
|
213 | return -1; | |
|
214 | } | |
|
215 | } | |
|
212 | 216 | |
|
213 | 217 | if (output->pos && output->pos == output->size) { |
|
214 | 218 | return 1; |
@@ -218,601 +222,591 int compress_input(ZstdCompressionReader | |||
|
218 | 222 | } |
|
219 | 223 | } |
|
220 | 224 | |
|
221 |
static PyObject* |
|
|
222 | static char* kwlist[] = { | |
|
223 | "size", | |
|
224 | NULL | |
|
225 | }; | |
|
225 | static PyObject *compressionreader_read(ZstdCompressionReader *self, | |
|
226 | PyObject *args, PyObject *kwargs) { | |
|
227 | static char *kwlist[] = {"size", NULL}; | |
|
226 | 228 | |
|
227 |
|
|
|
228 |
|
|
|
229 |
|
|
|
230 |
|
|
|
231 |
|
|
|
232 |
|
|
|
233 |
|
|
|
229 | Py_ssize_t size = -1; | |
|
230 | PyObject *result = NULL; | |
|
231 | char *resultBuffer; | |
|
232 | Py_ssize_t resultSize; | |
|
233 | size_t zresult; | |
|
234 | size_t oldPos; | |
|
235 | int readResult, compressResult; | |
|
234 | 236 | |
|
235 |
|
|
|
236 |
|
|
|
237 |
|
|
|
238 | } | |
|
237 | if (self->closed) { | |
|
238 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
239 | return NULL; | |
|
240 | } | |
|
239 | 241 | |
|
240 |
|
|
|
241 |
|
|
|
242 | } | |
|
242 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) { | |
|
243 | return NULL; | |
|
244 | } | |
|
243 | 245 | |
|
244 |
|
|
|
245 | PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1"); | |
|
246 | return NULL; | |
|
247 | } | |
|
246 | if (size < -1) { | |
|
247 | PyErr_SetString(PyExc_ValueError, | |
|
248 | "cannot read negative amounts less than -1"); | |
|
249 | return NULL; | |
|
250 | } | |
|
248 | 251 | |
|
249 |
|
|
|
250 |
|
|
|
251 | } | |
|
252 | if (size == -1) { | |
|
253 | return PyObject_CallMethod((PyObject *)self, "readall", NULL); | |
|
254 | } | |
|
252 | 255 | |
|
253 |
|
|
|
254 |
|
|
|
255 | } | |
|
256 | if (self->finishedOutput || size == 0) { | |
|
257 | return PyBytes_FromStringAndSize("", 0); | |
|
258 | } | |
|
256 | 259 | |
|
257 |
|
|
|
258 |
|
|
|
259 |
|
|
|
260 | } | |
|
260 | result = PyBytes_FromStringAndSize(NULL, size); | |
|
261 | if (NULL == result) { | |
|
262 | return NULL; | |
|
263 | } | |
|
261 | 264 | |
|
262 |
|
|
|
265 | PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize); | |
|
263 | 266 | |
|
264 |
|
|
|
265 |
|
|
|
266 |
|
|
|
267 | self->output.dst = resultBuffer; | |
|
268 | self->output.size = resultSize; | |
|
269 | self->output.pos = 0; | |
|
267 | 270 | |
|
268 | 271 | readinput: |
|
269 | 272 | |
|
270 | 273 | compressResult = compress_input(self, &self->output); |
|
271 | 274 | |
|
272 |
|
|
|
273 |
|
|
|
274 |
|
|
|
275 | } | |
|
276 |
|
|
|
277 |
|
|
|
278 |
|
|
|
279 | */ | |
|
280 | } | |
|
281 |
|
|
|
282 |
|
|
|
283 |
|
|
|
284 | } | |
|
285 |
|
|
|
286 |
|
|
|
287 | } | |
|
275 | if (-1 == compressResult) { | |
|
276 | Py_XDECREF(result); | |
|
277 | return NULL; | |
|
278 | } | |
|
279 | else if (0 == compressResult) { | |
|
280 | /* There is room in the output. We fall through to below, which will | |
|
281 | * either get more input for us or will attempt to end the stream. | |
|
282 | */ | |
|
283 | } | |
|
284 | else if (1 == compressResult) { | |
|
285 | memset(&self->output, 0, sizeof(self->output)); | |
|
286 | return result; | |
|
287 | } | |
|
288 | else { | |
|
289 | assert(0); | |
|
290 | } | |
|
288 | 291 | |
|
289 |
|
|
|
292 | readResult = read_compressor_input(self); | |
|
290 | 293 | |
|
291 |
|
|
|
292 |
|
|
|
293 | } | |
|
294 |
|
|
|
295 | else if (1 == readResult) { } | |
|
296 | else { | |
|
297 | assert(0); | |
|
298 | } | |
|
294 | if (-1 == readResult) { | |
|
295 | return NULL; | |
|
296 | } | |
|
297 | else if (0 == readResult) { | |
|
298 | } | |
|
299 | else if (1 == readResult) { | |
|
300 | } | |
|
301 | else { | |
|
302 | assert(0); | |
|
303 | } | |
|
299 | 304 | |
|
300 |
|
|
|
301 |
|
|
|
302 | } | |
|
305 | if (self->input.size) { | |
|
306 | goto readinput; | |
|
307 | } | |
|
303 | 308 | |
|
304 |
|
|
|
305 |
|
|
|
309 | /* Else EOF */ | |
|
310 | oldPos = self->output.pos; | |
|
306 | 311 | |
|
307 |
|
|
|
308 | &self->input, ZSTD_e_end); | |
|
312 | zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, | |
|
313 | &self->input, ZSTD_e_end); | |
|
309 | 314 | |
|
310 |
|
|
|
315 | self->bytesCompressed += self->output.pos - oldPos; | |
|
311 | 316 | |
|
312 |
|
|
|
313 |
|
|
|
314 |
|
|
|
315 |
|
|
|
316 |
|
|
|
317 | } | |
|
317 | if (ZSTD_isError(zresult)) { | |
|
318 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
319 | ZSTD_getErrorName(zresult)); | |
|
320 | Py_XDECREF(result); | |
|
321 | return NULL; | |
|
322 | } | |
|
318 | 323 | |
|
319 |
|
|
|
324 | assert(self->output.pos); | |
|
320 | 325 | |
|
321 |
|
|
|
322 |
|
|
|
323 | } | |
|
326 | if (0 == zresult) { | |
|
327 | self->finishedOutput = 1; | |
|
328 | } | |
|
324 | 329 | |
|
325 |
|
|
|
326 |
|
|
|
327 |
|
|
|
328 | } | |
|
330 | if (safe_pybytes_resize(&result, self->output.pos)) { | |
|
331 | Py_XDECREF(result); | |
|
332 | return NULL; | |
|
333 | } | |
|
329 | 334 | |
|
330 |
|
|
|
335 | memset(&self->output, 0, sizeof(self->output)); | |
|
331 | 336 | |
|
332 |
|
|
|
337 | return result; | |
|
333 | 338 | } |
|
334 | 339 | |
|
335 |
static PyObject* |
|
|
336 | static char* kwlist[] = { | |
|
337 | "size", | |
|
338 | NULL | |
|
339 | }; | |
|
340 | static PyObject *compressionreader_read1(ZstdCompressionReader *self, | |
|
341 | PyObject *args, PyObject *kwargs) { | |
|
342 | static char *kwlist[] = {"size", NULL}; | |
|
343 | ||
|
344 | Py_ssize_t size = -1; | |
|
345 | PyObject *result = NULL; | |
|
346 | char *resultBuffer; | |
|
347 | Py_ssize_t resultSize; | |
|
348 | ZSTD_outBuffer output; | |
|
349 | int compressResult; | |
|
350 | size_t oldPos; | |
|
351 | size_t zresult; | |
|
340 | 352 | |
|
341 | Py_ssize_t size = -1; | |
|
342 | PyObject* result = NULL; | |
|
343 | char* resultBuffer; | |
|
344 | Py_ssize_t resultSize; | |
|
345 | ZSTD_outBuffer output; | |
|
346 | int compressResult; | |
|
347 | size_t oldPos; | |
|
348 | size_t zresult; | |
|
353 | if (self->closed) { | |
|
354 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
355 | return NULL; | |
|
356 | } | |
|
349 | 357 | |
|
350 | if (self->closed) { | |
|
351 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
352 | return NULL; | |
|
353 | } | |
|
358 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) { | |
|
359 | return NULL; | |
|
360 | } | |
|
354 | 361 | |
|
355 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n:read1", kwlist, &size)) { | |
|
356 | return NULL; | |
|
357 | } | |
|
358 | ||
|
359 | if (size < -1) { | |
|
360 | PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1"); | |
|
361 | return NULL; | |
|
362 | } | |
|
362 | if (size < -1) { | |
|
363 | PyErr_SetString(PyExc_ValueError, | |
|
364 | "cannot read negative amounts less than -1"); | |
|
365 | return NULL; | |
|
366 | } | |
|
363 | 367 | |
|
364 |
|
|
|
365 |
|
|
|
366 | } | |
|
368 | if (self->finishedOutput || size == 0) { | |
|
369 | return PyBytes_FromStringAndSize("", 0); | |
|
370 | } | |
|
367 | 371 | |
|
368 |
|
|
|
369 |
|
|
|
370 | } | |
|
372 | if (size == -1) { | |
|
373 | size = ZSTD_CStreamOutSize(); | |
|
374 | } | |
|
371 | 375 | |
|
372 |
|
|
|
373 |
|
|
|
374 |
|
|
|
375 | } | |
|
376 | result = PyBytes_FromStringAndSize(NULL, size); | |
|
377 | if (NULL == result) { | |
|
378 | return NULL; | |
|
379 | } | |
|
376 | 380 | |
|
377 |
|
|
|
381 | PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize); | |
|
378 | 382 | |
|
379 |
|
|
|
380 |
|
|
|
381 |
|
|
|
383 | output.dst = resultBuffer; | |
|
384 | output.size = resultSize; | |
|
385 | output.pos = 0; | |
|
382 | 386 | |
|
383 |
|
|
|
384 |
|
|
|
385 |
|
|
|
386 |
|
|
|
387 |
|
|
|
388 |
|
|
|
389 | */ | |
|
387 | /* read1() is supposed to use at most 1 read() from the underlying stream. | |
|
388 | However, we can't satisfy this requirement with compression because | |
|
389 | not every input will generate output. We /could/ flush the compressor, | |
|
390 | but this may not be desirable. We allow multiple read() from the | |
|
391 | underlying stream. But unlike read(), we return as soon as output data | |
|
392 | is available. | |
|
393 | */ | |
|
390 | 394 | |
|
391 |
|
|
|
395 | compressResult = compress_input(self, &output); | |
|
392 | 396 | |
|
393 |
|
|
|
394 |
|
|
|
395 |
|
|
|
396 | } | |
|
397 |
|
|
|
398 | else { | |
|
399 | assert(0); | |
|
400 | } | |
|
397 | if (-1 == compressResult) { | |
|
398 | Py_XDECREF(result); | |
|
399 | return NULL; | |
|
400 | } | |
|
401 | else if (0 == compressResult || 1 == compressResult) { | |
|
402 | } | |
|
403 | else { | |
|
404 | assert(0); | |
|
405 | } | |
|
401 | 406 | |
|
402 |
|
|
|
403 |
|
|
|
404 | } | |
|
407 | if (output.pos) { | |
|
408 | goto finally; | |
|
409 | } | |
|
405 | 410 | |
|
406 |
|
|
|
407 |
|
|
|
411 | while (!self->finishedInput) { | |
|
412 | int readResult = read_compressor_input(self); | |
|
408 | 413 | |
|
409 |
|
|
|
410 |
|
|
|
411 |
|
|
|
412 | } | |
|
413 |
|
|
|
414 | else { | |
|
415 | assert(0); | |
|
416 | } | |
|
414 | if (-1 == readResult) { | |
|
415 | Py_XDECREF(result); | |
|
416 | return NULL; | |
|
417 | } | |
|
418 | else if (0 == readResult || 1 == readResult) { | |
|
419 | } | |
|
420 | else { | |
|
421 | assert(0); | |
|
422 | } | |
|
417 | 423 | |
|
418 |
|
|
|
424 | compressResult = compress_input(self, &output); | |
|
419 | 425 | |
|
420 |
|
|
|
421 |
|
|
|
422 |
|
|
|
423 | } | |
|
424 |
|
|
|
425 | else { | |
|
426 | assert(0); | |
|
427 | } | |
|
426 | if (-1 == compressResult) { | |
|
427 | Py_XDECREF(result); | |
|
428 | return NULL; | |
|
429 | } | |
|
430 | else if (0 == compressResult || 1 == compressResult) { | |
|
431 | } | |
|
432 | else { | |
|
433 | assert(0); | |
|
434 | } | |
|
428 | 435 | |
|
429 |
|
|
|
430 |
|
|
|
431 | } | |
|
432 | } | |
|
436 | if (output.pos) { | |
|
437 | goto finally; | |
|
438 | } | |
|
439 | } | |
|
433 | 440 | |
|
434 |
|
|
|
435 |
|
|
|
441 | /* EOF */ | |
|
442 | oldPos = output.pos; | |
|
436 | 443 | |
|
437 |
|
|
|
438 | ZSTD_e_end); | |
|
444 | zresult = ZSTD_compressStream2(self->compressor->cctx, &output, | |
|
445 | &self->input, ZSTD_e_end); | |
|
439 | 446 | |
|
440 |
|
|
|
447 | self->bytesCompressed += output.pos - oldPos; | |
|
441 | 448 | |
|
442 |
|
|
|
443 |
|
|
|
444 |
|
|
|
445 |
|
|
|
446 |
|
|
|
447 | } | |
|
449 | if (ZSTD_isError(zresult)) { | |
|
450 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
451 | ZSTD_getErrorName(zresult)); | |
|
452 | Py_XDECREF(result); | |
|
453 | return NULL; | |
|
454 | } | |
|
448 | 455 | |
|
449 |
|
|
|
450 |
|
|
|
451 | } | |
|
456 | if (zresult == 0) { | |
|
457 | self->finishedOutput = 1; | |
|
458 | } | |
|
452 | 459 | |
|
453 | 460 | finally: |
|
454 |
|
|
|
455 |
|
|
|
456 |
|
|
|
457 |
|
|
|
458 | } | |
|
459 | } | |
|
461 | if (result) { | |
|
462 | if (safe_pybytes_resize(&result, output.pos)) { | |
|
463 | Py_XDECREF(result); | |
|
464 | return NULL; | |
|
465 | } | |
|
466 | } | |
|
460 | 467 | |
|
461 |
|
|
|
468 | return result; | |
|
462 | 469 | } |
|
463 | 470 | |
|
464 |
static PyObject* |
|
|
465 |
|
|
|
466 |
|
|
|
467 |
|
|
|
471 | static PyObject *compressionreader_readall(PyObject *self) { | |
|
472 | PyObject *chunks = NULL; | |
|
473 | PyObject *empty = NULL; | |
|
474 | PyObject *result = NULL; | |
|
468 | 475 | |
|
469 |
|
|
|
470 |
|
|
|
471 |
|
|
|
472 |
|
|
|
473 | */ | |
|
474 |
|
|
|
475 |
|
|
|
476 |
|
|
|
477 | } | |
|
476 | /* Our strategy is to collect chunks into a list then join all the | |
|
477 | * chunks at the end. We could potentially use e.g. an io.BytesIO. But | |
|
478 | * this feels simple enough to implement and avoids potentially expensive | |
|
479 | * reallocations of large buffers. | |
|
480 | */ | |
|
481 | chunks = PyList_New(0); | |
|
482 | if (NULL == chunks) { | |
|
483 | return NULL; | |
|
484 | } | |
|
478 | 485 | |
|
479 |
|
|
|
480 |
|
|
|
481 |
|
|
|
482 |
|
|
|
483 |
|
|
|
484 | } | |
|
486 | while (1) { | |
|
487 | PyObject *chunk = PyObject_CallMethod(self, "read", "i", 1048576); | |
|
488 | if (NULL == chunk) { | |
|
489 | Py_DECREF(chunks); | |
|
490 | return NULL; | |
|
491 | } | |
|
485 | 492 | |
|
486 |
|
|
|
487 |
|
|
|
488 | break; | |
|
489 | } | |
|
493 | if (!PyBytes_Size(chunk)) { | |
|
494 | Py_DECREF(chunk); | |
|
495 | break; | |
|
496 | } | |
|
490 | 497 | |
|
491 |
|
|
|
492 |
|
|
|
493 |
|
|
|
494 |
|
|
|
495 | } | |
|
498 | if (PyList_Append(chunks, chunk)) { | |
|
499 | Py_DECREF(chunk); | |
|
500 | Py_DECREF(chunks); | |
|
501 | return NULL; | |
|
502 | } | |
|
496 | 503 | |
|
497 |
|
|
|
498 | } | |
|
504 | Py_DECREF(chunk); | |
|
505 | } | |
|
499 | 506 | |
|
500 |
|
|
|
501 |
|
|
|
502 |
|
|
|
503 |
|
|
|
504 | } | |
|
507 | empty = PyBytes_FromStringAndSize("", 0); | |
|
508 | if (NULL == empty) { | |
|
509 | Py_DECREF(chunks); | |
|
510 | return NULL; | |
|
511 | } | |
|
505 | 512 | |
|
506 |
|
|
|
513 | result = PyObject_CallMethod(empty, "join", "O", chunks); | |
|
507 | 514 | |
|
508 |
|
|
|
509 |
|
|
|
515 | Py_DECREF(empty); | |
|
516 | Py_DECREF(chunks); | |
|
510 | 517 | |
|
511 |
|
|
|
518 | return result; | |
|
512 | 519 | } |
|
513 | 520 | |
|
514 |
static PyObject* |
|
|
515 | Py_buffer dest; | |
|
516 | ZSTD_outBuffer output; | |
|
517 | int readResult, compressResult; | |
|
518 | PyObject* result = NULL; | |
|
519 | size_t zresult; | |
|
520 | size_t oldPos; | |
|
521 | static PyObject *compressionreader_readinto(ZstdCompressionReader *self, | |
|
522 | PyObject *args) { | |
|
523 | Py_buffer dest; | |
|
524 | ZSTD_outBuffer output; | |
|
525 | int readResult, compressResult; | |
|
526 | PyObject *result = NULL; | |
|
527 | size_t zresult; | |
|
528 | size_t oldPos; | |
|
521 | 529 | |
|
522 |
|
|
|
523 |
|
|
|
524 |
|
|
|
525 | } | |
|
530 | if (self->closed) { | |
|
531 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
532 | return NULL; | |
|
533 | } | |
|
526 | 534 | |
|
527 |
|
|
|
528 |
|
|
|
529 | } | |
|
535 | if (self->finishedOutput) { | |
|
536 | return PyLong_FromLong(0); | |
|
537 | } | |
|
530 | 538 | |
|
531 |
|
|
|
532 |
|
|
|
533 | } | |
|
539 | if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) { | |
|
540 | return NULL; | |
|
541 | } | |
|
534 | 542 | |
|
535 | if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) { | |
|
536 | PyErr_SetString(PyExc_ValueError, | |
|
537 | "destination buffer should be contiguous and have at most one dimension"); | |
|
538 | goto finally; | |
|
539 | } | |
|
543 | output.dst = dest.buf; | |
|
544 | output.size = dest.len; | |
|
545 | output.pos = 0; | |
|
540 | 546 | |
|
541 | output.dst = dest.buf; | |
|
542 | output.size = dest.len; | |
|
543 | output.pos = 0; | |
|
544 | ||
|
545 | compressResult = compress_input(self, &output); | |
|
547 | compressResult = compress_input(self, &output); | |
|
546 | 548 | |
|
547 |
|
|
|
548 |
|
|
|
549 | } | |
|
550 |
|
|
|
551 | else if (1 == compressResult) { | |
|
552 | result = PyLong_FromSize_t(output.pos); | |
|
553 | goto finally; | |
|
554 | } | |
|
555 | else { | |
|
556 | assert(0); | |
|
557 | } | |
|
549 | if (-1 == compressResult) { | |
|
550 | goto finally; | |
|
551 | } | |
|
552 | else if (0 == compressResult) { | |
|
553 | } | |
|
554 | else if (1 == compressResult) { | |
|
555 | result = PyLong_FromSize_t(output.pos); | |
|
556 | goto finally; | |
|
557 | } | |
|
558 | else { | |
|
559 | assert(0); | |
|
560 | } | |
|
561 | ||
|
562 | while (!self->finishedInput) { | |
|
563 | readResult = read_compressor_input(self); | |
|
558 | 564 | |
|
559 | while (!self->finishedInput) { | |
|
560 | readResult = read_compressor_input(self); | |
|
565 | if (-1 == readResult) { | |
|
566 | goto finally; | |
|
567 | } | |
|
568 | else if (0 == readResult || 1 == readResult) { | |
|
569 | } | |
|
570 | else { | |
|
571 | assert(0); | |
|
572 | } | |
|
561 | 573 | |
|
562 | if (-1 == readResult) { | |
|
563 | goto finally; | |
|
564 | } | |
|
565 | else if (0 == readResult || 1 == readResult) {} | |
|
566 | else { | |
|
567 | assert(0); | |
|
568 | } | |
|
569 | ||
|
570 | compressResult = compress_input(self, &output); | |
|
574 | compressResult = compress_input(self, &output); | |
|
571 | 575 | |
|
572 |
|
|
|
573 |
|
|
|
574 | } | |
|
575 |
|
|
|
576 | else if (1 == compressResult) { | |
|
577 | result = PyLong_FromSize_t(output.pos); | |
|
578 | goto finally; | |
|
579 | } | |
|
580 | else { | |
|
581 | assert(0); | |
|
582 | } | |
|
583 | } | |
|
576 | if (-1 == compressResult) { | |
|
577 | goto finally; | |
|
578 | } | |
|
579 | else if (0 == compressResult) { | |
|
580 | } | |
|
581 | else if (1 == compressResult) { | |
|
582 | result = PyLong_FromSize_t(output.pos); | |
|
583 | goto finally; | |
|
584 | } | |
|
585 | else { | |
|
586 | assert(0); | |
|
587 | } | |
|
588 | } | |
|
584 | 589 | |
|
585 |
|
|
|
586 |
|
|
|
590 | /* EOF */ | |
|
591 | oldPos = output.pos; | |
|
587 | 592 | |
|
588 |
|
|
|
589 | ZSTD_e_end); | |
|
593 | zresult = ZSTD_compressStream2(self->compressor->cctx, &output, | |
|
594 | &self->input, ZSTD_e_end); | |
|
590 | 595 | |
|
591 |
|
|
|
596 | self->bytesCompressed += self->output.pos - oldPos; | |
|
592 | 597 | |
|
593 |
|
|
|
594 |
|
|
|
595 |
|
|
|
596 |
|
|
|
597 | } | |
|
598 | if (ZSTD_isError(zresult)) { | |
|
599 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
600 | ZSTD_getErrorName(zresult)); | |
|
601 | goto finally; | |
|
602 | } | |
|
598 | 603 | |
|
599 |
|
|
|
604 | assert(output.pos); | |
|
600 | 605 | |
|
601 |
|
|
|
602 |
|
|
|
603 | } | |
|
606 | if (0 == zresult) { | |
|
607 | self->finishedOutput = 1; | |
|
608 | } | |
|
604 | 609 | |
|
605 |
|
|
|
610 | result = PyLong_FromSize_t(output.pos); | |
|
606 | 611 | |
|
607 | 612 | finally: |
|
608 |
|
|
|
613 | PyBuffer_Release(&dest); | |
|
609 | 614 | |
|
610 |
|
|
|
615 | return result; | |
|
611 | 616 | } |
|
612 | 617 | |
|
613 |
static PyObject* |
|
|
614 | Py_buffer dest; | |
|
615 | PyObject* result = NULL; | |
|
616 | ZSTD_outBuffer output; | |
|
617 | int compressResult; | |
|
618 | size_t oldPos; | |
|
619 | size_t zresult; | |
|
618 | static PyObject *compressionreader_readinto1(ZstdCompressionReader *self, | |
|
619 | PyObject *args) { | |
|
620 | Py_buffer dest; | |
|
621 | PyObject *result = NULL; | |
|
622 | ZSTD_outBuffer output; | |
|
623 | int compressResult; | |
|
624 | size_t oldPos; | |
|
625 | size_t zresult; | |
|
620 | 626 | |
|
621 |
|
|
|
622 |
|
|
|
623 |
|
|
|
624 | } | |
|
627 | if (self->closed) { | |
|
628 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
629 | return NULL; | |
|
630 | } | |
|
625 | 631 | |
|
626 |
|
|
|
627 |
|
|
|
628 | } | |
|
632 | if (self->finishedOutput) { | |
|
633 | return PyLong_FromLong(0); | |
|
634 | } | |
|
629 | 635 | |
|
630 |
|
|
|
631 |
|
|
|
632 | } | |
|
636 | if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) { | |
|
637 | return NULL; | |
|
638 | } | |
|
633 | 639 | |
|
634 | if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) { | |
|
635 | PyErr_SetString(PyExc_ValueError, | |
|
636 | "destination buffer should be contiguous and have at most one dimension"); | |
|
637 | goto finally; | |
|
638 | } | |
|
640 | output.dst = dest.buf; | |
|
641 | output.size = dest.len; | |
|
642 | output.pos = 0; | |
|
639 | 643 | |
|
640 | output.dst = dest.buf; | |
|
641 | output.size = dest.len; | |
|
642 | output.pos = 0; | |
|
643 | ||
|
644 | compressResult = compress_input(self, &output); | |
|
644 | compressResult = compress_input(self, &output); | |
|
645 | 645 | |
|
646 |
|
|
|
647 |
|
|
|
648 | } | |
|
649 |
|
|
|
650 | else { | |
|
651 | assert(0); | |
|
652 | } | |
|
646 | if (-1 == compressResult) { | |
|
647 | goto finally; | |
|
648 | } | |
|
649 | else if (0 == compressResult || 1 == compressResult) { | |
|
650 | } | |
|
651 | else { | |
|
652 | assert(0); | |
|
653 | } | |
|
653 | 654 | |
|
654 |
|
|
|
655 |
|
|
|
656 |
|
|
|
657 | } | |
|
655 | if (output.pos) { | |
|
656 | result = PyLong_FromSize_t(output.pos); | |
|
657 | goto finally; | |
|
658 | } | |
|
658 | 659 | |
|
659 |
|
|
|
660 |
|
|
|
660 | while (!self->finishedInput) { | |
|
661 | int readResult = read_compressor_input(self); | |
|
661 | 662 | |
|
662 |
|
|
|
663 |
|
|
|
664 | } | |
|
665 |
|
|
|
666 | else { | |
|
667 | assert(0); | |
|
668 | } | |
|
663 | if (-1 == readResult) { | |
|
664 | goto finally; | |
|
665 | } | |
|
666 | else if (0 == readResult || 1 == readResult) { | |
|
667 | } | |
|
668 | else { | |
|
669 | assert(0); | |
|
670 | } | |
|
669 | 671 | |
|
670 |
|
|
|
672 | compressResult = compress_input(self, &output); | |
|
671 | 673 | |
|
672 |
|
|
|
673 |
|
|
|
674 | } | |
|
675 |
|
|
|
676 | else if (1 == compressResult) { | |
|
677 | result = PyLong_FromSize_t(output.pos); | |
|
678 | goto finally; | |
|
679 | } | |
|
680 | else { | |
|
681 | assert(0); | |
|
682 | } | |
|
674 | if (-1 == compressResult) { | |
|
675 | goto finally; | |
|
676 | } | |
|
677 | else if (0 == compressResult) { | |
|
678 | } | |
|
679 | else if (1 == compressResult) { | |
|
680 | result = PyLong_FromSize_t(output.pos); | |
|
681 | goto finally; | |
|
682 | } | |
|
683 | else { | |
|
684 | assert(0); | |
|
685 | } | |
|
683 | 686 | |
|
684 |
|
|
|
685 |
|
|
|
686 | */ | |
|
687 |
|
|
|
688 |
|
|
|
689 |
|
|
|
690 | } | |
|
687 | /* If we produced output and we're not done with input, emit | |
|
688 | * that output now, as we've hit restrictions of read1(). | |
|
689 | */ | |
|
690 | if (output.pos && !self->finishedInput) { | |
|
691 | result = PyLong_FromSize_t(output.pos); | |
|
692 | goto finally; | |
|
693 | } | |
|
691 | 694 | |
|
692 |
|
|
|
693 |
|
|
|
694 |
|
|
|
695 | } | |
|
695 | /* Otherwise we either have no output or we've exhausted the | |
|
696 | * input. Either we try to get more input or we fall through | |
|
697 | * to EOF below */ | |
|
698 | } | |
|
696 | 699 | |
|
697 |
|
|
|
698 |
|
|
|
700 | /* EOF */ | |
|
701 | oldPos = output.pos; | |
|
699 | 702 | |
|
700 |
|
|
|
701 | ZSTD_e_end); | |
|
703 | zresult = ZSTD_compressStream2(self->compressor->cctx, &output, | |
|
704 | &self->input, ZSTD_e_end); | |
|
702 | 705 | |
|
703 |
|
|
|
706 | self->bytesCompressed += self->output.pos - oldPos; | |
|
704 | 707 | |
|
705 |
|
|
|
706 |
|
|
|
707 |
|
|
|
708 |
|
|
|
709 | } | |
|
708 | if (ZSTD_isError(zresult)) { | |
|
709 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
710 | ZSTD_getErrorName(zresult)); | |
|
711 | goto finally; | |
|
712 | } | |
|
710 | 713 | |
|
711 |
|
|
|
714 | assert(output.pos); | |
|
712 | 715 | |
|
713 |
|
|
|
714 |
|
|
|
715 | } | |
|
716 | if (0 == zresult) { | |
|
717 | self->finishedOutput = 1; | |
|
718 | } | |
|
716 | 719 | |
|
717 |
|
|
|
720 | result = PyLong_FromSize_t(output.pos); | |
|
718 | 721 | |
|
719 | 722 | finally: |
|
720 |
|
|
|
723 | PyBuffer_Release(&dest); | |
|
721 | 724 | |
|
722 |
|
|
|
725 | return result; | |
|
723 | 726 | } |
|
724 | 727 | |
|
725 |
static PyObject* |
|
|
726 |
|
|
|
727 |
|
|
|
728 | static PyObject *compressionreader_iter(PyObject *self) { | |
|
729 | set_io_unsupported_operation(); | |
|
730 | return NULL; | |
|
728 | 731 | } |
|
729 | 732 | |
|
730 |
static PyObject* |
|
|
731 |
|
|
|
732 |
|
|
|
733 | static PyObject *compressionreader_iternext(PyObject *self) { | |
|
734 | set_io_unsupported_operation(); | |
|
735 | return NULL; | |
|
733 | 736 | } |
|
734 | 737 | |
|
735 | static PyMethodDef reader_methods[] = { | |
|
736 |
|
|
|
737 |
|
|
|
738 |
|
|
|
739 |
|
|
|
740 |
|
|
|
741 |
|
|
|
742 |
|
|
|
743 | { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") }, | |
|
744 |
|
|
|
745 |
|
|
|
746 | { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data") }, | |
|
747 | { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, NULL }, | |
|
748 | { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") }, | |
|
749 | { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL }, | |
|
750 |
|
|
|
751 | { "readline", (PyCFunction)reader_readline, METH_VARARGS, PyDoc_STR("Not implemented") }, | |
|
752 | { "readlines", (PyCFunction)reader_readlines, METH_VARARGS, PyDoc_STR("Not implemented") }, | |
|
753 | { "seekable", (PyCFunction)reader_seekable, METH_NOARGS, | |
|
754 | PyDoc_STR("Returns False") }, | |
|
755 |
|
|
|
756 | PyDoc_STR("Returns current number of bytes compressed") }, | |
|
757 | { "writable", (PyCFunction)reader_writable, METH_NOARGS, | |
|
758 | PyDoc_STR("Returns False") }, | |
|
759 | { "write", reader_write, METH_VARARGS, PyDoc_STR("Raises OSError") }, | |
|
760 | { "writelines", reader_writelines, METH_VARARGS, PyDoc_STR("Not implemented") }, | |
|
761 | { NULL, NULL } | |
|
762 | }; | |
|
738 | static PyMethodDef compressionreader_methods[] = { | |
|
739 | {"__enter__", (PyCFunction)compressionreader_enter, METH_NOARGS, | |
|
740 | PyDoc_STR("Enter a compression context")}, | |
|
741 | {"__exit__", (PyCFunction)compressionreader_exit, METH_VARARGS, | |
|
742 | PyDoc_STR("Exit a compression context")}, | |
|
743 | {"close", (PyCFunction)compressionreader_close, METH_NOARGS, | |
|
744 | PyDoc_STR("Close the stream so it cannot perform any more operations")}, | |
|
745 | {"flush", (PyCFunction)compressionreader_flush, METH_NOARGS, | |
|
746 | PyDoc_STR("no-ops")}, | |
|
747 | {"isatty", (PyCFunction)compressionreader_isatty, METH_NOARGS, | |
|
748 | PyDoc_STR("Returns False")}, | |
|
749 | {"readable", (PyCFunction)compressionreader_readable, METH_NOARGS, | |
|
750 | PyDoc_STR("Returns True")}, | |
|
751 | {"read", (PyCFunction)compressionreader_read, METH_VARARGS | METH_KEYWORDS, | |
|
752 | PyDoc_STR("read compressed data")}, | |
|
753 | {"read1", (PyCFunction)compressionreader_read1, | |
|
754 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
755 | {"readall", (PyCFunction)compressionreader_readall, METH_NOARGS, | |
|
756 | PyDoc_STR("Not implemented")}, | |
|
757 | {"readinto", (PyCFunction)compressionreader_readinto, METH_VARARGS, NULL}, | |
|
758 | {"readinto1", (PyCFunction)compressionreader_readinto1, METH_VARARGS, NULL}, | |
|
759 | {"readline", (PyCFunction)compressionreader_readline, METH_VARARGS, | |
|
760 | PyDoc_STR("Not implemented")}, | |
|
761 | {"readlines", (PyCFunction)compressionreader_readlines, METH_VARARGS, | |
|
762 | PyDoc_STR("Not implemented")}, | |
|
763 | {"seekable", (PyCFunction)compressionreader_seekable, METH_NOARGS, | |
|
764 | PyDoc_STR("Returns False")}, | |
|
765 | {"tell", (PyCFunction)compressionreader_tell, METH_NOARGS, | |
|
766 | PyDoc_STR("Returns current number of bytes compressed")}, | |
|
767 | {"writable", (PyCFunction)compressionreader_writable, METH_NOARGS, | |
|
768 | PyDoc_STR("Returns False")}, | |
|
769 | {"write", compressionreader_write, METH_VARARGS, | |
|
770 | PyDoc_STR("Raises OSError")}, | |
|
771 | {"writelines", compressionreader_writelines, METH_VARARGS, | |
|
772 | PyDoc_STR("Not implemented")}, | |
|
773 | {NULL, NULL}}; | |
|
763 | 774 | |
|
764 | static PyMemberDef reader_members[] = { | |
|
765 |
|
|
|
766 |
|
|
|
767 | { NULL } | |
|
775 | static PyMemberDef compressionreader_members[] = { | |
|
776 | {"closed", T_BOOL, offsetof(ZstdCompressionReader, closed), READONLY, | |
|
777 | "whether stream is closed"}, | |
|
778 | {NULL}}; | |
|
779 | ||
|
780 | PyType_Slot ZstdCompressionReaderSlots[] = { | |
|
781 | {Py_tp_dealloc, compressionreader_dealloc}, | |
|
782 | {Py_tp_iter, compressionreader_iter}, | |
|
783 | {Py_tp_iternext, compressionreader_iternext}, | |
|
784 | {Py_tp_methods, compressionreader_methods}, | |
|
785 | {Py_tp_members, compressionreader_members}, | |
|
786 | {Py_tp_new, PyType_GenericNew}, | |
|
787 | {0, NULL}, | |
|
768 | 788 | }; |
|
769 | 789 | |
|
770 |
PyType |
|
|
771 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
772 | "zstd.ZstdCompressionReader", /* tp_name */ | |
|
773 | sizeof(ZstdCompressionReader), /* tp_basicsize */ | |
|
774 | 0, /* tp_itemsize */ | |
|
775 | (destructor)reader_dealloc, /* tp_dealloc */ | |
|
776 | 0, /* tp_print */ | |
|
777 | 0, /* tp_getattr */ | |
|
778 | 0, /* tp_setattr */ | |
|
779 | 0, /* tp_compare */ | |
|
780 | 0, /* tp_repr */ | |
|
781 | 0, /* tp_as_number */ | |
|
782 | 0, /* tp_as_sequence */ | |
|
783 | 0, /* tp_as_mapping */ | |
|
784 | 0, /* tp_hash */ | |
|
785 | 0, /* tp_call */ | |
|
786 | 0, /* tp_str */ | |
|
787 | 0, /* tp_getattro */ | |
|
788 | 0, /* tp_setattro */ | |
|
789 | 0, /* tp_as_buffer */ | |
|
790 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
|
791 | 0, /* tp_doc */ | |
|
792 | 0, /* tp_traverse */ | |
|
793 | 0, /* tp_clear */ | |
|
794 | 0, /* tp_richcompare */ | |
|
795 | 0, /* tp_weaklistoffset */ | |
|
796 | reader_iter, /* tp_iter */ | |
|
797 | reader_iternext, /* tp_iternext */ | |
|
798 | reader_methods, /* tp_methods */ | |
|
799 | reader_members, /* tp_members */ | |
|
800 | 0, /* tp_getset */ | |
|
801 | 0, /* tp_base */ | |
|
802 | 0, /* tp_dict */ | |
|
803 | 0, /* tp_descr_get */ | |
|
804 | 0, /* tp_descr_set */ | |
|
805 | 0, /* tp_dictoffset */ | |
|
806 | 0, /* tp_init */ | |
|
807 | 0, /* tp_alloc */ | |
|
808 | PyType_GenericNew, /* tp_new */ | |
|
790 | PyType_Spec ZstdCompressionReaderSpec = { | |
|
791 | "zstd.ZstdCompressionReader", | |
|
792 | sizeof(ZstdCompressionReader), | |
|
793 | 0, | |
|
794 | Py_TPFLAGS_DEFAULT, | |
|
795 | ZstdCompressionReaderSlots, | |
|
809 | 796 | }; |
|
810 | 797 | |
|
811 | void compressionreader_module_init(PyObject* mod) { | |
|
812 | /* TODO make reader a sub-class of io.RawIOBase */ | |
|
798 | PyTypeObject *ZstdCompressionReaderType; | |
|
799 | ||
|
800 | void compressionreader_module_init(PyObject *mod) { | |
|
801 | /* TODO make reader a sub-class of io.RawIOBase */ | |
|
813 | 802 | |
|
814 |
|
|
|
815 |
|
|
|
816 | return; | |
|
817 | } | |
|
803 | ZstdCompressionReaderType = | |
|
804 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressionReaderSpec); | |
|
805 | if (PyType_Ready(ZstdCompressionReaderType) < 0) { | |
|
806 | return; | |
|
807 | } | |
|
808 | ||
|
809 | Py_INCREF((PyObject *)ZstdCompressionReaderType); | |
|
810 | PyModule_AddObject(mod, "ZstdCompressionReader", | |
|
811 | (PyObject *)ZstdCompressionReaderType); | |
|
818 | 812 | } |
This diff has been collapsed as it changes many lines, (575 lines changed) Show them Hide them | |||
@@ -1,372 +1,353 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
12 | ||
|
13 | PyDoc_STRVAR(ZstdCompresssionWriter__doc__, | |
|
14 | """A context manager used for writing compressed output to a writer.\n" | |
|
15 | ); | |
|
11 | extern PyObject *ZstdError; | |
|
16 | 12 | |
|
17 |
static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter* |
|
|
18 |
|
|
|
19 |
|
|
|
13 | static void ZstdCompressionWriter_dealloc(ZstdCompressionWriter *self) { | |
|
14 | Py_XDECREF(self->compressor); | |
|
15 | Py_XDECREF(self->writer); | |
|
20 | 16 | |
|
21 |
|
|
|
22 |
|
|
|
17 | PyMem_Free(self->output.dst); | |
|
18 | self->output.dst = NULL; | |
|
23 | 19 | |
|
24 |
|
|
|
20 | PyObject_Del(self); | |
|
25 | 21 | } |
|
26 | 22 | |
|
27 |
static PyObject* |
|
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
|
31 | } | |
|
23 | static PyObject *ZstdCompressionWriter_enter(ZstdCompressionWriter *self) { | |
|
24 | if (self->closed) { | |
|
25 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
26 | return NULL; | |
|
27 | } | |
|
32 | 28 | |
|
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 | } | |
|
29 | if (self->entered) { | |
|
30 | PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); | |
|
31 | return NULL; | |
|
32 | } | |
|
37 | 33 | |
|
38 |
|
|
|
34 | self->entered = 1; | |
|
39 | 35 | |
|
40 |
|
|
|
41 |
|
|
|
36 | Py_INCREF(self); | |
|
37 | return (PyObject *)self; | |
|
42 | 38 | } |
|
43 | 39 | |
|
44 |
static PyObject* |
|
|
45 | PyObject* exc_type; | |
|
46 |
|
|
|
47 |
|
|
|
48 | ||
|
49 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) { | |
|
50 | return NULL; | |
|
51 | } | |
|
40 | static PyObject *ZstdCompressionWriter_exit(ZstdCompressionWriter *self, | |
|
41 | PyObject *args) { | |
|
42 | PyObject *exc_type; | |
|
43 | PyObject *exc_value; | |
|
44 | PyObject *exc_tb; | |
|
45 | PyObject *result; | |
|
52 | 46 | |
|
53 | self->entered = 0; | |
|
47 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, | |
|
48 | &exc_tb)) { | |
|
49 | return NULL; | |
|
50 | } | |
|
54 | 51 | |
|
55 | if (exc_type == Py_None && exc_value == Py_None && exc_tb == Py_None) { | |
|
56 | PyObject* result = PyObject_CallMethod((PyObject*)self, "close", NULL); | |
|
52 | self->entered = 0; | |
|
57 | 53 | |
|
58 | if (NULL == result) { | |
|
59 | return NULL; | |
|
60 | } | |
|
61 | } | |
|
54 | result = PyObject_CallMethod((PyObject *)self, "close", NULL); | |
|
62 | 55 | |
|
63 | Py_RETURN_FALSE; | |
|
56 | if (NULL == result) { | |
|
57 | return NULL; | |
|
58 | } | |
|
59 | ||
|
60 | Py_RETURN_FALSE; | |
|
64 | 61 | } |
|
65 | 62 | |
|
66 | static PyObject* ZstdCompressionWriter_memory_size(ZstdCompressionWriter* self) { | |
|
67 | return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx)); | |
|
63 | static PyObject * | |
|
64 | ZstdCompressionWriter_memory_size(ZstdCompressionWriter *self) { | |
|
65 | return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->compressor->cctx)); | |
|
68 | 66 | } |
|
69 | 67 | |
|
70 |
static PyObject* |
|
|
71 | static char* kwlist[] = { | |
|
72 | "data", | |
|
73 | NULL | |
|
74 | }; | |
|
68 | static PyObject *ZstdCompressionWriter_write(ZstdCompressionWriter *self, | |
|
69 | PyObject *args, PyObject *kwargs) { | |
|
70 | static char *kwlist[] = {"data", NULL}; | |
|
75 | 71 | |
|
76 |
|
|
|
77 |
|
|
|
78 |
|
|
|
79 |
|
|
|
80 |
|
|
|
81 |
|
|
|
72 | PyObject *result = NULL; | |
|
73 | Py_buffer source; | |
|
74 | size_t zresult; | |
|
75 | ZSTD_inBuffer input; | |
|
76 | PyObject *res; | |
|
77 | Py_ssize_t totalWrite = 0; | |
|
82 | 78 | |
|
83 | #if PY_MAJOR_VERSION >= 3 | |
|
84 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", | |
|
85 | #else | |
|
86 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write", | |
|
87 | #endif | |
|
88 | kwlist, &source)) { | |
|
89 | return NULL; | |
|
90 | } | |
|
79 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", kwlist, | |
|
80 | &source)) { | |
|
81 | return NULL; | |
|
82 | } | |
|
91 | 83 | |
|
92 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
|
93 |
|
|
|
94 | "data buffer should be contiguous and have at most one dimension"); | |
|
95 | goto finally; | |
|
96 | } | |
|
84 | if (self->closed) { | |
|
85 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
86 | return NULL; | |
|
87 | } | |
|
97 | 88 | |
|
98 | if (self->closed) { | |
|
99 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
100 | return NULL; | |
|
101 | } | |
|
89 | self->output.pos = 0; | |
|
102 | 90 | |
|
103 | self->output.pos = 0; | |
|
91 | input.src = source.buf; | |
|
92 | input.size = source.len; | |
|
93 | input.pos = 0; | |
|
104 | 94 | |
|
105 | input.src = source.buf; | |
|
106 | input.size = source.len; | |
|
107 | input.pos = 0; | |
|
95 | while (input.pos < (size_t)source.len) { | |
|
96 | Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2( | |
|
97 | self->compressor->cctx, &self->output, &input, ZSTD_e_continue); | |
|
98 | Py_END_ALLOW_THREADS | |
|
108 | 99 | |
|
109 | while (input.pos < (size_t)source.len) { | |
|
110 | Py_BEGIN_ALLOW_THREADS | |
|
111 | zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, ZSTD_e_continue); | |
|
112 | Py_END_ALLOW_THREADS | |
|
113 | ||
|
114 | if (ZSTD_isError(zresult)) { | |
|
115 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
|
116 | goto finally; | |
|
117 | } | |
|
100 | if (ZSTD_isError(zresult)) { | |
|
101 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
102 | ZSTD_getErrorName(zresult)); | |
|
103 | goto finally; | |
|
104 | } | |
|
118 | 105 | |
|
119 |
|
|
|
120 |
|
|
|
121 | #if PY_MAJOR_VERSION >= 3 | |
|
122 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
|
123 | #else | |
|
124 | res = PyObject_CallMethod(self->writer, "write", "s#", | |
|
125 | #endif | |
|
126 | self->output.dst, self->output.pos); | |
|
127 | Py_XDECREF(res); | |
|
128 | totalWrite += self->output.pos; | |
|
129 | self->bytesCompressed += self->output.pos; | |
|
130 | } | |
|
131 | self->output.pos = 0; | |
|
132 | } | |
|
106 | /* Copy data from output buffer to writer. */ | |
|
107 | if (self->output.pos) { | |
|
108 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
|
109 | self->output.dst, self->output.pos); | |
|
110 | if (NULL == res) { | |
|
111 | goto finally; | |
|
112 | } | |
|
113 | Py_XDECREF(res); | |
|
114 | totalWrite += self->output.pos; | |
|
115 | self->bytesCompressed += self->output.pos; | |
|
116 | } | |
|
117 | self->output.pos = 0; | |
|
118 | } | |
|
133 | 119 | |
|
134 |
|
|
|
135 |
|
|
|
136 | } | |
|
137 |
|
|
|
138 |
|
|
|
139 | } | |
|
120 | if (self->writeReturnRead) { | |
|
121 | result = PyLong_FromSize_t(input.pos); | |
|
122 | } | |
|
123 | else { | |
|
124 | result = PyLong_FromSsize_t(totalWrite); | |
|
125 | } | |
|
140 | 126 | |
|
141 | 127 | finally: |
|
142 |
|
|
|
143 |
|
|
|
128 | PyBuffer_Release(&source); | |
|
129 | return result; | |
|
144 | 130 | } |
|
145 | 131 | |
|
146 |
static PyObject* |
|
|
147 | static char* kwlist[] = { | |
|
148 | "flush_mode", | |
|
149 | NULL | |
|
150 | }; | |
|
132 | static PyObject *ZstdCompressionWriter_flush(ZstdCompressionWriter *self, | |
|
133 | PyObject *args, PyObject *kwargs) { | |
|
134 | static char *kwlist[] = {"flush_mode", NULL}; | |
|
151 | 135 | |
|
152 |
|
|
|
153 |
|
|
|
154 |
|
|
|
155 |
|
|
|
156 |
|
|
|
157 |
|
|
|
136 | size_t zresult; | |
|
137 | ZSTD_inBuffer input; | |
|
138 | PyObject *res; | |
|
139 | Py_ssize_t totalWrite = 0; | |
|
140 | unsigned flush_mode = 0; | |
|
141 | ZSTD_EndDirective flush; | |
|
142 | ||
|
143 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush", kwlist, | |
|
144 | &flush_mode)) { | |
|
145 | return NULL; | |
|
146 | } | |
|
158 | 147 | |
|
159 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|I:flush", | |
|
160 | kwlist, &flush_mode)) { | |
|
161 | return NULL; | |
|
162 | } | |
|
148 | switch (flush_mode) { | |
|
149 | case 0: | |
|
150 | flush = ZSTD_e_flush; | |
|
151 | break; | |
|
152 | case 1: | |
|
153 | flush = ZSTD_e_end; | |
|
154 | break; | |
|
155 | default: | |
|
156 | PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode); | |
|
157 | return NULL; | |
|
158 | } | |
|
163 | 159 | |
|
164 | switch (flush_mode) { | |
|
165 | case 0: | |
|
166 | flush = ZSTD_e_flush; | |
|
167 | break; | |
|
168 | case 1: | |
|
169 | flush = ZSTD_e_end; | |
|
170 | break; | |
|
171 | default: | |
|
172 | PyErr_Format(PyExc_ValueError, "unknown flush_mode: %d", flush_mode); | |
|
173 | return NULL; | |
|
174 | } | |
|
160 | if (self->closed) { | |
|
161 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
162 | return NULL; | |
|
163 | } | |
|
175 | 164 | |
|
176 | if (self->closed) { | |
|
177 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
178 | return NULL; | |
|
179 | } | |
|
165 | self->output.pos = 0; | |
|
166 | ||
|
167 | input.src = NULL; | |
|
168 | input.size = 0; | |
|
169 | input.pos = 0; | |
|
180 | 170 | |
|
181 | self->output.pos = 0; | |
|
182 | ||
|
183 | input.src = NULL; | |
|
184 | input.size = 0; | |
|
185 | input.pos = 0; | |
|
171 | while (1) { | |
|
172 | Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2( | |
|
173 | self->compressor->cctx, &self->output, &input, flush); | |
|
174 | Py_END_ALLOW_THREADS | |
|
186 | 175 | |
|
187 | while (1) { | |
|
188 | Py_BEGIN_ALLOW_THREADS | |
|
189 | zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, &input, flush); | |
|
190 | Py_END_ALLOW_THREADS | |
|
191 | ||
|
192 | if (ZSTD_isError(zresult)) { | |
|
193 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
|
194 | return NULL; | |
|
195 | } | |
|
176 | if (ZSTD_isError(zresult)) { | |
|
177 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
178 | ZSTD_getErrorName(zresult)); | |
|
179 | return NULL; | |
|
180 | } | |
|
196 | 181 | |
|
197 |
|
|
|
198 |
|
|
|
199 | #if PY_MAJOR_VERSION >= 3 | |
|
200 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
|
201 | #else | |
|
202 | res = PyObject_CallMethod(self->writer, "write", "s#", | |
|
203 | #endif | |
|
204 | self->output.dst, self->output.pos); | |
|
205 | Py_XDECREF(res); | |
|
206 | totalWrite += self->output.pos; | |
|
207 | self->bytesCompressed += self->output.pos; | |
|
208 | } | |
|
182 | /* Copy data from output buffer to writer. */ | |
|
183 | if (self->output.pos) { | |
|
184 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
|
185 | self->output.dst, self->output.pos); | |
|
186 | if (NULL == res) { | |
|
187 | return NULL; | |
|
188 | } | |
|
189 | Py_XDECREF(res); | |
|
190 | totalWrite += self->output.pos; | |
|
191 | self->bytesCompressed += self->output.pos; | |
|
192 | } | |
|
193 | ||
|
194 | self->output.pos = 0; | |
|
209 | 195 | |
|
210 | self->output.pos = 0; | |
|
196 | if (!zresult) { | |
|
197 | break; | |
|
198 | } | |
|
199 | } | |
|
211 | 200 | |
|
212 | if (!zresult) { | |
|
213 | break; | |
|
214 | } | |
|
215 | } | |
|
201 | if (!self->closing && PyObject_HasAttrString(self->writer, "flush")) { | |
|
202 | res = PyObject_CallMethod(self->writer, "flush", NULL); | |
|
203 | if (NULL == res) { | |
|
204 | return NULL; | |
|
205 | } | |
|
206 | Py_XDECREF(res); | |
|
207 | } | |
|
216 | 208 | |
|
217 |
|
|
|
209 | return PyLong_FromSsize_t(totalWrite); | |
|
218 | 210 | } |
|
219 | 211 | |
|
220 |
static PyObject* |
|
|
221 |
|
|
|
212 | static PyObject *ZstdCompressionWriter_close(ZstdCompressionWriter *self) { | |
|
213 | PyObject *result; | |
|
222 | 214 | |
|
223 |
|
|
|
224 |
|
|
|
225 | } | |
|
215 | if (self->closed) { | |
|
216 | Py_RETURN_NONE; | |
|
217 | } | |
|
226 | 218 | |
|
227 | result = PyObject_CallMethod((PyObject*)self, "flush", "I", 1); | |
|
228 | self->closed = 1; | |
|
219 | self->closing = 1; | |
|
220 | result = PyObject_CallMethod((PyObject *)self, "flush", "I", 1); | |
|
221 | self->closing = 0; | |
|
222 | self->closed = 1; | |
|
229 | 223 | |
|
230 |
|
|
|
231 |
|
|
|
232 | } | |
|
224 | if (NULL == result) { | |
|
225 | return NULL; | |
|
226 | } | |
|
233 | 227 | |
|
234 | 228 | /* Call close on underlying stream as well. */ |
|
235 |
|
|
|
236 |
|
|
|
237 | } | |
|
238 | ||
|
239 | Py_RETURN_NONE; | |
|
240 | } | |
|
229 | if (self->closefd && PyObject_HasAttrString(self->writer, "close")) { | |
|
230 | return PyObject_CallMethod(self->writer, "close", NULL); | |
|
231 | } | |
|
241 | 232 | |
|
242 | static PyObject* ZstdCompressionWriter_fileno(ZstdCompressionWriter* self) { | |
|
243 | if (PyObject_HasAttrString(self->writer, "fileno")) { | |
|
244 | return PyObject_CallMethod(self->writer, "fileno", NULL); | |
|
245 | } | |
|
246 | else { | |
|
247 | PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer"); | |
|
248 | return NULL; | |
|
249 | } | |
|
233 | Py_RETURN_NONE; | |
|
250 | 234 | } |
|
251 | 235 | |
|
252 |
static PyObject* |
|
|
253 | return PyLong_FromUnsignedLongLong(self->bytesCompressed); | |
|
236 | static PyObject *ZstdCompressionWriter_fileno(ZstdCompressionWriter *self) { | |
|
237 | if (PyObject_HasAttrString(self->writer, "fileno")) { | |
|
238 | return PyObject_CallMethod(self->writer, "fileno", NULL); | |
|
239 | } | |
|
240 | else { | |
|
241 | PyErr_SetString(PyExc_OSError, | |
|
242 | "fileno not available on underlying writer"); | |
|
243 | return NULL; | |
|
244 | } | |
|
254 | 245 | } |
|
255 | 246 | |
|
256 |
static PyObject* |
|
|
257 | PyErr_SetNone(PyExc_NotImplementedError); | |
|
258 | return NULL; | |
|
247 | static PyObject *ZstdCompressionWriter_tell(ZstdCompressionWriter *self) { | |
|
248 | return PyLong_FromUnsignedLongLong(self->bytesCompressed); | |
|
259 | 249 | } |
|
260 | 250 | |
|
261 |
static PyObject* |
|
|
262 | Py_RETURN_FALSE; | |
|
263 | } | |
|
264 | ||
|
265 | static PyObject* ZstdCompressionWriter_true(PyObject* self, PyObject* args) { | |
|
266 | Py_RETURN_TRUE; | |
|
251 | static PyObject *ZstdCompressionWriter_writelines(PyObject *self, | |
|
252 | PyObject *args) { | |
|
253 | PyErr_SetNone(PyExc_NotImplementedError); | |
|
254 | return NULL; | |
|
267 | 255 | } |
|
268 | 256 | |
|
269 |
static PyObject* |
|
|
270 | PyObject* iomod; | |
|
271 | PyObject* exc; | |
|
257 | static PyObject *ZstdCompressionWriter_iter(PyObject *self) { | |
|
258 | set_io_unsupported_operation(); | |
|
259 | return NULL; | |
|
260 | } | |
|
272 | 261 | |
|
273 | iomod = PyImport_ImportModule("io"); | |
|
274 | if (NULL == iomod) { | |
|
275 |
|
|
|
276 |
|
|
|
262 | static PyObject *ZstdCompressionWriter_iternext(PyObject *self) { | |
|
263 | set_io_unsupported_operation(); | |
|
264 | return NULL; | |
|
265 | } | |
|
277 | 266 | |
|
278 | exc = PyObject_GetAttrString(iomod, "UnsupportedOperation"); | |
|
279 | if (NULL == exc) { | |
|
280 | Py_DECREF(iomod); | |
|
281 | return NULL; | |
|
282 | } | |
|
267 | static PyObject *ZstdCompressionWriter_false(PyObject *self, PyObject *args) { | |
|
268 | Py_RETURN_FALSE; | |
|
269 | } | |
|
283 | 270 | |
|
284 | PyErr_SetNone(exc); | |
|
285 | Py_DECREF(exc); | |
|
286 | Py_DECREF(iomod); | |
|
271 | static PyObject *ZstdCompressionWriter_true(PyObject *self, PyObject *args) { | |
|
272 | Py_RETURN_TRUE; | |
|
273 | } | |
|
287 | 274 | |
|
288 | return NULL; | |
|
275 | static PyObject *ZstdCompressionWriter_unsupported(PyObject *self, | |
|
276 | PyObject *args, | |
|
277 | PyObject *kwargs) { | |
|
278 | set_io_unsupported_operation(); | |
|
279 | return NULL; | |
|
289 | 280 | } |
|
290 | 281 | |
|
291 | 282 | static PyMethodDef ZstdCompressionWriter_methods[] = { |
|
292 |
|
|
|
293 |
|
|
|
294 |
|
|
|
295 |
|
|
|
296 |
|
|
|
297 |
|
|
|
298 |
|
|
|
299 |
|
|
|
300 |
|
|
|
301 | { "readlines", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL }, | |
|
302 |
|
|
|
303 | { "seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL }, | |
|
304 |
|
|
|
305 | { "writable", ZstdCompressionWriter_true, METH_NOARGS, NULL }, | |
|
306 |
|
|
|
307 |
|
|
|
308 | { "readall", (PyCFunction)ZstdCompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL }, | |
|
309 |
|
|
|
310 | { "memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS, | |
|
311 | PyDoc_STR("Obtain the memory size of the underlying compressor") }, | |
|
312 | { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS | METH_KEYWORDS, | |
|
313 | PyDoc_STR("Compress data") }, | |
|
314 | { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_VARARGS | METH_KEYWORDS, | |
|
315 | PyDoc_STR("Flush data and finish a zstd frame") }, | |
|
316 | { "tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS, | |
|
317 | PyDoc_STR("Returns current number of bytes compressed") }, | |
|
318 | { NULL, NULL } | |
|
319 | }; | |
|
283 | {"__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS, | |
|
284 | PyDoc_STR("Enter a compression context.")}, | |
|
285 | {"__exit__", (PyCFunction)ZstdCompressionWriter_exit, METH_VARARGS, | |
|
286 | PyDoc_STR("Exit a compression context.")}, | |
|
287 | {"close", (PyCFunction)ZstdCompressionWriter_close, METH_NOARGS, NULL}, | |
|
288 | {"fileno", (PyCFunction)ZstdCompressionWriter_fileno, METH_NOARGS, NULL}, | |
|
289 | {"isatty", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL}, | |
|
290 | {"readable", (PyCFunction)ZstdCompressionWriter_false, METH_NOARGS, NULL}, | |
|
291 | {"readline", (PyCFunction)ZstdCompressionWriter_unsupported, | |
|
292 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
293 | {"readlines", (PyCFunction)ZstdCompressionWriter_unsupported, | |
|
294 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
295 | {"seek", (PyCFunction)ZstdCompressionWriter_unsupported, | |
|
296 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
297 | {"seekable", ZstdCompressionWriter_false, METH_NOARGS, NULL}, | |
|
298 | {"truncate", (PyCFunction)ZstdCompressionWriter_unsupported, | |
|
299 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
300 | {"writable", ZstdCompressionWriter_true, METH_NOARGS, NULL}, | |
|
301 | {"writelines", ZstdCompressionWriter_writelines, METH_VARARGS, NULL}, | |
|
302 | {"read", (PyCFunction)ZstdCompressionWriter_unsupported, | |
|
303 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
304 | {"readall", (PyCFunction)ZstdCompressionWriter_unsupported, | |
|
305 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
306 | {"readinto", (PyCFunction)ZstdCompressionWriter_unsupported, | |
|
307 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
308 | {"memory_size", (PyCFunction)ZstdCompressionWriter_memory_size, METH_NOARGS, | |
|
309 | PyDoc_STR("Obtain the memory size of the underlying compressor")}, | |
|
310 | {"write", (PyCFunction)ZstdCompressionWriter_write, | |
|
311 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Compress data")}, | |
|
312 | {"flush", (PyCFunction)ZstdCompressionWriter_flush, | |
|
313 | METH_VARARGS | METH_KEYWORDS, | |
|
314 | PyDoc_STR("Flush data and finish a zstd frame")}, | |
|
315 | {"tell", (PyCFunction)ZstdCompressionWriter_tell, METH_NOARGS, | |
|
316 | PyDoc_STR("Returns current number of bytes compressed")}, | |
|
317 | {NULL, NULL}}; | |
|
320 | 318 | |
|
321 | 319 | static PyMemberDef ZstdCompressionWriter_members[] = { |
|
322 |
|
|
|
323 |
|
|
|
320 | {"closed", T_BOOL, offsetof(ZstdCompressionWriter, closed), READONLY, NULL}, | |
|
321 | {NULL}}; | |
|
322 | ||
|
323 | PyType_Slot ZstdCompressionWriterSlots[] = { | |
|
324 | {Py_tp_dealloc, ZstdCompressionWriter_dealloc}, | |
|
325 | {Py_tp_iter, ZstdCompressionWriter_iter}, | |
|
326 | {Py_tp_iternext, ZstdCompressionWriter_iternext}, | |
|
327 | {Py_tp_methods, ZstdCompressionWriter_methods}, | |
|
328 | {Py_tp_members, ZstdCompressionWriter_members}, | |
|
329 | {Py_tp_new, PyType_GenericNew}, | |
|
330 | {0, NULL}, | |
|
324 | 331 | }; |
|
325 | 332 | |
|
326 |
PyType |
|
|
327 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
328 | "zstd.ZstdCompressionWriter", /* tp_name */ | |
|
329 | sizeof(ZstdCompressionWriter), /* tp_basicsize */ | |
|
330 | 0, /* tp_itemsize */ | |
|
331 | (destructor)ZstdCompressionWriter_dealloc, /* tp_dealloc */ | |
|
332 | 0, /* tp_print */ | |
|
333 | 0, /* tp_getattr */ | |
|
334 | 0, /* tp_setattr */ | |
|
335 | 0, /* tp_compare */ | |
|
336 | 0, /* tp_repr */ | |
|
337 | 0, /* tp_as_number */ | |
|
338 | 0, /* tp_as_sequence */ | |
|
339 | 0, /* tp_as_mapping */ | |
|
340 | 0, /* tp_hash */ | |
|
341 | 0, /* tp_call */ | |
|
342 | 0, /* tp_str */ | |
|
343 | 0, /* tp_getattro */ | |
|
344 | 0, /* tp_setattro */ | |
|
345 | 0, /* tp_as_buffer */ | |
|
346 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
347 | ZstdCompresssionWriter__doc__, /* tp_doc */ | |
|
348 | 0, /* tp_traverse */ | |
|
349 | 0, /* tp_clear */ | |
|
350 | 0, /* tp_richcompare */ | |
|
351 | 0, /* tp_weaklistoffset */ | |
|
352 | 0, /* tp_iter */ | |
|
353 | 0, /* tp_iternext */ | |
|
354 | ZstdCompressionWriter_methods, /* tp_methods */ | |
|
355 | ZstdCompressionWriter_members, /* tp_members */ | |
|
356 | 0, /* tp_getset */ | |
|
357 | 0, /* tp_base */ | |
|
358 | 0, /* tp_dict */ | |
|
359 | 0, /* tp_descr_get */ | |
|
360 | 0, /* tp_descr_set */ | |
|
361 | 0, /* tp_dictoffset */ | |
|
362 | 0, /* tp_init */ | |
|
363 | 0, /* tp_alloc */ | |
|
364 | PyType_GenericNew, /* tp_new */ | |
|
333 | PyType_Spec ZstdCompressionWriterSpec = { | |
|
334 | "zstd.ZstdCompressionWriter", | |
|
335 | sizeof(ZstdCompressionWriter), | |
|
336 | 0, | |
|
337 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
338 | ZstdCompressionWriterSlots, | |
|
365 | 339 | }; |
|
366 | 340 | |
|
367 | void compressionwriter_module_init(PyObject* mod) { | |
|
368 | Py_SET_TYPE(&ZstdCompressionWriterType, &PyType_Type); | |
|
369 | if (PyType_Ready(&ZstdCompressionWriterType) < 0) { | |
|
370 | return; | |
|
371 | } | |
|
341 | PyTypeObject *ZstdCompressionWriterType; | |
|
342 | ||
|
343 | void compressionwriter_module_init(PyObject *mod) { | |
|
344 | ZstdCompressionWriterType = | |
|
345 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressionWriterSpec); | |
|
346 | if (PyType_Ready(ZstdCompressionWriterType) < 0) { | |
|
347 | return; | |
|
348 | } | |
|
349 | ||
|
350 | Py_INCREF((PyObject *)ZstdCompressionWriterType); | |
|
351 | PyModule_AddObject(mod, "ZstdCompressionWriter", | |
|
352 | (PyObject *)ZstdCompressionWriterType); | |
|
372 | 353 | } |
@@ -1,256 +1,220 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
12 | ||
|
13 | PyDoc_STRVAR(ZstdCompressionObj__doc__, | |
|
14 | "Perform compression using a standard library compatible API.\n" | |
|
15 | ); | |
|
11 | extern PyObject *ZstdError; | |
|
16 | 12 | |
|
17 |
static void ZstdCompressionObj_dealloc(ZstdCompressionObj* |
|
|
18 |
|
|
|
19 |
|
|
|
13 | static void ZstdCompressionObj_dealloc(ZstdCompressionObj *self) { | |
|
14 | PyMem_Free(self->output.dst); | |
|
15 | self->output.dst = NULL; | |
|
20 | 16 | |
|
21 |
|
|
|
17 | Py_XDECREF(self->compressor); | |
|
22 | 18 | |
|
23 |
|
|
|
19 | PyObject_Del(self); | |
|
24 | 20 | } |
|
25 | 21 | |
|
26 |
static PyObject* |
|
|
27 | static char* kwlist[] = { | |
|
28 | "data", | |
|
29 | NULL | |
|
30 | }; | |
|
22 | static PyObject *ZstdCompressionObj_compress(ZstdCompressionObj *self, | |
|
23 | PyObject *args, PyObject *kwargs) { | |
|
24 | static char *kwlist[] = {"data", NULL}; | |
|
31 | 25 | |
|
32 |
|
|
|
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 | ||
|
38 | if (self->finished) { | |
|
39 | PyErr_SetString(ZstdError, "cannot call compress() after compressor finished"); | |
|
40 | return NULL; | |
|
41 | } | |
|
26 | Py_buffer source; | |
|
27 | ZSTD_inBuffer input; | |
|
28 | size_t zresult; | |
|
29 | PyObject *result = NULL; | |
|
30 | Py_ssize_t resultSize = 0; | |
|
42 | 31 | |
|
43 | #if PY_MAJOR_VERSION >= 3 | |
|
44 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", | |
|
45 | #else | |
|
46 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress", | |
|
47 | #endif | |
|
48 | kwlist, &source)) { | |
|
49 | return NULL; | |
|
50 | } | |
|
32 | if (self->finished) { | |
|
33 | PyErr_SetString(ZstdError, | |
|
34 | "cannot call compress() after compressor finished"); | |
|
35 | return NULL; | |
|
36 | } | |
|
51 | 37 | |
|
52 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
|
53 | PyErr_SetString(PyExc_ValueError, | |
|
54 | "data buffer should be contiguous and have at most one dimension"); | |
|
55 | goto finally; | |
|
56 | } | |
|
38 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress", kwlist, | |
|
39 | &source)) { | |
|
40 | return NULL; | |
|
41 | } | |
|
57 | 42 | |
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
43 | input.src = source.buf; | |
|
44 | input.size = source.len; | |
|
45 | input.pos = 0; | |
|
46 | ||
|
47 | while (input.pos < (size_t)source.len) { | |
|
48 | Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2( | |
|
49 | self->compressor->cctx, &self->output, &input, ZSTD_e_continue); | |
|
50 | Py_END_ALLOW_THREADS | |
|
61 | 51 | |
|
62 | while (input.pos < (size_t)source.len) { | |
|
63 | Py_BEGIN_ALLOW_THREADS | |
|
64 | zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, | |
|
65 | &input, ZSTD_e_continue); | |
|
66 | Py_END_ALLOW_THREADS | |
|
52 | if (ZSTD_isError(zresult)) { | |
|
53 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
54 | ZSTD_getErrorName(zresult)); | |
|
55 | Py_CLEAR(result); | |
|
56 | goto finally; | |
|
57 | } | |
|
67 | 58 | |
|
68 | if (ZSTD_isError(zresult)) { | |
|
69 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
|
70 | Py_CLEAR(result); | |
|
71 | goto finally; | |
|
72 | } | |
|
59 | if (self->output.pos) { | |
|
60 | if (result) { | |
|
61 | resultSize = PyBytes_GET_SIZE(result); | |
|
73 | 62 | |
|
74 | if (self->output.pos) { | |
|
75 | if (result) { | |
|
76 | resultSize = PyBytes_GET_SIZE(result); | |
|
63 | if (safe_pybytes_resize(&result, | |
|
64 | resultSize + self->output.pos)) { | |
|
65 | Py_CLEAR(result); | |
|
66 | goto finally; | |
|
67 | } | |
|
77 | 68 | |
|
78 | if (safe_pybytes_resize(&result, resultSize + self->output.pos)) { | |
|
79 | Py_CLEAR(result); | |
|
80 | goto finally; | |
|
81 | } | |
|
69 | memcpy(PyBytes_AS_STRING(result) + resultSize, self->output.dst, | |
|
70 | self->output.pos); | |
|
71 | } | |
|
72 | else { | |
|
73 | result = PyBytes_FromStringAndSize(self->output.dst, | |
|
74 | self->output.pos); | |
|
75 | if (!result) { | |
|
76 | goto finally; | |
|
77 | } | |
|
78 | } | |
|
82 | 79 | |
|
83 | memcpy(PyBytes_AS_STRING(result) + resultSize, | |
|
84 | self->output.dst, self->output.pos); | |
|
85 | } | |
|
86 | else { | |
|
87 | result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
|
88 | if (!result) { | |
|
89 | goto finally; | |
|
90 | } | |
|
91 | } | |
|
80 | self->output.pos = 0; | |
|
81 | } | |
|
82 | } | |
|
92 | 83 | |
|
93 | self->output.pos = 0; | |
|
94 | } | |
|
95 | } | |
|
96 | ||
|
97 | if (NULL == result) { | |
|
98 | result = PyBytes_FromString(""); | |
|
99 | } | |
|
84 | if (NULL == result) { | |
|
85 | result = PyBytes_FromString(""); | |
|
86 | } | |
|
100 | 87 | |
|
101 | 88 | finally: |
|
102 |
|
|
|
89 | PyBuffer_Release(&source); | |
|
103 | 90 | |
|
104 |
|
|
|
91 | return result; | |
|
105 | 92 | } |
|
106 | 93 | |
|
107 |
static PyObject* |
|
|
108 | static char* kwlist[] = { | |
|
109 | "flush_mode", | |
|
110 | NULL | |
|
111 | }; | |
|
94 | static PyObject *ZstdCompressionObj_flush(ZstdCompressionObj *self, | |
|
95 | PyObject *args, PyObject *kwargs) { | |
|
96 | static char *kwlist[] = {"flush_mode", NULL}; | |
|
97 | ||
|
98 | int flushMode = compressorobj_flush_finish; | |
|
99 | size_t zresult; | |
|
100 | PyObject *result = NULL; | |
|
101 | Py_ssize_t resultSize = 0; | |
|
102 | ZSTD_inBuffer input; | |
|
103 | ZSTD_EndDirective zFlushMode; | |
|
112 | 104 | |
|
113 | int flushMode = compressorobj_flush_finish; | |
|
114 | size_t zresult; | |
|
115 | PyObject* result = NULL; | |
|
116 | Py_ssize_t resultSize = 0; | |
|
117 | ZSTD_inBuffer input; | |
|
118 | ZSTD_EndDirective zFlushMode; | |
|
105 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, | |
|
106 | &flushMode)) { | |
|
107 | return NULL; | |
|
108 | } | |
|
119 | 109 | |
|
120 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) { | |
|
121 | return NULL; | |
|
122 | } | |
|
123 | ||
|
124 | if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) { | |
|
125 | PyErr_SetString(PyExc_ValueError, "flush mode not recognized"); | |
|
126 | return NULL; | |
|
127 | } | |
|
110 | if (flushMode != compressorobj_flush_finish && | |
|
111 | flushMode != compressorobj_flush_block) { | |
|
112 | PyErr_SetString(PyExc_ValueError, "flush mode not recognized"); | |
|
113 | return NULL; | |
|
114 | } | |
|
128 | 115 | |
|
129 |
|
|
|
130 |
|
|
|
131 |
|
|
|
132 | } | |
|
116 | if (self->finished) { | |
|
117 | PyErr_SetString(ZstdError, "compressor object already finished"); | |
|
118 | return NULL; | |
|
119 | } | |
|
133 | 120 | |
|
134 |
|
|
|
135 |
|
|
|
136 |
|
|
|
137 | break; | |
|
121 | switch (flushMode) { | |
|
122 | case compressorobj_flush_block: | |
|
123 | zFlushMode = ZSTD_e_flush; | |
|
124 | break; | |
|
138 | 125 | |
|
139 |
|
|
|
140 |
|
|
|
141 |
|
|
|
142 | break; | |
|
126 | case compressorobj_flush_finish: | |
|
127 | zFlushMode = ZSTD_e_end; | |
|
128 | self->finished = 1; | |
|
129 | break; | |
|
143 | 130 | |
|
144 |
|
|
|
145 |
|
|
|
146 |
|
|
|
147 | } | |
|
131 | default: | |
|
132 | PyErr_SetString(ZstdError, "unhandled flush mode"); | |
|
133 | return NULL; | |
|
134 | } | |
|
148 | 135 | |
|
149 |
|
|
|
136 | assert(self->output.pos == 0); | |
|
150 | 137 | |
|
151 |
|
|
|
152 |
|
|
|
153 |
|
|
|
138 | input.src = NULL; | |
|
139 | input.size = 0; | |
|
140 | input.pos = 0; | |
|
154 | 141 | |
|
155 |
|
|
|
156 | Py_BEGIN_ALLOW_THREADS | |
|
157 | zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, | |
|
158 | &input, zFlushMode); | |
|
159 | Py_END_ALLOW_THREADS | |
|
142 | while (1) { | |
|
143 | Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2( | |
|
144 | self->compressor->cctx, &self->output, &input, zFlushMode); | |
|
145 | Py_END_ALLOW_THREADS | |
|
160 | 146 | |
|
161 |
|
|
|
162 |
|
|
|
163 |
|
|
|
164 |
|
|
|
165 | } | |
|
147 | if (ZSTD_isError(zresult)) { | |
|
148 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
149 | ZSTD_getErrorName(zresult)); | |
|
150 | return NULL; | |
|
151 | } | |
|
166 | 152 | |
|
167 |
|
|
|
168 |
|
|
|
169 |
|
|
|
153 | if (self->output.pos) { | |
|
154 | if (result) { | |
|
155 | resultSize = PyBytes_GET_SIZE(result); | |
|
170 | 156 | |
|
171 | if (safe_pybytes_resize(&result, resultSize + self->output.pos)) { | |
|
172 | Py_XDECREF(result); | |
|
173 | return NULL; | |
|
174 | } | |
|
157 | if (safe_pybytes_resize(&result, | |
|
158 | resultSize + self->output.pos)) { | |
|
159 | Py_XDECREF(result); | |
|
160 | return NULL; | |
|
161 | } | |
|
175 | 162 | |
|
176 |
|
|
|
177 | self->output.dst, self->output.pos); | |
|
178 | } | |
|
179 | else { | |
|
180 |
|
|
|
181 | if (!result) { | |
|
182 | return NULL; | |
|
183 | } | |
|
184 | } | |
|
163 | memcpy(PyBytes_AS_STRING(result) + resultSize, self->output.dst, | |
|
164 | self->output.pos); | |
|
165 | } | |
|
166 | else { | |
|
167 | result = PyBytes_FromStringAndSize(self->output.dst, | |
|
168 | self->output.pos); | |
|
169 | if (!result) { | |
|
170 | return NULL; | |
|
171 | } | |
|
172 | } | |
|
185 | 173 | |
|
186 |
|
|
|
187 | } | |
|
174 | self->output.pos = 0; | |
|
175 | } | |
|
188 | 176 | |
|
189 |
|
|
|
190 | break; | |
|
191 | } | |
|
192 | } | |
|
177 | if (!zresult) { | |
|
178 | break; | |
|
179 | } | |
|
180 | } | |
|
193 | 181 | |
|
194 |
|
|
|
195 |
|
|
|
196 | } | |
|
197 |
|
|
|
198 |
|
|
|
199 | } | |
|
182 | if (result) { | |
|
183 | return result; | |
|
184 | } | |
|
185 | else { | |
|
186 | return PyBytes_FromString(""); | |
|
187 | } | |
|
200 | 188 | } |
|
201 | 189 | |
|
202 | 190 | static PyMethodDef ZstdCompressionObj_methods[] = { |
|
203 |
|
|
|
204 |
|
|
|
205 |
|
|
|
206 |
|
|
|
207 |
|
|
|
191 | {"compress", (PyCFunction)ZstdCompressionObj_compress, | |
|
192 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("compress data")}, | |
|
193 | {"flush", (PyCFunction)ZstdCompressionObj_flush, | |
|
194 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("finish compression operation")}, | |
|
195 | {NULL, NULL}}; | |
|
196 | ||
|
197 | PyType_Slot ZstdCompressionObjSlots[] = { | |
|
198 | {Py_tp_dealloc, ZstdCompressionObj_dealloc}, | |
|
199 | {Py_tp_methods, ZstdCompressionObj_methods}, | |
|
200 | {Py_tp_new, PyType_GenericNew}, | |
|
201 | {0, NULL}, | |
|
208 | 202 | }; |
|
209 | 203 | |
|
210 |
PyType |
|
|
211 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
212 | "zstd.ZstdCompressionObj", /* tp_name */ | |
|
213 | sizeof(ZstdCompressionObj), /* tp_basicsize */ | |
|
214 | 0, /* tp_itemsize */ | |
|
215 | (destructor)ZstdCompressionObj_dealloc, /* tp_dealloc */ | |
|
216 | 0, /* tp_print */ | |
|
217 | 0, /* tp_getattr */ | |
|
218 | 0, /* tp_setattr */ | |
|
219 | 0, /* tp_compare */ | |
|
220 | 0, /* tp_repr */ | |
|
221 | 0, /* tp_as_number */ | |
|
222 | 0, /* tp_as_sequence */ | |
|
223 | 0, /* tp_as_mapping */ | |
|
224 | 0, /* tp_hash */ | |
|
225 | 0, /* tp_call */ | |
|
226 | 0, /* tp_str */ | |
|
227 | 0, /* tp_getattro */ | |
|
228 | 0, /* tp_setattro */ | |
|
229 | 0, /* tp_as_buffer */ | |
|
230 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
231 | ZstdCompressionObj__doc__, /* tp_doc */ | |
|
232 | 0, /* tp_traverse */ | |
|
233 | 0, /* tp_clear */ | |
|
234 | 0, /* tp_richcompare */ | |
|
235 | 0, /* tp_weaklistoffset */ | |
|
236 | 0, /* tp_iter */ | |
|
237 | 0, /* tp_iternext */ | |
|
238 | ZstdCompressionObj_methods, /* tp_methods */ | |
|
239 | 0, /* tp_members */ | |
|
240 | 0, /* tp_getset */ | |
|
241 | 0, /* tp_base */ | |
|
242 | 0, /* tp_dict */ | |
|
243 | 0, /* tp_descr_get */ | |
|
244 | 0, /* tp_descr_set */ | |
|
245 | 0, /* tp_dictoffset */ | |
|
246 | 0, /* tp_init */ | |
|
247 | 0, /* tp_alloc */ | |
|
248 | PyType_GenericNew, /* tp_new */ | |
|
204 | PyType_Spec ZstdCompressionObjSpec = { | |
|
205 | "zstd.ZstdCompressionObj", | |
|
206 | sizeof(ZstdCompressionObj), | |
|
207 | 0, | |
|
208 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
209 | ZstdCompressionObjSlots, | |
|
249 | 210 | }; |
|
250 | 211 | |
|
251 | void compressobj_module_init(PyObject* module) { | |
|
252 | Py_SET_TYPE(&ZstdCompressionObjType, &PyType_Type); | |
|
253 | if (PyType_Ready(&ZstdCompressionObjType) < 0) { | |
|
254 | return; | |
|
255 | } | |
|
212 | PyTypeObject *ZstdCompressionObjType; | |
|
213 | ||
|
214 | void compressobj_module_init(PyObject *module) { | |
|
215 | ZstdCompressionObjType = | |
|
216 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressionObjSpec); | |
|
217 | if (PyType_Ready(ZstdCompressionObjType) < 0) { | |
|
218 | return; | |
|
219 | } | |
|
256 | 220 | } |
This diff has been collapsed as it changes many lines, (2647 lines changed) Show them Hide them | |||
@@ -1,1670 +1,1557 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | #include "pool.h" | |
|
10 | ||
|
11 | extern PyObject *ZstdError; | |
|
11 | 12 | |
|
12 | extern PyObject* ZstdError; | |
|
13 | ||
|
14 | int setup_cctx(ZstdCompressor* compressor) { | |
|
15 | size_t zresult; | |
|
13 | int setup_cctx(ZstdCompressor *compressor) { | |
|
14 | size_t zresult; | |
|
16 | 15 | |
|
17 |
|
|
|
18 |
|
|
|
19 |
|
|
|
16 | assert(compressor); | |
|
17 | assert(compressor->cctx); | |
|
18 | assert(compressor->params); | |
|
20 | 19 | |
|
21 |
|
|
|
22 | if (ZSTD_isError(zresult)) { | |
|
23 | PyErr_Format(ZstdError, "could not set compression parameters: %s", | |
|
24 | ZSTD_getErrorName(zresult)); | |
|
25 | return 1; | |
|
26 | } | |
|
20 | zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, | |
|
21 | compressor->params); | |
|
22 | if (ZSTD_isError(zresult)) { | |
|
23 | PyErr_Format(ZstdError, "could not set compression parameters: %s", | |
|
24 | ZSTD_getErrorName(zresult)); | |
|
25 | return 1; | |
|
26 | } | |
|
27 | 27 | |
|
28 |
|
|
|
29 |
|
|
|
30 | zresult = ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict); | |
|
31 | } | |
|
32 | else { | |
|
33 | zresult = ZSTD_CCtx_loadDictionary_advanced(compressor->cctx, | |
|
34 | compressor->dict->dictData, compressor->dict->dictSize, | |
|
35 | ZSTD_dlm_byRef, compressor->dict->dictType); | |
|
36 | } | |
|
37 | if (ZSTD_isError(zresult)) { | |
|
38 | PyErr_Format(ZstdError, "could not load compression dictionary: %s", | |
|
39 | ZSTD_getErrorName(zresult)); | |
|
40 | return 1; | |
|
41 | } | |
|
42 | } | |
|
28 | if (compressor->dict) { | |
|
29 | if (compressor->dict->cdict) { | |
|
30 | zresult = | |
|
31 | ZSTD_CCtx_refCDict(compressor->cctx, compressor->dict->cdict); | |
|
32 | } | |
|
33 | else { | |
|
34 | zresult = ZSTD_CCtx_loadDictionary_advanced( | |
|
35 | compressor->cctx, compressor->dict->dictData, | |
|
36 | compressor->dict->dictSize, ZSTD_dlm_byRef, | |
|
37 | compressor->dict->dictType); | |
|
38 | } | |
|
39 | if (ZSTD_isError(zresult)) { | |
|
40 | PyErr_Format(ZstdError, "could not load compression dictionary: %s", | |
|
41 | ZSTD_getErrorName(zresult)); | |
|
42 | return 1; | |
|
43 | } | |
|
44 | } | |
|
43 | 45 | |
|
44 |
|
|
|
46 | return 0; | |
|
45 | 47 | } |
|
46 | 48 | |
|
47 |
static PyObject* |
|
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
49 | static PyObject *frame_progression(ZSTD_CCtx *cctx) { | |
|
50 | PyObject *result = NULL; | |
|
51 | PyObject *value; | |
|
52 | ZSTD_frameProgression progression; | |
|
51 | 53 | |
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 | } | |
|
54 | result = PyTuple_New(3); | |
|
55 | if (!result) { | |
|
56 | return NULL; | |
|
57 | } | |
|
56 | 58 | |
|
57 |
|
|
|
59 | progression = ZSTD_getFrameProgression(cctx); | |
|
58 | 60 | |
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
63 | } | |
|
61 | value = PyLong_FromUnsignedLongLong(progression.ingested); | |
|
62 | if (!value) { | |
|
63 | Py_DECREF(result); | |
|
64 | return NULL; | |
|
65 | } | |
|
64 | 66 | |
|
65 |
|
|
|
67 | PyTuple_SET_ITEM(result, 0, value); | |
|
66 | 68 | |
|
67 |
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
|
|
|
71 | } | |
|
69 | value = PyLong_FromUnsignedLongLong(progression.consumed); | |
|
70 | if (!value) { | |
|
71 | Py_DECREF(result); | |
|
72 | return NULL; | |
|
73 | } | |
|
72 | 74 | |
|
73 |
|
|
|
75 | PyTuple_SET_ITEM(result, 1, value); | |
|
74 | 76 | |
|
75 |
|
|
|
76 |
|
|
|
77 |
|
|
|
78 |
|
|
|
79 | } | |
|
77 | value = PyLong_FromUnsignedLongLong(progression.produced); | |
|
78 | if (!value) { | |
|
79 | Py_DECREF(result); | |
|
80 | return NULL; | |
|
81 | } | |
|
80 | 82 | |
|
81 |
|
|
|
83 | PyTuple_SET_ITEM(result, 2, value); | |
|
82 | 84 | |
|
83 |
|
|
|
85 | return result; | |
|
84 | 86 | } |
|
85 | 87 | |
|
86 | PyDoc_STRVAR(ZstdCompressor__doc__, | |
|
87 | "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n" | |
|
88 | "\n" | |
|
89 | "Create an object used to perform Zstandard compression.\n" | |
|
90 | "\n" | |
|
91 | "An instance can compress data various ways. Instances can be used multiple\n" | |
|
92 | "times. Each compression operation will use the compression parameters\n" | |
|
93 | "defined at construction time.\n" | |
|
94 | "\n" | |
|
95 | "Compression can be configured via the following names arguments:\n" | |
|
96 | "\n" | |
|
97 | "level\n" | |
|
98 | " Integer compression level.\n" | |
|
99 | "dict_data\n" | |
|
100 | " A ``ZstdCompressionDict`` to be used to compress with dictionary data.\n" | |
|
101 | "compression_params\n" | |
|
102 | " A ``CompressionParameters`` instance defining low-level compression" | |
|
103 | " parameters. If defined, this will overwrite the ``level`` argument.\n" | |
|
104 | "write_checksum\n" | |
|
105 | " If True, a 4 byte content checksum will be written with the compressed\n" | |
|
106 | " data, allowing the decompressor to perform content verification.\n" | |
|
107 | "write_content_size\n" | |
|
108 | " If True (the default), the decompressed content size will be included in\n" | |
|
109 | " the header of the compressed data. This data will only be written if the\n" | |
|
110 | " compressor knows the size of the input data.\n" | |
|
111 | "write_dict_id\n" | |
|
112 | " Determines whether the dictionary ID will be written into the compressed\n" | |
|
113 | " data. Defaults to True. Only adds content to the compressed data if\n" | |
|
114 | " a dictionary is being used.\n" | |
|
115 | "threads\n" | |
|
116 | " Number of threads to use to compress data concurrently. When set,\n" | |
|
117 | " compression operations are performed on multiple threads. The default\n" | |
|
118 | " value (0) disables multi-threaded compression. A value of ``-1`` means to\n" | |
|
119 | " set the number of threads to the number of detected logical CPUs.\n" | |
|
120 | ); | |
|
88 | static int ZstdCompressor_init(ZstdCompressor *self, PyObject *args, | |
|
89 | PyObject *kwargs) { | |
|
90 | static char *kwlist[] = {"level", | |
|
91 | "dict_data", | |
|
92 | "compression_params", | |
|
93 | "write_checksum", | |
|
94 | "write_content_size", | |
|
95 | "write_dict_id", | |
|
96 | "threads", | |
|
97 | NULL}; | |
|
98 | ||
|
99 | int level = 3; | |
|
100 | PyObject *dict = NULL; | |
|
101 | PyObject *params = NULL; | |
|
102 | PyObject *writeChecksum = NULL; | |
|
103 | PyObject *writeContentSize = NULL; | |
|
104 | PyObject *writeDictID = NULL; | |
|
105 | int threads = 0; | |
|
106 | ||
|
107 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iOOOOOi:ZstdCompressor", | |
|
108 | kwlist, &level, &dict, ¶ms, | |
|
109 | &writeChecksum, &writeContentSize, | |
|
110 | &writeDictID, &threads)) { | |
|
111 | return -1; | |
|
112 | } | |
|
113 | ||
|
114 | if (level > ZSTD_maxCLevel()) { | |
|
115 | PyErr_Format(PyExc_ValueError, "level must be less than %d", | |
|
116 | ZSTD_maxCLevel() + 1); | |
|
117 | return -1; | |
|
118 | } | |
|
121 | 119 | |
|
122 | static int ZstdCompressor_init(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
123 | static char* kwlist[] = { | |
|
124 | "level", | |
|
125 | "dict_data", | |
|
126 | "compression_params", | |
|
127 | "write_checksum", | |
|
128 | "write_content_size", | |
|
129 | "write_dict_id", | |
|
130 | "threads", | |
|
131 | NULL | |
|
132 | }; | |
|
120 | if (threads < 0) { | |
|
121 | threads = cpu_count(); | |
|
122 | } | |
|
123 | ||
|
124 | if (dict) { | |
|
125 | if (dict == Py_None) { | |
|
126 | dict = NULL; | |
|
127 | } | |
|
128 | else if (!PyObject_IsInstance(dict, | |
|
129 | (PyObject *)ZstdCompressionDictType)) { | |
|
130 | PyErr_Format(PyExc_TypeError, | |
|
131 | "dict_data must be zstd.ZstdCompressionDict"); | |
|
132 | return -1; | |
|
133 | } | |
|
134 | } | |
|
133 | 135 | |
|
134 | int level = 3; | |
|
135 | ZstdCompressionDict* dict = NULL; | |
|
136 | ZstdCompressionParametersObject* params = NULL; | |
|
137 | PyObject* writeChecksum = NULL; | |
|
138 | PyObject* writeContentSize = NULL; | |
|
139 | PyObject* writeDictID = NULL; | |
|
140 | int threads = 0; | |
|
136 | if (params) { | |
|
137 | if (params == Py_None) { | |
|
138 | params = NULL; | |
|
139 | } | |
|
140 | else if (!PyObject_IsInstance( | |
|
141 | params, (PyObject *)ZstdCompressionParametersType)) { | |
|
142 | PyErr_Format( | |
|
143 | PyExc_TypeError, | |
|
144 | "compression_params must be zstd.ZstdCompressionParameters"); | |
|
145 | return -1; | |
|
146 | } | |
|
147 | } | |
|
141 | 148 | |
|
142 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|iO!O!OOOi:ZstdCompressor", | |
|
143 | kwlist, &level, &ZstdCompressionDictType, &dict, | |
|
144 | &ZstdCompressionParametersType, ¶ms, | |
|
145 | &writeChecksum, &writeContentSize, &writeDictID, &threads)) { | |
|
146 | return -1; | |
|
147 | } | |
|
148 | ||
|
149 | if (level > ZSTD_maxCLevel()) { | |
|
150 | PyErr_Format(PyExc_ValueError, "level must be less than %d", | |
|
151 | ZSTD_maxCLevel() + 1); | |
|
152 | return -1; | |
|
153 | } | |
|
154 | ||
|
155 | if (threads < 0) { | |
|
156 | threads = cpu_count(); | |
|
157 | } | |
|
149 | if (writeChecksum == Py_None) { | |
|
150 | writeChecksum = NULL; | |
|
151 | } | |
|
152 | if (writeContentSize == Py_None) { | |
|
153 | writeContentSize = NULL; | |
|
154 | } | |
|
155 | if (writeDictID == Py_None) { | |
|
156 | writeDictID = NULL; | |
|
157 | } | |
|
158 | 158 | |
|
159 |
|
|
|
160 |
|
|
|
161 |
|
|
|
162 |
|
|
|
163 |
|
|
|
164 |
|
|
|
165 | } | |
|
159 | /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the | |
|
160 | overhead of each compression operation. */ | |
|
161 | self->cctx = ZSTD_createCCtx(); | |
|
162 | if (!self->cctx) { | |
|
163 | PyErr_NoMemory(); | |
|
164 | return -1; | |
|
165 | } | |
|
166 | 166 | |
|
167 |
|
|
|
168 |
|
|
|
169 |
|
|
|
167 | /* TODO stuff the original parameters away somewhere so we can reset later. | |
|
168 | This will allow us to do things like automatically adjust cparams based | |
|
169 | on input size (assuming zstd isn't doing that internally). */ | |
|
170 | 170 | |
|
171 |
|
|
|
172 |
|
|
|
173 |
|
|
|
174 |
|
|
|
175 | } | |
|
171 | self->params = ZSTD_createCCtxParams(); | |
|
172 | if (!self->params) { | |
|
173 | PyErr_NoMemory(); | |
|
174 | return -1; | |
|
175 | } | |
|
176 | 176 | |
|
177 |
|
|
|
178 |
|
|
|
179 |
|
|
|
180 |
|
|
|
181 | } | |
|
177 | if (params && writeChecksum) { | |
|
178 | PyErr_SetString(PyExc_ValueError, | |
|
179 | "cannot define compression_params and write_checksum"); | |
|
180 | return -1; | |
|
181 | } | |
|
182 | 182 | |
|
183 |
|
|
|
184 | PyErr_SetString(PyExc_ValueError, | |
|
185 | "cannot define compression_params and write_content_size"); | |
|
186 | return -1; | |
|
187 | } | |
|
183 | if (params && writeContentSize) { | |
|
184 | PyErr_SetString( | |
|
185 | PyExc_ValueError, | |
|
186 | "cannot define compression_params and write_content_size"); | |
|
187 | return -1; | |
|
188 | } | |
|
188 | 189 | |
|
189 |
|
|
|
190 |
|
|
|
191 |
|
|
|
192 |
|
|
|
193 | } | |
|
190 | if (params && writeDictID) { | |
|
191 | PyErr_SetString(PyExc_ValueError, | |
|
192 | "cannot define compression_params and write_dict_id"); | |
|
193 | return -1; | |
|
194 | } | |
|
194 | 195 | |
|
195 |
|
|
|
196 |
|
|
|
197 |
|
|
|
198 |
|
|
|
199 | } | |
|
196 | if (params && threads) { | |
|
197 | PyErr_SetString(PyExc_ValueError, | |
|
198 | "cannot define compression_params and threads"); | |
|
199 | return -1; | |
|
200 | } | |
|
200 | 201 | |
|
201 |
|
|
|
202 |
|
|
|
203 | return -1; | |
|
204 | } | |
|
205 | } | |
|
206 | else { | |
|
207 | if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) { | |
|
208 | return -1; | |
|
209 | } | |
|
210 | ||
|
211 | if (set_parameter(self->params, ZSTD_c_contentSizeFlag, | |
|
212 | writeContentSize ? PyObject_IsTrue(writeContentSize) : 1)) { | |
|
213 | return -1; | |
|
214 | } | |
|
202 | if (params) { | |
|
203 | if (set_parameters(self->params, | |
|
204 | (ZstdCompressionParametersObject *)params)) { | |
|
205 | return -1; | |
|
206 | } | |
|
207 | } | |
|
208 | else { | |
|
209 | if (set_parameter(self->params, ZSTD_c_compressionLevel, level)) { | |
|
210 | return -1; | |
|
211 | } | |
|
215 | 212 | |
|
216 |
|
|
|
217 | writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) { | |
|
218 | return -1; | |
|
219 | } | |
|
213 | if (set_parameter(self->params, ZSTD_c_contentSizeFlag, | |
|
214 | writeContentSize ? PyObject_IsTrue(writeContentSize) | |
|
215 | : 1)) { | |
|
216 | return -1; | |
|
217 | } | |
|
220 | 218 | |
|
221 |
|
|
|
222 | writeDictID ? PyObject_IsTrue(writeDictID) : 1)) { | |
|
223 | return -1; | |
|
224 | } | |
|
219 | if (set_parameter(self->params, ZSTD_c_checksumFlag, | |
|
220 | writeChecksum ? PyObject_IsTrue(writeChecksum) : 0)) { | |
|
221 | return -1; | |
|
222 | } | |
|
225 | 223 | |
|
226 | if (threads) { | |
|
227 | if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) { | |
|
228 | return -1; | |
|
229 | } | |
|
230 | } | |
|
231 | } | |
|
224 | if (set_parameter(self->params, ZSTD_c_dictIDFlag, | |
|
225 | writeDictID ? PyObject_IsTrue(writeDictID) : 1)) { | |
|
226 | return -1; | |
|
227 | } | |
|
232 | 228 | |
|
233 | if (dict) { | |
|
234 | self->dict = dict; | |
|
235 | Py_INCREF(dict); | |
|
236 | } | |
|
229 | if (threads) { | |
|
230 | if (set_parameter(self->params, ZSTD_c_nbWorkers, threads)) { | |
|
231 | return -1; | |
|
232 | } | |
|
233 | } | |
|
234 | } | |
|
235 | ||
|
236 | if (dict) { | |
|
237 | self->dict = (ZstdCompressionDict *)dict; | |
|
238 | Py_INCREF(dict); | |
|
239 | } | |
|
237 | 240 | |
|
238 | 241 | if (setup_cctx(self)) { |
|
239 | 242 | return -1; |
|
240 | 243 | } |
|
241 | 244 | |
|
242 |
|
|
|
243 | } | |
|
244 | ||
|
245 | static void ZstdCompressor_dealloc(ZstdCompressor* self) { | |
|
246 | if (self->cctx) { | |
|
247 | ZSTD_freeCCtx(self->cctx); | |
|
248 | self->cctx = NULL; | |
|
249 | } | |
|
250 | ||
|
251 | if (self->params) { | |
|
252 | ZSTD_freeCCtxParams(self->params); | |
|
253 | self->params = NULL; | |
|
254 | } | |
|
255 | ||
|
256 | Py_XDECREF(self->dict); | |
|
257 | PyObject_Del(self); | |
|
245 | return 0; | |
|
258 | 246 | } |
|
259 | 247 | |
|
260 | PyDoc_STRVAR(ZstdCompressor_memory_size__doc__, | |
|
261 | "memory_size()\n" | |
|
262 | "\n" | |
|
263 | "Obtain the memory usage of this compressor, in bytes.\n" | |
|
264 | ); | |
|
248 | static void ZstdCompressor_dealloc(ZstdCompressor *self) { | |
|
249 | if (self->cctx) { | |
|
250 | ZSTD_freeCCtx(self->cctx); | |
|
251 | self->cctx = NULL; | |
|
252 | } | |
|
265 | 253 | |
|
266 | static PyObject* ZstdCompressor_memory_size(ZstdCompressor* self) { | |
|
267 | if (self->cctx) { | |
|
268 | return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx)); | |
|
269 | } | |
|
270 | else { | |
|
271 | PyErr_SetString(ZstdError, "no compressor context found; this should never happen"); | |
|
272 | return NULL; | |
|
273 | } | |
|
254 | if (self->params) { | |
|
255 | ZSTD_freeCCtxParams(self->params); | |
|
256 | self->params = NULL; | |
|
257 | } | |
|
258 | ||
|
259 | Py_XDECREF(self->dict); | |
|
260 | PyObject_Del(self); | |
|
274 | 261 | } |
|
275 | 262 | |
|
276 | PyDoc_STRVAR(ZstdCompressor_frame_progression__doc__, | |
|
277 | "frame_progression()\n" | |
|
278 | "\n" | |
|
279 | "Return information on how much work the compressor has done.\n" | |
|
280 | "\n" | |
|
281 | "Returns a 3-tuple of (ingested, consumed, produced).\n" | |
|
282 | ); | |
|
263 | static PyObject *ZstdCompressor_memory_size(ZstdCompressor *self) { | |
|
264 | if (self->cctx) { | |
|
265 | return PyLong_FromSize_t(ZSTD_sizeof_CCtx(self->cctx)); | |
|
266 | } | |
|
267 | else { | |
|
268 | PyErr_SetString( | |
|
269 | ZstdError, "no compressor context found; this should never happen"); | |
|
270 | return NULL; | |
|
271 | } | |
|
272 | } | |
|
283 | 273 | |
|
284 |
static PyObject* |
|
|
285 |
|
|
|
274 | static PyObject *ZstdCompressor_frame_progression(ZstdCompressor *self) { | |
|
275 | return frame_progression(self->cctx); | |
|
286 | 276 | } |
|
287 | 277 | |
|
288 | PyDoc_STRVAR(ZstdCompressor_copy_stream__doc__, | |
|
289 | "copy_stream(ifh, ofh[, size=0, read_size=default, write_size=default])\n" | |
|
290 | "compress data between streams\n" | |
|
291 | "\n" | |
|
292 | "Data will be read from ``ifh``, compressed, and written to ``ofh``.\n" | |
|
293 | "``ifh`` must have a ``read(size)`` method. ``ofh`` must have a ``write(data)``\n" | |
|
294 | "method.\n" | |
|
295 | "\n" | |
|
296 | "An optional ``size`` argument specifies the size of the source stream.\n" | |
|
297 | "If defined, compression parameters will be tuned based on the size.\n" | |
|
298 | "\n" | |
|
299 | "Optional arguments ``read_size`` and ``write_size`` define the chunk sizes\n" | |
|
300 | "of ``read()`` and ``write()`` operations, respectively. By default, they use\n" | |
|
301 | "the default compression stream input and output sizes, respectively.\n" | |
|
302 | ); | |
|
278 | static PyObject *ZstdCompressor_copy_stream(ZstdCompressor *self, | |
|
279 | PyObject *args, PyObject *kwargs) { | |
|
280 | static char *kwlist[] = {"ifh", "ofh", "size", | |
|
281 | "read_size", "write_size", NULL}; | |
|
303 | 282 | |
|
304 | static PyObject* ZstdCompressor_copy_stream(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
305 | static char* kwlist[] = { | |
|
306 | "ifh", | |
|
307 | "ofh", | |
|
308 | "size", | |
|
309 | "read_size", | |
|
310 | "write_size", | |
|
311 | NULL | |
|
312 | }; | |
|
283 | PyObject *source; | |
|
284 | PyObject *dest; | |
|
285 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
286 | size_t inSize = ZSTD_CStreamInSize(); | |
|
287 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
288 | ZSTD_inBuffer input; | |
|
289 | ZSTD_outBuffer output; | |
|
290 | Py_ssize_t totalRead = 0; | |
|
291 | Py_ssize_t totalWrite = 0; | |
|
292 | char *readBuffer; | |
|
293 | Py_ssize_t readSize; | |
|
294 | PyObject *readResult = NULL; | |
|
295 | PyObject *res = NULL; | |
|
296 | size_t zresult; | |
|
297 | PyObject *writeResult; | |
|
298 | PyObject *totalReadPy; | |
|
299 | PyObject *totalWritePy; | |
|
300 | ||
|
301 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist, | |
|
302 | &source, &dest, &sourceSize, &inSize, | |
|
303 | &outSize)) { | |
|
304 | return NULL; | |
|
305 | } | |
|
306 | ||
|
307 | if (!PyObject_HasAttrString(source, "read")) { | |
|
308 | PyErr_SetString(PyExc_ValueError, | |
|
309 | "first argument must have a read() method"); | |
|
310 | return NULL; | |
|
311 | } | |
|
313 | 312 | |
|
314 | PyObject* source; | |
|
315 | PyObject* dest; | |
|
316 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
317 | size_t inSize = ZSTD_CStreamInSize(); | |
|
318 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
319 | ZSTD_inBuffer input; | |
|
320 | ZSTD_outBuffer output; | |
|
321 | Py_ssize_t totalRead = 0; | |
|
322 | Py_ssize_t totalWrite = 0; | |
|
323 | char* readBuffer; | |
|
324 | Py_ssize_t readSize; | |
|
325 | PyObject* readResult = NULL; | |
|
326 | PyObject* res = NULL; | |
|
327 | size_t zresult; | |
|
328 | PyObject* writeResult; | |
|
329 | PyObject* totalReadPy; | |
|
330 | PyObject* totalWritePy; | |
|
313 | if (!PyObject_HasAttrString(dest, "write")) { | |
|
314 | PyErr_SetString(PyExc_ValueError, | |
|
315 | "second argument must have a write() method"); | |
|
316 | return NULL; | |
|
317 | } | |
|
331 | 318 | |
|
332 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "OO|Kkk:copy_stream", kwlist, | |
|
333 | &source, &dest, &sourceSize, &inSize, &outSize)) { | |
|
334 | return NULL; | |
|
335 | } | |
|
319 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
336 | 320 | |
|
337 | if (!PyObject_HasAttrString(source, "read")) { | |
|
338 | PyErr_SetString(PyExc_ValueError, "first argument must have a read() method"); | |
|
339 | return NULL; | |
|
340 | } | |
|
321 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
322 | if (ZSTD_isError(zresult)) { | |
|
323 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
324 | ZSTD_getErrorName(zresult)); | |
|
325 | return NULL; | |
|
326 | } | |
|
341 | 327 | |
|
342 | if (!PyObject_HasAttrString(dest, "write")) { | |
|
343 | PyErr_SetString(PyExc_ValueError, "second argument must have a write() method"); | |
|
344 | return NULL; | |
|
345 | } | |
|
346 | ||
|
347 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
348 | ||
|
349 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
350 | if (ZSTD_isError(zresult)) { | |
|
351 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
352 | ZSTD_getErrorName(zresult)); | |
|
353 | return NULL; | |
|
354 | } | |
|
328 | /* Prevent free on uninitialized memory in finally. */ | |
|
329 | output.dst = PyMem_Malloc(outSize); | |
|
330 | if (!output.dst) { | |
|
331 | PyErr_NoMemory(); | |
|
332 | res = NULL; | |
|
333 | goto finally; | |
|
334 | } | |
|
335 | output.size = outSize; | |
|
336 | output.pos = 0; | |
|
355 | 337 | |
|
356 | /* Prevent free on uninitialized memory in finally. */ | |
|
357 | output.dst = PyMem_Malloc(outSize); | |
|
358 | if (!output.dst) { | |
|
359 | PyErr_NoMemory(); | |
|
360 | res = NULL; | |
|
361 | goto finally; | |
|
362 | } | |
|
363 | output.size = outSize; | |
|
364 | output.pos = 0; | |
|
338 | input.src = NULL; | |
|
339 | input.size = 0; | |
|
340 | input.pos = 0; | |
|
365 | 341 | |
|
366 | input.src = NULL; | |
|
367 | input.size = 0; | |
|
368 | input.pos = 0; | |
|
342 | while (1) { | |
|
343 | /* Try to read from source stream. */ | |
|
344 | readResult = PyObject_CallMethod(source, "read", "n", inSize); | |
|
345 | if (!readResult) { | |
|
346 | goto finally; | |
|
347 | } | |
|
348 | ||
|
349 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
|
369 | 350 | |
|
370 | while (1) { | |
|
371 | /* Try to read from source stream. */ | |
|
372 | readResult = PyObject_CallMethod(source, "read", "n", inSize); | |
|
373 | if (!readResult) { | |
|
374 | PyErr_SetString(ZstdError, "could not read() from source"); | |
|
375 | goto finally; | |
|
376 | } | |
|
351 | /* If no data was read, we're at EOF. */ | |
|
352 | if (0 == readSize) { | |
|
353 | break; | |
|
354 | } | |
|
355 | ||
|
356 | totalRead += readSize; | |
|
377 | 357 | |
|
378 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
|
358 | /* Send data to compressor */ | |
|
359 | input.src = readBuffer; | |
|
360 | input.size = readSize; | |
|
361 | input.pos = 0; | |
|
379 | 362 | |
|
380 | /* If no data was read, we're at EOF. */ | |
|
381 | if (0 == readSize) { | |
|
382 | break; | |
|
383 | } | |
|
363 | while (input.pos < input.size) { | |
|
364 | Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2( | |
|
365 | self->cctx, &output, &input, ZSTD_e_continue); | |
|
366 | Py_END_ALLOW_THREADS | |
|
384 | 367 | |
|
385 | totalRead += readSize; | |
|
386 | ||
|
387 | /* Send data to compressor */ | |
|
388 | input.src = readBuffer; | |
|
389 | input.size = readSize; | |
|
390 | input.pos = 0; | |
|
368 | if (ZSTD_isError(zresult)) { | |
|
369 | res = NULL; | |
|
370 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
371 | ZSTD_getErrorName(zresult)); | |
|
372 | goto finally; | |
|
373 | } | |
|
391 | 374 | |
|
392 | while (input.pos < input.size) { | |
|
393 | Py_BEGIN_ALLOW_THREADS | |
|
394 | zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_continue); | |
|
395 | Py_END_ALLOW_THREADS | |
|
396 | ||
|
397 | if (ZSTD_isError(zresult)) { | |
|
398 | res = NULL; | |
|
399 | PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult)); | |
|
400 | goto finally; | |
|
401 | } | |
|
375 | if (output.pos) { | |
|
376 | writeResult = PyObject_CallMethod(dest, "write", "y#", | |
|
377 | output.dst, output.pos); | |
|
378 | if (NULL == writeResult) { | |
|
379 | res = NULL; | |
|
380 | goto finally; | |
|
381 | } | |
|
382 | Py_XDECREF(writeResult); | |
|
383 | totalWrite += output.pos; | |
|
384 | output.pos = 0; | |
|
385 | } | |
|
386 | } | |
|
402 | 387 | |
|
403 | if (output.pos) { | |
|
404 | #if PY_MAJOR_VERSION >= 3 | |
|
405 | writeResult = PyObject_CallMethod(dest, "write", "y#", | |
|
406 | #else | |
|
407 | writeResult = PyObject_CallMethod(dest, "write", "s#", | |
|
408 | #endif | |
|
409 | output.dst, output.pos); | |
|
410 | Py_XDECREF(writeResult); | |
|
411 | totalWrite += output.pos; | |
|
412 | output.pos = 0; | |
|
413 | } | |
|
414 | } | |
|
388 | Py_CLEAR(readResult); | |
|
389 | } | |
|
390 | ||
|
391 | /* We've finished reading. Now flush the compressor stream. */ | |
|
392 | assert(input.pos == input.size); | |
|
415 | 393 | |
|
416 | Py_CLEAR(readResult); | |
|
417 | } | |
|
394 | while (1) { | |
|
395 | Py_BEGIN_ALLOW_THREADS zresult = | |
|
396 | ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end); | |
|
397 | Py_END_ALLOW_THREADS | |
|
418 | 398 | |
|
419 | /* We've finished reading. Now flush the compressor stream. */ | |
|
420 | assert(input.pos == input.size); | |
|
421 | ||
|
422 | while (1) { | |
|
423 | Py_BEGIN_ALLOW_THREADS | |
|
424 | zresult = ZSTD_compressStream2(self->cctx, &output, &input, ZSTD_e_end); | |
|
425 | Py_END_ALLOW_THREADS | |
|
399 | if (ZSTD_isError(zresult)) { | |
|
400 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
401 | ZSTD_getErrorName(zresult)); | |
|
402 | res = NULL; | |
|
403 | goto finally; | |
|
404 | } | |
|
426 | 405 | |
|
427 | if (ZSTD_isError(zresult)) { | |
|
428 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
429 | ZSTD_getErrorName(zresult)); | |
|
430 | res = NULL; | |
|
431 | goto finally; | |
|
432 | } | |
|
406 | if (output.pos) { | |
|
407 | writeResult = PyObject_CallMethod(dest, "write", "y#", output.dst, | |
|
408 | output.pos); | |
|
409 | if (NULL == writeResult) { | |
|
410 | res = NULL; | |
|
411 | goto finally; | |
|
412 | } | |
|
413 | totalWrite += output.pos; | |
|
414 | Py_XDECREF(writeResult); | |
|
415 | output.pos = 0; | |
|
416 | } | |
|
433 | 417 | |
|
434 | if (output.pos) { | |
|
435 | #if PY_MAJOR_VERSION >= 3 | |
|
436 | writeResult = PyObject_CallMethod(dest, "write", "y#", | |
|
437 | #else | |
|
438 | writeResult = PyObject_CallMethod(dest, "write", "s#", | |
|
439 | #endif | |
|
440 | output.dst, output.pos); | |
|
441 | totalWrite += output.pos; | |
|
442 | Py_XDECREF(writeResult); | |
|
443 | output.pos = 0; | |
|
444 | } | |
|
418 | if (!zresult) { | |
|
419 | break; | |
|
420 | } | |
|
421 | } | |
|
445 | 422 | |
|
446 | if (!zresult) { | |
|
447 | break; | |
|
448 | } | |
|
449 | } | |
|
450 | ||
|
451 | totalReadPy = PyLong_FromSsize_t(totalRead); | |
|
452 | totalWritePy = PyLong_FromSsize_t(totalWrite); | |
|
453 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); | |
|
454 | Py_DECREF(totalReadPy); | |
|
455 | Py_DECREF(totalWritePy); | |
|
423 | totalReadPy = PyLong_FromSsize_t(totalRead); | |
|
424 | totalWritePy = PyLong_FromSsize_t(totalWrite); | |
|
425 | res = PyTuple_Pack(2, totalReadPy, totalWritePy); | |
|
426 | Py_DECREF(totalReadPy); | |
|
427 | Py_DECREF(totalWritePy); | |
|
456 | 428 | |
|
457 | 429 | finally: |
|
458 |
|
|
|
459 |
|
|
|
460 | } | |
|
430 | if (output.dst) { | |
|
431 | PyMem_Free(output.dst); | |
|
432 | } | |
|
461 | 433 | |
|
462 |
|
|
|
434 | Py_XDECREF(readResult); | |
|
463 | 435 | |
|
464 |
|
|
|
436 | return res; | |
|
465 | 437 | } |
|
466 | 438 | |
|
467 | PyDoc_STRVAR(ZstdCompressor_stream_reader__doc__, | |
|
468 | "stream_reader(source, [size=0])\n" | |
|
469 | "\n" | |
|
470 | "Obtain an object that behaves like an I/O stream.\n" | |
|
471 | "\n" | |
|
472 | "The source object can be any object with a ``read(size)`` method\n" | |
|
473 | "or an object that conforms to the buffer protocol.\n" | |
|
474 | ); | |
|
439 | static ZstdCompressionReader *ZstdCompressor_stream_reader(ZstdCompressor *self, | |
|
440 | PyObject *args, | |
|
441 | PyObject *kwargs) { | |
|
442 | static char *kwlist[] = {"source", "size", "read_size", "closefd", NULL}; | |
|
443 | ||
|
444 | PyObject *source; | |
|
445 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
446 | size_t readSize = ZSTD_CStreamInSize(); | |
|
447 | PyObject *closefd = NULL; | |
|
448 | ZstdCompressionReader *result = NULL; | |
|
449 | size_t zresult; | |
|
475 | 450 | |
|
476 | static ZstdCompressionReader* ZstdCompressor_stream_reader(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
477 | static char* kwlist[] = { | |
|
478 | "source", | |
|
479 | "size", | |
|
480 | "read_size", | |
|
481 | NULL | |
|
482 | }; | |
|
451 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_reader", | |
|
452 | kwlist, &source, &sourceSize, &readSize, | |
|
453 | &closefd)) { | |
|
454 | return NULL; | |
|
455 | } | |
|
483 | 456 | |
|
484 | PyObject* source; | |
|
485 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
486 | size_t readSize = ZSTD_CStreamInSize(); | |
|
487 | ZstdCompressionReader* result = NULL; | |
|
488 | size_t zresult; | |
|
457 | result = (ZstdCompressionReader *)PyObject_CallObject( | |
|
458 | (PyObject *)ZstdCompressionReaderType, NULL); | |
|
459 | if (!result) { | |
|
460 | return NULL; | |
|
461 | } | |
|
489 | 462 | |
|
490 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist, | |
|
491 | &source, &sourceSize, &readSize)) { | |
|
492 | return NULL; | |
|
493 | } | |
|
494 | ||
|
495 | result = (ZstdCompressionReader*)PyObject_CallObject((PyObject*)&ZstdCompressionReaderType, NULL); | |
|
496 | if (!result) { | |
|
497 | return NULL; | |
|
498 | } | |
|
463 | result->entered = 0; | |
|
464 | result->closed = 0; | |
|
499 | 465 | |
|
500 |
|
|
|
501 |
|
|
|
502 |
|
|
|
503 |
|
|
|
504 | } | |
|
505 |
|
|
|
506 |
|
|
|
507 |
|
|
|
508 | } | |
|
466 | if (PyObject_HasAttrString(source, "read")) { | |
|
467 | result->reader = source; | |
|
468 | Py_INCREF(source); | |
|
469 | result->readSize = readSize; | |
|
470 | } | |
|
471 | else if (1 == PyObject_CheckBuffer(source)) { | |
|
472 | if (0 != PyObject_GetBuffer(source, &result->buffer, PyBUF_CONTIG_RO)) { | |
|
473 | goto except; | |
|
474 | } | |
|
509 | 475 | |
|
510 |
|
|
|
476 | assert(result->buffer.len >= 0); | |
|
511 | 477 | |
|
512 |
|
|
|
513 | } | |
|
514 |
|
|
|
515 |
|
|
|
516 |
|
|
|
517 | goto except; | |
|
518 | } | |
|
478 | sourceSize = result->buffer.len; | |
|
479 | } | |
|
480 | else { | |
|
481 | PyErr_SetString(PyExc_TypeError, | |
|
482 | "must pass an object with a read() method or that " | |
|
483 | "conforms to the buffer protocol"); | |
|
484 | goto except; | |
|
485 | } | |
|
519 | 486 | |
|
520 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
487 | result->closefd = closefd ? PyObject_IsTrue(closefd) : 1; | |
|
521 | 488 | |
|
522 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
523 | if (ZSTD_isError(zresult)) { | |
|
524 | PyErr_Format(ZstdError, "error setting source source: %s", | |
|
525 | ZSTD_getErrorName(zresult)); | |
|
526 | goto except; | |
|
527 | } | |
|
489 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
528 | 490 | |
|
529 | result->compressor = self; | |
|
530 | Py_INCREF(self); | |
|
491 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
492 | if (ZSTD_isError(zresult)) { | |
|
493 | PyErr_Format(ZstdError, "error setting source source: %s", | |
|
494 | ZSTD_getErrorName(zresult)); | |
|
495 | goto except; | |
|
496 | } | |
|
531 | 497 | |
|
532 | return result; | |
|
498 | result->compressor = self; | |
|
499 | Py_INCREF(self); | |
|
500 | ||
|
501 | return result; | |
|
533 | 502 | |
|
534 | 503 | except: |
|
535 |
|
|
|
504 | Py_CLEAR(result); | |
|
536 | 505 | |
|
537 |
|
|
|
506 | return NULL; | |
|
538 | 507 | } |
|
539 | 508 | |
|
540 | PyDoc_STRVAR(ZstdCompressor_compress__doc__, | |
|
541 | "compress(data)\n" | |
|
542 | "\n" | |
|
543 | "Compress data in a single operation.\n" | |
|
544 | "\n" | |
|
545 | "This is the simplest mechanism to perform compression: simply pass in a\n" | |
|
546 | "value and get a compressed value back. It is almost the most prone to abuse.\n" | |
|
547 | "The input and output values must fit in memory, so passing in very large\n" | |
|
548 | "values can result in excessive memory usage. For this reason, one of the\n" | |
|
549 | "streaming based APIs is preferred for larger values.\n" | |
|
550 | ); | |
|
509 | static PyObject *ZstdCompressor_compress(ZstdCompressor *self, PyObject *args, | |
|
510 | PyObject *kwargs) { | |
|
511 | static char *kwlist[] = {"data", NULL}; | |
|
551 | 512 | |
|
552 | static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
553 | static char* kwlist[] = { | |
|
554 | "data", | |
|
555 | NULL | |
|
556 | }; | |
|
513 | Py_buffer source; | |
|
514 | size_t destSize; | |
|
515 | PyObject *output = NULL; | |
|
516 | size_t zresult; | |
|
517 | ZSTD_outBuffer outBuffer; | |
|
518 | ZSTD_inBuffer inBuffer; | |
|
557 | 519 | |
|
558 | Py_buffer source; | |
|
559 | size_t destSize; | |
|
560 | PyObject* output = NULL; | |
|
561 | size_t zresult; | |
|
562 | ZSTD_outBuffer outBuffer; | |
|
563 | ZSTD_inBuffer inBuffer; | |
|
520 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress", kwlist, | |
|
521 | &source)) { | |
|
522 | return NULL; | |
|
523 | } | |
|
564 | 524 | |
|
565 | #if PY_MAJOR_VERSION >= 3 | |
|
566 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|O:compress", | |
|
567 | #else | |
|
568 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|O:compress", | |
|
569 | #endif | |
|
570 | kwlist, &source)) { | |
|
571 | return NULL; | |
|
572 | } | |
|
525 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
573 | 526 | |
|
574 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
|
575 | PyErr_SetString(PyExc_ValueError, | |
|
576 | "data buffer should be contiguous and have at most one dimension"); | |
|
577 |
|
|
|
578 | } | |
|
579 | ||
|
580 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
527 | destSize = ZSTD_compressBound(source.len); | |
|
528 | output = PyBytes_FromStringAndSize(NULL, destSize); | |
|
529 | if (!output) { | |
|
530 | goto finally; | |
|
531 | } | |
|
581 | 532 | |
|
582 | destSize = ZSTD_compressBound(source.len); | |
|
583 | output = PyBytes_FromStringAndSize(NULL, destSize); | |
|
584 | if (!output) { | |
|
585 | goto finally; | |
|
586 | } | |
|
533 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len); | |
|
534 | if (ZSTD_isError(zresult)) { | |
|
535 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
536 | ZSTD_getErrorName(zresult)); | |
|
537 | Py_CLEAR(output); | |
|
538 | goto finally; | |
|
539 | } | |
|
587 | 540 | |
|
588 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, source.len); | |
|
589 | if (ZSTD_isError(zresult)) { | |
|
590 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
591 | ZSTD_getErrorName(zresult)); | |
|
592 | Py_CLEAR(output); | |
|
593 | goto finally; | |
|
594 | } | |
|
541 | inBuffer.src = source.buf; | |
|
542 | inBuffer.size = source.len; | |
|
543 | inBuffer.pos = 0; | |
|
595 | 544 | |
|
596 | inBuffer.src = source.buf; | |
|
597 | inBuffer.size = source.len; | |
|
598 |
|
|
|
545 | outBuffer.dst = PyBytes_AsString(output); | |
|
546 | outBuffer.size = destSize; | |
|
547 | outBuffer.pos = 0; | |
|
599 | 548 | |
|
600 | outBuffer.dst = PyBytes_AsString(output); | |
|
601 | outBuffer.size = destSize; | |
|
602 | outBuffer.pos = 0; | |
|
603 | ||
|
604 | Py_BEGIN_ALLOW_THREADS | |
|
605 | /* By avoiding ZSTD_compress(), we don't necessarily write out content | |
|
606 | size. This means the argument to ZstdCompressor to control frame | |
|
607 | parameters is honored. */ | |
|
608 | zresult = ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end); | |
|
609 | Py_END_ALLOW_THREADS | |
|
549 | Py_BEGIN_ALLOW_THREADS | |
|
550 | /* By avoiding ZSTD_compress(), we don't necessarily write out content | |
|
551 | size. This means the argument to ZstdCompressor to control frame | |
|
552 | parameters is honored. */ | |
|
553 | zresult = | |
|
554 | ZSTD_compressStream2(self->cctx, &outBuffer, &inBuffer, ZSTD_e_end); | |
|
555 | Py_END_ALLOW_THREADS | |
|
610 | 556 | |
|
611 |
|
|
|
612 |
|
|
|
613 | Py_CLEAR(output); | |
|
614 | goto finally; | |
|
615 | } | |
|
616 | else if (zresult) { | |
|
617 | PyErr_SetString(ZstdError, "unexpected partial frame flush"); | |
|
618 | Py_CLEAR(output); | |
|
619 | goto finally; | |
|
620 | } | |
|
557 | if (ZSTD_isError(zresult)) { | |
|
558 | PyErr_Format(ZstdError, "cannot compress: %s", | |
|
559 | ZSTD_getErrorName(zresult)); | |
|
560 | Py_CLEAR(output); | |
|
561 | goto finally; | |
|
562 | } | |
|
563 | else if (zresult) { | |
|
564 | PyErr_SetString(ZstdError, "unexpected partial frame flush"); | |
|
565 | Py_CLEAR(output); | |
|
566 | goto finally; | |
|
567 | } | |
|
621 | 568 | |
|
622 |
|
|
|
569 | Py_SET_SIZE(output, outBuffer.pos); | |
|
623 | 570 | |
|
624 | 571 | finally: |
|
625 |
|
|
|
626 |
|
|
|
572 | PyBuffer_Release(&source); | |
|
573 | return output; | |
|
627 | 574 | } |
|
628 | 575 | |
|
629 | PyDoc_STRVAR(ZstdCompressionObj__doc__, | |
|
630 | "compressobj()\n" | |
|
631 | "\n" | |
|
632 | "Return an object exposing ``compress(data)`` and ``flush()`` methods.\n" | |
|
633 | "\n" | |
|
634 | "The returned object exposes an API similar to ``zlib.compressobj`` and\n" | |
|
635 | "``bz2.BZ2Compressor`` so that callers can swap in the zstd compressor\n" | |
|
636 | "without changing how compression is performed.\n" | |
|
637 | ); | |
|
576 | static ZstdCompressionObj *ZstdCompressor_compressobj(ZstdCompressor *self, | |
|
577 | PyObject *args, | |
|
578 | PyObject *kwargs) { | |
|
579 | static char *kwlist[] = {"size", NULL}; | |
|
638 | 580 | |
|
639 | static ZstdCompressionObj* ZstdCompressor_compressobj(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
640 | static char* kwlist[] = { | |
|
641 | "size", | |
|
642 | NULL | |
|
643 | }; | |
|
581 | unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
582 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
583 | ZstdCompressionObj *result = NULL; | |
|
584 | size_t zresult; | |
|
644 | 585 | |
|
645 | unsigned long long inSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
646 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
647 | ZstdCompressionObj* result = NULL; | |
|
648 | size_t zresult; | |
|
586 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, | |
|
587 | &inSize)) { | |
|
588 | return NULL; | |
|
589 | } | |
|
649 | 590 | |
|
650 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|K:compressobj", kwlist, &inSize)) { | |
|
651 | return NULL; | |
|
652 | } | |
|
591 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
653 | 592 | |
|
654 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
655 | ||
|
656 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize); | |
|
657 | if (ZSTD_isError(zresult)) { | |
|
658 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
659 | ZSTD_getErrorName(zresult)); | |
|
660 | return NULL; | |
|
661 | } | |
|
593 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize); | |
|
594 | if (ZSTD_isError(zresult)) { | |
|
595 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
596 | ZSTD_getErrorName(zresult)); | |
|
597 | return NULL; | |
|
598 | } | |
|
662 | 599 | |
|
663 |
|
|
|
664 | if (!result) { | |
|
665 | return NULL; | |
|
666 | } | |
|
600 | result = (ZstdCompressionObj *)PyObject_CallObject( | |
|
601 | (PyObject *)ZstdCompressionObjType, NULL); | |
|
602 | if (!result) { | |
|
603 | return NULL; | |
|
604 | } | |
|
667 | 605 | |
|
668 |
|
|
|
669 |
|
|
|
670 |
|
|
|
671 |
|
|
|
672 |
|
|
|
673 | } | |
|
674 |
|
|
|
675 |
|
|
|
676 |
|
|
|
606 | result->output.dst = PyMem_Malloc(outSize); | |
|
607 | if (!result->output.dst) { | |
|
608 | PyErr_NoMemory(); | |
|
609 | Py_DECREF(result); | |
|
610 | return NULL; | |
|
611 | } | |
|
612 | result->output.size = outSize; | |
|
613 | result->compressor = self; | |
|
614 | Py_INCREF(result->compressor); | |
|
677 | 615 | |
|
678 |
|
|
|
616 | return result; | |
|
679 | 617 | } |
|
680 | 618 | |
|
681 | PyDoc_STRVAR(ZstdCompressor_read_to_iter__doc__, | |
|
682 | "read_to_iter(reader, [size=0, read_size=default, write_size=default])\n" | |
|
683 | "Read uncompressed data from a reader and return an iterator\n" | |
|
684 | "\n" | |
|
685 | "Returns an iterator of compressed data produced from reading from ``reader``.\n" | |
|
686 | "\n" | |
|
687 | "Uncompressed data will be obtained from ``reader`` by calling the\n" | |
|
688 | "``read(size)`` method of it. The source data will be streamed into a\n" | |
|
689 | "compressor. As compressed data is available, it will be exposed to the\n" | |
|
690 | "iterator.\n" | |
|
691 | "\n" | |
|
692 | "Data is read from the source in chunks of ``read_size``. Compressed chunks\n" | |
|
693 | "are at most ``write_size`` bytes. Both values default to the zstd input and\n" | |
|
694 | "and output defaults, respectively.\n" | |
|
695 | "\n" | |
|
696 | "The caller is partially in control of how fast data is fed into the\n" | |
|
697 | "compressor by how it consumes the returned iterator. The compressor will\n" | |
|
698 | "not consume from the reader unless the caller consumes from the iterator.\n" | |
|
699 | ); | |
|
619 | static ZstdCompressorIterator *ZstdCompressor_read_to_iter(ZstdCompressor *self, | |
|
620 | PyObject *args, | |
|
621 | PyObject *kwargs) { | |
|
622 | static char *kwlist[] = {"reader", "size", "read_size", "write_size", NULL}; | |
|
623 | ||
|
624 | PyObject *reader; | |
|
625 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
626 | size_t inSize = ZSTD_CStreamInSize(); | |
|
627 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
628 | ZstdCompressorIterator *result; | |
|
629 | size_t zresult; | |
|
700 | 630 | |
|
701 | static ZstdCompressorIterator* ZstdCompressor_read_to_iter(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
702 | static char* kwlist[] = { | |
|
703 | "reader", | |
|
704 | "size", | |
|
705 | "read_size", | |
|
706 | "write_size", | |
|
707 | NULL | |
|
708 | }; | |
|
631 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist, | |
|
632 | &reader, &sourceSize, &inSize, &outSize)) { | |
|
633 | return NULL; | |
|
634 | } | |
|
709 | 635 | |
|
710 | PyObject* reader; | |
|
711 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
712 | size_t inSize = ZSTD_CStreamInSize(); | |
|
713 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
714 | ZstdCompressorIterator* result; | |
|
715 | size_t zresult; | |
|
716 | ||
|
717 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kkk:read_to_iter", kwlist, | |
|
718 | &reader, &sourceSize, &inSize, &outSize)) { | |
|
719 | return NULL; | |
|
720 | } | |
|
636 | result = (ZstdCompressorIterator *)PyObject_CallObject( | |
|
637 | (PyObject *)ZstdCompressorIteratorType, NULL); | |
|
638 | if (!result) { | |
|
639 | return NULL; | |
|
640 | } | |
|
641 | if (PyObject_HasAttrString(reader, "read")) { | |
|
642 | result->reader = reader; | |
|
643 | Py_INCREF(result->reader); | |
|
644 | } | |
|
645 | else if (1 == PyObject_CheckBuffer(reader)) { | |
|
646 | if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) { | |
|
647 | goto except; | |
|
648 | } | |
|
721 | 649 | |
|
722 | result = (ZstdCompressorIterator*)PyObject_CallObject((PyObject*)&ZstdCompressorIteratorType, NULL); | |
|
723 | if (!result) { | |
|
724 | return NULL; | |
|
725 | } | |
|
726 | if (PyObject_HasAttrString(reader, "read")) { | |
|
727 | result->reader = reader; | |
|
728 | Py_INCREF(result->reader); | |
|
729 | } | |
|
730 | else if (1 == PyObject_CheckBuffer(reader)) { | |
|
731 | if (0 != PyObject_GetBuffer(reader, &result->buffer, PyBUF_CONTIG_RO)) { | |
|
732 | goto except; | |
|
733 | } | |
|
650 | sourceSize = result->buffer.len; | |
|
651 | } | |
|
652 | else { | |
|
653 | PyErr_SetString(PyExc_ValueError, | |
|
654 | "must pass an object with a read() method or conforms " | |
|
655 | "to buffer protocol"); | |
|
656 | goto except; | |
|
657 | } | |
|
734 | 658 | |
|
735 | sourceSize = result->buffer.len; | |
|
736 | } | |
|
737 | else { | |
|
738 | PyErr_SetString(PyExc_ValueError, | |
|
739 | "must pass an object with a read() method or conforms to buffer protocol"); | |
|
740 | goto except; | |
|
741 | } | |
|
659 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
742 | 660 | |
|
743 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
661 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
662 | if (ZSTD_isError(zresult)) { | |
|
663 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
664 | ZSTD_getErrorName(zresult)); | |
|
665 | return NULL; | |
|
666 | } | |
|
744 | 667 | |
|
745 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
746 | if (ZSTD_isError(zresult)) { | |
|
747 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
748 | ZSTD_getErrorName(zresult)); | |
|
749 | return NULL; | |
|
750 | } | |
|
751 | ||
|
752 | result->compressor = self; | |
|
753 | Py_INCREF(result->compressor); | |
|
668 | result->compressor = self; | |
|
669 | Py_INCREF(result->compressor); | |
|
754 | 670 | |
|
755 |
|
|
|
756 |
|
|
|
671 | result->inSize = inSize; | |
|
672 | result->outSize = outSize; | |
|
757 | 673 | |
|
758 |
|
|
|
759 |
|
|
|
760 |
|
|
|
761 |
|
|
|
762 | } | |
|
763 |
|
|
|
674 | result->output.dst = PyMem_Malloc(outSize); | |
|
675 | if (!result->output.dst) { | |
|
676 | PyErr_NoMemory(); | |
|
677 | goto except; | |
|
678 | } | |
|
679 | result->output.size = outSize; | |
|
764 | 680 | |
|
765 |
|
|
|
681 | goto finally; | |
|
766 | 682 | |
|
767 | 683 | except: |
|
768 |
|
|
|
684 | Py_CLEAR(result); | |
|
769 | 685 | |
|
770 | 686 | finally: |
|
771 |
|
|
|
687 | return result; | |
|
772 | 688 | } |
|
773 | 689 | |
|
774 | PyDoc_STRVAR(ZstdCompressor_stream_writer___doc__, | |
|
775 | "Create a context manager to write compressed data to an object.\n" | |
|
776 | "\n" | |
|
777 | "The passed object must have a ``write()`` method.\n" | |
|
778 | "\n" | |
|
779 | "The caller feeds input data to the object by calling ``compress(data)``.\n" | |
|
780 | "Compressed data is written to the argument given to this function.\n" | |
|
781 | "\n" | |
|
782 | "The function takes an optional ``size`` argument indicating the total size\n" | |
|
783 | "of the eventual input. If specified, the size will influence compression\n" | |
|
784 | "parameter tuning and could result in the size being written into the\n" | |
|
785 | "header of the compressed data.\n" | |
|
786 | "\n" | |
|
787 | "An optional ``write_size`` argument is also accepted. It defines the maximum\n" | |
|
788 | "byte size of chunks fed to ``write()``. By default, it uses the zstd default\n" | |
|
789 | "for a compressor output stream.\n" | |
|
790 | ); | |
|
690 | static ZstdCompressionWriter *ZstdCompressor_stream_writer(ZstdCompressor *self, | |
|
691 | PyObject *args, | |
|
692 | PyObject *kwargs) { | |
|
693 | static char *kwlist[] = { | |
|
694 | "writer", "size", "write_size", "write_return_read", "closefd", NULL}; | |
|
695 | ||
|
696 | PyObject *writer; | |
|
697 | ZstdCompressionWriter *result; | |
|
698 | size_t zresult; | |
|
699 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
700 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
701 | PyObject *writeReturnRead = NULL; | |
|
702 | PyObject *closefd = NULL; | |
|
791 | 703 | |
|
792 | static ZstdCompressionWriter* ZstdCompressor_stream_writer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
793 | static char* kwlist[] = { | |
|
794 | "writer", | |
|
795 | "size", | |
|
796 | "write_size", | |
|
797 | "write_return_read", | |
|
798 | NULL | |
|
799 | }; | |
|
704 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkOO:stream_writer", | |
|
705 | kwlist, &writer, &sourceSize, &outSize, | |
|
706 | &writeReturnRead, &closefd)) { | |
|
707 | return NULL; | |
|
708 | } | |
|
800 | 709 | |
|
801 | PyObject* writer; | |
|
802 | ZstdCompressionWriter* result; | |
|
803 | size_t zresult; | |
|
804 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
805 | size_t outSize = ZSTD_CStreamOutSize(); | |
|
806 | PyObject* writeReturnRead = NULL; | |
|
710 | if (!PyObject_HasAttrString(writer, "write")) { | |
|
711 | PyErr_SetString(PyExc_ValueError, | |
|
712 | "must pass an object with a write() method"); | |
|
713 | return NULL; | |
|
714 | } | |
|
807 | 715 | |
|
808 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|KkO:stream_writer", kwlist, | |
|
809 | &writer, &sourceSize, &outSize, &writeReturnRead)) { | |
|
810 | return NULL; | |
|
811 | } | |
|
716 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
812 | 717 | |
|
813 | if (!PyObject_HasAttrString(writer, "write")) { | |
|
814 | PyErr_SetString(PyExc_ValueError, "must pass an object with a write() method"); | |
|
815 | return NULL; | |
|
816 | } | |
|
817 | ||
|
818 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
718 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
719 | if (ZSTD_isError(zresult)) { | |
|
720 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
721 | ZSTD_getErrorName(zresult)); | |
|
722 | return NULL; | |
|
723 | } | |
|
819 | 724 | |
|
820 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
821 | if (ZSTD_isError(zresult)) { | |
|
822 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
823 | ZSTD_getErrorName(zresult)); | |
|
824 | return NULL; | |
|
825 | } | |
|
725 | result = (ZstdCompressionWriter *)PyObject_CallObject( | |
|
726 | (PyObject *)ZstdCompressionWriterType, NULL); | |
|
727 | if (!result) { | |
|
728 | return NULL; | |
|
729 | } | |
|
826 | 730 | |
|
827 | result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL); | |
|
828 | if (!result) { | |
|
829 | return NULL; | |
|
830 | } | |
|
731 | result->entered = 0; | |
|
732 | result->closing = 0; | |
|
733 | result->closed = 0; | |
|
831 | 734 | |
|
832 |
|
|
|
833 |
|
|
|
834 |
|
|
|
835 |
|
|
|
836 | } | |
|
735 | result->output.dst = PyMem_Malloc(outSize); | |
|
736 | if (!result->output.dst) { | |
|
737 | Py_DECREF(result); | |
|
738 | return (ZstdCompressionWriter *)PyErr_NoMemory(); | |
|
739 | } | |
|
837 | 740 | |
|
838 |
|
|
|
839 |
|
|
|
741 | result->output.pos = 0; | |
|
742 | result->output.size = outSize; | |
|
840 | 743 | |
|
841 |
|
|
|
842 |
|
|
|
744 | result->compressor = self; | |
|
745 | Py_INCREF(result->compressor); | |
|
843 | 746 | |
|
844 |
|
|
|
845 |
|
|
|
747 | result->writer = writer; | |
|
748 | Py_INCREF(result->writer); | |
|
846 | 749 | |
|
847 |
|
|
|
848 |
|
|
|
849 | result->writeReturnRead = writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 0; | |
|
750 | result->outSize = outSize; | |
|
751 | result->bytesCompressed = 0; | |
|
752 | result->writeReturnRead = | |
|
753 | writeReturnRead ? PyObject_IsTrue(writeReturnRead) : 1; | |
|
754 | result->closefd = closefd ? PyObject_IsTrue(closefd) : 1; | |
|
850 | 755 | |
|
851 |
|
|
|
756 | return result; | |
|
852 | 757 | } |
|
853 | 758 | |
|
854 | PyDoc_STRVAR(ZstdCompressor_chunker__doc__, | |
|
855 | "Create an object for iterative compressing to same-sized chunks.\n" | |
|
856 | ); | |
|
857 | ||
|
858 | static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
859 | static char* kwlist[] = { | |
|
860 | "size", | |
|
861 | "chunk_size", | |
|
862 | NULL | |
|
863 | }; | |
|
759 | static ZstdCompressionChunker * | |
|
760 | ZstdCompressor_chunker(ZstdCompressor *self, PyObject *args, PyObject *kwargs) { | |
|
761 | static char *kwlist[] = {"size", "chunk_size", NULL}; | |
|
864 | 762 | |
|
865 |
|
|
|
866 |
|
|
|
867 |
|
|
|
868 |
|
|
|
763 | unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
764 | size_t chunkSize = ZSTD_CStreamOutSize(); | |
|
765 | ZstdCompressionChunker *chunker; | |
|
766 | size_t zresult; | |
|
869 | 767 | |
|
870 |
|
|
|
871 | &sourceSize, &chunkSize)) { | |
|
872 |
|
|
|
873 | } | |
|
768 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist, | |
|
769 | &sourceSize, &chunkSize)) { | |
|
770 | return NULL; | |
|
771 | } | |
|
874 | 772 | |
|
875 |
|
|
|
773 | ZSTD_CCtx_reset(self->cctx, ZSTD_reset_session_only); | |
|
876 | 774 | |
|
877 |
|
|
|
878 |
|
|
|
879 |
|
|
|
880 |
|
|
|
881 |
|
|
|
882 | } | |
|
775 | zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize); | |
|
776 | if (ZSTD_isError(zresult)) { | |
|
777 | PyErr_Format(ZstdError, "error setting source size: %s", | |
|
778 | ZSTD_getErrorName(zresult)); | |
|
779 | return NULL; | |
|
780 | } | |
|
883 | 781 | |
|
884 |
|
|
|
885 | if (!chunker) { | |
|
886 | return NULL; | |
|
887 | } | |
|
782 | chunker = (ZstdCompressionChunker *)PyObject_CallObject( | |
|
783 | (PyObject *)ZstdCompressionChunkerType, NULL); | |
|
784 | if (!chunker) { | |
|
785 | return NULL; | |
|
786 | } | |
|
888 | 787 | |
|
889 |
|
|
|
890 |
|
|
|
891 |
|
|
|
892 |
|
|
|
893 |
|
|
|
894 | } | |
|
895 |
|
|
|
896 |
|
|
|
788 | chunker->output.dst = PyMem_Malloc(chunkSize); | |
|
789 | if (!chunker->output.dst) { | |
|
790 | PyErr_NoMemory(); | |
|
791 | Py_DECREF(chunker); | |
|
792 | return NULL; | |
|
793 | } | |
|
794 | chunker->output.size = chunkSize; | |
|
795 | chunker->output.pos = 0; | |
|
897 | 796 | |
|
898 |
|
|
|
899 |
|
|
|
797 | chunker->compressor = self; | |
|
798 | Py_INCREF(chunker->compressor); | |
|
900 | 799 | |
|
901 |
|
|
|
800 | chunker->chunkSize = chunkSize; | |
|
902 | 801 | |
|
903 |
|
|
|
802 | return chunker; | |
|
904 | 803 | } |
|
905 | 804 | |
|
906 | 805 | typedef struct { |
|
907 |
|
|
|
908 |
|
|
|
806 | void *sourceData; | |
|
807 | size_t sourceSize; | |
|
909 | 808 | } DataSource; |
|
910 | 809 | |
|
911 | 810 | typedef struct { |
|
912 |
|
|
|
913 |
|
|
|
914 |
|
|
|
811 | DataSource *sources; | |
|
812 | Py_ssize_t sourcesSize; | |
|
813 | unsigned long long totalSourceSize; | |
|
915 | 814 | } DataSources; |
|
916 | 815 | |
|
917 | 816 | typedef struct { |
|
918 |
|
|
|
919 |
|
|
|
920 |
|
|
|
921 |
|
|
|
922 | } DestBuffer; | |
|
817 | void *dest; | |
|
818 | Py_ssize_t destSize; | |
|
819 | BufferSegment *segments; | |
|
820 | Py_ssize_t segmentsSize; | |
|
821 | } CompressorDestBuffer; | |
|
923 | 822 | |
|
924 | 823 | typedef enum { |
|
925 |
|
|
|
926 |
|
|
|
927 |
|
|
|
928 |
|
|
|
929 | } WorkerError; | |
|
824 | CompressorWorkerError_none = 0, | |
|
825 | CompressorWorkerError_zstd = 1, | |
|
826 | CompressorWorkerError_no_memory = 2, | |
|
827 | CompressorWorkerError_nospace = 3, | |
|
828 | } CompressorWorkerError; | |
|
930 | 829 | |
|
931 | 830 | /** |
|
932 |
* Holds state for an individual worker performing multi_compress_to_buffer |
|
|
831 | * Holds state for an individual worker performing multi_compress_to_buffer | |
|
832 | * work. | |
|
933 | 833 | */ |
|
934 | 834 | typedef struct { |
|
935 |
|
|
|
936 |
|
|
|
835 | /* Used for compression. */ | |
|
836 | ZSTD_CCtx *cctx; | |
|
937 | 837 | |
|
938 |
|
|
|
939 |
|
|
|
940 |
|
|
|
941 |
|
|
|
942 |
|
|
|
943 |
|
|
|
838 | /* What to compress. */ | |
|
839 | DataSource *sources; | |
|
840 | Py_ssize_t sourcesSize; | |
|
841 | Py_ssize_t startOffset; | |
|
842 | Py_ssize_t endOffset; | |
|
843 | unsigned long long totalSourceSize; | |
|
944 | 844 | |
|
945 |
|
|
|
946 |
|
|
|
947 |
|
|
|
845 | /* Result storage. */ | |
|
846 | CompressorDestBuffer *destBuffers; | |
|
847 | Py_ssize_t destCount; | |
|
948 | 848 | |
|
949 |
|
|
|
950 |
|
|
|
951 |
|
|
|
952 |
|
|
|
953 | } WorkerState; | |
|
849 | /* Error tracking. */ | |
|
850 | CompressorWorkerError error; | |
|
851 | size_t zresult; | |
|
852 | Py_ssize_t errorOffset; | |
|
853 | } CompressorWorkerState; | |
|
954 | 854 | |
|
955 | static void compress_worker(WorkerState* state) { | |
|
956 | Py_ssize_t inputOffset = state->startOffset; | |
|
957 |
|
|
|
958 |
|
|
|
959 | size_t zresult; | |
|
960 | void* newDest; | |
|
961 | size_t allocationSize; | |
|
962 |
|
|
|
963 | Py_ssize_t destOffset = 0; | |
|
964 | DataSource* sources = state->sources; | |
|
965 | DestBuffer* destBuffer; | |
|
855 | #ifdef HAVE_ZSTD_POOL_APIS | |
|
856 | static void compress_worker(CompressorWorkerState *state) { | |
|
857 | Py_ssize_t inputOffset = state->startOffset; | |
|
858 | Py_ssize_t remainingItems = state->endOffset - state->startOffset + 1; | |
|
859 | Py_ssize_t currentBufferStartOffset = state->startOffset; | |
|
860 | size_t zresult; | |
|
861 | void *newDest; | |
|
862 | size_t allocationSize; | |
|
863 | size_t boundSize; | |
|
864 | Py_ssize_t destOffset = 0; | |
|
865 | DataSource *sources = state->sources; | |
|
866 | CompressorDestBuffer *destBuffer; | |
|
966 | 867 | |
|
967 |
|
|
|
968 |
|
|
|
868 | assert(!state->destBuffers); | |
|
869 | assert(0 == state->destCount); | |
|
969 | 870 | |
|
970 | /* | |
|
971 |
|
|
|
972 |
|
|
|
973 | * | |
|
974 |
|
|
|
975 |
|
|
|
976 | * | |
|
977 |
|
|
|
978 |
|
|
|
979 |
|
|
|
980 |
|
|
|
981 |
|
|
|
982 | * | |
|
983 |
|
|
|
984 |
|
|
|
985 |
|
|
|
986 |
|
|
|
987 |
|
|
|
988 | */ | |
|
871 | /* | |
|
872 | * The total size of the compressed data is unknown until we actually | |
|
873 | * compress data. That means we can't pre-allocate the exact size we need. | |
|
874 | * | |
|
875 | * There is a cost to every allocation and reallocation. So, it is in our | |
|
876 | * interest to minimize the number of allocations. | |
|
877 | * | |
|
878 | * There is also a cost to too few allocations. If allocations are too | |
|
879 | * large they may fail. If buffers are shared and all inputs become | |
|
880 | * irrelevant at different lifetimes, then a reference to one segment | |
|
881 | * in the buffer will keep the entire buffer alive. This leads to excessive | |
|
882 | * memory usage. | |
|
883 | * | |
|
884 | * Our current strategy is to assume a compression ratio of 16:1 and | |
|
885 | * allocate buffers of that size, rounded up to the nearest power of 2 | |
|
886 | * (because computers like round numbers). That ratio is greater than what | |
|
887 | * most inputs achieve. This is by design: we don't want to over-allocate. | |
|
888 | * But we don't want to under-allocate and lead to too many buffers either. | |
|
889 | */ | |
|
989 | 890 | |
|
990 |
|
|
|
891 | state->destCount = 1; | |
|
991 | 892 | |
|
992 |
|
|
|
993 |
|
|
|
994 |
|
|
|
995 | return; | |
|
996 | } | |
|
893 | state->destBuffers = calloc(1, sizeof(CompressorDestBuffer)); | |
|
894 | if (NULL == state->destBuffers) { | |
|
895 | state->error = CompressorWorkerError_no_memory; | |
|
896 | return; | |
|
897 | } | |
|
997 | 898 | |
|
998 |
|
|
|
899 | destBuffer = &state->destBuffers[state->destCount - 1]; | |
|
999 | 900 | |
|
1000 | /* | |
|
1001 |
|
|
|
1002 |
|
|
|
1003 | */ | |
|
1004 |
|
|
|
1005 |
|
|
|
1006 |
|
|
|
1007 | return; | |
|
1008 | } | |
|
901 | /* | |
|
902 | * Rather than track bounds and grow the segments buffer, allocate space | |
|
903 | * to hold remaining items then truncate when we're done with it. | |
|
904 | */ | |
|
905 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); | |
|
906 | if (NULL == destBuffer->segments) { | |
|
907 | state->error = CompressorWorkerError_no_memory; | |
|
908 | return; | |
|
909 | } | |
|
1009 | 910 | |
|
1010 |
|
|
|
911 | destBuffer->segmentsSize = remainingItems; | |
|
1011 | 912 | |
|
1012 |
|
|
|
1013 |
|
|
|
913 | assert(state->totalSourceSize <= SIZE_MAX); | |
|
914 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); | |
|
1014 | 915 | |
|
1015 |
|
|
|
1016 |
|
|
|
916 | /* If the maximum size of the output is larger than that, round up. */ | |
|
917 | boundSize = ZSTD_compressBound(sources[inputOffset].sourceSize); | |
|
1017 | 918 | |
|
1018 |
|
|
|
1019 |
|
|
|
1020 | } | |
|
919 | if (boundSize > allocationSize) { | |
|
920 | allocationSize = roundpow2(boundSize); | |
|
921 | } | |
|
1021 | 922 | |
|
1022 |
|
|
|
1023 |
|
|
|
1024 |
|
|
|
1025 | return; | |
|
1026 | } | |
|
923 | destBuffer->dest = malloc(allocationSize); | |
|
924 | if (NULL == destBuffer->dest) { | |
|
925 | state->error = CompressorWorkerError_no_memory; | |
|
926 | return; | |
|
927 | } | |
|
928 | ||
|
929 | destBuffer->destSize = allocationSize; | |
|
1027 | 930 | |
|
1028 | destBuffer->destSize = allocationSize; | |
|
931 | for (inputOffset = state->startOffset; inputOffset <= state->endOffset; | |
|
932 | inputOffset++) { | |
|
933 | void *source = sources[inputOffset].sourceData; | |
|
934 | size_t sourceSize = sources[inputOffset].sourceSize; | |
|
935 | size_t destAvailable; | |
|
936 | void *dest; | |
|
937 | ZSTD_outBuffer opOutBuffer; | |
|
938 | ZSTD_inBuffer opInBuffer; | |
|
1029 | 939 | |
|
1030 | for (inputOffset = state->startOffset; inputOffset <= state->endOffset; inputOffset++) { | |
|
1031 | void* source = sources[inputOffset].sourceData; | |
|
1032 | size_t sourceSize = sources[inputOffset].sourceSize; | |
|
1033 | size_t destAvailable; | |
|
1034 | void* dest; | |
|
1035 | ZSTD_outBuffer opOutBuffer; | |
|
1036 | ZSTD_inBuffer opInBuffer; | |
|
1037 | ||
|
1038 | destAvailable = destBuffer->destSize - destOffset; | |
|
1039 | boundSize = ZSTD_compressBound(sourceSize); | |
|
940 | destAvailable = destBuffer->destSize - destOffset; | |
|
941 | boundSize = ZSTD_compressBound(sourceSize); | |
|
1040 | 942 | |
|
1041 | /* | |
|
1042 |
|
|
|
1043 |
|
|
|
1044 | */ | |
|
1045 |
|
|
|
1046 | /* | |
|
1047 |
|
|
|
1048 |
|
|
|
1049 | */ | |
|
1050 |
|
|
|
1051 |
|
|
|
1052 |
|
|
|
1053 |
|
|
|
1054 | return; | |
|
1055 | } | |
|
943 | /* | |
|
944 | * Not enough space in current buffer to hold largest compressed output. | |
|
945 | * So allocate and switch to a new output buffer. | |
|
946 | */ | |
|
947 | if (boundSize > destAvailable) { | |
|
948 | /* | |
|
949 | * The downsizing of the existing buffer is optional. It should be | |
|
950 | * cheap (unlike growing). So we just do it. | |
|
951 | */ | |
|
952 | if (destAvailable) { | |
|
953 | newDest = realloc(destBuffer->dest, destOffset); | |
|
954 | if (NULL == newDest) { | |
|
955 | state->error = CompressorWorkerError_no_memory; | |
|
956 | return; | |
|
957 | } | |
|
1056 | 958 | |
|
1057 |
|
|
|
1058 |
|
|
|
1059 | } | |
|
959 | destBuffer->dest = newDest; | |
|
960 | destBuffer->destSize = destOffset; | |
|
961 | } | |
|
1060 | 962 | |
|
1061 |
|
|
|
1062 |
|
|
|
1063 |
|
|
|
1064 | if (NULL == newDest) { | |
|
1065 | state->error = WorkerError_no_memory; | |
|
1066 | return; | |
|
1067 | } | |
|
963 | /* Truncate segments buffer. */ | |
|
964 | newDest = realloc(destBuffer->segments, | |
|
965 | (inputOffset - currentBufferStartOffset + 1) * | |
|
966 | sizeof(BufferSegment)); | |
|
967 | if (NULL == newDest) { | |
|
968 | state->error = CompressorWorkerError_no_memory; | |
|
969 | return; | |
|
970 | } | |
|
1068 | 971 | |
|
1069 |
|
|
|
1070 |
|
|
|
972 | destBuffer->segments = newDest; | |
|
973 | destBuffer->segmentsSize = inputOffset - currentBufferStartOffset; | |
|
1071 | 974 | |
|
1072 |
|
|
|
1073 |
|
|
|
1074 | newDest = realloc(state->destBuffers, (state->destCount + 1) * sizeof(DestBuffer)); | |
|
1075 | if (NULL == newDest) { | |
|
1076 | state->error = WorkerError_no_memory; | |
|
1077 | return; | |
|
1078 | } | |
|
975 | /* Grow space for new struct. */ | |
|
976 | /* TODO consider over-allocating so we don't do this every time. */ | |
|
977 | newDest = | |
|
978 | realloc(state->destBuffers, | |
|
979 | (state->destCount + 1) * sizeof(CompressorDestBuffer)); | |
|
980 | if (NULL == newDest) { | |
|
981 | state->error = CompressorWorkerError_no_memory; | |
|
982 | return; | |
|
983 | } | |
|
1079 | 984 | |
|
1080 |
|
|
|
1081 |
|
|
|
985 | state->destBuffers = newDest; | |
|
986 | state->destCount++; | |
|
1082 | 987 | |
|
1083 |
|
|
|
988 | destBuffer = &state->destBuffers[state->destCount - 1]; | |
|
1084 | 989 | |
|
1085 |
|
|
|
1086 |
|
|
|
990 | /* Don't take any chances with non-NULL pointers. */ | |
|
991 | memset(destBuffer, 0, sizeof(CompressorDestBuffer)); | |
|
1087 | 992 | |
|
1088 | /** | |
|
1089 |
|
|
|
1090 |
|
|
|
1091 | */ | |
|
1092 |
|
|
|
1093 |
|
|
|
993 | /** | |
|
994 | * We could dynamically update allocation size based on work done so | |
|
995 | * far. For now, keep is simple. | |
|
996 | */ | |
|
997 | assert(state->totalSourceSize <= SIZE_MAX); | |
|
998 | allocationSize = roundpow2((size_t)state->totalSourceSize >> 4); | |
|
1094 | 999 | |
|
1095 |
|
|
|
1096 |
|
|
|
1097 | } | |
|
1000 | if (boundSize > allocationSize) { | |
|
1001 | allocationSize = roundpow2(boundSize); | |
|
1002 | } | |
|
1098 | 1003 | |
|
1099 |
|
|
|
1100 |
|
|
|
1101 |
|
|
|
1102 | return; | |
|
1103 | } | |
|
1004 | destBuffer->dest = malloc(allocationSize); | |
|
1005 | if (NULL == destBuffer->dest) { | |
|
1006 | state->error = CompressorWorkerError_no_memory; | |
|
1007 | return; | |
|
1008 | } | |
|
1104 | 1009 | |
|
1105 |
|
|
|
1106 |
|
|
|
1107 |
|
|
|
1010 | destBuffer->destSize = allocationSize; | |
|
1011 | destAvailable = allocationSize; | |
|
1012 | destOffset = 0; | |
|
1108 | 1013 | |
|
1109 | destBuffer->segments = calloc(remainingItems, sizeof(BufferSegment)); | |
|
1110 | if (NULL == destBuffer->segments) { | |
|
1111 | state->error = WorkerError_no_memory; | |
|
1112 | return; | |
|
1113 | } | |
|
1014 | destBuffer->segments = | |
|
1015 | calloc(remainingItems, sizeof(BufferSegment)); | |
|
1016 | if (NULL == destBuffer->segments) { | |
|
1017 | state->error = CompressorWorkerError_no_memory; | |
|
1018 | return; | |
|
1019 | } | |
|
1114 | 1020 | |
|
1115 |
|
|
|
1116 |
|
|
|
1117 | } | |
|
1021 | destBuffer->segmentsSize = remainingItems; | |
|
1022 | currentBufferStartOffset = inputOffset; | |
|
1023 | } | |
|
1024 | ||
|
1025 | dest = (char *)destBuffer->dest + destOffset; | |
|
1118 | 1026 | |
|
1119 | dest = (char*)destBuffer->dest + destOffset; | |
|
1027 | opInBuffer.src = source; | |
|
1028 | opInBuffer.size = sourceSize; | |
|
1029 | opInBuffer.pos = 0; | |
|
1120 | 1030 | |
|
1121 | opInBuffer.src = source; | |
|
1122 | opInBuffer.size = sourceSize; | |
|
1123 |
|
|
|
1031 | opOutBuffer.dst = dest; | |
|
1032 | opOutBuffer.size = destAvailable; | |
|
1033 | opOutBuffer.pos = 0; | |
|
1124 | 1034 | |
|
1125 | opOutBuffer.dst = dest; | |
|
1126 | opOutBuffer.size = destAvailable; | |
|
1127 | opOutBuffer.pos = 0; | |
|
1128 | ||
|
1129 | zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize); | |
|
1130 | if (ZSTD_isError(zresult)) { | |
|
1131 | state->error = WorkerError_zstd; | |
|
1132 | state->zresult = zresult; | |
|
1133 | state->errorOffset = inputOffset; | |
|
1134 | break; | |
|
1135 | } | |
|
1035 | zresult = ZSTD_CCtx_setPledgedSrcSize(state->cctx, sourceSize); | |
|
1036 | if (ZSTD_isError(zresult)) { | |
|
1037 | state->error = CompressorWorkerError_zstd; | |
|
1038 | state->zresult = zresult; | |
|
1039 | state->errorOffset = inputOffset; | |
|
1040 | break; | |
|
1041 | } | |
|
1136 | 1042 | |
|
1137 |
|
|
|
1138 | if (ZSTD_isError(zresult)) { | |
|
1139 | state->error = WorkerError_zstd; | |
|
1140 | state->zresult = zresult; | |
|
1141 | state->errorOffset = inputOffset; | |
|
1142 | break; | |
|
1143 | } | |
|
1144 | else if (zresult) { | |
|
1145 | state->error = WorkerError_nospace; | |
|
1146 | state->errorOffset = inputOffset; | |
|
1147 | break; | |
|
1148 | } | |
|
1043 | zresult = ZSTD_compressStream2(state->cctx, &opOutBuffer, &opInBuffer, | |
|
1044 | ZSTD_e_end); | |
|
1045 | if (ZSTD_isError(zresult)) { | |
|
1046 | state->error = CompressorWorkerError_zstd; | |
|
1047 | state->zresult = zresult; | |
|
1048 | state->errorOffset = inputOffset; | |
|
1049 | break; | |
|
1050 | } | |
|
1051 | else if (zresult) { | |
|
1052 | state->error = CompressorWorkerError_nospace; | |
|
1053 | state->errorOffset = inputOffset; | |
|
1054 | break; | |
|
1055 | } | |
|
1149 | 1056 | |
|
1150 |
|
|
|
1151 | destBuffer->segments[inputOffset - currentBufferStartOffset].length = opOutBuffer.pos; | |
|
1057 | destBuffer->segments[inputOffset - currentBufferStartOffset].offset = | |
|
1058 | destOffset; | |
|
1059 | destBuffer->segments[inputOffset - currentBufferStartOffset].length = | |
|
1060 | opOutBuffer.pos; | |
|
1152 | 1061 | |
|
1153 |
|
|
|
1154 |
|
|
|
1155 | } | |
|
1062 | destOffset += opOutBuffer.pos; | |
|
1063 | remainingItems--; | |
|
1064 | } | |
|
1156 | 1065 | |
|
1157 |
|
|
|
1158 |
|
|
|
1159 |
|
|
|
1160 |
|
|
|
1161 | return; | |
|
1162 | } | |
|
1066 | if (destBuffer->destSize > destOffset) { | |
|
1067 | newDest = realloc(destBuffer->dest, destOffset); | |
|
1068 | if (NULL == newDest) { | |
|
1069 | state->error = CompressorWorkerError_no_memory; | |
|
1070 | return; | |
|
1071 | } | |
|
1163 | 1072 | |
|
1164 |
|
|
|
1165 |
|
|
|
1166 | } | |
|
1073 | destBuffer->dest = newDest; | |
|
1074 | destBuffer->destSize = destOffset; | |
|
1075 | } | |
|
1167 | 1076 | } |
|
1077 | #endif | |
|
1078 | ||
|
1079 | /* We can only use the pool.h APIs if we provide the full library, | |
|
1080 | as these are private APIs. */ | |
|
1081 | #ifdef HAVE_ZSTD_POOL_APIS | |
|
1168 | 1082 | |
|
1169 | ZstdBufferWithSegmentsCollection* compress_from_datasources(ZstdCompressor* compressor, | |
|
1170 | DataSources* sources, Py_ssize_t threadCount) { | |
|
1171 | unsigned long long bytesPerWorker; | |
|
1172 | POOL_ctx* pool = NULL; | |
|
1173 | WorkerState* workerStates = NULL; | |
|
1174 | Py_ssize_t i; | |
|
1175 | unsigned long long workerBytes = 0; | |
|
1176 | Py_ssize_t workerStartOffset = 0; | |
|
1177 | Py_ssize_t currentThread = 0; | |
|
1178 | int errored = 0; | |
|
1179 | Py_ssize_t segmentsCount = 0; | |
|
1180 |
|
|
|
1181 | PyObject* segmentsArg = NULL; | |
|
1182 | ZstdBufferWithSegments* buffer; | |
|
1183 | ZstdBufferWithSegmentsCollection* result = NULL; | |
|
1184 | ||
|
1185 | assert(sources->sourcesSize > 0); | |
|
1186 | assert(sources->totalSourceSize > 0); | |
|
1187 | assert(threadCount >= 1); | |
|
1083 | ZstdBufferWithSegmentsCollection * | |
|
1084 | compress_from_datasources(ZstdCompressor *compressor, DataSources *sources, | |
|
1085 | Py_ssize_t threadCount) { | |
|
1086 | unsigned long long bytesPerWorker; | |
|
1087 | POOL_ctx *pool = NULL; | |
|
1088 | CompressorWorkerState *workerStates = NULL; | |
|
1089 | Py_ssize_t i; | |
|
1090 | unsigned long long workerBytes = 0; | |
|
1091 | Py_ssize_t workerStartOffset = 0; | |
|
1092 | Py_ssize_t currentThread = 0; | |
|
1093 | int errored = 0; | |
|
1094 | Py_ssize_t segmentsCount = 0; | |
|
1095 | Py_ssize_t segmentIndex; | |
|
1096 | PyObject *segmentsArg = NULL; | |
|
1097 | ZstdBufferWithSegments *buffer; | |
|
1098 | ZstdBufferWithSegmentsCollection *result = NULL; | |
|
1188 | 1099 | |
|
1189 | /* More threads than inputs makes no sense. */ | |
|
1190 | threadCount = sources->sourcesSize < threadCount ? sources->sourcesSize | |
|
1191 | : threadCount; | |
|
1100 | assert(sources->sourcesSize > 0); | |
|
1101 | assert(sources->totalSourceSize > 0); | |
|
1102 | assert(threadCount >= 1); | |
|
1192 | 1103 | |
|
1193 | /* TODO lower thread count when input size is too small and threads would add | |
|
1194 | overhead. */ | |
|
1104 | /* More threads than inputs makes no sense. */ | |
|
1105 | threadCount = | |
|
1106 | sources->sourcesSize < threadCount ? sources->sourcesSize : threadCount; | |
|
1195 | 1107 | |
|
1196 | workerStates = PyMem_Malloc(threadCount * sizeof(WorkerState)); | |
|
1197 | if (NULL == workerStates) { | |
|
1198 | PyErr_NoMemory(); | |
|
1199 | goto finally; | |
|
1200 | } | |
|
1201 | ||
|
1202 | memset(workerStates, 0, threadCount * sizeof(WorkerState)); | |
|
1108 | /* TODO lower thread count when input size is too small and threads would | |
|
1109 | add overhead. */ | |
|
1203 | 1110 | |
|
1204 | if (threadCount > 1) { | |
|
1205 | pool = POOL_create(threadCount, 1); | |
|
1206 | if (NULL == pool) { | |
|
1207 | PyErr_SetString(ZstdError, "could not initialize zstd thread pool"); | |
|
1208 | goto finally; | |
|
1209 | } | |
|
1210 | } | |
|
1111 | workerStates = PyMem_Malloc(threadCount * sizeof(CompressorWorkerState)); | |
|
1112 | if (NULL == workerStates) { | |
|
1113 | PyErr_NoMemory(); | |
|
1114 | goto finally; | |
|
1115 | } | |
|
1116 | ||
|
1117 | memset(workerStates, 0, threadCount * sizeof(CompressorWorkerState)); | |
|
1211 | 1118 | |
|
1212 | bytesPerWorker = sources->totalSourceSize / threadCount; | |
|
1119 | if (threadCount > 1) { | |
|
1120 | pool = POOL_create(threadCount, 1); | |
|
1121 | if (NULL == pool) { | |
|
1122 | PyErr_SetString(ZstdError, "could not initialize zstd thread pool"); | |
|
1123 | goto finally; | |
|
1124 | } | |
|
1125 | } | |
|
1213 | 1126 | |
|
1214 | for (i = 0; i < threadCount; i++) { | |
|
1215 | size_t zresult; | |
|
1127 | bytesPerWorker = sources->totalSourceSize / threadCount; | |
|
1128 | ||
|
1129 | for (i = 0; i < threadCount; i++) { | |
|
1130 | size_t zresult; | |
|
1216 | 1131 | |
|
1217 |
|
|
|
1218 |
|
|
|
1219 |
|
|
|
1220 |
|
|
|
1221 | } | |
|
1132 | workerStates[i].cctx = ZSTD_createCCtx(); | |
|
1133 | if (!workerStates[i].cctx) { | |
|
1134 | PyErr_NoMemory(); | |
|
1135 | goto finally; | |
|
1136 | } | |
|
1222 | 1137 | |
|
1223 |
|
|
|
1224 | compressor->params); | |
|
1225 |
|
|
|
1226 |
|
|
|
1227 |
|
|
|
1228 |
|
|
|
1229 | } | |
|
1138 | zresult = ZSTD_CCtx_setParametersUsingCCtxParams(workerStates[i].cctx, | |
|
1139 | compressor->params); | |
|
1140 | if (ZSTD_isError(zresult)) { | |
|
1141 | PyErr_Format(ZstdError, "could not set compression parameters: %s", | |
|
1142 | ZSTD_getErrorName(zresult)); | |
|
1143 | goto finally; | |
|
1144 | } | |
|
1230 | 1145 | |
|
1231 |
|
|
|
1232 |
|
|
|
1233 |
|
|
|
1234 | } | |
|
1235 | else { | |
|
1236 | zresult = ZSTD_CCtx_loadDictionary_advanced( | |
|
1237 | workerStates[i].cctx, | |
|
1238 | compressor->dict->dictData, | |
|
1239 | compressor->dict->dictSize, | |
|
1240 | ZSTD_dlm_byRef, | |
|
1241 | compressor->dict->dictType); | |
|
1242 | } | |
|
1243 | ||
|
1244 | if (ZSTD_isError(zresult)) { | |
|
1245 | PyErr_Format(ZstdError, "could not load compression dictionary: %s", | |
|
1246 | ZSTD_getErrorName(zresult)); | |
|
1247 | goto finally; | |
|
1248 | } | |
|
1146 | if (compressor->dict) { | |
|
1147 | if (compressor->dict->cdict) { | |
|
1148 | zresult = ZSTD_CCtx_refCDict(workerStates[i].cctx, | |
|
1149 | compressor->dict->cdict); | |
|
1150 | } | |
|
1151 | else { | |
|
1152 | zresult = ZSTD_CCtx_loadDictionary_advanced( | |
|
1153 | workerStates[i].cctx, compressor->dict->dictData, | |
|
1154 | compressor->dict->dictSize, ZSTD_dlm_byRef, | |
|
1155 | compressor->dict->dictType); | |
|
1156 | } | |
|
1249 | 1157 | |
|
1250 | } | |
|
1158 | if (ZSTD_isError(zresult)) { | |
|
1159 | PyErr_Format(ZstdError, | |
|
1160 | "could not load compression dictionary: %s", | |
|
1161 | ZSTD_getErrorName(zresult)); | |
|
1162 | goto finally; | |
|
1163 | } | |
|
1164 | } | |
|
1251 | 1165 | |
|
1252 |
|
|
|
1253 |
|
|
|
1254 | } | |
|
1255 | ||
|
1256 | Py_BEGIN_ALLOW_THREADS | |
|
1257 | for (i = 0; i < sources->sourcesSize; i++) { | |
|
1258 | workerBytes += sources->sources[i].sourceSize; | |
|
1166 | workerStates[i].sources = sources->sources; | |
|
1167 | workerStates[i].sourcesSize = sources->sourcesSize; | |
|
1168 | } | |
|
1259 | 1169 | |
|
1260 | /* | |
|
1261 | * The last worker/thread needs to handle all remaining work. Don't | |
|
1262 | * trigger it prematurely. Defer to the block outside of the loop | |
|
1263 | * to run the last worker/thread. But do still process this loop | |
|
1264 | * so workerBytes is correct. | |
|
1265 | */ | |
|
1266 | if (currentThread == threadCount - 1) { | |
|
1267 | continue; | |
|
1268 | } | |
|
1170 | Py_BEGIN_ALLOW_THREADS for (i = 0; i < sources->sourcesSize; i++) { | |
|
1171 | workerBytes += sources->sources[i].sourceSize; | |
|
1172 | ||
|
1173 | /* | |
|
1174 | * The last worker/thread needs to handle all remaining work. Don't | |
|
1175 | * trigger it prematurely. Defer to the block outside of the loop | |
|
1176 | * to run the last worker/thread. But do still process this loop | |
|
1177 | * so workerBytes is correct. | |
|
1178 | */ | |
|
1179 | if (currentThread == threadCount - 1) { | |
|
1180 | continue; | |
|
1181 | } | |
|
1269 | 1182 | |
|
1270 |
|
|
|
1271 |
|
|
|
1272 |
|
|
|
1273 |
|
|
|
1274 |
|
|
|
1183 | if (workerBytes >= bytesPerWorker) { | |
|
1184 | assert(currentThread < threadCount); | |
|
1185 | workerStates[currentThread].totalSourceSize = workerBytes; | |
|
1186 | workerStates[currentThread].startOffset = workerStartOffset; | |
|
1187 | workerStates[currentThread].endOffset = i; | |
|
1275 | 1188 | |
|
1276 |
|
|
|
1277 |
|
|
|
1278 | } | |
|
1279 | else { | |
|
1280 | compress_worker(&workerStates[currentThread]); | |
|
1281 | } | |
|
1189 | if (threadCount > 1) { | |
|
1190 | POOL_add(pool, (POOL_function)compress_worker, | |
|
1191 | &workerStates[currentThread]); | |
|
1192 | } | |
|
1193 | else { | |
|
1194 | compress_worker(&workerStates[currentThread]); | |
|
1195 | } | |
|
1282 | 1196 | |
|
1283 |
|
|
|
1284 |
|
|
|
1285 |
|
|
|
1286 | } | |
|
1287 | } | |
|
1197 | currentThread++; | |
|
1198 | workerStartOffset = i + 1; | |
|
1199 | workerBytes = 0; | |
|
1200 | } | |
|
1201 | } | |
|
1288 | 1202 | |
|
1289 |
|
|
|
1290 |
|
|
|
1291 |
|
|
|
1292 |
|
|
|
1293 |
|
|
|
1203 | if (workerBytes) { | |
|
1204 | assert(currentThread < threadCount); | |
|
1205 | workerStates[currentThread].totalSourceSize = workerBytes; | |
|
1206 | workerStates[currentThread].startOffset = workerStartOffset; | |
|
1207 | workerStates[currentThread].endOffset = sources->sourcesSize - 1; | |
|
1294 | 1208 | |
|
1295 |
|
|
|
1296 |
|
|
|
1297 | } | |
|
1298 | else { | |
|
1299 | compress_worker(&workerStates[currentThread]); | |
|
1300 | } | |
|
1301 | } | |
|
1209 | if (threadCount > 1) { | |
|
1210 | POOL_add(pool, (POOL_function)compress_worker, | |
|
1211 | &workerStates[currentThread]); | |
|
1212 | } | |
|
1213 | else { | |
|
1214 | compress_worker(&workerStates[currentThread]); | |
|
1215 | } | |
|
1216 | } | |
|
1302 | 1217 | |
|
1303 |
|
|
|
1304 |
|
|
|
1305 |
|
|
|
1306 | } | |
|
1218 | if (threadCount > 1) { | |
|
1219 | POOL_free(pool); | |
|
1220 | pool = NULL; | |
|
1221 | } | |
|
1307 | 1222 | |
|
1308 |
|
|
|
1223 | Py_END_ALLOW_THREADS | |
|
1309 | 1224 | |
|
1310 |
|
|
|
1311 |
|
|
|
1312 |
|
|
|
1313 |
|
|
|
1314 |
|
|
|
1315 | break; | |
|
1225 | for (i = 0; i < threadCount; i++) { | |
|
1226 | switch (workerStates[i].error) { | |
|
1227 | case CompressorWorkerError_no_memory: | |
|
1228 | PyErr_NoMemory(); | |
|
1229 | errored = 1; | |
|
1230 | break; | |
|
1316 | 1231 | |
|
1317 |
|
|
|
1318 |
|
|
|
1319 | workerStates[i].errorOffset, ZSTD_getErrorName(workerStates[i].zresult)); | |
|
1320 | errored = 1; | |
|
1321 | break; | |
|
1232 | case CompressorWorkerError_zstd: | |
|
1233 | PyErr_Format(ZstdError, "error compressing item %zd: %s", | |
|
1234 | workerStates[i].errorOffset, | |
|
1235 | ZSTD_getErrorName(workerStates[i].zresult)); | |
|
1236 | errored = 1; | |
|
1237 | break; | |
|
1322 | 1238 | |
|
1323 |
|
|
|
1324 | PyErr_Format(ZstdError, "error compressing item %zd: not enough space in output", | |
|
1325 | workerStates[i].errorOffset); | |
|
1326 | errored = 1; | |
|
1327 | break; | |
|
1239 | case CompressorWorkerError_nospace: | |
|
1240 | PyErr_Format( | |
|
1241 | ZstdError, | |
|
1242 | "error compressing item %zd: not enough space in output", | |
|
1243 | workerStates[i].errorOffset); | |
|
1244 | errored = 1; | |
|
1245 | break; | |
|
1328 | 1246 | |
|
1329 | default: | |
|
1330 | ; | |
|
1331 | } | |
|
1332 | ||
|
1333 | if (errored) { | |
|
1334 | break; | |
|
1335 | } | |
|
1247 | default:; | |
|
1248 | } | |
|
1336 | 1249 | |
|
1337 | } | |
|
1250 | if (errored) { | |
|
1251 | break; | |
|
1252 | } | |
|
1253 | } | |
|
1338 | 1254 | |
|
1339 |
|
|
|
1340 |
|
|
|
1341 | } | |
|
1255 | if (errored) { | |
|
1256 | goto finally; | |
|
1257 | } | |
|
1342 | 1258 | |
|
1343 |
|
|
|
1344 |
|
|
|
1345 |
|
|
|
1346 |
|
|
|
1347 | } | |
|
1259 | segmentsCount = 0; | |
|
1260 | for (i = 0; i < threadCount; i++) { | |
|
1261 | CompressorWorkerState *state = &workerStates[i]; | |
|
1262 | segmentsCount += state->destCount; | |
|
1263 | } | |
|
1348 | 1264 | |
|
1349 |
|
|
|
1350 |
|
|
|
1351 |
|
|
|
1352 | } | |
|
1265 | segmentsArg = PyTuple_New(segmentsCount); | |
|
1266 | if (NULL == segmentsArg) { | |
|
1267 | goto finally; | |
|
1268 | } | |
|
1353 | 1269 | |
|
1354 |
|
|
|
1270 | segmentIndex = 0; | |
|
1355 | 1271 | |
|
1356 |
|
|
|
1357 |
|
|
|
1358 |
|
|
|
1272 | for (i = 0; i < threadCount; i++) { | |
|
1273 | Py_ssize_t j; | |
|
1274 | CompressorWorkerState *state = &workerStates[i]; | |
|
1359 | 1275 | |
|
1360 |
|
|
|
1361 |
|
|
|
1362 | buffer = BufferWithSegments_FromMemory(destBuffer->dest, destBuffer->destSize, | |
|
1363 | destBuffer->segments, destBuffer->segmentsSize); | |
|
1276 | for (j = 0; j < state->destCount; j++) { | |
|
1277 | CompressorDestBuffer *destBuffer = &state->destBuffers[j]; | |
|
1278 | buffer = BufferWithSegments_FromMemory( | |
|
1279 | destBuffer->dest, destBuffer->destSize, destBuffer->segments, | |
|
1280 | destBuffer->segmentsSize); | |
|
1364 | 1281 | |
|
1365 |
|
|
|
1366 | goto finally; | |
|
1367 | } | |
|
1282 | if (NULL == buffer) { | |
|
1283 | goto finally; | |
|
1284 | } | |
|
1368 | 1285 | |
|
1369 |
|
|
|
1370 |
|
|
|
1286 | /* Tell instance to use free() instsead of PyMem_Free(). */ | |
|
1287 | buffer->useFree = 1; | |
|
1371 | 1288 | |
|
1372 | /* | |
|
1373 |
|
|
|
1374 |
|
|
|
1375 | */ | |
|
1376 |
|
|
|
1377 |
|
|
|
1289 | /* | |
|
1290 | * BufferWithSegments_FromMemory takes ownership of the backing | |
|
1291 | * memory. Unset it here so it doesn't get freed below. | |
|
1292 | */ | |
|
1293 | destBuffer->dest = NULL; | |
|
1294 | destBuffer->segments = NULL; | |
|
1378 | 1295 | |
|
1379 |
|
|
|
1380 | } | |
|
1381 | } | |
|
1296 | PyTuple_SET_ITEM(segmentsArg, segmentIndex++, (PyObject *)buffer); | |
|
1297 | } | |
|
1298 | } | |
|
1382 | 1299 | |
|
1383 |
|
|
|
1384 |
|
|
|
1300 | result = (ZstdBufferWithSegmentsCollection *)PyObject_CallObject( | |
|
1301 | (PyObject *)ZstdBufferWithSegmentsCollectionType, segmentsArg); | |
|
1385 | 1302 | |
|
1386 | 1303 | finally: |
|
1387 |
|
|
|
1304 | Py_CLEAR(segmentsArg); | |
|
1388 | 1305 | |
|
1389 |
|
|
|
1390 |
|
|
|
1391 | } | |
|
1306 | if (pool) { | |
|
1307 | POOL_free(pool); | |
|
1308 | } | |
|
1309 | ||
|
1310 | if (workerStates) { | |
|
1311 | Py_ssize_t j; | |
|
1392 | 1312 | |
|
1393 | if (workerStates) { | |
|
1394 | Py_ssize_t j; | |
|
1313 | for (i = 0; i < threadCount; i++) { | |
|
1314 | CompressorWorkerState state = workerStates[i]; | |
|
1395 | 1315 | |
|
1396 | for (i = 0; i < threadCount; i++) { | |
|
1397 | WorkerState state = workerStates[i]; | |
|
1316 | if (state.cctx) { | |
|
1317 | ZSTD_freeCCtx(state.cctx); | |
|
1318 | } | |
|
1398 | 1319 | |
|
1399 | if (state.cctx) { | |
|
1400 | ZSTD_freeCCtx(state.cctx); | |
|
1401 | } | |
|
1320 | /* malloc() is used in worker thread. */ | |
|
1402 | 1321 | |
|
1403 | /* malloc() is used in worker thread. */ | |
|
1322 | for (j = 0; j < state.destCount; j++) { | |
|
1323 | if (state.destBuffers) { | |
|
1324 | free(state.destBuffers[j].dest); | |
|
1325 | free(state.destBuffers[j].segments); | |
|
1326 | } | |
|
1327 | } | |
|
1404 | 1328 | |
|
1405 | for (j = 0; j < state.destCount; j++) { | |
|
1406 | if (state.destBuffers) { | |
|
1407 | free(state.destBuffers[j].dest); | |
|
1408 | free(state.destBuffers[j].segments); | |
|
1409 | } | |
|
1410 | } | |
|
1329 | free(state.destBuffers); | |
|
1330 | } | |
|
1411 | 1331 | |
|
1412 | ||
|
1413 | free(state.destBuffers); | |
|
1414 | } | |
|
1332 | PyMem_Free(workerStates); | |
|
1333 | } | |
|
1415 | 1334 | |
|
1416 | PyMem_Free(workerStates); | |
|
1417 |
|
|
|
1335 | return result; | |
|
1336 | } | |
|
1337 | #endif | |
|
1418 | 1338 | |
|
1419 | return result; | |
|
1420 | } | |
|
1339 | #ifdef HAVE_ZSTD_POOL_APIS | |
|
1340 | static ZstdBufferWithSegmentsCollection * | |
|
1341 | ZstdCompressor_multi_compress_to_buffer(ZstdCompressor *self, PyObject *args, | |
|
1342 | PyObject *kwargs) { | |
|
1343 | static char *kwlist[] = {"data", "threads", NULL}; | |
|
1421 | 1344 | |
|
1422 | PyDoc_STRVAR(ZstdCompressor_multi_compress_to_buffer__doc__, | |
|
1423 | "Compress multiple pieces of data as a single operation\n" | |
|
1424 | "\n" | |
|
1425 | "Receives a ``BufferWithSegmentsCollection``, a ``BufferWithSegments``, or\n" | |
|
1426 | "a list of bytes like objects holding data to compress.\n" | |
|
1427 | "\n" | |
|
1428 | "Returns a ``BufferWithSegmentsCollection`` holding compressed data.\n" | |
|
1429 | "\n" | |
|
1430 | "This function is optimized to perform multiple compression operations as\n" | |
|
1431 | "as possible with as little overhead as possbile.\n" | |
|
1432 | ); | |
|
1345 | PyObject *data; | |
|
1346 | int threads = 0; | |
|
1347 | Py_buffer *dataBuffers = NULL; | |
|
1348 | DataSources sources; | |
|
1349 | Py_ssize_t i; | |
|
1350 | Py_ssize_t sourceCount = 0; | |
|
1351 | ZstdBufferWithSegmentsCollection *result = NULL; | |
|
1352 | ||
|
1353 | memset(&sources, 0, sizeof(sources)); | |
|
1433 | 1354 | |
|
1434 | static ZstdBufferWithSegmentsCollection* ZstdCompressor_multi_compress_to_buffer(ZstdCompressor* self, PyObject* args, PyObject* kwargs) { | |
|
1435 | static char* kwlist[] = { | |
|
1436 | "data", | |
|
1437 | "threads", | |
|
1438 | NULL | |
|
1439 | }; | |
|
1355 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, | |
|
1356 | "O|i:multi_compress_to_buffer", kwlist, | |
|
1357 | &data, &threads)) { | |
|
1358 | return NULL; | |
|
1359 | } | |
|
1440 | 1360 | |
|
1441 | PyObject* data; | |
|
1442 | int threads = 0; | |
|
1443 | Py_buffer* dataBuffers = NULL; | |
|
1444 | DataSources sources; | |
|
1445 | Py_ssize_t i; | |
|
1446 | Py_ssize_t sourceCount = 0; | |
|
1447 | ZstdBufferWithSegmentsCollection* result = NULL; | |
|
1361 | if (threads < 0) { | |
|
1362 | threads = cpu_count(); | |
|
1363 | } | |
|
1448 | 1364 | |
|
1449 | memset(&sources, 0, sizeof(sources)); | |
|
1365 | if (threads < 2) { | |
|
1366 | threads = 1; | |
|
1367 | } | |
|
1450 | 1368 | |
|
1451 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|i:multi_compress_to_buffer", kwlist, | |
|
1452 | &data, &threads)) { | |
|
1453 | return NULL; | |
|
1454 | } | |
|
1369 | if (PyObject_TypeCheck(data, ZstdBufferWithSegmentsType)) { | |
|
1370 | ZstdBufferWithSegments *buffer = (ZstdBufferWithSegments *)data; | |
|
1455 | 1371 | |
|
1456 | if (threads < 0) { | |
|
1457 | threads = cpu_count(); | |
|
1458 | } | |
|
1459 | ||
|
1460 | if (threads < 2) { | |
|
1461 | threads = 1; | |
|
1462 | } | |
|
1372 | sources.sources = | |
|
1373 | PyMem_Malloc(buffer->segmentCount * sizeof(DataSource)); | |
|
1374 | if (NULL == sources.sources) { | |
|
1375 | PyErr_NoMemory(); | |
|
1376 | goto finally; | |
|
1377 | } | |
|
1463 | 1378 | |
|
1464 | if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsType)) { | |
|
1465 | ZstdBufferWithSegments* buffer = (ZstdBufferWithSegments*)data; | |
|
1379 | for (i = 0; i < buffer->segmentCount; i++) { | |
|
1380 | if (buffer->segments[i].length > SIZE_MAX) { | |
|
1381 | PyErr_Format( | |
|
1382 | PyExc_ValueError, | |
|
1383 | "buffer segment %zd is too large for this platform", i); | |
|
1384 | goto finally; | |
|
1385 | } | |
|
1466 | 1386 | |
|
1467 | sources.sources = PyMem_Malloc(buffer->segmentCount * sizeof(DataSource)); | |
|
1468 | if (NULL == sources.sources) { | |
|
1469 | PyErr_NoMemory(); | |
|
1470 | goto finally; | |
|
1471 | } | |
|
1472 | ||
|
1473 | for (i = 0; i < buffer->segmentCount; i++) { | |
|
1474 | if (buffer->segments[i].length > SIZE_MAX) { | |
|
1475 | PyErr_Format(PyExc_ValueError, | |
|
1476 | "buffer segment %zd is too large for this platform", i); | |
|
1477 | goto finally; | |
|
1478 | } | |
|
1387 | sources.sources[i].sourceData = | |
|
1388 | (char *)buffer->data + buffer->segments[i].offset; | |
|
1389 | sources.sources[i].sourceSize = (size_t)buffer->segments[i].length; | |
|
1390 | sources.totalSourceSize += buffer->segments[i].length; | |
|
1391 | } | |
|
1479 | 1392 | |
|
1480 | sources.sources[i].sourceData = (char*)buffer->data + buffer->segments[i].offset; | |
|
1481 | sources.sources[i].sourceSize = (size_t)buffer->segments[i].length; | |
|
1482 | sources.totalSourceSize += buffer->segments[i].length; | |
|
1483 | } | |
|
1393 | sources.sourcesSize = buffer->segmentCount; | |
|
1394 | } | |
|
1395 | else if (PyObject_TypeCheck(data, ZstdBufferWithSegmentsCollectionType)) { | |
|
1396 | Py_ssize_t j; | |
|
1397 | Py_ssize_t offset = 0; | |
|
1398 | ZstdBufferWithSegments *buffer; | |
|
1399 | ZstdBufferWithSegmentsCollection *collection = | |
|
1400 | (ZstdBufferWithSegmentsCollection *)data; | |
|
1484 | 1401 | |
|
1485 | sources.sourcesSize = buffer->segmentCount; | |
|
1486 | } | |
|
1487 | else if (PyObject_TypeCheck(data, &ZstdBufferWithSegmentsCollectionType)) { | |
|
1488 | Py_ssize_t j; | |
|
1489 | Py_ssize_t offset = 0; | |
|
1490 | ZstdBufferWithSegments* buffer; | |
|
1491 | ZstdBufferWithSegmentsCollection* collection = (ZstdBufferWithSegmentsCollection*)data; | |
|
1402 | sourceCount = BufferWithSegmentsCollection_length(collection); | |
|
1492 | 1403 | |
|
1493 | sourceCount = BufferWithSegmentsCollection_length(collection); | |
|
1404 | sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource)); | |
|
1405 | if (NULL == sources.sources) { | |
|
1406 | PyErr_NoMemory(); | |
|
1407 | goto finally; | |
|
1408 | } | |
|
1494 | 1409 | |
|
1495 | sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource)); | |
|
1496 | if (NULL == sources.sources) { | |
|
1497 | PyErr_NoMemory(); | |
|
1498 | goto finally; | |
|
1499 | } | |
|
1410 | for (i = 0; i < collection->bufferCount; i++) { | |
|
1411 | buffer = collection->buffers[i]; | |
|
1500 | 1412 | |
|
1501 |
|
|
|
1502 | buffer = collection->buffers[i]; | |
|
1503 | ||
|
1504 | for (j = 0; j < buffer->segmentCount; j++) { | |
|
1505 | if (buffer->segments[j].length > SIZE_MAX) { | |
|
1506 | PyErr_Format(PyExc_ValueError, | |
|
1507 | "buffer segment %zd in buffer %zd is too large for this platform", | |
|
1508 | j, i); | |
|
1509 | goto finally; | |
|
1510 | } | |
|
1413 | for (j = 0; j < buffer->segmentCount; j++) { | |
|
1414 | if (buffer->segments[j].length > SIZE_MAX) { | |
|
1415 | PyErr_Format(PyExc_ValueError, | |
|
1416 | "buffer segment %zd in buffer %zd is too " | |
|
1417 | "large for this platform", | |
|
1418 | j, i); | |
|
1419 | goto finally; | |
|
1420 | } | |
|
1511 | 1421 | |
|
1512 | sources.sources[offset].sourceData = (char*)buffer->data + buffer->segments[j].offset; | |
|
1513 | sources.sources[offset].sourceSize = (size_t)buffer->segments[j].length; | |
|
1514 | sources.totalSourceSize += buffer->segments[j].length; | |
|
1422 | sources.sources[offset].sourceData = | |
|
1423 | (char *)buffer->data + buffer->segments[j].offset; | |
|
1424 | sources.sources[offset].sourceSize = | |
|
1425 | (size_t)buffer->segments[j].length; | |
|
1426 | sources.totalSourceSize += buffer->segments[j].length; | |
|
1515 | 1427 | |
|
1516 | offset++; | |
|
1517 | } | |
|
1518 | } | |
|
1428 | offset++; | |
|
1429 | } | |
|
1430 | } | |
|
1519 | 1431 | |
|
1520 |
|
|
|
1521 | } | |
|
1522 |
|
|
|
1523 |
|
|
|
1432 | sources.sourcesSize = sourceCount; | |
|
1433 | } | |
|
1434 | else if (PyList_Check(data)) { | |
|
1435 | sourceCount = PyList_GET_SIZE(data); | |
|
1524 | 1436 | |
|
1525 |
|
|
|
1526 |
|
|
|
1527 |
|
|
|
1528 |
|
|
|
1529 | } | |
|
1437 | sources.sources = PyMem_Malloc(sourceCount * sizeof(DataSource)); | |
|
1438 | if (NULL == sources.sources) { | |
|
1439 | PyErr_NoMemory(); | |
|
1440 | goto finally; | |
|
1441 | } | |
|
1530 | 1442 | |
|
1531 |
|
|
|
1532 |
|
|
|
1533 |
|
|
|
1534 |
|
|
|
1535 | } | |
|
1443 | dataBuffers = PyMem_Malloc(sourceCount * sizeof(Py_buffer)); | |
|
1444 | if (NULL == dataBuffers) { | |
|
1445 | PyErr_NoMemory(); | |
|
1446 | goto finally; | |
|
1447 | } | |
|
1536 | 1448 | |
|
1537 |
|
|
|
1449 | memset(dataBuffers, 0, sourceCount * sizeof(Py_buffer)); | |
|
1538 | 1450 | |
|
1539 |
|
|
|
1540 |
|
|
|
1541 |
|
|
|
1542 | PyErr_Clear(); | |
|
1543 | PyErr_Format(PyExc_TypeError, "item %zd not a bytes like object", i); | |
|
1544 | goto finally; | |
|
1545 | } | |
|
1451 | for (i = 0; i < sourceCount; i++) { | |
|
1452 | if (0 != PyObject_GetBuffer(PyList_GET_ITEM(data, i), | |
|
1453 | &dataBuffers[i], PyBUF_CONTIG_RO)) { | |
|
1454 | PyErr_Clear(); | |
|
1455 | PyErr_Format(PyExc_TypeError, | |
|
1456 | "item %zd not a bytes like object", i); | |
|
1457 | goto finally; | |
|
1458 | } | |
|
1546 | 1459 | |
|
1547 |
|
|
|
1548 |
|
|
|
1549 |
|
|
|
1550 | } | |
|
1460 | sources.sources[i].sourceData = dataBuffers[i].buf; | |
|
1461 | sources.sources[i].sourceSize = dataBuffers[i].len; | |
|
1462 | sources.totalSourceSize += dataBuffers[i].len; | |
|
1463 | } | |
|
1551 | 1464 | |
|
1552 |
|
|
|
1553 | } | |
|
1554 |
|
|
|
1555 | PyErr_SetString(PyExc_TypeError, "argument must be list of BufferWithSegments"); | |
|
1556 | goto finally; | |
|
1557 | } | |
|
1465 | sources.sourcesSize = sourceCount; | |
|
1466 | } | |
|
1467 | else { | |
|
1468 | PyErr_SetString(PyExc_TypeError, | |
|
1469 | "argument must be list of BufferWithSegments"); | |
|
1470 | goto finally; | |
|
1471 | } | |
|
1558 | 1472 | |
|
1559 |
|
|
|
1560 |
|
|
|
1561 |
|
|
|
1562 | } | |
|
1473 | if (0 == sources.sourcesSize) { | |
|
1474 | PyErr_SetString(PyExc_ValueError, "no source elements found"); | |
|
1475 | goto finally; | |
|
1476 | } | |
|
1563 | 1477 | |
|
1564 |
|
|
|
1565 |
|
|
|
1566 |
|
|
|
1567 | } | |
|
1478 | if (0 == sources.totalSourceSize) { | |
|
1479 | PyErr_SetString(PyExc_ValueError, "source elements are empty"); | |
|
1480 | goto finally; | |
|
1481 | } | |
|
1568 | 1482 | |
|
1569 |
|
|
|
1570 | PyErr_SetString(PyExc_ValueError, "sources are too large for this platform"); | |
|
1571 | goto finally; | |
|
1572 | } | |
|
1483 | if (sources.totalSourceSize > SIZE_MAX) { | |
|
1484 | PyErr_SetString(PyExc_ValueError, | |
|
1485 | "sources are too large for this platform"); | |
|
1486 | goto finally; | |
|
1487 | } | |
|
1573 | 1488 | |
|
1574 |
|
|
|
1489 | result = compress_from_datasources(self, &sources, threads); | |
|
1575 | 1490 | |
|
1576 | 1491 | finally: |
|
1577 |
|
|
|
1492 | PyMem_Free(sources.sources); | |
|
1493 | ||
|
1494 | if (dataBuffers) { | |
|
1495 | for (i = 0; i < sourceCount; i++) { | |
|
1496 | PyBuffer_Release(&dataBuffers[i]); | |
|
1497 | } | |
|
1578 | 1498 | |
|
1579 | if (dataBuffers) { | |
|
1580 | for (i = 0; i < sourceCount; i++) { | |
|
1581 | PyBuffer_Release(&dataBuffers[i]); | |
|
1582 | } | |
|
1499 | PyMem_Free(dataBuffers); | |
|
1500 | } | |
|
1583 | 1501 | |
|
1584 | PyMem_Free(dataBuffers); | |
|
1585 | } | |
|
1586 | ||
|
1587 | return result; | |
|
1502 | return result; | |
|
1588 | 1503 | } |
|
1504 | #endif | |
|
1589 | 1505 | |
|
1590 | 1506 | static PyMethodDef ZstdCompressor_methods[] = { |
|
1591 |
|
|
|
1592 |
|
|
|
1593 |
|
|
|
1594 |
|
|
|
1595 |
|
|
|
1596 |
|
|
|
1597 |
|
|
|
1598 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_copy_stream__doc__ }, | |
|
1599 |
|
|
|
1600 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_reader__doc__ }, | |
|
1601 |
|
|
|
1602 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ }, | |
|
1603 |
|
|
|
1604 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ }, | |
|
1605 | /* TODO Remove deprecated API */ | |
|
1606 | { "read_from", (PyCFunction)ZstdCompressor_read_to_iter, | |
|
1607 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_read_to_iter__doc__ }, | |
|
1608 | /* TODO remove deprecated API */ | |
|
1609 | { "write_to", (PyCFunction)ZstdCompressor_stream_writer, | |
|
1610 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_stream_writer___doc__ }, | |
|
1611 |
|
|
|
1612 | METH_VARARGS | METH_KEYWORDS, ZstdCompressor_multi_compress_to_buffer__doc__ }, | |
|
1613 | { "memory_size", (PyCFunction)ZstdCompressor_memory_size, | |
|
1614 | METH_NOARGS, ZstdCompressor_memory_size__doc__ }, | |
|
1615 | { "frame_progression", (PyCFunction)ZstdCompressor_frame_progression, | |
|
1616 | METH_NOARGS, ZstdCompressor_frame_progression__doc__ }, | |
|
1617 | { NULL, NULL } | |
|
1507 | {"chunker", (PyCFunction)ZstdCompressor_chunker, | |
|
1508 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1509 | {"compress", (PyCFunction)ZstdCompressor_compress, | |
|
1510 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1511 | {"compressobj", (PyCFunction)ZstdCompressor_compressobj, | |
|
1512 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1513 | {"copy_stream", (PyCFunction)ZstdCompressor_copy_stream, | |
|
1514 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1515 | {"stream_reader", (PyCFunction)ZstdCompressor_stream_reader, | |
|
1516 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1517 | {"stream_writer", (PyCFunction)ZstdCompressor_stream_writer, | |
|
1518 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1519 | {"read_to_iter", (PyCFunction)ZstdCompressor_read_to_iter, | |
|
1520 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1521 | #ifdef HAVE_ZSTD_POOL_APIS | |
|
1522 | {"multi_compress_to_buffer", | |
|
1523 | (PyCFunction)ZstdCompressor_multi_compress_to_buffer, | |
|
1524 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
1525 | #endif | |
|
1526 | {"memory_size", (PyCFunction)ZstdCompressor_memory_size, METH_NOARGS, NULL}, | |
|
1527 | {"frame_progression", (PyCFunction)ZstdCompressor_frame_progression, | |
|
1528 | METH_NOARGS, NULL}, | |
|
1529 | {NULL, NULL}}; | |
|
1530 | ||
|
1531 | PyType_Slot ZstdCompressorSlots[] = { | |
|
1532 | {Py_tp_dealloc, ZstdCompressor_dealloc}, | |
|
1533 | {Py_tp_methods, ZstdCompressor_methods}, | |
|
1534 | {Py_tp_init, ZstdCompressor_init}, | |
|
1535 | {Py_tp_new, PyType_GenericNew}, | |
|
1536 | {0, NULL}, | |
|
1618 | 1537 | }; |
|
1619 | 1538 | |
|
1620 |
PyType |
|
|
1621 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
1622 | "zstd.ZstdCompressor", /* tp_name */ | |
|
1623 | sizeof(ZstdCompressor), /* tp_basicsize */ | |
|
1624 | 0, /* tp_itemsize */ | |
|
1625 | (destructor)ZstdCompressor_dealloc, /* tp_dealloc */ | |
|
1626 | 0, /* tp_print */ | |
|
1627 | 0, /* tp_getattr */ | |
|
1628 | 0, /* tp_setattr */ | |
|
1629 | 0, /* tp_compare */ | |
|
1630 | 0, /* tp_repr */ | |
|
1631 | 0, /* tp_as_number */ | |
|
1632 | 0, /* tp_as_sequence */ | |
|
1633 | 0, /* tp_as_mapping */ | |
|
1634 | 0, /* tp_hash */ | |
|
1635 | 0, /* tp_call */ | |
|
1636 | 0, /* tp_str */ | |
|
1637 | 0, /* tp_getattro */ | |
|
1638 | 0, /* tp_setattro */ | |
|
1639 | 0, /* tp_as_buffer */ | |
|
1640 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
1641 | ZstdCompressor__doc__, /* tp_doc */ | |
|
1642 | 0, /* tp_traverse */ | |
|
1643 | 0, /* tp_clear */ | |
|
1644 | 0, /* tp_richcompare */ | |
|
1645 | 0, /* tp_weaklistoffset */ | |
|
1646 | 0, /* tp_iter */ | |
|
1647 | 0, /* tp_iternext */ | |
|
1648 | ZstdCompressor_methods, /* tp_methods */ | |
|
1649 | 0, /* tp_members */ | |
|
1650 | 0, /* tp_getset */ | |
|
1651 | 0, /* tp_base */ | |
|
1652 | 0, /* tp_dict */ | |
|
1653 | 0, /* tp_descr_get */ | |
|
1654 | 0, /* tp_descr_set */ | |
|
1655 | 0, /* tp_dictoffset */ | |
|
1656 | (initproc)ZstdCompressor_init, /* tp_init */ | |
|
1657 | 0, /* tp_alloc */ | |
|
1658 | PyType_GenericNew, /* tp_new */ | |
|
1539 | PyType_Spec ZstdCompressorSpec = { | |
|
1540 | "zstd.ZstdCompressor", | |
|
1541 | sizeof(ZstdCompressor), | |
|
1542 | 0, | |
|
1543 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
1544 | ZstdCompressorSlots, | |
|
1659 | 1545 | }; |
|
1660 | 1546 | |
|
1661 | void compressor_module_init(PyObject* mod) { | |
|
1662 | Py_SET_TYPE(&ZstdCompressorType, &PyType_Type); | |
|
1663 | if (PyType_Ready(&ZstdCompressorType) < 0) { | |
|
1664 | return; | |
|
1665 | } | |
|
1547 | PyTypeObject *ZstdCompressorType; | |
|
1666 | 1548 | |
|
1667 | Py_INCREF((PyObject*)&ZstdCompressorType); | |
|
1668 | PyModule_AddObject(mod, "ZstdCompressor", | |
|
1669 | (PyObject*)&ZstdCompressorType); | |
|
1549 | void compressor_module_init(PyObject *mod) { | |
|
1550 | ZstdCompressorType = (PyTypeObject *)PyType_FromSpec(&ZstdCompressorSpec); | |
|
1551 | if (PyType_Ready(ZstdCompressorType) < 0) { | |
|
1552 | return; | |
|
1553 | } | |
|
1554 | ||
|
1555 | Py_INCREF((PyObject *)ZstdCompressorType); | |
|
1556 | PyModule_AddObject(mod, "ZstdCompressor", (PyObject *)ZstdCompressorType); | |
|
1670 | 1557 | } |
@@ -1,235 +1,212 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 | 11 | #define min(a, b) (((a) < (b)) ? (a) : (b)) |
|
12 | 12 | |
|
13 |
extern PyObject* |
|
|
13 | extern PyObject *ZstdError; | |
|
14 | 14 | |
|
15 | PyDoc_STRVAR(ZstdCompressorIterator__doc__, | |
|
16 | "Represents an iterator of compressed data.\n" | |
|
17 | ); | |
|
18 | ||
|
19 | static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator* self) { | |
|
20 | Py_XDECREF(self->readResult); | |
|
21 | Py_XDECREF(self->compressor); | |
|
22 | Py_XDECREF(self->reader); | |
|
15 | static void ZstdCompressorIterator_dealloc(ZstdCompressorIterator *self) { | |
|
16 | Py_XDECREF(self->readResult); | |
|
17 | Py_XDECREF(self->compressor); | |
|
18 | Py_XDECREF(self->reader); | |
|
23 | 19 | |
|
24 |
|
|
|
25 |
|
|
|
26 |
|
|
|
27 | } | |
|
20 | if (self->buffer.buf) { | |
|
21 | PyBuffer_Release(&self->buffer); | |
|
22 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
|
23 | } | |
|
28 | 24 | |
|
29 |
|
|
|
30 |
|
|
|
31 |
|
|
|
32 | } | |
|
25 | if (self->output.dst) { | |
|
26 | PyMem_Free(self->output.dst); | |
|
27 | self->output.dst = NULL; | |
|
28 | } | |
|
33 | 29 | |
|
34 |
|
|
|
30 | PyObject_Del(self); | |
|
35 | 31 | } |
|
36 | 32 | |
|
37 |
static PyObject* |
|
|
38 |
|
|
|
39 |
|
|
|
33 | static PyObject *ZstdCompressorIterator_iter(PyObject *self) { | |
|
34 | Py_INCREF(self); | |
|
35 | return self; | |
|
40 | 36 | } |
|
41 | 37 | |
|
42 |
static PyObject* |
|
|
43 |
|
|
|
44 |
|
|
|
45 |
|
|
|
46 |
|
|
|
47 |
|
|
|
48 |
|
|
|
38 | static PyObject *ZstdCompressorIterator_iternext(ZstdCompressorIterator *self) { | |
|
39 | size_t zresult; | |
|
40 | PyObject *readResult = NULL; | |
|
41 | PyObject *chunk; | |
|
42 | char *readBuffer; | |
|
43 | Py_ssize_t readSize = 0; | |
|
44 | Py_ssize_t bufferRemaining; | |
|
49 | 45 | |
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 | } | |
|
46 | if (self->finishedOutput) { | |
|
47 | PyErr_SetString(PyExc_StopIteration, "output flushed"); | |
|
48 | return NULL; | |
|
49 | } | |
|
54 | 50 | |
|
55 | 51 | feedcompressor: |
|
56 | 52 | |
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
53 | /* If we have data left in the input, consume it. */ | |
|
54 | if (self->input.pos < self->input.size) { | |
|
55 | Py_BEGIN_ALLOW_THREADS zresult = | |
|
56 | ZSTD_compressStream2(self->compressor->cctx, &self->output, | |
|
57 | &self->input, ZSTD_e_continue); | |
|
58 | Py_END_ALLOW_THREADS | |
|
63 | 59 | |
|
64 |
|
|
|
65 |
|
|
|
66 |
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
|
|
|
71 | } | |
|
60 | /* Release the Python object holding the input buffer. */ | |
|
61 | if (self->input.pos == self->input.size) { | |
|
62 | self->input.src = NULL; | |
|
63 | self->input.pos = 0; | |
|
64 | self->input.size = 0; | |
|
65 | Py_DECREF(self->readResult); | |
|
66 | self->readResult = NULL; | |
|
67 | } | |
|
72 | 68 | |
|
73 |
|
|
|
74 |
|
|
|
75 | return NULL; | |
|
76 | } | |
|
77 | ||
|
78 | /* If it produced output data, emit it. */ | |
|
79 | if (self->output.pos) { | |
|
80 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
|
81 | self->output.pos = 0; | |
|
82 | return chunk; | |
|
83 | } | |
|
84 | } | |
|
69 | if (ZSTD_isError(zresult)) { | |
|
70 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
71 | ZSTD_getErrorName(zresult)); | |
|
72 | return NULL; | |
|
73 | } | |
|
85 | 74 | |
|
86 | /* We should never have output data sitting around after a previous call. */ | |
|
87 | assert(self->output.pos == 0); | |
|
75 | /* If it produced output data, emit it. */ | |
|
76 | if (self->output.pos) { | |
|
77 | chunk = | |
|
78 | PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
|
79 | self->output.pos = 0; | |
|
80 | return chunk; | |
|
81 | } | |
|
82 | } | |
|
88 | 83 | |
|
89 | /* The code above should have either emitted a chunk and returned or consumed | |
|
90 | the entire input buffer. So the state of the input buffer is not | |
|
91 | relevant. */ | |
|
92 | if (!self->finishedInput) { | |
|
93 | if (self->reader) { | |
|
94 | readResult = PyObject_CallMethod(self->reader, "read", "I", self->inSize); | |
|
95 | if (!readResult) { | |
|
96 | PyErr_SetString(ZstdError, "could not read() from source"); | |
|
97 | return NULL; | |
|
98 | } | |
|
84 | /* We should never have output data sitting around after a previous call. */ | |
|
85 | assert(self->output.pos == 0); | |
|
99 | 86 | |
|
100 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
|
101 | } | |
|
102 | else { | |
|
103 | assert(self->buffer.buf); | |
|
87 | /* The code above should have either emitted a chunk and returned or | |
|
88 | consumed the entire input buffer. So the state of the input buffer is not | |
|
89 | relevant. */ | |
|
90 | if (!self->finishedInput) { | |
|
91 | if (self->reader) { | |
|
92 | readResult = | |
|
93 | PyObject_CallMethod(self->reader, "read", "I", self->inSize); | |
|
94 | if (!readResult) { | |
|
95 | return NULL; | |
|
96 | } | |
|
104 | 97 | |
|
105 | /* Only support contiguous C arrays. */ | |
|
106 | assert(self->buffer.strides == NULL && self->buffer.suboffsets == NULL); | |
|
107 | assert(self->buffer.itemsize == 1); | |
|
98 | PyBytes_AsStringAndSize(readResult, &readBuffer, &readSize); | |
|
99 | } | |
|
100 | else { | |
|
101 | assert(self->buffer.buf); | |
|
108 | 102 | |
|
109 | readBuffer = (char*)self->buffer.buf + self->bufferOffset; | |
|
110 | bufferRemaining = self->buffer.len - self->bufferOffset; | |
|
111 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
|
112 | self->bufferOffset += readSize; | |
|
113 | } | |
|
103 | /* Only support contiguous C arrays. */ | |
|
104 | assert(self->buffer.strides == NULL && | |
|
105 | self->buffer.suboffsets == NULL); | |
|
106 | assert(self->buffer.itemsize == 1); | |
|
107 | ||
|
108 | readBuffer = (char *)self->buffer.buf + self->bufferOffset; | |
|
109 | bufferRemaining = self->buffer.len - self->bufferOffset; | |
|
110 | readSize = min(bufferRemaining, (Py_ssize_t)self->inSize); | |
|
111 | self->bufferOffset += readSize; | |
|
112 | } | |
|
114 | 113 | |
|
115 |
|
|
|
116 |
|
|
|
117 |
|
|
|
118 | } | |
|
119 | else { | |
|
120 |
|
|
|
121 | } | |
|
122 | } | |
|
114 | if (0 == readSize) { | |
|
115 | Py_XDECREF(readResult); | |
|
116 | self->finishedInput = 1; | |
|
117 | } | |
|
118 | else { | |
|
119 | self->readResult = readResult; | |
|
120 | } | |
|
121 | } | |
|
123 | 122 | |
|
124 |
|
|
|
125 |
|
|
|
126 |
|
|
|
127 |
|
|
|
128 |
|
|
|
123 | /* EOF */ | |
|
124 | if (0 == readSize) { | |
|
125 | self->input.src = NULL; | |
|
126 | self->input.size = 0; | |
|
127 | self->input.pos = 0; | |
|
129 | 128 | |
|
130 |
|
|
|
131 | &self->input, ZSTD_e_end); | |
|
132 |
|
|
|
133 |
|
|
|
134 |
|
|
|
135 |
|
|
|
136 | } | |
|
129 | zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, | |
|
130 | &self->input, ZSTD_e_end); | |
|
131 | if (ZSTD_isError(zresult)) { | |
|
132 | PyErr_Format(ZstdError, "error ending compression stream: %s", | |
|
133 | ZSTD_getErrorName(zresult)); | |
|
134 | return NULL; | |
|
135 | } | |
|
137 | 136 | |
|
138 |
|
|
|
137 | assert(self->output.pos); | |
|
139 | 138 | |
|
140 |
|
|
|
141 |
|
|
|
142 | } | |
|
139 | if (0 == zresult) { | |
|
140 | self->finishedOutput = 1; | |
|
141 | } | |
|
143 | 142 | |
|
144 |
|
|
|
145 |
|
|
|
146 |
|
|
|
147 | } | |
|
143 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
|
144 | self->output.pos = 0; | |
|
145 | return chunk; | |
|
146 | } | |
|
148 | 147 | |
|
149 |
|
|
|
150 |
|
|
|
151 |
|
|
|
152 |
|
|
|
148 | /* New data from reader. Feed into compressor. */ | |
|
149 | self->input.src = readBuffer; | |
|
150 | self->input.size = readSize; | |
|
151 | self->input.pos = 0; | |
|
153 | 152 | |
|
154 | Py_BEGIN_ALLOW_THREADS | |
|
155 | zresult = ZSTD_compressStream2(self->compressor->cctx, &self->output, | |
|
156 | &self->input, ZSTD_e_continue); | |
|
157 | Py_END_ALLOW_THREADS | |
|
153 | Py_BEGIN_ALLOW_THREADS zresult = ZSTD_compressStream2( | |
|
154 | self->compressor->cctx, &self->output, &self->input, ZSTD_e_continue); | |
|
155 | Py_END_ALLOW_THREADS | |
|
158 | 156 | |
|
159 |
|
|
|
160 |
|
|
|
161 |
|
|
|
162 |
|
|
|
163 | */ | |
|
164 |
|
|
|
165 |
|
|
|
166 |
|
|
|
167 |
|
|
|
168 |
|
|
|
169 |
|
|
|
170 | } | |
|
157 | /* The input buffer currently points to memory managed by Python | |
|
158 | (readBuffer). This object was allocated by this function. If it wasn't | |
|
159 | fully consumed, we need to release it in a subsequent function call. | |
|
160 | If it is fully consumed, do that now. | |
|
161 | */ | |
|
162 | if (self->input.pos == self->input.size) { | |
|
163 | self->input.src = NULL; | |
|
164 | self->input.pos = 0; | |
|
165 | self->input.size = 0; | |
|
166 | Py_XDECREF(self->readResult); | |
|
167 | self->readResult = NULL; | |
|
168 | } | |
|
171 | 169 | |
|
172 |
|
|
|
173 |
|
|
|
174 | return NULL; | |
|
175 | } | |
|
170 | if (ZSTD_isError(zresult)) { | |
|
171 | PyErr_Format(ZstdError, "zstd compress error: %s", | |
|
172 | ZSTD_getErrorName(zresult)); | |
|
173 | return NULL; | |
|
174 | } | |
|
176 | 175 | |
|
177 |
|
|
|
176 | assert(self->input.pos <= self->input.size); | |
|
178 | 177 | |
|
179 |
|
|
|
180 |
|
|
|
181 |
|
|
|
182 | } | |
|
178 | /* If we didn't write anything, start the process over. */ | |
|
179 | if (0 == self->output.pos) { | |
|
180 | goto feedcompressor; | |
|
181 | } | |
|
183 | 182 | |
|
184 |
|
|
|
185 |
|
|
|
186 |
|
|
|
183 | chunk = PyBytes_FromStringAndSize(self->output.dst, self->output.pos); | |
|
184 | self->output.pos = 0; | |
|
185 | return chunk; | |
|
187 | 186 | } |
|
188 | 187 | |
|
189 |
PyType |
|
|
190 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
191 | "zstd.ZstdCompressorIterator", /* tp_name */ | |
|
192 | sizeof(ZstdCompressorIterator), /* tp_basicsize */ | |
|
193 | 0, /* tp_itemsize */ | |
|
194 | (destructor)ZstdCompressorIterator_dealloc, /* tp_dealloc */ | |
|
195 | 0, /* tp_print */ | |
|
196 | 0, /* tp_getattr */ | |
|
197 | 0, /* tp_setattr */ | |
|
198 | 0, /* tp_compare */ | |
|
199 | 0, /* tp_repr */ | |
|
200 | 0, /* tp_as_number */ | |
|
201 | 0, /* tp_as_sequence */ | |
|
202 | 0, /* tp_as_mapping */ | |
|
203 | 0, /* tp_hash */ | |
|
204 | 0, /* tp_call */ | |
|
205 | 0, /* tp_str */ | |
|
206 | 0, /* tp_getattro */ | |
|
207 | 0, /* tp_setattro */ | |
|
208 | 0, /* tp_as_buffer */ | |
|
209 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
210 | ZstdCompressorIterator__doc__, /* tp_doc */ | |
|
211 | 0, /* tp_traverse */ | |
|
212 | 0, /* tp_clear */ | |
|
213 | 0, /* tp_richcompare */ | |
|
214 | 0, /* tp_weaklistoffset */ | |
|
215 | ZstdCompressorIterator_iter, /* tp_iter */ | |
|
216 | (iternextfunc)ZstdCompressorIterator_iternext, /* tp_iternext */ | |
|
217 | 0, /* tp_methods */ | |
|
218 | 0, /* tp_members */ | |
|
219 | 0, /* tp_getset */ | |
|
220 | 0, /* tp_base */ | |
|
221 | 0, /* tp_dict */ | |
|
222 | 0, /* tp_descr_get */ | |
|
223 | 0, /* tp_descr_set */ | |
|
224 | 0, /* tp_dictoffset */ | |
|
225 | 0, /* tp_init */ | |
|
226 | 0, /* tp_alloc */ | |
|
227 | PyType_GenericNew, /* tp_new */ | |
|
188 | PyType_Slot ZstdCompressorIteratorSlots[] = { | |
|
189 | {Py_tp_dealloc, ZstdCompressorIterator_dealloc}, | |
|
190 | {Py_tp_iter, ZstdCompressorIterator_iter}, | |
|
191 | {Py_tp_iternext, ZstdCompressorIterator_iternext}, | |
|
192 | {Py_tp_new, PyType_GenericNew}, | |
|
193 | {0, NULL}, | |
|
228 | 194 | }; |
|
229 | 195 | |
|
230 | void compressoriterator_module_init(PyObject* mod) { | |
|
231 | Py_SET_TYPE(&ZstdCompressorIteratorType, &PyType_Type); | |
|
232 | if (PyType_Ready(&ZstdCompressorIteratorType) < 0) { | |
|
233 | return; | |
|
234 | } | |
|
196 | PyType_Spec ZstdCompressorIteratorSpec = { | |
|
197 | "zstd.ZstdCompressorIterator", | |
|
198 | sizeof(ZstdCompressorIterator), | |
|
199 | 0, | |
|
200 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
201 | ZstdCompressorIteratorSlots, | |
|
202 | }; | |
|
203 | ||
|
204 | PyTypeObject *ZstdCompressorIteratorType; | |
|
205 | ||
|
206 | void compressoriterator_module_init(PyObject *mod) { | |
|
207 | ZstdCompressorIteratorType = | |
|
208 | (PyTypeObject *)PyType_FromSpec(&ZstdCompressorIteratorSpec); | |
|
209 | if (PyType_Ready(ZstdCompressorIteratorType) < 0) { | |
|
210 | return; | |
|
211 | } | |
|
235 | 212 | } |
@@ -1,110 +1,109 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
11 | extern PyObject *ZstdError; | |
|
12 | 12 | |
|
13 | 13 | static char frame_header[] = { |
|
14 |
|
|
|
15 |
|
|
|
16 |
|
|
|
17 |
|
|
|
14 | '\x28', | |
|
15 | '\xb5', | |
|
16 | '\x2f', | |
|
17 | '\xfd', | |
|
18 | 18 | }; |
|
19 | 19 | |
|
20 |
void constants_module_init(PyObject* |
|
|
21 |
|
|
|
22 |
|
|
|
23 |
|
|
|
20 | void constants_module_init(PyObject *mod) { | |
|
21 | PyObject *version; | |
|
22 | PyObject *zstdVersion; | |
|
23 | PyObject *frameHeader; | |
|
24 | ||
|
25 | version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION); | |
|
26 | PyModule_AddObject(mod, "__version__", version); | |
|
24 | 27 | |
|
25 | #if PY_MAJOR_VERSION >= 3 | |
|
26 | version = PyUnicode_FromString(PYTHON_ZSTANDARD_VERSION); | |
|
27 | #else | |
|
28 | version = PyString_FromString(PYTHON_ZSTANDARD_VERSION); | |
|
29 | #endif | |
|
30 | PyModule_AddObject(mod, "__version__", version); | |
|
28 | ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL); | |
|
29 | PyModule_AddObject(mod, "ZstdError", ZstdError); | |
|
31 | 30 | |
|
32 | ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL); | |
|
33 | PyModule_AddObject(mod, "ZstdError", ZstdError); | |
|
31 | PyModule_AddIntConstant(mod, "FLUSH_BLOCK", 0); | |
|
32 | PyModule_AddIntConstant(mod, "FLUSH_FRAME", 1); | |
|
34 | 33 | |
|
35 |
|
|
|
36 | PyModule_AddIntConstant(mod, "FLUSH_FRAME", 1); | |
|
34 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", | |
|
35 | compressorobj_flush_finish); | |
|
36 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", | |
|
37 | compressorobj_flush_block); | |
|
37 | 38 | |
|
38 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish); | |
|
39 | PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block); | |
|
40 | ||
|
41 | /* For now, the version is a simple tuple instead of a dedicated type. */ | |
|
42 | zstdVersion = PyTuple_New(3); | |
|
43 | PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR)); | |
|
44 | PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR)); | |
|
45 | PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE)); | |
|
46 | PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion); | |
|
39 | /* For now, the version is a simple tuple instead of a dedicated type. */ | |
|
40 | zstdVersion = PyTuple_New(3); | |
|
41 | PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR)); | |
|
42 | PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR)); | |
|
43 | PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE)); | |
|
44 | PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion); | |
|
47 | 45 | |
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
51 | } | |
|
52 |
|
|
|
53 |
|
|
|
54 | } | |
|
46 | frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header)); | |
|
47 | if (frameHeader) { | |
|
48 | PyModule_AddObject(mod, "FRAME_HEADER", frameHeader); | |
|
49 | } | |
|
50 | else { | |
|
51 | PyErr_Format(PyExc_ValueError, "could not create frame header object"); | |
|
52 | } | |
|
55 | 53 | |
|
56 |
|
|
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
|
54 | PyModule_AddObject(mod, "CONTENTSIZE_UNKNOWN", | |
|
55 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_UNKNOWN)); | |
|
56 | PyModule_AddObject(mod, "CONTENTSIZE_ERROR", | |
|
57 | PyLong_FromUnsignedLongLong(ZSTD_CONTENTSIZE_ERROR)); | |
|
60 | 58 | |
|
61 |
|
|
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
66 |
|
|
|
67 |
|
|
|
68 |
|
|
|
69 |
|
|
|
59 | PyModule_AddIntConstant(mod, "MAX_COMPRESSION_LEVEL", ZSTD_maxCLevel()); | |
|
60 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
61 | (long)ZSTD_CStreamInSize()); | |
|
62 | PyModule_AddIntConstant(mod, "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
63 | (long)ZSTD_CStreamOutSize()); | |
|
64 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
65 | (long)ZSTD_DStreamInSize()); | |
|
66 | PyModule_AddIntConstant(mod, "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
67 | (long)ZSTD_DStreamOutSize()); | |
|
70 | 68 | |
|
71 |
|
|
|
72 |
|
|
|
73 |
|
|
|
74 |
|
|
|
75 |
|
|
|
76 |
|
|
|
77 |
|
|
|
78 |
|
|
|
79 |
|
|
|
80 |
|
|
|
81 |
|
|
|
82 |
|
|
|
83 |
|
|
|
84 | PyModule_AddIntConstant(mod, "MINMATCH_MAX", ZSTD_MINMATCH_MAX); | |
|
85 | /* TODO SEARCHLENGTH_* is deprecated. */ | |
|
86 |
|
|
|
87 |
|
|
|
88 |
|
|
|
89 |
|
|
|
90 |
|
|
|
91 |
|
|
|
92 | PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX); | |
|
69 | PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER); | |
|
70 | PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX); | |
|
71 | PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX); | |
|
72 | PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN); | |
|
73 | PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX); | |
|
74 | PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN); | |
|
75 | PyModule_AddIntConstant(mod, "CHAINLOG_MAX", ZSTD_CHAINLOG_MAX); | |
|
76 | PyModule_AddIntConstant(mod, "HASHLOG_MIN", ZSTD_HASHLOG_MIN); | |
|
77 | PyModule_AddIntConstant(mod, "HASHLOG_MAX", ZSTD_HASHLOG_MAX); | |
|
78 | PyModule_AddIntConstant(mod, "SEARCHLOG_MIN", ZSTD_SEARCHLOG_MIN); | |
|
79 | PyModule_AddIntConstant(mod, "SEARCHLOG_MAX", ZSTD_SEARCHLOG_MAX); | |
|
80 | PyModule_AddIntConstant(mod, "MINMATCH_MIN", ZSTD_MINMATCH_MIN); | |
|
81 | PyModule_AddIntConstant(mod, "MINMATCH_MAX", ZSTD_MINMATCH_MAX); | |
|
82 | /* TODO SEARCHLENGTH_* is deprecated. */ | |
|
83 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_MINMATCH_MIN); | |
|
84 | PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_MINMATCH_MAX); | |
|
85 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN); | |
|
86 | PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX); | |
|
87 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN); | |
|
88 | PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX); | |
|
89 | PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", | |
|
90 | ZSTD_LDM_BUCKETSIZELOG_MAX); | |
|
93 | 91 | |
|
94 |
|
|
|
95 |
|
|
|
96 |
|
|
|
97 |
|
|
|
98 |
|
|
|
99 |
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
|
|
|
92 | PyModule_AddIntConstant(mod, "STRATEGY_FAST", ZSTD_fast); | |
|
93 | PyModule_AddIntConstant(mod, "STRATEGY_DFAST", ZSTD_dfast); | |
|
94 | PyModule_AddIntConstant(mod, "STRATEGY_GREEDY", ZSTD_greedy); | |
|
95 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY", ZSTD_lazy); | |
|
96 | PyModule_AddIntConstant(mod, "STRATEGY_LAZY2", ZSTD_lazy2); | |
|
97 | PyModule_AddIntConstant(mod, "STRATEGY_BTLAZY2", ZSTD_btlazy2); | |
|
98 | PyModule_AddIntConstant(mod, "STRATEGY_BTOPT", ZSTD_btopt); | |
|
99 | PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA", ZSTD_btultra); | |
|
100 | PyModule_AddIntConstant(mod, "STRATEGY_BTULTRA2", ZSTD_btultra2); | |
|
103 | 101 | |
|
104 |
|
|
|
105 |
|
|
|
106 |
|
|
|
102 | PyModule_AddIntConstant(mod, "DICT_TYPE_AUTO", ZSTD_dct_auto); | |
|
103 | PyModule_AddIntConstant(mod, "DICT_TYPE_RAWCONTENT", ZSTD_dct_rawContent); | |
|
104 | PyModule_AddIntConstant(mod, "DICT_TYPE_FULLDICT", ZSTD_dct_fullDict); | |
|
107 | 105 | |
|
108 |
|
|
|
109 |
|
|
|
106 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1", ZSTD_f_zstd1); | |
|
107 | PyModule_AddIntConstant(mod, "FORMAT_ZSTD1_MAGICLESS", | |
|
108 | ZSTD_f_zstd1_magicless); | |
|
110 | 109 | } |
This diff has been collapsed as it changes many lines, (1147 lines changed) Show them Hide them | |||
@@ -1,105 +1,108 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2017-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
11 | extern PyObject *ZstdError; | |
|
12 | 12 | |
|
13 | static void set_unsupported_operation(void) { | |
|
14 | PyObject* iomod; | |
|
15 | PyObject* exc; | |
|
13 | static void decompressionreader_dealloc(ZstdDecompressionReader *self) { | |
|
14 | Py_XDECREF(self->decompressor); | |
|
15 | Py_XDECREF(self->reader); | |
|
16 | 16 | |
|
17 | iomod = PyImport_ImportModule("io"); | |
|
18 | if (NULL == iomod) { | |
|
19 | return; | |
|
20 | } | |
|
17 | if (self->buffer.buf) { | |
|
18 | PyBuffer_Release(&self->buffer); | |
|
19 | } | |
|
21 | 20 | |
|
22 | exc = PyObject_GetAttrString(iomod, "UnsupportedOperation"); | |
|
23 | if (NULL == exc) { | |
|
24 | Py_DECREF(iomod); | |
|
25 | return; | |
|
26 | } | |
|
21 | Py_CLEAR(self->readResult); | |
|
27 | 22 | |
|
28 | PyErr_SetNone(exc); | |
|
29 | Py_DECREF(exc); | |
|
30 | Py_DECREF(iomod); | |
|
23 | PyObject_Del(self); | |
|
31 | 24 | } |
|
32 | 25 | |
|
33 |
static |
|
|
34 | Py_XDECREF(self->decompressor); | |
|
35 | Py_XDECREF(self->reader); | |
|
36 | ||
|
37 | if (self->buffer.buf) { | |
|
38 | PyBuffer_Release(&self->buffer); | |
|
39 | } | |
|
40 | ||
|
41 | PyObject_Del(self); | |
|
42 | } | |
|
26 | static ZstdDecompressionReader * | |
|
27 | decompressionreader_enter(ZstdDecompressionReader *self) { | |
|
28 | if (self->entered) { | |
|
29 | PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times"); | |
|
30 | return NULL; | |
|
31 | } | |
|
43 | 32 | |
|
44 | static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) { | |
|
45 | if (self->entered) { | |
|
46 | PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times"); | |
|
47 | return NULL; | |
|
48 | } | |
|
33 | if (self->closed) { | |
|
34 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
35 | return NULL; | |
|
36 | } | |
|
49 | 37 | |
|
50 |
|
|
|
38 | self->entered = 1; | |
|
51 | 39 | |
|
52 |
|
|
|
53 |
|
|
|
40 | Py_INCREF(self); | |
|
41 | return self; | |
|
54 | 42 | } |
|
55 | 43 | |
|
56 |
static PyObject* |
|
|
57 | PyObject* exc_type; | |
|
58 |
|
|
|
59 |
|
|
|
44 | static PyObject *decompressionreader_exit(ZstdDecompressionReader *self, | |
|
45 | PyObject *args) { | |
|
46 | PyObject *exc_type; | |
|
47 | PyObject *exc_value; | |
|
48 | PyObject *exc_tb; | |
|
60 | 49 | |
|
61 |
|
|
|
62 | return NULL; | |
|
63 | } | |
|
50 | if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, | |
|
51 | &exc_tb)) { | |
|
52 | return NULL; | |
|
53 | } | |
|
54 | ||
|
55 | self->entered = 0; | |
|
64 | 56 | |
|
65 | self->entered = 0; | |
|
66 | self->closed = 1; | |
|
57 | if (NULL == PyObject_CallMethod((PyObject *)self, "close", NULL)) { | |
|
58 | return NULL; | |
|
59 | } | |
|
67 | 60 | |
|
68 |
|
|
|
69 |
|
|
|
70 |
|
|
|
71 |
|
|
|
72 |
|
|
|
73 | } | |
|
61 | /* Release resources. */ | |
|
62 | Py_CLEAR(self->reader); | |
|
63 | if (self->buffer.buf) { | |
|
64 | PyBuffer_Release(&self->buffer); | |
|
65 | memset(&self->buffer, 0, sizeof(self->buffer)); | |
|
66 | } | |
|
74 | 67 | |
|
75 |
|
|
|
68 | Py_CLEAR(self->decompressor); | |
|
76 | 69 | |
|
77 |
|
|
|
70 | Py_RETURN_FALSE; | |
|
78 | 71 | } |
|
79 | 72 | |
|
80 |
static PyObject* |
|
|
81 |
|
|
|
73 | static PyObject *decompressionreader_readable(PyObject *self) { | |
|
74 | Py_RETURN_TRUE; | |
|
82 | 75 | } |
|
83 | 76 | |
|
84 |
static PyObject* |
|
|
85 |
|
|
|
77 | static PyObject *decompressionreader_writable(PyObject *self) { | |
|
78 | Py_RETURN_FALSE; | |
|
86 | 79 | } |
|
87 | 80 | |
|
88 |
static PyObject* |
|
|
89 |
|
|
|
81 | static PyObject *decompressionreader_seekable(PyObject *self) { | |
|
82 | Py_RETURN_FALSE; | |
|
90 | 83 | } |
|
91 | 84 | |
|
92 |
static PyObject* |
|
|
93 |
|
|
|
94 |
|
|
|
85 | static PyObject *decompressionreader_close(ZstdDecompressionReader *self) { | |
|
86 | if (self->closed) { | |
|
87 | Py_RETURN_NONE; | |
|
88 | } | |
|
89 | ||
|
90 | self->closed = 1; | |
|
91 | ||
|
92 | if (self->closefd && self->reader != NULL && | |
|
93 | PyObject_HasAttrString(self->reader, "close")) { | |
|
94 | return PyObject_CallMethod(self->reader, "close", NULL); | |
|
95 | } | |
|
96 | ||
|
97 | Py_RETURN_NONE; | |
|
95 | 98 | } |
|
96 | 99 | |
|
97 |
static PyObject* |
|
|
98 |
|
|
|
100 | static PyObject *decompressionreader_flush(PyObject *self) { | |
|
101 | Py_RETURN_NONE; | |
|
99 | 102 | } |
|
100 | 103 | |
|
101 |
static PyObject* |
|
|
102 |
|
|
|
104 | static PyObject *decompressionreader_isatty(PyObject *self) { | |
|
105 | Py_RETURN_FALSE; | |
|
103 | 106 | } |
|
104 | 107 | |
|
105 | 108 | /** |
@@ -109,28 +112,29 static PyObject* reader_isatty(PyObject* | |||
|
109 | 112 | * Returns 1 if new input data is available. |
|
110 | 113 | * Returns -1 on error and sets a Python exception as a side-effect. |
|
111 | 114 | */ |
|
112 |
int read_decompressor_input(ZstdDecompressionReader* |
|
|
113 |
|
|
|
114 |
|
|
|
115 | } | |
|
115 | int read_decompressor_input(ZstdDecompressionReader *self) { | |
|
116 | if (self->finishedInput) { | |
|
117 | return 0; | |
|
118 | } | |
|
116 | 119 | |
|
117 |
|
|
|
118 |
|
|
|
119 | } | |
|
120 | if (self->input.pos != self->input.size) { | |
|
121 | return 0; | |
|
122 | } | |
|
120 | 123 | |
|
121 |
|
|
|
124 | if (self->reader) { | |
|
122 | 125 | Py_buffer buffer; |
|
123 | 126 | |
|
124 | 127 | assert(self->readResult == NULL); |
|
125 | self->readResult = PyObject_CallMethod(self->reader, "read", | |
|
126 | "k", self->readSize); | |
|
128 | self->readResult = | |
|
129 | PyObject_CallMethod(self->reader, "read", "k", self->readSize); | |
|
127 | 130 | if (NULL == self->readResult) { |
|
128 | 131 | return -1; |
|
129 | 132 | } |
|
130 | 133 | |
|
131 | 134 | memset(&buffer, 0, sizeof(buffer)); |
|
132 | 135 | |
|
133 | if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) { | |
|
136 | if (0 != | |
|
137 | PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) { | |
|
134 | 138 | return -1; |
|
135 | 139 | } |
|
136 | 140 | |
@@ -146,21 +150,21 int read_decompressor_input(ZstdDecompre | |||
|
146 | 150 | } |
|
147 | 151 | |
|
148 | 152 | PyBuffer_Release(&buffer); |
|
149 | } | |
|
150 |
|
|
|
151 |
|
|
|
153 | } | |
|
154 | else { | |
|
155 | assert(self->buffer.buf); | |
|
152 | 156 | /* |
|
153 | 157 | * We should only get here once since expectation is we always |
|
154 | 158 | * exhaust input buffer before reading again. |
|
155 | 159 | */ |
|
156 | 160 | assert(self->input.src == NULL); |
|
157 | 161 | |
|
158 |
|
|
|
162 | self->input.src = self->buffer.buf; | |
|
159 | 163 | self->input.size = self->buffer.len; |
|
160 | 164 | self->input.pos = 0; |
|
161 | } | |
|
165 | } | |
|
162 | 166 | |
|
163 |
|
|
|
167 | return 1; | |
|
164 | 168 | } |
|
165 | 169 | |
|
166 | 170 | /** |
@@ -170,612 +174,607 int read_decompressor_input(ZstdDecompre | |||
|
170 | 174 | * Returns 1 if output buffer should be emitted. |
|
171 | 175 | * Returns -1 on error and sets a Python exception. |
|
172 | 176 | */ |
|
173 |
int decompress_input(ZstdDecompressionReader* |
|
|
174 |
|
|
|
177 | int decompress_input(ZstdDecompressionReader *self, ZSTD_outBuffer *output) { | |
|
178 | size_t zresult; | |
|
175 | 179 | |
|
176 |
|
|
|
177 |
|
|
|
178 | } | |
|
180 | if (self->input.pos >= self->input.size) { | |
|
181 | return 0; | |
|
182 | } | |
|
179 | 183 | |
|
180 |
|
|
|
181 |
|
|
|
182 |
|
|
|
184 | Py_BEGIN_ALLOW_THREADS zresult = | |
|
185 | ZSTD_decompressStream(self->decompressor->dctx, output, &self->input); | |
|
186 | Py_END_ALLOW_THREADS | |
|
183 | 187 | |
|
184 |
|
|
|
185 |
|
|
|
186 |
|
|
|
187 |
|
|
|
188 | /* Input exhausted. Clear our state tracking. */ | |
|
189 | if (self->input.pos == self->input.size) { | |
|
190 | memset(&self->input, 0, sizeof(self->input)); | |
|
191 | Py_CLEAR(self->readResult); | |
|
188 | 192 | |
|
189 |
|
|
|
190 |
|
|
|
191 | } | |
|
192 | } | |
|
193 | if (self->buffer.buf) { | |
|
194 | self->finishedInput = 1; | |
|
195 | } | |
|
196 | } | |
|
193 | 197 | |
|
194 |
|
|
|
195 |
|
|
|
196 | return -1; | |
|
197 | } | |
|
198 | if (ZSTD_isError(zresult)) { | |
|
199 | PyErr_Format(ZstdError, "zstd decompress error: %s", | |
|
200 | ZSTD_getErrorName(zresult)); | |
|
201 | return -1; | |
|
202 | } | |
|
198 | 203 | |
|
199 |
|
|
|
200 |
|
|
|
201 |
|
|
|
202 | } | |
|
203 |
|
|
|
204 |
|
|
|
205 |
|
|
|
206 |
|
|
|
207 | } | |
|
204 | /* We fulfilled the full read request. Signal to emit. */ | |
|
205 | if (output->pos && output->pos == output->size) { | |
|
206 | return 1; | |
|
207 | } | |
|
208 | /* We're at the end of a frame and we aren't allowed to return data | |
|
209 | spanning frames. */ | |
|
210 | else if (output->pos && zresult == 0 && !self->readAcrossFrames) { | |
|
211 | return 1; | |
|
212 | } | |
|
208 | 213 | |
|
209 |
|
|
|
210 |
|
|
|
214 | /* There is more room in the output. Signal to collect more data. */ | |
|
215 | return 0; | |
|
211 | 216 | } |
|
212 | 217 | |
|
213 |
static PyObject* |
|
|
214 | static char* kwlist[] = { | |
|
215 | "size", | |
|
216 | NULL | |
|
217 | }; | |
|
218 | static PyObject *decompressionreader_read(ZstdDecompressionReader *self, | |
|
219 | PyObject *args, PyObject *kwargs) { | |
|
220 | static char *kwlist[] = {"size", NULL}; | |
|
218 | 221 | |
|
219 |
|
|
|
220 |
|
|
|
221 |
|
|
|
222 |
|
|
|
223 |
|
|
|
224 |
|
|
|
222 | Py_ssize_t size = -1; | |
|
223 | PyObject *result = NULL; | |
|
224 | char *resultBuffer; | |
|
225 | Py_ssize_t resultSize; | |
|
226 | ZSTD_outBuffer output; | |
|
227 | int decompressResult, readResult; | |
|
225 | 228 | |
|
226 |
|
|
|
227 |
|
|
|
228 |
|
|
|
229 | } | |
|
229 | if (self->closed) { | |
|
230 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
231 | return NULL; | |
|
232 | } | |
|
230 | 233 | |
|
231 |
|
|
|
232 |
|
|
|
233 | } | |
|
234 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) { | |
|
235 | return NULL; | |
|
236 | } | |
|
234 | 237 | |
|
235 |
|
|
|
236 | PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1"); | |
|
237 | return NULL; | |
|
238 | } | |
|
238 | if (size < -1) { | |
|
239 | PyErr_SetString(PyExc_ValueError, | |
|
240 | "cannot read negative amounts less than -1"); | |
|
241 | return NULL; | |
|
242 | } | |
|
239 | 243 | |
|
240 |
|
|
|
241 |
|
|
|
242 | } | |
|
244 | if (size == -1) { | |
|
245 | return PyObject_CallMethod((PyObject *)self, "readall", NULL); | |
|
246 | } | |
|
243 | 247 | |
|
244 |
|
|
|
245 |
|
|
|
246 | } | |
|
248 | if (self->finishedOutput || size == 0) { | |
|
249 | return PyBytes_FromStringAndSize("", 0); | |
|
250 | } | |
|
247 | 251 | |
|
248 |
|
|
|
249 |
|
|
|
250 |
|
|
|
251 | } | |
|
252 | result = PyBytes_FromStringAndSize(NULL, size); | |
|
253 | if (NULL == result) { | |
|
254 | return NULL; | |
|
255 | } | |
|
252 | 256 | |
|
253 |
|
|
|
257 | PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize); | |
|
254 | 258 | |
|
255 |
|
|
|
256 |
|
|
|
257 |
|
|
|
259 | output.dst = resultBuffer; | |
|
260 | output.size = resultSize; | |
|
261 | output.pos = 0; | |
|
258 | 262 | |
|
259 | 263 | readinput: |
|
260 | 264 | |
|
261 |
|
|
|
265 | decompressResult = decompress_input(self, &output); | |
|
262 | 266 | |
|
263 |
|
|
|
264 |
|
|
|
265 |
|
|
|
266 | } | |
|
267 |
|
|
|
268 | else if (1 == decompressResult) { | |
|
269 | self->bytesDecompressed += output.pos; | |
|
267 | if (-1 == decompressResult) { | |
|
268 | Py_XDECREF(result); | |
|
269 | return NULL; | |
|
270 | } | |
|
271 | else if (0 == decompressResult) { | |
|
272 | } | |
|
273 | else if (1 == decompressResult) { | |
|
274 | self->bytesDecompressed += output.pos; | |
|
270 | 275 | |
|
271 |
|
|
|
272 |
|
|
|
273 |
|
|
|
274 | return NULL; | |
|
275 | } | |
|
276 | } | |
|
277 |
|
|
|
278 | } | |
|
279 |
|
|
|
280 |
|
|
|
281 | } | |
|
276 | if (output.pos != output.size) { | |
|
277 | if (safe_pybytes_resize(&result, output.pos)) { | |
|
278 | Py_XDECREF(result); | |
|
279 | return NULL; | |
|
280 | } | |
|
281 | } | |
|
282 | return result; | |
|
283 | } | |
|
284 | else { | |
|
285 | assert(0); | |
|
286 | } | |
|
282 | 287 | |
|
283 |
|
|
|
288 | readResult = read_decompressor_input(self); | |
|
284 | 289 | |
|
285 |
|
|
|
286 |
|
|
|
287 |
|
|
|
288 | } | |
|
289 |
|
|
|
290 | else if (1 == readResult) {} | |
|
291 | else { | |
|
292 | assert(0); | |
|
293 | } | |
|
290 | if (-1 == readResult) { | |
|
291 | Py_XDECREF(result); | |
|
292 | return NULL; | |
|
293 | } | |
|
294 | else if (0 == readResult) { | |
|
295 | } | |
|
296 | else if (1 == readResult) { | |
|
297 | } | |
|
298 | else { | |
|
299 | assert(0); | |
|
300 | } | |
|
294 | 301 | |
|
295 |
|
|
|
296 |
|
|
|
297 | } | |
|
302 | if (self->input.size) { | |
|
303 | goto readinput; | |
|
304 | } | |
|
298 | 305 | |
|
299 |
|
|
|
300 |
|
|
|
306 | /* EOF */ | |
|
307 | self->bytesDecompressed += output.pos; | |
|
301 | 308 | |
|
302 |
|
|
|
303 |
|
|
|
304 |
|
|
|
305 | } | |
|
309 | if (safe_pybytes_resize(&result, output.pos)) { | |
|
310 | Py_XDECREF(result); | |
|
311 | return NULL; | |
|
312 | } | |
|
306 | 313 | |
|
307 |
|
|
|
314 | return result; | |
|
308 | 315 | } |
|
309 | 316 | |
|
310 |
static PyObject* |
|
|
311 | static char* kwlist[] = { | |
|
312 | "size", | |
|
313 | NULL | |
|
314 | }; | |
|
317 | static PyObject *decompressionreader_read1(ZstdDecompressionReader *self, | |
|
318 | PyObject *args, PyObject *kwargs) { | |
|
319 | static char *kwlist[] = {"size", NULL}; | |
|
315 | 320 | |
|
316 |
|
|
|
317 |
|
|
|
318 |
|
|
|
319 |
|
|
|
320 |
|
|
|
321 | Py_ssize_t size = -1; | |
|
322 | PyObject *result = NULL; | |
|
323 | char *resultBuffer; | |
|
324 | Py_ssize_t resultSize; | |
|
325 | ZSTD_outBuffer output; | |
|
321 | 326 | |
|
322 |
|
|
|
323 |
|
|
|
324 |
|
|
|
325 | } | |
|
327 | if (self->closed) { | |
|
328 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
329 | return NULL; | |
|
330 | } | |
|
331 | ||
|
332 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) { | |
|
333 | return NULL; | |
|
334 | } | |
|
326 | 335 | |
|
327 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) { | |
|
328 | return NULL; | |
|
329 | } | |
|
336 | if (size < -1) { | |
|
337 | PyErr_SetString(PyExc_ValueError, | |
|
338 | "cannot read negative amounts less than -1"); | |
|
339 | return NULL; | |
|
340 | } | |
|
330 | 341 | |
|
331 | if (size < -1) { | |
|
332 | PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1"); | |
|
333 | return NULL; | |
|
334 | } | |
|
342 | if (self->finishedOutput || size == 0) { | |
|
343 | return PyBytes_FromStringAndSize("", 0); | |
|
344 | } | |
|
335 | 345 | |
|
336 | if (self->finishedOutput || size == 0) { | |
|
337 | return PyBytes_FromStringAndSize("", 0); | |
|
338 | } | |
|
346 | if (size == -1) { | |
|
347 | size = ZSTD_DStreamOutSize(); | |
|
348 | } | |
|
339 | 349 | |
|
340 | if (size == -1) { | |
|
341 | size = ZSTD_DStreamOutSize(); | |
|
342 | } | |
|
350 | result = PyBytes_FromStringAndSize(NULL, size); | |
|
351 | if (NULL == result) { | |
|
352 | return NULL; | |
|
353 | } | |
|
343 | 354 | |
|
344 | result = PyBytes_FromStringAndSize(NULL, size); | |
|
345 | if (NULL == result) { | |
|
346 | return NULL; | |
|
347 | } | |
|
355 | PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize); | |
|
348 | 356 | |
|
349 | PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize); | |
|
357 | output.dst = resultBuffer; | |
|
358 | output.size = resultSize; | |
|
359 | output.pos = 0; | |
|
350 | 360 | |
|
351 | output.dst = resultBuffer; | |
|
352 | output.size = resultSize; | |
|
353 | output.pos = 0; | |
|
361 | /* read1() is supposed to use at most 1 read() from the underlying stream. | |
|
362 | * However, we can't satisfy this requirement with decompression due to the | |
|
363 | * nature of how decompression works. Our strategy is to read + decompress | |
|
364 | * until we get any output, at which point we return. This satisfies the | |
|
365 | * intent of the read1() API to limit read operations. | |
|
366 | */ | |
|
367 | while (!self->finishedInput) { | |
|
368 | int readResult, decompressResult; | |
|
354 | 369 | |
|
355 | /* read1() is supposed to use at most 1 read() from the underlying stream. | |
|
356 | * However, we can't satisfy this requirement with decompression due to the | |
|
357 | * nature of how decompression works. Our strategy is to read + decompress | |
|
358 | * until we get any output, at which point we return. This satisfies the | |
|
359 | * intent of the read1() API to limit read operations. | |
|
360 | */ | |
|
361 | while (!self->finishedInput) { | |
|
362 | int readResult, decompressResult; | |
|
370 | readResult = read_decompressor_input(self); | |
|
371 | if (-1 == readResult) { | |
|
372 | Py_XDECREF(result); | |
|
373 | return NULL; | |
|
374 | } | |
|
375 | else if (0 == readResult || 1 == readResult) { | |
|
376 | } | |
|
377 | else { | |
|
378 | assert(0); | |
|
379 | } | |
|
363 | 380 | |
|
364 | readResult = read_decompressor_input(self); | |
|
365 | if (-1 == readResult) { | |
|
366 | Py_XDECREF(result); | |
|
367 | return NULL; | |
|
368 | } | |
|
369 | else if (0 == readResult || 1 == readResult) { } | |
|
370 | else { | |
|
371 | assert(0); | |
|
372 | } | |
|
381 | decompressResult = decompress_input(self, &output); | |
|
373 | 382 | |
|
374 | decompressResult = decompress_input(self, &output); | |
|
375 | ||
|
376 | if (-1 == decompressResult) { | |
|
377 | Py_XDECREF(result); | |
|
378 | return NULL; | |
|
379 | } | |
|
380 | else if (0 == decompressResult || 1 == decompressResult) { } | |
|
381 | else { | |
|
382 | assert(0); | |
|
383 | } | |
|
383 | if (-1 == decompressResult) { | |
|
384 | Py_XDECREF(result); | |
|
385 | return NULL; | |
|
386 | } | |
|
387 | else if (0 == decompressResult || 1 == decompressResult) { | |
|
388 | } | |
|
389 | else { | |
|
390 | assert(0); | |
|
391 | } | |
|
384 | 392 | |
|
385 |
|
|
|
386 |
|
|
|
387 | } | |
|
388 | } | |
|
393 | if (output.pos) { | |
|
394 | break; | |
|
395 | } | |
|
396 | } | |
|
389 | 397 | |
|
390 |
|
|
|
391 |
|
|
|
392 |
|
|
|
393 |
|
|
|
394 | } | |
|
398 | self->bytesDecompressed += output.pos; | |
|
399 | if (safe_pybytes_resize(&result, output.pos)) { | |
|
400 | Py_XDECREF(result); | |
|
401 | return NULL; | |
|
402 | } | |
|
395 | 403 | |
|
396 |
|
|
|
404 | return result; | |
|
397 | 405 | } |
|
398 | 406 | |
|
399 |
static PyObject* |
|
|
400 | Py_buffer dest; | |
|
401 | ZSTD_outBuffer output; | |
|
402 | int decompressResult, readResult; | |
|
403 | PyObject* result = NULL; | |
|
407 | static PyObject *decompressionreader_readinto(ZstdDecompressionReader *self, | |
|
408 | PyObject *args) { | |
|
409 | Py_buffer dest; | |
|
410 | ZSTD_outBuffer output; | |
|
411 | int decompressResult, readResult; | |
|
412 | PyObject *result = NULL; | |
|
404 | 413 | |
|
405 |
|
|
|
406 |
|
|
|
407 |
|
|
|
408 | } | |
|
414 | if (self->closed) { | |
|
415 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
416 | return NULL; | |
|
417 | } | |
|
409 | 418 | |
|
410 |
|
|
|
411 |
|
|
|
412 | } | |
|
413 | ||
|
414 | if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) { | |
|
415 | return NULL; | |
|
416 | } | |
|
419 | if (self->finishedOutput) { | |
|
420 | return PyLong_FromLong(0); | |
|
421 | } | |
|
417 | 422 | |
|
418 | if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) { | |
|
419 | PyErr_SetString(PyExc_ValueError, | |
|
420 | "destination buffer should be contiguous and have at most one dimension"); | |
|
421 | goto finally; | |
|
422 | } | |
|
423 | if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) { | |
|
424 | return NULL; | |
|
425 | } | |
|
423 | 426 | |
|
424 |
|
|
|
425 |
|
|
|
426 |
|
|
|
427 | output.dst = dest.buf; | |
|
428 | output.size = dest.len; | |
|
429 | output.pos = 0; | |
|
427 | 430 | |
|
428 | 431 | readinput: |
|
429 | 432 | |
|
430 |
|
|
|
433 | decompressResult = decompress_input(self, &output); | |
|
431 | 434 | |
|
432 |
|
|
|
433 |
|
|
|
434 | } | |
|
435 |
|
|
|
436 | else if (1 == decompressResult) { | |
|
437 | self->bytesDecompressed += output.pos; | |
|
438 | result = PyLong_FromSize_t(output.pos); | |
|
439 | goto finally; | |
|
440 | } | |
|
441 | else { | |
|
442 | assert(0); | |
|
443 | } | |
|
435 | if (-1 == decompressResult) { | |
|
436 | goto finally; | |
|
437 | } | |
|
438 | else if (0 == decompressResult) { | |
|
439 | } | |
|
440 | else if (1 == decompressResult) { | |
|
441 | self->bytesDecompressed += output.pos; | |
|
442 | result = PyLong_FromSize_t(output.pos); | |
|
443 | goto finally; | |
|
444 | } | |
|
445 | else { | |
|
446 | assert(0); | |
|
447 | } | |
|
444 | 448 | |
|
445 |
|
|
|
449 | readResult = read_decompressor_input(self); | |
|
446 | 450 | |
|
447 |
|
|
|
448 |
|
|
|
449 | } | |
|
450 |
|
|
|
451 | else if (1 == readResult) {} | |
|
452 | else { | |
|
453 | assert(0); | |
|
454 | } | |
|
451 | if (-1 == readResult) { | |
|
452 | goto finally; | |
|
453 | } | |
|
454 | else if (0 == readResult) { | |
|
455 | } | |
|
456 | else if (1 == readResult) { | |
|
457 | } | |
|
458 | else { | |
|
459 | assert(0); | |
|
460 | } | |
|
455 | 461 | |
|
456 |
|
|
|
457 |
|
|
|
458 | } | |
|
462 | if (self->input.size) { | |
|
463 | goto readinput; | |
|
464 | } | |
|
459 | 465 | |
|
460 |
|
|
|
461 |
|
|
|
462 |
|
|
|
466 | /* EOF */ | |
|
467 | self->bytesDecompressed += output.pos; | |
|
468 | result = PyLong_FromSize_t(output.pos); | |
|
463 | 469 | |
|
464 | 470 | finally: |
|
465 |
|
|
|
471 | PyBuffer_Release(&dest); | |
|
466 | 472 | |
|
467 |
|
|
|
473 | return result; | |
|
468 | 474 | } |
|
469 | 475 | |
|
470 |
static PyObject* |
|
|
471 | Py_buffer dest; | |
|
472 | ZSTD_outBuffer output; | |
|
473 | PyObject* result = NULL; | |
|
476 | static PyObject *decompressionreader_readinto1(ZstdDecompressionReader *self, | |
|
477 | PyObject *args) { | |
|
478 | Py_buffer dest; | |
|
479 | ZSTD_outBuffer output; | |
|
480 | PyObject *result = NULL; | |
|
474 | 481 | |
|
475 |
|
|
|
476 |
|
|
|
477 |
|
|
|
478 | } | |
|
479 | ||
|
480 | if (self->finishedOutput) { | |
|
481 | return PyLong_FromLong(0); | |
|
482 | } | |
|
482 | if (self->closed) { | |
|
483 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
484 | return NULL; | |
|
485 | } | |
|
483 | 486 | |
|
484 | if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) { | |
|
485 | return NULL; | |
|
486 | } | |
|
487 | if (self->finishedOutput) { | |
|
488 | return PyLong_FromLong(0); | |
|
489 | } | |
|
490 | ||
|
491 | if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) { | |
|
492 | return NULL; | |
|
493 | } | |
|
487 | 494 | |
|
488 | if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) { | |
|
489 | PyErr_SetString(PyExc_ValueError, | |
|
490 | "destination buffer should be contiguous and have at most one dimension"); | |
|
491 | goto finally; | |
|
492 | } | |
|
495 | output.dst = dest.buf; | |
|
496 | output.size = dest.len; | |
|
497 | output.pos = 0; | |
|
493 | 498 | |
|
494 | output.dst = dest.buf; | |
|
495 | output.size = dest.len; | |
|
496 | output.pos = 0; | |
|
499 | while (!self->finishedInput && !self->finishedOutput) { | |
|
500 | int decompressResult, readResult; | |
|
501 | ||
|
502 | readResult = read_decompressor_input(self); | |
|
497 | 503 | |
|
498 | while (!self->finishedInput && !self->finishedOutput) { | |
|
499 | int decompressResult, readResult; | |
|
500 | ||
|
501 | readResult = read_decompressor_input(self); | |
|
504 | if (-1 == readResult) { | |
|
505 | goto finally; | |
|
506 | } | |
|
507 | else if (0 == readResult || 1 == readResult) { | |
|
508 | } | |
|
509 | else { | |
|
510 | assert(0); | |
|
511 | } | |
|
502 | 512 | |
|
503 | if (-1 == readResult) { | |
|
504 | goto finally; | |
|
505 | } | |
|
506 | else if (0 == readResult || 1 == readResult) {} | |
|
507 | else { | |
|
508 | assert(0); | |
|
509 | } | |
|
510 | ||
|
511 | decompressResult = decompress_input(self, &output); | |
|
513 | decompressResult = decompress_input(self, &output); | |
|
512 | 514 | |
|
513 |
|
|
|
514 |
|
|
|
515 | } | |
|
516 |
|
|
|
517 | else { | |
|
518 | assert(0); | |
|
519 | } | |
|
515 | if (-1 == decompressResult) { | |
|
516 | goto finally; | |
|
517 | } | |
|
518 | else if (0 == decompressResult || 1 == decompressResult) { | |
|
519 | } | |
|
520 | else { | |
|
521 | assert(0); | |
|
522 | } | |
|
520 | 523 | |
|
521 |
|
|
|
522 | break; | |
|
523 | } | |
|
524 | } | |
|
524 | if (output.pos) { | |
|
525 | break; | |
|
526 | } | |
|
527 | } | |
|
525 | 528 | |
|
526 |
|
|
|
527 |
|
|
|
529 | self->bytesDecompressed += output.pos; | |
|
530 | result = PyLong_FromSize_t(output.pos); | |
|
528 | 531 | |
|
529 | 532 | finally: |
|
530 |
|
|
|
533 | PyBuffer_Release(&dest); | |
|
531 | 534 | |
|
532 |
|
|
|
535 | return result; | |
|
533 | 536 | } |
|
534 | 537 | |
|
535 |
static PyObject* |
|
|
536 |
|
|
|
537 |
|
|
|
538 |
|
|
|
538 | static PyObject *decompressionreader_readall(PyObject *self) { | |
|
539 | PyObject *chunks = NULL; | |
|
540 | PyObject *empty = NULL; | |
|
541 | PyObject *result = NULL; | |
|
539 | 542 | |
|
540 |
|
|
|
541 |
|
|
|
542 |
|
|
|
543 |
|
|
|
544 | */ | |
|
545 |
|
|
|
546 |
|
|
|
547 |
|
|
|
548 | } | |
|
543 | /* Our strategy is to collect chunks into a list then join all the | |
|
544 | * chunks at the end. We could potentially use e.g. an io.BytesIO. But | |
|
545 | * this feels simple enough to implement and avoids potentially expensive | |
|
546 | * reallocations of large buffers. | |
|
547 | */ | |
|
548 | chunks = PyList_New(0); | |
|
549 | if (NULL == chunks) { | |
|
550 | return NULL; | |
|
551 | } | |
|
549 | 552 | |
|
550 |
|
|
|
551 |
|
|
|
552 |
|
|
|
553 |
|
|
|
554 |
|
|
|
555 | } | |
|
553 | while (1) { | |
|
554 | PyObject *chunk = PyObject_CallMethod(self, "read", "i", 1048576); | |
|
555 | if (NULL == chunk) { | |
|
556 | Py_DECREF(chunks); | |
|
557 | return NULL; | |
|
558 | } | |
|
556 | 559 | |
|
557 |
|
|
|
558 |
|
|
|
559 | break; | |
|
560 | } | |
|
560 | if (!PyBytes_Size(chunk)) { | |
|
561 | Py_DECREF(chunk); | |
|
562 | break; | |
|
563 | } | |
|
561 | 564 | |
|
562 |
|
|
|
563 |
|
|
|
564 |
|
|
|
565 |
|
|
|
566 | } | |
|
565 | if (PyList_Append(chunks, chunk)) { | |
|
566 | Py_DECREF(chunk); | |
|
567 | Py_DECREF(chunks); | |
|
568 | return NULL; | |
|
569 | } | |
|
567 | 570 | |
|
568 |
|
|
|
569 | } | |
|
571 | Py_DECREF(chunk); | |
|
572 | } | |
|
570 | 573 | |
|
571 |
|
|
|
572 |
|
|
|
573 |
|
|
|
574 |
|
|
|
575 | } | |
|
574 | empty = PyBytes_FromStringAndSize("", 0); | |
|
575 | if (NULL == empty) { | |
|
576 | Py_DECREF(chunks); | |
|
577 | return NULL; | |
|
578 | } | |
|
576 | 579 | |
|
577 |
|
|
|
580 | result = PyObject_CallMethod(empty, "join", "O", chunks); | |
|
578 | 581 | |
|
579 |
|
|
|
580 |
|
|
|
582 | Py_DECREF(empty); | |
|
583 | Py_DECREF(chunks); | |
|
581 | 584 | |
|
582 |
|
|
|
585 | return result; | |
|
583 | 586 | } |
|
584 | 587 | |
|
585 |
static PyObject* |
|
|
586 | set_unsupported_operation(); | |
|
587 | return NULL; | |
|
588 | static PyObject *decompressionreader_readline(PyObject *self, PyObject *args, | |
|
589 | PyObject *kwargs) { | |
|
590 | set_io_unsupported_operation(); | |
|
591 | return NULL; | |
|
588 | 592 | } |
|
589 | 593 | |
|
590 |
static PyObject* |
|
|
591 | set_unsupported_operation(); | |
|
592 | return NULL; | |
|
594 | static PyObject *decompressionreader_readlines(PyObject *self, PyObject *args, | |
|
595 | PyObject *kwargs) { | |
|
596 | set_io_unsupported_operation(); | |
|
597 | return NULL; | |
|
593 | 598 | } |
|
594 | 599 | |
|
595 |
static PyObject* |
|
|
596 | Py_ssize_t pos; | |
|
597 | int whence = 0; | |
|
598 | unsigned long long readAmount = 0; | |
|
599 | size_t defaultOutSize = ZSTD_DStreamOutSize(); | |
|
600 | static PyObject *decompressionreader_seek(ZstdDecompressionReader *self, | |
|
601 | PyObject *args) { | |
|
602 | Py_ssize_t pos; | |
|
603 | int whence = 0; | |
|
604 | unsigned long long readAmount = 0; | |
|
605 | size_t defaultOutSize = ZSTD_DStreamOutSize(); | |
|
600 | 606 | |
|
601 |
|
|
|
602 |
|
|
|
603 |
|
|
|
604 | } | |
|
607 | if (self->closed) { | |
|
608 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
609 | return NULL; | |
|
610 | } | |
|
611 | ||
|
612 | if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) { | |
|
613 | return NULL; | |
|
614 | } | |
|
605 | 615 | |
|
606 | if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) { | |
|
607 | return NULL; | |
|
608 | } | |
|
616 | if (whence == SEEK_SET) { | |
|
617 | if (pos < 0) { | |
|
618 | PyErr_SetString(PyExc_OSError, | |
|
619 | "cannot seek to negative position with SEEK_SET"); | |
|
620 | return NULL; | |
|
621 | } | |
|
609 | 622 | |
|
610 | if (whence == SEEK_SET) { | |
|
611 | if (pos < 0) { | |
|
612 | PyErr_SetString(PyExc_ValueError, | |
|
613 | "cannot seek to negative position with SEEK_SET"); | |
|
614 | return NULL; | |
|
615 | } | |
|
623 | if ((unsigned long long)pos < self->bytesDecompressed) { | |
|
624 | PyErr_SetString(PyExc_OSError, | |
|
625 | "cannot seek zstd decompression stream backwards"); | |
|
626 | return NULL; | |
|
627 | } | |
|
616 | 628 | |
|
617 |
|
|
|
618 | PyErr_SetString(PyExc_ValueError, | |
|
619 | "cannot seek zstd decompression stream backwards"); | |
|
620 | return NULL; | |
|
621 | } | |
|
629 | readAmount = pos - self->bytesDecompressed; | |
|
630 | } | |
|
631 | else if (whence == SEEK_CUR) { | |
|
632 | if (pos < 0) { | |
|
633 | PyErr_SetString(PyExc_OSError, | |
|
634 | "cannot seek zstd decompression stream backwards"); | |
|
635 | return NULL; | |
|
636 | } | |
|
622 | 637 | |
|
623 | readAmount = pos - self->bytesDecompressed; | |
|
624 | } | |
|
625 |
|
|
|
626 | if (pos < 0) { | |
|
627 | PyErr_SetString(PyExc_ValueError, | |
|
628 | "cannot seek zstd decompression stream backwards"); | |
|
629 | return NULL; | |
|
630 | } | |
|
631 | ||
|
632 | readAmount = pos; | |
|
633 | } | |
|
634 | else if (whence == SEEK_END) { | |
|
635 | /* We /could/ support this with pos==0. But let's not do that until someone | |
|
636 | needs it. */ | |
|
637 | PyErr_SetString(PyExc_ValueError, | |
|
638 | "zstd decompression streams cannot be seeked with SEEK_END"); | |
|
639 | return NULL; | |
|
640 | } | |
|
638 | readAmount = pos; | |
|
639 | } | |
|
640 | else if (whence == SEEK_END) { | |
|
641 | /* We /could/ support this with pos==0. But let's not do that until | |
|
642 | someone needs it. */ | |
|
643 | PyErr_SetString( | |
|
644 | PyExc_OSError, | |
|
645 | "zstd decompression streams cannot be seeked with SEEK_END"); | |
|
646 | return NULL; | |
|
647 | } | |
|
641 | 648 | |
|
642 |
|
|
|
643 |
|
|
|
644 |
|
|
|
645 |
|
|
|
646 |
|
|
|
647 |
|
|
|
648 | readAmount < defaultOutSize ? readAmount : defaultOutSize); | |
|
649 | /* It is a bit inefficient to do this via the Python API. But since there | |
|
650 | is a bit of state tracking involved to read from this type, it is the | |
|
651 | easiest to implement. */ | |
|
652 | while (readAmount) { | |
|
653 | Py_ssize_t readSize; | |
|
654 | PyObject *readResult = PyObject_CallMethod( | |
|
655 | (PyObject *)self, "read", "K", | |
|
656 | readAmount < defaultOutSize ? readAmount : defaultOutSize); | |
|
649 | 657 | |
|
650 |
|
|
|
651 |
|
|
|
652 | } | |
|
658 | if (!readResult) { | |
|
659 | return NULL; | |
|
660 | } | |
|
653 | 661 | |
|
654 |
|
|
|
662 | readSize = PyBytes_GET_SIZE(readResult); | |
|
655 | 663 | |
|
656 |
|
|
|
664 | Py_CLEAR(readResult); | |
|
657 | 665 | |
|
658 |
|
|
|
659 |
|
|
|
660 | break; | |
|
661 | } | |
|
666 | /* Empty read means EOF. */ | |
|
667 | if (!readSize) { | |
|
668 | break; | |
|
669 | } | |
|
662 | 670 | |
|
663 |
|
|
|
664 | } | |
|
671 | readAmount -= readSize; | |
|
672 | } | |
|
665 | 673 | |
|
666 |
|
|
|
674 | return PyLong_FromUnsignedLongLong(self->bytesDecompressed); | |
|
667 | 675 | } |
|
668 | 676 | |
|
669 |
static PyObject* |
|
|
670 |
|
|
|
671 |
|
|
|
677 | static PyObject *decompressionreader_tell(ZstdDecompressionReader *self) { | |
|
678 | /* TODO should this raise OSError since stream isn't seekable? */ | |
|
679 | return PyLong_FromUnsignedLongLong(self->bytesDecompressed); | |
|
672 | 680 | } |
|
673 | 681 | |
|
674 |
static PyObject* |
|
|
675 |
|
|
|
676 |
|
|
|
677 | } | |
|
678 | ||
|
679 | static PyObject* reader_writelines(PyObject* self, PyObject* args) { | |
|
680 | set_unsupported_operation(); | |
|
681 | return NULL; | |
|
682 | } | |
|
683 | ||
|
684 | static PyObject* reader_iter(PyObject* self) { | |
|
685 | set_unsupported_operation(); | |
|
686 | return NULL; | |
|
682 | static PyObject *decompressionreader_write(PyObject *self, PyObject *args) { | |
|
683 | set_io_unsupported_operation(); | |
|
684 | return NULL; | |
|
687 | 685 | } |
|
688 | 686 | |
|
689 |
static PyObject* |
|
|
690 | set_unsupported_operation(); | |
|
691 | return NULL; | |
|
687 | static PyObject *decompressionreader_writelines(PyObject *self, | |
|
688 | PyObject *args) { | |
|
689 | set_io_unsupported_operation(); | |
|
690 | return NULL; | |
|
691 | } | |
|
692 | ||
|
693 | static PyObject *decompressionreader_iter(PyObject *self) { | |
|
694 | set_io_unsupported_operation(); | |
|
695 | return NULL; | |
|
696 | } | |
|
697 | ||
|
698 | static PyObject *decompressionreader_iternext(PyObject *self) { | |
|
699 | set_io_unsupported_operation(); | |
|
700 | return NULL; | |
|
692 | 701 | } |
|
693 | 702 | |
|
694 | static PyMethodDef reader_methods[] = { | |
|
695 |
|
|
|
696 |
|
|
|
697 |
|
|
|
698 |
|
|
|
699 |
|
|
|
700 |
|
|
|
701 |
|
|
|
702 | { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") }, | |
|
703 |
|
|
|
704 |
|
|
|
705 |
|
|
|
706 | PyDoc_STR("read compressed data") }, | |
|
707 | { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS, | |
|
708 |
|
|
|
709 | { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL }, | |
|
710 | { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL }, | |
|
711 | { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") }, | |
|
712 | { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") }, | |
|
713 | { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") }, | |
|
714 | { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") }, | |
|
715 | { "seekable", (PyCFunction)reader_seekable, METH_NOARGS, | |
|
716 | PyDoc_STR("Returns True") }, | |
|
717 | { "tell", (PyCFunction)reader_tell, METH_NOARGS, | |
|
718 | PyDoc_STR("Returns current number of bytes compressed") }, | |
|
719 | { "writable", (PyCFunction)reader_writable, METH_NOARGS, | |
|
720 | PyDoc_STR("Returns False") }, | |
|
721 | { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") }, | |
|
722 | { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") }, | |
|
723 | { NULL, NULL } | |
|
724 | }; | |
|
703 | static PyMethodDef decompressionreader_methods[] = { | |
|
704 | {"__enter__", (PyCFunction)decompressionreader_enter, METH_NOARGS, | |
|
705 | PyDoc_STR("Enter a compression context")}, | |
|
706 | {"__exit__", (PyCFunction)decompressionreader_exit, METH_VARARGS, | |
|
707 | PyDoc_STR("Exit a compression context")}, | |
|
708 | {"close", (PyCFunction)decompressionreader_close, METH_NOARGS, | |
|
709 | PyDoc_STR("Close the stream so it cannot perform any more operations")}, | |
|
710 | {"flush", (PyCFunction)decompressionreader_flush, METH_NOARGS, | |
|
711 | PyDoc_STR("no-ops")}, | |
|
712 | {"isatty", (PyCFunction)decompressionreader_isatty, METH_NOARGS, | |
|
713 | PyDoc_STR("Returns False")}, | |
|
714 | {"readable", (PyCFunction)decompressionreader_readable, METH_NOARGS, | |
|
715 | PyDoc_STR("Returns True")}, | |
|
716 | {"read", (PyCFunction)decompressionreader_read, | |
|
717 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data")}, | |
|
718 | {"read1", (PyCFunction)decompressionreader_read1, | |
|
719 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("read compressed data")}, | |
|
720 | {"readinto", (PyCFunction)decompressionreader_readinto, METH_VARARGS, NULL}, | |
|
721 | {"readinto1", (PyCFunction)decompressionreader_readinto1, METH_VARARGS, | |
|
722 | NULL}, | |
|
723 | {"readall", (PyCFunction)decompressionreader_readall, METH_NOARGS, | |
|
724 | PyDoc_STR("Not implemented")}, | |
|
725 | {"readline", (PyCFunction)decompressionreader_readline, | |
|
726 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Not implemented")}, | |
|
727 | {"readlines", (PyCFunction)decompressionreader_readlines, | |
|
728 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Not implemented")}, | |
|
729 | {"seek", (PyCFunction)decompressionreader_seek, METH_VARARGS, | |
|
730 | PyDoc_STR("Seek the stream")}, | |
|
731 | {"seekable", (PyCFunction)decompressionreader_seekable, METH_NOARGS, | |
|
732 | PyDoc_STR("Returns False")}, | |
|
733 | {"tell", (PyCFunction)decompressionreader_tell, METH_NOARGS, | |
|
734 | PyDoc_STR("Returns current number of bytes compressed")}, | |
|
735 | {"writable", (PyCFunction)decompressionreader_writable, METH_NOARGS, | |
|
736 | PyDoc_STR("Returns False")}, | |
|
737 | {"write", (PyCFunction)decompressionreader_write, METH_VARARGS, | |
|
738 | PyDoc_STR("unsupported operation")}, | |
|
739 | {"writelines", (PyCFunction)decompressionreader_writelines, METH_VARARGS, | |
|
740 | PyDoc_STR("unsupported operation")}, | |
|
741 | {NULL, NULL}}; | |
|
725 | 742 | |
|
726 | static PyMemberDef reader_members[] = { | |
|
727 |
|
|
|
728 |
|
|
|
729 | { NULL } | |
|
743 | static PyMemberDef decompressionreader_members[] = { | |
|
744 | {"closed", T_BOOL, offsetof(ZstdDecompressionReader, closed), READONLY, | |
|
745 | "whether stream is closed"}, | |
|
746 | {NULL}}; | |
|
747 | ||
|
748 | PyType_Slot ZstdDecompressionReaderSlots[] = { | |
|
749 | {Py_tp_dealloc, decompressionreader_dealloc}, | |
|
750 | {Py_tp_iter, decompressionreader_iter}, | |
|
751 | {Py_tp_iternext, decompressionreader_iternext}, | |
|
752 | {Py_tp_methods, decompressionreader_methods}, | |
|
753 | {Py_tp_members, decompressionreader_members}, | |
|
754 | {Py_tp_new, PyType_GenericNew}, | |
|
755 | {0, NULL}, | |
|
730 | 756 | }; |
|
731 | 757 | |
|
732 |
PyType |
|
|
733 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
734 |
|
|
|
735 | sizeof(ZstdDecompressionReader), /* tp_basicsize */ | |
|
736 | 0, /* tp_itemsize */ | |
|
737 | (destructor)reader_dealloc, /* tp_dealloc */ | |
|
738 | 0, /* tp_print */ | |
|
739 | 0, /* tp_getattr */ | |
|
740 | 0, /* tp_setattr */ | |
|
741 | 0, /* tp_compare */ | |
|
742 | 0, /* tp_repr */ | |
|
743 | 0, /* tp_as_number */ | |
|
744 | 0, /* tp_as_sequence */ | |
|
745 | 0, /* tp_as_mapping */ | |
|
746 | 0, /* tp_hash */ | |
|
747 | 0, /* tp_call */ | |
|
748 | 0, /* tp_str */ | |
|
749 | 0, /* tp_getattro */ | |
|
750 | 0, /* tp_setattro */ | |
|
751 | 0, /* tp_as_buffer */ | |
|
752 | Py_TPFLAGS_DEFAULT, /* tp_flags */ | |
|
753 | 0, /* tp_doc */ | |
|
754 | 0, /* tp_traverse */ | |
|
755 | 0, /* tp_clear */ | |
|
756 | 0, /* tp_richcompare */ | |
|
757 | 0, /* tp_weaklistoffset */ | |
|
758 | reader_iter, /* tp_iter */ | |
|
759 | reader_iternext, /* tp_iternext */ | |
|
760 | reader_methods, /* tp_methods */ | |
|
761 | reader_members, /* tp_members */ | |
|
762 | 0, /* tp_getset */ | |
|
763 | 0, /* tp_base */ | |
|
764 | 0, /* tp_dict */ | |
|
765 | 0, /* tp_descr_get */ | |
|
766 | 0, /* tp_descr_set */ | |
|
767 | 0, /* tp_dictoffset */ | |
|
768 | 0, /* tp_init */ | |
|
769 | 0, /* tp_alloc */ | |
|
770 | PyType_GenericNew, /* tp_new */ | |
|
758 | PyType_Spec ZstdDecompressionReaderSpec = { | |
|
759 | "zstd.ZstdDecompressionReader", | |
|
760 | sizeof(ZstdDecompressionReader), | |
|
761 | 0, | |
|
762 | Py_TPFLAGS_DEFAULT, | |
|
763 | ZstdDecompressionReaderSlots, | |
|
771 | 764 | }; |
|
772 | 765 | |
|
766 | PyTypeObject *ZstdDecompressionReaderType; | |
|
773 | 767 | |
|
774 |
void decompressionreader_module_init(PyObject* |
|
|
775 |
|
|
|
768 | void decompressionreader_module_init(PyObject *mod) { | |
|
769 | /* TODO make reader a sub-class of io.RawIOBase */ | |
|
776 | 770 | |
|
777 |
|
|
|
778 |
|
|
|
779 | return; | |
|
780 | } | |
|
771 | ZstdDecompressionReaderType = | |
|
772 | (PyTypeObject *)PyType_FromSpec(&ZstdDecompressionReaderSpec); | |
|
773 | if (PyType_Ready(ZstdDecompressionReaderType) < 0) { | |
|
774 | return; | |
|
775 | } | |
|
776 | ||
|
777 | Py_INCREF((PyObject *)ZstdDecompressionReaderType); | |
|
778 | PyModule_AddObject(mod, "ZstdDecompressionReader", | |
|
779 | (PyObject *)ZstdDecompressionReaderType); | |
|
781 | 780 | } |
@@ -1,295 +1,273 | |||
|
1 | 1 | /** |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This software may be modified and distributed under the terms | |
|
6 | * of the BSD license. See the LICENSE file for details. | |
|
7 | */ | |
|
8 | 8 | |
|
9 | 9 | #include "python-zstandard.h" |
|
10 | 10 | |
|
11 |
extern PyObject* |
|
|
12 | ||
|
13 | PyDoc_STRVAR(ZstdDecompressionWriter__doc, | |
|
14 | """A context manager used for writing decompressed output.\n" | |
|
15 | ); | |
|
11 | extern PyObject *ZstdError; | |
|
16 | 12 | |
|
17 |
static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter* |
|
|
18 |
|
|
|
19 |
|
|
|
13 | static void ZstdDecompressionWriter_dealloc(ZstdDecompressionWriter *self) { | |
|
14 | Py_XDECREF(self->decompressor); | |
|
15 | Py_XDECREF(self->writer); | |
|
20 | 16 | |
|
21 |
|
|
|
17 | PyObject_Del(self); | |
|
22 | 18 | } |
|
23 | 19 | |
|
24 |
static PyObject* |
|
|
25 |
|
|
|
26 |
|
|
|
27 |
|
|
|
28 | } | |
|
20 | static PyObject *ZstdDecompressionWriter_enter(ZstdDecompressionWriter *self) { | |
|
21 | if (self->closed) { | |
|
22 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
23 | return NULL; | |
|
24 | } | |
|
29 | 25 | |
|
30 |
|
|
|
31 |
|
|
|
32 |
|
|
|
33 | } | |
|
26 | if (self->entered) { | |
|
27 | PyErr_SetString(ZstdError, "cannot __enter__ multiple times"); | |
|
28 | return NULL; | |
|
29 | } | |
|
34 | 30 | |
|
35 |
|
|
|
31 | self->entered = 1; | |
|
36 | 32 | |
|
37 |
|
|
|
38 |
|
|
|
33 | Py_INCREF(self); | |
|
34 | return (PyObject *)self; | |
|
39 | 35 | } |
|
40 | 36 | |
|
41 |
static PyObject* |
|
|
42 | self->entered = 0; | |
|
37 | static PyObject *ZstdDecompressionWriter_exit(ZstdDecompressionWriter *self, | |
|
38 | PyObject *args) { | |
|
39 | self->entered = 0; | |
|
43 | 40 | |
|
44 |
|
|
|
45 |
|
|
|
46 | } | |
|
41 | if (NULL == PyObject_CallMethod((PyObject *)self, "close", NULL)) { | |
|
42 | return NULL; | |
|
43 | } | |
|
47 | 44 | |
|
48 |
|
|
|
45 | Py_RETURN_FALSE; | |
|
49 | 46 | } |
|
50 | 47 | |
|
51 | static PyObject* ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter* self) { | |
|
52 | return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx)); | |
|
48 | static PyObject * | |
|
49 | ZstdDecompressionWriter_memory_size(ZstdDecompressionWriter *self) { | |
|
50 | return PyLong_FromSize_t(ZSTD_sizeof_DCtx(self->decompressor->dctx)); | |
|
53 | 51 | } |
|
54 | 52 | |
|
55 |
static PyObject* |
|
|
56 | static char* kwlist[] = { | |
|
57 | "data", | |
|
58 | NULL | |
|
59 | }; | |
|
53 | static PyObject *ZstdDecompressionWriter_write(ZstdDecompressionWriter *self, | |
|
54 | PyObject *args, | |
|
55 | PyObject *kwargs) { | |
|
56 | static char *kwlist[] = {"data", NULL}; | |
|
60 | 57 | |
|
61 |
|
|
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
66 |
|
|
|
67 |
|
|
|
58 | PyObject *result = NULL; | |
|
59 | Py_buffer source; | |
|
60 | size_t zresult = 0; | |
|
61 | ZSTD_inBuffer input; | |
|
62 | ZSTD_outBuffer output; | |
|
63 | PyObject *res; | |
|
64 | Py_ssize_t totalWrite = 0; | |
|
68 | 65 | |
|
69 | #if PY_MAJOR_VERSION >= 3 | |
|
70 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", | |
|
71 | #else | |
|
72 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:write", | |
|
73 | #endif | |
|
74 | kwlist, &source)) { | |
|
75 | return NULL; | |
|
76 | } | |
|
66 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:write", kwlist, | |
|
67 | &source)) { | |
|
68 | return NULL; | |
|
69 | } | |
|
77 | 70 | |
|
78 | if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) { | |
|
79 |
|
|
|
80 | "data buffer should be contiguous and have at most one dimension"); | |
|
81 | goto finally; | |
|
82 | } | |
|
71 | if (self->closed) { | |
|
72 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
73 | return NULL; | |
|
74 | } | |
|
83 | 75 | |
|
84 | if (self->closed) { | |
|
85 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
86 | return NULL; | |
|
87 | } | |
|
76 | output.dst = PyMem_Malloc(self->outSize); | |
|
77 | if (!output.dst) { | |
|
78 | PyErr_NoMemory(); | |
|
79 | goto finally; | |
|
80 | } | |
|
81 | output.size = self->outSize; | |
|
82 | output.pos = 0; | |
|
88 | 83 | |
|
89 | output.dst = PyMem_Malloc(self->outSize); | |
|
90 | if (!output.dst) { | |
|
91 | PyErr_NoMemory(); | |
|
92 | goto finally; | |
|
93 | } | |
|
94 | output.size = self->outSize; | |
|
95 | output.pos = 0; | |
|
84 | input.src = source.buf; | |
|
85 | input.size = source.len; | |
|
86 | input.pos = 0; | |
|
96 | 87 | |
|
97 | input.src = source.buf; | |
|
98 | input.size = source.len; | |
|
99 | input.pos = 0; | |
|
88 | while (input.pos < (size_t)source.len) { | |
|
89 | Py_BEGIN_ALLOW_THREADS zresult = | |
|
90 | ZSTD_decompressStream(self->decompressor->dctx, &output, &input); | |
|
91 | Py_END_ALLOW_THREADS | |
|
100 | 92 | |
|
101 | while (input.pos < (size_t)source.len) { | |
|
102 | Py_BEGIN_ALLOW_THREADS | |
|
103 | zresult = ZSTD_decompressStream(self->decompressor->dctx, &output, &input); | |
|
104 | Py_END_ALLOW_THREADS | |
|
93 | if (ZSTD_isError(zresult)) { | |
|
94 | PyMem_Free(output.dst); | |
|
95 | PyErr_Format(ZstdError, "zstd decompress error: %s", | |
|
96 | ZSTD_getErrorName(zresult)); | |
|
97 | goto finally; | |
|
98 | } | |
|
105 | 99 | |
|
106 | if (ZSTD_isError(zresult)) { | |
|
107 | PyMem_Free(output.dst); | |
|
108 | PyErr_Format(ZstdError, "zstd decompress error: %s", | |
|
109 | ZSTD_getErrorName(zresult)); | |
|
110 | goto finally; | |
|
111 | } | |
|
100 | if (output.pos) { | |
|
101 | res = PyObject_CallMethod(self->writer, "write", "y#", output.dst, | |
|
102 | output.pos); | |
|
103 | if (NULL == res) { | |
|
104 | goto finally; | |
|
105 | } | |
|
106 | Py_XDECREF(res); | |
|
107 | totalWrite += output.pos; | |
|
108 | output.pos = 0; | |
|
109 | } | |
|
110 | } | |
|
112 | 111 | |
|
113 | if (output.pos) { | |
|
114 | #if PY_MAJOR_VERSION >= 3 | |
|
115 | res = PyObject_CallMethod(self->writer, "write", "y#", | |
|
116 | #else | |
|
117 | res = PyObject_CallMethod(self->writer, "write", "s#", | |
|
118 | #endif | |
|
119 | output.dst, output.pos); | |
|
120 | Py_XDECREF(res); | |
|
121 | totalWrite += output.pos; | |
|
122 | output.pos = 0; | |
|
123 | } | |
|
124 | } | |
|
112 | PyMem_Free(output.dst); | |
|
125 | 113 | |
|
126 | PyMem_Free(output.dst); | |
|
127 | ||
|
128 | if (self->writeReturnRead) { | |
|
129 | result = PyLong_FromSize_t(input.pos); | |
|
130 | } | |
|
131 | else { | |
|
132 | result = PyLong_FromSsize_t(totalWrite); | |
|
133 | } | |
|
114 | if (self->writeReturnRead) { | |
|
115 | result = PyLong_FromSize_t(input.pos); | |
|
116 | } | |
|
117 | else { | |
|
118 | result = PyLong_FromSsize_t(totalWrite); | |
|
119 | } | |
|
134 | 120 | |
|
135 | 121 | finally: |
|
136 |
|
|
|
137 |
|
|
|
122 | PyBuffer_Release(&source); | |
|
123 | return result; | |
|
138 | 124 | } |
|
139 | 125 | |
|
140 |
static PyObject* |
|
|
141 |
|
|
|
142 | ||
|
143 | if (self->closed) { | |
|
144 | Py_RETURN_NONE; | |
|
145 | } | |
|
126 | static PyObject *ZstdDecompressionWriter_close(ZstdDecompressionWriter *self) { | |
|
127 | PyObject *result; | |
|
146 | 128 | |
|
147 | result = PyObject_CallMethod((PyObject*)self, "flush", NULL); | |
|
148 | self->closed = 1; | |
|
149 | ||
|
150 | if (NULL == result) { | |
|
151 | return NULL; | |
|
152 | } | |
|
129 | if (self->closed) { | |
|
130 | Py_RETURN_NONE; | |
|
131 | } | |
|
153 | 132 | |
|
154 | /* Call close on underlying stream as well. */ | |
|
155 | if (PyObject_HasAttrString(self->writer, "close")) { | |
|
156 | return PyObject_CallMethod(self->writer, "close", NULL); | |
|
157 | } | |
|
158 | ||
|
159 | Py_RETURN_NONE; | |
|
160 | } | |
|
133 | self->closing = 1; | |
|
134 | result = PyObject_CallMethod((PyObject *)self, "flush", NULL); | |
|
135 | self->closing = 0; | |
|
136 | self->closed = 1; | |
|
161 | 137 | |
|
162 | static PyObject* ZstdDecompressionWriter_fileno(ZstdDecompressionWriter* self) { | |
|
163 | if (PyObject_HasAttrString(self->writer, "fileno")) { | |
|
164 | return PyObject_CallMethod(self->writer, "fileno", NULL); | |
|
165 | } | |
|
166 | else { | |
|
167 | PyErr_SetString(PyExc_OSError, "fileno not available on underlying writer"); | |
|
168 | return NULL; | |
|
169 | } | |
|
138 | if (NULL == result) { | |
|
139 | return NULL; | |
|
140 | } | |
|
141 | ||
|
142 | /* Call close on underlying stream as well. */ | |
|
143 | if (self->closefd && PyObject_HasAttrString(self->writer, "close")) { | |
|
144 | return PyObject_CallMethod(self->writer, "close", NULL); | |
|
145 | } | |
|
146 | ||
|
147 | Py_RETURN_NONE; | |
|
170 | 148 | } |
|
171 | 149 | |
|
172 |
static PyObject* |
|
|
173 | if (self->closed) { | |
|
174 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
175 | return NULL; | |
|
176 | } | |
|
177 | ||
|
178 | if (PyObject_HasAttrString(self->writer, "flush")) { | |
|
179 | return PyObject_CallMethod(self->writer, "flush", NULL); | |
|
180 | } | |
|
181 | else { | |
|
182 | Py_RETURN_NONE; | |
|
183 | } | |
|
184 | } | |
|
185 | ||
|
186 | static PyObject* ZstdDecompressionWriter_false(PyObject* self, PyObject* args) { | |
|
187 | Py_RETURN_FALSE; | |
|
150 | static PyObject *ZstdDecompressionWriter_fileno(ZstdDecompressionWriter *self) { | |
|
151 | if (PyObject_HasAttrString(self->writer, "fileno")) { | |
|
152 | return PyObject_CallMethod(self->writer, "fileno", NULL); | |
|
153 | } | |
|
154 | else { | |
|
155 | PyErr_SetString(PyExc_OSError, | |
|
156 | "fileno not available on underlying writer"); | |
|
157 | return NULL; | |
|
158 | } | |
|
188 | 159 | } |
|
189 | 160 | |
|
190 |
static PyObject* |
|
|
191 | Py_RETURN_TRUE; | |
|
161 | static PyObject *ZstdDecompressionWriter_flush(ZstdDecompressionWriter *self) { | |
|
162 | if (self->closed) { | |
|
163 | PyErr_SetString(PyExc_ValueError, "stream is closed"); | |
|
164 | return NULL; | |
|
165 | } | |
|
166 | ||
|
167 | if (!self->closing && PyObject_HasAttrString(self->writer, "flush")) { | |
|
168 | return PyObject_CallMethod(self->writer, "flush", NULL); | |
|
169 | } | |
|
170 | else { | |
|
171 | Py_RETURN_NONE; | |
|
172 | } | |
|
173 | } | |
|
174 | ||
|
175 | static PyObject *ZstdDecompressionWriter_iter(PyObject *self) { | |
|
176 | set_io_unsupported_operation(); | |
|
177 | return NULL; | |
|
192 | 178 | } |
|
193 | 179 | |
|
194 |
static PyObject* |
|
|
195 | PyObject* iomod; | |
|
196 | PyObject* exc; | |
|
180 | static PyObject *ZstdDecompressionWriter_iternext(PyObject *self) { | |
|
181 | set_io_unsupported_operation(); | |
|
182 | return NULL; | |
|
183 | } | |
|
197 | 184 | |
|
198 | iomod = PyImport_ImportModule("io"); | |
|
199 | if (NULL == iomod) { | |
|
200 | return NULL; | |
|
201 | } | |
|
185 | static PyObject *ZstdDecompressionWriter_false(PyObject *self, PyObject *args) { | |
|
186 | Py_RETURN_FALSE; | |
|
187 | } | |
|
202 | 188 | |
|
203 | exc = PyObject_GetAttrString(iomod, "UnsupportedOperation"); | |
|
204 | if (NULL == exc) { | |
|
205 | Py_DECREF(iomod); | |
|
206 | return NULL; | |
|
207 | } | |
|
189 | static PyObject *ZstdDecompressionWriter_true(PyObject *self, PyObject *args) { | |
|
190 | Py_RETURN_TRUE; | |
|
191 | } | |
|
208 | 192 | |
|
209 | PyErr_SetNone(exc); | |
|
210 | Py_DECREF(exc); | |
|
211 | Py_DECREF(iomod); | |
|
212 | ||
|
213 |
|
|
|
193 | static PyObject *ZstdDecompressionWriter_unsupported(PyObject *self, | |
|
194 | PyObject *args, | |
|
195 | PyObject *kwargs) { | |
|
196 | set_io_unsupported_operation(); | |
|
197 | return NULL; | |
|
214 | 198 | } |
|
215 | 199 | |
|
216 | 200 | static PyMethodDef ZstdDecompressionWriter_methods[] = { |
|
217 |
|
|
|
218 |
|
|
|
219 |
|
|
|
220 |
|
|
|
221 |
|
|
|
222 | PyDoc_STR("Obtain the memory size in bytes of the underlying decompressor.") }, | |
|
223 | { "close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL }, | |
|
224 | { "fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL }, | |
|
225 |
|
|
|
226 |
|
|
|
227 |
|
|
|
228 |
|
|
|
229 |
|
|
|
230 |
|
|
|
231 | { "seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL }, | |
|
232 |
|
|
|
233 | { "truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL }, | |
|
234 | { "writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL }, | |
|
235 | { "writelines" , (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL }, | |
|
236 |
|
|
|
237 |
|
|
|
238 | { "readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, METH_VARARGS | METH_KEYWORDS, NULL }, | |
|
239 |
|
|
|
240 | PyDoc_STR("Compress data") }, | |
|
241 | { NULL, NULL } | |
|
242 | }; | |
|
201 | {"__enter__", (PyCFunction)ZstdDecompressionWriter_enter, METH_NOARGS, | |
|
202 | PyDoc_STR("Enter a decompression context.")}, | |
|
203 | {"__exit__", (PyCFunction)ZstdDecompressionWriter_exit, METH_VARARGS, | |
|
204 | PyDoc_STR("Exit a decompression context.")}, | |
|
205 | {"memory_size", (PyCFunction)ZstdDecompressionWriter_memory_size, | |
|
206 | METH_NOARGS, | |
|
207 | PyDoc_STR( | |
|
208 | "Obtain the memory size in bytes of the underlying decompressor.")}, | |
|
209 | {"close", (PyCFunction)ZstdDecompressionWriter_close, METH_NOARGS, NULL}, | |
|
210 | {"fileno", (PyCFunction)ZstdDecompressionWriter_fileno, METH_NOARGS, NULL}, | |
|
211 | {"flush", (PyCFunction)ZstdDecompressionWriter_flush, METH_NOARGS, NULL}, | |
|
212 | {"isatty", ZstdDecompressionWriter_false, METH_NOARGS, NULL}, | |
|
213 | {"readable", ZstdDecompressionWriter_false, METH_NOARGS, NULL}, | |
|
214 | {"readline", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
215 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
216 | {"readlines", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
217 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
218 | {"seek", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
219 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
220 | {"seekable", ZstdDecompressionWriter_false, METH_NOARGS, NULL}, | |
|
221 | {"tell", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
222 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
223 | {"truncate", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
224 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
225 | {"writable", ZstdDecompressionWriter_true, METH_NOARGS, NULL}, | |
|
226 | {"writelines", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
227 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
228 | {"read", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
229 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
230 | {"readall", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
231 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
232 | {"readinto", (PyCFunction)ZstdDecompressionWriter_unsupported, | |
|
233 | METH_VARARGS | METH_KEYWORDS, NULL}, | |
|
234 | {"write", (PyCFunction)ZstdDecompressionWriter_write, | |
|
235 | METH_VARARGS | METH_KEYWORDS, PyDoc_STR("Compress data")}, | |
|
236 | {NULL, NULL}}; | |
|
243 | 237 | |
|
244 | 238 | static PyMemberDef ZstdDecompressionWriter_members[] = { |
|
245 |
|
|
|
246 |
|
|
|
239 | {"closed", T_BOOL, offsetof(ZstdDecompressionWriter, closed), READONLY, | |
|
240 | NULL}, | |
|
241 | {NULL}}; | |
|
242 | ||
|
243 | PyType_Slot ZstdDecompressionWriterSlots[] = { | |
|
244 | {Py_tp_dealloc, ZstdDecompressionWriter_dealloc}, | |
|
245 | {Py_tp_iter, ZstdDecompressionWriter_iter}, | |
|
246 | {Py_tp_iternext, ZstdDecompressionWriter_iternext}, | |
|
247 | {Py_tp_methods, ZstdDecompressionWriter_methods}, | |
|
248 | {Py_tp_members, ZstdDecompressionWriter_members}, | |
|
249 | {Py_tp_new, PyType_GenericNew}, | |
|
250 | {0, NULL}, | |
|
247 | 251 | }; |
|
248 | 252 | |
|
249 |
PyType |
|
|
250 | PyVarObject_HEAD_INIT(NULL, 0) | |
|
251 |
|
|
|
252 | sizeof(ZstdDecompressionWriter),/* tp_basicsize */ | |
|
253 | 0, /* tp_itemsize */ | |
|
254 | (destructor)ZstdDecompressionWriter_dealloc, /* tp_dealloc */ | |
|
255 | 0, /* tp_print */ | |
|
256 | 0, /* tp_getattr */ | |
|
257 | 0, /* tp_setattr */ | |
|
258 | 0, /* tp_compare */ | |
|
259 | 0, /* tp_repr */ | |
|
260 | 0, /* tp_as_number */ | |
|
261 | 0, /* tp_as_sequence */ | |
|
262 | 0, /* tp_as_mapping */ | |
|
263 | 0, /* tp_hash */ | |
|
264 | 0, /* tp_call */ | |
|
265 | 0, /* tp_str */ | |
|
266 | 0, /* tp_getattro */ | |
|
267 | 0, /* tp_setattro */ | |
|
268 | 0, /* tp_as_buffer */ | |
|
269 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | |
|
270 | ZstdDecompressionWriter__doc, /* tp_doc */ | |
|
271 | 0, /* tp_traverse */ | |
|
272 | 0, /* tp_clear */ | |
|
273 | 0, /* tp_richcompare */ | |
|
274 | 0, /* tp_weaklistoffset */ | |
|
275 | 0, /* tp_iter */ | |
|
276 | 0, /* tp_iternext */ | |
|
277 | ZstdDecompressionWriter_methods,/* tp_methods */ | |
|
278 | ZstdDecompressionWriter_members,/* tp_members */ | |
|
279 | 0, /* tp_getset */ | |
|
280 | 0, /* tp_base */ | |
|
281 | 0, /* tp_dict */ | |
|
282 | 0, /* tp_descr_get */ | |
|
283 | 0, /* tp_descr_set */ | |
|
284 | 0, /* tp_dictoffset */ | |
|
285 | 0, /* tp_init */ | |
|
286 | 0, /* tp_alloc */ | |
|
287 | PyType_GenericNew, /* tp_new */ | |
|
253 | PyType_Spec ZstdDecompressionWriterSpec = { | |
|
254 | "zstd.ZstdDecompressionWriter", | |
|
255 | sizeof(ZstdDecompressionWriter), | |
|
256 | 0, | |
|
257 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, | |
|
258 | ZstdDecompressionWriterSlots, | |
|
288 | 259 | }; |
|
289 | 260 | |
|
290 | void decompressionwriter_module_init(PyObject* mod) { | |
|
291 | Py_SET_TYPE(&ZstdDecompressionWriterType, &PyType_Type); | |
|
292 | if (PyType_Ready(&ZstdDecompressionWriterType) < 0) { | |
|
293 | return; | |
|
294 | } | |
|
261 | PyTypeObject *ZstdDecompressionWriterType; | |
|
262 | ||
|
263 | void decompressionwriter_module_init(PyObject *mod) { | |
|
264 | ZstdDecompressionWriterType = | |
|
265 | (PyTypeObject *)PyType_FromSpec(&ZstdDecompressionWriterSpec); | |
|
266 | if (PyType_Ready(ZstdDecompressionWriterType) < 0) { | |
|
267 | return; | |
|
268 | } | |
|
269 | ||
|
270 | Py_INCREF((PyObject *)ZstdDecompressionWriterType); | |
|
271 | PyModule_AddObject(mod, "ZstdDecompressionWriter", | |
|
272 | (PyObject *)ZstdDecompressionWriterType); | |
|
295 | 273 | } |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
This diff has been collapsed as it changes many lines, (721 lines changed) Show them Hide them |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: file was removed | |
The requested commit or file is too big and content was truncated. Show full diff |
General Comments 0
You need to be logged in to leave comments.
Login now