zstandard: vendor python-zstandard 0.13.0...
Gregory Szorc
r44446:de783805 default


1 NO CONTENT: new file 100644
@@ -1,102 +1,103 @@
1 1 # Files that just need to be migrated to the formatter.
2 2 # Do not add new files here!
3 3 mercurial/cext/manifest.c
4 4 mercurial/cext/osutil.c
5 5 # Vendored code that we should never format:
6 6 contrib/python-zstandard/c-ext/bufferutil.c
7 7 contrib/python-zstandard/c-ext/compressionchunker.c
8 8 contrib/python-zstandard/c-ext/compressiondict.c
9 9 contrib/python-zstandard/c-ext/compressionparams.c
10 10 contrib/python-zstandard/c-ext/compressionreader.c
11 11 contrib/python-zstandard/c-ext/compressionwriter.c
12 12 contrib/python-zstandard/c-ext/compressobj.c
13 13 contrib/python-zstandard/c-ext/compressor.c
14 14 contrib/python-zstandard/c-ext/compressoriterator.c
15 15 contrib/python-zstandard/c-ext/constants.c
16 16 contrib/python-zstandard/c-ext/decompressionreader.c
17 17 contrib/python-zstandard/c-ext/decompressionwriter.c
18 18 contrib/python-zstandard/c-ext/decompressobj.c
19 19 contrib/python-zstandard/c-ext/decompressor.c
20 20 contrib/python-zstandard/c-ext/decompressoriterator.c
21 21 contrib/python-zstandard/c-ext/frameparams.c
22 22 contrib/python-zstandard/c-ext/python-zstandard.h
23 23 contrib/python-zstandard/zstd.c
24 24 contrib/python-zstandard/zstd/common/bitstream.h
25 25 contrib/python-zstandard/zstd/common/compiler.h
26 26 contrib/python-zstandard/zstd/common/cpu.h
27 27 contrib/python-zstandard/zstd/common/debug.c
28 28 contrib/python-zstandard/zstd/common/debug.h
29 29 contrib/python-zstandard/zstd/common/entropy_common.c
30 30 contrib/python-zstandard/zstd/common/error_private.c
31 31 contrib/python-zstandard/zstd/common/error_private.h
32 32 contrib/python-zstandard/zstd/common/fse_decompress.c
33 33 contrib/python-zstandard/zstd/common/fse.h
34 34 contrib/python-zstandard/zstd/common/huf.h
35 35 contrib/python-zstandard/zstd/common/mem.h
36 36 contrib/python-zstandard/zstd/common/pool.c
37 37 contrib/python-zstandard/zstd/common/pool.h
38 38 contrib/python-zstandard/zstd/common/threading.c
39 39 contrib/python-zstandard/zstd/common/threading.h
40 40 contrib/python-zstandard/zstd/common/xxhash.c
41 41 contrib/python-zstandard/zstd/common/xxhash.h
42 42 contrib/python-zstandard/zstd/common/zstd_common.c
43 43 contrib/python-zstandard/zstd/common/zstd_errors.h
44 44 contrib/python-zstandard/zstd/common/zstd_internal.h
45 45 contrib/python-zstandard/zstd/compress/fse_compress.c
46 46 contrib/python-zstandard/zstd/compress/hist.c
47 47 contrib/python-zstandard/zstd/compress/hist.h
48 48 contrib/python-zstandard/zstd/compress/huf_compress.c
49 49 contrib/python-zstandard/zstd/compress/zstd_compress.c
50 50 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
51 51 contrib/python-zstandard/zstd/compress/zstd_compress_literals.c
52 52 contrib/python-zstandard/zstd/compress/zstd_compress_literals.h
53 53 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c
54 54 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h
55 contrib/python-zstandard/zstd/compress/zstd_cwksp.h
55 56 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
56 57 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
57 58 contrib/python-zstandard/zstd/compress/zstd_fast.c
58 59 contrib/python-zstandard/zstd/compress/zstd_fast.h
59 60 contrib/python-zstandard/zstd/compress/zstd_lazy.c
60 61 contrib/python-zstandard/zstd/compress/zstd_lazy.h
61 62 contrib/python-zstandard/zstd/compress/zstd_ldm.c
62 63 contrib/python-zstandard/zstd/compress/zstd_ldm.h
63 64 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
64 65 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
65 66 contrib/python-zstandard/zstd/compress/zstd_opt.c
66 67 contrib/python-zstandard/zstd/compress/zstd_opt.h
67 68 contrib/python-zstandard/zstd/decompress/huf_decompress.c
68 69 contrib/python-zstandard/zstd/decompress/zstd_ddict.c
69 70 contrib/python-zstandard/zstd/decompress/zstd_ddict.h
70 71 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c
71 72 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h
72 73 contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h
73 74 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
74 75 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
75 76 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
76 77 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
77 78 contrib/python-zstandard/zstd/deprecated/zbuff.h
78 79 contrib/python-zstandard/zstd/dictBuilder/cover.c
79 80 contrib/python-zstandard/zstd/dictBuilder/cover.h
80 81 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
81 82 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
82 83 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
83 84 contrib/python-zstandard/zstd/dictBuilder/zdict.c
84 85 contrib/python-zstandard/zstd/dictBuilder/zdict.h
85 86 contrib/python-zstandard/zstd/zstd.h
86 87 hgext/fsmonitor/pywatchman/bser.c
87 88 mercurial/thirdparty/xdiff/xdiff.h
88 89 mercurial/thirdparty/xdiff/xdiffi.c
89 90 mercurial/thirdparty/xdiff/xdiffi.h
90 91 mercurial/thirdparty/xdiff/xemit.c
91 92 mercurial/thirdparty/xdiff/xemit.h
92 93 mercurial/thirdparty/xdiff/xhistogram.c
93 94 mercurial/thirdparty/xdiff/xinclude.h
94 95 mercurial/thirdparty/xdiff/xmacros.h
95 96 mercurial/thirdparty/xdiff/xmerge.c
96 97 mercurial/thirdparty/xdiff/xpatience.c
97 98 mercurial/thirdparty/xdiff/xprepare.c
98 99 mercurial/thirdparty/xdiff/xprepare.h
99 100 mercurial/thirdparty/xdiff/xtypes.h
100 101 mercurial/thirdparty/xdiff/xutils.c
101 102 mercurial/thirdparty/xdiff/xutils.h
102 103 mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c
@@ -1,702 +1,721 @@
1 1 ===============
2 2 Version History
3 3 ===============
4 4
5 5 1.0.0 (not yet released)
6 6 ========================
7 7
8 8 Actions Blocking Release
9 9 ------------------------
10 10
11 11 * compression and decompression APIs that support ``io.RawIOBase`` interface
12 12 (#13).
13 13 * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface.
14 14 * Properly handle non-blocking I/O and partial writes for objects implementing
15 15 ``io.RawIOBase``.
16 16 * Make ``write_return_read=True`` the default for objects implementing
17 17 ``io.RawIOBase``.
18 18 * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for
19 19 all objects implementing ``io.RawIOBase``. Is calling ``close()`` on
20 20 wrapped stream acceptable, should ``__exit__`` always call ``close()``,
21 21 should ``close()`` imply ``flush()``, etc.
22 22 * Consider making reads across frames configurable behavior.
23 23 * Refactor module names so C and CFFI extensions live under ``zstandard``
24 24 package.
25 25 * Overall API design review.
26 26 * Use Python allocator where possible.
27 27 * Figure out what to do about experimental APIs not implemented by CFFI.
28 28 * APIs for auto adjusting compression parameters based on input size. e.g.
29 29 clamping the window log so it isn't too large for input.
30 30 * Consider allowing compressor and decompressor instances to be thread safe,
31 31 support concurrent operations. Or track when an operation is in progress and
32 32 refuse to let concurrent operations use the same instance.
33 33 * Support for magic-less frames for all decompression operations (``decompress()``
34 34 doesn't work due to sniffing the content size and the lack of a ZSTD API to
35 35 sniff magic-less frames - this should be fixed in 1.3.5.).
36 36 * Audit for complete flushing when ending compression streams.
37 37 * Deprecate legacy APIs.
38 38 * Audit for ability to control read/write sizes on all APIs.
39 39 * Detect memory leaks via bench.py.
40 40 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
41 41 require use of ``CompressionParameters``.
42 42 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
43 43 * Support modifying compression parameters mid operation when supported by
44 44 zstd API.
45 45 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
46 * Expose ``ZSTD_SRCSIZEHINT_{MIN,MAX}`` constants.
46 47 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
47 * Support ``ZSTD_c_literalCompressionMode `` compression parameter.
48 * Support ``ZSTD_dictForceLoad`` dictionary compression parameter.
49 * Support ``ZSTD_c_targetCBlockSize`` compression parameter.
50 * Support ``ZSTD_c_literalCompressionMode`` compression parameter.
51 * Support ``ZSTD_c_srcSizeHint`` compression parameter.
48 52 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
49 53 compression parameters.
50 54 * Consider exposing ``ZSTDMT_toFlushNow()``.
51 55 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
52 56 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
57 * Expose ``ZSTD_Sequence`` struct and related ``ZSTD_getSequences()`` API.
53 58 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
54 59 * Consider a ``chunker()`` API for decompression.
55 60 * Consider stats for ``chunker()`` API, including finding the last consumed
56 61 offset of input data.
57 62 * Consider exposing ``ZSTD_cParam_getBounds()`` and
58 63 ``ZSTD_dParam_getBounds()`` APIs.
59 64 * Consider controls over resetting compression contexts (session only, parameters,
60 65 or session and parameters).
61 66 * Actually use the CFFI backend in fuzzing tests.
62 67
63 68 Other Actions Not Blocking Release
64 69 ---------------------------------------
65 70
66 71 * Support for block compression APIs.
67 72 * API for ensuring max memory ceiling isn't exceeded.
68 73 * Move off nose for testing.
69 74
75 0.13.0 (released 2019-12-28)
76 ============================
77
78 Changes
79 -------
80
81 * ``pytest-xdist`` ``pytest`` extension is now installed so tests can be
82 run in parallel.
83 * CI now builds ``manylinux2010`` and ``manylinux2014`` binary wheels
84 instead of a mix of ``manylinux2010`` and ``manylinux1``.
85 * Official support for Python 3.8 has been added.
86 * Bundled zstandard library upgraded from 1.4.3 to 1.4.4.
87 * Python code has been reformatted with black.
88
70 89 0.12.0 (released 2019-09-15)
71 90 ============================
72 91
73 92 Backwards Compatibility Notes
74 93 -----------------------------
75 94
76 95 * Support for Python 3.4 has been dropped since Python 3.4 is no longer
77 96 a supported Python version upstream. (But it will likely continue to
78 97 work until Python 2.7 support is dropped and we port to Python 3.5+
79 98 APIs.)
80 99
81 100 Bug Fixes
82 101 ---------
83 102
84 103 * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91).
85 104 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
86 105
87 106 Changes
88 107 -------
89 108
90 109 * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI).
91 110 * Switched to ``pytest`` for running tests (from ``nose``).
92 111 * Bundled zstandard library upgraded from 1.3.8 to 1.4.3.
93 112
94 113 0.11.1 (released 2019-05-14)
95 114 ============================
96 115
97 116 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
98 117
99 118 0.11.0 (released 2019-02-24)
100 119 ============================
101 120
102 121 Backwards Compatibility Notes
103 122 -----------------------------
104 123
105 124 * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0``
106 125 and defaults to ``-1``, per the documented behavior of
107 126 ``io.RawIOBase.read()``. Previously, we required an argument that was
108 127 a positive value.
109 128 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
110 129 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
111 130 instead of ``NotImplementedError``.
112 131 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
113 132 argument. The default value will likely be changed in a future release
114 133 and consumers are advised to pass the argument to avoid unwanted change
115 134 of behavior in the future.
116 135 * ``setup.py`` now always disables the CFFI backend if the installed
117 136 CFFI package does not meet the minimum version requirements. Before, it was
118 137 possible for the CFFI backend to be generated and a run-time error to
119 138 occur.
120 139 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
121 140 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
122 141 respectively so naming is identical to the C extension. This should have
123 142 no meaningful end-user impact, as instances aren't meant to be
124 143 constructed directly.
125 144 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
126 145 argument to control whether ``write()`` returns the number of bytes
127 146 read from the source / written to the decompressor. It defaults to off,
128 147 which preserves the existing behavior of returning the number of bytes
129 148 emitted from the decompressor. The default will change in a future release
130 149 so behavior aligns with the specified behavior of ``io.RawIOBase``.
131 150 * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This
132 151 will result in that stream plus the underlying stream being closed as
133 152 well. If this behavior is not desirable, do not use instances as
134 153 context managers.
135 154 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
136 155 argument to control whether ``write()`` returns the number of bytes read
137 156 from the source / written to the compressor. It defaults to off, which
138 157 preserves the existing behavior of returning the number of bytes emitted
139 158 from the compressor. The default will change in a future release so
140 159 behavior aligns with the specified behavior of ``io.RawIOBase``.
141 160 * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will
142 161 result in that stream plus any underlying stream being closed as well. If
143 162 this behavior is not desirable, do not use instances as context managers.
144 163 * ``ZstdDecompressionWriter`` no longer requires being used as a context
145 164 manager (#57).
146 165 * ``ZstdCompressionWriter`` no longer requires being used as a context
147 166 manager (#57).
148 167 * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances
149 168 has been deprecated and will be removed in a future release. The
150 169 ``overlap_log`` attribute should be used instead.
151 170 * The ``overlap_size_log`` argument to ``CompressionParameters`` has been
152 171 deprecated and will be removed in a future release. The ``overlap_log``
153 172 argument should be used instead.
154 173 * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances
155 174 has been deprecated and will be removed in a future release. The
156 175 ``ldm_hash_rate_log`` attribute should be used instead.
157 176 * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been
158 177 deprecated and will be removed in a future release. The ``ldm_hash_rate_log``
159 178 argument should be used instead.
160 179 * The ``compression_strategy`` argument to ``CompressionParameters`` has been
161 180 deprecated and will be removed in a future release. The ``strategy``
162 181 argument should be used instead.
163 182 * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated
164 183 and will be removed in a future release. Use ``MINMATCH_MIN`` and
165 184 ``MINMATCH_MAX`` instead.
166 185 * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had
167 186 been documented in the ``README`` file since the ``0.9.0`` release, the
168 187 module should not be imported directly at its new location. Instead,
169 188 ``import zstandard`` to cause an appropriate backend module to be loaded
170 189 automatically.
171 190
172 191 Bug Fixes
173 192 ---------
174 193
175 194 * CFFI backend could encounter a failure when sending an empty chunk into
176 195 ``ZstdDecompressionObj.decompress()``. The issue has been fixed.
177 196 * CFFI backend could encounter an error when calling
178 197 ``ZstdDecompressionReader.read()`` if there was data remaining in an
179 198 internal buffer. The issue has been fixed. (#71)
180 199
181 200 Changes
182 201 -------
183 202
184 203 * ``ZstdDecompressionObj.decompress()`` now properly handles empty inputs in
185 204 the CFFI backend.
186 205 * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``.
187 206 These are part of the ``io.BufferedIOBase`` interface.
188 207 * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading
189 208 compressed output into an existing buffer.
190 209 * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts
191 210 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
192 211 behavior of ``io.RawIOBase``.
193 212 * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this
194 213 method raised ``NotImplementedError``.
195 214 * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``.
196 215 These are part of the ``io.BufferedIOBase`` interface.
197 216 * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts
198 217 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
199 218 behavior of ``io.RawIOBase``.
200 219 * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this
201 220 method raised ``NotImplementedError``.
202 221 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
203 222 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
204 223 instead of ``NotImplementedError``. This reflects a decision to never
205 224 implement text-based I/O on (de)compressors and keep the low-level API
206 225 operating in the binary domain. (#13)
207 226 * ``README.rst`` now documents how to achieve linewise iteration using
208 227 an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``.
209 228 * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for
210 229 reading decompressed output into an existing buffer. This allows chaining
211 230 to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``.
212 231 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
213 232 argument to control behavior when the input data has multiple zstd
214 233 *frames*. When ``False`` (the default for backwards compatibility), a
215 234 ``read()`` will stop when the end of a zstd *frame* is encountered. When
216 235 ``True``, ``read()`` can potentially return data spanning multiple zstd
217 236 *frames*. The default will likely be changed to ``True`` in a future
218 237 release.
219 238 * ``setup.py`` now performs CFFI version sniffing and disables the CFFI
220 239 backend if CFFI is too old. Previously, we only used ``install_requires``
221 240 to enforce the CFFI version and not all build modes would properly enforce
222 241 the minimum CFFI version. (#69)
223 242 * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data
224 243 remaining in any internal buffer. Before, repeated ``read()`` could
225 244 result in *random* errors. (#71)
226 245 * Upgraded various Python packages in CI environment.
227 246 * Upgrade to hypothesis 4.5.11.
228 247 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
229 248 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
230 249 respectively.
231 250 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
232 251 argument to control whether ``write()`` returns the number of bytes read
233 252 from the source. It defaults to ``False`` to preserve backwards
234 253 compatibility.
235 254 * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase``
236 255 interface and behaves as a proper stream object.
237 256 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
238 257 argument to control whether ``write()`` returns the number of bytes read
239 258 from the source. It defaults to ``False`` to preserve backwards
240 259 compatibility.
241 260 * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and
242 261 behaves as a proper stream object. ``close()`` will now close the stream
243 262 and the underlying stream (if possible). ``__exit__`` will now call
244 263 ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented.
245 264 * ``ZstdDecompressionWriter`` no longer must be used as a context manager.
246 265 * ``ZstdCompressionWriter`` no longer must be used as a context manager.
247 266 When not used as a context manager, it is important to call
248 267 ``flush(FLUSH_FRAME)`` or the compression stream won't be properly
249 268 terminated and decoders may complain about malformed input.
250 269 * ``ZstdCompressionWriter.flush()`` (what is returned from
251 270 ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the
252 271 flush behavior. Its value can be one of the new constants
253 272 ``FLUSH_BLOCK`` or ``FLUSH_FRAME``.
254 273 * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method.
255 274 This provides parity with standard library equivalent types. (#65)
256 275 * ``CompressionParameters`` no longer redundantly store individual compression
257 276 parameters on each instance. Instead, compression parameters are stored inside
258 277 the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining
259 278 parameters are now properties rather than instance variables.
260 279 * Exposed the ``STRATEGY_BTULTRA2`` constant.
261 280 * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute.
262 281 This behaves identically to the ``overlap_size_log`` attribute.
263 282 * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that
264 283 behaves identically to the ``overlap_size_log`` argument. An error will be
265 284 raised if both arguments are specified.
266 285 * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log``
267 286 attribute. This behaves identically to the ``ldm_hash_every_log`` attribute.
268 287 * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that
269 288 behaves identically to the ``ldm_hash_every_log`` argument. An error will be
270 289 raised if both arguments are specified.
271 290 * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves
272 291 identically to the ``compression_strategy`` argument. An error will be raised
273 292 if both arguments are specified.
274 293 * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are
275 294 semantically equivalent to the old ``SEARCHLENGTH_MIN`` and
276 295 ``SEARCHLENGTH_MAX`` constants.
277 296 * Bundled zstandard library upgraded from 1.3.7 to 1.3.8.
278 297 * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and
279 298 tested in the 0.10 release).
280 299 * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``.
281 300 * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid
282 301 allocating a new buffer for every operation. This should result in faster
283 302 performance in cases where ``write()`` or ``flush()`` are being called
284 303 frequently. (#62)
285 304 * Bundled zstandard library upgraded from 1.3.6 to 1.3.7.
286 305
287 306 0.10.2 (released 2018-11-03)
288 307 ============================
289 308
290 309 Bug Fixes
291 310 ---------
292 311
293 312 * ``zstd_cffi.py`` added to ``setup.py`` (#60).
294 313
295 314 Changes
296 315 -------
297 316
298 317 * Change some integer casts to avoid ``ssize_t`` (#61).
299 318
300 319 0.10.1 (released 2018-10-08)
301 320 ============================
302 321
303 322 Backwards Compatibility Notes
304 323 -----------------------------
305 324
306 325 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
307 326 method (#58).
308 327 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
309 328 method (#58).
310 329
311 330 Changes
312 331 -------
313 332
314 333 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
315 334 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
316 335 since they were built against Python 3.7.
317 336 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
318 337 ``closed`` attribute is now a read-only property instead of a method. This now
319 338 properly matches the ``IOBase`` API and allows instances to be used in more
320 339 places that accept ``IOBase`` instances.
321 340
322 341 0.10.0 (released 2018-10-08)
323 342 ============================
324 343
325 344 Backwards Compatibility Notes
326 345 -----------------------------
327 346
328 347 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
329 348 argument in both the C and CFFI backends. Before, the CFFI implementation
330 349 would assume a default value of ``-1``, which was later rejected.
331 350 * The ``compress_literals`` argument and attribute has been removed from
332 351 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
333 352 API.
334 353 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
335 354 operation performed against ``ZstdCompressor`` instances. The reason for this
336 355 change is that the zstd 1.3.5 API no longer allows this without calling
337 356 ``ZSTD_CCtx_resetParameters()`` first. But if we called
338 357 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
339 358 potentially expensive setup when using dictionaries. We now call
340 359 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
341 360 compression parameters.
342 361 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
343 362 used as a context manager. The context manager interface still exists and its
344 363 behavior is unchanged.
345 364 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
346 365 used as a context manager. The context manager interface still exists and its
347 366 behavior is unchanged.
348 367
349 368 Bug Fixes
350 369 ---------
351 370
352 371 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
353 372 from internal buffers in more scenarios. Before, it was possible for data to
354 373 remain in internal buffers. This data would be emitted on a subsequent call
355 374 to ``decompress()``. The overall output stream would still be valid. But if
356 375 callers were expecting input data to exactly map to output data (say the
357 376 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
358 377 map input chunks to output chunks), then the previous behavior would be
359 378 wrong. The new behavior is such that output from
360 379 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
361 380 should produce all available compressed input.
362 381 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
363 382 a previous context manager resulted in error (#56).
364 383 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
365 384 all data necessary to flush a block. Before, it was possible for the
366 385 ``flush()`` to not emit all data necessary to fully represent a block. This
367 386 would mean decompressors wouldn't be able to decompress all data that had been
368 387 fed into the compressor and ``flush()``ed. (#55).
369 388
370 389 New Features
371 390 ------------
372 391
373 392 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
374 393 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
375 394 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
376 395 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
377 396 API doesn't impose restrictions on the input or output types for the
378 397 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
379 398 fixed size. This makes this API useful when the compressed output is being
380 399 fed into an I/O layer, where uniform write sizes are useful.
381 400 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
382 401 manager (#34).
383 402 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
384 403 manager (#34).
385 404 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
386 405
387 406 Changes
388 407 -------
389 408
390 409 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
391 410 * ``zstandard.__version__`` is now defined (#50).
392 411 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
393 412 * Upgrade various packages used in CI to latest versions. Notably tox (in
394 413 order to support Python 3.7).
395 414 * Use relative paths in setup.py to appease Python 3.7 (#51).
396 415 * Added CI for Python 3.7.
397 416
398 417 0.9.1 (released 2018-06-04)
399 418 ===========================
400 419
401 420 * Debian packaging support.
402 421 * Fix typo in setup.py (#44).
403 422 * Support building with mingw compiler (#46).
404 423
405 424 0.9.0 (released 2018-04-08)
406 425 ===========================
407 426
408 427 Backwards Compatibility Notes
409 428 -----------------------------
410 429
411 430 * CFFI 1.11 or newer is now required (previous requirement was 1.8).
412 431 * The primary module is now ``zstandard``. Please change imports of ``zstd``
413 432 and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support
414 433 for importing the old names will be dropped in the next release.
415 434 * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have
416 435 been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new
417 436 name and will be deleted in a future release.
418 437 * Support for Python 2.6 has been removed.
419 438 * Support for Python 3.3 has been removed.
420 439 * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as
421 440 the feature disappeared from zstd 1.3.
422 441 * Support for legacy dictionaries has been removed. Cover dictionaries are now
423 442 the default. ``train_cover_dictionary()`` has effectively been renamed to
424 443 ``train_dictionary()``.
425 444 * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been
426 445 deleted and the method now allows empty inputs to be compressed by default.
427 446 * ``estimate_compression_context_size()`` has been removed. Use
428 447 ``CompressionParameters.estimated_compression_context_size()`` instead.
429 448 * ``get_compression_parameters()`` has been removed. Use
430 449 ``CompressionParameters.from_level()`` instead.
431 450 * The arguments to ``CompressionParameters.__init__()`` have changed. If you
432 451 were using positional arguments before, the positions now map to different
433 452 arguments. It is recommended to use keyword arguments to construct
434 453 ``CompressionParameters`` instances.
435 454 * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard
436 455 1.3.4).
437 456 * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been
438 457 renamed to ``ZstdCompressor.stream_writer()`` and
439 458 ``ZstdDecompressor.stream_writer()``, respectively. The old names are still
440 459 aliased, but will be removed in the next major release.
441 460 * Content sizes are written into frame headers by default
442 461 (``ZstdCompressor(write_content_size=True)`` is now the default).
443 462 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``
444 463 for consistency with other types. The old name is an alias and will be removed
445 464 in the next major release.
446 465
447 466 Bug Fixes
448 467 ---------
449 468
450 469 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2).
451 470 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2).
452 471 * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``.
453 472
454 473 New Features
455 474 ------------
456 475
457 476 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
458 477 bug fixes and performance improvements. It also gives us access to newer
459 478 features.
460 479 * Support for negative compression levels.
461 480 * Support for *long distance matching* (facilitates compression ratios that approach
462 481 LZMA).
463 482 * Support for reading empty zstandard frames (with an embedded content size
464 483 of 0).
465 484 * Support for writing and partial support for reading zstandard frames without a
466 485 magic header.
467 486 * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows
468 487 you to ``.read()`` from a file-like object).
469 488 * Several minor features, bug fixes, and performance enhancements.
470 489 * Wheels for Linux and macOS are now provided with releases.
471 490
472 491 Changes
473 492 -------
474 493
475 494 * Functions accepting bytes data now use the buffer protocol and can accept
476 495 more types (like ``memoryview`` and ``bytearray``) (#26).
477 496 * Add #includes so compilation on OS X and BSDs works (#20).
478 497 * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream
479 498 of decompressed data for a source.
480 499 * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of
481 500 compressed data for a source.
482 501 * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``.
483 502 The old name is still available.
484 503 * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``.
485 504 ``read_from()`` is still available at its old location.
486 505 * Introduce the ``zstandard`` module to import and re-export the C or CFFI
487 506 *backend* as appropriate. Behavior can be controlled via the
488 507 ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for
489 508 usage info.
490 509 * Vendored version of zstd upgraded to 1.3.4.
491 510 * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``.
492 511 * Add ``STRATEGY_BTULTRA`` compression strategy constant.
493 512 * Switch from deprecated ``ZSTD_getDecompressedSize()`` to
494 513 ``ZSTD_getFrameContentSize()`` replacement.
495 514 * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring
496 515 special handling.
497 516 * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()``
498 517 method for determining the current memory utilization of the underlying zstd
499 518 primitive.
500 519 * ``train_dictionary()`` has new arguments and functionality for trying multiple
501 520 variations of COVER parameters and selecting the best one.
502 521 * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and
503 522 ``LDM_BUCKETSIZELOG_MAX``.
504 523 * Converted all consumers to the zstandard *new advanced API*, which uses
505 524 ``ZSTD_compress_generic()``.
506 525 * ``CompressionParameters.__init__`` now accepts several more arguments,
507 526 including support for *long distance matching*.
508 527 * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that
509 528 controls how the dictionary should be interpreted. This can be used to
510 529 force the use of *content-only* dictionaries or to require the presence
511 530 of the dictionary magic header.
512 531 * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the
513 532 compression dictionary so it can efficiently be used with multiple
514 533 ``ZstdCompressor`` instances.
515 534 * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances,
516 535 created automatically on first use, and automatically reused by all
517 536 ``ZstdDecompressor`` instances bound to that dictionary.
518 537 * All meaningful functions now accept keyword arguments.
519 538 * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument
520 539 to control how much work to perform on every decompressor invocation.
521 540 * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the
522 541 total number of bytes written so far.
523 542 * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving
524 543 forward in the stream.
525 544 * Removed ``TARGETLENGTH_MAX`` constant.
526 545 * Added ``frame_header_size(data)`` function.
527 546 * Added ``frame_content_size(data)`` function.
528 547 * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced
529 548 decompression* API.
530 549 * ``ZstdCompressor`` and ``ZstdCompressionParameters`` can now be constructed with
531 550 negative compression levels.
532 551 * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the
533 552 amount of memory required for decompression operations.
534 553 * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with
535 554 the ``format`` compression parameter to control whether the frame magic
536 555 header is written.
537 556 * ``ZstdDecompressor`` now accepts a ``format`` argument to control the
538 557 expected frame format.
539 558 * ``ZstdCompressor`` now has a ``frame_progression()`` method to return
540 559 information about the current compression operation.
541 560 * Error messages in CFFI no longer have ``b''`` literals.
542 561 * Compiler warnings and underlying overflow issues on 32-bit platforms have been
543 562 fixed.
544 563 * Builds in CI now build with compiler warnings as errors. This should hopefully
545 564 fix new compiler warnings from being introduced.
546 565 * Make ``ZstdCompressor(write_content_size=True)`` and
547 566 ``CompressionParameters(write_content_size=True)`` the default.
548 567 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``.
549 568
550 569 0.8.2 (released 2018-02-22)
551 570 ---------------------------
552 571
553 572 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40).
554 573 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35).
555 574
556 575 0.8.1 (released 2017-04-08)
557 576 ---------------------------
558 577
559 578 * Add #includes so compilation on OS X and BSDs works (#20).
560 579
561 580 0.8.0 (released 2017-03-08)
562 581 ===========================
563 582
564 583 * CompressionParameters now has an estimated_compression_context_size() method.
565 584 zstd.estimate_compression_context_size() is now deprecated and slated for
566 585 removal.
567 586 * Implemented a lot of fuzzing tests.
568 587 * CompressionParameters instances now perform extra validation by calling
569 588 ZSTD_checkCParams() at construction time.
570 589 * multi_compress_to_buffer() API for compressing multiple inputs as a
571 590 single operation, as efficiently as possible.
572 591 * ZSTD_CStream instances are now used across multiple operations on
573 592 ZstdCompressor instances, resulting in much better performance for
574 593 APIs that do streaming.
575 594 * ZSTD_DStream instances are now used across multiple operations on
576 595 ZstdDecompressor instances, resulting in much better performance for
577 596 APIs that do streaming.
578 597 * train_dictionary() now releases the GIL.
579 598 * Support for training dictionaries using the COVER algorithm.
580 599 * multi_decompress_to_buffer() API for decompressing multiple frames as a
581 600 single operation, as efficiently as possible.
582 601 * Support for multi-threaded compression.
583 602 * Disable deprecation warnings when compiling CFFI module.
584 603 * Fixed memory leak in train_dictionary().
585 604 * Removed DictParameters type.
586 605 * train_dictionary() now accepts keyword arguments instead of a
587 606 DictParameters instance to control dictionary generation.
588 607
589 608 0.7.0 (released 2017-02-07)
590 609 ===========================
591 610
592 611 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
593 612 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
594 613 decompression of *content-only dictionary chains*.
595 614 * CFFI module fully implemented; all tests run against both C extension and
596 615 CFFI implementation.
597 616 * Vendored version of zstd updated to 1.1.3.
598 617 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
599 618 to avoid extra memory allocation of dict data.
600 619 * Add function names to error messages (by using ":name" in PyArg_Parse*
601 620 functions).
602 621 * Reuse decompression context across operations. Previously, we created a
603 622 new ZSTD_DCtx for each decompress(). This was measured to slow down
604 623 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
605 624 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
606 625 things faster in the process.
607 626 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
608 627 of bytes written.
609 628 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
610 629 written to the underlying output object.
611 630 * CompressionParameters instances now expose their values as attributes.
612 631 * CompressionParameters instances no longer are subscriptable nor behave
613 632 as tuples (backwards incompatible). Use attributes to obtain values.
614 633 * DictParameters instances now expose their values as attributes.
615 634
616 635 0.6.0 (released 2017-01-14)
617 636 ===========================
618 637
619 638 * Support for legacy zstd protocols (build time opt in feature).
620 639 * Automation improvements to test against Python 3.6, latest versions
621 640 of Tox, more deterministic AppVeyor behavior.
622 641 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
623 642 source code manually.
624 643 * Vendored version of zstd updated to 1.1.2.
625 644 * Documentation improvements.
626 645 * Introduce a bench.py script for performing (crude) benchmarks.
627 646 * ZSTD_CCtx instances are now reused across multiple compress() operations.
628 647 * ZstdCompressor.write_to() now has a flush() method.
629 648 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
630 649 flush a block (as opposed to ending the stream).
631 650 * Disallow compress(b'') when writing content sizes by default (issue #11).
632 651
633 652 0.5.2 (released 2016-11-12)
634 653 ===========================
635 654
636 655 * more packaging fixes for source distribution
637 656
638 657 0.5.1 (released 2016-11-12)
639 658 ===========================
640 659
641 660 * setup_zstd.py is included in the source distribution
642 661
643 662 0.5.0 (released 2016-11-10)
644 663 ===========================
645 664
646 665 * Vendored version of zstd updated to 1.1.1.
647 666 * Continuous integration for Python 3.6 and 3.7
648 667 * Continuous integration for Conda
649 668 * Added compression and decompression APIs providing similar interfaces
650 669 to the standard library ``zlib`` and ``bz2`` modules. This allows
651 670 coding to a common interface.
652 671 * ``zstd.__version__`` is now defined.
653 672 * ``read_from()`` on various APIs now accepts objects implementing the buffer
654 673 protocol.
655 674 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
656 675 to pass in an existing buffer with a header without having to create a
657 676 slice or a new object.
658 677 * Implemented ``ZstdCompressionDict.as_bytes()``.
659 678 * Python's memory allocator is now used instead of ``malloc()``.
660 679 * Low-level zstd data structures are reused in more instances, cutting down
661 680 on overhead for certain operations.
662 681 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
663 682 has now been refactored into a standalone ``setup_zstd.py`` file. This
664 683 allows other projects with ``setup.py`` files to reuse the
665 684 ``distutils`` code for this project without copying code.
666 685 * The monolithic ``zstd.c`` file has been split into a header file defining
667 686 types and separate ``.c`` source files for the implementation.
668 687
669 688 Older History
670 689 =============
671 690
672 691 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
673 692 Python extension for use by the Mercurial project. A very hacky prototype
674 693 is sent to the mercurial-devel list for RFC.
675 694
676 695 2016-09-03 - Most functionality from Zstandard C API implemented. Source
677 696 code published on https://github.com/indygreg/python-zstandard. Travis-CI
678 697 automation configured. 0.0.1 release on PyPI.
679 698
680 699 2016-09-05 - After the API was rounded out a bit and support for Python
681 700 2.6 and 2.7 was added, version 0.1 was released to PyPI.
682 701
683 702 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
684 703 was released to PyPI.
685 704
686 705 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
687 706 now accepts arguments controlling frame parameters. The source size can now
688 707 be declared when performing streaming compression. ZstdDecompressor.decompress()
689 708 is implemented. Compression dictionaries are now cached when using the simple
690 709 compression and decompression APIs. Memory size APIs added.
691 710 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
692 711 implemented. This rounds out the major compression/decompression APIs planned
693 712 by the author.
694 713
695 714 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
696 715 decoding a zstd frame (issue #2).
697 716
698 717 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
699 718 write buffer sizes, and a few bug fixes involving failure to read/write
700 719 all data when buffer sizes were too small to hold remaining data.
701 720
702 721 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
@@ -1,1602 +1,1602 @@
1 1 ================
2 2 python-zstandard
3 3 ================
4 4
5 5 This project provides Python bindings for interfacing with the
6 6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 7 and CFFI interface are provided.
8 8
9 9 The primary goal of the project is to provide a rich interface to the
10 10 underlying C API through a Pythonic interface while not sacrificing
11 11 performance. This means exposing most of the features and flexibility
12 12 of the C API while not sacrificing usability or safety that Python provides.
13 13
14 14 The canonical home for this project lives in a Mercurial repository run by
15 15 the author. For convenience, that repository is frequently synchronized to
16 16 https://github.com/indygreg/python-zstandard.
17 17
18 18 | |ci-status|
19 19
20 20 Requirements
21 21 ============
22 22
23 This extension is designed to run with Python 2.7, 3.4, 3.5, 3.6, and 3.7
23 This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8
24 24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
25 25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
26 26
27 27 Installing
28 28 ==========
29 29
30 30 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
31 31 So, to install this package::
32 32
33 33 $ pip install zstandard
34 34
35 35 Binary wheels are made available for some platforms. If you need to
36 36 install from a source distribution, all you should need is a working C
37 37 compiler and the Python development headers/libraries. On many Linux
38 38 distributions, you can install a ``python-dev`` or ``python-devel``
39 39 package to provide these dependencies.
40 40
41 41 Packages are also uploaded to Anaconda Cloud at
42 42 https://anaconda.org/indygreg/zstandard. See that URL for how to install
43 43 this package with ``conda``.
44 44
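With ``conda``, that typically amounts to something like the following
(a hedged sketch; consult the URL above for the authoritative command)::

   $ conda install -c indygreg zstandard
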
45 45 Performance
46 46 ===========
47 47
48 48 zstandard is a highly tunable compression algorithm. In its default settings
49 49 (compression level 3), it will be faster at compression and decompression and
50 50 will have better compression ratios than zlib on most data sets. When tuned
51 51 for speed, it approaches lz4's speed and ratios. When tuned for compression
52 52 ratio, it approaches lzma ratios and compression speed, but decompression
53 53 speed is much faster. See the official zstandard documentation for more.
54 54
55 55 zstandard and this library support multi-threaded compression. There is a
56 56 mechanism to compress large inputs using multiple threads.
57 57
58 58 The performance of this library is usually very similar to what the zstandard
59 59 C API can deliver. Overhead in this library is due to general Python overhead
60 60 and can't easily be avoided by *any* zstandard Python binding. This library
61 61 exposes multiple APIs for performing compression and decompression so callers
62 62 can pick an API suitable for their need. Contrast with the compression
63 63 modules in Python's standard library (like ``zlib``), which only offer limited
64 64 mechanisms for performing operations. The API flexibility means consumers can
65 65 choose to use APIs that facilitate zero copying or minimize Python object
66 66 creation and garbage collection overhead.
67 67
68 68 This library is capable of single-threaded throughputs well over 1 GB/s. For
69 69 exact numbers, measure yourself. The source code repository has a ``bench.py``
70 70 script that can be used to measure things.
71 71
72 72 API
73 73 ===
74 74
75 75 To interface with Zstandard, simply import the ``zstandard`` module::
76 76
77 77 import zstandard
78 78
79 79 It is a popular convention to alias the module as a different name for
80 80 brevity::
81 81
82 82 import zstandard as zstd
83 83
84 84 This module attempts to import and use either the C extension or CFFI
85 85 implementation. On Python platforms known to support C extensions (like
86 86 CPython), it raises an ImportError if the C extension cannot be imported.
87 87 On Python platforms known to not support C extensions (like PyPy), it only
88 88 attempts to import the CFFI implementation and raises ImportError if that
89 89 can't be done. On other platforms, it first tries to import the C extension
90 90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
91 91
92 92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
93 93 environment variable can be set. The following values are accepted:
94 94
95 95 default
96 96 The behavior described above.
97 97 cffi_fallback
98 98 Always try to import the C extension then fall back to CFFI if that
99 99 fails.
100 100 cext
101 101 Only attempt to import the C extension.
102 102 cffi
103 103 Only attempt to import the CFFI implementation.
104 104
105 105 In addition, the ``zstandard`` module exports a ``backend`` attribute
106 106 containing the string name of the backend being used. It will be one
107 107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
108 108
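For example, to force the CFFI backend and confirm which backend actually
loaded (a minimal sketch; the environment variable must be set before the
module is first imported)::

   import os
   os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'

   import zstandard as zstd
   print(zstd.backend)  # 'cffi'
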
109 109 The types, functions, and attributes exposed by the ``zstandard`` module
110 110 are documented in the sections below.
111 111
112 112 .. note::
113 113
114 114 The documentation in this section makes references to various zstd
115 115 concepts and functionality. The source repository contains a
116 116 ``docs/concepts.rst`` file explaining these in more detail.
117 117
118 118 ZstdCompressor
119 119 --------------
120 120
121 121 The ``ZstdCompressor`` class provides an interface for performing
122 122 compression operations. Each instance is essentially a wrapper around a
123 123 ``ZSTD_CCtx`` from the C API.
124 124
125 125 Each instance is associated with parameters that control compression
126 126 behavior. These come from the following named arguments (all optional):
127 127
128 128 level
129 129 Integer compression level. Valid values are between 1 and 22.
130 130 dict_data
131 131 Compression dictionary to use.
132 132
133 133 Note: When using dictionary data and ``compress()`` is called multiple
134 134 times, the ``ZstdCompressionParameters`` derived from an integer
135 135 compression ``level`` and the first compressed data's size will be reused
136 136 for all subsequent operations. This may not be desirable if source data
137 137 size varies significantly.
138 138 compression_params
139 139 A ``ZstdCompressionParameters`` instance defining compression settings.
140 140 write_checksum
141 141 Whether a 4 byte checksum should be written with the compressed data.
142 142 Defaults to False. If True, the decompressor can verify that decompressed
143 143 data matches the original input data.
144 144 write_content_size
145 145 Whether the size of the uncompressed data will be written into the
146 146 header of compressed data. Defaults to True. The data will only be
147 147 written if the compressor knows the size of the input data. This is
148 148 often not true for streaming compression.
149 149 write_dict_id
150 150 Whether to write the dictionary ID into the compressed data.
151 151 Defaults to True. The dictionary ID is only written if a dictionary
152 152 is being used.
153 153 threads
154 154 Enables and sets the number of threads to use for multi-threaded compression
155 155 operations. Defaults to 0, which means to use single-threaded compression.
156 156 Negative values will resolve to the number of logical CPUs in the system.
157 157 Read below for more info on multi-threaded compression. This argument only
158 158 controls thread count for operations that operate on individual pieces of
159 159 data. APIs that spawn multiple threads for working on multiple pieces of
160 160 data have their own ``threads`` argument.
161 161
162 162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
163 163 ``write_content_size``, ``write_dict_id``, and ``threads``.
164 164
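As a sketch (the parameter values here are arbitrary illustrations, not
recommendations), the two construction styles look like this::

   # Simple style: integer level plus individual frame options.
   cctx = zstd.ZstdCompressor(level=10, write_checksum=True, threads=-1)

   # Advanced style: a ZstdCompressionParameters instance instead.
   params = zstd.ZstdCompressionParameters.from_level(10)
   cctx = zstd.ZstdCompressor(compression_params=params)

Combining ``compression_params`` with any of the simple arguments is an
error.
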
165 165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
166 166 instances can be called from multiple Python threads simultaneously. In other
167 167 words, assume instances are not thread safe unless stated otherwise.
168 168
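A common pattern is therefore to give each worker thread its own compressor
(an illustrative sketch, not an API provided by this library)::

   import threading

   _local = threading.local()

   def compress_chunk(data):
       # Lazily create one ZstdCompressor per thread.
       if not hasattr(_local, 'cctx'):
           _local.cctx = zstd.ZstdCompressor()
       return _local.cctx.compress(data)
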
169 169 Utility Methods
170 170 ^^^^^^^^^^^^^^^
171 171
172 172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
173 173 ingested, consumed, and produced by the current compression operation.
174 174
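A brief sketch (assuming the method may be queried while a ``compressobj()``
operation is in flight)::

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj()
   cobj.compress(b'data' * 16384)
   ingested, consumed, produced = cctx.frame_progression()
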
175 175 ``memory_size()`` obtains the memory utilization of the underlying zstd
176 176 compression context, in bytes.::
177 177
178 178 cctx = zstd.ZstdCompressor()
179 179 memory = cctx.memory_size()
180 180
181 181 Simple API
182 182 ^^^^^^^^^^
183 183
184 184 ``compress(data)`` compresses and returns data as a one-shot operation.::
185 185
186 186 cctx = zstd.ZstdCompressor()
187 187 compressed = cctx.compress(b'data to compress')
188 188
189 189 The ``data`` argument can be any object that implements the *buffer protocol*.
190 190
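Because the *buffer protocol* is used, types like ``bytearray`` and
``memoryview`` work just as well as ``bytes`` (a brief sketch)::

   cctx = zstd.ZstdCompressor()
   compressed = cctx.compress(bytearray(b'data to compress'))
   compressed = cctx.compress(memoryview(b'data to compress'))
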
191 191 Stream Reader API
192 192 ^^^^^^^^^^^^^^^^^
193 193
194 194 ``stream_reader(source)`` can be used to obtain an object conforming to the
195 195 ``io.RawIOBase`` interface for reading compressed output as a stream::
196 196
197 197 with open(path, 'rb') as fh:
198 198 cctx = zstd.ZstdCompressor()
199 199 reader = cctx.stream_reader(fh)
200 200 while True:
201 201 chunk = reader.read(16384)
202 202 if not chunk:
203 203 break
204 204
205 205 # Do something with compressed chunk.
206 206
207 207 Instances can also be used as context managers::
208 208
209 209 with open(path, 'rb') as fh:
210 210 with cctx.stream_reader(fh) as reader:
211 211 while True:
212 212 chunk = reader.read(16384)
213 213 if not chunk:
214 214 break
215 215
216 216 # Do something with compressed chunk.
217 217
218 218 When the context manager exits or ``close()`` is called, the stream is closed,
219 219 underlying resources are released, and future operations against the compression
220 220 stream will fail.
221 221
222 222 The ``source`` argument to ``stream_reader()`` can be any object with a
223 223 ``read(size)`` method or any object implementing the *buffer protocol*.
224 224
225 225 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
226 226 stream is. This is used to adjust compression parameters so they are
227 227 tailored to the source size.::
228 228
229 229 with open(path, 'rb') as fh:
230 230 cctx = zstd.ZstdCompressor()
231 231 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
232 232 ...
233 233
234 234 If the ``source`` is a stream, you can specify how large ``read()`` requests
235 235 to that stream should be via the ``read_size`` argument. It defaults to
236 236 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
237 237
238 238 with open(path, 'rb') as fh:
239 239 cctx = zstd.ZstdCompressor()
240 240 # Will perform fh.read(8192) when obtaining data to feed into the
241 241 # compressor.
242 242 with cctx.stream_reader(fh, read_size=8192) as reader:
243 243 ...
244 244
245 245 The stream returned by ``stream_reader()`` is neither writable nor seekable
246 246 (even if the underlying source is seekable). ``readline()`` and
247 247 ``readlines()`` are not implemented because they don't make sense for
248 248 compressed data. ``tell()`` returns the number of compressed bytes
249 249 emitted so far.
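
Because ``source`` can be a buffer protocol object, an in-memory ``bytes``
instance can be compressed without wrapping it in a file object. A sketch
also showing ``tell()``::

    cctx = zstd.ZstdCompressor()
    with cctx.stream_reader(b'raw input data') as reader:
        chunks = []
        while True:
            chunk = reader.read(16384)
            if not chunk:
                break
            chunks.append(chunk)

        # ``tell()`` reflects the number of compressed bytes emitted so far.
        assert reader.tell() == sum(len(c) for c in chunks)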
250 250
251 251 Streaming Input API
252 252 ^^^^^^^^^^^^^^^^^^^
253 253
254 254 ``stream_writer(fh)`` allows you to *stream* data into a compressor.
255 255
256 256 Returned instances implement the ``io.RawIOBase`` interface. Only methods
257 257 that involve writing will do useful things.
258 258
259 259 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
260 260 compressed data is available, ``write()`` will be called with the compressed
261 261 data as its argument. Many common Python types implement ``write()``, including
262 262 open file handles and ``io.BytesIO``.
263 263
264 264 The ``write(data)`` method is used to feed data into the compressor.
265 265
266 266 The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
267 267 data remains within the compressor's internal state into the output object. This
268 268 may result in 0 or more ``write()`` calls to the output object. This method
269 269 accepts an optional ``flush_mode`` argument to control the flushing behavior.
270 270 Its value can be any of the ``FLUSH_*`` constants.
271 271
272 272 Both ``write()`` and ``flush()`` return the number of bytes written to the
273 273 object's ``write()``. In many cases, small inputs do not accumulate enough
274 274 data to cause a write and ``write()`` will return ``0``.
275 275
276 276 Calling ``close()`` will mark the stream as closed and subsequent I/O
277 277 operations will raise ``ValueError`` (per the documented behavior of
278 278 ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
279 279 stream if such a method exists.
280 280
Typical usage is as follows::
282 282
283 283 cctx = zstd.ZstdCompressor(level=10)
284 284 compressor = cctx.stream_writer(fh)
285 285
286 286 compressor.write(b'chunk 0\n')
287 287 compressor.write(b'chunk 1\n')
288 288 compressor.flush()
289 289 # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
290 290 # Receiver is also expecting more data in the zstd *frame*.
291 291
292 292 compressor.write(b'chunk 2\n')
293 293 compressor.flush(zstd.FLUSH_FRAME)
294 294 # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``.
295 295 # Receiver is expecting no more data, as the zstd frame is closed.
296 296 # Any future calls to ``write()`` at this point will construct a new
297 297 # zstd frame.
298 298
299 299 Instances can be used as context managers. Exiting the context manager is
300 300 the equivalent of calling ``close()``, which is equivalent to calling
301 301 ``flush(zstd.FLUSH_FRAME)``::
302 302
303 303 cctx = zstd.ZstdCompressor(level=10)
304 304 with cctx.stream_writer(fh) as compressor:
305 305 compressor.write(b'chunk 0')
306 306 compressor.write(b'chunk 1')
307 307 ...
308 308
309 309 .. important::
310 310
311 311 If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
312 312 a full zstd *frame* and consumers of this data may complain about malformed
313 313 input. It is recommended to use instances as a context manager to ensure
314 314 *frames* are properly finished.
315 315
316 316 If the size of the data being fed to this streaming compressor is known,
317 317 you can declare it before compression begins::
318 318
319 319 cctx = zstd.ZstdCompressor()
320 320 with cctx.stream_writer(fh, size=data_len) as compressor:
321 321 compressor.write(chunk0)
322 322 compressor.write(chunk1)
323 323 ...
324 324
325 325 Declaring the size of the source data allows compression parameters to
326 326 be tuned. And if ``write_content_size`` is used, it also results in the
327 327 content size being written into the frame header of the output data.
328 328
The size of chunks being ``write()``en to the destination can be specified::
330 330
331 331 cctx = zstd.ZstdCompressor()
332 332 with cctx.stream_writer(fh, write_size=32768) as compressor:
333 333 ...
334 334
335 335 To see how much memory is being used by the streaming compressor::
336 336
337 337 cctx = zstd.ZstdCompressor()
338 338 with cctx.stream_writer(fh) as compressor:
339 339 ...
340 340 byte_size = compressor.memory_size()
341 341
The total number of bytes written so far is exposed via ``tell()``::
343 343
344 344 cctx = zstd.ZstdCompressor()
345 345 with cctx.stream_writer(fh) as compressor:
346 346 ...
347 347 total_written = compressor.tell()
348 348
349 349 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
350 350 the return value of ``write()``. When ``False`` (the default), ``write()`` returns
351 351 the number of bytes that were ``write()``en to the underlying object. When
352 352 ``True``, ``write()`` returns the number of bytes read from the input that
353 353 were subsequently written to the compressor. ``True`` is the *proper* behavior
354 354 for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
355 355 the default value in a future release.
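
A sketch of the difference::

    import io

    cctx = zstd.ZstdCompressor()

    writer = cctx.stream_writer(io.BytesIO())
    # Returns compressed bytes written to the destination; often 0 for
    # small inputs that remain buffered inside the compressor.
    writer.write(b'foo')

    writer = cctx.stream_writer(io.BytesIO(), write_return_read=True)
    # Returns 3: the number of input bytes consumed.
    writer.write(b'foo')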
356 356
357 357 Streaming Output API
358 358 ^^^^^^^^^^^^^^^^^^^^
359 359
360 360 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
361 361 compressor as an iterator of data chunks.::
362 362
363 363 cctx = zstd.ZstdCompressor()
364 364 for chunk in cctx.read_to_iter(fh):
365 365 # Do something with emitted data.
366 366
367 367 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
368 368 conforms to the buffer protocol.
369 369
370 370 Uncompressed data is fetched from the source either by calling ``read(size)``
371 371 or by fetching a slice of data from the object directly (in the case where
372 372 the buffer protocol is being used). The returned iterator consists of chunks
373 373 of compressed data.
374 374
375 375 If reading from the source via ``read()``, ``read()`` will be called until
376 376 it raises or returns an empty bytes (``b''``). It is perfectly valid for
the source to deliver fewer bytes than were requested by ``read(size)``.
378 378
379 379 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
380 380 declaring the size of the input stream::
381 381
382 382 cctx = zstd.ZstdCompressor()
383 383 for chunk in cctx.read_to_iter(fh, size=some_int):
384 384 pass
385 385
386 386 You can also control the size that data is ``read()`` from the source and
387 387 the ideal size of output chunks::
388 388
389 389 cctx = zstd.ZstdCompressor()
390 390 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
391 391 pass
392 392
393 393 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
394 394 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
395 395 be whatever the object being read from delivers. These will often be of a
396 396 uniform size.
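
Since ``read_to_iter()`` also accepts buffer protocol objects, an in-memory
input can be compressed directly (a sketch)::

    cctx = zstd.ZstdCompressor()
    compressed = b''.join(cctx.read_to_iter(b'data to compress'))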
397 397
398 398 Stream Copying API
399 399 ^^^^^^^^^^^^^^^^^^
400 400
401 401 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
402 402 compressing it.::
403 403
404 404 cctx = zstd.ZstdCompressor()
405 405 cctx.copy_stream(ifh, ofh)
406 406
407 407 For example, say you wish to compress a file::
408 408
409 409 cctx = zstd.ZstdCompressor()
410 410 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
411 411 cctx.copy_stream(ifh, ofh)
412 412
413 413 It is also possible to declare the size of the source stream::
414 414
415 415 cctx = zstd.ZstdCompressor()
416 416 cctx.copy_stream(ifh, ofh, size=len_of_input)
417 417
You can also specify how large the chunks that are ``read()`` from and
``write()``en to the streams should be::
420 420
421 421 cctx = zstd.ZstdCompressor()
422 422 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
423 423
424 424 The stream copier returns a 2-tuple of bytes read and written::
425 425
426 426 cctx = zstd.ZstdCompressor()
427 427 read_count, write_count = cctx.copy_stream(ifh, ofh)
428 428
429 429 Compressor API
430 430 ^^^^^^^^^^^^^^
431 431
432 432 ``compressobj()`` returns an object that exposes ``compress(data)`` and
433 433 ``flush()`` methods. Each returns compressed data or an empty bytes.
434 434
435 435 The purpose of ``compressobj()`` is to provide an API-compatible interface
436 436 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
437 437 swap in different compressor objects while using the same API.
438 438
439 439 ``flush()`` accepts an optional argument indicating how to end the stream.
440 440 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
441 441 Once this type of flush is performed, ``compress()`` and ``flush()`` can
442 442 no longer be called. This type of flush **must** be called to end the
443 443 compression context. If not called, returned data may be incomplete.
444 444
445 445 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
446 446 zstd block. Flushes of this type can be performed multiple times. The next
447 447 call to ``compress()`` will begin a new zstd block.
448 448
449 449 Here is how this API should be used::
450 450
451 451 cctx = zstd.ZstdCompressor()
452 452 cobj = cctx.compressobj()
453 453 data = cobj.compress(b'raw input 0')
454 454 data = cobj.compress(b'raw input 1')
455 455 data = cobj.flush()
456 456
457 457 Or to flush blocks::
458 458
    cctx = zstd.ZstdCompressor()
460 460 cobj = cctx.compressobj()
461 461 data = cobj.compress(b'chunk in first block')
462 462 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
463 463 data = cobj.compress(b'chunk in second block')
464 464 data = cobj.flush()
465 465
466 466 For best performance results, keep input chunks under 256KB. This avoids
467 467 extra allocations for a large output object.
468 468
469 469 It is possible to declare the input size of the data that will be fed into
470 470 the compressor::
471 471
472 472 cctx = zstd.ZstdCompressor()
473 473 cobj = cctx.compressobj(size=6)
474 474 data = cobj.compress(b'foobar')
475 475 data = cobj.flush()
476 476
477 477 Chunker API
478 478 ^^^^^^^^^^^
479 479
480 480 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
481 481 an object that can be used to iteratively feed chunks of data into a compressor
482 482 and produce output chunks of a uniform size.
483 483
484 484 The object returned by ``chunker()`` exposes the following methods:
485 485
486 486 ``compress(data)``
487 487 Feeds new input data into the compressor.
488 488
489 489 ``flush()``
490 490 Flushes all data currently in the compressor.
491 491
492 492 ``finish()``
493 493 Signals the end of input data. No new data can be compressed after this
494 494 method is called.
495 495
496 496 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
497 497 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
498 498 MUST iterate through all elements of the returned iterator before performing
499 499 another operation on the object.
500 500
501 501 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
502 502
503 503 ``flush()`` and ``finish()`` may return a final chunk smaller than
504 504 ``chunk_size``.
505 505
506 506 Here is how the API should be used::
507 507
508 508 cctx = zstd.ZstdCompressor()
509 509 chunker = cctx.chunker(chunk_size=32768)
510 510
511 511 with open(path, 'rb') as fh:
512 512 while True:
513 513 in_chunk = fh.read(32768)
514 514 if not in_chunk:
515 515 break
516 516
517 517 for out_chunk in chunker.compress(in_chunk):
518 518 # Do something with output chunk of size 32768.
519 519
520 520 for out_chunk in chunker.finish():
521 521 # Do something with output chunks that finalize the zstd frame.
522 522
523 523 The ``chunker()`` API is often a better alternative to ``compressobj()``.
524 524
525 525 ``compressobj()`` will emit output data as it is available. This results in a
526 526 *stream* of output chunks of varying sizes. The consistency of the output chunk
527 527 size with ``chunker()`` is more appropriate for many usages, such as sending
528 528 compressed data to a socket.
529 529
530 530 ``compressobj()`` may also perform extra memory reallocations in order to
531 531 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
532 532 chunks are all the same size (except for flushed or final chunks), there is
533 533 less memory allocation overhead.
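
``flush()`` is useful for forcing buffered data out mid-stream, e.g. when
the next input may not arrive for a while. A sketch::

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=16384)

    out = []
    out.extend(chunker.compress(b'first piece of data'))
    # Evict whatever the compressor is still buffering before waiting
    # for more input.
    out.extend(chunker.flush())
    out.extend(chunker.compress(b'second piece of data'))
    out.extend(chunker.finish())

    compressed = b''.join(out)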
534 534
535 535 Batch Compression API
536 536 ^^^^^^^^^^^^^^^^^^^^^
537 537
538 538 (Experimental. Not yet supported in CFFI bindings.)
539 539
540 540 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
541 541 inputs as a single operation.
542 542
543 543 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
``BufferWithSegments``, or a list containing bytes-like objects. Each element of
545 545 the container will be compressed individually using the configured parameters
546 546 on the ``ZstdCompressor`` instance.
547 547
548 548 The ``threads`` argument controls how many threads to use for compression. The
549 549 default is ``0`` which means to use a single thread. Negative values use the
550 550 number of logical CPUs in the machine.
551 551
552 552 The function returns a ``BufferWithSegmentsCollection``. This type represents
N discrete memory allocations, each holding 1 or more compressed frames.
554 554
555 555 Output data is written to shared memory buffers. This means that unlike
556 556 regular Python objects, a reference to *any* object within the collection
557 557 keeps the shared buffer and therefore memory backing it alive. This can have
558 558 undesirable effects on process memory usage.
559 559
The API and behavior of this function are experimental and will likely change.
561 561 Known deficiencies include:
562 562
563 563 * If asked to use multiple threads, it will always spawn that many threads,
564 564 even if the input is too small to use them. It should automatically lower
565 565 the thread count when the extra threads would just add overhead.
566 566 * The buffer allocation strategy is fixed. There is room to make it dynamic,
567 567 perhaps even to allow one output buffer per input, facilitating a variation
568 568 of the API to return a list without the adverse effects of shared memory
569 569 buffers.
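
Despite these caveats, basic usage is straightforward (a sketch)::

    cctx = zstd.ZstdCompressor()
    collection = cctx.multi_compress_to_buffer(
        [b'input 0', b'input 1', b'input 2'], threads=-1)

    # Each segment references one compressed frame in shared memory;
    # tobytes() copies it out.
    for i in range(len(collection)):
        frame = collection[i].tobytes()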
570 570
571 571 ZstdDecompressor
572 572 ----------------
573 573
574 574 The ``ZstdDecompressor`` class provides an interface for performing
575 575 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
576 576 the C API.
577 577
578 578 Each instance is associated with parameters that control decompression. These
579 579 come from the following named arguments (all optional):
580 580
581 581 dict_data
582 582 Compression dictionary to use.
583 583 max_window_size
Sets an upper limit on the window size for decompression operations in
585 585 kibibytes. This setting can be used to prevent large memory allocations
586 586 for inputs using large compression windows.
587 587 format
588 588 Set the format of data for the decoder. By default, this is
589 589 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
590 590 allow decoding frames without the 4 byte magic header. Not all decompression
591 591 APIs support this mode.
592 592
593 593 The interface of this class is very similar to ``ZstdCompressor`` (by design).
594 594
595 595 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
596 596 instances can be called from multiple Python threads simultaneously. In other
597 597 words, assume instances are not thread safe unless stated otherwise.
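
A sketch of constructing a decompressor with an explicit window limit (the
limit value is arbitrary and interpreted per the ``max_window_size``
description above)::

    max_window = 1024
    dctx = zstd.ZstdDecompressor(max_window_size=max_window)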
598 598
599 599 Utility Methods
600 600 ^^^^^^^^^^^^^^^
601 601
602 602 ``memory_size()`` obtains the size of the underlying zstd decompression context,
603 603 in bytes.::
604 604
605 605 dctx = zstd.ZstdDecompressor()
606 606 size = dctx.memory_size()
607 607
608 608 Simple API
609 609 ^^^^^^^^^^
610 610
611 611 ``decompress(data)`` can be used to decompress an entire compressed zstd
612 612 frame in a single operation.::
613 613
614 614 dctx = zstd.ZstdDecompressor()
615 615 decompressed = dctx.decompress(data)
616 616
617 617 By default, ``decompress(data)`` will only work on data written with the content
618 618 size encoded in its header (this is the default behavior of
619 619 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
620 620 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
621 621 be raised.
622 622
623 623 If the compressed data doesn't have its content size embedded within it,
624 624 decompression can be attempted by specifying the ``max_output_size``
625 625 argument.::
626 626
627 627 dctx = zstd.ZstdDecompressor()
628 628 uncompressed = dctx.decompress(data, max_output_size=1048576)
629 629
630 630 Ideally, ``max_output_size`` will be identical to the decompressed output
631 631 size.
632 632
633 633 If ``max_output_size`` is too small to hold the decompressed data,
634 634 ``zstd.ZstdError`` will be raised.
635 635
636 636 If ``max_output_size`` is larger than the decompressed data, the allocated
637 637 output buffer will be resized to only use the space required.
638 638
639 639 Please note that an allocation of the requested ``max_output_size`` will be
640 640 performed every time the method is called. Setting to a very large value could
641 641 result in a lot of work for the memory allocator and may result in
642 642 ``MemoryError`` being raised if the allocation fails.
643 643
644 644 .. important::
645 645
646 646 If the exact size of decompressed data is unknown (not passed in explicitly
647 647 and not stored in the zstandard frame), for performance reasons it is
648 648 encouraged to use a streaming API.
649 649
650 650 Stream Reader API
651 651 ^^^^^^^^^^^^^^^^^
652 652
653 653 ``stream_reader(source)`` can be used to obtain an object conforming to the
654 654 ``io.RawIOBase`` interface for reading decompressed output as a stream::
655 655
656 656 with open(path, 'rb') as fh:
657 657 dctx = zstd.ZstdDecompressor()
658 658 reader = dctx.stream_reader(fh)
659 659 while True:
660 660 chunk = reader.read(16384)
661 661 if not chunk:
662 662 break
663 663
664 664 # Do something with decompressed chunk.
665 665
666 666 The stream can also be used as a context manager::
667 667
668 668 with open(path, 'rb') as fh:
669 669 dctx = zstd.ZstdDecompressor()
670 670 with dctx.stream_reader(fh) as reader:
671 671 ...
672 672
673 673 When used as a context manager, the stream is closed and the underlying
674 674 resources are released when the context manager exits. Future operations against
675 675 the stream will fail.
676 676
677 677 The ``source`` argument to ``stream_reader()`` can be any object with a
678 678 ``read(size)`` method or any object implementing the *buffer protocol*.
679 679
680 680 If the ``source`` is a stream, you can specify how large ``read()`` requests
681 681 to that stream should be via the ``read_size`` argument. It defaults to
682 682 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
683 683
684 684 with open(path, 'rb') as fh:
685 685 dctx = zstd.ZstdDecompressor()
686 686 # Will perform fh.read(8192) when obtaining data for the decompressor.
687 687 with dctx.stream_reader(fh, read_size=8192) as reader:
688 688 ...
689 689
690 690 The stream returned by ``stream_reader()`` is not writable.
691 691
692 692 The stream returned by ``stream_reader()`` is *partially* seekable.
693 693 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
694 694 of the current position are allowed. Offsets behind the current read
695 695 position and offsets relative to the end of stream are not allowed and
696 696 will raise ``ValueError`` if attempted.
697 697
698 698 ``tell()`` returns the number of decompressed bytes read so far.
699 699
700 700 Not all I/O methods are implemented. Notably missing is support for
701 701 ``readline()``, ``readlines()``, and linewise iteration support. This is
702 702 because streams operate on binary data - not text data. If you want to
703 703 convert decompressed output to text, you can chain an ``io.TextIOWrapper``
704 704 to the stream::
705 705
706 706 with open(path, 'rb') as fh:
707 707 dctx = zstd.ZstdDecompressor()
708 708 stream_reader = dctx.stream_reader(fh)
709 709 text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
710 710
711 711 for line in text_stream:
712 712 ...
713 713
714 714 The ``read_across_frames`` argument to ``stream_reader()`` controls the
715 715 behavior of read operations when the end of a zstd *frame* is encountered.
716 716 When ``False`` (the default), a read will complete when the end of a
717 717 zstd *frame* is encountered. When ``True``, a read can potentially
718 718 return data spanning multiple zstd *frames*.
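
A sketch, decompressing two concatenated frames in one read::

    cctx = zstd.ZstdCompressor()
    data = cctx.compress(b'foo') + cctx.compress(b'bar')

    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(data, read_across_frames=True) as reader:
        # Without read_across_frames=True, this read would stop at the
        # end of the first frame and return only b'foo'.
        decompressed = reader.read(16384)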
719 719
720 720 Streaming Input API
721 721 ^^^^^^^^^^^^^^^^^^^
722 722
723 723 ``stream_writer(fh)`` allows you to *stream* data into a decompressor.
724 724
725 725 Returned instances implement the ``io.RawIOBase`` interface. Only methods
726 726 that involve writing will do useful things.
727 727
728 728 The argument to ``stream_writer()`` is typically an object that also implements
729 729 ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
730 730 common Python types conform to this interface, including open file handles
731 731 and ``io.BytesIO``.
732 732
733 733 Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
734 734 is sent to the decompressor by calling ``write(data)`` and decompressed
735 735 output is written to the underlying stream by calling its ``write(data)``
736 736 method.::
737 737
738 738 dctx = zstd.ZstdDecompressor()
739 739 decompressor = dctx.stream_writer(fh)
740 740
741 741 decompressor.write(compressed_data)
742 742 ...
743 743
744 744
745 745 Calls to ``write()`` will return the number of bytes written to the output
746 746 object. Not all inputs will result in bytes being written, so return values
747 747 of ``0`` are possible.
748 748
749 749 Like the ``stream_writer()`` compressor, instances can be used as context
managers. However, context managers add no special behavior here and offer
little benefit over calling ``close()`` directly.
752 752
753 753 Calling ``close()`` will mark the stream as closed and subsequent I/O operations
754 754 will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
755 755 ``close()`` will also call ``close()`` on the underlying stream if such a
756 756 method exists.
757 757
The size of chunks being ``write()``en to the destination can be specified::
759 759
760 760 dctx = zstd.ZstdDecompressor()
761 761 with dctx.stream_writer(fh, write_size=16384) as decompressor:
762 762 pass
763 763
764 764 You can see how much memory is being used by the decompressor::
765 765
766 766 dctx = zstd.ZstdDecompressor()
767 767 with dctx.stream_writer(fh) as decompressor:
768 768 byte_size = decompressor.memory_size()
769 769
770 770 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
the return value of ``write()``. When ``False`` (the default), ``write()``
772 772 returns the number of bytes that were ``write()``en to the underlying stream.
773 773 When ``True``, ``write()`` returns the number of bytes read from the input.
774 774 ``True`` is the *proper* behavior for ``write()`` as specified by the
775 775 ``io.RawIOBase`` interface and will become the default in a future release.
776 776
777 777 Streaming Output API
778 778 ^^^^^^^^^^^^^^^^^^^^
779 779
780 780 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
781 781 compressed source as an iterator of data chunks.::
782 782
783 783 dctx = zstd.ZstdDecompressor()
784 784 for chunk in dctx.read_to_iter(fh):
785 785 # Do something with original data.
786 786
787 787 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
788 788 return compressed bytes or an object conforming to the buffer protocol that
789 789 can expose its data as a contiguous range of bytes.
790 790
791 791 ``read_to_iter()`` returns an iterator whose elements are chunks of the
792 792 decompressed data.
793 793
794 794 The size of requested ``read()`` from the source can be specified::
795 795
796 796 dctx = zstd.ZstdDecompressor()
797 797 for chunk in dctx.read_to_iter(fh, read_size=16384):
798 798 pass
799 799
800 800 It is also possible to skip leading bytes in the input data::
801 801
802 802 dctx = zstd.ZstdDecompressor()
803 803 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
804 804 pass
805 805
806 806 .. tip::
807 807
808 808 Skipping leading bytes is useful if the source data contains extra
809 809 *header* data. Traditionally, you would need to create a slice or
810 810 ``memoryview`` of the data you want to decompress. This would create
811 811 overhead. It is more efficient to pass the offset into this API.
812 812
813 813 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
814 814 controls when data is decompressed. If the iterator isn't consumed,
815 815 decompression is put on hold.
816 816
817 817 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
818 818 the behavior may seem similar to what occurs when the simple decompression
819 819 API is used. However, this API works when the decompressed size is unknown.
820 820 Furthermore, if feeding large inputs, the decompressor will work in chunks
821 821 instead of performing a single operation.
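
For example, an entire in-memory input can be decompressed iteratively
without knowing its decompressed size up front (a sketch)::

    dctx = zstd.ZstdDecompressor()
    decompressed = b''.join(dctx.read_to_iter(compressed_data))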
822 822
823 823 Stream Copying API
824 824 ^^^^^^^^^^^^^^^^^^
825 825
826 826 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
827 827 performing decompression.::
828 828
829 829 dctx = zstd.ZstdDecompressor()
830 830 dctx.copy_stream(ifh, ofh)
831 831
832 832 e.g. to decompress a file to another file::
833 833
834 834 dctx = zstd.ZstdDecompressor()
835 835 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
836 836 dctx.copy_stream(ifh, ofh)
837 837
838 838 The size of chunks being ``read()`` and ``write()`` from and to the streams
839 839 can be specified::
840 840
841 841 dctx = zstd.ZstdDecompressor()
842 842 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
843 843
844 844 Decompressor API
845 845 ^^^^^^^^^^^^^^^^
846 846
847 847 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
848 848 method. Compressed data chunks are fed into ``decompress(data)`` and
849 849 uncompressed output (or an empty bytes) is returned. Output from subsequent
850 850 calls needs to be concatenated to reassemble the full decompressed byte
851 851 sequence.
852 852
853 853 The purpose of ``decompressobj()`` is to provide an API-compatible interface
854 854 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
855 855 to swap in different decompressor objects while using the same API.
856 856
857 857 Each object is single use: once an input frame is decoded, ``decompress()``
858 858 can no longer be called.
859 859
860 860 Here is how this API should be used::
861 861
862 862 dctx = zstd.ZstdDecompressor()
863 863 dobj = dctx.decompressobj()
864 864 data = dobj.decompress(compressed_chunk_0)
865 865 data = dobj.decompress(compressed_chunk_1)
866 866
867 867 By default, calls to ``decompress()`` write output data in chunks of size
868 868 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
869 869 before being returned to the caller. It is possible to define the size of
870 870 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
871 871
872 872 dctx = zstd.ZstdDecompressor()
873 873 dobj = dctx.decompressobj(write_size=1048576)
874 874
875 875 .. note::
876 876
877 877 Because calls to ``decompress()`` may need to perform multiple
878 878 memory (re)allocations, this streaming decompression API isn't as
879 879 efficient as other APIs.
880 880
881 881 For compatibility with the standard library APIs, instances expose a
882 882 ``flush([length=None])`` method. This method no-ops and has no meaningful
883 883 side-effects, making it safe to call any time.
884 884
885 885 Batch Decompression API
886 886 ^^^^^^^^^^^^^^^^^^^^^^^
887 887
888 888 (Experimental. Not yet supported in CFFI bindings.)
889 889
890 890 ``multi_decompress_to_buffer()`` performs decompression of multiple
891 891 frames as a single operation and returns a ``BufferWithSegmentsCollection``
892 892 containing decompressed data for all inputs.
893 893
894 894 Compressed frames can be passed to the function as a ``BufferWithSegments``,
895 895 a ``BufferWithSegmentsCollection``, or as a list containing objects that
896 896 conform to the buffer protocol. For best performance, pass a
897 897 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
898 898 minimal input validation will be done for that type. If calling from
Python (as opposed to C), constructing one of these instances may add
enough overhead to cancel out the performance benefit of the reduced
validation for list inputs.::
902 902
903 903 dctx = zstd.ZstdDecompressor()
904 904 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
905 905
906 906 The decompressed size of each frame MUST be discoverable. It can either be
907 907 embedded within the zstd frame (``write_content_size=True`` argument to
908 908 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
909 909
910 910 The ``decompressed_sizes`` argument is an object conforming to the buffer
911 911 protocol which holds an array of 64-bit unsigned integers in the machine's
912 912 native format defining the decompressed sizes of each frame. If this argument
913 913 is passed, it avoids having to scan each frame for its decompressed size.
914 914 This frame scanning can add noticeable overhead in some scenarios.::
915 915
916 916 frames = [...]
917 917 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
918 918
919 919 dctx = zstd.ZstdDecompressor()
920 920 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
921 921
922 922 The ``threads`` argument controls the number of threads to use to perform
923 923 decompression operations. The default (``0``) or the value ``1`` means to
924 924 use a single thread. Negative values use the number of logical CPUs in the
925 925 machine.
926 926
927 927 .. note::
928 928
929 929 It is possible to pass a ``mmap.mmap()`` instance into this function by
930 930 wrapping it with a ``BufferWithSegments`` instance (which will define the
931 931 offsets of frames within the memory mapped region).
932 932
933 933 This function is logically equivalent to performing ``dctx.decompress()``
934 934 on each input frame and returning the result.
935 935
936 936 This function exists to perform decompression on multiple frames as fast
937 937 as possible by having as little overhead as possible. Since decompression is
938 938 performed as a single operation and since the decompressed output is stored in
939 939 a single buffer, extra memory allocations, Python objects, and Python function
940 940 calls are avoided. This is ideal for scenarios where callers know up front that
941 941 they need to access data for multiple frames, such as when *delta chains* are
942 942 being used.
943 943
944 944 Currently, the implementation always spawns multiple threads when requested,
945 945 even if the amount of work to do is small. In the future, it will be smarter
946 946 about avoiding threads and their associated overhead when the amount of
947 947 work to do is small.
948 948
949 949 Prefix Dictionary Chain Decompression
950 950 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
951 951
952 952 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
953 953 zstd frames produced using chained *prefix* dictionary compression. Such
954 954 a list of frames is produced by compressing discrete inputs where each
955 955 non-initial input is compressed with a *prefix* dictionary consisting of the
956 956 content of the previous input.
957 957
958 958 For example, say you have the following inputs::
959 959
960 960 inputs = [b'input 1', b'input 2', b'input 3']
961 961
962 962 The zstd frame chain consists of:
963 963
964 964 1. ``b'input 1'`` compressed in standalone/discrete mode
965 965 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
966 966 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
967 967
968 968 Each zstd frame **must** have the content size written.
969 969
970 970 The following Python code can be used to produce a *prefix dictionary chain*::
971 971
972 972 def make_chain(inputs):
973 973 frames = []
974 974
975 975 # First frame is compressed in standalone/discrete mode.
976 976 zctx = zstd.ZstdCompressor()
977 977 frames.append(zctx.compress(inputs[0]))
978 978
979 979 # Subsequent frames use the previous fulltext as a prefix dictionary
980 980 for i, raw in enumerate(inputs[1:]):
981 981 dict_data = zstd.ZstdCompressionDict(
982 982 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
983 983 zctx = zstd.ZstdCompressor(dict_data=dict_data)
984 984 frames.append(zctx.compress(raw))
985 985
986 986 return frames
987 987
988 988 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
989 989 element in the input chain.
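
Continuing the example above, the chain produced by ``make_chain()`` can be
decoded as follows (a sketch)::

    dctx = zstd.ZstdDecompressor()
    last = dctx.decompress_content_dict_chain(make_chain(inputs))
    assert last == inputs[-1]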
990 990
991 991
992 992 .. note::
993 993
994 994 It is possible to implement *prefix dictionary chain* decompression
995 995 on top of other APIs. However, this function will likely be faster -
996 996 especially for long input chains - as it avoids the overhead of instantiating
997 997 and passing around intermediate objects between C and Python.
998 998
999 999 Multi-Threaded Compression
1000 1000 --------------------------
1001 1001
1002 1002 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
1003 1003 of threads to use for compression. The way this works is that input is split
1004 1004 into segments and each segment is fed into a worker pool for compression. Once
1005 1005 a segment is compressed, it is flushed/appended to the output.
1006 1006
1007 1007 .. note::
1008 1008
1009 1009 These threads are created at the C layer and are not Python threads. So they
1010 1010 work outside the GIL. It is therefore possible to CPU saturate multiple cores
1011 1011 from Python.
1012 1012
1013 1013 The segment size for multi-threaded compression is chosen from the window size
1014 1014 of the compressor. This is derived from the ``window_log`` attribute of a
1015 1015 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
1016 1016 range.
1017 1017
1018 1018 If multi-threaded compression is requested and the input is smaller than the
1019 1019 configured segment size, only a single compression thread will be used. If the
1020 1020 input is smaller than the segment size multiplied by the thread pool size or
1021 1021 if data cannot be delivered to the compressor fast enough, not all requested
1022 1022 compressor threads may be active simultaneously.
1023 1023
1024 1024 Compared to non-multi-threaded compression, multi-threaded compression has
1025 1025 higher per-operation overhead. This includes extra memory operations,
1026 1026 thread creation, lock acquisition, etc.
1027 1027
1028 1028 Due to the nature of multi-threaded compression using *N* compression
1029 1029 *states*, the output from multi-threaded compression will likely be larger
1030 1030 than non-multi-threaded compression. The difference is usually small. But
1031 1031 there is a CPU/wall time versus size trade off that may warrant investigation.
1032 1032
Output from multi-threaded compression does not require any special handling
on the decompression side: to the decompressor, data generated by a
single-threaded compressor looks the same as data generated by a
multi-threaded compressor, and no additional resources are required to
decode it.
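
Enabling multi-threaded compression is just a matter of passing ``threads``
(a sketch; inputs need to be large before extra threads engage)::

    # Use as many compression threads as there are logical CPUs.
    cctx = zstd.ZstdCompressor(threads=-1)
    with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
        cctx.copy_stream(ifh, ofh)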
1038 1038
1039 1039 Dictionary Creation and Management
1040 1040 ----------------------------------
1041 1041
1042 1042 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
1043 1043
1044 1044 Instances can be constructed from bytes::
1045 1045
1046 1046 dict_data = zstd.ZstdCompressionDict(data)
1047 1047
1048 1048 It is possible to construct a dictionary from *any* data. If the data doesn't
1049 1049 begin with a magic header, it will be treated as a *prefix* dictionary.
1050 1050 *Prefix* dictionaries allow compression operations to reference raw data
1051 1051 within the dictionary.
1052 1052
1053 1053 It is possible to force the use of *prefix* dictionaries or to require a
1054 1054 dictionary header:
1055 1055
1056 1056 dict_data = zstd.ZstdCompressionDict(data,
1057 1057 dict_type=zstd.DICT_TYPE_RAWCONTENT)
1058 1058
1059 1059 dict_data = zstd.ZstdCompressionDict(data,
1060 1060 dict_type=zstd.DICT_TYPE_FULLDICT)
1061 1061
1062 1062 You can see how many bytes are in the dictionary by calling ``len()``::
1063 1063
1064 1064 dict_data = zstd.train_dictionary(size, samples)
1065 1065 dict_size = len(dict_data) # will not be larger than ``size``
1066 1066
1067 1067 Once you have a dictionary, you can pass it to the objects performing
1068 1068 compression and decompression::
1069 1069
1070 1070 dict_data = zstd.train_dictionary(131072, samples)
1071 1071
1072 1072 cctx = zstd.ZstdCompressor(dict_data=dict_data)
1073 1073 for source_data in input_data:
1074 1074 compressed = cctx.compress(source_data)
1075 1075 # Do something with compressed data.
1076 1076
1077 1077 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
1078 1078 for compressed_data in input_data:
1079 1079 buffer = io.BytesIO()
1080 1080 with dctx.stream_writer(buffer) as decompressor:
1081 1081 decompressor.write(compressed_data)
1082 1082 # Do something with raw data in ``buffer``.
1083 1083
1084 1084 Dictionaries have unique integer IDs. You can retrieve this ID via::
1085 1085
1086 1086 dict_id = zstd.dictionary_id(dict_data)
1087 1087
1088 1088 You can obtain the raw data in the dict (useful for persisting and constructing
1089 1089 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
1090 1090
1091 1091 dict_data = zstd.train_dictionary(size, samples)
1092 1092 raw_data = dict_data.as_bytes()
1093 1093
1094 1094 By default, when a ``ZstdCompressionDict`` is *attached* to a
1095 1095 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
1096 1096 dictionary for use. This is fine if only 1 compression operation is being
1097 1097 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
1098 1098 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
1099 1099 this can add overhead.
1100 1100
1101 1101 It is possible to *precompute* the dictionary so it can readily be consumed
1102 1102 by multiple ``ZstdCompressor`` instances::
1103 1103
1104 1104 d = zstd.ZstdCompressionDict(data)
1105 1105
1106 1106 # Precompute for compression level 3.
1107 1107 d.precompute_compress(level=3)
1108 1108
1109 1109 # Precompute with specific compression parameters.
1110 1110 params = zstd.ZstdCompressionParameters(...)
1111 1111 d.precompute_compress(compression_params=params)
1112 1112
1113 1113 .. note::
1114 1114
1115 1115 When a dictionary is precomputed, the compression parameters used to
1116 1116 precompute the dictionary overwrite some of the compression parameters
1117 1117 specified to ``ZstdCompressor.__init__``.
1118 1118
1119 1119 Training Dictionaries
1120 1120 ^^^^^^^^^^^^^^^^^^^^^
1121 1121
1122 1122 Unless using *prefix* dictionaries, dictionary data is produced by *training*
1123 1123 on existing data::
1124 1124
1125 1125 dict_data = zstd.train_dictionary(size, samples)
1126 1126
1127 1127 This takes a target dictionary size and list of bytes instances and creates and
1128 1128 returns a ``ZstdCompressionDict``.
1129 1129
1130 1130 The dictionary training mechanism is known as *cover*. More details about it are
1131 1131 available in the paper *Effective Construction of Relative Lempel-Ziv
1132 1132 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
1133 1133
The cover algorithm takes parameters ``k`` and ``d``. These are the
1135 1135 *segment size* and *dmer size*, respectively. The returned dictionary
1136 1136 instance created by this function has ``k`` and ``d`` attributes
1137 1137 containing the values for these parameters. If a ``ZstdCompressionDict``
1138 1138 is constructed from raw bytes data (a content-only dictionary), the
1139 1139 ``k`` and ``d`` attributes will be ``0``.
1140 1140
1141 1141 The segment and dmer size parameters to the cover algorithm can either be
1142 1142 specified manually or ``train_dictionary()`` can try multiple values
1143 1143 and pick the best one, where *best* means the smallest compressed data size.
This latter mode is called *optimization* mode.
1145 1145
1146 1146 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
1147 1147 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
1148 1148 struct) are defined, *optimization* mode is used with default parameter
1149 1149 values.
1150 1150
1151 1151 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
1152 1152 with explicit control over those parameters. Specifying ``threads=0`` or
1153 1153 ``threads=1`` can be used to engage *optimization* mode if other parameters
1154 1154 are not defined.
1155 1155
1156 1156 Otherwise, non-*optimization* mode is used with the parameters specified.
1157 1157
This function takes the following arguments (a usage sketch follows the list):
1159 1159
1160 1160 dict_size
1161 1161 Target size in bytes of the dictionary to generate.
1162 1162 samples
1163 1163 A list of bytes holding samples the dictionary will be trained from.
1164 1164 k
1165 1165 Parameter to cover algorithm defining the segment size. A reasonable range
1166 1166 is [16, 2048+].
1167 1167 d
1168 1168 Parameter to cover algorithm defining the dmer size. A reasonable range is
1169 1169 [6, 16]. ``d`` must be less than or equal to ``k``.
1170 1170 dict_id
1171 1171 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1172 1172 a random value.
1173 1173 steps
1174 1174 Number of steps through ``k`` values to perform when trying parameter
1175 1175 variations.
1176 1176 threads
1177 1177 Number of threads to use when trying parameter variations. Default is 0,
1178 1178 which means to use a single thread. A negative value can be specified to
1179 1179 use as many threads as there are detected logical CPUs.
1180 1180 level
1181 1181 Integer target compression level when trying parameter variations.
1182 1182 notifications
1183 1183 Controls writing of informational messages to ``stderr``. ``0`` (the
1184 1184 default) means to write nothing. ``1`` writes errors. ``2`` writes
1185 1185 progression info. ``3`` writes more details. And ``4`` writes all info.
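
A sketch of both modes (the parameter values are arbitrary)::

    # Non-optimization mode: explicit segment and dmer sizes.
    dict_data = zstd.train_dictionary(16384, samples, k=64, d=8)

    # Optimization mode: try parameter variations on all logical CPUs.
    dict_data = zstd.train_dictionary(16384, samples, threads=-1, level=3)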
1186 1186
1187 1187 Explicit Compression Parameters
1188 1188 -------------------------------
1189 1189
1190 1190 Zstandard offers a high-level *compression level* that maps to lower-level
1191 1191 compression parameters. For many consumers, this numeric level is the only
1192 1192 compression setting you'll need to touch.
1193 1193
1194 1194 But for advanced use cases, it might be desirable to tweak these lower-level
1195 1195 settings.
1196 1196
1197 1197 The ``ZstdCompressionParameters`` type represents these low-level compression
1198 1198 settings.
1199 1199
1200 1200 Instances of this type can be constructed from a myriad of keyword arguments
1201 1201 (defined below) for complete low-level control over each adjustable
1202 1202 compression setting.
1203 1203
1204 1204 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1205 1205 given a desired compression level and target input and dictionary size
1206 1206 using ``ZstdCompressionParameters.from_level()``. e.g.::
1207 1207
1208 1208 # Derive compression settings for compression level 7.
1209 1209 params = zstd.ZstdCompressionParameters.from_level(7)
1210 1210
1211 1211 # With an input size of 1MB
1212 1212 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1213 1213
1214 1214 Using ``from_level()``, it is also possible to override individual compression
1215 1215 parameters or to define additional settings that aren't automatically derived.
1216 1216 e.g.::
1217 1217
1218 1218 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1219 1219 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1220 1220
1221 1221 Or you can define low-level compression settings directly::
1222 1222
1223 1223 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
1224 1224
1225 1225 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1226 1226 configure a compressor::
1227 1227
1228 1228 cctx = zstd.ZstdCompressor(compression_params=params)
1229 1229
1230 1230 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1231 1231 follows:
1232 1232
1233 1233 * format
1234 1234 * compression_level
1235 1235 * window_log
1236 1236 * hash_log
1237 1237 * chain_log
1238 1238 * search_log
1239 1239 * min_match
1240 1240 * target_length
1241 1241 * strategy
1242 1242 * compression_strategy (deprecated: same as ``strategy``)
1243 1243 * write_content_size
1244 1244 * write_checksum
1245 1245 * write_dict_id
1246 1246 * job_size
1247 1247 * overlap_log
1248 1248 * overlap_size_log (deprecated: same as ``overlap_log``)
1249 1249 * force_max_window
1250 1250 * enable_ldm
1251 1251 * ldm_hash_log
1252 1252 * ldm_min_match
1253 1253 * ldm_bucket_size_log
1254 1254 * ldm_hash_rate_log
1255 1255 * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
1256 1256 * threads
1257 1257
1258 1258 Some of these are very low-level settings. It may help to consult the official
1259 1259 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
1260 1260 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
1261 1261
1262 1262 Frame Inspection
1263 1263 ----------------
1264 1264
1265 1265 Data emitted from zstd compression is encapsulated in a *frame*. This frame
1266 1266 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
1267 1267 the frame in more detail. For more info, see
1268 1268 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
1269 1269
1270 1270 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
instance and returns a ``FrameParameters`` object describing the frame.
1272 1272
1273 1273 Depending on which fields are present in the frame and their values, the
1274 1274 length of the frame parameters varies. If insufficient bytes are passed
1275 1275 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
1276 1276 frame parameters can be parsed, pass in at least 18 bytes.
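
A sketch::

    cctx = zstd.ZstdCompressor(write_checksum=True)
    frame = cctx.compress(b'foobar')

    params = zstd.get_frame_parameters(frame)
    assert params.content_size == 6
    assert params.has_checksum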
1277 1277
1278 1278 ``FrameParameters`` instances have the following attributes:
1279 1279
1280 1280 content_size
1281 1281 Integer size of original, uncompressed content. This will be ``0`` if the
1282 1282 original content size isn't written to the frame (controlled with the
1283 1283 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
1284 1284 content size was ``0``.
1285 1285
1286 1286 window_size
1287 1287 Integer size of maximum back-reference distance in compressed data.
1288 1288
1289 1289 dict_id
1290 1290 Integer of dictionary ID used for compression. ``0`` if no dictionary
1291 1291 ID was used or if the dictionary ID was ``0``.
1292 1292
1293 1293 has_checksum
1294 1294 Bool indicating whether a 4 byte content checksum is stored at the end
1295 1295 of the frame.
1296 1296
1297 1297 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1298 1298 header.
1299 1299
1300 1300 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1301 1301 the frame header. ``-1`` means the content size is unknown. ``0`` means
an empty frame. The reported content size is usually correct, but it is
not guaranteed to be accurate.
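
A sketch of both helpers::

    cctx = zstd.ZstdCompressor()
    frame = cctx.compress(b'data')

    header_size = zstd.frame_header_size(frame)
    # 4, since compress() wrote the content size into the frame header.
    content_size = zstd.frame_content_size(frame)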
1304 1304
1305 1305 Misc Functionality
1306 1306 ------------------
1307 1307
1308 1308 estimate_decompression_context_size()
1309 1309 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1310 1310
1311 1311 Estimate the memory size requirements for a decompressor instance.
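
A sketch::

    # Approximate bytes of memory a decompression context will require.
    size = zstd.estimate_decompression_context_size()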
1312 1312
1313 1313 Constants
1314 1314 ---------
1315 1315
1316 1316 The following module constants/attributes are exposed:
1317 1317
1318 1318 ZSTD_VERSION
1319 1319 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1320 1320 ``(1, 0, 0)``
1321 1321 MAX_COMPRESSION_LEVEL
1322 1322 Integer max compression level accepted by compression functions
1323 1323 COMPRESSION_RECOMMENDED_INPUT_SIZE
1324 1324 Recommended chunk size to feed to compressor functions
1325 1325 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1326 1326 Recommended chunk size for compression output
1327 1327 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
Recommended chunk size to feed into decompressor functions
1329 1329 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1330 1330 Recommended chunk size for decompression output
1331 1331
1332 1332 FRAME_HEADER
1333 1333 bytes containing header of the Zstandard frame
1334 1334 MAGIC_NUMBER
1335 1335 Frame header as an integer
1336 1336
1337 1337 FLUSH_BLOCK
1338 1338 Flushing behavior that denotes to flush a zstd block. A decompressor will
1339 1339 be able to decode all data fed into the compressor so far.
1340 1340 FLUSH_FRAME
1341 1341 Flushing behavior that denotes to end a zstd frame. Any new data fed
1342 1342 to the compressor will start a new frame.
1343 1343
1344 1344 CONTENTSIZE_UNKNOWN
1345 1345 Value for content size when the content size is unknown.
1346 1346 CONTENTSIZE_ERROR
1347 1347 Value for content size when content size couldn't be determined.
1348 1348
1349 1349 WINDOWLOG_MIN
1350 1350 Minimum value for compression parameter
1351 1351 WINDOWLOG_MAX
1352 1352 Maximum value for compression parameter
1353 1353 CHAINLOG_MIN
1354 1354 Minimum value for compression parameter
1355 1355 CHAINLOG_MAX
1356 1356 Maximum value for compression parameter
1357 1357 HASHLOG_MIN
1358 1358 Minimum value for compression parameter
1359 1359 HASHLOG_MAX
1360 1360 Maximum value for compression parameter
1361 1361 SEARCHLOG_MIN
1362 1362 Minimum value for compression parameter
1363 1363 SEARCHLOG_MAX
1364 1364 Maximum value for compression parameter
1365 1365 MINMATCH_MIN
1366 1366 Minimum value for compression parameter
1367 1367 MINMATCH_MAX
1368 1368 Maximum value for compression parameter
1369 1369 SEARCHLENGTH_MIN
1370 1370 Minimum value for compression parameter
1371 1371
1372 1372 Deprecated: use ``MINMATCH_MIN``
1373 1373 SEARCHLENGTH_MAX
1374 1374 Maximum value for compression parameter
1375 1375
1376 1376 Deprecated: use ``MINMATCH_MAX``
1377 1377 TARGETLENGTH_MIN
1378 1378 Minimum value for compression parameter
1379 1379 STRATEGY_FAST
1380 1380 Compression strategy
1381 1381 STRATEGY_DFAST
1382 1382 Compression strategy
1383 1383 STRATEGY_GREEDY
1384 1384 Compression strategy
1385 1385 STRATEGY_LAZY
1386 1386 Compression strategy
1387 1387 STRATEGY_LAZY2
1388 1388 Compression strategy
1389 1389 STRATEGY_BTLAZY2
1390 1390 Compression strategy
1391 1391 STRATEGY_BTOPT
1392 1392 Compression strategy
1393 1393 STRATEGY_BTULTRA
1394 1394 Compression strategy
1395 1395 STRATEGY_BTULTRA2
1396 1396 Compression strategy
1397 1397
1398 1398 FORMAT_ZSTD1
1399 1399 Zstandard frame format
1400 1400 FORMAT_ZSTD1_MAGICLESS
1401 1401 Zstandard frame format without magic header
1402 1402
1403 1403 Performance Considerations
1404 1404 --------------------------
1405 1405
1406 1406 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1407 1407 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1408 1408 or ``ZstdDecompressor`` instance for multiple operations is faster than
1409 1409 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1410 1410 operation. The differences are magnified as the size of data decreases. For
1411 1411 example, the difference between *context* reuse and non-reuse for 100,000
100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1413 1413 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1414 1414 time spent doing compression dwarfs time spent creating new *contexts*).
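
In practice this means hoisting context construction out of loops (a
sketch)::

    # Slower: a new context per operation.
    for chunk in inputs:
        compressed = zstd.ZstdCompressor().compress(chunk)

    # Faster: one context reused across operations.
    cctx = zstd.ZstdCompressor()
    for chunk in inputs:
        compressed = cctx.compress(chunk)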
1415 1415
1416 1416 Buffer Types
1417 1417 ------------
1418 1418
1419 1419 The API exposes a handful of custom types for interfacing with memory buffers.
1420 1420 The primary goal of these types is to facilitate efficient multi-object
1421 1421 operations.
1422 1422
1423 1423 The essential idea is to have a single memory allocation provide backing
1424 1424 storage for multiple logical objects. This has 2 main advantages: fewer
1425 1425 allocations and optimal memory access patterns. This avoids having to allocate
1426 1426 a Python object for each logical object and furthermore ensures that access of
1427 1427 data for objects can be sequential (read: fast) in memory.
1428 1428
1429 1429 BufferWithSegments
1430 1430 ^^^^^^^^^^^^^^^^^^
1431 1431
1432 1432 The ``BufferWithSegments`` type represents a memory buffer containing N
1433 1433 discrete items of known lengths (segments). It is essentially a fixed size
1434 1434 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1435 1435 unsigned native endian integers defining the byte offset and length of each
1436 1436 segment within the buffer.
1437 1437
1438 1438 Instances behave like containers.
1439 1439
1440 1440 ``len()`` returns the number of segments within the instance.
1441 1441
1442 1442 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1443 1443 individual segment within the backing buffer. That returned object references
1444 1444 (not copies) memory. This means that iterating all objects doesn't copy
1445 1445 data within the buffer.
1446 1446
1447 1447 The ``.size`` attribute contains the total size in bytes of the backing
1448 1448 buffer.
1449 1449
1450 1450 Instances conform to the buffer protocol. So a reference to the backing bytes
1451 1451 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1452 1452 be obtained via ``.tobytes()``.
1453 1453
1454 1454 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1455 1455 segments within the buffer. It is a ``BufferSegments`` type.
1456 1456
1457 1457 BufferSegment
1458 1458 ^^^^^^^^^^^^^
1459 1459
1460 1460 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1461 1461 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1462 1462
1463 1463 ``len()`` returns the length of the segment in bytes.
1464 1464
1465 1465 ``.offset`` contains the byte offset of this segment within its parent
1466 1466 ``BufferWithSegments`` instance.
1467 1467
1468 1468 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1469 1469 obtain a ``bytes`` instance with a copy of the backing bytes.
1470 1470
1471 1471 BufferSegments
1472 1472 ^^^^^^^^^^^^^^
1473 1473
1474 1474 This type represents an array of ``(offset, length)`` integers defining segments
1475 1475 within a ``BufferWithSegments``.
1476 1476
1477 1477 The array members are 64-bit unsigned integers using host/native bit order.
1478 1478
1479 1479 Instances conform to the buffer protocol.
1480 1480
1481 1481 BufferWithSegmentsCollection
1482 1482 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1483 1483
1484 1484 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1485 1485 of multiple ``BufferWithSegments`` instances.
1486 1486
1487 1487 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1488 1488 resulting object behaves like an ordered sequence whose members are the
1489 1489 segments within each ``BufferWithSegments``.
1490 1490
1491 1491 ``len()`` returns the total number of segments across all
1492 1492 ``BufferWithSegments`` instances.
1493 1493
1494 1494 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1495 1495 that index as if all ``BufferWithSegments`` instances were a single
1496 1496 entity.
1497 1497
1498 1498 If the object is composed of 2 ``BufferWithSegments`` instances with the
1499 1499 first having 2 segments and the second having 3 segments, then ``b[0]``
1500 1500 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1501 1501 and ``b[4]`` access segments from the second.
1502 1502
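A minimal sketch of this spanning behavior (again assuming the C extension
backend, where these buffer types are available):

.. code-block:: python

   import struct
   import zstandard as zstd

   ss = struct.Struct("=QQ")

   b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
   b2 = zstd.BufferWithSegments(b"barbaz", ss.pack(0, 3) + ss.pack(3, 3))

   c = zstd.BufferWithSegmentsCollection(b1, b2)

   assert len(c) == 3
   assert c[0].tobytes() == b"foo"
   # Indexing continues into the second buffer.
   assert c[2].tobytes() == b"baz"
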
1503 1503 Choosing an API
1504 1504 ===============
1505 1505
1506 1506 There are multiple APIs for performing compression and decompression. This is
1507 1507 because different applications have different needs and the library wants to
1508 1508 facilitate optimal use in as many use cases as possible.
1509 1509
1510 1510 At a high level, APIs are divided into *one-shot* and *streaming*: either you
1511 1511 are operating on all data at once or you operate on it piecemeal.
1512 1512
1513 1513 The *one-shot* APIs are useful for small data, where the input or output
1514 1514 size is known. (The size can come from a buffer length, a file size, or
1515 1515 the zstd frame header.) A limitation of the *one-shot* APIs is that
1516 1516 input and output must fit in memory simultaneously. For, say, a 4 GB input,
1517 1517 this is often not feasible.
1518 1518
1519 1519 The *one-shot* APIs also perform all work as a single operation. So, if you
1520 1520 feed it large input, it could take a long time for the function to return.
1521 1521
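For illustration, a one-shot round trip might look like this (a sketch;
``compress()`` writes the content size into the frame header by default,
which is what lets ``decompress()`` size its output):

.. code-block:: python

   import zstandard as zstd

   data = b"data to compress" * 1000

   cctx = zstd.ZstdCompressor()
   compressed = cctx.compress(data)

   dctx = zstd.ZstdDecompressor()
   assert dctx.decompress(compressed) == data
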
1522 1522 The streaming APIs do not have the limitations of the *one-shot* APIs. But the
1523 1523 price you pay for this flexibility is that they are more complex than a
1524 1524 single function call.
1525 1525
1526 1526 The streaming APIs put the caller in control of compression and decompression
1527 1527 behavior by allowing them to directly control either the input or output side
1528 1528 of the operation.
1529 1529
1530 1530 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1531 1531 has full control over the input to the compression or decompression stream.
1532 1532 They can directly choose when new data is operated on.
1533 1533
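As a sketch of this caller-controlled input, the ``compressobj()`` API
(modeled on the standard library's ``zlib`` interface) lets the caller
decide when input is fed in:

.. code-block:: python

   import io

   import zstandard as zstd

   source = io.BytesIO(b"chunked input " * 1000)

   cctx = zstd.ZstdCompressor()
   cobj = cctx.compressobj()

   chunks = []
   # Feed input in 8 KB pieces; each compress() call may or may not
   # produce output, depending on internal buffering.
   for chunk in iter(lambda: source.read(8192), b""):
       chunks.append(cobj.compress(chunk))
   chunks.append(cobj.flush())

   compressed = b"".join(chunks)
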
1534 1534 With the *streaming output* APIs, the caller has full control over the output
1535 1535 of the compression or decompression stream. They can choose when to receive
1536 1536 new data.
1537 1537
1538 1538 When using the *streaming* APIs that operate on file-like or stream objects,
1539 1539 it is important to consider what happens in that object when I/O is requested.
1540 1540 There is potential for long pauses as data is read from or written to the
1541 1541 underlying stream (say, from interacting with a filesystem or network). This
1542 1542 could add considerable overhead.
1543 1543
1544 1544 Thread Safety
1545 1545 =============
1546 1546
1547 1547 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1548 1548 about thread safety. Do not operate on the same ``ZstdCompressor`` and
1549 1549 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1550 1550 fine to have different threads call into a single instance, just not at the
1551 1551 same time.
1552 1552
1553 1553 Some operations, e.g. streaming operations, require multiple function calls
1554 1554 to complete. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1555 1555 for simultaneously active operations: e.g. you must not start a streaming
1556 1556 operation when another streaming operation is already active.
1557 1557
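A common pattern is therefore one instance per thread. A hypothetical sketch
using ``threading.local()`` (the ``_local``/``cctx`` names are illustrative):

.. code-block:: python

   import threading

   import zstandard as zstd

   _local = threading.local()

   def compress(data):
       # Lazily create one ZstdCompressor per thread so no instance is
       # ever used from two threads at the same time.
       cctx = getattr(_local, "cctx", None)
       if cctx is None:
           cctx = zstd.ZstdCompressor()
           _local.cctx = cctx
       return cctx.compress(data)
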
1558 1558 The C extension releases the GIL during non-trivial calls into the zstd C
1559 1559 API. Non-trivial calls are notably compression and decompression. Trivial
1560 1560 calls are things like parsing frame parameters. Where the GIL is released
1561 1561 is considered an implementation detail and can change in any release.
1562 1562
1563 1563 APIs that accept bytes-like objects don't enforce that the underlying object
1564 1564 is read-only. However, it is assumed that the passed object is read-only for
1565 1565 the duration of the function call. It is possible to pass a mutable object
1566 1566 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1567 1567 released, and mutate the object from another thread. Such a race condition
1568 1568 is a bug in the consumer of python-zstandard. Most Python data types are
1569 1569 immutable, so unless you are doing something fancy, you don't need to
1570 1570 worry about this.
1571 1571
1572 1572 Note on Zstandard's *Experimental* API
1573 1573 ======================================
1574 1574
1575 1575 Many of the Zstandard APIs used by this module are marked as *experimental*
1576 1576 within the Zstandard project.
1577 1577
1578 1578 It is unclear how Zstandard's C API will evolve over time, especially with
1579 1579 regard to this *experimental* functionality. We will try to maintain
1580 1580 backwards compatibility at the Python API level. However, we cannot
1581 1581 guarantee this for things not under our control.
1582 1582
1583 1583 Since a copy of the Zstandard source code is distributed with this
1584 1584 module and since we compile against it, the behavior of a specific
1585 1585 version of this module should be constant for all of time. So if you
1586 1586 pin the version of this module used in your projects (which is a Python
1587 1587 best practice), you should be shielded from unwanted future changes.
1588 1588
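For example, pinning in a pip ``requirements.txt``::

   zstandard==0.13.0
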
1589 1589 Donate
1590 1590 ======
1591 1591
1592 1592 A lot of time has been invested in this project by the author.
1593 1593
1594 1594 If you find this project useful and would like to thank the author for
1595 1595 their work, consider donating some money. Any amount is appreciated.
1596 1596
1597 1597 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1598 1598 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1599 1599 :alt: Donate via PayPal
1600 1600
1601 1601 .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
1602 1602 :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
contrib/python-zstandard/c-ext/python-zstandard.h
@@ -1,359 +1,359 @@
1 1 /**
2 2 * Copyright (c) 2016-present, Gregory Szorc
3 3 * All rights reserved.
4 4 *
5 5 * This software may be modified and distributed under the terms
6 6 * of the BSD license. See the LICENSE file for details.
7 7 */
8 8
9 9 #define PY_SSIZE_T_CLEAN
10 10 #include <Python.h>
11 11 #include "structmember.h"
12 12
13 13 #define ZSTD_STATIC_LINKING_ONLY
14 14 #define ZDICT_STATIC_LINKING_ONLY
15 15 #include <zstd.h>
16 16 #include <zdict.h>
17 17
18 18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.12.0"
19 #define PYTHON_ZSTANDARD_VERSION "0.13.0"
20 20
21 21 typedef enum {
22 22 compressorobj_flush_finish,
23 23 compressorobj_flush_block,
24 24 } CompressorObj_Flush;
25 25
26 26 /*
27 27 Represents a ZstdCompressionParameters type.
28 28
29 29 This type holds all the low-level compression parameters that can be set.
30 30 */
31 31 typedef struct {
32 32 PyObject_HEAD
33 33 ZSTD_CCtx_params* params;
34 34 } ZstdCompressionParametersObject;
35 35
36 36 extern PyTypeObject ZstdCompressionParametersType;
37 37
38 38 /*
39 39 Represents a FrameParameters type.
40 40
41 41 This type is basically a wrapper around ZSTD_frameParams.
42 42 */
43 43 typedef struct {
44 44 PyObject_HEAD
45 45 unsigned long long frameContentSize;
46 46 unsigned long long windowSize;
47 47 unsigned dictID;
48 48 char checksumFlag;
49 49 } FrameParametersObject;
50 50
51 51 extern PyTypeObject FrameParametersType;
52 52
53 53 /*
54 54 Represents a ZstdCompressionDict type.
55 55
56 56 Instances hold data used for a zstd compression dictionary.
57 57 */
58 58 typedef struct {
59 59 PyObject_HEAD
60 60
61 61 /* Pointer to dictionary data. Owned by self. */
62 62 void* dictData;
63 63 /* Size of dictionary data. */
64 64 size_t dictSize;
65 65 ZSTD_dictContentType_e dictType;
66 66 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
67 67 unsigned k;
68 68 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
69 69 unsigned d;
70 70 /* Digested dictionary, suitable for reuse. */
71 71 ZSTD_CDict* cdict;
72 72 ZSTD_DDict* ddict;
73 73 } ZstdCompressionDict;
74 74
75 75 extern PyTypeObject ZstdCompressionDictType;
76 76
77 77 /*
78 78 Represents a ZstdCompressor type.
79 79 */
80 80 typedef struct {
81 81 PyObject_HEAD
82 82
83 83 /* Number of threads to use for operations. */
84 84 unsigned int threads;
85 85 /* Pointer to compression dictionary to use. NULL if not using dictionary
86 86 compression. */
87 87 ZstdCompressionDict* dict;
88 88 /* Compression context to use. Populated during object construction. */
89 89 ZSTD_CCtx* cctx;
90 90 /* Compression parameters in use. */
91 91 ZSTD_CCtx_params* params;
92 92 } ZstdCompressor;
93 93
94 94 extern PyTypeObject ZstdCompressorType;
95 95
96 96 typedef struct {
97 97 PyObject_HEAD
98 98
99 99 ZstdCompressor* compressor;
100 100 ZSTD_outBuffer output;
101 101 int finished;
102 102 } ZstdCompressionObj;
103 103
104 104 extern PyTypeObject ZstdCompressionObjType;
105 105
106 106 typedef struct {
107 107 PyObject_HEAD
108 108
109 109 ZstdCompressor* compressor;
110 110 PyObject* writer;
111 111 ZSTD_outBuffer output;
112 112 size_t outSize;
113 113 int entered;
114 114 int closed;
115 115 int writeReturnRead;
116 116 unsigned long long bytesCompressed;
117 117 } ZstdCompressionWriter;
118 118
119 119 extern PyTypeObject ZstdCompressionWriterType;
120 120
121 121 typedef struct {
122 122 PyObject_HEAD
123 123
124 124 ZstdCompressor* compressor;
125 125 PyObject* reader;
126 126 Py_buffer buffer;
127 127 Py_ssize_t bufferOffset;
128 128 size_t inSize;
129 129 size_t outSize;
130 130
131 131 ZSTD_inBuffer input;
132 132 ZSTD_outBuffer output;
133 133 int finishedOutput;
134 134 int finishedInput;
135 135 PyObject* readResult;
136 136 } ZstdCompressorIterator;
137 137
138 138 extern PyTypeObject ZstdCompressorIteratorType;
139 139
140 140 typedef struct {
141 141 PyObject_HEAD
142 142
143 143 ZstdCompressor* compressor;
144 144 PyObject* reader;
145 145 Py_buffer buffer;
146 146 size_t readSize;
147 147
148 148 int entered;
149 149 int closed;
150 150 unsigned long long bytesCompressed;
151 151
152 152 ZSTD_inBuffer input;
153 153 ZSTD_outBuffer output;
154 154 int finishedInput;
155 155 int finishedOutput;
156 156 PyObject* readResult;
157 157 } ZstdCompressionReader;
158 158
159 159 extern PyTypeObject ZstdCompressionReaderType;
160 160
161 161 typedef struct {
162 162 PyObject_HEAD
163 163
164 164 ZstdCompressor* compressor;
165 165 ZSTD_inBuffer input;
166 166 ZSTD_outBuffer output;
167 167 Py_buffer inBuffer;
168 168 int finished;
169 169 size_t chunkSize;
170 170 } ZstdCompressionChunker;
171 171
172 172 extern PyTypeObject ZstdCompressionChunkerType;
173 173
174 174 typedef enum {
175 175 compressionchunker_mode_normal,
176 176 compressionchunker_mode_flush,
177 177 compressionchunker_mode_finish,
178 178 } CompressionChunkerMode;
179 179
180 180 typedef struct {
181 181 PyObject_HEAD
182 182
183 183 ZstdCompressionChunker* chunker;
184 184 CompressionChunkerMode mode;
185 185 } ZstdCompressionChunkerIterator;
186 186
187 187 extern PyTypeObject ZstdCompressionChunkerIteratorType;
188 188
189 189 typedef struct {
190 190 PyObject_HEAD
191 191
192 192 ZSTD_DCtx* dctx;
193 193 ZstdCompressionDict* dict;
194 194 size_t maxWindowSize;
195 195 ZSTD_format_e format;
196 196 } ZstdDecompressor;
197 197
198 198 extern PyTypeObject ZstdDecompressorType;
199 199
200 200 typedef struct {
201 201 PyObject_HEAD
202 202
203 203 ZstdDecompressor* decompressor;
204 204 size_t outSize;
205 205 int finished;
206 206 } ZstdDecompressionObj;
207 207
208 208 extern PyTypeObject ZstdDecompressionObjType;
209 209
210 210 typedef struct {
211 211 PyObject_HEAD
212 212
213 213 /* Parent decompressor to which this object is associated. */
214 214 ZstdDecompressor* decompressor;
215 215 /* Object to read() from (if reading from a stream). */
216 216 PyObject* reader;
217 217 /* Size for read() operations on reader. */
218 218 size_t readSize;
219 219 /* Whether a read() can return data spanning multiple zstd frames. */
220 220 int readAcrossFrames;
221 221 /* Buffer to read from (if reading from a buffer). */
222 222 Py_buffer buffer;
223 223
224 224 /* Whether the context manager is active. */
225 225 int entered;
226 226 /* Whether we've closed the stream. */
227 227 int closed;
228 228
229 229 /* Number of bytes decompressed and returned to user. */
230 230 unsigned long long bytesDecompressed;
231 231
232 232 /* Tracks data going into decompressor. */
233 233 ZSTD_inBuffer input;
234 234
235 235 /* Holds output from read() operation on reader. */
236 236 PyObject* readResult;
237 237
238 238 /* Whether all input has been sent to the decompressor. */
239 239 int finishedInput;
240 240 /* Whether all output has been flushed from the decompressor. */
241 241 int finishedOutput;
242 242 } ZstdDecompressionReader;
243 243
244 244 extern PyTypeObject ZstdDecompressionReaderType;
245 245
246 246 typedef struct {
247 247 PyObject_HEAD
248 248
249 249 ZstdDecompressor* decompressor;
250 250 PyObject* writer;
251 251 size_t outSize;
252 252 int entered;
253 253 int closed;
254 254 int writeReturnRead;
255 255 } ZstdDecompressionWriter;
256 256
257 257 extern PyTypeObject ZstdDecompressionWriterType;
258 258
259 259 typedef struct {
260 260 PyObject_HEAD
261 261
262 262 ZstdDecompressor* decompressor;
263 263 PyObject* reader;
264 264 Py_buffer buffer;
265 265 Py_ssize_t bufferOffset;
266 266 size_t inSize;
267 267 size_t outSize;
268 268 size_t skipBytes;
269 269 ZSTD_inBuffer input;
270 270 ZSTD_outBuffer output;
271 271 Py_ssize_t readCount;
272 272 int finishedInput;
273 273 int finishedOutput;
274 274 } ZstdDecompressorIterator;
275 275
276 276 extern PyTypeObject ZstdDecompressorIteratorType;
277 277
278 278 typedef struct {
279 279 int errored;
280 280 PyObject* chunk;
281 281 } DecompressorIteratorResult;
282 282
283 283 typedef struct {
284 284 /* The public API is that these are 64-bit unsigned integers. So these can't
285 285 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
286 286 * be nonsensical for this platform. */
287 287 unsigned long long offset;
288 288 unsigned long long length;
289 289 } BufferSegment;
290 290
291 291 typedef struct {
292 292 PyObject_HEAD
293 293
294 294 PyObject* parent;
295 295 BufferSegment* segments;
296 296 Py_ssize_t segmentCount;
297 297 } ZstdBufferSegments;
298 298
299 299 extern PyTypeObject ZstdBufferSegmentsType;
300 300
301 301 typedef struct {
302 302 PyObject_HEAD
303 303
304 304 PyObject* parent;
305 305 void* data;
306 306 Py_ssize_t dataSize;
307 307 unsigned long long offset;
308 308 } ZstdBufferSegment;
309 309
310 310 extern PyTypeObject ZstdBufferSegmentType;
311 311
312 312 typedef struct {
313 313 PyObject_HEAD
314 314
315 315 Py_buffer parent;
316 316 void* data;
317 317 unsigned long long dataSize;
318 318 BufferSegment* segments;
319 319 Py_ssize_t segmentCount;
320 320 int useFree;
321 321 } ZstdBufferWithSegments;
322 322
323 323 extern PyTypeObject ZstdBufferWithSegmentsType;
324 324
325 325 /**
326 326 * An ordered collection of BufferWithSegments exposed as a squashed collection.
327 327 *
328 328 * This type provides a virtual view spanning multiple BufferWithSegments
329 329 * instances. It allows multiple instances to be "chained" together and
330 330 * exposed as a single collection. e.g. if there are 2 buffers holding
331 331 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
332 332 */
333 333 typedef struct {
334 334 PyObject_HEAD
335 335
336 336 /* An array of buffers that should be exposed through this instance. */
337 337 ZstdBufferWithSegments** buffers;
338 338 /* Number of elements in buffers array. */
339 339 Py_ssize_t bufferCount;
340 340 /* Array of first offset in each buffer instance. 0th entry corresponds
341 341 to number of elements in the 0th buffer. 1st entry corresponds to the
342 342 sum of elements in 0th and 1st buffers. */
343 343 Py_ssize_t* firstElements;
344 344 } ZstdBufferWithSegmentsCollection;
345 345
346 346 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
347 347
348 348 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
349 349 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
350 350 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
351 351 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
352 352 int ensure_ddict(ZstdCompressionDict* dict);
353 353 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
354 354 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
355 355 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
356 356 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
357 357 int cpu_count(void);
358 358 size_t roundpow2(size_t);
359 359 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
contrib/python-zstandard/make_cffi.py
@@ -1,207 +1,225 @@
1 1 # Copyright (c) 2016-present, Gregory Szorc
2 2 # All rights reserved.
3 3 #
4 4 # This software may be modified and distributed under the terms
5 5 # of the BSD license. See the LICENSE file for details.
6 6
7 7 from __future__ import absolute_import
8 8
9 9 import cffi
10 10 import distutils.ccompiler
11 11 import os
12 12 import re
13 13 import subprocess
14 14 import tempfile
15 15
16 16
17 17 HERE = os.path.abspath(os.path.dirname(__file__))
18 18
19 SOURCES = ['zstd/%s' % p for p in (
20 'common/debug.c',
21 'common/entropy_common.c',
22 'common/error_private.c',
23 'common/fse_decompress.c',
24 'common/pool.c',
25 'common/threading.c',
26 'common/xxhash.c',
27 'common/zstd_common.c',
28 'compress/fse_compress.c',
29 'compress/hist.c',
30 'compress/huf_compress.c',
31 'compress/zstd_compress.c',
32 'compress/zstd_compress_literals.c',
33 'compress/zstd_compress_sequences.c',
34 'compress/zstd_double_fast.c',
35 'compress/zstd_fast.c',
36 'compress/zstd_lazy.c',
37 'compress/zstd_ldm.c',
38 'compress/zstd_opt.c',
39 'compress/zstdmt_compress.c',
40 'decompress/huf_decompress.c',
41 'decompress/zstd_ddict.c',
42 'decompress/zstd_decompress.c',
43 'decompress/zstd_decompress_block.c',
44 'dictBuilder/cover.c',
45 'dictBuilder/fastcover.c',
46 'dictBuilder/divsufsort.c',
47 'dictBuilder/zdict.c',
48 )]
19 SOURCES = [
20 "zstd/%s" % p
21 for p in (
22 "common/debug.c",
23 "common/entropy_common.c",
24 "common/error_private.c",
25 "common/fse_decompress.c",
26 "common/pool.c",
27 "common/threading.c",
28 "common/xxhash.c",
29 "common/zstd_common.c",
30 "compress/fse_compress.c",
31 "compress/hist.c",
32 "compress/huf_compress.c",
33 "compress/zstd_compress.c",
34 "compress/zstd_compress_literals.c",
35 "compress/zstd_compress_sequences.c",
36 "compress/zstd_double_fast.c",
37 "compress/zstd_fast.c",
38 "compress/zstd_lazy.c",
39 "compress/zstd_ldm.c",
40 "compress/zstd_opt.c",
41 "compress/zstdmt_compress.c",
42 "decompress/huf_decompress.c",
43 "decompress/zstd_ddict.c",
44 "decompress/zstd_decompress.c",
45 "decompress/zstd_decompress_block.c",
46 "dictBuilder/cover.c",
47 "dictBuilder/fastcover.c",
48 "dictBuilder/divsufsort.c",
49 "dictBuilder/zdict.c",
50 )
51 ]
49 52
50 53 # Headers whose preprocessed output will be fed into cdef().
51 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
52 ('zstd.h',),
53 ('dictBuilder', 'zdict.h'),
54 )]
54 HEADERS = [
55 os.path.join(HERE, "zstd", *p) for p in (("zstd.h",), ("dictBuilder", "zdict.h"),)
56 ]
55 57
56 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
57 'zstd',
58 'zstd/common',
59 'zstd/compress',
60 'zstd/decompress',
61 'zstd/dictBuilder',
62 )]
58 INCLUDE_DIRS = [
59 os.path.join(HERE, d)
60 for d in (
61 "zstd",
62 "zstd/common",
63 "zstd/compress",
64 "zstd/decompress",
65 "zstd/dictBuilder",
66 )
67 ]
63 68
64 69 # cffi can't parse some of the primitives in zstd.h. So we invoke the
65 70 # preprocessor and feed its output into cffi.
66 71 compiler = distutils.ccompiler.new_compiler()
67 72
68 73 # Needed for MSVC.
69 if hasattr(compiler, 'initialize'):
74 if hasattr(compiler, "initialize"):
70 75 compiler.initialize()
71 76
72 77 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
73 78 # manually.
74 if compiler.compiler_type == 'unix':
75 args = list(compiler.executables['compiler'])
76 args.extend([
77 '-E',
78 '-DZSTD_STATIC_LINKING_ONLY',
79 '-DZDICT_STATIC_LINKING_ONLY',
80 ])
81 elif compiler.compiler_type == 'msvc':
79 if compiler.compiler_type == "unix":
80 args = list(compiler.executables["compiler"])
81 args.extend(
82 ["-E", "-DZSTD_STATIC_LINKING_ONLY", "-DZDICT_STATIC_LINKING_ONLY",]
83 )
84 elif compiler.compiler_type == "msvc":
82 85 args = [compiler.cc]
83 args.extend([
84 '/EP',
85 '/DZSTD_STATIC_LINKING_ONLY',
86 '/DZDICT_STATIC_LINKING_ONLY',
87 ])
86 args.extend(
87 ["/EP", "/DZSTD_STATIC_LINKING_ONLY", "/DZDICT_STATIC_LINKING_ONLY",]
88 )
88 89 else:
89 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
90 raise Exception("unsupported compiler type: %s" % compiler.compiler_type)
91
90 92
91 93 def preprocess(path):
92 with open(path, 'rb') as fh:
94 with open(path, "rb") as fh:
93 95 lines = []
94 96 it = iter(fh)
95 97
96 98 for l in it:
97 99 # zstd.h includes <stddef.h>, which is also included by cffi's
98 100 # boilerplate. This can lead to duplicate declarations. So we strip
99 101 # this include from the preprocessor invocation.
100 102 #
101 103 # The same things happens for including zstd.h, so give it the same
102 104 # treatment.
103 105 #
104 106 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
105 107 # #define in zstdmt_compress.h and results in a compiler warning. So drop
106 108 # the inline #define.
107 if l.startswith((b'#include <stddef.h>',
108 b'#include "zstd.h"',
109 b'#define ZSTD_STATIC_LINKING_ONLY')):
109 if l.startswith(
110 (
111 b"#include <stddef.h>",
112 b'#include "zstd.h"',
113 b"#define ZSTD_STATIC_LINKING_ONLY",
114 )
115 ):
110 116 continue
111 117
118 # The preprocessor environment on Windows doesn't define include
119 # paths, so the #include of limits.h fails. We work around this
120 # by removing that import and defining INT_MAX ourselves. This is
121 # a bit hacky. But it gets the job done.
122 # TODO make limits.h work on Windows so we ensure INT_MAX is
123 # correct.
124 if l.startswith(b"#include <limits.h>"):
125 l = b"#define INT_MAX 2147483647\n"
126
112 127 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
113 128 # important so just filter it out.
114 if l.startswith(b'ZSTDLIB_API'):
115 l = l[len(b'ZSTDLIB_API '):]
129 if l.startswith(b"ZSTDLIB_API"):
130 l = l[len(b"ZSTDLIB_API ") :]
116 131
117 132 lines.append(l)
118 133
119 fd, input_file = tempfile.mkstemp(suffix='.h')
120 os.write(fd, b''.join(lines))
134 fd, input_file = tempfile.mkstemp(suffix=".h")
135 os.write(fd, b"".join(lines))
121 136 os.close(fd)
122 137
123 138 try:
124 139 env = dict(os.environ)
125 if getattr(compiler, '_paths', None):
126 env['PATH'] = compiler._paths
127 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE,
128 env=env)
140 if getattr(compiler, "_paths", None):
141 env["PATH"] = compiler._paths
142 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, env=env)
129 143 output = process.communicate()[0]
130 144 ret = process.poll()
131 145 if ret:
132 raise Exception('preprocessor exited with error')
146 raise Exception("preprocessor exited with error")
133 147
134 148 return output
135 149 finally:
136 150 os.unlink(input_file)
137 151
138 152
139 153 def normalize_output(output):
140 154 lines = []
141 155 for line in output.splitlines():
142 156 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
143 157 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
144 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
158 line = line[len(b'__attribute__ ((visibility ("default"))) ') :]
145 159
146 if line.startswith(b'__attribute__((deprecated('):
160 if line.startswith(b"__attribute__((deprecated("):
147 161 continue
148 elif b'__declspec(deprecated(' in line:
162 elif b"__declspec(deprecated(" in line:
149 163 continue
150 164
151 165 lines.append(line)
152 166
153 return b'\n'.join(lines)
167 return b"\n".join(lines)
154 168
155 169
156 170 ffi = cffi.FFI()
157 171 # zstd.h uses a possible undefined MIN(). Define it until
158 172 # https://github.com/facebook/zstd/issues/976 is fixed.
159 173 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
160 174 # when cffi uses the function. Since we statically link against zstd, even
161 175 # if we use the deprecated functions it shouldn't be a huge problem.
162 ffi.set_source('_zstd_cffi', '''
176 ffi.set_source(
177 "_zstd_cffi",
178 """
163 179 #define MIN(a,b) ((a)<(b) ? (a) : (b))
164 180 #define ZSTD_STATIC_LINKING_ONLY
165 181 #include <zstd.h>
166 182 #define ZDICT_STATIC_LINKING_ONLY
167 183 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
168 184 #include <zdict.h>
169 ''', sources=SOURCES,
170 include_dirs=INCLUDE_DIRS,
171 extra_compile_args=['-DZSTD_MULTITHREAD'])
185 """,
186 sources=SOURCES,
187 include_dirs=INCLUDE_DIRS,
188 extra_compile_args=["-DZSTD_MULTITHREAD"],
189 )
172 190
173 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
191 DEFINE = re.compile(b"^\\#define ([a-zA-Z0-9_]+) ")
174 192
175 193 sources = []
176 194
177 195 # Feed normalized preprocessor output for headers into the cdef parser.
178 196 for header in HEADERS:
179 197 preprocessed = preprocess(header)
180 198 sources.append(normalize_output(preprocessed))
181 199
182 200 # #define's are effectively erased as part of going through preprocessor.
183 201 # So perform a manual pass to re-add those to the cdef source.
184 with open(header, 'rb') as fh:
202 with open(header, "rb") as fh:
185 203 for line in fh:
186 204 line = line.strip()
187 205 m = DEFINE.match(line)
188 206 if not m:
189 207 continue
190 208
191 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
209 if m.group(1) == b"ZSTD_STATIC_LINKING_ONLY":
192 210 continue
193 211
194 212 # The parser doesn't like some constants with complex values.
195 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
213 if m.group(1) in (b"ZSTD_LIB_VERSION", b"ZSTD_VERSION_STRING"):
196 214 continue
197 215
198 216 # The ... is magic syntax by the cdef parser to resolve the
199 217 # value at compile time.
200 sources.append(m.group(0) + b' ...')
218 sources.append(m.group(0) + b" ...")
201 219
202 cdeflines = b'\n'.join(sources).splitlines()
220 cdeflines = b"\n".join(sources).splitlines()
203 221 cdeflines = [l for l in cdeflines if l.strip()]
204 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
222 ffi.cdef(b"\n".join(cdeflines).decode("latin1"))
205 223
206 if __name__ == '__main__':
224 if __name__ == "__main__":
207 225 ffi.compile()
contrib/python-zstandard/setup.py
@@ -1,112 +1,118 @@
1 1 #!/usr/bin/env python
2 2 # Copyright (c) 2016-present, Gregory Szorc
3 3 # All rights reserved.
4 4 #
5 5 # This software may be modified and distributed under the terms
6 6 # of the BSD license. See the LICENSE file for details.
7 7
8 8 from __future__ import print_function
9 9
10 10 from distutils.version import LooseVersion
11 11 import os
12 12 import sys
13 13 from setuptools import setup
14 14
15 15 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
16 16 # (like memoryview).
17 17 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
18 18 # garbage collection pitfalls.
19 MINIMUM_CFFI_VERSION = '1.11'
19 MINIMUM_CFFI_VERSION = "1.11"
20 20
21 21 try:
22 22 import cffi
23 23
24 24 # PyPy (and possibly other distros) have CFFI distributed as part of
25 25 # them. The install_requires for CFFI below won't work. We need to sniff
26 26 # out the CFFI version here and reject CFFI if it is too old.
27 27 cffi_version = LooseVersion(cffi.__version__)
28 28 if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION):
29 print('CFFI 1.11 or newer required (%s found); '
30 'not building CFFI backend' % cffi_version,
31 file=sys.stderr)
29 print(
30 "CFFI 1.11 or newer required (%s found); "
31 "not building CFFI backend" % cffi_version,
32 file=sys.stderr,
33 )
32 34 cffi = None
33 35
34 36 except ImportError:
35 37 cffi = None
36 38
37 39 import setup_zstd
38 40
39 41 SUPPORT_LEGACY = False
40 42 SYSTEM_ZSTD = False
41 43 WARNINGS_AS_ERRORS = False
42 44
43 if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''):
45 if os.environ.get("ZSTD_WARNINGS_AS_ERRORS", ""):
44 46 WARNINGS_AS_ERRORS = True
45 47
46 if '--legacy' in sys.argv:
48 if "--legacy" in sys.argv:
47 49 SUPPORT_LEGACY = True
48 sys.argv.remove('--legacy')
50 sys.argv.remove("--legacy")
49 51
50 if '--system-zstd' in sys.argv:
52 if "--system-zstd" in sys.argv:
51 53 SYSTEM_ZSTD = True
52 sys.argv.remove('--system-zstd')
54 sys.argv.remove("--system-zstd")
53 55
54 if '--warnings-as-errors' in sys.argv:
56 if "--warnings-as-errors" in sys.argv:
55 57 WARNINGS_AS_ERRORS = True
56 sys.argv.remove('--warning-as-errors')
58 sys.argv.remove("--warning-as-errors")
57 59
58 60 # Code for obtaining the Extension instance is in its own module to
59 61 # facilitate reuse in other projects.
60 62 extensions = [
61 setup_zstd.get_c_extension(name='zstd',
62 support_legacy=SUPPORT_LEGACY,
63 system_zstd=SYSTEM_ZSTD,
64 warnings_as_errors=WARNINGS_AS_ERRORS),
63 setup_zstd.get_c_extension(
64 name="zstd",
65 support_legacy=SUPPORT_LEGACY,
66 system_zstd=SYSTEM_ZSTD,
67 warnings_as_errors=WARNINGS_AS_ERRORS,
68 ),
65 69 ]
66 70
67 71 install_requires = []
68 72
69 73 if cffi:
70 74 import make_cffi
75
71 76 extensions.append(make_cffi.ffi.distutils_extension())
72 install_requires.append('cffi>=%s' % MINIMUM_CFFI_VERSION)
77 install_requires.append("cffi>=%s" % MINIMUM_CFFI_VERSION)
73 78
74 79 version = None
75 80
76 with open('c-ext/python-zstandard.h', 'r') as fh:
81 with open("c-ext/python-zstandard.h", "r") as fh:
77 82 for line in fh:
78 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
83 if not line.startswith("#define PYTHON_ZSTANDARD_VERSION"):
79 84 continue
80 85
81 86 version = line.split()[2][1:-1]
82 87 break
83 88
84 89 if not version:
85 raise Exception('could not resolve package version; '
86 'this should never happen')
90 raise Exception("could not resolve package version; " "this should never happen")
87 91
88 92 setup(
89 name='zstandard',
93 name="zstandard",
90 94 version=version,
91 description='Zstandard bindings for Python',
92 long_description=open('README.rst', 'r').read(),
93 url='https://github.com/indygreg/python-zstandard',
94 author='Gregory Szorc',
95 author_email='gregory.szorc@gmail.com',
96 license='BSD',
95 description="Zstandard bindings for Python",
96 long_description=open("README.rst", "r").read(),
97 url="https://github.com/indygreg/python-zstandard",
98 author="Gregory Szorc",
99 author_email="gregory.szorc@gmail.com",
100 license="BSD",
97 101 classifiers=[
98 'Development Status :: 4 - Beta',
99 'Intended Audience :: Developers',
100 'License :: OSI Approved :: BSD License',
101 'Programming Language :: C',
102 'Programming Language :: Python :: 2.7',
103 'Programming Language :: Python :: 3.5',
104 'Programming Language :: Python :: 3.6',
105 'Programming Language :: Python :: 3.7',
102 "Development Status :: 4 - Beta",
103 "Intended Audience :: Developers",
104 "License :: OSI Approved :: BSD License",
105 "Programming Language :: C",
106 "Programming Language :: Python :: 2.7",
107 "Programming Language :: Python :: 3.5",
108 "Programming Language :: Python :: 3.6",
109 "Programming Language :: Python :: 3.7",
110 "Programming Language :: Python :: 3.8",
106 111 ],
107 keywords='zstandard zstd compression',
108 packages=['zstandard'],
112 keywords="zstandard zstd compression",
113 packages=["zstandard"],
109 114 ext_modules=extensions,
110 test_suite='tests',
115 test_suite="tests",
111 116 install_requires=install_requires,
117 tests_require=["hypothesis"],
112 118 )
contrib/python-zstandard/setup_zstd.py
@@ -1,192 +1,206 @@
1 1 # Copyright (c) 2016-present, Gregory Szorc
2 2 # All rights reserved.
3 3 #
4 4 # This software may be modified and distributed under the terms
5 5 # of the BSD license. See the LICENSE file for details.
6 6
7 7 import distutils.ccompiler
8 8 import os
9 9
10 10 from distutils.extension import Extension
11 11
12 12
13 zstd_sources = ['zstd/%s' % p for p in (
14 'common/debug.c',
15 'common/entropy_common.c',
16 'common/error_private.c',
17 'common/fse_decompress.c',
18 'common/pool.c',
19 'common/threading.c',
20 'common/xxhash.c',
21 'common/zstd_common.c',
22 'compress/fse_compress.c',
23 'compress/hist.c',
24 'compress/huf_compress.c',
25 'compress/zstd_compress_literals.c',
26 'compress/zstd_compress_sequences.c',
27 'compress/zstd_compress.c',
28 'compress/zstd_double_fast.c',
29 'compress/zstd_fast.c',
30 'compress/zstd_lazy.c',
31 'compress/zstd_ldm.c',
32 'compress/zstd_opt.c',
33 'compress/zstdmt_compress.c',
34 'decompress/huf_decompress.c',
35 'decompress/zstd_ddict.c',
36 'decompress/zstd_decompress.c',
37 'decompress/zstd_decompress_block.c',
38 'dictBuilder/cover.c',
39 'dictBuilder/divsufsort.c',
40 'dictBuilder/fastcover.c',
41 'dictBuilder/zdict.c',
42 )]
13 zstd_sources = [
14 "zstd/%s" % p
15 for p in (
16 "common/debug.c",
17 "common/entropy_common.c",
18 "common/error_private.c",
19 "common/fse_decompress.c",
20 "common/pool.c",
21 "common/threading.c",
22 "common/xxhash.c",
23 "common/zstd_common.c",
24 "compress/fse_compress.c",
25 "compress/hist.c",
26 "compress/huf_compress.c",
27 "compress/zstd_compress_literals.c",
28 "compress/zstd_compress_sequences.c",
29 "compress/zstd_compress.c",
30 "compress/zstd_double_fast.c",
31 "compress/zstd_fast.c",
32 "compress/zstd_lazy.c",
33 "compress/zstd_ldm.c",
34 "compress/zstd_opt.c",
35 "compress/zstdmt_compress.c",
36 "decompress/huf_decompress.c",
37 "decompress/zstd_ddict.c",
38 "decompress/zstd_decompress.c",
39 "decompress/zstd_decompress_block.c",
40 "dictBuilder/cover.c",
41 "dictBuilder/divsufsort.c",
42 "dictBuilder/fastcover.c",
43 "dictBuilder/zdict.c",
44 )
45 ]
43 46
44 zstd_sources_legacy = ['zstd/%s' % p for p in (
45 'deprecated/zbuff_common.c',
46 'deprecated/zbuff_compress.c',
47 'deprecated/zbuff_decompress.c',
48 'legacy/zstd_v01.c',
49 'legacy/zstd_v02.c',
50 'legacy/zstd_v03.c',
51 'legacy/zstd_v04.c',
52 'legacy/zstd_v05.c',
53 'legacy/zstd_v06.c',
54 'legacy/zstd_v07.c'
55 )]
47 zstd_sources_legacy = [
48 "zstd/%s" % p
49 for p in (
50 "deprecated/zbuff_common.c",
51 "deprecated/zbuff_compress.c",
52 "deprecated/zbuff_decompress.c",
53 "legacy/zstd_v01.c",
54 "legacy/zstd_v02.c",
55 "legacy/zstd_v03.c",
56 "legacy/zstd_v04.c",
57 "legacy/zstd_v05.c",
58 "legacy/zstd_v06.c",
59 "legacy/zstd_v07.c",
60 )
61 ]
56 62
57 63 zstd_includes = [
58 'zstd',
59 'zstd/common',
60 'zstd/compress',
61 'zstd/decompress',
62 'zstd/dictBuilder',
64 "zstd",
65 "zstd/common",
66 "zstd/compress",
67 "zstd/decompress",
68 "zstd/dictBuilder",
63 69 ]
64 70
65 71 zstd_includes_legacy = [
66 'zstd/deprecated',
67 'zstd/legacy',
72 "zstd/deprecated",
73 "zstd/legacy",
68 74 ]
69 75
70 76 ext_includes = [
71 'c-ext',
72 'zstd/common',
77 "c-ext",
78 "zstd/common",
73 79 ]
74 80
75 81 ext_sources = [
76 'zstd/common/pool.c',
77 'zstd/common/threading.c',
78 'zstd.c',
79 'c-ext/bufferutil.c',
80 'c-ext/compressiondict.c',
81 'c-ext/compressobj.c',
82 'c-ext/compressor.c',
83 'c-ext/compressoriterator.c',
84 'c-ext/compressionchunker.c',
85 'c-ext/compressionparams.c',
86 'c-ext/compressionreader.c',
87 'c-ext/compressionwriter.c',
88 'c-ext/constants.c',
89 'c-ext/decompressobj.c',
90 'c-ext/decompressor.c',
91 'c-ext/decompressoriterator.c',
92 'c-ext/decompressionreader.c',
93 'c-ext/decompressionwriter.c',
94 'c-ext/frameparams.c',
82 "zstd/common/error_private.c",
83 "zstd/common/pool.c",
84 "zstd/common/threading.c",
85 "zstd/common/zstd_common.c",
86 "zstd.c",
87 "c-ext/bufferutil.c",
88 "c-ext/compressiondict.c",
89 "c-ext/compressobj.c",
90 "c-ext/compressor.c",
91 "c-ext/compressoriterator.c",
92 "c-ext/compressionchunker.c",
93 "c-ext/compressionparams.c",
94 "c-ext/compressionreader.c",
95 "c-ext/compressionwriter.c",
96 "c-ext/constants.c",
97 "c-ext/decompressobj.c",
98 "c-ext/decompressor.c",
99 "c-ext/decompressoriterator.c",
100 "c-ext/decompressionreader.c",
101 "c-ext/decompressionwriter.c",
102 "c-ext/frameparams.c",
95 103 ]
96 104
97 105 zstd_depends = [
98 'c-ext/python-zstandard.h',
106 "c-ext/python-zstandard.h",
99 107 ]
100 108
101 109
102 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
103 warnings_as_errors=False, root=None):
110 def get_c_extension(
111 support_legacy=False,
112 system_zstd=False,
113 name="zstd",
114 warnings_as_errors=False,
115 root=None,
116 ):
104 117 """Obtain a distutils.extension.Extension for the C extension.
105 118
106 119 ``support_legacy`` controls whether to compile in legacy zstd format support.
107 120
108 121 ``system_zstd`` controls whether to compile against the system zstd library.
109 122 For this to work, the system zstd library and headers must match what
110 123 python-zstandard is coded against exactly.
111 124
112 125 ``name`` is the module name of the C extension to produce.
113 126
114 127 ``warnings_as_errors`` controls whether compiler warnings are turned into
115 128 compiler errors.
116 129
117 130 ``root`` defines a root path that source should be computed as relative
118 131 to. This should be the directory with the main ``setup.py`` that is
119 132 being invoked. If not defined, paths will be relative to this file.
120 133 """
121 134 actual_root = os.path.abspath(os.path.dirname(__file__))
122 135 root = root or actual_root
123 136
124 137 sources = set([os.path.join(actual_root, p) for p in ext_sources])
125 138 if not system_zstd:
126 139 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
127 140 if support_legacy:
128 sources.update([os.path.join(actual_root, p)
129 for p in zstd_sources_legacy])
141 sources.update([os.path.join(actual_root, p) for p in zstd_sources_legacy])
130 142 sources = list(sources)
131 143
132 144 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
133 145 if not system_zstd:
134 include_dirs.update([os.path.join(actual_root, d)
135 for d in zstd_includes])
146 include_dirs.update([os.path.join(actual_root, d) for d in zstd_includes])
136 147 if support_legacy:
137 include_dirs.update([os.path.join(actual_root, d)
138 for d in zstd_includes_legacy])
148 include_dirs.update(
149 [os.path.join(actual_root, d) for d in zstd_includes_legacy]
150 )
139 151 include_dirs = list(include_dirs)
140 152
141 153 depends = [os.path.join(actual_root, p) for p in zstd_depends]
142 154
143 155 compiler = distutils.ccompiler.new_compiler()
144 156
145 157 # Needed for MSVC.
146 if hasattr(compiler, 'initialize'):
158 if hasattr(compiler, "initialize"):
147 159 compiler.initialize()
148 160
149 if compiler.compiler_type == 'unix':
150 compiler_type = 'unix'
151 elif compiler.compiler_type == 'msvc':
152 compiler_type = 'msvc'
153 elif compiler.compiler_type == 'mingw32':
154 compiler_type = 'mingw32'
161 if compiler.compiler_type == "unix":
162 compiler_type = "unix"
163 elif compiler.compiler_type == "msvc":
164 compiler_type = "msvc"
165 elif compiler.compiler_type == "mingw32":
166 compiler_type = "mingw32"
155 167 else:
156 raise Exception('unhandled compiler type: %s' %
157 compiler.compiler_type)
168 raise Exception("unhandled compiler type: %s" % compiler.compiler_type)
158 169
159 extra_args = ['-DZSTD_MULTITHREAD']
170 extra_args = ["-DZSTD_MULTITHREAD"]
160 171
161 172 if not system_zstd:
162 extra_args.append('-DZSTDLIB_VISIBILITY=')
163 extra_args.append('-DZDICTLIB_VISIBILITY=')
164 extra_args.append('-DZSTDERRORLIB_VISIBILITY=')
173 extra_args.append("-DZSTDLIB_VISIBILITY=")
174 extra_args.append("-DZDICTLIB_VISIBILITY=")
175 extra_args.append("-DZSTDERRORLIB_VISIBILITY=")
165 176
166 if compiler_type == 'unix':
167 extra_args.append('-fvisibility=hidden')
177 if compiler_type == "unix":
178 extra_args.append("-fvisibility=hidden")
168 179
169 180 if not system_zstd and support_legacy:
170 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
181 extra_args.append("-DZSTD_LEGACY_SUPPORT=1")
171 182
172 183 if warnings_as_errors:
173 if compiler_type in ('unix', 'mingw32'):
174 extra_args.append('-Werror')
175 elif compiler_type == 'msvc':
176 extra_args.append('/WX')
184 if compiler_type in ("unix", "mingw32"):
185 extra_args.append("-Werror")
186 elif compiler_type == "msvc":
187 extra_args.append("/WX")
177 188 else:
178 189 assert False
179 190
180 libraries = ['zstd'] if system_zstd else []
191 libraries = ["zstd"] if system_zstd else []
181 192
182 193 # Python 3.7 doesn't like absolute paths. So normalize to relative.
183 194 sources = [os.path.relpath(p, root) for p in sources]
184 195 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
185 196 depends = [os.path.relpath(p, root) for p in depends]
186 197
187 198 # TODO compile with optimizations.
188 return Extension(name, sources,
189 include_dirs=include_dirs,
190 depends=depends,
191 extra_compile_args=extra_args,
192 libraries=libraries)
199 return Extension(
200 name,
201 sources,
202 include_dirs=include_dirs,
203 depends=depends,
204 extra_compile_args=extra_args,
205 libraries=libraries,
206 )
contrib/python-zstandard/tests/common.py
@@ -1,185 +1,197 @@
1 1 import imp
2 2 import inspect
3 3 import io
4 4 import os
5 5 import types
6 import unittest
6 7
7 8 try:
8 9 import hypothesis
9 10 except ImportError:
10 11 hypothesis = None
11 12
12 13
14 class TestCase(unittest.TestCase):
15 if not getattr(unittest.TestCase, "assertRaisesRegex", False):
16 assertRaisesRegex = unittest.TestCase.assertRaisesRegexp
17
18
13 19 def make_cffi(cls):
14 20 """Decorator to add CFFI versions of each test method."""
15 21
16 22 # The module containing this class definition should
17 23 # `import zstandard as zstd`. Otherwise things may blow up.
18 24 mod = inspect.getmodule(cls)
19 if not hasattr(mod, 'zstd'):
25 if not hasattr(mod, "zstd"):
20 26 raise Exception('test module does not contain "zstd" symbol')
21 27
22 if not hasattr(mod.zstd, 'backend'):
23 raise Exception('zstd symbol does not have "backend" attribute; did '
24 'you `import zstandard as zstd`?')
28 if not hasattr(mod.zstd, "backend"):
29 raise Exception(
30 'zstd symbol does not have "backend" attribute; did '
31 "you `import zstandard as zstd`?"
32 )
25 33
26 34 # If `import zstandard` already chose the cffi backend, there is nothing
27 35 # for us to do: we only add the cffi variation if the default backend
28 36 # is the C extension.
29 if mod.zstd.backend == 'cffi':
37 if mod.zstd.backend == "cffi":
30 38 return cls
31 39
32 40 old_env = dict(os.environ)
33 os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'
41 os.environ["PYTHON_ZSTANDARD_IMPORT_POLICY"] = "cffi"
34 42 try:
35 43 try:
36 mod_info = imp.find_module('zstandard')
37 mod = imp.load_module('zstandard_cffi', *mod_info)
44 mod_info = imp.find_module("zstandard")
45 mod = imp.load_module("zstandard_cffi", *mod_info)
38 46 except ImportError:
39 47 return cls
40 48 finally:
41 49 os.environ.clear()
42 50 os.environ.update(old_env)
43 51
44 if mod.backend != 'cffi':
45 raise Exception('got the zstandard %s backend instead of cffi' % mod.backend)
52 if mod.backend != "cffi":
53 raise Exception("got the zstandard %s backend instead of cffi" % mod.backend)
46 54
47 55 # If CFFI version is available, dynamically construct test methods
48 56 # that use it.
49 57
50 58 for attr in dir(cls):
51 59 fn = getattr(cls, attr)
52 60 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
53 61 continue
54 62
55 if not fn.__name__.startswith('test_'):
63 if not fn.__name__.startswith("test_"):
56 64 continue
57 65
58 name = '%s_cffi' % fn.__name__
66 name = "%s_cffi" % fn.__name__
59 67
60 68 # Replace the "zstd" symbol with the CFFI module instance. Then copy
61 69 # the function object and install it in a new attribute.
62 70 if isinstance(fn, types.FunctionType):
63 71 globs = dict(fn.__globals__)
64 globs['zstd'] = mod
65 new_fn = types.FunctionType(fn.__code__, globs, name,
66 fn.__defaults__, fn.__closure__)
72 globs["zstd"] = mod
73 new_fn = types.FunctionType(
74 fn.__code__, globs, name, fn.__defaults__, fn.__closure__
75 )
67 76 new_method = new_fn
68 77 else:
69 78 globs = dict(fn.__func__.func_globals)
70 globs['zstd'] = mod
71 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
72 fn.__func__.func_defaults,
73 fn.__func__.func_closure)
74 new_method = types.UnboundMethodType(new_fn, fn.im_self,
75 fn.im_class)
79 globs["zstd"] = mod
80 new_fn = types.FunctionType(
81 fn.__func__.func_code,
82 globs,
83 name,
84 fn.__func__.func_defaults,
85 fn.__func__.func_closure,
86 )
87 new_method = types.UnboundMethodType(new_fn, fn.im_self, fn.im_class)
76 88
77 89 setattr(cls, name, new_method)
78 90
79 91 return cls
80 92
81 93
82 94 class NonClosingBytesIO(io.BytesIO):
83 95 """BytesIO that saves the underlying buffer on close().
84 96
85 97 This allows us to access written data after close().
86 98 """
99
87 100 def __init__(self, *args, **kwargs):
88 101 super(NonClosingBytesIO, self).__init__(*args, **kwargs)
89 102 self._saved_buffer = None
90 103
91 104 def close(self):
92 105 self._saved_buffer = self.getvalue()
93 106 return super(NonClosingBytesIO, self).close()
94 107
95 108 def getvalue(self):
96 109 if self.closed:
97 110 return self._saved_buffer
98 111 else:
99 112 return super(NonClosingBytesIO, self).getvalue()
100 113
101 114
102 115 class OpCountingBytesIO(NonClosingBytesIO):
103 116 def __init__(self, *args, **kwargs):
104 117 self._flush_count = 0
105 118 self._read_count = 0
106 119 self._write_count = 0
107 120 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
108 121
109 122 def flush(self):
110 123 self._flush_count += 1
111 124 return super(OpCountingBytesIO, self).flush()
112 125
113 126 def read(self, *args):
114 127 self._read_count += 1
115 128 return super(OpCountingBytesIO, self).read(*args)
116 129
117 130 def write(self, data):
118 131 self._write_count += 1
119 132 return super(OpCountingBytesIO, self).write(data)
120 133
121 134
122 135 _source_files = []
123 136
124 137
125 138 def random_input_data():
126 139 """Obtain the raw content of source files.
127 140
128 141 This is used for generating "random" data to feed into fuzzing, since it is
129 142 faster than random content generation.
130 143 """
131 144 if _source_files:
132 145 return _source_files
133 146
134 147 for root, dirs, files in os.walk(os.path.dirname(__file__)):
135 148 dirs[:] = list(sorted(dirs))
136 149 for f in sorted(files):
137 150 try:
138 with open(os.path.join(root, f), 'rb') as fh:
151 with open(os.path.join(root, f), "rb") as fh:
139 152 data = fh.read()
140 153 if data:
141 154 _source_files.append(data)
142 155 except OSError:
143 156 pass
144 157
145 158 # Also add some actual random data.
146 159 _source_files.append(os.urandom(100))
147 160 _source_files.append(os.urandom(1000))
148 161 _source_files.append(os.urandom(10000))
149 162 _source_files.append(os.urandom(100000))
150 163 _source_files.append(os.urandom(1000000))
151 164
152 165 return _source_files
153 166
154 167
155 168 def generate_samples():
156 169 inputs = [
157 b'foo',
158 b'bar',
159 b'abcdef',
160 b'sometext',
161 b'baz',
170 b"foo",
171 b"bar",
172 b"abcdef",
173 b"sometext",
174 b"baz",
162 175 ]
163 176
164 177 samples = []
165 178
166 179 for i in range(128):
167 180 samples.append(inputs[i % 5])
168 181 samples.append(inputs[i % 5] * (i + 3))
169 182 samples.append(inputs[-(i % 5)] * (i + 2))
170 183
171 184 return samples
172 185
173 186
174 187 if hypothesis:
175 188 default_settings = hypothesis.settings(deadline=10000)
176 hypothesis.settings.register_profile('default', default_settings)
189 hypothesis.settings.register_profile("default", default_settings)
177 190
178 191 ci_settings = hypothesis.settings(deadline=20000, max_examples=1000)
179 hypothesis.settings.register_profile('ci', ci_settings)
192 hypothesis.settings.register_profile("ci", ci_settings)
180 193
181 194 expensive_settings = hypothesis.settings(deadline=None, max_examples=10000)
182 hypothesis.settings.register_profile('expensive', expensive_settings)
195 hypothesis.settings.register_profile("expensive", expensive_settings)
183 196
184 hypothesis.settings.load_profile(
185 os.environ.get('HYPOTHESIS_PROFILE', 'default'))
197 hypothesis.settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default"))
contrib/python-zstandard/tests/test_buffer_util.py
@@ -1,135 +1,146 @@
1 1 import struct
2 2 import unittest
3 3
4 4 import zstandard as zstd
5 5
6 ss = struct.Struct('=QQ')
6 from .common import TestCase
7
8 ss = struct.Struct("=QQ")
7 9
8 10
9 class TestBufferWithSegments(unittest.TestCase):
11 class TestBufferWithSegments(TestCase):
10 12 def test_arguments(self):
11 if not hasattr(zstd, 'BufferWithSegments'):
12 self.skipTest('BufferWithSegments not available')
13 if not hasattr(zstd, "BufferWithSegments"):
14 self.skipTest("BufferWithSegments not available")
13 15
14 16 with self.assertRaises(TypeError):
15 17 zstd.BufferWithSegments()
16 18
17 19 with self.assertRaises(TypeError):
18 zstd.BufferWithSegments(b'foo')
20 zstd.BufferWithSegments(b"foo")
19 21
20 22 # Segments data should be a multiple of 16.
21 with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
22 zstd.BufferWithSegments(b'foo', b'\x00\x00')
23 with self.assertRaisesRegex(
24 ValueError, "segments array size is not a multiple of 16"
25 ):
26 zstd.BufferWithSegments(b"foo", b"\x00\x00")
23 27
24 28 def test_invalid_offset(self):
25 if not hasattr(zstd, 'BufferWithSegments'):
26 self.skipTest('BufferWithSegments not available')
29 if not hasattr(zstd, "BufferWithSegments"):
30 self.skipTest("BufferWithSegments not available")
27 31
28 with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
29 zstd.BufferWithSegments(b'foo', ss.pack(0, 4))
32 with self.assertRaisesRegex(
33 ValueError, "offset within segments array references memory"
34 ):
35 zstd.BufferWithSegments(b"foo", ss.pack(0, 4))
30 36
31 37 def test_invalid_getitem(self):
32 if not hasattr(zstd, 'BufferWithSegments'):
33 self.skipTest('BufferWithSegments not available')
38 if not hasattr(zstd, "BufferWithSegments"):
39 self.skipTest("BufferWithSegments not available")
34 40
35 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
41 b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
36 42
37 with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
43 with self.assertRaisesRegex(IndexError, "offset must be non-negative"):
38 44 test = b[-10]
39 45
40 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
46 with self.assertRaisesRegex(IndexError, "offset must be less than 1"):
41 47 test = b[1]
42 48
43 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
49 with self.assertRaisesRegex(IndexError, "offset must be less than 1"):
44 50 test = b[2]
45 51
46 52 def test_single(self):
47 if not hasattr(zstd, 'BufferWithSegments'):
48 self.skipTest('BufferWithSegments not available')
53 if not hasattr(zstd, "BufferWithSegments"):
54 self.skipTest("BufferWithSegments not available")
49 55
50 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
56 b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
51 57 self.assertEqual(len(b), 1)
52 58 self.assertEqual(b.size, 3)
53 self.assertEqual(b.tobytes(), b'foo')
59 self.assertEqual(b.tobytes(), b"foo")
54 60
55 61 self.assertEqual(len(b[0]), 3)
56 62 self.assertEqual(b[0].offset, 0)
57 self.assertEqual(b[0].tobytes(), b'foo')
63 self.assertEqual(b[0].tobytes(), b"foo")
58 64
59 65 def test_multiple(self):
60 if not hasattr(zstd, 'BufferWithSegments'):
61 self.skipTest('BufferWithSegments not available')
66 if not hasattr(zstd, "BufferWithSegments"):
67 self.skipTest("BufferWithSegments not available")
62 68
63 b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
64 ss.pack(3, 4),
65 ss.pack(7, 5)]))
69 b = zstd.BufferWithSegments(
70 b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)])
71 )
66 72 self.assertEqual(len(b), 3)
67 73 self.assertEqual(b.size, 12)
68 self.assertEqual(b.tobytes(), b'foofooxfooxy')
74 self.assertEqual(b.tobytes(), b"foofooxfooxy")
69 75
70 self.assertEqual(b[0].tobytes(), b'foo')
71 self.assertEqual(b[1].tobytes(), b'foox')
72 self.assertEqual(b[2].tobytes(), b'fooxy')
76 self.assertEqual(b[0].tobytes(), b"foo")
77 self.assertEqual(b[1].tobytes(), b"foox")
78 self.assertEqual(b[2].tobytes(), b"fooxy")
73 79
74 80
75 class TestBufferWithSegmentsCollection(unittest.TestCase):
81 class TestBufferWithSegmentsCollection(TestCase):
76 82 def test_empty_constructor(self):
77 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
78 self.skipTest('BufferWithSegmentsCollection not available')
83 if not hasattr(zstd, "BufferWithSegmentsCollection"):
84 self.skipTest("BufferWithSegmentsCollection not available")
79 85
80 with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
86 with self.assertRaisesRegex(ValueError, "must pass at least 1 argument"):
81 87 zstd.BufferWithSegmentsCollection()
82 88
83 89 def test_argument_validation(self):
84 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
85 self.skipTest('BufferWithSegmentsCollection not available')
90 if not hasattr(zstd, "BufferWithSegmentsCollection"):
91 self.skipTest("BufferWithSegmentsCollection not available")
86 92
87 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
93 with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"):
88 94 zstd.BufferWithSegmentsCollection(None)
89 95
90 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
91 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
92 None)
96 with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"):
97 zstd.BufferWithSegmentsCollection(
98 zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None
99 )
93 100
94 with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
95 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
101 with self.assertRaisesRegex(
102 ValueError, "ZstdBufferWithSegments cannot be empty"
103 ):
104 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b"", b""))
96 105
97 106 def test_length(self):
98 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
99 self.skipTest('BufferWithSegmentsCollection not available')
107 if not hasattr(zstd, "BufferWithSegmentsCollection"):
108 self.skipTest("BufferWithSegmentsCollection not available")
100 109
101 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
102 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
103 ss.pack(3, 3)]))
110 b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
111 b2 = zstd.BufferWithSegments(
112 b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)])
113 )
104 114
105 115 c = zstd.BufferWithSegmentsCollection(b1)
106 116 self.assertEqual(len(c), 1)
107 117 self.assertEqual(c.size(), 3)
108 118
109 119 c = zstd.BufferWithSegmentsCollection(b2)
110 120 self.assertEqual(len(c), 2)
111 121 self.assertEqual(c.size(), 6)
112 122
113 123 c = zstd.BufferWithSegmentsCollection(b1, b2)
114 124 self.assertEqual(len(c), 3)
115 125 self.assertEqual(c.size(), 9)
116 126
117 127 def test_getitem(self):
118 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
119 self.skipTest('BufferWithSegmentsCollection not available')
128 if not hasattr(zstd, "BufferWithSegmentsCollection"):
129 self.skipTest("BufferWithSegmentsCollection not available")
120 130
121 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
122 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
123 ss.pack(3, 3)]))
131 b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
132 b2 = zstd.BufferWithSegments(
133 b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)])
134 )
124 135
125 136 c = zstd.BufferWithSegmentsCollection(b1, b2)
126 137
127 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
138 with self.assertRaisesRegex(IndexError, "offset must be less than 3"):
128 139 c[3]
129 140
130 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
141 with self.assertRaisesRegex(IndexError, "offset must be less than 3"):
131 142 c[4]
132 143
133 self.assertEqual(c[0].tobytes(), b'foo')
134 self.assertEqual(c[1].tobytes(), b'bar')
135 self.assertEqual(c[2].tobytes(), b'baz')
144 self.assertEqual(c[0].tobytes(), b"foo")
145 self.assertEqual(c[1].tobytes(), b"bar")
146 self.assertEqual(c[2].tobytes(), b"baz")
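An aside, not part of the commit above: a minimal sketch of the buffer API these tests exercise. It assumes ss is the struct.Struct("=QQ") helper (packed 64-bit offset/length pairs) defined near the top of the test module, and it is only meaningful where the C backend provides these types; treat it as an illustration, not library documentation.

import struct

import zstandard as zstd

# Assumption: mirrors the ``ss`` helper used by the tests above; each
# segment is a packed (offset, length) pair of native uint64s.
ss = struct.Struct("=QQ")

if hasattr(zstd, "BufferWithSegmentsCollection"):
    # One backing buffer, three logical segments.
    b = zstd.BufferWithSegments(
        b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)])
    )
    assert len(b) == 3 and b.size == 12
    assert b[1].tobytes() == b"foox"

    # A collection spans several buffers and indexes segments across them.
    c = zstd.BufferWithSegmentsCollection(
        b, zstd.BufferWithSegments(b"bar", ss.pack(0, 3))
    )
    assert len(c) == 4
    assert c[3].tobytes() == b"bar"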
This diff has been collapsed as it changes many lines (811 lines changed).
@@ -1,1735 +1,1770 b''
1 1 import hashlib
2 2 import io
3 3 import os
4 4 import struct
5 5 import sys
6 6 import tarfile
7 7 import tempfile
8 8 import unittest
9 9
10 10 import zstandard as zstd
11 11
12 12 from .common import (
13 13 make_cffi,
14 14 NonClosingBytesIO,
15 15 OpCountingBytesIO,
16 TestCase,
16 17 )
17 18
18 19
19 20 if sys.version_info[0] >= 3:
20 21 next = lambda it: it.__next__()
21 22 else:
22 23 next = lambda it: it.next()
23 24
24 25
25 26 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.ZstdCompressionParameters.from_level(level,
27 source_size=source_size)
27 params = zstd.ZstdCompressionParameters.from_level(level, source_size=source_size)
28 28
29 29 return 1 << (params.window_log + 2)
30 30
31 31
32 32 @make_cffi
33 class TestCompressor(unittest.TestCase):
33 class TestCompressor(TestCase):
34 34 def test_level_bounds(self):
35 35 with self.assertRaises(ValueError):
36 36 zstd.ZstdCompressor(level=23)
37 37
38 38 def test_memory_size(self):
39 39 cctx = zstd.ZstdCompressor(level=1)
40 40 self.assertGreater(cctx.memory_size(), 100)
41 41
42 42
43 43 @make_cffi
44 class TestCompressor_compress(unittest.TestCase):
44 class TestCompressor_compress(TestCase):
45 45 def test_compress_empty(self):
46 46 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
47 result = cctx.compress(b'')
48 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
47 result = cctx.compress(b"")
48 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
49 49 params = zstd.get_frame_parameters(result)
50 50 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
51 51 self.assertEqual(params.window_size, 524288)
52 52 self.assertEqual(params.dict_id, 0)
53 53 self.assertFalse(params.has_checksum, 0)
54 54
55 55 cctx = zstd.ZstdCompressor()
56 result = cctx.compress(b'')
57 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00')
56 result = cctx.compress(b"")
57 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00")
58 58 params = zstd.get_frame_parameters(result)
59 59 self.assertEqual(params.content_size, 0)
60 60
61 61 def test_input_types(self):
62 62 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
63 expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f'
63 expected = b"\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f"
64 64
65 65 mutable_array = bytearray(3)
66 mutable_array[:] = b'foo'
66 mutable_array[:] = b"foo"
67 67
68 68 sources = [
69 memoryview(b'foo'),
70 bytearray(b'foo'),
69 memoryview(b"foo"),
70 bytearray(b"foo"),
71 71 mutable_array,
72 72 ]
73 73
74 74 for source in sources:
75 75 self.assertEqual(cctx.compress(source), expected)
76 76
77 77 def test_compress_large(self):
78 78 chunks = []
79 79 for i in range(255):
80 chunks.append(struct.Struct('>B').pack(i) * 16384)
80 chunks.append(struct.Struct(">B").pack(i) * 16384)
81 81
82 82 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
83 result = cctx.compress(b''.join(chunks))
83 result = cctx.compress(b"".join(chunks))
84 84 self.assertEqual(len(result), 999)
85 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
85 self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd")
86 86
87 87 # This matches the test for read_to_iter() below.
88 88 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
89 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
90 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
91 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
92 b'\x02\x09\x00\x00\x6f')
89 result = cctx.compress(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b"o")
90 self.assertEqual(
91 result,
92 b"\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00"
93 b"\x10\x66\x66\x01\x00\xfb\xff\x39\xc0"
94 b"\x02\x09\x00\x00\x6f",
95 )
93 96
94 97 def test_negative_level(self):
95 98 cctx = zstd.ZstdCompressor(level=-4)
96 result = cctx.compress(b'foo' * 256)
99 result = cctx.compress(b"foo" * 256)
97 100
98 101 def test_no_magic(self):
99 params = zstd.ZstdCompressionParameters.from_level(
100 1, format=zstd.FORMAT_ZSTD1)
102 params = zstd.ZstdCompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1)
101 103 cctx = zstd.ZstdCompressor(compression_params=params)
102 magic = cctx.compress(b'foobar')
104 magic = cctx.compress(b"foobar")
103 105
104 106 params = zstd.ZstdCompressionParameters.from_level(
105 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
107 1, format=zstd.FORMAT_ZSTD1_MAGICLESS
108 )
106 109 cctx = zstd.ZstdCompressor(compression_params=params)
107 no_magic = cctx.compress(b'foobar')
110 no_magic = cctx.compress(b"foobar")
108 111
109 self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd')
112 self.assertEqual(magic[0:4], b"\x28\xb5\x2f\xfd")
110 113 self.assertEqual(magic[4:], no_magic)
111 114
112 115 def test_write_checksum(self):
113 116 cctx = zstd.ZstdCompressor(level=1)
114 no_checksum = cctx.compress(b'foobar')
117 no_checksum = cctx.compress(b"foobar")
115 118 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
116 with_checksum = cctx.compress(b'foobar')
119 with_checksum = cctx.compress(b"foobar")
117 120
118 121 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
119 122
120 123 no_params = zstd.get_frame_parameters(no_checksum)
121 124 with_params = zstd.get_frame_parameters(with_checksum)
122 125
123 126 self.assertFalse(no_params.has_checksum)
124 127 self.assertTrue(with_params.has_checksum)
125 128
126 129 def test_write_content_size(self):
127 130 cctx = zstd.ZstdCompressor(level=1)
128 with_size = cctx.compress(b'foobar' * 256)
131 with_size = cctx.compress(b"foobar" * 256)
129 132 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
130 no_size = cctx.compress(b'foobar' * 256)
133 no_size = cctx.compress(b"foobar" * 256)
131 134
132 135 self.assertEqual(len(with_size), len(no_size) + 1)
133 136
134 137 no_params = zstd.get_frame_parameters(no_size)
135 138 with_params = zstd.get_frame_parameters(with_size)
136 139 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
137 140 self.assertEqual(with_params.content_size, 1536)
138 141
139 142 def test_no_dict_id(self):
140 143 samples = []
141 144 for i in range(128):
142 samples.append(b'foo' * 64)
143 samples.append(b'bar' * 64)
144 samples.append(b'foobar' * 64)
145 samples.append(b"foo" * 64)
146 samples.append(b"bar" * 64)
147 samples.append(b"foobar" * 64)
145 148
146 149 d = zstd.train_dictionary(1024, samples)
147 150
148 151 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
149 with_dict_id = cctx.compress(b'foobarfoobar')
152 with_dict_id = cctx.compress(b"foobarfoobar")
150 153
151 154 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
152 no_dict_id = cctx.compress(b'foobarfoobar')
155 no_dict_id = cctx.compress(b"foobarfoobar")
153 156
154 157 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
155 158
156 159 no_params = zstd.get_frame_parameters(no_dict_id)
157 160 with_params = zstd.get_frame_parameters(with_dict_id)
158 161 self.assertEqual(no_params.dict_id, 0)
159 162 self.assertEqual(with_params.dict_id, 1880053135)
160 163
161 164 def test_compress_dict_multiple(self):
162 165 samples = []
163 166 for i in range(128):
164 samples.append(b'foo' * 64)
165 samples.append(b'bar' * 64)
166 samples.append(b'foobar' * 64)
167 samples.append(b"foo" * 64)
168 samples.append(b"bar" * 64)
169 samples.append(b"foobar" * 64)
167 170
168 171 d = zstd.train_dictionary(8192, samples)
169 172
170 173 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
171 174
172 175 for i in range(32):
173 cctx.compress(b'foo bar foobar foo bar foobar')
176 cctx.compress(b"foo bar foobar foo bar foobar")
174 177
175 178 def test_dict_precompute(self):
176 179 samples = []
177 180 for i in range(128):
178 samples.append(b'foo' * 64)
179 samples.append(b'bar' * 64)
180 samples.append(b'foobar' * 64)
181 samples.append(b"foo" * 64)
182 samples.append(b"bar" * 64)
183 samples.append(b"foobar" * 64)
181 184
182 185 d = zstd.train_dictionary(8192, samples)
183 186 d.precompute_compress(level=1)
184 187
185 188 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
186 189
187 190 for i in range(32):
188 cctx.compress(b'foo bar foobar foo bar foobar')
191 cctx.compress(b"foo bar foobar foo bar foobar")
189 192
190 193 def test_multithreaded(self):
191 194 chunk_size = multithreaded_chunk_size(1)
192 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
195 source = b"".join([b"x" * chunk_size, b"y" * chunk_size])
193 196
194 197 cctx = zstd.ZstdCompressor(level=1, threads=2)
195 198 compressed = cctx.compress(source)
196 199
197 200 params = zstd.get_frame_parameters(compressed)
198 201 self.assertEqual(params.content_size, chunk_size * 2)
199 202 self.assertEqual(params.dict_id, 0)
200 203 self.assertFalse(params.has_checksum)
201 204
202 205 dctx = zstd.ZstdDecompressor()
203 206 self.assertEqual(dctx.decompress(compressed), source)
204 207
205 208 def test_multithreaded_dict(self):
206 209 samples = []
207 210 for i in range(128):
208 samples.append(b'foo' * 64)
209 samples.append(b'bar' * 64)
210 samples.append(b'foobar' * 64)
211 samples.append(b"foo" * 64)
212 samples.append(b"bar" * 64)
213 samples.append(b"foobar" * 64)
211 214
212 215 d = zstd.train_dictionary(1024, samples)
213 216
214 217 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
215 218
216 result = cctx.compress(b'foo')
217 params = zstd.get_frame_parameters(result);
218 self.assertEqual(params.content_size, 3);
219 result = cctx.compress(b"foo")
220 params = zstd.get_frame_parameters(result)
221 self.assertEqual(params.content_size, 3)
219 222 self.assertEqual(params.dict_id, d.dict_id())
220 223
221 self.assertEqual(result,
222 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
223 b'\x66\x6f\x6f')
224 self.assertEqual(
225 result,
226 b"\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00" b"\x66\x6f\x6f",
227 )
224 228
225 229 def test_multithreaded_compression_params(self):
226 230 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
227 231 cctx = zstd.ZstdCompressor(compression_params=params)
228 232
229 result = cctx.compress(b'foo')
230 params = zstd.get_frame_parameters(result);
231 self.assertEqual(params.content_size, 3);
233 result = cctx.compress(b"foo")
234 params = zstd.get_frame_parameters(result)
235 self.assertEqual(params.content_size, 3)
232 236
233 self.assertEqual(result,
234 b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f')
237 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f")
235 238
236 239
237 240 @make_cffi
238 class TestCompressor_compressobj(unittest.TestCase):
241 class TestCompressor_compressobj(TestCase):
239 242 def test_compressobj_empty(self):
240 243 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
241 244 cobj = cctx.compressobj()
242 self.assertEqual(cobj.compress(b''), b'')
243 self.assertEqual(cobj.flush(),
244 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
245 self.assertEqual(cobj.compress(b""), b"")
246 self.assertEqual(cobj.flush(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
245 247
246 248 def test_input_types(self):
247 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
249 expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f"
248 250 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
249 251
250 252 mutable_array = bytearray(3)
251 mutable_array[:] = b'foo'
253 mutable_array[:] = b"foo"
252 254
253 255 sources = [
254 memoryview(b'foo'),
255 bytearray(b'foo'),
256 memoryview(b"foo"),
257 bytearray(b"foo"),
256 258 mutable_array,
257 259 ]
258 260
259 261 for source in sources:
260 262 cobj = cctx.compressobj()
261 self.assertEqual(cobj.compress(source), b'')
263 self.assertEqual(cobj.compress(source), b"")
262 264 self.assertEqual(cobj.flush(), expected)
263 265
264 266 def test_compressobj_large(self):
265 267 chunks = []
266 268 for i in range(255):
267 chunks.append(struct.Struct('>B').pack(i) * 16384)
269 chunks.append(struct.Struct(">B").pack(i) * 16384)
268 270
269 271 cctx = zstd.ZstdCompressor(level=3)
270 272 cobj = cctx.compressobj()
271 273
272 result = cobj.compress(b''.join(chunks)) + cobj.flush()
274 result = cobj.compress(b"".join(chunks)) + cobj.flush()
273 275 self.assertEqual(len(result), 999)
274 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
276 self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd")
275 277
276 278 params = zstd.get_frame_parameters(result)
277 279 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
278 280 self.assertEqual(params.window_size, 2097152)
279 281 self.assertEqual(params.dict_id, 0)
280 282 self.assertFalse(params.has_checksum)
281 283
282 284 def test_write_checksum(self):
283 285 cctx = zstd.ZstdCompressor(level=1)
284 286 cobj = cctx.compressobj()
285 no_checksum = cobj.compress(b'foobar') + cobj.flush()
287 no_checksum = cobj.compress(b"foobar") + cobj.flush()
286 288 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
287 289 cobj = cctx.compressobj()
288 with_checksum = cobj.compress(b'foobar') + cobj.flush()
290 with_checksum = cobj.compress(b"foobar") + cobj.flush()
289 291
290 292 no_params = zstd.get_frame_parameters(no_checksum)
291 293 with_params = zstd.get_frame_parameters(with_checksum)
292 294 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
293 295 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
294 296 self.assertEqual(no_params.dict_id, 0)
295 297 self.assertEqual(with_params.dict_id, 0)
296 298 self.assertFalse(no_params.has_checksum)
297 299 self.assertTrue(with_params.has_checksum)
298 300
299 301 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
300 302
301 303 def test_write_content_size(self):
302 304 cctx = zstd.ZstdCompressor(level=1)
303 cobj = cctx.compressobj(size=len(b'foobar' * 256))
304 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
305 cobj = cctx.compressobj(size=len(b"foobar" * 256))
306 with_size = cobj.compress(b"foobar" * 256) + cobj.flush()
305 307 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
306 cobj = cctx.compressobj(size=len(b'foobar' * 256))
307 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
308 cobj = cctx.compressobj(size=len(b"foobar" * 256))
309 no_size = cobj.compress(b"foobar" * 256) + cobj.flush()
308 310
309 311 no_params = zstd.get_frame_parameters(no_size)
310 312 with_params = zstd.get_frame_parameters(with_size)
311 313 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
312 314 self.assertEqual(with_params.content_size, 1536)
313 315 self.assertEqual(no_params.dict_id, 0)
314 316 self.assertEqual(with_params.dict_id, 0)
315 317 self.assertFalse(no_params.has_checksum)
316 318 self.assertFalse(with_params.has_checksum)
317 319
318 320 self.assertEqual(len(with_size), len(no_size) + 1)
319 321
320 322 def test_compress_after_finished(self):
321 323 cctx = zstd.ZstdCompressor()
322 324 cobj = cctx.compressobj()
323 325
324 cobj.compress(b'foo')
326 cobj.compress(b"foo")
325 327 cobj.flush()
326 328
327 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
328 cobj.compress(b'foo')
329 with self.assertRaisesRegex(
330 zstd.ZstdError, r"cannot call compress\(\) after compressor"
331 ):
332 cobj.compress(b"foo")
329 333
330 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
334 with self.assertRaisesRegex(
335 zstd.ZstdError, "compressor object already finished"
336 ):
331 337 cobj.flush()
332 338
333 339 def test_flush_block_repeated(self):
334 340 cctx = zstd.ZstdCompressor(level=1)
335 341 cobj = cctx.compressobj()
336 342
337 self.assertEqual(cobj.compress(b'foo'), b'')
338 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
339 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
340 self.assertEqual(cobj.compress(b'bar'), b'')
343 self.assertEqual(cobj.compress(b"foo"), b"")
344 self.assertEqual(
345 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
346 b"\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo",
347 )
348 self.assertEqual(cobj.compress(b"bar"), b"")
341 349 # 3 byte header plus content.
342 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
343 b'\x18\x00\x00bar')
344 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
350 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"\x18\x00\x00bar")
351 self.assertEqual(cobj.flush(), b"\x01\x00\x00")
345 352
346 353 def test_flush_empty_block(self):
347 354 cctx = zstd.ZstdCompressor(write_checksum=True)
348 355 cobj = cctx.compressobj()
349 356
350 cobj.compress(b'foobar')
357 cobj.compress(b"foobar")
351 358 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
352 359 # No-op if no block is active (this is internal to zstd).
353 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
360 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"")
354 361
355 362 trailing = cobj.flush()
356 363 # 3 bytes block header + 4 bytes frame checksum
357 364 self.assertEqual(len(trailing), 7)
358 365 header = trailing[0:3]
359 self.assertEqual(header, b'\x01\x00\x00')
366 self.assertEqual(header, b"\x01\x00\x00")
360 367
361 368 def test_multithreaded(self):
362 369 source = io.BytesIO()
363 source.write(b'a' * 1048576)
364 source.write(b'b' * 1048576)
365 source.write(b'c' * 1048576)
370 source.write(b"a" * 1048576)
371 source.write(b"b" * 1048576)
372 source.write(b"c" * 1048576)
366 373 source.seek(0)
367 374
368 375 cctx = zstd.ZstdCompressor(level=1, threads=2)
369 376 cobj = cctx.compressobj()
370 377
371 378 chunks = []
372 379 while True:
373 380 d = source.read(8192)
374 381 if not d:
375 382 break
376 383
377 384 chunks.append(cobj.compress(d))
378 385
379 386 chunks.append(cobj.flush())
380 387
381 compressed = b''.join(chunks)
388 compressed = b"".join(chunks)
382 389
383 self.assertEqual(len(compressed), 295)
390 self.assertEqual(len(compressed), 119)
384 391
385 392 def test_frame_progression(self):
386 393 cctx = zstd.ZstdCompressor()
387 394
388 395 self.assertEqual(cctx.frame_progression(), (0, 0, 0))
389 396
390 397 cobj = cctx.compressobj()
391 398
392 cobj.compress(b'foobar')
399 cobj.compress(b"foobar")
393 400 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
394 401
395 402 cobj.flush()
396 403 self.assertEqual(cctx.frame_progression(), (6, 6, 15))
397 404
398 405 def test_bad_size(self):
399 406 cctx = zstd.ZstdCompressor()
400 407
401 408 cobj = cctx.compressobj(size=2)
402 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
403 cobj.compress(b'foo')
409 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
410 cobj.compress(b"foo")
404 411
405 412 # Try another operation on this instance.
406 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
407 cobj.compress(b'aa')
413 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
414 cobj.compress(b"aa")
408 415
409 416 # Try another operation on the compressor.
410 417 cctx.compressobj(size=4)
411 cctx.compress(b'foobar')
418 cctx.compress(b"foobar")
412 419
413 420
414 421 @make_cffi
415 class TestCompressor_copy_stream(unittest.TestCase):
422 class TestCompressor_copy_stream(TestCase):
416 423 def test_no_read(self):
417 424 source = object()
418 425 dest = io.BytesIO()
419 426
420 427 cctx = zstd.ZstdCompressor()
421 428 with self.assertRaises(ValueError):
422 429 cctx.copy_stream(source, dest)
423 430
424 431 def test_no_write(self):
425 432 source = io.BytesIO()
426 433 dest = object()
427 434
428 435 cctx = zstd.ZstdCompressor()
429 436 with self.assertRaises(ValueError):
430 437 cctx.copy_stream(source, dest)
431 438
432 439 def test_empty(self):
433 440 source = io.BytesIO()
434 441 dest = io.BytesIO()
435 442
436 443 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
437 444 r, w = cctx.copy_stream(source, dest)
438 445 self.assertEqual(int(r), 0)
439 446 self.assertEqual(w, 9)
440 447
441 self.assertEqual(dest.getvalue(),
442 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
448 self.assertEqual(dest.getvalue(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
443 449
444 450 def test_large_data(self):
445 451 source = io.BytesIO()
446 452 for i in range(255):
447 source.write(struct.Struct('>B').pack(i) * 16384)
453 source.write(struct.Struct(">B").pack(i) * 16384)
448 454 source.seek(0)
449 455
450 456 dest = io.BytesIO()
451 457 cctx = zstd.ZstdCompressor()
452 458 r, w = cctx.copy_stream(source, dest)
453 459
454 460 self.assertEqual(r, 255 * 16384)
455 461 self.assertEqual(w, 999)
456 462
457 463 params = zstd.get_frame_parameters(dest.getvalue())
458 464 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
459 465 self.assertEqual(params.window_size, 2097152)
460 466 self.assertEqual(params.dict_id, 0)
461 467 self.assertFalse(params.has_checksum)
462 468
463 469 def test_write_checksum(self):
464 source = io.BytesIO(b'foobar')
470 source = io.BytesIO(b"foobar")
465 471 no_checksum = io.BytesIO()
466 472
467 473 cctx = zstd.ZstdCompressor(level=1)
468 474 cctx.copy_stream(source, no_checksum)
469 475
470 476 source.seek(0)
471 477 with_checksum = io.BytesIO()
472 478 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
473 479 cctx.copy_stream(source, with_checksum)
474 480
475 self.assertEqual(len(with_checksum.getvalue()),
476 len(no_checksum.getvalue()) + 4)
481 self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4)
477 482
478 483 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
479 484 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
480 485 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
481 486 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
482 487 self.assertEqual(no_params.dict_id, 0)
483 488 self.assertEqual(with_params.dict_id, 0)
484 489 self.assertFalse(no_params.has_checksum)
485 490 self.assertTrue(with_params.has_checksum)
486 491
487 492 def test_write_content_size(self):
488 source = io.BytesIO(b'foobar' * 256)
493 source = io.BytesIO(b"foobar" * 256)
489 494 no_size = io.BytesIO()
490 495
491 496 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
492 497 cctx.copy_stream(source, no_size)
493 498
494 499 source.seek(0)
495 500 with_size = io.BytesIO()
496 501 cctx = zstd.ZstdCompressor(level=1)
497 502 cctx.copy_stream(source, with_size)
498 503
499 504 # Source content size is unknown, so no content size written.
500 self.assertEqual(len(with_size.getvalue()),
501 len(no_size.getvalue()))
505 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()))
502 506
503 507 source.seek(0)
504 508 with_size = io.BytesIO()
505 509 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
506 510
507 511 # We specified source size, so content size header is present.
508 self.assertEqual(len(with_size.getvalue()),
509 len(no_size.getvalue()) + 1)
512 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1)
510 513
511 514 no_params = zstd.get_frame_parameters(no_size.getvalue())
512 515 with_params = zstd.get_frame_parameters(with_size.getvalue())
513 516 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
514 517 self.assertEqual(with_params.content_size, 1536)
515 518 self.assertEqual(no_params.dict_id, 0)
516 519 self.assertEqual(with_params.dict_id, 0)
517 520 self.assertFalse(no_params.has_checksum)
518 521 self.assertFalse(with_params.has_checksum)
519 522
520 523 def test_read_write_size(self):
521 source = OpCountingBytesIO(b'foobarfoobar')
524 source = OpCountingBytesIO(b"foobarfoobar")
522 525 dest = OpCountingBytesIO()
523 526 cctx = zstd.ZstdCompressor()
524 527 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
525 528
526 529 self.assertEqual(r, len(source.getvalue()))
527 530 self.assertEqual(w, 21)
528 531 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
529 532 self.assertEqual(dest._write_count, len(dest.getvalue()))
530 533
531 534 def test_multithreaded(self):
532 535 source = io.BytesIO()
533 source.write(b'a' * 1048576)
534 source.write(b'b' * 1048576)
535 source.write(b'c' * 1048576)
536 source.write(b"a" * 1048576)
537 source.write(b"b" * 1048576)
538 source.write(b"c" * 1048576)
536 539 source.seek(0)
537 540
538 541 dest = io.BytesIO()
539 542 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
540 543 r, w = cctx.copy_stream(source, dest)
541 544 self.assertEqual(r, 3145728)
542 self.assertEqual(w, 295)
545 self.assertEqual(w, 111)
543 546
544 547 params = zstd.get_frame_parameters(dest.getvalue())
545 548 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
546 549 self.assertEqual(params.dict_id, 0)
547 550 self.assertFalse(params.has_checksum)
548 551
549 552 # Writing content size and checksum works.
550 553 cctx = zstd.ZstdCompressor(threads=2, write_checksum=True)
551 554 dest = io.BytesIO()
552 555 source.seek(0)
553 556 cctx.copy_stream(source, dest, size=len(source.getvalue()))
554 557
555 558 params = zstd.get_frame_parameters(dest.getvalue())
556 559 self.assertEqual(params.content_size, 3145728)
557 560 self.assertEqual(params.dict_id, 0)
558 561 self.assertTrue(params.has_checksum)
559 562
560 563 def test_bad_size(self):
561 564 source = io.BytesIO()
562 source.write(b'a' * 32768)
563 source.write(b'b' * 32768)
565 source.write(b"a" * 32768)
566 source.write(b"b" * 32768)
564 567 source.seek(0)
565 568
566 569 dest = io.BytesIO()
567 570
568 571 cctx = zstd.ZstdCompressor()
569 572
570 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
573 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
571 574 cctx.copy_stream(source, dest, size=42)
572 575
573 576 # Try another operation on this compressor.
574 577 source.seek(0)
575 578 dest = io.BytesIO()
576 579 cctx.copy_stream(source, dest)
577 580
578 581
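A separate illustration of copy_stream() as exercised above: it pumps a readable into a writable and returns both byte counts.

import io

import zstandard as zstd

payload = b"payload " * 4096
source = io.BytesIO(payload)
dest = io.BytesIO()

cctx = zstd.ZstdCompressor(level=1)
# Passing size= lets the frame header carry the content size.
read_count, write_count = cctx.copy_stream(source, dest, size=len(payload))

assert read_count == len(payload)
assert zstd.ZstdDecompressor().decompress(dest.getvalue()) == payload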
579 582 @make_cffi
580 class TestCompressor_stream_reader(unittest.TestCase):
583 class TestCompressor_stream_reader(TestCase):
581 584 def test_context_manager(self):
582 585 cctx = zstd.ZstdCompressor()
583 586
584 with cctx.stream_reader(b'foo') as reader:
585 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
587 with cctx.stream_reader(b"foo") as reader:
588 with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"):
586 589 with reader as reader2:
587 590 pass
588 591
589 592 def test_no_context_manager(self):
590 593 cctx = zstd.ZstdCompressor()
591 594
592 reader = cctx.stream_reader(b'foo')
595 reader = cctx.stream_reader(b"foo")
593 596 reader.read(4)
594 597 self.assertFalse(reader.closed)
595 598
596 599 reader.close()
597 600 self.assertTrue(reader.closed)
598 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
601 with self.assertRaisesRegex(ValueError, "stream is closed"):
599 602 reader.read(1)
600 603
601 604 def test_not_implemented(self):
602 605 cctx = zstd.ZstdCompressor()
603 606
604 with cctx.stream_reader(b'foo' * 60) as reader:
607 with cctx.stream_reader(b"foo" * 60) as reader:
605 608 with self.assertRaises(io.UnsupportedOperation):
606 609 reader.readline()
607 610
608 611 with self.assertRaises(io.UnsupportedOperation):
609 612 reader.readlines()
610 613
611 614 with self.assertRaises(io.UnsupportedOperation):
612 615 iter(reader)
613 616
614 617 with self.assertRaises(io.UnsupportedOperation):
615 618 next(reader)
616 619
617 620 with self.assertRaises(OSError):
618 621 reader.writelines([])
619 622
620 623 with self.assertRaises(OSError):
621 reader.write(b'foo')
624 reader.write(b"foo")
622 625
623 626 def test_constant_methods(self):
624 627 cctx = zstd.ZstdCompressor()
625 628
626 with cctx.stream_reader(b'boo') as reader:
629 with cctx.stream_reader(b"boo") as reader:
627 630 self.assertTrue(reader.readable())
628 631 self.assertFalse(reader.writable())
629 632 self.assertFalse(reader.seekable())
630 633 self.assertFalse(reader.isatty())
631 634 self.assertFalse(reader.closed)
632 635 self.assertIsNone(reader.flush())
633 636 self.assertFalse(reader.closed)
634 637
635 638 self.assertTrue(reader.closed)
636 639
637 640 def test_read_closed(self):
638 641 cctx = zstd.ZstdCompressor()
639 642
640 with cctx.stream_reader(b'foo' * 60) as reader:
643 with cctx.stream_reader(b"foo" * 60) as reader:
641 644 reader.close()
642 645 self.assertTrue(reader.closed)
643 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
646 with self.assertRaisesRegex(ValueError, "stream is closed"):
644 647 reader.read(10)
645 648
646 649 def test_read_sizes(self):
647 650 cctx = zstd.ZstdCompressor()
648 foo = cctx.compress(b'foo')
651 foo = cctx.compress(b"foo")
649 652
650 with cctx.stream_reader(b'foo') as reader:
651 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
653 with cctx.stream_reader(b"foo") as reader:
654 with self.assertRaisesRegex(
655 ValueError, "cannot read negative amounts less than -1"
656 ):
652 657 reader.read(-2)
653 658
654 self.assertEqual(reader.read(0), b'')
659 self.assertEqual(reader.read(0), b"")
655 660 self.assertEqual(reader.read(), foo)
656 661
657 662 def test_read_buffer(self):
658 663 cctx = zstd.ZstdCompressor()
659 664
660 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
665 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
661 666 frame = cctx.compress(source)
662 667
663 668 with cctx.stream_reader(source) as reader:
664 669 self.assertEqual(reader.tell(), 0)
665 670
666 671 # We should get entire frame in one read.
667 672 result = reader.read(8192)
668 673 self.assertEqual(result, frame)
669 674 self.assertEqual(reader.tell(), len(result))
670 self.assertEqual(reader.read(), b'')
675 self.assertEqual(reader.read(), b"")
671 676 self.assertEqual(reader.tell(), len(result))
672 677
673 678 def test_read_buffer_small_chunks(self):
674 679 cctx = zstd.ZstdCompressor()
675 680
676 source = b'foo' * 60
681 source = b"foo" * 60
677 682 chunks = []
678 683
679 684 with cctx.stream_reader(source) as reader:
680 685 self.assertEqual(reader.tell(), 0)
681 686
682 687 while True:
683 688 chunk = reader.read(1)
684 689 if not chunk:
685 690 break
686 691
687 692 chunks.append(chunk)
688 693 self.assertEqual(reader.tell(), sum(map(len, chunks)))
689 694
690 self.assertEqual(b''.join(chunks), cctx.compress(source))
695 self.assertEqual(b"".join(chunks), cctx.compress(source))
691 696
692 697 def test_read_stream(self):
693 698 cctx = zstd.ZstdCompressor()
694 699
695 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
700 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
696 701 frame = cctx.compress(source)
697 702
698 703 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
699 704 self.assertEqual(reader.tell(), 0)
700 705
701 706 chunk = reader.read(8192)
702 707 self.assertEqual(chunk, frame)
703 708 self.assertEqual(reader.tell(), len(chunk))
704 self.assertEqual(reader.read(), b'')
709 self.assertEqual(reader.read(), b"")
705 710 self.assertEqual(reader.tell(), len(chunk))
706 711
707 712 def test_read_stream_small_chunks(self):
708 713 cctx = zstd.ZstdCompressor()
709 714
710 source = b'foo' * 60
715 source = b"foo" * 60
711 716 chunks = []
712 717
713 718 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
714 719 self.assertEqual(reader.tell(), 0)
715 720
716 721 while True:
717 722 chunk = reader.read(1)
718 723 if not chunk:
719 724 break
720 725
721 726 chunks.append(chunk)
722 727 self.assertEqual(reader.tell(), sum(map(len, chunks)))
723 728
724 self.assertEqual(b''.join(chunks), cctx.compress(source))
729 self.assertEqual(b"".join(chunks), cctx.compress(source))
725 730
726 731 def test_read_after_exit(self):
727 732 cctx = zstd.ZstdCompressor()
728 733
729 with cctx.stream_reader(b'foo' * 60) as reader:
734 with cctx.stream_reader(b"foo" * 60) as reader:
730 735 while reader.read(8192):
731 736 pass
732 737
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
738 with self.assertRaisesRegex(ValueError, "stream is closed"):
734 739 reader.read(10)
735 740
736 741 def test_bad_size(self):
737 742 cctx = zstd.ZstdCompressor()
738 743
739 source = io.BytesIO(b'foobar')
744 source = io.BytesIO(b"foobar")
740 745
741 746 with cctx.stream_reader(source, size=2) as reader:
742 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
747 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
743 748 reader.read(10)
744 749
745 750 # Try another compression operation.
746 751 with cctx.stream_reader(source, size=42):
747 752 pass
748 753
749 754 def test_readall(self):
750 755 cctx = zstd.ZstdCompressor()
751 frame = cctx.compress(b'foo' * 1024)
756 frame = cctx.compress(b"foo" * 1024)
752 757
753 reader = cctx.stream_reader(b'foo' * 1024)
758 reader = cctx.stream_reader(b"foo" * 1024)
754 759 self.assertEqual(reader.readall(), frame)
755 760
756 761 def test_readinto(self):
757 762 cctx = zstd.ZstdCompressor()
758 foo = cctx.compress(b'foo')
763 foo = cctx.compress(b"foo")
759 764
760 reader = cctx.stream_reader(b'foo')
765 reader = cctx.stream_reader(b"foo")
761 766 with self.assertRaises(Exception):
762 reader.readinto(b'foobar')
767 reader.readinto(b"foobar")
763 768
764 769 # readinto() with sufficiently large destination.
765 770 b = bytearray(1024)
766 reader = cctx.stream_reader(b'foo')
771 reader = cctx.stream_reader(b"foo")
767 772 self.assertEqual(reader.readinto(b), len(foo))
768 self.assertEqual(b[0:len(foo)], foo)
773 self.assertEqual(b[0 : len(foo)], foo)
769 774 self.assertEqual(reader.readinto(b), 0)
770 self.assertEqual(b[0:len(foo)], foo)
775 self.assertEqual(b[0 : len(foo)], foo)
771 776
772 777 # readinto() with small reads.
773 778 b = bytearray(1024)
774 reader = cctx.stream_reader(b'foo', read_size=1)
779 reader = cctx.stream_reader(b"foo", read_size=1)
775 780 self.assertEqual(reader.readinto(b), len(foo))
776 self.assertEqual(b[0:len(foo)], foo)
781 self.assertEqual(b[0 : len(foo)], foo)
777 782
778 783 # Too small destination buffer.
779 784 b = bytearray(2)
780 reader = cctx.stream_reader(b'foo')
785 reader = cctx.stream_reader(b"foo")
781 786 self.assertEqual(reader.readinto(b), 2)
782 787 self.assertEqual(b[:], foo[0:2])
783 788 self.assertEqual(reader.readinto(b), 2)
784 789 self.assertEqual(b[:], foo[2:4])
785 790 self.assertEqual(reader.readinto(b), 2)
786 791 self.assertEqual(b[:], foo[4:6])
787 792
788 793 def test_readinto1(self):
789 794 cctx = zstd.ZstdCompressor()
790 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
795 foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo")))
791 796
792 reader = cctx.stream_reader(b'foo')
797 reader = cctx.stream_reader(b"foo")
793 798 with self.assertRaises(Exception):
794 reader.readinto1(b'foobar')
799 reader.readinto1(b"foobar")
795 800
796 801 b = bytearray(1024)
797 source = OpCountingBytesIO(b'foo')
802 source = OpCountingBytesIO(b"foo")
798 803 reader = cctx.stream_reader(source)
799 804 self.assertEqual(reader.readinto1(b), len(foo))
800 self.assertEqual(b[0:len(foo)], foo)
805 self.assertEqual(b[0 : len(foo)], foo)
801 806 self.assertEqual(source._read_count, 2)
802 807
803 808 # readinto1() with small reads.
804 809 b = bytearray(1024)
805 source = OpCountingBytesIO(b'foo')
810 source = OpCountingBytesIO(b"foo")
806 811 reader = cctx.stream_reader(source, read_size=1)
807 812 self.assertEqual(reader.readinto1(b), len(foo))
808 self.assertEqual(b[0:len(foo)], foo)
813 self.assertEqual(b[0 : len(foo)], foo)
809 814 self.assertEqual(source._read_count, 4)
810 815
811 816 def test_read1(self):
812 817 cctx = zstd.ZstdCompressor()
813 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
818 foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo")))
814 819
815 b = OpCountingBytesIO(b'foo')
820 b = OpCountingBytesIO(b"foo")
816 821 reader = cctx.stream_reader(b)
817 822
818 823 self.assertEqual(reader.read1(), foo)
819 824 self.assertEqual(b._read_count, 2)
820 825
821 b = OpCountingBytesIO(b'foo')
826 b = OpCountingBytesIO(b"foo")
822 827 reader = cctx.stream_reader(b)
823 828
824 self.assertEqual(reader.read1(0), b'')
829 self.assertEqual(reader.read1(0), b"")
825 830 self.assertEqual(reader.read1(2), foo[0:2])
826 831 self.assertEqual(b._read_count, 2)
827 832 self.assertEqual(reader.read1(2), foo[2:4])
828 833 self.assertEqual(reader.read1(1024), foo[4:])
829 834
830 835
831 836 @make_cffi
832 class TestCompressor_stream_writer(unittest.TestCase):
837 class TestCompressor_stream_writer(TestCase):
833 838 def test_io_api(self):
834 839 buffer = io.BytesIO()
835 840 cctx = zstd.ZstdCompressor()
836 841 writer = cctx.stream_writer(buffer)
837 842
838 843 self.assertFalse(writer.isatty())
839 844 self.assertFalse(writer.readable())
840 845
841 846 with self.assertRaises(io.UnsupportedOperation):
842 847 writer.readline()
843 848
844 849 with self.assertRaises(io.UnsupportedOperation):
845 850 writer.readline(42)
846 851
847 852 with self.assertRaises(io.UnsupportedOperation):
848 853 writer.readline(size=42)
849 854
850 855 with self.assertRaises(io.UnsupportedOperation):
851 856 writer.readlines()
852 857
853 858 with self.assertRaises(io.UnsupportedOperation):
854 859 writer.readlines(42)
855 860
856 861 with self.assertRaises(io.UnsupportedOperation):
857 862 writer.readlines(hint=42)
858 863
859 864 with self.assertRaises(io.UnsupportedOperation):
860 865 writer.seek(0)
861 866
862 867 with self.assertRaises(io.UnsupportedOperation):
863 868 writer.seek(10, os.SEEK_SET)
864 869
865 870 self.assertFalse(writer.seekable())
866 871
867 872 with self.assertRaises(io.UnsupportedOperation):
868 873 writer.truncate()
869 874
870 875 with self.assertRaises(io.UnsupportedOperation):
871 876 writer.truncate(42)
872 877
873 878 with self.assertRaises(io.UnsupportedOperation):
874 879 writer.truncate(size=42)
875 880
876 881 self.assertTrue(writer.writable())
877 882
878 883 with self.assertRaises(NotImplementedError):
879 884 writer.writelines([])
880 885
881 886 with self.assertRaises(io.UnsupportedOperation):
882 887 writer.read()
883 888
884 889 with self.assertRaises(io.UnsupportedOperation):
885 890 writer.read(42)
886 891
887 892 with self.assertRaises(io.UnsupportedOperation):
888 893 writer.read(size=42)
889 894
890 895 with self.assertRaises(io.UnsupportedOperation):
891 896 writer.readall()
892 897
893 898 with self.assertRaises(io.UnsupportedOperation):
894 899 writer.readinto(None)
895 900
896 901 with self.assertRaises(io.UnsupportedOperation):
897 902 writer.fileno()
898 903
899 904 self.assertFalse(writer.closed)
900 905
901 906 def test_fileno_file(self):
902 with tempfile.TemporaryFile('wb') as tf:
907 with tempfile.TemporaryFile("wb") as tf:
903 908 cctx = zstd.ZstdCompressor()
904 909 writer = cctx.stream_writer(tf)
905 910
906 911 self.assertEqual(writer.fileno(), tf.fileno())
907 912
908 913 def test_close(self):
909 914 buffer = NonClosingBytesIO()
910 915 cctx = zstd.ZstdCompressor(level=1)
911 916 writer = cctx.stream_writer(buffer)
912 917
913 writer.write(b'foo' * 1024)
918 writer.write(b"foo" * 1024)
914 919 self.assertFalse(writer.closed)
915 920 self.assertFalse(buffer.closed)
916 921 writer.close()
917 922 self.assertTrue(writer.closed)
918 923 self.assertTrue(buffer.closed)
919 924
920 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
921 writer.write(b'foo')
925 with self.assertRaisesRegex(ValueError, "stream is closed"):
926 writer.write(b"foo")
922 927
923 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
928 with self.assertRaisesRegex(ValueError, "stream is closed"):
924 929 writer.flush()
925 930
926 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
931 with self.assertRaisesRegex(ValueError, "stream is closed"):
927 932 with writer:
928 933 pass
929 934
930 self.assertEqual(buffer.getvalue(),
931 b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f'
932 b'\x6f\x01\x00\xfa\xd3\x77\x43')
935 self.assertEqual(
936 buffer.getvalue(),
937 b"\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f"
938 b"\x6f\x01\x00\xfa\xd3\x77\x43",
939 )
933 940
934 941 # Context manager exit should close stream.
935 942 buffer = io.BytesIO()
936 943 writer = cctx.stream_writer(buffer)
937 944
938 945 with writer:
939 writer.write(b'foo')
946 writer.write(b"foo")
940 947
941 948 self.assertTrue(writer.closed)
942 949
943 950 def test_empty(self):
944 951 buffer = NonClosingBytesIO()
945 952 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
946 953 with cctx.stream_writer(buffer) as compressor:
947 compressor.write(b'')
954 compressor.write(b"")
948 955
949 956 result = buffer.getvalue()
950 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
957 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
951 958
952 959 params = zstd.get_frame_parameters(result)
953 960 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
954 961 self.assertEqual(params.window_size, 524288)
955 962 self.assertEqual(params.dict_id, 0)
956 963 self.assertFalse(params.has_checksum)
957 964
958 965 # Test without context manager.
959 966 buffer = io.BytesIO()
960 967 compressor = cctx.stream_writer(buffer)
961 self.assertEqual(compressor.write(b''), 0)
962 self.assertEqual(buffer.getvalue(), b'')
968 self.assertEqual(compressor.write(b""), 0)
969 self.assertEqual(buffer.getvalue(), b"")
963 970 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9)
964 971 result = buffer.getvalue()
965 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
972 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
966 973
967 974 params = zstd.get_frame_parameters(result)
968 975 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
969 976 self.assertEqual(params.window_size, 524288)
970 977 self.assertEqual(params.dict_id, 0)
971 978 self.assertFalse(params.has_checksum)
972 979
973 980 # Test write_return_read=True
974 981 compressor = cctx.stream_writer(buffer, write_return_read=True)
975 self.assertEqual(compressor.write(b''), 0)
982 self.assertEqual(compressor.write(b""), 0)
976 983
977 984 def test_input_types(self):
978 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
985 expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f"
979 986 cctx = zstd.ZstdCompressor(level=1)
980 987
981 988 mutable_array = bytearray(3)
982 mutable_array[:] = b'foo'
989 mutable_array[:] = b"foo"
983 990
984 991 sources = [
985 memoryview(b'foo'),
986 bytearray(b'foo'),
992 memoryview(b"foo"),
993 bytearray(b"foo"),
987 994 mutable_array,
988 995 ]
989 996
990 997 for source in sources:
991 998 buffer = NonClosingBytesIO()
992 999 with cctx.stream_writer(buffer) as compressor:
993 1000 compressor.write(source)
994 1001
995 1002 self.assertEqual(buffer.getvalue(), expected)
996 1003
997 1004 compressor = cctx.stream_writer(buffer, write_return_read=True)
998 1005 self.assertEqual(compressor.write(source), len(source))
999 1006
1000 1007 def test_multiple_compress(self):
1001 1008 buffer = NonClosingBytesIO()
1002 1009 cctx = zstd.ZstdCompressor(level=5)
1003 1010 with cctx.stream_writer(buffer) as compressor:
1004 self.assertEqual(compressor.write(b'foo'), 0)
1005 self.assertEqual(compressor.write(b'bar'), 0)
1006 self.assertEqual(compressor.write(b'x' * 8192), 0)
1011 self.assertEqual(compressor.write(b"foo"), 0)
1012 self.assertEqual(compressor.write(b"bar"), 0)
1013 self.assertEqual(compressor.write(b"x" * 8192), 0)
1007 1014
1008 1015 result = buffer.getvalue()
1009 self.assertEqual(result,
1010 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1011 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1016 self.assertEqual(
1017 result,
1018 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f"
1019 b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23",
1020 )
1012 1021
1013 1022 # Test without context manager.
1014 1023 buffer = io.BytesIO()
1015 1024 compressor = cctx.stream_writer(buffer)
1016 self.assertEqual(compressor.write(b'foo'), 0)
1017 self.assertEqual(compressor.write(b'bar'), 0)
1018 self.assertEqual(compressor.write(b'x' * 8192), 0)
1025 self.assertEqual(compressor.write(b"foo"), 0)
1026 self.assertEqual(compressor.write(b"bar"), 0)
1027 self.assertEqual(compressor.write(b"x" * 8192), 0)
1019 1028 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1020 1029 result = buffer.getvalue()
1021 self.assertEqual(result,
1022 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1023 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1030 self.assertEqual(
1031 result,
1032 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f"
1033 b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23",
1034 )
1024 1035
1025 1036 # Test with write_return_read=True.
1026 1037 compressor = cctx.stream_writer(buffer, write_return_read=True)
1027 self.assertEqual(compressor.write(b'foo'), 3)
1028 self.assertEqual(compressor.write(b'barbiz'), 6)
1029 self.assertEqual(compressor.write(b'x' * 8192), 8192)
1038 self.assertEqual(compressor.write(b"foo"), 3)
1039 self.assertEqual(compressor.write(b"barbiz"), 6)
1040 self.assertEqual(compressor.write(b"x" * 8192), 8192)
1030 1041
1031 1042 def test_dictionary(self):
1032 1043 samples = []
1033 1044 for i in range(128):
1034 samples.append(b'foo' * 64)
1035 samples.append(b'bar' * 64)
1036 samples.append(b'foobar' * 64)
1045 samples.append(b"foo" * 64)
1046 samples.append(b"bar" * 64)
1047 samples.append(b"foobar" * 64)
1037 1048
1038 1049 d = zstd.train_dictionary(8192, samples)
1039 1050
1040 1051 h = hashlib.sha1(d.as_bytes()).hexdigest()
1041 self.assertEqual(h, '7a2e59a876db958f74257141045af8f912e00d4e')
1052 self.assertEqual(h, "7a2e59a876db958f74257141045af8f912e00d4e")
1042 1053
1043 1054 buffer = NonClosingBytesIO()
1044 1055 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
1045 1056 with cctx.stream_writer(buffer) as compressor:
1046 self.assertEqual(compressor.write(b'foo'), 0)
1047 self.assertEqual(compressor.write(b'bar'), 0)
1048 self.assertEqual(compressor.write(b'foo' * 16384), 0)
1057 self.assertEqual(compressor.write(b"foo"), 0)
1058 self.assertEqual(compressor.write(b"bar"), 0)
1059 self.assertEqual(compressor.write(b"foo" * 16384), 0)
1049 1060
1050 1061 compressed = buffer.getvalue()
1051 1062
1052 1063 params = zstd.get_frame_parameters(compressed)
1053 1064 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1054 1065 self.assertEqual(params.window_size, 2097152)
1055 1066 self.assertEqual(params.dict_id, d.dict_id())
1056 1067 self.assertFalse(params.has_checksum)
1057 1068
1058 1069 h = hashlib.sha1(compressed).hexdigest()
1059 self.assertEqual(h, '0a7c05635061f58039727cdbe76388c6f4cfef06')
1070 self.assertEqual(h, "0a7c05635061f58039727cdbe76388c6f4cfef06")
1060 1071
1061 source = b'foo' + b'bar' + (b'foo' * 16384)
1072 source = b"foo" + b"bar" + (b"foo" * 16384)
1062 1073
1063 1074 dctx = zstd.ZstdDecompressor(dict_data=d)
1064 1075
1065 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
1066 source)
1076 self.assertEqual(
1077 dctx.decompress(compressed, max_output_size=len(source)), source
1078 )
1067 1079
1068 1080 def test_compression_params(self):
1069 1081 params = zstd.ZstdCompressionParameters(
1070 1082 window_log=20,
1071 1083 chain_log=6,
1072 1084 hash_log=12,
1073 1085 min_match=5,
1074 1086 search_log=4,
1075 1087 target_length=10,
1076 strategy=zstd.STRATEGY_FAST)
1088 strategy=zstd.STRATEGY_FAST,
1089 )
1077 1090
1078 1091 buffer = NonClosingBytesIO()
1079 1092 cctx = zstd.ZstdCompressor(compression_params=params)
1080 1093 with cctx.stream_writer(buffer) as compressor:
1081 self.assertEqual(compressor.write(b'foo'), 0)
1082 self.assertEqual(compressor.write(b'bar'), 0)
1083 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
1094 self.assertEqual(compressor.write(b"foo"), 0)
1095 self.assertEqual(compressor.write(b"bar"), 0)
1096 self.assertEqual(compressor.write(b"foobar" * 16384), 0)
1084 1097
1085 1098 compressed = buffer.getvalue()
1086 1099
1087 1100 params = zstd.get_frame_parameters(compressed)
1088 1101 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1089 1102 self.assertEqual(params.window_size, 1048576)
1090 1103 self.assertEqual(params.dict_id, 0)
1091 1104 self.assertFalse(params.has_checksum)
1092 1105
1093 1106 h = hashlib.sha1(compressed).hexdigest()
1094 self.assertEqual(h, 'dd4bb7d37c1a0235b38a2f6b462814376843ef0b')
1107 self.assertEqual(h, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b")
1095 1108
1096 1109 def test_write_checksum(self):
1097 1110 no_checksum = NonClosingBytesIO()
1098 1111 cctx = zstd.ZstdCompressor(level=1)
1099 1112 with cctx.stream_writer(no_checksum) as compressor:
1100 self.assertEqual(compressor.write(b'foobar'), 0)
1113 self.assertEqual(compressor.write(b"foobar"), 0)
1101 1114
1102 1115 with_checksum = NonClosingBytesIO()
1103 1116 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
1104 1117 with cctx.stream_writer(with_checksum) as compressor:
1105 self.assertEqual(compressor.write(b'foobar'), 0)
1118 self.assertEqual(compressor.write(b"foobar"), 0)
1106 1119
1107 1120 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
1108 1121 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
1109 1122 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1110 1123 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1111 1124 self.assertEqual(no_params.dict_id, 0)
1112 1125 self.assertEqual(with_params.dict_id, 0)
1113 1126 self.assertFalse(no_params.has_checksum)
1114 1127 self.assertTrue(with_params.has_checksum)
1115 1128
1116 self.assertEqual(len(with_checksum.getvalue()),
1117 len(no_checksum.getvalue()) + 4)
1129 self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4)
1118 1130
1119 1131 def test_write_content_size(self):
1120 1132 no_size = NonClosingBytesIO()
1121 1133 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1122 1134 with cctx.stream_writer(no_size) as compressor:
1123 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1135 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1124 1136
1125 1137 with_size = NonClosingBytesIO()
1126 1138 cctx = zstd.ZstdCompressor(level=1)
1127 1139 with cctx.stream_writer(with_size) as compressor:
1128 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1140 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1129 1141
1130 1142 # Source size is not known in streaming mode, so header not
1131 1143 # written.
1132 self.assertEqual(len(with_size.getvalue()),
1133 len(no_size.getvalue()))
1144 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()))
1134 1145
1135 1146 # Declaring size will write the header.
1136 1147 with_size = NonClosingBytesIO()
1137 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
1138 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1148 with cctx.stream_writer(with_size, size=len(b"foobar" * 256)) as compressor:
1149 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1139 1150
1140 1151 no_params = zstd.get_frame_parameters(no_size.getvalue())
1141 1152 with_params = zstd.get_frame_parameters(with_size.getvalue())
1142 1153 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1143 1154 self.assertEqual(with_params.content_size, 1536)
1144 1155 self.assertEqual(no_params.dict_id, 0)
1145 1156 self.assertEqual(with_params.dict_id, 0)
1146 1157 self.assertFalse(no_params.has_checksum)
1147 1158 self.assertFalse(with_params.has_checksum)
1148 1159
1149 self.assertEqual(len(with_size.getvalue()),
1150 len(no_size.getvalue()) + 1)
1160 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1)
1151 1161
1152 1162 def test_no_dict_id(self):
1153 1163 samples = []
1154 1164 for i in range(128):
1155 samples.append(b'foo' * 64)
1156 samples.append(b'bar' * 64)
1157 samples.append(b'foobar' * 64)
1165 samples.append(b"foo" * 64)
1166 samples.append(b"bar" * 64)
1167 samples.append(b"foobar" * 64)
1158 1168
1159 1169 d = zstd.train_dictionary(1024, samples)
1160 1170
1161 1171 with_dict_id = NonClosingBytesIO()
1162 1172 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
1163 1173 with cctx.stream_writer(with_dict_id) as compressor:
1164 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1174 self.assertEqual(compressor.write(b"foobarfoobar"), 0)
1165 1175
1166 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
1176 self.assertEqual(with_dict_id.getvalue()[4:5], b"\x03")
1167 1177
1168 1178 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
1169 1179 no_dict_id = NonClosingBytesIO()
1170 1180 with cctx.stream_writer(no_dict_id) as compressor:
1171 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1181 self.assertEqual(compressor.write(b"foobarfoobar"), 0)
1172 1182
1173 self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00')
1183 self.assertEqual(no_dict_id.getvalue()[4:5], b"\x00")
1174 1184
1175 1185 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
1176 1186 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
1177 1187 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1178 1188 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1179 1189 self.assertEqual(no_params.dict_id, 0)
1180 1190 self.assertEqual(with_params.dict_id, d.dict_id())
1181 1191 self.assertFalse(no_params.has_checksum)
1182 1192 self.assertFalse(with_params.has_checksum)
1183 1193
1184 self.assertEqual(len(with_dict_id.getvalue()),
1185 len(no_dict_id.getvalue()) + 4)
1194 self.assertEqual(len(with_dict_id.getvalue()), len(no_dict_id.getvalue()) + 4)
1186 1195
1187 1196 def test_memory_size(self):
1188 1197 cctx = zstd.ZstdCompressor(level=3)
1189 1198 buffer = io.BytesIO()
1190 1199 with cctx.stream_writer(buffer) as compressor:
1191 compressor.write(b'foo')
1200 compressor.write(b"foo")
1192 1201 size = compressor.memory_size()
1193 1202
1194 1203 self.assertGreater(size, 100000)
1195 1204
1196 1205 def test_write_size(self):
1197 1206 cctx = zstd.ZstdCompressor(level=3)
1198 1207 dest = OpCountingBytesIO()
1199 1208 with cctx.stream_writer(dest, write_size=1) as compressor:
1200 self.assertEqual(compressor.write(b'foo'), 0)
1201 self.assertEqual(compressor.write(b'bar'), 0)
1202 self.assertEqual(compressor.write(b'foobar'), 0)
1209 self.assertEqual(compressor.write(b"foo"), 0)
1210 self.assertEqual(compressor.write(b"bar"), 0)
1211 self.assertEqual(compressor.write(b"foobar"), 0)
1203 1212
1204 1213 self.assertEqual(len(dest.getvalue()), dest._write_count)
1205 1214
1206 1215 def test_flush_repeated(self):
1207 1216 cctx = zstd.ZstdCompressor(level=3)
1208 1217 dest = OpCountingBytesIO()
1209 1218 with cctx.stream_writer(dest) as compressor:
1210 self.assertEqual(compressor.write(b'foo'), 0)
1219 self.assertEqual(compressor.write(b"foo"), 0)
1211 1220 self.assertEqual(dest._write_count, 0)
1212 1221 self.assertEqual(compressor.flush(), 12)
1213 1222 self.assertEqual(dest._write_count, 1)
1214 self.assertEqual(compressor.write(b'bar'), 0)
1223 self.assertEqual(compressor.write(b"bar"), 0)
1215 1224 self.assertEqual(dest._write_count, 1)
1216 1225 self.assertEqual(compressor.flush(), 6)
1217 1226 self.assertEqual(dest._write_count, 2)
1218 self.assertEqual(compressor.write(b'baz'), 0)
1227 self.assertEqual(compressor.write(b"baz"), 0)
1219 1228
1220 1229 self.assertEqual(dest._write_count, 3)
1221 1230
1222 1231 def test_flush_empty_block(self):
1223 1232 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
1224 1233 dest = OpCountingBytesIO()
1225 1234 with cctx.stream_writer(dest) as compressor:
1226 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1235 self.assertEqual(compressor.write(b"foobar" * 8192), 0)
1227 1236 count = dest._write_count
1228 1237 offset = dest.tell()
1229 1238 self.assertEqual(compressor.flush(), 23)
1230 1239 self.assertGreater(dest._write_count, count)
1231 1240 self.assertGreater(dest.tell(), offset)
1232 1241 offset = dest.tell()
1233 1242 # Ending the write here should cause an empty block to be written
1234 1243 # to denote end of frame.
1235 1244
1236 1245 trailing = dest.getvalue()[offset:]
1237 1246 # 3 bytes block header + 4 bytes frame checksum
1238 1247 self.assertEqual(len(trailing), 7)
1239 1248
1240 1249 header = trailing[0:3]
1241 self.assertEqual(header, b'\x01\x00\x00')
1250 self.assertEqual(header, b"\x01\x00\x00")
1242 1251
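For readers decoding the assertion above: per the zstd frame format, the 3-byte block header packs (last_block, block_type, block_size) into 24 little-endian bits, so b"\x01\x00\x00" is an empty raw block with the last-block bit set, and the 4 bytes after it are the XXH64 frame checksum. A sketch of unpacking it (assumes the layout from the zstd format specification, RFC 8878):

    import struct

    header = b"\x01\x00\x00"
    (bits,) = struct.unpack("<I", header + b"\x00")  # widen the 3-byte field to a u32
    last_block = bits & 0x1          # 1 -> final block of the frame
    block_type = (bits >> 1) & 0x3   # 0 -> raw (uncompressed) block
    block_size = bits >> 3           # 0 -> empty block
    assert (last_block, block_type, block_size) == (1, 0, 0)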
1243 1252 def test_flush_frame(self):
1244 1253 cctx = zstd.ZstdCompressor(level=3)
1245 1254 dest = OpCountingBytesIO()
1246 1255
1247 1256 with cctx.stream_writer(dest) as compressor:
1248 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1257 self.assertEqual(compressor.write(b"foobar" * 8192), 0)
1249 1258 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1250 compressor.write(b'biz' * 16384)
1259 compressor.write(b"biz" * 16384)
1251 1260
1252 self.assertEqual(dest.getvalue(),
1253 # Frame 1.
1254 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f'
1255 b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08'
1256 # Frame 2.
1257 b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a'
1258 b'\x01\x00\xfa\x3f\x75\x37\x04')
1261 self.assertEqual(
1262 dest.getvalue(),
1263 # Frame 1.
1264 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f"
1265 b"\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08"
1266 # Frame 2.
1267 b"\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a"
1268 b"\x01\x00\xfa\x3f\x75\x37\x04",
1269 )
1259 1270
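Context for the expected bytes above: flush(zstd.FLUSH_FRAME) ends the current frame, so the writer emits two complete, independently decompressible frames. A usage sketch, not taken from the diff:

    import io
    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    buf = io.BytesIO()
    with cctx.stream_writer(buf) as compressor:
        compressor.write(b"record one")
        compressor.flush(zstd.FLUSH_FRAME)  # closes frame 1
        compressor.write(b"record two")     # starts frame 2
    # leaving the context flushes and closes frame 2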
1260 1271 def test_bad_flush_mode(self):
1261 1272 cctx = zstd.ZstdCompressor()
1262 1273 dest = io.BytesIO()
1263 1274 with cctx.stream_writer(dest) as compressor:
1264 with self.assertRaisesRegexp(ValueError, 'unknown flush_mode: 42'):
1275 with self.assertRaisesRegex(ValueError, "unknown flush_mode: 42"):
1265 1276 compressor.flush(flush_mode=42)
1266 1277
1267 1278 def test_multithreaded(self):
1268 1279 dest = NonClosingBytesIO()
1269 1280 cctx = zstd.ZstdCompressor(threads=2)
1270 1281 with cctx.stream_writer(dest) as compressor:
1271 compressor.write(b'a' * 1048576)
1272 compressor.write(b'b' * 1048576)
1273 compressor.write(b'c' * 1048576)
1282 compressor.write(b"a" * 1048576)
1283 compressor.write(b"b" * 1048576)
1284 compressor.write(b"c" * 1048576)
1274 1285
1275 self.assertEqual(len(dest.getvalue()), 295)
1286 self.assertEqual(len(dest.getvalue()), 111)
1276 1287
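A note on the API exercised here: a positive threads value routes compression through zstd's multi-threaded compressor (per the library docs, a negative value uses the logical CPU count). Its output can differ from the single-threaded compressor but remains a standard frame. Minimal sketch (illustrative):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(threads=2)  # multi-threaded compression
    frame = cctx.compress(b"a" * 1048576)
    assert zstd.ZstdDecompressor().decompress(frame) == b"a" * 1048576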
1277 1288 def test_tell(self):
1278 1289 dest = io.BytesIO()
1279 1290 cctx = zstd.ZstdCompressor()
1280 1291 with cctx.stream_writer(dest) as compressor:
1281 1292 self.assertEqual(compressor.tell(), 0)
1282 1293
1283 1294 for i in range(256):
1284 compressor.write(b'foo' * (i + 1))
1295 compressor.write(b"foo" * (i + 1))
1285 1296 self.assertEqual(compressor.tell(), dest.tell())
1286 1297
1287 1298 def test_bad_size(self):
1288 1299 cctx = zstd.ZstdCompressor()
1289 1300
1290 1301 dest = io.BytesIO()
1291 1302
1292 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1303 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
1293 1304 with cctx.stream_writer(dest, size=2) as compressor:
1294 compressor.write(b'foo')
1305 compressor.write(b"foo")
1295 1306
1296 1307 # Test another operation.
1297 1308 with cctx.stream_writer(dest, size=42):
1298 1309 pass
1299 1310
1300 1311 def test_tarfile_compat(self):
1301 1312 dest = NonClosingBytesIO()
1302 1313 cctx = zstd.ZstdCompressor()
1303 1314 with cctx.stream_writer(dest) as compressor:
1304 with tarfile.open('tf', mode='w|', fileobj=compressor) as tf:
1305 tf.add(__file__, 'test_compressor.py')
1315 with tarfile.open("tf", mode="w|", fileobj=compressor) as tf:
1316 tf.add(__file__, "test_compressor.py")
1306 1317
1307 1318 dest = io.BytesIO(dest.getvalue())
1308 1319
1309 1320 dctx = zstd.ZstdDecompressor()
1310 1321 with dctx.stream_reader(dest) as reader:
1311 with tarfile.open(mode='r|', fileobj=reader) as tf:
1322 with tarfile.open(mode="r|", fileobj=reader) as tf:
1312 1323 for member in tf:
1313 self.assertEqual(member.name, 'test_compressor.py')
1324 self.assertEqual(member.name, "test_compressor.py")
1314 1325
1315 1326
1316 1327 @make_cffi
1317 class TestCompressor_read_to_iter(unittest.TestCase):
1328 class TestCompressor_read_to_iter(TestCase):
1318 1329 def test_type_validation(self):
1319 1330 cctx = zstd.ZstdCompressor()
1320 1331
1321 1332 # Object with read() works.
1322 1333 for chunk in cctx.read_to_iter(io.BytesIO()):
1323 1334 pass
1324 1335
1325 1336 # Buffer protocol works.
1326 for chunk in cctx.read_to_iter(b'foobar'):
1337 for chunk in cctx.read_to_iter(b"foobar"):
1327 1338 pass
1328 1339
1329 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1340 with self.assertRaisesRegex(ValueError, "must pass an object with a read"):
1330 1341 for chunk in cctx.read_to_iter(True):
1331 1342 pass
1332 1343
1333 1344 def test_read_empty(self):
1334 1345 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1335 1346
1336 1347 source = io.BytesIO()
1337 1348 it = cctx.read_to_iter(source)
1338 1349 chunks = list(it)
1339 1350 self.assertEqual(len(chunks), 1)
1340 compressed = b''.join(chunks)
1341 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
1351 compressed = b"".join(chunks)
1352 self.assertEqual(compressed, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
1342 1353
1343 1354 # And again with the buffer protocol.
1344 it = cctx.read_to_iter(b'')
1355 it = cctx.read_to_iter(b"")
1345 1356 chunks = list(it)
1346 1357 self.assertEqual(len(chunks), 1)
1347 compressed2 = b''.join(chunks)
1358 compressed2 = b"".join(chunks)
1348 1359 self.assertEqual(compressed2, compressed)
1349 1360
1350 1361 def test_read_large(self):
1351 1362 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1352 1363
1353 1364 source = io.BytesIO()
1354 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1355 source.write(b'o')
1365 source.write(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1366 source.write(b"o")
1356 1367 source.seek(0)
1357 1368
1358 1369 # Creating an iterator should not perform any compression until
1359 1370 # first read.
1360 1371 it = cctx.read_to_iter(source, size=len(source.getvalue()))
1361 1372 self.assertEqual(source.tell(), 0)
1362 1373
1363 1374 # We should have exactly 2 output chunks.
1364 1375 chunks = []
1365 1376 chunk = next(it)
1366 1377 self.assertIsNotNone(chunk)
1367 1378 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1368 1379 chunks.append(chunk)
1369 1380 chunk = next(it)
1370 1381 self.assertIsNotNone(chunk)
1371 1382 chunks.append(chunk)
1372 1383
1373 1384 self.assertEqual(source.tell(), len(source.getvalue()))
1374 1385
1375 1386 with self.assertRaises(StopIteration):
1376 1387 next(it)
1377 1388
1378 1389 # And again for good measure.
1379 1390 with self.assertRaises(StopIteration):
1380 1391 next(it)
1381 1392
1382 1393 # We should get the same output as the one-shot compression mechanism.
1383 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1394 self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue()))
1384 1395
1385 params = zstd.get_frame_parameters(b''.join(chunks))
1396 params = zstd.get_frame_parameters(b"".join(chunks))
1386 1397 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1387 1398 self.assertEqual(params.window_size, 262144)
1388 1399 self.assertEqual(params.dict_id, 0)
1389 1400 self.assertFalse(params.has_checksum)
1390 1401
1391 1402 # Now check the buffer protocol.
1392 1403 it = cctx.read_to_iter(source.getvalue())
1393 1404 chunks = list(it)
1394 1405 self.assertEqual(len(chunks), 2)
1395 1406
1396 params = zstd.get_frame_parameters(b''.join(chunks))
1407 params = zstd.get_frame_parameters(b"".join(chunks))
1397 1408 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1398 #self.assertEqual(params.window_size, 262144)
1409 # self.assertEqual(params.window_size, 262144)
1399 1410 self.assertEqual(params.dict_id, 0)
1400 1411 self.assertFalse(params.has_checksum)
1401 1412
1402 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1413 self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue()))
1403 1414
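These iterator tests all drive the same consumer pattern; in application code it typically looks like this (file names are placeholders):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    with open("input.bin", "rb") as src, open("output.zst", "wb") as dst:
        for chunk in cctx.read_to_iter(src, read_size=32768):
            dst.write(chunk)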
1404 1415 def test_read_write_size(self):
1405 source = OpCountingBytesIO(b'foobarfoobar')
1416 source = OpCountingBytesIO(b"foobarfoobar")
1406 1417 cctx = zstd.ZstdCompressor(level=3)
1407 1418 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
1408 1419 self.assertEqual(len(chunk), 1)
1409 1420
1410 1421 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
1411 1422
1412 1423 def test_multithreaded(self):
1413 1424 source = io.BytesIO()
1414 source.write(b'a' * 1048576)
1415 source.write(b'b' * 1048576)
1416 source.write(b'c' * 1048576)
1425 source.write(b"a" * 1048576)
1426 source.write(b"b" * 1048576)
1427 source.write(b"c" * 1048576)
1417 1428 source.seek(0)
1418 1429
1419 1430 cctx = zstd.ZstdCompressor(threads=2)
1420 1431
1421 compressed = b''.join(cctx.read_to_iter(source))
1422 self.assertEqual(len(compressed), 295)
1432 compressed = b"".join(cctx.read_to_iter(source))
1433 self.assertEqual(len(compressed), 111)
1423 1434
1424 1435 def test_bad_size(self):
1425 1436 cctx = zstd.ZstdCompressor()
1426 1437
1427 source = io.BytesIO(b'a' * 42)
1438 source = io.BytesIO(b"a" * 42)
1428 1439
1429 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1430 b''.join(cctx.read_to_iter(source, size=2))
1440 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
1441 b"".join(cctx.read_to_iter(source, size=2))
1431 1442
1432 1443 # Test another operation on an errored compressor.
1433 b''.join(cctx.read_to_iter(source))
1444 b"".join(cctx.read_to_iter(source))
1434 1445
1435 1446
1436 1447 @make_cffi
1437 class TestCompressor_chunker(unittest.TestCase):
1448 class TestCompressor_chunker(TestCase):
1438 1449 def test_empty(self):
1439 1450 cctx = zstd.ZstdCompressor(write_content_size=False)
1440 1451 chunker = cctx.chunker()
1441 1452
1442 it = chunker.compress(b'')
1453 it = chunker.compress(b"")
1443 1454
1444 1455 with self.assertRaises(StopIteration):
1445 1456 next(it)
1446 1457
1447 1458 it = chunker.finish()
1448 1459
1449 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00')
1460 self.assertEqual(next(it), b"\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00")
1450 1461
1451 1462 with self.assertRaises(StopIteration):
1452 1463 next(it)
1453 1464
1454 1465 def test_simple_input(self):
1455 1466 cctx = zstd.ZstdCompressor()
1456 1467 chunker = cctx.chunker()
1457 1468
1458 it = chunker.compress(b'foobar')
1469 it = chunker.compress(b"foobar")
1459 1470
1460 1471 with self.assertRaises(StopIteration):
1461 1472 next(it)
1462 1473
1463 it = chunker.compress(b'baz' * 30)
1474 it = chunker.compress(b"baz" * 30)
1464 1475
1465 1476 with self.assertRaises(StopIteration):
1466 1477 next(it)
1467 1478
1468 1479 it = chunker.finish()
1469 1480
1470 self.assertEqual(next(it),
1471 b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f'
1472 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1481 self.assertEqual(
1482 next(it),
1483 b"\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f"
1484 b"\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e",
1485 )
1473 1486
1474 1487 with self.assertRaises(StopIteration):
1475 1488 next(it)
1476 1489
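The chunker buffers input and only emits data as full chunks become available, which is why compress() yields nothing above until finish(). A typical driving loop (illustrative):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=16384)
    out = []
    for piece in (b"foo" * 1024, b"bar" * 1024):  # any iterable of bytes
        out.extend(chunker.compress(piece))
    out.extend(chunker.finish())  # drains buffered input and ends the frame
    frame = b"".join(out)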
1477 1490 def test_input_size(self):
1478 1491 cctx = zstd.ZstdCompressor()
1479 1492 chunker = cctx.chunker(size=1024)
1480 1493
1481 it = chunker.compress(b'x' * 1000)
1494 it = chunker.compress(b"x" * 1000)
1482 1495
1483 1496 with self.assertRaises(StopIteration):
1484 1497 next(it)
1485 1498
1486 it = chunker.compress(b'y' * 24)
1499 it = chunker.compress(b"y" * 24)
1487 1500
1488 1501 with self.assertRaises(StopIteration):
1489 1502 next(it)
1490 1503
1491 1504 chunks = list(chunker.finish())
1492 1505
1493 self.assertEqual(chunks, [
1494 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1495 b'\xa0\x16\xe3\x2b\x80\x05'
1496 ])
1506 self.assertEqual(
1507 chunks,
1508 [
1509 b"\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00"
1510 b"\xa0\x16\xe3\x2b\x80\x05"
1511 ],
1512 )
1497 1513
1498 1514 dctx = zstd.ZstdDecompressor()
1499 1515
1500 self.assertEqual(dctx.decompress(b''.join(chunks)),
1501 (b'x' * 1000) + (b'y' * 24))
1516 self.assertEqual(dctx.decompress(b"".join(chunks)), (b"x" * 1000) + (b"y" * 24))
1502 1517
1503 1518 def test_small_chunk_size(self):
1504 1519 cctx = zstd.ZstdCompressor()
1505 1520 chunker = cctx.chunker(chunk_size=1)
1506 1521
1507 chunks = list(chunker.compress(b'foo' * 1024))
1522 chunks = list(chunker.compress(b"foo" * 1024))
1508 1523 self.assertEqual(chunks, [])
1509 1524
1510 1525 chunks = list(chunker.finish())
1511 1526 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1512 1527
1513 1528 self.assertEqual(
1514 b''.join(chunks),
1515 b'\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1516 b'\xfa\xd3\x77\x43')
1529 b"".join(chunks),
1530 b"\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00"
1531 b"\xfa\xd3\x77\x43",
1532 )
1517 1533
1518 1534 dctx = zstd.ZstdDecompressor()
1519 self.assertEqual(dctx.decompress(b''.join(chunks),
1520 max_output_size=10000),
1521 b'foo' * 1024)
1535 self.assertEqual(
1536 dctx.decompress(b"".join(chunks), max_output_size=10000), b"foo" * 1024
1537 )
1522 1538
1523 1539 def test_input_types(self):
1524 1540 cctx = zstd.ZstdCompressor()
1525 1541
1526 1542 mutable_array = bytearray(3)
1527 mutable_array[:] = b'foo'
1543 mutable_array[:] = b"foo"
1528 1544
1529 1545 sources = [
1530 memoryview(b'foo'),
1531 bytearray(b'foo'),
1546 memoryview(b"foo"),
1547 bytearray(b"foo"),
1532 1548 mutable_array,
1533 1549 ]
1534 1550
1535 1551 for source in sources:
1536 1552 chunker = cctx.chunker()
1537 1553
1538 1554 self.assertEqual(list(chunker.compress(source)), [])
1539 self.assertEqual(list(chunker.finish()), [
1540 b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f'
1541 ])
1555 self.assertEqual(
1556 list(chunker.finish()),
1557 [b"\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f"],
1558 )
1542 1559
1543 1560 def test_flush(self):
1544 1561 cctx = zstd.ZstdCompressor()
1545 1562 chunker = cctx.chunker()
1546 1563
1547 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1548 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1564 self.assertEqual(list(chunker.compress(b"foo" * 1024)), [])
1565 self.assertEqual(list(chunker.compress(b"bar" * 1024)), [])
1549 1566
1550 1567 chunks1 = list(chunker.flush())
1551 1568
1552 self.assertEqual(chunks1, [
1553 b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1554 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1555 ])
1569 self.assertEqual(
1570 chunks1,
1571 [
1572 b"\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72"
1573 b"\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02"
1574 ],
1575 )
1556 1576
1557 1577 self.assertEqual(list(chunker.flush()), [])
1558 1578 self.assertEqual(list(chunker.flush()), [])
1559 1579
1560 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1580 self.assertEqual(list(chunker.compress(b"baz" * 1024)), [])
1561 1581
1562 1582 chunks2 = list(chunker.flush())
1563 1583 self.assertEqual(len(chunks2), 1)
1564 1584
1565 1585 chunks3 = list(chunker.finish())
1566 1586 self.assertEqual(len(chunks2), 1)
1567 1587
1568 1588 dctx = zstd.ZstdDecompressor()
1569 1589
1570 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1571 max_output_size=10000),
1572 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1590 self.assertEqual(
1591 dctx.decompress(
1592 b"".join(chunks1 + chunks2 + chunks3), max_output_size=10000
1593 ),
1594 (b"foo" * 1024) + (b"bar" * 1024) + (b"baz" * 1024),
1595 )
1573 1596
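The distinction exercised above: flush() drains buffered input at a zstd block boundary and leaves the frame open for more compress() calls, while finish() writes the frame epilogue and retires the chunker. In miniature (illustrative):

    import zstandard as zstd

    chunker = zstd.ZstdCompressor().chunker()
    list(chunker.compress(b"data" * 4096))
    flushed = b"".join(chunker.flush())  # block boundary: decodable so far, frame still open
    ended = b"".join(chunker.finish())   # frame epilogue; no further calls allowed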
1574 1597 def test_compress_after_finish(self):
1575 1598 cctx = zstd.ZstdCompressor()
1576 1599 chunker = cctx.chunker()
1577 1600
1578 list(chunker.compress(b'foo'))
1601 list(chunker.compress(b"foo"))
1579 1602 list(chunker.finish())
1580 1603
1581 with self.assertRaisesRegexp(
1582 zstd.ZstdError,
1583 r'cannot call compress\(\) after compression finished'):
1584 list(chunker.compress(b'foo'))
1604 with self.assertRaisesRegex(
1605 zstd.ZstdError, r"cannot call compress\(\) after compression finished"
1606 ):
1607 list(chunker.compress(b"foo"))
1585 1608
1586 1609 def test_flush_after_finish(self):
1587 1610 cctx = zstd.ZstdCompressor()
1588 1611 chunker = cctx.chunker()
1589 1612
1590 list(chunker.compress(b'foo'))
1613 list(chunker.compress(b"foo"))
1591 1614 list(chunker.finish())
1592 1615
1593 with self.assertRaisesRegexp(
1594 zstd.ZstdError,
1595 r'cannot call flush\(\) after compression finished'):
1616 with self.assertRaisesRegex(
1617 zstd.ZstdError, r"cannot call flush\(\) after compression finished"
1618 ):
1596 1619 list(chunker.flush())
1597 1620
1598 1621 def test_finish_after_finish(self):
1599 1622 cctx = zstd.ZstdCompressor()
1600 1623 chunker = cctx.chunker()
1601 1624
1602 list(chunker.compress(b'foo'))
1625 list(chunker.compress(b"foo"))
1603 1626 list(chunker.finish())
1604 1627
1605 with self.assertRaisesRegexp(
1606 zstd.ZstdError,
1607 r'cannot call finish\(\) after compression finished'):
1628 with self.assertRaisesRegex(
1629 zstd.ZstdError, r"cannot call finish\(\) after compression finished"
1630 ):
1608 1631 list(chunker.finish())
1609 1632
1610 1633
1611 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1634 class TestCompressor_multi_compress_to_buffer(TestCase):
1612 1635 def test_invalid_inputs(self):
1613 1636 cctx = zstd.ZstdCompressor()
1614 1637
1615 if not hasattr(cctx, 'multi_compress_to_buffer'):
1616 self.skipTest('multi_compress_to_buffer not available')
1638 if not hasattr(cctx, "multi_compress_to_buffer"):
1639 self.skipTest("multi_compress_to_buffer not available")
1617 1640
1618 1641 with self.assertRaises(TypeError):
1619 1642 cctx.multi_compress_to_buffer(True)
1620 1643
1621 1644 with self.assertRaises(TypeError):
1622 1645 cctx.multi_compress_to_buffer((1, 2))
1623 1646
1624 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1625 cctx.multi_compress_to_buffer([u'foo'])
1647 with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"):
1648 cctx.multi_compress_to_buffer([u"foo"])
1626 1649
1627 1650 def test_empty_input(self):
1628 1651 cctx = zstd.ZstdCompressor()
1629 1652
1630 if not hasattr(cctx, 'multi_compress_to_buffer'):
1631 self.skipTest('multi_compress_to_buffer not available')
1653 if not hasattr(cctx, "multi_compress_to_buffer"):
1654 self.skipTest("multi_compress_to_buffer not available")
1632 1655
1633 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1656 with self.assertRaisesRegex(ValueError, "no source elements found"):
1634 1657 cctx.multi_compress_to_buffer([])
1635 1658
1636 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
1637 cctx.multi_compress_to_buffer([b'', b'', b''])
1659 with self.assertRaisesRegex(ValueError, "source elements are empty"):
1660 cctx.multi_compress_to_buffer([b"", b"", b""])
1638 1661
1639 1662 def test_list_input(self):
1640 1663 cctx = zstd.ZstdCompressor(write_checksum=True)
1641 1664
1642 if not hasattr(cctx, 'multi_compress_to_buffer'):
1643 self.skipTest('multi_compress_to_buffer not available')
1665 if not hasattr(cctx, "multi_compress_to_buffer"):
1666 self.skipTest("multi_compress_to_buffer not available")
1644 1667
1645 original = [b'foo' * 12, b'bar' * 6]
1668 original = [b"foo" * 12, b"bar" * 6]
1646 1669 frames = [cctx.compress(c) for c in original]
1647 1670 b = cctx.multi_compress_to_buffer(original)
1648 1671
1649 1672 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
1650 1673
1651 1674 self.assertEqual(len(b), 2)
1652 1675 self.assertEqual(b.size(), 44)
1653 1676
1654 1677 self.assertEqual(b[0].tobytes(), frames[0])
1655 1678 self.assertEqual(b[1].tobytes(), frames[1])
1656 1679
1657 1680 def test_buffer_with_segments_input(self):
1658 1681 cctx = zstd.ZstdCompressor(write_checksum=True)
1659 1682
1660 if not hasattr(cctx, 'multi_compress_to_buffer'):
1661 self.skipTest('multi_compress_to_buffer not available')
1683 if not hasattr(cctx, "multi_compress_to_buffer"):
1684 self.skipTest("multi_compress_to_buffer not available")
1662 1685
1663 original = [b'foo' * 4, b'bar' * 6]
1686 original = [b"foo" * 4, b"bar" * 6]
1664 1687 frames = [cctx.compress(c) for c in original]
1665 1688
1666 offsets = struct.pack('=QQQQ', 0, len(original[0]),
1667 len(original[0]), len(original[1]))
1668 segments = zstd.BufferWithSegments(b''.join(original), offsets)
1689 offsets = struct.pack(
1690 "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1])
1691 )
1692 segments = zstd.BufferWithSegments(b"".join(original), offsets)
1669 1693
1670 1694 result = cctx.multi_compress_to_buffer(segments)
1671 1695
1672 1696 self.assertEqual(len(result), 2)
1673 1697 self.assertEqual(result.size(), 47)
1674 1698
1675 1699 self.assertEqual(result[0].tobytes(), frames[0])
1676 1700 self.assertEqual(result[1].tobytes(), frames[1])
1677 1701
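The "=QQQQ" packing above is the segments table: a flat array of unsigned 64-bit (offset, length) pairs locating each input inside the backing buffer. A generalized construction (illustrative):

    import struct
    import zstandard as zstd

    items = [b"foo" * 4, b"bar" * 6]
    offsets, pos = [], 0
    for item in items:
        offsets.extend((pos, len(item)))  # one (offset, length) pair per segment
        pos += len(item)
    segments = zstd.BufferWithSegments(
        b"".join(items), struct.pack("=%dQ" % len(offsets), *offsets)
    )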
1678 1702 def test_buffer_with_segments_collection_input(self):
1679 1703 cctx = zstd.ZstdCompressor(write_checksum=True)
1680 1704
1681 if not hasattr(cctx, 'multi_compress_to_buffer'):
1682 self.skipTest('multi_compress_to_buffer not available')
1705 if not hasattr(cctx, "multi_compress_to_buffer"):
1706 self.skipTest("multi_compress_to_buffer not available")
1683 1707
1684 1708 original = [
1685 b'foo1',
1686 b'foo2' * 2,
1687 b'foo3' * 3,
1688 b'foo4' * 4,
1689 b'foo5' * 5,
1709 b"foo1",
1710 b"foo2" * 2,
1711 b"foo3" * 3,
1712 b"foo4" * 4,
1713 b"foo5" * 5,
1690 1714 ]
1691 1715
1692 1716 frames = [cctx.compress(c) for c in original]
1693 1717
1694 b = b''.join([original[0], original[1]])
1695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1696 0, len(original[0]),
1697 len(original[0]), len(original[1])))
1698 b = b''.join([original[2], original[3], original[4]])
1699 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1700 0, len(original[2]),
1701 len(original[2]), len(original[3]),
1702 len(original[2]) + len(original[3]), len(original[4])))
1718 b = b"".join([original[0], original[1]])
1719 b1 = zstd.BufferWithSegments(
1720 b,
1721 struct.pack(
1722 "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1])
1723 ),
1724 )
1725 b = b"".join([original[2], original[3], original[4]])
1726 b2 = zstd.BufferWithSegments(
1727 b,
1728 struct.pack(
1729 "=QQQQQQ",
1730 0,
1731 len(original[2]),
1732 len(original[2]),
1733 len(original[3]),
1734 len(original[2]) + len(original[3]),
1735 len(original[4]),
1736 ),
1737 )
1703 1738
1704 1739 c = zstd.BufferWithSegmentsCollection(b1, b2)
1705 1740
1706 1741 result = cctx.multi_compress_to_buffer(c)
1707 1742
1708 1743 self.assertEqual(len(result), len(frames))
1709 1744
1710 1745 for i, frame in enumerate(frames):
1711 1746 self.assertEqual(result[i].tobytes(), frame)
1712 1747
1713 1748 def test_multiple_threads(self):
1714 1749 # The threads argument will cause multi-threaded ZSTD APIs to be used,
1715 1750 # which will make the output different.
1716 1751 refcctx = zstd.ZstdCompressor(write_checksum=True)
1717 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
1752 reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)]
1718 1753
1719 1754 cctx = zstd.ZstdCompressor(write_checksum=True)
1720 1755
1721 if not hasattr(cctx, 'multi_compress_to_buffer'):
1722 self.skipTest('multi_compress_to_buffer not available')
1756 if not hasattr(cctx, "multi_compress_to_buffer"):
1757 self.skipTest("multi_compress_to_buffer not available")
1723 1758
1724 1759 frames = []
1725 frames.extend(b'x' * 64 for i in range(256))
1726 frames.extend(b'y' * 64 for i in range(256))
1760 frames.extend(b"x" * 64 for i in range(256))
1761 frames.extend(b"y" * 64 for i in range(256))
1727 1762
1728 1763 result = cctx.multi_compress_to_buffer(frames, threads=-1)
1729 1764
1730 1765 self.assertEqual(len(result), 512)
1731 1766 for i in range(512):
1732 1767 if i < 256:
1733 1768 self.assertEqual(result[i].tobytes(), reference[0])
1734 1769 else:
1735 1770 self.assertEqual(result[i].tobytes(), reference[1])
@@ -1,711 +1,836 b''
1 1 import io
2 2 import os
3 3 import unittest
4 4
5 5 try:
6 6 import hypothesis
7 7 import hypothesis.strategies as strategies
8 8 except ImportError:
9 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest("hypothesis not available")
10 10
11 11 import zstandard as zstd
12 12
13 from . common import (
13 from .common import (
14 14 make_cffi,
15 15 NonClosingBytesIO,
16 16 random_input_data,
17 TestCase,
17 18 )
18 19
19 20
20 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
21 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
21 22 @make_cffi
22 class TestCompressor_stream_reader_fuzzing(unittest.TestCase):
23 class TestCompressor_stream_reader_fuzzing(TestCase):
23 24 @hypothesis.settings(
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
26 level=strategies.integers(min_value=1, max_value=5),
27 source_read_size=strategies.integers(1, 16384),
28 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
29 def test_stream_source_read(self, original, level, source_read_size,
30 read_size):
25 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
26 )
27 @hypothesis.given(
28 original=strategies.sampled_from(random_input_data()),
29 level=strategies.integers(min_value=1, max_value=5),
30 source_read_size=strategies.integers(1, 16384),
31 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
32 )
33 def test_stream_source_read(self, original, level, source_read_size, read_size):
31 34 if read_size == 0:
32 35 read_size = -1
33 36
34 37 refctx = zstd.ZstdCompressor(level=level)
35 38 ref_frame = refctx.compress(original)
36 39
37 40 cctx = zstd.ZstdCompressor(level=level)
38 with cctx.stream_reader(io.BytesIO(original), size=len(original),
39 read_size=source_read_size) as reader:
41 with cctx.stream_reader(
42 io.BytesIO(original), size=len(original), read_size=source_read_size
43 ) as reader:
40 44 chunks = []
41 45 while True:
42 46 chunk = reader.read(read_size)
43 47 if not chunk:
44 48 break
45 49
46 50 chunks.append(chunk)
47 51
48 self.assertEqual(b''.join(chunks), ref_frame)
52 self.assertEqual(b"".join(chunks), ref_frame)
49 53
50 54 @hypothesis.settings(
51 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
52 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
53 level=strategies.integers(min_value=1, max_value=5),
54 source_read_size=strategies.integers(1, 16384),
55 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
56 def test_buffer_source_read(self, original, level, source_read_size,
57 read_size):
55 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
56 )
57 @hypothesis.given(
58 original=strategies.sampled_from(random_input_data()),
59 level=strategies.integers(min_value=1, max_value=5),
60 source_read_size=strategies.integers(1, 16384),
61 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
62 )
63 def test_buffer_source_read(self, original, level, source_read_size, read_size):
58 64 if read_size == 0:
59 65 read_size = -1
60 66
61 67 refctx = zstd.ZstdCompressor(level=level)
62 68 ref_frame = refctx.compress(original)
63 69
64 70 cctx = zstd.ZstdCompressor(level=level)
65 with cctx.stream_reader(original, size=len(original),
66 read_size=source_read_size) as reader:
71 with cctx.stream_reader(
72 original, size=len(original), read_size=source_read_size
73 ) as reader:
67 74 chunks = []
68 75 while True:
69 76 chunk = reader.read(read_size)
70 77 if not chunk:
71 78 break
72 79
73 80 chunks.append(chunk)
74 81
75 self.assertEqual(b''.join(chunks), ref_frame)
82 self.assertEqual(b"".join(chunks), ref_frame)
76 83
77 84 @hypothesis.settings(
78 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
79 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
80 level=strategies.integers(min_value=1, max_value=5),
81 source_read_size=strategies.integers(1, 16384),
82 read_sizes=strategies.data())
83 def test_stream_source_read_variance(self, original, level, source_read_size,
84 read_sizes):
85 suppress_health_check=[
86 hypothesis.HealthCheck.large_base_example,
87 hypothesis.HealthCheck.too_slow,
88 ]
89 )
90 @hypothesis.given(
91 original=strategies.sampled_from(random_input_data()),
92 level=strategies.integers(min_value=1, max_value=5),
93 source_read_size=strategies.integers(1, 16384),
94 read_sizes=strategies.data(),
95 )
96 def test_stream_source_read_variance(
97 self, original, level, source_read_size, read_sizes
98 ):
85 99 refctx = zstd.ZstdCompressor(level=level)
86 100 ref_frame = refctx.compress(original)
87 101
88 102 cctx = zstd.ZstdCompressor(level=level)
89 with cctx.stream_reader(io.BytesIO(original), size=len(original),
90 read_size=source_read_size) as reader:
103 with cctx.stream_reader(
104 io.BytesIO(original), size=len(original), read_size=source_read_size
105 ) as reader:
91 106 chunks = []
92 107 while True:
93 108 read_size = read_sizes.draw(strategies.integers(-1, 16384))
94 109 chunk = reader.read(read_size)
95 110 if not chunk and read_size:
96 111 break
97 112
98 113 chunks.append(chunk)
99 114
100 self.assertEqual(b''.join(chunks), ref_frame)
115 self.assertEqual(b"".join(chunks), ref_frame)
101 116
102 117 @hypothesis.settings(
103 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
105 level=strategies.integers(min_value=1, max_value=5),
106 source_read_size=strategies.integers(1, 16384),
107 read_sizes=strategies.data())
108 def test_buffer_source_read_variance(self, original, level, source_read_size,
109 read_sizes):
118 suppress_health_check=[
119 hypothesis.HealthCheck.large_base_example,
120 hypothesis.HealthCheck.too_slow,
121 ]
122 )
123 @hypothesis.given(
124 original=strategies.sampled_from(random_input_data()),
125 level=strategies.integers(min_value=1, max_value=5),
126 source_read_size=strategies.integers(1, 16384),
127 read_sizes=strategies.data(),
128 )
129 def test_buffer_source_read_variance(
130 self, original, level, source_read_size, read_sizes
131 ):
110 132
111 133 refctx = zstd.ZstdCompressor(level=level)
112 134 ref_frame = refctx.compress(original)
113 135
114 136 cctx = zstd.ZstdCompressor(level=level)
115 with cctx.stream_reader(original, size=len(original),
116 read_size=source_read_size) as reader:
137 with cctx.stream_reader(
138 original, size=len(original), read_size=source_read_size
139 ) as reader:
117 140 chunks = []
118 141 while True:
119 142 read_size = read_sizes.draw(strategies.integers(-1, 16384))
120 143 chunk = reader.read(read_size)
121 144 if not chunk and read_size:
122 145 break
123 146
124 147 chunks.append(chunk)
125 148
126 self.assertEqual(b''.join(chunks), ref_frame)
149 self.assertEqual(b"".join(chunks), ref_frame)
127 150
128 151 @hypothesis.settings(
129 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
130 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
131 level=strategies.integers(min_value=1, max_value=5),
132 source_read_size=strategies.integers(1, 16384),
133 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
134 def test_stream_source_readinto(self, original, level,
135 source_read_size, read_size):
152 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
153 )
154 @hypothesis.given(
155 original=strategies.sampled_from(random_input_data()),
156 level=strategies.integers(min_value=1, max_value=5),
157 source_read_size=strategies.integers(1, 16384),
158 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
159 )
160 def test_stream_source_readinto(self, original, level, source_read_size, read_size):
136 161 refctx = zstd.ZstdCompressor(level=level)
137 162 ref_frame = refctx.compress(original)
138 163
139 164 cctx = zstd.ZstdCompressor(level=level)
140 with cctx.stream_reader(io.BytesIO(original), size=len(original),
141 read_size=source_read_size) as reader:
165 with cctx.stream_reader(
166 io.BytesIO(original), size=len(original), read_size=source_read_size
167 ) as reader:
142 168 chunks = []
143 169 while True:
144 170 b = bytearray(read_size)
145 171 count = reader.readinto(b)
146 172
147 173 if not count:
148 174 break
149 175
150 176 chunks.append(bytes(b[0:count]))
151 177
152 self.assertEqual(b''.join(chunks), ref_frame)
178 self.assertEqual(b"".join(chunks), ref_frame)
153 179
154 180 @hypothesis.settings(
155 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
156 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
157 level=strategies.integers(min_value=1, max_value=5),
158 source_read_size=strategies.integers(1, 16384),
159 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
160 def test_buffer_source_readinto(self, original, level,
161 source_read_size, read_size):
181 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
182 )
183 @hypothesis.given(
184 original=strategies.sampled_from(random_input_data()),
185 level=strategies.integers(min_value=1, max_value=5),
186 source_read_size=strategies.integers(1, 16384),
187 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
188 )
189 def test_buffer_source_readinto(self, original, level, source_read_size, read_size):
162 190
163 191 refctx = zstd.ZstdCompressor(level=level)
164 192 ref_frame = refctx.compress(original)
165 193
166 194 cctx = zstd.ZstdCompressor(level=level)
167 with cctx.stream_reader(original, size=len(original),
168 read_size=source_read_size) as reader:
195 with cctx.stream_reader(
196 original, size=len(original), read_size=source_read_size
197 ) as reader:
169 198 chunks = []
170 199 while True:
171 200 b = bytearray(read_size)
172 201 count = reader.readinto(b)
173 202
174 203 if not count:
175 204 break
176 205
177 206 chunks.append(bytes(b[0:count]))
178 207
179 self.assertEqual(b''.join(chunks), ref_frame)
208 self.assertEqual(b"".join(chunks), ref_frame)
180 209
181 210 @hypothesis.settings(
182 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
183 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
184 level=strategies.integers(min_value=1, max_value=5),
185 source_read_size=strategies.integers(1, 16384),
186 read_sizes=strategies.data())
187 def test_stream_source_readinto_variance(self, original, level,
188 source_read_size, read_sizes):
211 suppress_health_check=[
212 hypothesis.HealthCheck.large_base_example,
213 hypothesis.HealthCheck.too_slow,
214 ]
215 )
216 @hypothesis.given(
217 original=strategies.sampled_from(random_input_data()),
218 level=strategies.integers(min_value=1, max_value=5),
219 source_read_size=strategies.integers(1, 16384),
220 read_sizes=strategies.data(),
221 )
222 def test_stream_source_readinto_variance(
223 self, original, level, source_read_size, read_sizes
224 ):
189 225 refctx = zstd.ZstdCompressor(level=level)
190 226 ref_frame = refctx.compress(original)
191 227
192 228 cctx = zstd.ZstdCompressor(level=level)
193 with cctx.stream_reader(io.BytesIO(original), size=len(original),
194 read_size=source_read_size) as reader:
229 with cctx.stream_reader(
230 io.BytesIO(original), size=len(original), read_size=source_read_size
231 ) as reader:
195 232 chunks = []
196 233 while True:
197 234 read_size = read_sizes.draw(strategies.integers(1, 16384))
198 235 b = bytearray(read_size)
199 236 count = reader.readinto(b)
200 237
201 238 if not count:
202 239 break
203 240
204 241 chunks.append(bytes(b[0:count]))
205 242
206 self.assertEqual(b''.join(chunks), ref_frame)
243 self.assertEqual(b"".join(chunks), ref_frame)
207 244
208 245 @hypothesis.settings(
209 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
210 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
211 level=strategies.integers(min_value=1, max_value=5),
212 source_read_size=strategies.integers(1, 16384),
213 read_sizes=strategies.data())
214 def test_buffer_source_readinto_variance(self, original, level,
215 source_read_size, read_sizes):
246 suppress_health_check=[
247 hypothesis.HealthCheck.large_base_example,
248 hypothesis.HealthCheck.too_slow,
249 ]
250 )
251 @hypothesis.given(
252 original=strategies.sampled_from(random_input_data()),
253 level=strategies.integers(min_value=1, max_value=5),
254 source_read_size=strategies.integers(1, 16384),
255 read_sizes=strategies.data(),
256 )
257 def test_buffer_source_readinto_variance(
258 self, original, level, source_read_size, read_sizes
259 ):
216 260
217 261 refctx = zstd.ZstdCompressor(level=level)
218 262 ref_frame = refctx.compress(original)
219 263
220 264 cctx = zstd.ZstdCompressor(level=level)
221 with cctx.stream_reader(original, size=len(original),
222 read_size=source_read_size) as reader:
265 with cctx.stream_reader(
266 original, size=len(original), read_size=source_read_size
267 ) as reader:
223 268 chunks = []
224 269 while True:
225 270 read_size = read_sizes.draw(strategies.integers(1, 16384))
226 271 b = bytearray(read_size)
227 272 count = reader.readinto(b)
228 273
229 274 if not count:
230 275 break
231 276
232 277 chunks.append(bytes(b[0:count]))
233 278
234 self.assertEqual(b''.join(chunks), ref_frame)
279 self.assertEqual(b"".join(chunks), ref_frame)
235 280
236 281 @hypothesis.settings(
237 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
238 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
239 level=strategies.integers(min_value=1, max_value=5),
240 source_read_size=strategies.integers(1, 16384),
241 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
242 def test_stream_source_read1(self, original, level, source_read_size,
243 read_size):
282 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
283 )
284 @hypothesis.given(
285 original=strategies.sampled_from(random_input_data()),
286 level=strategies.integers(min_value=1, max_value=5),
287 source_read_size=strategies.integers(1, 16384),
288 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
289 )
290 def test_stream_source_read1(self, original, level, source_read_size, read_size):
244 291 if read_size == 0:
245 292 read_size = -1
246 293
247 294 refctx = zstd.ZstdCompressor(level=level)
248 295 ref_frame = refctx.compress(original)
249 296
250 297 cctx = zstd.ZstdCompressor(level=level)
251 with cctx.stream_reader(io.BytesIO(original), size=len(original),
252 read_size=source_read_size) as reader:
298 with cctx.stream_reader(
299 io.BytesIO(original), size=len(original), read_size=source_read_size
300 ) as reader:
253 301 chunks = []
254 302 while True:
255 303 chunk = reader.read1(read_size)
256 304 if not chunk:
257 305 break
258 306
259 307 chunks.append(chunk)
260 308
261 self.assertEqual(b''.join(chunks), ref_frame)
309 self.assertEqual(b"".join(chunks), ref_frame)
262 310
263 311 @hypothesis.settings(
264 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
265 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
266 level=strategies.integers(min_value=1, max_value=5),
267 source_read_size=strategies.integers(1, 16384),
268 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
269 def test_buffer_source_read1(self, original, level, source_read_size,
270 read_size):
312 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
313 )
314 @hypothesis.given(
315 original=strategies.sampled_from(random_input_data()),
316 level=strategies.integers(min_value=1, max_value=5),
317 source_read_size=strategies.integers(1, 16384),
318 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
319 )
320 def test_buffer_source_read1(self, original, level, source_read_size, read_size):
271 321 if read_size == 0:
272 322 read_size = -1
273 323
274 324 refctx = zstd.ZstdCompressor(level=level)
275 325 ref_frame = refctx.compress(original)
276 326
277 327 cctx = zstd.ZstdCompressor(level=level)
278 with cctx.stream_reader(original, size=len(original),
279 read_size=source_read_size) as reader:
328 with cctx.stream_reader(
329 original, size=len(original), read_size=source_read_size
330 ) as reader:
280 331 chunks = []
281 332 while True:
282 333 chunk = reader.read1(read_size)
283 334 if not chunk:
284 335 break
285 336
286 337 chunks.append(chunk)
287 338
288 self.assertEqual(b''.join(chunks), ref_frame)
339 self.assertEqual(b"".join(chunks), ref_frame)
289 340
290 341 @hypothesis.settings(
291 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
292 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
293 level=strategies.integers(min_value=1, max_value=5),
294 source_read_size=strategies.integers(1, 16384),
295 read_sizes=strategies.data())
296 def test_stream_source_read1_variance(self, original, level, source_read_size,
297 read_sizes):
342 suppress_health_check=[
343 hypothesis.HealthCheck.large_base_example,
344 hypothesis.HealthCheck.too_slow,
345 ]
346 )
347 @hypothesis.given(
348 original=strategies.sampled_from(random_input_data()),
349 level=strategies.integers(min_value=1, max_value=5),
350 source_read_size=strategies.integers(1, 16384),
351 read_sizes=strategies.data(),
352 )
353 def test_stream_source_read1_variance(
354 self, original, level, source_read_size, read_sizes
355 ):
298 356 refctx = zstd.ZstdCompressor(level=level)
299 357 ref_frame = refctx.compress(original)
300 358
301 359 cctx = zstd.ZstdCompressor(level=level)
302 with cctx.stream_reader(io.BytesIO(original), size=len(original),
303 read_size=source_read_size) as reader:
360 with cctx.stream_reader(
361 io.BytesIO(original), size=len(original), read_size=source_read_size
362 ) as reader:
304 363 chunks = []
305 364 while True:
306 365 read_size = read_sizes.draw(strategies.integers(-1, 16384))
307 366 chunk = reader.read1(read_size)
308 367 if not chunk and read_size:
309 368 break
310 369
311 370 chunks.append(chunk)
312 371
313 self.assertEqual(b''.join(chunks), ref_frame)
372 self.assertEqual(b"".join(chunks), ref_frame)
314 373
315 374 @hypothesis.settings(
316 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
317 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
318 level=strategies.integers(min_value=1, max_value=5),
319 source_read_size=strategies.integers(1, 16384),
320 read_sizes=strategies.data())
321 def test_buffer_source_read1_variance(self, original, level, source_read_size,
322 read_sizes):
375 suppress_health_check=[
376 hypothesis.HealthCheck.large_base_example,
377 hypothesis.HealthCheck.too_slow,
378 ]
379 )
380 @hypothesis.given(
381 original=strategies.sampled_from(random_input_data()),
382 level=strategies.integers(min_value=1, max_value=5),
383 source_read_size=strategies.integers(1, 16384),
384 read_sizes=strategies.data(),
385 )
386 def test_buffer_source_read1_variance(
387 self, original, level, source_read_size, read_sizes
388 ):
323 389
324 390 refctx = zstd.ZstdCompressor(level=level)
325 391 ref_frame = refctx.compress(original)
326 392
327 393 cctx = zstd.ZstdCompressor(level=level)
328 with cctx.stream_reader(original, size=len(original),
329 read_size=source_read_size) as reader:
394 with cctx.stream_reader(
395 original, size=len(original), read_size=source_read_size
396 ) as reader:
330 397 chunks = []
331 398 while True:
332 399 read_size = read_sizes.draw(strategies.integers(-1, 16384))
333 400 chunk = reader.read1(read_size)
334 401 if not chunk and read_size:
335 402 break
336 403
337 404 chunks.append(chunk)
338 405
339 self.assertEqual(b''.join(chunks), ref_frame)
340
406 self.assertEqual(b"".join(chunks), ref_frame)
341 407
342 408 @hypothesis.settings(
343 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
344 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
345 level=strategies.integers(min_value=1, max_value=5),
346 source_read_size=strategies.integers(1, 16384),
347 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
348 def test_stream_source_readinto1(self, original, level, source_read_size,
349 read_size):
409 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
410 )
411 @hypothesis.given(
412 original=strategies.sampled_from(random_input_data()),
413 level=strategies.integers(min_value=1, max_value=5),
414 source_read_size=strategies.integers(1, 16384),
415 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
416 )
417 def test_stream_source_readinto1(
418 self, original, level, source_read_size, read_size
419 ):
350 420 if read_size == 0:
351 421 read_size = -1
352 422
353 423 refctx = zstd.ZstdCompressor(level=level)
354 424 ref_frame = refctx.compress(original)
355 425
356 426 cctx = zstd.ZstdCompressor(level=level)
357 with cctx.stream_reader(io.BytesIO(original), size=len(original),
358 read_size=source_read_size) as reader:
427 with cctx.stream_reader(
428 io.BytesIO(original), size=len(original), read_size=source_read_size
429 ) as reader:
359 430 chunks = []
360 431 while True:
361 432 b = bytearray(read_size)
362 433 count = reader.readinto1(b)
363 434
364 435 if not count:
365 436 break
366 437
367 438 chunks.append(bytes(b[0:count]))
368 439
369 self.assertEqual(b''.join(chunks), ref_frame)
440 self.assertEqual(b"".join(chunks), ref_frame)
370 441
371 442 @hypothesis.settings(
372 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
373 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
374 level=strategies.integers(min_value=1, max_value=5),
375 source_read_size=strategies.integers(1, 16384),
376 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
377 def test_buffer_source_readinto1(self, original, level, source_read_size,
378 read_size):
443 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
444 )
445 @hypothesis.given(
446 original=strategies.sampled_from(random_input_data()),
447 level=strategies.integers(min_value=1, max_value=5),
448 source_read_size=strategies.integers(1, 16384),
449 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
450 )
451 def test_buffer_source_readinto1(
452 self, original, level, source_read_size, read_size
453 ):
379 454 if read_size == 0:
380 455 read_size = -1
381 456
382 457 refctx = zstd.ZstdCompressor(level=level)
383 458 ref_frame = refctx.compress(original)
384 459
385 460 cctx = zstd.ZstdCompressor(level=level)
386 with cctx.stream_reader(original, size=len(original),
387 read_size=source_read_size) as reader:
461 with cctx.stream_reader(
462 original, size=len(original), read_size=source_read_size
463 ) as reader:
388 464 chunks = []
389 465 while True:
390 466 b = bytearray(read_size)
391 467 count = reader.readinto1(b)
392 468
393 469 if not count:
394 470 break
395 471
396 472 chunks.append(bytes(b[0:count]))
397 473
398 self.assertEqual(b''.join(chunks), ref_frame)
474 self.assertEqual(b"".join(chunks), ref_frame)
399 475
400 476 @hypothesis.settings(
401 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
402 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
403 level=strategies.integers(min_value=1, max_value=5),
404 source_read_size=strategies.integers(1, 16384),
405 read_sizes=strategies.data())
406 def test_stream_source_readinto1_variance(self, original, level, source_read_size,
407 read_sizes):
477 suppress_health_check=[
478 hypothesis.HealthCheck.large_base_example,
479 hypothesis.HealthCheck.too_slow,
480 ]
481 )
482 @hypothesis.given(
483 original=strategies.sampled_from(random_input_data()),
484 level=strategies.integers(min_value=1, max_value=5),
485 source_read_size=strategies.integers(1, 16384),
486 read_sizes=strategies.data(),
487 )
488 def test_stream_source_readinto1_variance(
489 self, original, level, source_read_size, read_sizes
490 ):
408 491 refctx = zstd.ZstdCompressor(level=level)
409 492 ref_frame = refctx.compress(original)
410 493
411 494 cctx = zstd.ZstdCompressor(level=level)
412 with cctx.stream_reader(io.BytesIO(original), size=len(original),
413 read_size=source_read_size) as reader:
495 with cctx.stream_reader(
496 io.BytesIO(original), size=len(original), read_size=source_read_size
497 ) as reader:
414 498 chunks = []
415 499 while True:
416 500 read_size = read_sizes.draw(strategies.integers(1, 16384))
417 501 b = bytearray(read_size)
418 502 count = reader.readinto1(b)
419 503
420 504 if not count:
421 505 break
422 506
423 507 chunks.append(bytes(b[0:count]))
424 508
425 self.assertEqual(b''.join(chunks), ref_frame)
509 self.assertEqual(b"".join(chunks), ref_frame)
426 510
427 511 @hypothesis.settings(
428 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
429 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
430 level=strategies.integers(min_value=1, max_value=5),
431 source_read_size=strategies.integers(1, 16384),
432 read_sizes=strategies.data())
433 def test_buffer_source_readinto1_variance(self, original, level, source_read_size,
434 read_sizes):
512 suppress_health_check=[
513 hypothesis.HealthCheck.large_base_example,
514 hypothesis.HealthCheck.too_slow,
515 ]
516 )
517 @hypothesis.given(
518 original=strategies.sampled_from(random_input_data()),
519 level=strategies.integers(min_value=1, max_value=5),
520 source_read_size=strategies.integers(1, 16384),
521 read_sizes=strategies.data(),
522 )
523 def test_buffer_source_readinto1_variance(
524 self, original, level, source_read_size, read_sizes
525 ):
435 526
436 527 refctx = zstd.ZstdCompressor(level=level)
437 528 ref_frame = refctx.compress(original)
438 529
439 530 cctx = zstd.ZstdCompressor(level=level)
440 with cctx.stream_reader(original, size=len(original),
441 read_size=source_read_size) as reader:
531 with cctx.stream_reader(
532 original, size=len(original), read_size=source_read_size
533 ) as reader:
442 534 chunks = []
443 535 while True:
444 536 read_size = read_sizes.draw(strategies.integers(1, 16384))
445 537 b = bytearray(read_size)
446 538 count = reader.readinto1(b)
447 539
448 540 if not count:
449 541 break
450 542
451 543 chunks.append(bytes(b[0:count]))
452 544
453 self.assertEqual(b''.join(chunks), ref_frame)
454
545 self.assertEqual(b"".join(chunks), ref_frame)
455 546
456 547
457 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
548 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
458 549 @make_cffi
459 class TestCompressor_stream_writer_fuzzing(unittest.TestCase):
460 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
461 level=strategies.integers(min_value=1, max_value=5),
462 write_size=strategies.integers(min_value=1, max_value=1048576))
550 class TestCompressor_stream_writer_fuzzing(TestCase):
551 @hypothesis.given(
552 original=strategies.sampled_from(random_input_data()),
553 level=strategies.integers(min_value=1, max_value=5),
554 write_size=strategies.integers(min_value=1, max_value=1048576),
555 )
463 556 def test_write_size_variance(self, original, level, write_size):
464 557 refctx = zstd.ZstdCompressor(level=level)
465 558 ref_frame = refctx.compress(original)
466 559
467 560 cctx = zstd.ZstdCompressor(level=level)
468 561 b = NonClosingBytesIO()
469 with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor:
562 with cctx.stream_writer(
563 b, size=len(original), write_size=write_size
564 ) as compressor:
470 565 compressor.write(original)
471 566
472 567 self.assertEqual(b.getvalue(), ref_frame)
473 568
474 569
475 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
570 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
476 571 @make_cffi
477 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
478 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
479 level=strategies.integers(min_value=1, max_value=5),
480 read_size=strategies.integers(min_value=1, max_value=1048576),
481 write_size=strategies.integers(min_value=1, max_value=1048576))
572 class TestCompressor_copy_stream_fuzzing(TestCase):
573 @hypothesis.given(
574 original=strategies.sampled_from(random_input_data()),
575 level=strategies.integers(min_value=1, max_value=5),
576 read_size=strategies.integers(min_value=1, max_value=1048576),
577 write_size=strategies.integers(min_value=1, max_value=1048576),
578 )
482 579 def test_read_write_size_variance(self, original, level, read_size, write_size):
483 580 refctx = zstd.ZstdCompressor(level=level)
484 581 ref_frame = refctx.compress(original)
485 582
486 583 cctx = zstd.ZstdCompressor(level=level)
487 584 source = io.BytesIO(original)
488 585 dest = io.BytesIO()
489 586
490 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
491 write_size=write_size)
587 cctx.copy_stream(
588 source, dest, size=len(original), read_size=read_size, write_size=write_size
589 )
492 590
493 591 self.assertEqual(dest.getvalue(), ref_frame)
494 592
495 593
496 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
594 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
497 595 @make_cffi
498 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
596 class TestCompressor_compressobj_fuzzing(TestCase):
499 597 @hypothesis.settings(
500 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
501 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
502 level=strategies.integers(min_value=1, max_value=5),
503 chunk_sizes=strategies.data())
598 suppress_health_check=[
599 hypothesis.HealthCheck.large_base_example,
600 hypothesis.HealthCheck.too_slow,
601 ]
602 )
603 @hypothesis.given(
604 original=strategies.sampled_from(random_input_data()),
605 level=strategies.integers(min_value=1, max_value=5),
606 chunk_sizes=strategies.data(),
607 )
504 608 def test_random_input_sizes(self, original, level, chunk_sizes):
505 609 refctx = zstd.ZstdCompressor(level=level)
506 610 ref_frame = refctx.compress(original)
507 611
508 612 cctx = zstd.ZstdCompressor(level=level)
509 613 cobj = cctx.compressobj(size=len(original))
510 614
511 615 chunks = []
512 616 i = 0
513 617 while True:
514 618 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
515 source = original[i:i + chunk_size]
619 source = original[i : i + chunk_size]
516 620 if not source:
517 621 break
518 622
519 623 chunks.append(cobj.compress(source))
520 624 i += chunk_size
521 625
522 626 chunks.append(cobj.flush())
523 627
524 self.assertEqual(b''.join(chunks), ref_frame)
628 self.assertEqual(b"".join(chunks), ref_frame)
525 629
526 630 @hypothesis.settings(
527 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
528 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
529 level=strategies.integers(min_value=1, max_value=5),
530 chunk_sizes=strategies.data(),
531 flushes=strategies.data())
631 suppress_health_check=[
632 hypothesis.HealthCheck.large_base_example,
633 hypothesis.HealthCheck.too_slow,
634 ]
635 )
636 @hypothesis.given(
637 original=strategies.sampled_from(random_input_data()),
638 level=strategies.integers(min_value=1, max_value=5),
639 chunk_sizes=strategies.data(),
640 flushes=strategies.data(),
641 )
532 642 def test_flush_block(self, original, level, chunk_sizes, flushes):
533 643 cctx = zstd.ZstdCompressor(level=level)
534 644 cobj = cctx.compressobj()
535 645
536 646 dctx = zstd.ZstdDecompressor()
537 647 dobj = dctx.decompressobj()
538 648
539 649 compressed_chunks = []
540 650 decompressed_chunks = []
541 651 i = 0
542 652 while True:
543 653 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
544 source = original[i:i + input_size]
654 source = original[i : i + input_size]
545 655 if not source:
546 656 break
547 657
548 658 i += input_size
549 659
550 660 chunk = cobj.compress(source)
551 661 compressed_chunks.append(chunk)
552 662 decompressed_chunks.append(dobj.decompress(chunk))
553 663
554 664 if not flushes.draw(strategies.booleans()):
555 665 continue
556 666
557 667 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
558 668 compressed_chunks.append(chunk)
559 669 decompressed_chunks.append(dobj.decompress(chunk))
560 670
561 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
671 self.assertEqual(b"".join(decompressed_chunks), original[0:i])
562 672
563 673 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
564 674 compressed_chunks.append(chunk)
565 675 decompressed_chunks.append(dobj.decompress(chunk))
566 676
567 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
568 max_output_size=len(original)),
569 original)
570 self.assertEqual(b''.join(decompressed_chunks), original)
677 self.assertEqual(
678 dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)),
679 original,
680 )
681 self.assertEqual(b"".join(decompressed_chunks), original)
682
571 683
572 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
684 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
573 685 @make_cffi
574 class TestCompressor_read_to_iter_fuzzing(unittest.TestCase):
575 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
576 level=strategies.integers(min_value=1, max_value=5),
577 read_size=strategies.integers(min_value=1, max_value=4096),
578 write_size=strategies.integers(min_value=1, max_value=4096))
686 class TestCompressor_read_to_iter_fuzzing(TestCase):
687 @hypothesis.given(
688 original=strategies.sampled_from(random_input_data()),
689 level=strategies.integers(min_value=1, max_value=5),
690 read_size=strategies.integers(min_value=1, max_value=4096),
691 write_size=strategies.integers(min_value=1, max_value=4096),
692 )
579 693 def test_read_write_size_variance(self, original, level, read_size, write_size):
580 694 refcctx = zstd.ZstdCompressor(level=level)
581 695 ref_frame = refcctx.compress(original)
582 696
583 697 source = io.BytesIO(original)
584 698
585 699 cctx = zstd.ZstdCompressor(level=level)
586 chunks = list(cctx.read_to_iter(source, size=len(original),
587 read_size=read_size,
588 write_size=write_size))
700 chunks = list(
701 cctx.read_to_iter(
702 source, size=len(original), read_size=read_size, write_size=write_size
703 )
704 )
589 705
590 self.assertEqual(b''.join(chunks), ref_frame)
706 self.assertEqual(b"".join(chunks), ref_frame)
591 707
592 708
593 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
594 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
595 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
596 min_size=1, max_size=1024),
597 threads=strategies.integers(min_value=1, max_value=8),
598 use_dict=strategies.booleans())
709 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
710 class TestCompressor_multi_compress_to_buffer_fuzzing(TestCase):
711 @hypothesis.given(
712 original=strategies.lists(
713 strategies.sampled_from(random_input_data()), min_size=1, max_size=1024
714 ),
715 threads=strategies.integers(min_value=1, max_value=8),
716 use_dict=strategies.booleans(),
717 )
599 718 def test_data_equivalence(self, original, threads, use_dict):
600 719 kwargs = {}
601 720
602 721 # Use a content dictionary because it is cheap to create.
603 722 if use_dict:
604 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
723 kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0])
605 724
606 cctx = zstd.ZstdCompressor(level=1,
607 write_checksum=True,
608 **kwargs)
725 cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs)
609 726
610 if not hasattr(cctx, 'multi_compress_to_buffer'):
611 self.skipTest('multi_compress_to_buffer not available')
727 if not hasattr(cctx, "multi_compress_to_buffer"):
728 self.skipTest("multi_compress_to_buffer not available")
612 729
613 730 result = cctx.multi_compress_to_buffer(original, threads=-1)
614 731
615 732 self.assertEqual(len(result), len(original))
616 733
617 734 # The frame produced via the batch APIs may not be bit identical to that
618 735 # produced by compress() because compression parameters are adjusted
619 736 # from the first input in batch mode. So the only thing we can do is
620 737 # verify the decompressed data matches the input.
621 738 dctx = zstd.ZstdDecompressor(**kwargs)
622 739
623 740 for i, frame in enumerate(result):
624 741 self.assertEqual(dctx.decompress(frame), original[i])
625 742
626 743
627 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
744 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
628 745 @make_cffi
629 class TestCompressor_chunker_fuzzing(unittest.TestCase):
746 class TestCompressor_chunker_fuzzing(TestCase):
630 747 @hypothesis.settings(
631 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
632 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
633 level=strategies.integers(min_value=1, max_value=5),
634 chunk_size=strategies.integers(
635 min_value=1,
636 max_value=32 * 1048576),
637 input_sizes=strategies.data())
748 suppress_health_check=[
749 hypothesis.HealthCheck.large_base_example,
750 hypothesis.HealthCheck.too_slow,
751 ]
752 )
753 @hypothesis.given(
754 original=strategies.sampled_from(random_input_data()),
755 level=strategies.integers(min_value=1, max_value=5),
756 chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576),
757 input_sizes=strategies.data(),
758 )
638 759 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
639 760 cctx = zstd.ZstdCompressor(level=level)
640 761 chunker = cctx.chunker(chunk_size=chunk_size)
641 762
642 763 chunks = []
643 764 i = 0
644 765 while True:
645 766 input_size = input_sizes.draw(strategies.integers(1, 4096))
646 source = original[i:i + input_size]
767 source = original[i : i + input_size]
647 768 if not source:
648 769 break
649 770
650 771 chunks.extend(chunker.compress(source))
651 772 i += input_size
652 773
653 774 chunks.extend(chunker.finish())
654 775
655 776 dctx = zstd.ZstdDecompressor()
656 777
657 self.assertEqual(dctx.decompress(b''.join(chunks),
658 max_output_size=len(original)),
659 original)
778 self.assertEqual(
779 dctx.decompress(b"".join(chunks), max_output_size=len(original)), original
780 )
660 781
661 782 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
662 783
663 784 @hypothesis.settings(
664 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
665 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
666 level=strategies.integers(min_value=1, max_value=5),
667 chunk_size=strategies.integers(
668 min_value=1,
669 max_value=32 * 1048576),
670 input_sizes=strategies.data(),
671 flushes=strategies.data())
672 def test_flush_block(self, original, level, chunk_size, input_sizes,
673 flushes):
785 suppress_health_check=[
786 hypothesis.HealthCheck.large_base_example,
787 hypothesis.HealthCheck.too_slow,
788 ]
789 )
790 @hypothesis.given(
791 original=strategies.sampled_from(random_input_data()),
792 level=strategies.integers(min_value=1, max_value=5),
793 chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576),
794 input_sizes=strategies.data(),
795 flushes=strategies.data(),
796 )
797 def test_flush_block(self, original, level, chunk_size, input_sizes, flushes):
674 798 cctx = zstd.ZstdCompressor(level=level)
675 799 chunker = cctx.chunker(chunk_size=chunk_size)
676 800
677 801 dctx = zstd.ZstdDecompressor()
678 802 dobj = dctx.decompressobj()
679 803
680 804 compressed_chunks = []
681 805 decompressed_chunks = []
682 806 i = 0
683 807 while True:
684 808 input_size = input_sizes.draw(strategies.integers(1, 4096))
685 source = original[i:i + input_size]
809 source = original[i : i + input_size]
686 810 if not source:
687 811 break
688 812
689 813 i += input_size
690 814
691 815 chunks = list(chunker.compress(source))
692 816 compressed_chunks.extend(chunks)
693 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
817 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
694 818
695 819 if not flushes.draw(strategies.booleans()):
696 820 continue
697 821
698 822 chunks = list(chunker.flush())
699 823 compressed_chunks.extend(chunks)
700 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
824 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
701 825
702 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
826 self.assertEqual(b"".join(decompressed_chunks), original[0:i])
703 827
704 828 chunks = list(chunker.finish())
705 829 compressed_chunks.extend(chunks)
706 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
830 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
707 831
708 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
709 max_output_size=len(original)),
710 original)
711 self.assertEqual(b''.join(decompressed_chunks), original)
\ No newline at end of file
832 self.assertEqual(
833 dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)),
834 original,
835 )
836 self.assertEqual(b"".join(decompressed_chunks), original)
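Annotation: the chunker fuzzing tests above drive a three-call API: compress() and flush() each yield zero or more completed fixed-size chunks, and finish() ends the frame, emitting the only chunk allowed to be short. A minimal usage sketch of that API (level and sizes here are illustrative, not from the tests):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=3)
    chunker = cctx.chunker(chunk_size=16384)

    original = b"foo" * 10000 + b"bar" * 10000
    chunks = []
    for piece in (b"foo" * 10000, b"bar" * 10000):
        # compress() returns an iterator of completed chunk_size chunks.
        chunks.extend(chunker.compress(piece))
    # finish() flushes remaining data; only this final chunk may be short.
    chunks.extend(chunker.finish())

    # The emitted frame carries no content size, so cap output explicitly,
    # just as the tests do with max_output_size.
    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(b"".join(chunks), max_output_size=len(original)) == original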
@@ -1,228 +1,241 @@
1 1 import sys
2 2 import unittest
3 3
4 4 import zstandard as zstd
5 5
6 from . common import (
6 from .common import (
7 7 make_cffi,
8 TestCase,
8 9 )
9 10
10 11
11 12 @make_cffi
12 class TestCompressionParameters(unittest.TestCase):
13 class TestCompressionParameters(TestCase):
13 14 def test_bounds(self):
14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN,
15 chain_log=zstd.CHAINLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN,
18 min_match=zstd.MINMATCH_MIN + 1,
19 target_length=zstd.TARGETLENGTH_MIN,
20 strategy=zstd.STRATEGY_FAST)
15 zstd.ZstdCompressionParameters(
16 window_log=zstd.WINDOWLOG_MIN,
17 chain_log=zstd.CHAINLOG_MIN,
18 hash_log=zstd.HASHLOG_MIN,
19 search_log=zstd.SEARCHLOG_MIN,
20 min_match=zstd.MINMATCH_MIN + 1,
21 target_length=zstd.TARGETLENGTH_MIN,
22 strategy=zstd.STRATEGY_FAST,
23 )
21 24
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
23 chain_log=zstd.CHAINLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
26 min_match=zstd.MINMATCH_MAX - 1,
27 target_length=zstd.TARGETLENGTH_MAX,
28 strategy=zstd.STRATEGY_BTULTRA2)
25 zstd.ZstdCompressionParameters(
26 window_log=zstd.WINDOWLOG_MAX,
27 chain_log=zstd.CHAINLOG_MAX,
28 hash_log=zstd.HASHLOG_MAX,
29 search_log=zstd.SEARCHLOG_MAX,
30 min_match=zstd.MINMATCH_MAX - 1,
31 target_length=zstd.TARGETLENGTH_MAX,
32 strategy=zstd.STRATEGY_BTULTRA2,
33 )
29 34
30 35 def test_from_level(self):
31 36 p = zstd.ZstdCompressionParameters.from_level(1)
32 37 self.assertIsInstance(p, zstd.CompressionParameters)
33 38
34 39 self.assertEqual(p.window_log, 19)
35 40
36 41 p = zstd.ZstdCompressionParameters.from_level(-4)
37 42 self.assertEqual(p.window_log, 19)
38 43
39 44 def test_members(self):
40 p = zstd.ZstdCompressionParameters(window_log=10,
41 chain_log=6,
42 hash_log=7,
43 search_log=4,
44 min_match=5,
45 target_length=8,
46 strategy=1)
45 p = zstd.ZstdCompressionParameters(
46 window_log=10,
47 chain_log=6,
48 hash_log=7,
49 search_log=4,
50 min_match=5,
51 target_length=8,
52 strategy=1,
53 )
47 54 self.assertEqual(p.window_log, 10)
48 55 self.assertEqual(p.chain_log, 6)
49 56 self.assertEqual(p.hash_log, 7)
50 57 self.assertEqual(p.search_log, 4)
51 58 self.assertEqual(p.min_match, 5)
52 59 self.assertEqual(p.target_length, 8)
53 60 self.assertEqual(p.compression_strategy, 1)
54 61
55 62 p = zstd.ZstdCompressionParameters(compression_level=2)
56 63 self.assertEqual(p.compression_level, 2)
57 64
58 65 p = zstd.ZstdCompressionParameters(threads=4)
59 66 self.assertEqual(p.threads, 4)
60 67
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
62 overlap_log=6)
68 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6)
63 69 self.assertEqual(p.threads, 2)
64 70 self.assertEqual(p.job_size, 1048576)
65 71 self.assertEqual(p.overlap_log, 6)
66 72 self.assertEqual(p.overlap_size_log, 6)
67 73
68 74 p = zstd.ZstdCompressionParameters(compression_level=-1)
69 75 self.assertEqual(p.compression_level, -1)
70 76
71 77 p = zstd.ZstdCompressionParameters(compression_level=-2)
72 78 self.assertEqual(p.compression_level, -2)
73 79
74 80 p = zstd.ZstdCompressionParameters(force_max_window=True)
75 81 self.assertEqual(p.force_max_window, 1)
76 82
77 83 p = zstd.ZstdCompressionParameters(enable_ldm=True)
78 84 self.assertEqual(p.enable_ldm, 1)
79 85
80 86 p = zstd.ZstdCompressionParameters(ldm_hash_log=7)
81 87 self.assertEqual(p.ldm_hash_log, 7)
82 88
83 89 p = zstd.ZstdCompressionParameters(ldm_min_match=6)
84 90 self.assertEqual(p.ldm_min_match, 6)
85 91
86 92 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
87 93 self.assertEqual(p.ldm_bucket_size_log, 7)
88 94
89 95 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
90 96 self.assertEqual(p.ldm_hash_every_log, 8)
91 97 self.assertEqual(p.ldm_hash_rate_log, 8)
92 98
93 99 def test_estimated_compression_context_size(self):
94 p = zstd.ZstdCompressionParameters(window_log=20,
95 chain_log=16,
96 hash_log=17,
97 search_log=1,
98 min_match=5,
99 target_length=16,
100 strategy=zstd.STRATEGY_DFAST)
100 p = zstd.ZstdCompressionParameters(
101 window_log=20,
102 chain_log=16,
103 hash_log=17,
104 search_log=1,
105 min_match=5,
106 target_length=16,
107 strategy=zstd.STRATEGY_DFAST,
108 )
101 109
102 110 # 32-bit has slightly different values from 64-bit.
103 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144,
104 delta=250)
111 self.assertAlmostEqual(
112 p.estimated_compression_context_size(), 1294464, delta=400
113 )
105 114
106 115 def test_strategy(self):
107 with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'):
116 with self.assertRaisesRegex(
117 ValueError, "cannot specify both compression_strategy"
118 ):
108 119 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
109 120
110 121 p = zstd.ZstdCompressionParameters(strategy=2)
111 122 self.assertEqual(p.compression_strategy, 2)
112 123
113 124 p = zstd.ZstdCompressionParameters(strategy=3)
114 125 self.assertEqual(p.compression_strategy, 3)
115 126
116 127 def test_ldm_hash_rate_log(self):
117 with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'):
128 with self.assertRaisesRegex(
129 ValueError, "cannot specify both ldm_hash_rate_log"
130 ):
118 131 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
119 132
120 133 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
121 134 self.assertEqual(p.ldm_hash_every_log, 8)
122 135
123 136 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
124 137 self.assertEqual(p.ldm_hash_every_log, 16)
125 138
126 139 def test_overlap_log(self):
127 with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'):
140 with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"):
128 141 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
129 142
130 143 p = zstd.ZstdCompressionParameters(overlap_log=2)
131 144 self.assertEqual(p.overlap_log, 2)
132 145 self.assertEqual(p.overlap_size_log, 2)
133 146
134 147 p = zstd.ZstdCompressionParameters(overlap_size_log=4)
135 148 self.assertEqual(p.overlap_log, 4)
136 149 self.assertEqual(p.overlap_size_log, 4)
137 150
138 151
139 152 @make_cffi
140 class TestFrameParameters(unittest.TestCase):
153 class TestFrameParameters(TestCase):
141 154 def test_invalid_type(self):
142 155 with self.assertRaises(TypeError):
143 156 zstd.get_frame_parameters(None)
144 157
145 158 # Python 3 doesn't appear to convert unicode to Py_buffer.
146 159 if sys.version_info[0] >= 3:
147 160 with self.assertRaises(TypeError):
148 zstd.get_frame_parameters(u'foobarbaz')
161 zstd.get_frame_parameters(u"foobarbaz")
149 162 else:
150 163 # CPython will convert unicode to Py_buffer. But CFFI won't.
151 if zstd.backend == 'cffi':
164 if zstd.backend == "cffi":
152 165 with self.assertRaises(TypeError):
153 zstd.get_frame_parameters(u'foobarbaz')
166 zstd.get_frame_parameters(u"foobarbaz")
154 167 else:
155 168 with self.assertRaises(zstd.ZstdError):
156 zstd.get_frame_parameters(u'foobarbaz')
169 zstd.get_frame_parameters(u"foobarbaz")
157 170
158 171 def test_invalid_input_sizes(self):
159 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
160 zstd.get_frame_parameters(b'')
172 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
173 zstd.get_frame_parameters(b"")
161 174
162 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
175 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
163 176 zstd.get_frame_parameters(zstd.FRAME_HEADER)
164 177
165 178 def test_invalid_frame(self):
166 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
167 zstd.get_frame_parameters(b'foobarbaz')
179 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
180 zstd.get_frame_parameters(b"foobarbaz")
168 181
169 182 def test_attributes(self):
170 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
183 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00")
171 184 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
172 185 self.assertEqual(params.window_size, 1024)
173 186 self.assertEqual(params.dict_id, 0)
174 187 self.assertFalse(params.has_checksum)
175 188
176 189 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
177 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
190 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff")
178 191 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
179 192 self.assertEqual(params.window_size, 1024)
180 193 self.assertEqual(params.dict_id, 255)
181 194 self.assertFalse(params.has_checksum)
182 195
183 196 # Lowest 3rd bit indicates if checksum is present.
184 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
197 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00")
185 198 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
186 199 self.assertEqual(params.window_size, 1024)
187 200 self.assertEqual(params.dict_id, 0)
188 201 self.assertTrue(params.has_checksum)
189 202
190 203 # Upper 2 bits indicate content size.
191 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
204 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00")
192 205 self.assertEqual(params.content_size, 511)
193 206 self.assertEqual(params.window_size, 1024)
194 207 self.assertEqual(params.dict_id, 0)
195 208 self.assertFalse(params.has_checksum)
196 209
197 210 # Window descriptor is 2nd byte after frame header.
198 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
211 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40")
199 212 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
200 213 self.assertEqual(params.window_size, 262144)
201 214 self.assertEqual(params.dict_id, 0)
202 215 self.assertFalse(params.has_checksum)
203 216
204 217 # Set multiple things.
205 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
218 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00")
206 219 self.assertEqual(params.content_size, 272)
207 220 self.assertEqual(params.window_size, 262144)
208 221 self.assertEqual(params.dict_id, 15)
209 222 self.assertTrue(params.has_checksum)
210 223
211 224 def test_input_types(self):
212 v = zstd.FRAME_HEADER + b'\x00\x00'
225 v = zstd.FRAME_HEADER + b"\x00\x00"
213 226
214 227 mutable_array = bytearray(len(v))
215 228 mutable_array[:] = v
216 229
217 230 sources = [
218 231 memoryview(v),
219 232 bytearray(v),
220 233 mutable_array,
221 234 ]
222 235
223 236 for source in sources:
224 237 params = zstd.get_frame_parameters(source)
225 238 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
226 239 self.assertEqual(params.window_size, 1024)
227 240 self.assertEqual(params.dict_id, 0)
228 241 self.assertFalse(params.has_checksum)
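Annotation: the descriptor-byte assertions in test_attributes above follow the zstd frame format: in the byte after the magic, the low two bits select the dictionary ID field width, bit 2 flags a checksum, and the top two bits select the content size field width; the following byte is the window descriptor. A short sketch of get_frame_parameters on a hand-built header, reusing the same bytes the test asserts on:

    import zstandard as zstd

    # Magic + descriptor 0x04 (checksum flag set) + window descriptor 0x00.
    params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00")
    assert params.has_checksum
    assert params.window_size == 1024
    assert params.dict_id == 0
    assert params.content_size == zstd.CONTENTSIZE_UNKNOWN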
@@ -1,76 +1,105 @@
1 1 import io
2 2 import os
3 3 import sys
4 4 import unittest
5 5
6 6 try:
7 7 import hypothesis
8 8 import hypothesis.strategies as strategies
9 9 except ImportError:
10 raise unittest.SkipTest('hypothesis not available')
10 raise unittest.SkipTest("hypothesis not available")
11 11
12 12 import zstandard as zstd
13 13
14 14 from .common import (
15 15 make_cffi,
16 TestCase,
17 )
18
19
20 s_windowlog = strategies.integers(
21 min_value=zstd.WINDOWLOG_MIN, max_value=zstd.WINDOWLOG_MAX
22 )
23 s_chainlog = strategies.integers(
24 min_value=zstd.CHAINLOG_MIN, max_value=zstd.CHAINLOG_MAX
25 )
26 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, max_value=zstd.HASHLOG_MAX)
27 s_searchlog = strategies.integers(
28 min_value=zstd.SEARCHLOG_MIN, max_value=zstd.SEARCHLOG_MAX
29 )
30 s_minmatch = strategies.integers(
31 min_value=zstd.MINMATCH_MIN, max_value=zstd.MINMATCH_MAX
32 )
33 s_targetlength = strategies.integers(
34 min_value=zstd.TARGETLENGTH_MIN, max_value=zstd.TARGETLENGTH_MAX
35 )
36 s_strategy = strategies.sampled_from(
37 (
38 zstd.STRATEGY_FAST,
39 zstd.STRATEGY_DFAST,
40 zstd.STRATEGY_GREEDY,
41 zstd.STRATEGY_LAZY,
42 zstd.STRATEGY_LAZY2,
43 zstd.STRATEGY_BTLAZY2,
44 zstd.STRATEGY_BTOPT,
45 zstd.STRATEGY_BTULTRA,
46 zstd.STRATEGY_BTULTRA2,
47 )
16 48 )
17 49
18 50
19 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
20 max_value=zstd.WINDOWLOG_MAX)
21 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
22 max_value=zstd.CHAINLOG_MAX)
23 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
24 max_value=zstd.HASHLOG_MAX)
25 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
26 max_value=zstd.SEARCHLOG_MAX)
27 s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN,
28 max_value=zstd.MINMATCH_MAX)
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
30 max_value=zstd.TARGETLENGTH_MAX)
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
32 zstd.STRATEGY_DFAST,
33 zstd.STRATEGY_GREEDY,
34 zstd.STRATEGY_LAZY,
35 zstd.STRATEGY_LAZY2,
36 zstd.STRATEGY_BTLAZY2,
37 zstd.STRATEGY_BTOPT,
38 zstd.STRATEGY_BTULTRA,
39 zstd.STRATEGY_BTULTRA2))
40
51 @make_cffi
52 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
53 class TestCompressionParametersHypothesis(TestCase):
54 @hypothesis.given(
55 s_windowlog,
56 s_chainlog,
57 s_hashlog,
58 s_searchlog,
59 s_minmatch,
60 s_targetlength,
61 s_strategy,
62 )
63 def test_valid_init(
64 self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy
65 ):
66 zstd.ZstdCompressionParameters(
67 window_log=windowlog,
68 chain_log=chainlog,
69 hash_log=hashlog,
70 search_log=searchlog,
71 min_match=minmatch,
72 target_length=targetlength,
73 strategy=strategy,
74 )
41 75
42 @make_cffi
43 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
44 class TestCompressionParametersHypothesis(unittest.TestCase):
45 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
46 s_minmatch, s_targetlength, s_strategy)
47 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
48 minmatch, targetlength, strategy):
49 zstd.ZstdCompressionParameters(window_log=windowlog,
50 chain_log=chainlog,
51 hash_log=hashlog,
52 search_log=searchlog,
53 min_match=minmatch,
54 target_length=targetlength,
55 strategy=strategy)
56
57 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
58 s_minmatch, s_targetlength, s_strategy)
59 def test_estimated_compression_context_size(self, windowlog, chainlog,
60 hashlog, searchlog,
61 minmatch, targetlength,
62 strategy):
63 if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
76 @hypothesis.given(
77 s_windowlog,
78 s_chainlog,
79 s_hashlog,
80 s_searchlog,
81 s_minmatch,
82 s_targetlength,
83 s_strategy,
84 )
85 def test_estimated_compression_context_size(
86 self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy
87 ):
88 if minmatch == zstd.MINMATCH_MIN and strategy in (
89 zstd.STRATEGY_FAST,
90 zstd.STRATEGY_GREEDY,
91 ):
64 92 minmatch += 1
65 93 elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST:
66 94 minmatch -= 1
67 95
68 p = zstd.ZstdCompressionParameters(window_log=windowlog,
69 chain_log=chainlog,
70 hash_log=hashlog,
71 search_log=searchlog,
72 min_match=minmatch,
73 target_length=targetlength,
74 strategy=strategy)
96 p = zstd.ZstdCompressionParameters(
97 window_log=windowlog,
98 chain_log=chainlog,
99 hash_log=hashlog,
100 search_log=searchlog,
101 min_match=minmatch,
102 target_length=targetlength,
103 strategy=strategy,
104 )
75 105 size = p.estimated_compression_context_size()
76
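Annotation: these hypothesis strategies stay inside the library's advertised parameter bounds, and test_estimated_compression_context_size nudges min_match off its extremes because zstd clamps the value for some strategies (fast and greedy reject MINMATCH_MIN; every strategy except fast rejects MINMATCH_MAX). The whole class runs only when ZSTD_SLOW_TESTS is set in the environment. A minimal bounds-respecting construction, mirroring test_bounds earlier in this series:

    import zstandard as zstd

    p = zstd.ZstdCompressionParameters(
        window_log=zstd.WINDOWLOG_MIN,
        chain_log=zstd.CHAINLOG_MIN,
        hash_log=zstd.HASHLOG_MIN,
        search_log=zstd.SEARCHLOG_MIN,
        min_match=zstd.MINMATCH_MIN + 1,  # STRATEGY_FAST rejects MINMATCH_MIN itself
        target_length=zstd.TARGETLENGTH_MIN,
        strategy=zstd.STRATEGY_FAST,
    )
    # The estimate is approximate and differs slightly between 32- and
    # 64-bit builds, hence the delta-based assertion in the tests above.
    size = p.estimated_compression_context_size()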
@@ -1,1611 +1,1670 @@
1 1 import io
2 2 import os
3 3 import random
4 4 import struct
5 5 import sys
6 6 import tempfile
7 7 import unittest
8 8
9 9 import zstandard as zstd
10 10
11 11 from .common import (
12 12 generate_samples,
13 13 make_cffi,
14 14 NonClosingBytesIO,
15 15 OpCountingBytesIO,
16 TestCase,
16 17 )
17 18
18 19
19 20 if sys.version_info[0] >= 3:
20 21 next = lambda it: it.__next__()
21 22 else:
22 23 next = lambda it: it.next()
23 24
24 25
25 26 @make_cffi
26 class TestFrameHeaderSize(unittest.TestCase):
27 class TestFrameHeaderSize(TestCase):
27 28 def test_empty(self):
28 with self.assertRaisesRegexp(
29 zstd.ZstdError, 'could not determine frame header size: Src size '
30 'is incorrect'):
31 zstd.frame_header_size(b'')
29 with self.assertRaisesRegex(
30 zstd.ZstdError,
31 "could not determine frame header size: Src size " "is incorrect",
32 ):
33 zstd.frame_header_size(b"")
32 34
33 35 def test_too_small(self):
34 with self.assertRaisesRegexp(
35 zstd.ZstdError, 'could not determine frame header size: Src size '
36 'is incorrect'):
37 zstd.frame_header_size(b'foob')
36 with self.assertRaisesRegex(
37 zstd.ZstdError,
38 "could not determine frame header size: Src size " "is incorrect",
39 ):
40 zstd.frame_header_size(b"foob")
38 41
39 42 def test_basic(self):
40 43 # It doesn't matter that it isn't a valid frame.
41 self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6)
44 self.assertEqual(zstd.frame_header_size(b"long enough but no magic"), 6)
42 45
43 46
44 47 @make_cffi
45 class TestFrameContentSize(unittest.TestCase):
48 class TestFrameContentSize(TestCase):
46 49 def test_empty(self):
47 with self.assertRaisesRegexp(zstd.ZstdError,
48 'error when determining content size'):
49 zstd.frame_content_size(b'')
50 with self.assertRaisesRegex(
51 zstd.ZstdError, "error when determining content size"
52 ):
53 zstd.frame_content_size(b"")
50 54
51 55 def test_too_small(self):
52 with self.assertRaisesRegexp(zstd.ZstdError,
53 'error when determining content size'):
54 zstd.frame_content_size(b'foob')
56 with self.assertRaisesRegex(
57 zstd.ZstdError, "error when determining content size"
58 ):
59 zstd.frame_content_size(b"foob")
55 60
56 61 def test_bad_frame(self):
57 with self.assertRaisesRegexp(zstd.ZstdError,
58 'error when determining content size'):
59 zstd.frame_content_size(b'invalid frame header')
62 with self.assertRaisesRegex(
63 zstd.ZstdError, "error when determining content size"
64 ):
65 zstd.frame_content_size(b"invalid frame header")
60 66
61 67 def test_unknown(self):
62 68 cctx = zstd.ZstdCompressor(write_content_size=False)
63 frame = cctx.compress(b'foobar')
69 frame = cctx.compress(b"foobar")
64 70
65 71 self.assertEqual(zstd.frame_content_size(frame), -1)
66 72
67 73 def test_empty(self):
68 74 cctx = zstd.ZstdCompressor()
69 frame = cctx.compress(b'')
75 frame = cctx.compress(b"")
70 76
71 77 self.assertEqual(zstd.frame_content_size(frame), 0)
72 78
73 79 def test_basic(self):
74 80 cctx = zstd.ZstdCompressor()
75 frame = cctx.compress(b'foobar')
81 frame = cctx.compress(b"foobar")
76 82
77 83 self.assertEqual(zstd.frame_content_size(frame), 6)
78 84
79 85
80 86 @make_cffi
81 class TestDecompressor(unittest.TestCase):
87 class TestDecompressor(TestCase):
82 88 def test_memory_size(self):
83 89 dctx = zstd.ZstdDecompressor()
84 90
85 91 self.assertGreater(dctx.memory_size(), 100)
86 92
87 93
88 94 @make_cffi
89 class TestDecompressor_decompress(unittest.TestCase):
95 class TestDecompressor_decompress(TestCase):
90 96 def test_empty_input(self):
91 97 dctx = zstd.ZstdDecompressor()
92 98
93 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
94 dctx.decompress(b'')
99 with self.assertRaisesRegex(
100 zstd.ZstdError, "error determining content size from frame header"
101 ):
102 dctx.decompress(b"")
95 103
96 104 def test_invalid_input(self):
97 105 dctx = zstd.ZstdDecompressor()
98 106
99 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
100 dctx.decompress(b'foobar')
107 with self.assertRaisesRegex(
108 zstd.ZstdError, "error determining content size from frame header"
109 ):
110 dctx.decompress(b"foobar")
101 111
102 112 def test_input_types(self):
103 113 cctx = zstd.ZstdCompressor(level=1)
104 compressed = cctx.compress(b'foo')
114 compressed = cctx.compress(b"foo")
105 115
106 116 mutable_array = bytearray(len(compressed))
107 117 mutable_array[:] = compressed
108 118
109 119 sources = [
110 120 memoryview(compressed),
111 121 bytearray(compressed),
112 122 mutable_array,
113 123 ]
114 124
115 125 dctx = zstd.ZstdDecompressor()
116 126 for source in sources:
117 self.assertEqual(dctx.decompress(source), b'foo')
127 self.assertEqual(dctx.decompress(source), b"foo")
118 128
119 129 def test_no_content_size_in_frame(self):
120 130 cctx = zstd.ZstdCompressor(write_content_size=False)
121 compressed = cctx.compress(b'foobar')
131 compressed = cctx.compress(b"foobar")
122 132
123 133 dctx = zstd.ZstdDecompressor()
124 with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'):
134 with self.assertRaisesRegex(
135 zstd.ZstdError, "could not determine content size in frame header"
136 ):
125 137 dctx.decompress(compressed)
126 138
127 139 def test_content_size_present(self):
128 140 cctx = zstd.ZstdCompressor()
129 compressed = cctx.compress(b'foobar')
141 compressed = cctx.compress(b"foobar")
130 142
131 143 dctx = zstd.ZstdDecompressor()
132 144 decompressed = dctx.decompress(compressed)
133 self.assertEqual(decompressed, b'foobar')
145 self.assertEqual(decompressed, b"foobar")
134 146
135 147 def test_empty_roundtrip(self):
136 148 cctx = zstd.ZstdCompressor()
137 compressed = cctx.compress(b'')
149 compressed = cctx.compress(b"")
138 150
139 151 dctx = zstd.ZstdDecompressor()
140 152 decompressed = dctx.decompress(compressed)
141 153
142 self.assertEqual(decompressed, b'')
154 self.assertEqual(decompressed, b"")
143 155
144 156 def test_max_output_size(self):
145 157 cctx = zstd.ZstdCompressor(write_content_size=False)
146 source = b'foobar' * 256
158 source = b"foobar" * 256
147 159 compressed = cctx.compress(source)
148 160
149 161 dctx = zstd.ZstdDecompressor()
150 162 # Will fit into buffer exactly the size of input.
151 163 decompressed = dctx.decompress(compressed, max_output_size=len(source))
152 164 self.assertEqual(decompressed, source)
153 165
154 166 # Input size - 1 fails
155 with self.assertRaisesRegexp(zstd.ZstdError,
156 'decompression error: did not decompress full frame'):
167 with self.assertRaisesRegex(
168 zstd.ZstdError, "decompression error: did not decompress full frame"
169 ):
157 170 dctx.decompress(compressed, max_output_size=len(source) - 1)
158 171
159 172 # Input size + 1 works
160 173 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
161 174 self.assertEqual(decompressed, source)
162 175
163 176 # A much larger buffer works.
164 177 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
165 178 self.assertEqual(decompressed, source)
166 179
167 180 def test_stupidly_large_output_buffer(self):
168 181 cctx = zstd.ZstdCompressor(write_content_size=False)
169 compressed = cctx.compress(b'foobar' * 256)
182 compressed = cctx.compress(b"foobar" * 256)
170 183 dctx = zstd.ZstdDecompressor()
171 184
172 185 # Will get OverflowError on some Python distributions that can't
173 186 # handle really large integers.
174 187 with self.assertRaises((MemoryError, OverflowError)):
175 dctx.decompress(compressed, max_output_size=2**62)
188 dctx.decompress(compressed, max_output_size=2 ** 62)
176 189
177 190 def test_dictionary(self):
178 191 samples = []
179 192 for i in range(128):
180 samples.append(b'foo' * 64)
181 samples.append(b'bar' * 64)
182 samples.append(b'foobar' * 64)
193 samples.append(b"foo" * 64)
194 samples.append(b"bar" * 64)
195 samples.append(b"foobar" * 64)
183 196
184 197 d = zstd.train_dictionary(8192, samples)
185 198
186 orig = b'foobar' * 16384
199 orig = b"foobar" * 16384
187 200 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
188 201 compressed = cctx.compress(orig)
189 202
190 203 dctx = zstd.ZstdDecompressor(dict_data=d)
191 204 decompressed = dctx.decompress(compressed)
192 205
193 206 self.assertEqual(decompressed, orig)
194 207
195 208 def test_dictionary_multiple(self):
196 209 samples = []
197 210 for i in range(128):
198 samples.append(b'foo' * 64)
199 samples.append(b'bar' * 64)
200 samples.append(b'foobar' * 64)
211 samples.append(b"foo" * 64)
212 samples.append(b"bar" * 64)
213 samples.append(b"foobar" * 64)
201 214
202 215 d = zstd.train_dictionary(8192, samples)
203 216
204 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
217 sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192)
205 218 compressed = []
206 219 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
207 220 for source in sources:
208 221 compressed.append(cctx.compress(source))
209 222
210 223 dctx = zstd.ZstdDecompressor(dict_data=d)
211 224 for i in range(len(sources)):
212 225 decompressed = dctx.decompress(compressed[i])
213 226 self.assertEqual(decompressed, sources[i])
214 227
215 228 def test_max_window_size(self):
216 with open(__file__, 'rb') as fh:
229 with open(__file__, "rb") as fh:
217 230 source = fh.read()
218 231
219 232 # If we write a content size, the decompressor engages single pass
220 233 # mode and the window size doesn't come into play.
221 234 cctx = zstd.ZstdCompressor(write_content_size=False)
222 235 frame = cctx.compress(source)
223 236
224 dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN)
237 dctx = zstd.ZstdDecompressor(max_window_size=2 ** zstd.WINDOWLOG_MIN)
225 238
226 with self.assertRaisesRegexp(
227 zstd.ZstdError, 'decompression error: Frame requires too much memory'):
239 with self.assertRaisesRegex(
240 zstd.ZstdError, "decompression error: Frame requires too much memory"
241 ):
228 242 dctx.decompress(frame, max_output_size=len(source))
229 243
230 244
231 245 @make_cffi
232 class TestDecompressor_copy_stream(unittest.TestCase):
246 class TestDecompressor_copy_stream(TestCase):
233 247 def test_no_read(self):
234 248 source = object()
235 249 dest = io.BytesIO()
236 250
237 251 dctx = zstd.ZstdDecompressor()
238 252 with self.assertRaises(ValueError):
239 253 dctx.copy_stream(source, dest)
240 254
241 255 def test_no_write(self):
242 256 source = io.BytesIO()
243 257 dest = object()
244 258
245 259 dctx = zstd.ZstdDecompressor()
246 260 with self.assertRaises(ValueError):
247 261 dctx.copy_stream(source, dest)
248 262
249 263 def test_empty(self):
250 264 source = io.BytesIO()
251 265 dest = io.BytesIO()
252 266
253 267 dctx = zstd.ZstdDecompressor()
254 268 # TODO should this raise an error?
255 269 r, w = dctx.copy_stream(source, dest)
256 270
257 271 self.assertEqual(r, 0)
258 272 self.assertEqual(w, 0)
259 self.assertEqual(dest.getvalue(), b'')
273 self.assertEqual(dest.getvalue(), b"")
260 274
261 275 def test_large_data(self):
262 276 source = io.BytesIO()
263 277 for i in range(255):
264 source.write(struct.Struct('>B').pack(i) * 16384)
278 source.write(struct.Struct(">B").pack(i) * 16384)
265 279 source.seek(0)
266 280
267 281 compressed = io.BytesIO()
268 282 cctx = zstd.ZstdCompressor()
269 283 cctx.copy_stream(source, compressed)
270 284
271 285 compressed.seek(0)
272 286 dest = io.BytesIO()
273 287 dctx = zstd.ZstdDecompressor()
274 288 r, w = dctx.copy_stream(compressed, dest)
275 289
276 290 self.assertEqual(r, len(compressed.getvalue()))
277 291 self.assertEqual(w, len(source.getvalue()))
278 292
279 293 def test_read_write_size(self):
280 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
281 b'foobarfoobar'))
294 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar"))
282 295
283 296 dest = OpCountingBytesIO()
284 297 dctx = zstd.ZstdDecompressor()
285 298 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
286 299
287 300 self.assertEqual(r, len(source.getvalue()))
288 self.assertEqual(w, len(b'foobarfoobar'))
301 self.assertEqual(w, len(b"foobarfoobar"))
289 302 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
290 303 self.assertEqual(dest._write_count, len(dest.getvalue()))
291 304
292 305
293 306 @make_cffi
294 class TestDecompressor_stream_reader(unittest.TestCase):
307 class TestDecompressor_stream_reader(TestCase):
295 308 def test_context_manager(self):
296 309 dctx = zstd.ZstdDecompressor()
297 310
298 with dctx.stream_reader(b'foo') as reader:
299 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
311 with dctx.stream_reader(b"foo") as reader:
312 with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"):
300 313 with reader as reader2:
301 314 pass
302 315
303 316 def test_not_implemented(self):
304 317 dctx = zstd.ZstdDecompressor()
305 318
306 with dctx.stream_reader(b'foo') as reader:
319 with dctx.stream_reader(b"foo") as reader:
307 320 with self.assertRaises(io.UnsupportedOperation):
308 321 reader.readline()
309 322
310 323 with self.assertRaises(io.UnsupportedOperation):
311 324 reader.readlines()
312 325
313 326 with self.assertRaises(io.UnsupportedOperation):
314 327 iter(reader)
315 328
316 329 with self.assertRaises(io.UnsupportedOperation):
317 330 next(reader)
318 331
319 332 with self.assertRaises(io.UnsupportedOperation):
320 reader.write(b'foo')
333 reader.write(b"foo")
321 334
322 335 with self.assertRaises(io.UnsupportedOperation):
323 336 reader.writelines([])
324 337
325 338 def test_constant_methods(self):
326 339 dctx = zstd.ZstdDecompressor()
327 340
328 with dctx.stream_reader(b'foo') as reader:
341 with dctx.stream_reader(b"foo") as reader:
329 342 self.assertFalse(reader.closed)
330 343 self.assertTrue(reader.readable())
331 344 self.assertFalse(reader.writable())
332 345 self.assertTrue(reader.seekable())
333 346 self.assertFalse(reader.isatty())
334 347 self.assertFalse(reader.closed)
335 348 self.assertIsNone(reader.flush())
336 349 self.assertFalse(reader.closed)
337 350
338 351 self.assertTrue(reader.closed)
339 352
340 353 def test_read_closed(self):
341 354 dctx = zstd.ZstdDecompressor()
342 355
343 with dctx.stream_reader(b'foo') as reader:
356 with dctx.stream_reader(b"foo") as reader:
344 357 reader.close()
345 358 self.assertTrue(reader.closed)
346 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
359 with self.assertRaisesRegex(ValueError, "stream is closed"):
347 360 reader.read(1)
348 361
349 362 def test_read_sizes(self):
350 363 cctx = zstd.ZstdCompressor()
351 foo = cctx.compress(b'foo')
364 foo = cctx.compress(b"foo")
352 365
353 366 dctx = zstd.ZstdDecompressor()
354 367
355 368 with dctx.stream_reader(foo) as reader:
356 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
369 with self.assertRaisesRegex(
370 ValueError, "cannot read negative amounts less than -1"
371 ):
357 372 reader.read(-2)
358 373
359 self.assertEqual(reader.read(0), b'')
360 self.assertEqual(reader.read(), b'foo')
374 self.assertEqual(reader.read(0), b"")
375 self.assertEqual(reader.read(), b"foo")
361 376
362 377 def test_read_buffer(self):
363 378 cctx = zstd.ZstdCompressor()
364 379
365 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
380 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
366 381 frame = cctx.compress(source)
367 382
368 383 dctx = zstd.ZstdDecompressor()
369 384
370 385 with dctx.stream_reader(frame) as reader:
371 386 self.assertEqual(reader.tell(), 0)
372 387
373 388 # We should get entire frame in one read.
374 389 result = reader.read(8192)
375 390 self.assertEqual(result, source)
376 391 self.assertEqual(reader.tell(), len(source))
377 392
378 393 # Read after EOF should return empty bytes.
379 self.assertEqual(reader.read(1), b'')
394 self.assertEqual(reader.read(1), b"")
380 395 self.assertEqual(reader.tell(), len(result))
381 396
382 397 self.assertTrue(reader.closed)
383 398
384 399 def test_read_buffer_small_chunks(self):
385 400 cctx = zstd.ZstdCompressor()
386 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
401 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
387 402 frame = cctx.compress(source)
388 403
389 404 dctx = zstd.ZstdDecompressor()
390 405 chunks = []
391 406
392 407 with dctx.stream_reader(frame, read_size=1) as reader:
393 408 while True:
394 409 chunk = reader.read(1)
395 410 if not chunk:
396 411 break
397 412
398 413 chunks.append(chunk)
399 414 self.assertEqual(reader.tell(), sum(map(len, chunks)))
400 415
401 self.assertEqual(b''.join(chunks), source)
416 self.assertEqual(b"".join(chunks), source)
402 417
403 418 def test_read_stream(self):
404 419 cctx = zstd.ZstdCompressor()
405 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
420 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
406 421 frame = cctx.compress(source)
407 422
408 423 dctx = zstd.ZstdDecompressor()
409 424 with dctx.stream_reader(io.BytesIO(frame)) as reader:
410 425 self.assertEqual(reader.tell(), 0)
411 426
412 427 chunk = reader.read(8192)
413 428 self.assertEqual(chunk, source)
414 429 self.assertEqual(reader.tell(), len(source))
415 self.assertEqual(reader.read(1), b'')
430 self.assertEqual(reader.read(1), b"")
416 431 self.assertEqual(reader.tell(), len(source))
417 432 self.assertFalse(reader.closed)
418 433
419 434 self.assertTrue(reader.closed)
420 435
421 436 def test_read_stream_small_chunks(self):
422 437 cctx = zstd.ZstdCompressor()
423 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
438 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
424 439 frame = cctx.compress(source)
425 440
426 441 dctx = zstd.ZstdDecompressor()
427 442 chunks = []
428 443
429 444 with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader:
430 445 while True:
431 446 chunk = reader.read(1)
432 447 if not chunk:
433 448 break
434 449
435 450 chunks.append(chunk)
436 451 self.assertEqual(reader.tell(), sum(map(len, chunks)))
437 452
438 self.assertEqual(b''.join(chunks), source)
453 self.assertEqual(b"".join(chunks), source)
439 454
440 455 def test_read_after_exit(self):
441 456 cctx = zstd.ZstdCompressor()
442 frame = cctx.compress(b'foo' * 60)
457 frame = cctx.compress(b"foo" * 60)
443 458
444 459 dctx = zstd.ZstdDecompressor()
445 460
446 461 with dctx.stream_reader(frame) as reader:
447 462 while reader.read(16):
448 463 pass
449 464
450 465 self.assertTrue(reader.closed)
451 466
452 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
467 with self.assertRaisesRegex(ValueError, "stream is closed"):
453 468 reader.read(10)
454 469
455 470 def test_illegal_seeks(self):
456 471 cctx = zstd.ZstdCompressor()
457 frame = cctx.compress(b'foo' * 60)
472 frame = cctx.compress(b"foo" * 60)
458 473
459 474 dctx = zstd.ZstdDecompressor()
460 475
461 476 with dctx.stream_reader(frame) as reader:
462 with self.assertRaisesRegexp(ValueError,
463 'cannot seek to negative position'):
477 with self.assertRaisesRegex(ValueError, "cannot seek to negative position"):
464 478 reader.seek(-1, os.SEEK_SET)
465 479
466 480 reader.read(1)
467 481
468 with self.assertRaisesRegexp(
469 ValueError, 'cannot seek zstd decompression stream backwards'):
482 with self.assertRaisesRegex(
483 ValueError, "cannot seek zstd decompression stream backwards"
484 ):
470 485 reader.seek(0, os.SEEK_SET)
471 486
472 with self.assertRaisesRegexp(
473 ValueError, 'cannot seek zstd decompression stream backwards'):
487 with self.assertRaisesRegex(
488 ValueError, "cannot seek zstd decompression stream backwards"
489 ):
474 490 reader.seek(-1, os.SEEK_CUR)
475 491
476 with self.assertRaisesRegexp(
477 ValueError,
478 'zstd decompression streams cannot be seeked with SEEK_END'):
492 with self.assertRaisesRegex(
493 ValueError, "zstd decompression streams cannot be seeked with SEEK_END"
494 ):
479 495 reader.seek(0, os.SEEK_END)
480 496
481 497 reader.close()
482 498
483 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
499 with self.assertRaisesRegex(ValueError, "stream is closed"):
484 500 reader.seek(4, os.SEEK_SET)
485 501
486 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
502 with self.assertRaisesRegex(ValueError, "stream is closed"):
487 503 reader.seek(0)
488 504
489 505 def test_seek(self):
490 source = b'foobar' * 60
506 source = b"foobar" * 60
491 507 cctx = zstd.ZstdCompressor()
492 508 frame = cctx.compress(source)
493 509
494 510 dctx = zstd.ZstdDecompressor()
495 511
496 512 with dctx.stream_reader(frame) as reader:
497 513 reader.seek(3)
498 self.assertEqual(reader.read(3), b'bar')
514 self.assertEqual(reader.read(3), b"bar")
499 515
500 516 reader.seek(4, os.SEEK_CUR)
501 self.assertEqual(reader.read(2), b'ar')
517 self.assertEqual(reader.read(2), b"ar")
502 518
503 519 def test_no_context_manager(self):
504 source = b'foobar' * 60
520 source = b"foobar" * 60
505 521 cctx = zstd.ZstdCompressor()
506 522 frame = cctx.compress(source)
507 523
508 524 dctx = zstd.ZstdDecompressor()
509 525 reader = dctx.stream_reader(frame)
510 526
511 self.assertEqual(reader.read(6), b'foobar')
512 self.assertEqual(reader.read(18), b'foobar' * 3)
527 self.assertEqual(reader.read(6), b"foobar")
528 self.assertEqual(reader.read(18), b"foobar" * 3)
513 529 self.assertFalse(reader.closed)
514 530
515 531 # Calling close prevents subsequent use.
516 532 reader.close()
517 533 self.assertTrue(reader.closed)
518 534
519 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
535 with self.assertRaisesRegex(ValueError, "stream is closed"):
520 536 reader.read(6)
521 537
522 538 def test_read_after_error(self):
523 source = io.BytesIO(b'')
539 source = io.BytesIO(b"")
524 540 dctx = zstd.ZstdDecompressor()
525 541
526 542 reader = dctx.stream_reader(source)
527 543
528 544 with reader:
529 545 reader.read(0)
530 546
531 547 with reader:
532 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
548 with self.assertRaisesRegex(ValueError, "stream is closed"):
533 549 reader.read(100)
534 550
535 551 def test_partial_read(self):
536 552 # Inspired by https://github.com/indygreg/python-zstandard/issues/71.
537 553 buffer = io.BytesIO()
538 554 cctx = zstd.ZstdCompressor()
539 555 writer = cctx.stream_writer(buffer)
540 556 writer.write(bytearray(os.urandom(1000000)))
541 557 writer.flush(zstd.FLUSH_FRAME)
542 558 buffer.seek(0)
543 559
544 560 dctx = zstd.ZstdDecompressor()
545 561 reader = dctx.stream_reader(buffer)
546 562
547 563 while True:
548 564 chunk = reader.read(8192)
549 565 if not chunk:
550 566 break
551 567
552 568 def test_read_multiple_frames(self):
553 569 cctx = zstd.ZstdCompressor()
554 570 source = io.BytesIO()
555 571 writer = cctx.stream_writer(source)
556 writer.write(b'foo')
572 writer.write(b"foo")
557 573 writer.flush(zstd.FLUSH_FRAME)
558 writer.write(b'bar')
574 writer.write(b"bar")
559 575 writer.flush(zstd.FLUSH_FRAME)
560 576
561 577 dctx = zstd.ZstdDecompressor()
562 578
563 579 reader = dctx.stream_reader(source.getvalue())
564 self.assertEqual(reader.read(2), b'fo')
565 self.assertEqual(reader.read(2), b'o')
566 self.assertEqual(reader.read(2), b'ba')
567 self.assertEqual(reader.read(2), b'r')
580 self.assertEqual(reader.read(2), b"fo")
581 self.assertEqual(reader.read(2), b"o")
582 self.assertEqual(reader.read(2), b"ba")
583 self.assertEqual(reader.read(2), b"r")
568 584
569 585 source.seek(0)
570 586 reader = dctx.stream_reader(source)
571 self.assertEqual(reader.read(2), b'fo')
572 self.assertEqual(reader.read(2), b'o')
573 self.assertEqual(reader.read(2), b'ba')
574 self.assertEqual(reader.read(2), b'r')
587 self.assertEqual(reader.read(2), b"fo")
588 self.assertEqual(reader.read(2), b"o")
589 self.assertEqual(reader.read(2), b"ba")
590 self.assertEqual(reader.read(2), b"r")
575 591
576 592 reader = dctx.stream_reader(source.getvalue())
577 self.assertEqual(reader.read(3), b'foo')
578 self.assertEqual(reader.read(3), b'bar')
593 self.assertEqual(reader.read(3), b"foo")
594 self.assertEqual(reader.read(3), b"bar")
579 595
580 596 source.seek(0)
581 597 reader = dctx.stream_reader(source)
582 self.assertEqual(reader.read(3), b'foo')
583 self.assertEqual(reader.read(3), b'bar')
598 self.assertEqual(reader.read(3), b"foo")
599 self.assertEqual(reader.read(3), b"bar")
584 600
585 601 reader = dctx.stream_reader(source.getvalue())
586 self.assertEqual(reader.read(4), b'foo')
587 self.assertEqual(reader.read(4), b'bar')
602 self.assertEqual(reader.read(4), b"foo")
603 self.assertEqual(reader.read(4), b"bar")
588 604
589 605 source.seek(0)
590 606 reader = dctx.stream_reader(source)
591 self.assertEqual(reader.read(4), b'foo')
592 self.assertEqual(reader.read(4), b'bar')
607 self.assertEqual(reader.read(4), b"foo")
608 self.assertEqual(reader.read(4), b"bar")
593 609
594 610 reader = dctx.stream_reader(source.getvalue())
595 self.assertEqual(reader.read(128), b'foo')
596 self.assertEqual(reader.read(128), b'bar')
611 self.assertEqual(reader.read(128), b"foo")
612 self.assertEqual(reader.read(128), b"bar")
597 613
598 614 source.seek(0)
599 615 reader = dctx.stream_reader(source)
600 self.assertEqual(reader.read(128), b'foo')
601 self.assertEqual(reader.read(128), b'bar')
616 self.assertEqual(reader.read(128), b"foo")
617 self.assertEqual(reader.read(128), b"bar")
602 618
603 619 # Now tests for reads spanning frames.
604 620 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
605 self.assertEqual(reader.read(3), b'foo')
606 self.assertEqual(reader.read(3), b'bar')
621 self.assertEqual(reader.read(3), b"foo")
622 self.assertEqual(reader.read(3), b"bar")
607 623
608 624 source.seek(0)
609 625 reader = dctx.stream_reader(source, read_across_frames=True)
610 self.assertEqual(reader.read(3), b'foo')
611 self.assertEqual(reader.read(3), b'bar')
626 self.assertEqual(reader.read(3), b"foo")
627 self.assertEqual(reader.read(3), b"bar")
612 628
613 629 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
614 self.assertEqual(reader.read(6), b'foobar')
630 self.assertEqual(reader.read(6), b"foobar")
615 631
616 632 source.seek(0)
617 633 reader = dctx.stream_reader(source, read_across_frames=True)
618 self.assertEqual(reader.read(6), b'foobar')
634 self.assertEqual(reader.read(6), b"foobar")
619 635
620 636 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
621 self.assertEqual(reader.read(7), b'foobar')
637 self.assertEqual(reader.read(7), b"foobar")
622 638
623 639 source.seek(0)
624 640 reader = dctx.stream_reader(source, read_across_frames=True)
625 self.assertEqual(reader.read(7), b'foobar')
641 self.assertEqual(reader.read(7), b"foobar")
626 642
627 643 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
628 self.assertEqual(reader.read(128), b'foobar')
644 self.assertEqual(reader.read(128), b"foobar")
629 645
630 646 source.seek(0)
631 647 reader = dctx.stream_reader(source, read_across_frames=True)
632 self.assertEqual(reader.read(128), b'foobar')
648 self.assertEqual(reader.read(128), b"foobar")
633 649
634 650 def test_readinto(self):
635 651 cctx = zstd.ZstdCompressor()
636 foo = cctx.compress(b'foo')
652 foo = cctx.compress(b"foo")
637 653
638 654 dctx = zstd.ZstdDecompressor()
639 655
640 656 # Attempting to readinto() a non-writable buffer fails.
641 657 # The exact exception varies based on the backend.
642 658 reader = dctx.stream_reader(foo)
643 659 with self.assertRaises(Exception):
644 reader.readinto(b'foobar')
660 reader.readinto(b"foobar")
645 661
646 662 # readinto() with sufficiently large destination.
647 663 b = bytearray(1024)
648 664 reader = dctx.stream_reader(foo)
649 665 self.assertEqual(reader.readinto(b), 3)
650 self.assertEqual(b[0:3], b'foo')
666 self.assertEqual(b[0:3], b"foo")
651 667 self.assertEqual(reader.readinto(b), 0)
652 self.assertEqual(b[0:3], b'foo')
668 self.assertEqual(b[0:3], b"foo")
653 669
654 670 # readinto() with small reads.
655 671 b = bytearray(1024)
656 672 reader = dctx.stream_reader(foo, read_size=1)
657 673 self.assertEqual(reader.readinto(b), 3)
658 self.assertEqual(b[0:3], b'foo')
674 self.assertEqual(b[0:3], b"foo")
659 675
660 676 # Too small destination buffer.
661 677 b = bytearray(2)
662 678 reader = dctx.stream_reader(foo)
663 679 self.assertEqual(reader.readinto(b), 2)
664 self.assertEqual(b[:], b'fo')
680 self.assertEqual(b[:], b"fo")
665 681
666 682 def test_readinto1(self):
667 683 cctx = zstd.ZstdCompressor()
668 foo = cctx.compress(b'foo')
684 foo = cctx.compress(b"foo")
669 685
670 686 dctx = zstd.ZstdDecompressor()
671 687
672 688 reader = dctx.stream_reader(foo)
673 689 with self.assertRaises(Exception):
674 reader.readinto1(b'foobar')
690 reader.readinto1(b"foobar")
675 691
676 692 # Sufficiently large destination.
677 693 b = bytearray(1024)
678 694 reader = dctx.stream_reader(foo)
679 695 self.assertEqual(reader.readinto1(b), 3)
680 self.assertEqual(b[0:3], b'foo')
696 self.assertEqual(b[0:3], b"foo")
681 697 self.assertEqual(reader.readinto1(b), 0)
682 self.assertEqual(b[0:3], b'foo')
698 self.assertEqual(b[0:3], b"foo")
683 699
684 700 # readinto() with small reads.
685 701 b = bytearray(1024)
686 702 reader = dctx.stream_reader(foo, read_size=1)
687 703 self.assertEqual(reader.readinto1(b), 3)
688 self.assertEqual(b[0:3], b'foo')
704 self.assertEqual(b[0:3], b"foo")
689 705
690 706 # Too small destination buffer.
691 707 b = bytearray(2)
692 708 reader = dctx.stream_reader(foo)
693 709 self.assertEqual(reader.readinto1(b), 2)
694 self.assertEqual(b[:], b'fo')
710 self.assertEqual(b[:], b"fo")
695 711
696 712 def test_readall(self):
697 713 cctx = zstd.ZstdCompressor()
698 foo = cctx.compress(b'foo')
714 foo = cctx.compress(b"foo")
699 715
700 716 dctx = zstd.ZstdDecompressor()
701 717 reader = dctx.stream_reader(foo)
702 718
703 self.assertEqual(reader.readall(), b'foo')
719 self.assertEqual(reader.readall(), b"foo")
704 720
705 721 def test_read1(self):
706 722 cctx = zstd.ZstdCompressor()
707 foo = cctx.compress(b'foo')
723 foo = cctx.compress(b"foo")
708 724
709 725 dctx = zstd.ZstdDecompressor()
710 726
711 727 b = OpCountingBytesIO(foo)
712 728 reader = dctx.stream_reader(b)
713 729
714 self.assertEqual(reader.read1(), b'foo')
730 self.assertEqual(reader.read1(), b"foo")
715 731 self.assertEqual(b._read_count, 1)
716 732
717 733 b = OpCountingBytesIO(foo)
718 734 reader = dctx.stream_reader(b)
719 735
720 self.assertEqual(reader.read1(0), b'')
721 self.assertEqual(reader.read1(2), b'fo')
736 self.assertEqual(reader.read1(0), b"")
737 self.assertEqual(reader.read1(2), b"fo")
722 738 self.assertEqual(b._read_count, 1)
723 self.assertEqual(reader.read1(1), b'o')
739 self.assertEqual(reader.read1(1), b"o")
724 740 self.assertEqual(b._read_count, 1)
725 self.assertEqual(reader.read1(1), b'')
741 self.assertEqual(reader.read1(1), b"")
726 742 self.assertEqual(b._read_count, 2)
727 743
728 744 def test_read_lines(self):
729 745 cctx = zstd.ZstdCompressor()
730 source = b'\n'.join(('line %d' % i).encode('ascii') for i in range(1024))
746 source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024))
731 747
732 748 frame = cctx.compress(source)
733 749
734 750 dctx = zstd.ZstdDecompressor()
735 751 reader = dctx.stream_reader(frame)
736 tr = io.TextIOWrapper(reader, encoding='utf-8')
752 tr = io.TextIOWrapper(reader, encoding="utf-8")
737 753
738 754 lines = []
739 755 for line in tr:
740 lines.append(line.encode('utf-8'))
756 lines.append(line.encode("utf-8"))
741 757
742 758 self.assertEqual(len(lines), 1024)
743 self.assertEqual(b''.join(lines), source)
759 self.assertEqual(b"".join(lines), source)
744 760
745 761 reader = dctx.stream_reader(frame)
746 tr = io.TextIOWrapper(reader, encoding='utf-8')
762 tr = io.TextIOWrapper(reader, encoding="utf-8")
747 763
748 764 lines = tr.readlines()
749 765 self.assertEqual(len(lines), 1024)
750 self.assertEqual(''.join(lines).encode('utf-8'), source)
766 self.assertEqual("".join(lines).encode("utf-8"), source)
751 767
752 768 reader = dctx.stream_reader(frame)
753 tr = io.TextIOWrapper(reader, encoding='utf-8')
769 tr = io.TextIOWrapper(reader, encoding="utf-8")
754 770
755 771 lines = []
756 772 while True:
757 773 line = tr.readline()
758 774 if not line:
759 775 break
760 776
761 lines.append(line.encode('utf-8'))
777 lines.append(line.encode("utf-8"))
762 778
763 779 self.assertEqual(len(lines), 1024)
764 self.assertEqual(b''.join(lines), source)
780 self.assertEqual(b"".join(lines), source)
765 781
766 782
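The read_lines tests above work because stream_reader() returns an object compatible with io.RawIOBase. A minimal sketch of that usage, assuming only the zstandard package imported as in these tests (the input bytes are illustrative):

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"line 1\nline 2\n")

    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(frame)
    # The reader behaves like a raw binary stream, so io.TextIOWrapper can
    # decode it incrementally; closing the wrapper also closes the reader.
    with io.TextIOWrapper(reader, encoding="utf-8") as tr:
        for line in tr:
            print(line, end="")
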
767 783 @make_cffi
768 class TestDecompressor_decompressobj(unittest.TestCase):
784 class TestDecompressor_decompressobj(TestCase):
769 785 def test_simple(self):
770 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
786 data = zstd.ZstdCompressor(level=1).compress(b"foobar")
771 787
772 788 dctx = zstd.ZstdDecompressor()
773 789 dobj = dctx.decompressobj()
774 self.assertEqual(dobj.decompress(data), b'foobar')
790 self.assertEqual(dobj.decompress(data), b"foobar")
775 791 self.assertIsNone(dobj.flush())
776 792 self.assertIsNone(dobj.flush(10))
777 793 self.assertIsNone(dobj.flush(length=100))
778 794
779 795 def test_input_types(self):
780 compressed = zstd.ZstdCompressor(level=1).compress(b'foo')
796 compressed = zstd.ZstdCompressor(level=1).compress(b"foo")
781 797
782 798 dctx = zstd.ZstdDecompressor()
783 799
784 800 mutable_array = bytearray(len(compressed))
785 801 mutable_array[:] = compressed
786 802
787 803 sources = [
788 804 memoryview(compressed),
789 805 bytearray(compressed),
790 806 mutable_array,
791 807 ]
792 808
793 809 for source in sources:
794 810 dobj = dctx.decompressobj()
795 811 self.assertIsNone(dobj.flush())
796 812 self.assertIsNone(dobj.flush(10))
797 813 self.assertIsNone(dobj.flush(length=100))
798 self.assertEqual(dobj.decompress(source), b'foo')
814 self.assertEqual(dobj.decompress(source), b"foo")
799 815 self.assertIsNone(dobj.flush())
800 816
801 817 def test_reuse(self):
802 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
818 data = zstd.ZstdCompressor(level=1).compress(b"foobar")
803 819
804 820 dctx = zstd.ZstdDecompressor()
805 821 dobj = dctx.decompressobj()
806 822 dobj.decompress(data)
807 823
808 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
824 with self.assertRaisesRegex(zstd.ZstdError, "cannot use a decompressobj"):
809 825 dobj.decompress(data)
810 826 self.assertIsNone(dobj.flush())
811 827
812 828 def test_bad_write_size(self):
813 829 dctx = zstd.ZstdDecompressor()
814 830
815 with self.assertRaisesRegexp(ValueError, 'write_size must be positive'):
831 with self.assertRaisesRegex(ValueError, "write_size must be positive"):
816 832 dctx.decompressobj(write_size=0)
817 833
818 834 def test_write_size(self):
819 source = b'foo' * 64 + b'bar' * 128
835 source = b"foo" * 64 + b"bar" * 128
820 836 data = zstd.ZstdCompressor(level=1).compress(source)
821 837
822 838 dctx = zstd.ZstdDecompressor()
823 839
824 840 for i in range(128):
825 841 dobj = dctx.decompressobj(write_size=i + 1)
826 842 self.assertEqual(dobj.decompress(data), source)
827 843
828 844
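The tests above treat decompressobj() as a zlib.decompressobj() work-alike. A minimal sketch under the same assumption (the payload is arbitrary):

    import zstandard as zstd

    data = zstd.ZstdCompressor(level=1).compress(b"foobar")

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()
    assert dobj.decompress(data) == b"foobar"
    # flush() exists only for zlib API compatibility and always returns None.
    # The object is single use: decompressing a second frame raises ZstdError.
    assert dobj.flush() is None
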
829 845 def decompress_via_writer(data):
830 846 buffer = io.BytesIO()
831 847 dctx = zstd.ZstdDecompressor()
832 848 decompressor = dctx.stream_writer(buffer)
833 849 decompressor.write(data)
834 850
835 851 return buffer.getvalue()
836 852
837 853
838 854 @make_cffi
839 class TestDecompressor_stream_writer(unittest.TestCase):
855 class TestDecompressor_stream_writer(TestCase):
840 856 def test_io_api(self):
841 857 buffer = io.BytesIO()
842 858 dctx = zstd.ZstdDecompressor()
843 859 writer = dctx.stream_writer(buffer)
844 860
845 861 self.assertFalse(writer.closed)
846 862 self.assertFalse(writer.isatty())
847 863 self.assertFalse(writer.readable())
848 864
849 865 with self.assertRaises(io.UnsupportedOperation):
850 866 writer.readline()
851 867
852 868 with self.assertRaises(io.UnsupportedOperation):
853 869 writer.readline(42)
854 870
855 871 with self.assertRaises(io.UnsupportedOperation):
856 872 writer.readline(size=42)
857 873
858 874 with self.assertRaises(io.UnsupportedOperation):
859 875 writer.readlines()
860 876
861 877 with self.assertRaises(io.UnsupportedOperation):
862 878 writer.readlines(42)
863 879
864 880 with self.assertRaises(io.UnsupportedOperation):
865 881 writer.readlines(hint=42)
866 882
867 883 with self.assertRaises(io.UnsupportedOperation):
868 884 writer.seek(0)
869 885
870 886 with self.assertRaises(io.UnsupportedOperation):
871 887 writer.seek(10, os.SEEK_SET)
872 888
873 889 self.assertFalse(writer.seekable())
874 890
875 891 with self.assertRaises(io.UnsupportedOperation):
876 892 writer.tell()
877 893
878 894 with self.assertRaises(io.UnsupportedOperation):
879 895 writer.truncate()
880 896
881 897 with self.assertRaises(io.UnsupportedOperation):
882 898 writer.truncate(42)
883 899
884 900 with self.assertRaises(io.UnsupportedOperation):
885 901 writer.truncate(size=42)
886 902
887 903 self.assertTrue(writer.writable())
888 904
889 905 with self.assertRaises(io.UnsupportedOperation):
890 906 writer.writelines([])
891 907
892 908 with self.assertRaises(io.UnsupportedOperation):
893 909 writer.read()
894 910
895 911 with self.assertRaises(io.UnsupportedOperation):
896 912 writer.read(42)
897 913
898 914 with self.assertRaises(io.UnsupportedOperation):
899 915 writer.read(size=42)
900 916
901 917 with self.assertRaises(io.UnsupportedOperation):
902 918 writer.readall()
903 919
904 920 with self.assertRaises(io.UnsupportedOperation):
905 921 writer.readinto(None)
906 922
907 923 with self.assertRaises(io.UnsupportedOperation):
908 924 writer.fileno()
909 925
910 926 def test_fileno_file(self):
911 with tempfile.TemporaryFile('wb') as tf:
927 with tempfile.TemporaryFile("wb") as tf:
912 928 dctx = zstd.ZstdDecompressor()
913 929 writer = dctx.stream_writer(tf)
914 930
915 931 self.assertEqual(writer.fileno(), tf.fileno())
916 932
917 933 def test_close(self):
918 foo = zstd.ZstdCompressor().compress(b'foo')
934 foo = zstd.ZstdCompressor().compress(b"foo")
919 935
920 936 buffer = NonClosingBytesIO()
921 937 dctx = zstd.ZstdDecompressor()
922 938 writer = dctx.stream_writer(buffer)
923 939
924 940 writer.write(foo)
925 941 self.assertFalse(writer.closed)
926 942 self.assertFalse(buffer.closed)
927 943 writer.close()
928 944 self.assertTrue(writer.closed)
929 945 self.assertTrue(buffer.closed)
930 946
931 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
932 writer.write(b'')
947 with self.assertRaisesRegex(ValueError, "stream is closed"):
948 writer.write(b"")
933 949
934 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
950 with self.assertRaisesRegex(ValueError, "stream is closed"):
935 951 writer.flush()
936 952
937 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
953 with self.assertRaisesRegex(ValueError, "stream is closed"):
938 954 with writer:
939 955 pass
940 956
941 self.assertEqual(buffer.getvalue(), b'foo')
957 self.assertEqual(buffer.getvalue(), b"foo")
942 958
943 959 # Context manager exit should close stream.
944 960 buffer = NonClosingBytesIO()
945 961 writer = dctx.stream_writer(buffer)
946 962
947 963 with writer:
948 964 writer.write(foo)
949 965
950 966 self.assertTrue(writer.closed)
951 self.assertEqual(buffer.getvalue(), b'foo')
967 self.assertEqual(buffer.getvalue(), b"foo")
952 968
953 969 def test_flush(self):
954 970 buffer = OpCountingBytesIO()
955 971 dctx = zstd.ZstdDecompressor()
956 972 writer = dctx.stream_writer(buffer)
957 973
958 974 writer.flush()
959 975 self.assertEqual(buffer._flush_count, 1)
960 976 writer.flush()
961 977 self.assertEqual(buffer._flush_count, 2)
962 978
963 979 def test_empty_roundtrip(self):
964 980 cctx = zstd.ZstdCompressor()
965 empty = cctx.compress(b'')
966 self.assertEqual(decompress_via_writer(empty), b'')
981 empty = cctx.compress(b"")
982 self.assertEqual(decompress_via_writer(empty), b"")
967 983
968 984 def test_input_types(self):
969 985 cctx = zstd.ZstdCompressor(level=1)
970 compressed = cctx.compress(b'foo')
986 compressed = cctx.compress(b"foo")
971 987
972 988 mutable_array = bytearray(len(compressed))
973 989 mutable_array[:] = compressed
974 990
975 991 sources = [
976 992 memoryview(compressed),
977 993 bytearray(compressed),
978 994 mutable_array,
979 995 ]
980 996
981 997 dctx = zstd.ZstdDecompressor()
982 998 for source in sources:
983 999 buffer = io.BytesIO()
984 1000
985 1001 decompressor = dctx.stream_writer(buffer)
986 1002 decompressor.write(source)
987 self.assertEqual(buffer.getvalue(), b'foo')
1003 self.assertEqual(buffer.getvalue(), b"foo")
988 1004
989 1005 buffer = NonClosingBytesIO()
990 1006
991 1007 with dctx.stream_writer(buffer) as decompressor:
992 1008 self.assertEqual(decompressor.write(source), 3)
993 1009
994 self.assertEqual(buffer.getvalue(), b'foo')
1010 self.assertEqual(buffer.getvalue(), b"foo")
995 1011
996 1012 buffer = io.BytesIO()
997 1013 writer = dctx.stream_writer(buffer, write_return_read=True)
998 1014 self.assertEqual(writer.write(source), len(source))
999 self.assertEqual(buffer.getvalue(), b'foo')
1015 self.assertEqual(buffer.getvalue(), b"foo")
1000 1016
1001 1017 def test_large_roundtrip(self):
1002 1018 chunks = []
1003 1019 for i in range(255):
1004 chunks.append(struct.Struct('>B').pack(i) * 16384)
1005 orig = b''.join(chunks)
1020 chunks.append(struct.Struct(">B").pack(i) * 16384)
1021 orig = b"".join(chunks)
1006 1022 cctx = zstd.ZstdCompressor()
1007 1023 compressed = cctx.compress(orig)
1008 1024
1009 1025 self.assertEqual(decompress_via_writer(compressed), orig)
1010 1026
1011 1027 def test_multiple_calls(self):
1012 1028 chunks = []
1013 1029 for i in range(255):
1014 1030 for j in range(255):
1015 chunks.append(struct.Struct('>B').pack(j) * i)
1031 chunks.append(struct.Struct(">B").pack(j) * i)
1016 1032
1017 orig = b''.join(chunks)
1033 orig = b"".join(chunks)
1018 1034 cctx = zstd.ZstdCompressor()
1019 1035 compressed = cctx.compress(orig)
1020 1036
1021 1037 buffer = NonClosingBytesIO()
1022 1038 dctx = zstd.ZstdDecompressor()
1023 1039 with dctx.stream_writer(buffer) as decompressor:
1024 1040 pos = 0
1025 1041 while pos < len(compressed):
1026 1042 pos2 = pos + 8192
1027 1043 decompressor.write(compressed[pos:pos2])
1028 1044 pos += 8192
1029 1045 self.assertEqual(buffer.getvalue(), orig)
1030 1046
1031 1047 # Again with write_return_read=True
1032 1048 buffer = io.BytesIO()
1033 1049 writer = dctx.stream_writer(buffer, write_return_read=True)
1034 1050 pos = 0
1035 1051 while pos < len(compressed):
1036 1052 pos2 = pos + 8192
1037 1053 chunk = compressed[pos:pos2]
1038 1054 self.assertEqual(writer.write(chunk), len(chunk))
1039 1055 pos += 8192
1040 1056 self.assertEqual(buffer.getvalue(), orig)
1041 1057
1042 1058 def test_dictionary(self):
1043 1059 samples = []
1044 1060 for i in range(128):
1045 samples.append(b'foo' * 64)
1046 samples.append(b'bar' * 64)
1047 samples.append(b'foobar' * 64)
1061 samples.append(b"foo" * 64)
1062 samples.append(b"bar" * 64)
1063 samples.append(b"foobar" * 64)
1048 1064
1049 1065 d = zstd.train_dictionary(8192, samples)
1050 1066
1051 orig = b'foobar' * 16384
1067 orig = b"foobar" * 16384
1052 1068 buffer = NonClosingBytesIO()
1053 1069 cctx = zstd.ZstdCompressor(dict_data=d)
1054 1070 with cctx.stream_writer(buffer) as compressor:
1055 1071 self.assertEqual(compressor.write(orig), 0)
1056 1072
1057 1073 compressed = buffer.getvalue()
1058 1074 buffer = io.BytesIO()
1059 1075
1060 1076 dctx = zstd.ZstdDecompressor(dict_data=d)
1061 1077 decompressor = dctx.stream_writer(buffer)
1062 1078 self.assertEqual(decompressor.write(compressed), len(orig))
1063 1079 self.assertEqual(buffer.getvalue(), orig)
1064 1080
1065 1081 buffer = NonClosingBytesIO()
1066 1082
1067 1083 with dctx.stream_writer(buffer) as decompressor:
1068 1084 self.assertEqual(decompressor.write(compressed), len(orig))
1069 1085
1070 1086 self.assertEqual(buffer.getvalue(), orig)
1071 1087
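test_dictionary above round-trips through stream_writer(); the trained dictionary should work equally well with the one-shot decompress() call, sketched below (the sample corpus and the 8192-byte dictionary size are copied from the test, not requirements):

    import zstandard as zstd

    samples = []
    for i in range(128):
        samples.extend([b"foo" * 64, b"bar" * 64, b"foobar" * 64])

    d = zstd.train_dictionary(8192, samples)
    cctx = zstd.ZstdCompressor(dict_data=d)
    dctx = zstd.ZstdDecompressor(dict_data=d)

    frame = cctx.compress(b"foobar" * 16384)
    # The frame records the content size by default, so decompress() can size
    # its output buffer without a max_output_size hint.
    assert dctx.decompress(frame) == b"foobar" * 16384
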
1072 1088 def test_memory_size(self):
1073 1089 dctx = zstd.ZstdDecompressor()
1074 1090 buffer = io.BytesIO()
1075 1091
1076 1092 decompressor = dctx.stream_writer(buffer)
1077 1093 size = decompressor.memory_size()
1078 1094 self.assertGreater(size, 100000)
1079 1095
1080 1096 with dctx.stream_writer(buffer) as decompressor:
1081 1097 size = decompressor.memory_size()
1082 1098
1083 1099 self.assertGreater(size, 100000)
1084 1100
1085 1101 def test_write_size(self):
1086 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
1102 source = zstd.ZstdCompressor().compress(b"foobarfoobar")
1087 1103 dest = OpCountingBytesIO()
1088 1104 dctx = zstd.ZstdDecompressor()
1089 1105 with dctx.stream_writer(dest, write_size=1) as decompressor:
1090 s = struct.Struct('>B')
1106 s = struct.Struct(">B")
1091 1107 for c in source:
1092 1108 if not isinstance(c, str):
1093 1109 c = s.pack(c)
1094 1110 decompressor.write(c)
1095 1111
1096 self.assertEqual(dest.getvalue(), b'foobarfoobar')
1112 self.assertEqual(dest.getvalue(), b"foobarfoobar")
1097 1113 self.assertEqual(dest._write_count, len(dest.getvalue()))
1098 1114
1099 1115
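decompress_via_writer() above is the whole write-side decompression pattern in miniature. Spelled out, with the write_return_read behavior the tests check (the payload is illustrative; note the tests use NonClosingBytesIO because closing the writer, e.g. on context-manager exit, also closes the destination):

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b"foobar" * 16384)

    buffer = io.BytesIO()
    dctx = zstd.ZstdDecompressor()
    decompressor = dctx.stream_writer(buffer)
    # write() accepts compressed bytes and sends decompressed output to the
    # wrapped object. By default it returns the number of bytes written to
    # the destination; with write_return_read=True it returns the number of
    # input bytes consumed instead.
    decompressor.write(frame)
    assert buffer.getvalue() == b"foobar" * 16384
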
1100 1116 @make_cffi
1101 class TestDecompressor_read_to_iter(unittest.TestCase):
1117 class TestDecompressor_read_to_iter(TestCase):
1102 1118 def test_type_validation(self):
1103 1119 dctx = zstd.ZstdDecompressor()
1104 1120
1105 1121 # Object with read() works.
1106 1122 dctx.read_to_iter(io.BytesIO())
1107 1123
1108 1124 # Buffer protocol works.
1109 dctx.read_to_iter(b'foobar')
1125 dctx.read_to_iter(b"foobar")
1110 1126
1111 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1112 b''.join(dctx.read_to_iter(True))
1127 with self.assertRaisesRegex(ValueError, "must pass an object with a read"):
1128 b"".join(dctx.read_to_iter(True))
1113 1129
1114 1130 def test_empty_input(self):
1115 1131 dctx = zstd.ZstdDecompressor()
1116 1132
1117 1133 source = io.BytesIO()
1118 1134 it = dctx.read_to_iter(source)
1119 1135 # TODO this is arguably wrong. Should get an error about the missing frame.
1120 1136 with self.assertRaises(StopIteration):
1121 1137 next(it)
1122 1138
1123 it = dctx.read_to_iter(b'')
1139 it = dctx.read_to_iter(b"")
1124 1140 with self.assertRaises(StopIteration):
1125 1141 next(it)
1126 1142
1127 1143 def test_invalid_input(self):
1128 1144 dctx = zstd.ZstdDecompressor()
1129 1145
1130 source = io.BytesIO(b'foobar')
1146 source = io.BytesIO(b"foobar")
1131 1147 it = dctx.read_to_iter(source)
1132 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
1148 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
1133 1149 next(it)
1134 1150
1135 it = dctx.read_to_iter(b'foobar')
1136 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
1151 it = dctx.read_to_iter(b"foobar")
1152 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
1137 1153 next(it)
1138 1154
1139 1155 def test_empty_roundtrip(self):
1140 1156 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1141 empty = cctx.compress(b'')
1157 empty = cctx.compress(b"")
1142 1158
1143 1159 source = io.BytesIO(empty)
1144 1160 source.seek(0)
1145 1161
1146 1162 dctx = zstd.ZstdDecompressor()
1147 1163 it = dctx.read_to_iter(source)
1148 1164
1149 1165 # No chunks should be emitted since there is no data.
1150 1166 with self.assertRaises(StopIteration):
1151 1167 next(it)
1152 1168
1153 1169 # Again for good measure.
1154 1170 with self.assertRaises(StopIteration):
1155 1171 next(it)
1156 1172
1157 1173 def test_skip_bytes_too_large(self):
1158 1174 dctx = zstd.ZstdDecompressor()
1159 1175
1160 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
1161 b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1))
1176 with self.assertRaisesRegex(
1177 ValueError, "skip_bytes must be smaller than read_size"
1178 ):
1179 b"".join(dctx.read_to_iter(b"", skip_bytes=1, read_size=1))
1162 1180
1163 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
1164 b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10))
1181 with self.assertRaisesRegex(
1182 ValueError, "skip_bytes larger than first input chunk"
1183 ):
1184 b"".join(dctx.read_to_iter(b"foobar", skip_bytes=10))
1165 1185
1166 1186 def test_skip_bytes(self):
1167 1187 cctx = zstd.ZstdCompressor(write_content_size=False)
1168 compressed = cctx.compress(b'foobar')
1188 compressed = cctx.compress(b"foobar")
1169 1189
1170 1190 dctx = zstd.ZstdDecompressor()
1171 output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3))
1172 self.assertEqual(output, b'foobar')
1191 output = b"".join(dctx.read_to_iter(b"hdr" + compressed, skip_bytes=3))
1192 self.assertEqual(output, b"foobar")
1173 1193
1174 1194 def test_large_output(self):
1175 1195 source = io.BytesIO()
1176 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1177 source.write(b'o')
1196 source.write(b"f" * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1197 source.write(b"o")
1178 1198 source.seek(0)
1179 1199
1180 1200 cctx = zstd.ZstdCompressor(level=1)
1181 1201 compressed = io.BytesIO(cctx.compress(source.getvalue()))
1182 1202 compressed.seek(0)
1183 1203
1184 1204 dctx = zstd.ZstdDecompressor()
1185 1205 it = dctx.read_to_iter(compressed)
1186 1206
1187 1207 chunks = []
1188 1208 chunks.append(next(it))
1189 1209 chunks.append(next(it))
1190 1210
1191 1211 with self.assertRaises(StopIteration):
1192 1212 next(it)
1193 1213
1194 decompressed = b''.join(chunks)
1214 decompressed = b"".join(chunks)
1195 1215 self.assertEqual(decompressed, source.getvalue())
1196 1216
1197 1217 # And again with buffer protocol.
1198 1218 it = dctx.read_to_iter(compressed.getvalue())
1199 1219 chunks = []
1200 1220 chunks.append(next(it))
1201 1221 chunks.append(next(it))
1202 1222
1203 1223 with self.assertRaises(StopIteration):
1204 1224 next(it)
1205 1225
1206 decompressed = b''.join(chunks)
1226 decompressed = b"".join(chunks)
1207 1227 self.assertEqual(decompressed, source.getvalue())
1208 1228
1209 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
1229 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
1210 1230 def test_large_input(self):
1211 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
1231 bytes = list(struct.Struct(">B").pack(i) for i in range(256))
1212 1232 compressed = NonClosingBytesIO()
1213 1233 input_size = 0
1214 1234 cctx = zstd.ZstdCompressor(level=1)
1215 1235 with cctx.stream_writer(compressed) as compressor:
1216 1236 while True:
1217 1237 compressor.write(random.choice(bytes))
1218 1238 input_size += 1
1219 1239
1220 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1240 have_compressed = (
1241 len(compressed.getvalue())
1242 > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1243 )
1221 1244 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
1222 1245 if have_compressed and have_raw:
1223 1246 break
1224 1247
1225 1248 compressed = io.BytesIO(compressed.getvalue())
1226 self.assertGreater(len(compressed.getvalue()),
1227 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
1249 self.assertGreater(
1250 len(compressed.getvalue()), zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1251 )
1228 1252
1229 1253 dctx = zstd.ZstdDecompressor()
1230 1254 it = dctx.read_to_iter(compressed)
1231 1255
1232 1256 chunks = []
1233 1257 chunks.append(next(it))
1234 1258 chunks.append(next(it))
1235 1259 chunks.append(next(it))
1236 1260
1237 1261 with self.assertRaises(StopIteration):
1238 1262 next(it)
1239 1263
1240 decompressed = b''.join(chunks)
1264 decompressed = b"".join(chunks)
1241 1265 self.assertEqual(len(decompressed), input_size)
1242 1266
1243 1267 # And again with buffer protocol.
1244 1268 it = dctx.read_to_iter(compressed.getvalue())
1245 1269
1246 1270 chunks = []
1247 1271 chunks.append(next(it))
1248 1272 chunks.append(next(it))
1249 1273 chunks.append(next(it))
1250 1274
1251 1275 with self.assertRaises(StopIteration):
1252 1276 next(it)
1253 1277
1254 decompressed = b''.join(chunks)
1278 decompressed = b"".join(chunks)
1255 1279 self.assertEqual(len(decompressed), input_size)
1256 1280
1257 1281 def test_interesting(self):
1258 1282 # Found this edge case via fuzzing.
1259 1283 cctx = zstd.ZstdCompressor(level=1)
1260 1284
1261 1285 source = io.BytesIO()
1262 1286
1263 1287 compressed = NonClosingBytesIO()
1264 1288 with cctx.stream_writer(compressed) as compressor:
1265 1289 for i in range(256):
1266 chunk = b'\0' * 1024
1290 chunk = b"\0" * 1024
1267 1291 compressor.write(chunk)
1268 1292 source.write(chunk)
1269 1293
1270 1294 dctx = zstd.ZstdDecompressor()
1271 1295
1272 simple = dctx.decompress(compressed.getvalue(),
1273 max_output_size=len(source.getvalue()))
1296 simple = dctx.decompress(
1297 compressed.getvalue(), max_output_size=len(source.getvalue())
1298 )
1274 1299 self.assertEqual(simple, source.getvalue())
1275 1300
1276 1301 compressed = io.BytesIO(compressed.getvalue())
1277 streamed = b''.join(dctx.read_to_iter(compressed))
1302 streamed = b"".join(dctx.read_to_iter(compressed))
1278 1303 self.assertEqual(streamed, source.getvalue())
1279 1304
1280 1305 def test_read_write_size(self):
1281 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
1306 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar"))
1282 1307 dctx = zstd.ZstdDecompressor()
1283 1308 for chunk in dctx.read_to_iter(source, read_size=1, write_size=1):
1284 1309 self.assertEqual(len(chunk), 1)
1285 1310
1286 1311 self.assertEqual(source._read_count, len(source.getvalue()))
1287 1312
1288 1313 def test_magic_less(self):
1289 1314 params = zstd.CompressionParameters.from_level(
1290 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
1315 1, format=zstd.FORMAT_ZSTD1_MAGICLESS
1316 )
1291 1317 cctx = zstd.ZstdCompressor(compression_params=params)
1292 frame = cctx.compress(b'foobar')
1318 frame = cctx.compress(b"foobar")
1293 1319
1294 self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd')
1320 self.assertNotEqual(frame[0:4], b"\x28\xb5\x2f\xfd")
1295 1321
1296 1322 dctx = zstd.ZstdDecompressor()
1297 with self.assertRaisesRegexp(
1298 zstd.ZstdError, 'error determining content size from frame header'):
1323 with self.assertRaisesRegex(
1324 zstd.ZstdError, "error determining content size from frame header"
1325 ):
1299 1326 dctx.decompress(frame)
1300 1327
1301 1328 dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
1302 res = b''.join(dctx.read_to_iter(frame))
1303 self.assertEqual(res, b'foobar')
1329 res = b"".join(dctx.read_to_iter(frame))
1330 self.assertEqual(res, b"foobar")
1304 1331
1305 1332
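A condensed statement of the read_to_iter() contract the class above verifies: the input may be an object with a read() method or any buffer-protocol object, and decompressed chunks are yielded lazily (the payload is illustrative):

    import io
    import zstandard as zstd

    frame = zstd.ZstdCompressor(level=1).compress(b"foo" * 1024)
    dctx = zstd.ZstdDecompressor()

    # Both input styles decompress to the same output.
    assert b"".join(dctx.read_to_iter(io.BytesIO(frame))) == b"foo" * 1024
    assert b"".join(dctx.read_to_iter(frame)) == b"foo" * 1024
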
1306 1333 @make_cffi
1307 class TestDecompressor_content_dict_chain(unittest.TestCase):
1334 class TestDecompressor_content_dict_chain(TestCase):
1308 1335 def test_bad_inputs_simple(self):
1309 1336 dctx = zstd.ZstdDecompressor()
1310 1337
1311 1338 with self.assertRaises(TypeError):
1312 dctx.decompress_content_dict_chain(b'foo')
1339 dctx.decompress_content_dict_chain(b"foo")
1313 1340
1314 1341 with self.assertRaises(TypeError):
1315 dctx.decompress_content_dict_chain((b'foo', b'bar'))
1342 dctx.decompress_content_dict_chain((b"foo", b"bar"))
1316 1343
1317 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
1344 with self.assertRaisesRegex(ValueError, "empty input chain"):
1318 1345 dctx.decompress_content_dict_chain([])
1319 1346
1320 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
1321 dctx.decompress_content_dict_chain([u'foo'])
1347 with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"):
1348 dctx.decompress_content_dict_chain([u"foo"])
1322 1349
1323 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
1350 with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"):
1324 1351 dctx.decompress_content_dict_chain([True])
1325 1352
1326 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
1353 with self.assertRaisesRegex(
1354 ValueError, "chunk 0 is too small to contain a zstd frame"
1355 ):
1327 1356 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
1328 1357
1329 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
1330 dctx.decompress_content_dict_chain([b'foo' * 8])
1358 with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd frame"):
1359 dctx.decompress_content_dict_chain([b"foo" * 8])
1331 1360
1332 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
1361 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64)
1333 1362
1334 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
1363 with self.assertRaisesRegex(
1364 ValueError, "chunk 0 missing content size in frame"
1365 ):
1335 1366 dctx.decompress_content_dict_chain([no_size])
1336 1367
1337 1368 # Corrupt first frame.
1338 frame = zstd.ZstdCompressor().compress(b'foo' * 64)
1369 frame = zstd.ZstdCompressor().compress(b"foo" * 64)
1339 1370 frame = frame[0:12] + frame[15:]
1340 with self.assertRaisesRegexp(zstd.ZstdError,
1341 'chunk 0 did not decompress full frame'):
1371 with self.assertRaisesRegex(
1372 zstd.ZstdError, "chunk 0 did not decompress full frame"
1373 ):
1342 1374 dctx.decompress_content_dict_chain([frame])
1343 1375
1344 1376 def test_bad_subsequent_input(self):
1345 initial = zstd.ZstdCompressor().compress(b'foo' * 64)
1377 initial = zstd.ZstdCompressor().compress(b"foo" * 64)
1346 1378
1347 1379 dctx = zstd.ZstdDecompressor()
1348 1380
1349 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
1350 dctx.decompress_content_dict_chain([initial, u'foo'])
1381 with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"):
1382 dctx.decompress_content_dict_chain([initial, u"foo"])
1351 1383
1352 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
1384 with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"):
1353 1385 dctx.decompress_content_dict_chain([initial, None])
1354 1386
1355 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
1387 with self.assertRaisesRegex(
1388 ValueError, "chunk 1 is too small to contain a zstd frame"
1389 ):
1356 1390 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
1357 1391
1358 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
1359 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
1392 with self.assertRaisesRegex(ValueError, "chunk 1 is not a valid zstd frame"):
1393 dctx.decompress_content_dict_chain([initial, b"foo" * 8])
1360 1394
1361 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
1395 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64)
1362 1396
1363 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
1397 with self.assertRaisesRegex(
1398 ValueError, "chunk 1 missing content size in frame"
1399 ):
1364 1400 dctx.decompress_content_dict_chain([initial, no_size])
1365 1401
1366 1402 # Corrupt second frame.
1367 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
1368 frame = cctx.compress(b'bar' * 64)
1403 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b"foo" * 64))
1404 frame = cctx.compress(b"bar" * 64)
1369 1405 frame = frame[0:12] + frame[15:]
1370 1406
1371 with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'):
1407 with self.assertRaisesRegex(
1408 zstd.ZstdError, "chunk 1 did not decompress full frame"
1409 ):
1372 1410 dctx.decompress_content_dict_chain([initial, frame])
1373 1411
1374 1412 def test_simple(self):
1375 1413 original = [
1376 b'foo' * 64,
1377 b'foobar' * 64,
1378 b'baz' * 64,
1379 b'foobaz' * 64,
1380 b'foobarbaz' * 64,
1414 b"foo" * 64,
1415 b"foobar" * 64,
1416 b"baz" * 64,
1417 b"foobaz" * 64,
1418 b"foobarbaz" * 64,
1381 1419 ]
1382 1420
1383 1421 chunks = []
1384 1422 chunks.append(zstd.ZstdCompressor().compress(original[0]))
1385 1423 for i, chunk in enumerate(original[1:]):
1386 1424 d = zstd.ZstdCompressionDict(original[i])
1387 1425 cctx = zstd.ZstdCompressor(dict_data=d)
1388 1426 chunks.append(cctx.compress(chunk))
1389 1427
1390 1428 for i in range(1, len(original)):
1391 1429 chain = chunks[0:i]
1392 1430 expected = original[i - 1]
1393 1431 dctx = zstd.ZstdDecompressor()
1394 1432 decompressed = dctx.decompress_content_dict_chain(chain)
1395 1433 self.assertEqual(decompressed, expected)
1396 1434
1397 1435
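The chain semantics validated above, in miniature: each frame after the first is compressed against the previous uncompressed chunk as a dictionary, and decompress_content_dict_chain() replays the chain, returning only the final chunk (chunk contents follow test_simple):

    import zstandard as zstd

    original = [b"foo" * 64, b"foobar" * 64]

    chunks = [zstd.ZstdCompressor().compress(original[0])]
    d = zstd.ZstdCompressionDict(original[0])
    chunks.append(zstd.ZstdCompressor(dict_data=d).compress(original[1]))

    dctx = zstd.ZstdDecompressor()
    # Only the last link of the chain is returned.
    assert dctx.decompress_content_dict_chain(chunks) == original[1]
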
1398 1436 # TODO enable for CFFI
1399 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
1437 class TestDecompressor_multi_decompress_to_buffer(TestCase):
1400 1438 def test_invalid_inputs(self):
1401 1439 dctx = zstd.ZstdDecompressor()
1402 1440
1403 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1404 self.skipTest('multi_decompress_to_buffer not available')
1441 if not hasattr(dctx, "multi_decompress_to_buffer"):
1442 self.skipTest("multi_decompress_to_buffer not available")
1405 1443
1406 1444 with self.assertRaises(TypeError):
1407 1445 dctx.multi_decompress_to_buffer(True)
1408 1446
1409 1447 with self.assertRaises(TypeError):
1410 1448 dctx.multi_decompress_to_buffer((1, 2))
1411 1449
1412 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1413 dctx.multi_decompress_to_buffer([u'foo'])
1450 with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"):
1451 dctx.multi_decompress_to_buffer([u"foo"])
1414 1452
1415 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
1416 dctx.multi_decompress_to_buffer([b'foobarbaz'])
1453 with self.assertRaisesRegex(
1454 ValueError, "could not determine decompressed size of item 0"
1455 ):
1456 dctx.multi_decompress_to_buffer([b"foobarbaz"])
1417 1457
1418 1458 def test_list_input(self):
1419 1459 cctx = zstd.ZstdCompressor()
1420 1460
1421 original = [b'foo' * 4, b'bar' * 6]
1461 original = [b"foo" * 4, b"bar" * 6]
1422 1462 frames = [cctx.compress(d) for d in original]
1423 1463
1424 1464 dctx = zstd.ZstdDecompressor()
1425 1465
1426 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1427 self.skipTest('multi_decompress_to_buffer not available')
1466 if not hasattr(dctx, "multi_decompress_to_buffer"):
1467 self.skipTest("multi_decompress_to_buffer not available")
1428 1468
1429 1469 result = dctx.multi_decompress_to_buffer(frames)
1430 1470
1431 1471 self.assertEqual(len(result), len(frames))
1432 1472 self.assertEqual(result.size(), sum(map(len, original)))
1433 1473
1434 1474 for i, data in enumerate(original):
1435 1475 self.assertEqual(result[i].tobytes(), data)
1436 1476
1437 1477 self.assertEqual(result[0].offset, 0)
1438 1478 self.assertEqual(len(result[0]), 12)
1439 1479 self.assertEqual(result[1].offset, 12)
1440 1480 self.assertEqual(len(result[1]), 18)
1441 1481
1442 1482 def test_list_input_frame_sizes(self):
1443 1483 cctx = zstd.ZstdCompressor()
1444 1484
1445 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1485 original = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
1446 1486 frames = [cctx.compress(d) for d in original]
1447 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1487 sizes = struct.pack("=" + "Q" * len(original), *map(len, original))
1448 1488
1449 1489 dctx = zstd.ZstdDecompressor()
1450 1490
1451 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1452 self.skipTest('multi_decompress_to_buffer not available')
1491 if not hasattr(dctx, "multi_decompress_to_buffer"):
1492 self.skipTest("multi_decompress_to_buffer not available")
1453 1493
1454 1494 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
1455 1495
1456 1496 self.assertEqual(len(result), len(frames))
1457 1497 self.assertEqual(result.size(), sum(map(len, original)))
1458 1498
1459 1499 for i, data in enumerate(original):
1460 1500 self.assertEqual(result[i].tobytes(), data)
1461 1501
1462 1502 def test_buffer_with_segments_input(self):
1463 1503 cctx = zstd.ZstdCompressor()
1464 1504
1465 original = [b'foo' * 4, b'bar' * 6]
1505 original = [b"foo" * 4, b"bar" * 6]
1466 1506 frames = [cctx.compress(d) for d in original]
1467 1507
1468 1508 dctx = zstd.ZstdDecompressor()
1469 1509
1470 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1471 self.skipTest('multi_decompress_to_buffer not available')
1510 if not hasattr(dctx, "multi_decompress_to_buffer"):
1511 self.skipTest("multi_decompress_to_buffer not available")
1472 1512
1473 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
1474 b = zstd.BufferWithSegments(b''.join(frames), segments)
1513 segments = struct.pack(
1514 "=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1])
1515 )
1516 b = zstd.BufferWithSegments(b"".join(frames), segments)
1475 1517
1476 1518 result = dctx.multi_decompress_to_buffer(b)
1477 1519
1478 1520 self.assertEqual(len(result), len(frames))
1479 1521 self.assertEqual(result[0].offset, 0)
1480 1522 self.assertEqual(len(result[0]), 12)
1481 1523 self.assertEqual(result[1].offset, 12)
1482 1524 self.assertEqual(len(result[1]), 18)
1483 1525
1484 1526 def test_buffer_with_segments_sizes(self):
1485 1527 cctx = zstd.ZstdCompressor(write_content_size=False)
1486 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1528 original = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
1487 1529 frames = [cctx.compress(d) for d in original]
1488 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1530 sizes = struct.pack("=" + "Q" * len(original), *map(len, original))
1489 1531
1490 1532 dctx = zstd.ZstdDecompressor()
1491 1533
1492 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1493 self.skipTest('multi_decompress_to_buffer not available')
1534 if not hasattr(dctx, "multi_decompress_to_buffer"):
1535 self.skipTest("multi_decompress_to_buffer not available")
1494 1536
1495 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
1496 len(frames[0]), len(frames[1]),
1497 len(frames[0]) + len(frames[1]), len(frames[2]))
1498 b = zstd.BufferWithSegments(b''.join(frames), segments)
1537 segments = struct.pack(
1538 "=QQQQQQ",
1539 0,
1540 len(frames[0]),
1541 len(frames[0]),
1542 len(frames[1]),
1543 len(frames[0]) + len(frames[1]),
1544 len(frames[2]),
1545 )
1546 b = zstd.BufferWithSegments(b"".join(frames), segments)
1499 1547
1500 1548 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
1501 1549
1502 1550 self.assertEqual(len(result), len(frames))
1503 1551 self.assertEqual(result.size(), sum(map(len, original)))
1504 1552
1505 1553 for i, data in enumerate(original):
1506 1554 self.assertEqual(result[i].tobytes(), data)
1507 1555
1508 1556 def test_buffer_with_segments_collection_input(self):
1509 1557 cctx = zstd.ZstdCompressor()
1510 1558
1511 1559 original = [
1512 b'foo0' * 2,
1513 b'foo1' * 3,
1514 b'foo2' * 4,
1515 b'foo3' * 5,
1516 b'foo4' * 6,
1560 b"foo0" * 2,
1561 b"foo1" * 3,
1562 b"foo2" * 4,
1563 b"foo3" * 5,
1564 b"foo4" * 6,
1517 1565 ]
1518 1566
1519 if not hasattr(cctx, 'multi_compress_to_buffer'):
1520 self.skipTest('multi_compress_to_buffer not available')
1567 if not hasattr(cctx, "multi_compress_to_buffer"):
1568 self.skipTest("multi_compress_to_buffer not available")
1521 1569
1522 1570 frames = cctx.multi_compress_to_buffer(original)
1523 1571
1524 1572 # Check round trip.
1525 1573 dctx = zstd.ZstdDecompressor()
1526 1574
1527 1575 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
1528 1576
1529 1577 self.assertEqual(len(decompressed), len(original))
1530 1578
1531 1579 for i, data in enumerate(original):
1532 1580 self.assertEqual(data, decompressed[i].tobytes())
1533 1581
1534 1582 # And a manual mode.
1535 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
1536 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1537 0, len(frames[0]),
1538 len(frames[0]), len(frames[1])))
1583 b = b"".join([frames[0].tobytes(), frames[1].tobytes()])
1584 b1 = zstd.BufferWithSegments(
1585 b, struct.pack("=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1]))
1586 )
1539 1587
1540 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
1541 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1542 0, len(frames[2]),
1543 len(frames[2]), len(frames[3]),
1544 len(frames[2]) + len(frames[3]), len(frames[4])))
1588 b = b"".join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
1589 b2 = zstd.BufferWithSegments(
1590 b,
1591 struct.pack(
1592 "=QQQQQQ",
1593 0,
1594 len(frames[2]),
1595 len(frames[2]),
1596 len(frames[3]),
1597 len(frames[2]) + len(frames[3]),
1598 len(frames[4]),
1599 ),
1600 )
1545 1601
1546 1602 c = zstd.BufferWithSegmentsCollection(b1, b2)
1547 1603
1548 1604 dctx = zstd.ZstdDecompressor()
1549 1605 decompressed = dctx.multi_decompress_to_buffer(c)
1550 1606
1551 1607 self.assertEqual(len(decompressed), 5)
1552 1608 for i in range(5):
1553 1609 self.assertEqual(decompressed[i].tobytes(), original[i])
1554 1610
1555 1611 def test_dict(self):
1556 1612 d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16)
1557 1613
1558 1614 cctx = zstd.ZstdCompressor(dict_data=d, level=1)
1559 1615 frames = [cctx.compress(s) for s in generate_samples()]
1560 1616
1561 1617 dctx = zstd.ZstdDecompressor(dict_data=d)
1562 1618
1563 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1564 self.skipTest('multi_decompress_to_buffer not available')
1619 if not hasattr(dctx, "multi_decompress_to_buffer"):
1620 self.skipTest("multi_decompress_to_buffer not available")
1565 1621
1566 1622 result = dctx.multi_decompress_to_buffer(frames)
1567 1623
1568 1624 self.assertEqual([o.tobytes() for o in result], generate_samples())
1569 1625
1570 1626 def test_multiple_threads(self):
1571 1627 cctx = zstd.ZstdCompressor()
1572 1628
1573 1629 frames = []
1574 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
1575 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
1630 frames.extend(cctx.compress(b"x" * 64) for i in range(256))
1631 frames.extend(cctx.compress(b"y" * 64) for i in range(256))
1576 1632
1577 1633 dctx = zstd.ZstdDecompressor()
1578 1634
1579 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1580 self.skipTest('multi_decompress_to_buffer not available')
1635 if not hasattr(dctx, "multi_decompress_to_buffer"):
1636 self.skipTest("multi_decompress_to_buffer not available")
1581 1637
1582 1638 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
1583 1639
1584 1640 self.assertEqual(len(result), len(frames))
1585 1641 self.assertEqual(result.size(), 2 * 64 * 256)
1586 self.assertEqual(result[0].tobytes(), b'x' * 64)
1587 self.assertEqual(result[256].tobytes(), b'y' * 64)
1642 self.assertEqual(result[0].tobytes(), b"x" * 64)
1643 self.assertEqual(result[256].tobytes(), b"y" * 64)
1588 1644
1589 1645 def test_item_failure(self):
1590 1646 cctx = zstd.ZstdCompressor()
1591 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
1647 frames = [cctx.compress(b"x" * 128), cctx.compress(b"y" * 128)]
1592 1648
1593 frames[1] = frames[1][0:15] + b'extra' + frames[1][15:]
1649 frames[1] = frames[1][0:15] + b"extra" + frames[1][15:]
1594 1650
1595 1651 dctx = zstd.ZstdDecompressor()
1596 1652
1597 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1598 self.skipTest('multi_decompress_to_buffer not available')
1653 if not hasattr(dctx, "multi_decompress_to_buffer"):
1654 self.skipTest("multi_decompress_to_buffer not available")
1599 1655
1600 with self.assertRaisesRegexp(zstd.ZstdError,
1601 'error decompressing item 1: ('
1602 'Corrupted block|'
1603 'Destination buffer is too small)'):
1656 with self.assertRaisesRegex(
1657 zstd.ZstdError,
1658 "error decompressing item 1: ("
1659 "Corrupted block|"
1660 "Destination buffer is too small)",
1661 ):
1604 1662 dctx.multi_decompress_to_buffer(frames)
1605 1663
1606 with self.assertRaisesRegexp(zstd.ZstdError,
1607 'error decompressing item 1: ('
1608 'Corrupted block|'
1609 'Destination buffer is too small)'):
1664 with self.assertRaisesRegex(
1665 zstd.ZstdError,
1666 "error decompressing item 1: ("
1667 "Corrupted block|"
1668 "Destination buffer is too small)",
1669 ):
1610 1670 dctx.multi_decompress_to_buffer(frames, threads=2)
1611
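A minimal sketch of the batch API guarded by the hasattr() checks above; it is exposed only by the C backend, which is why every test skips when the method is absent (inputs follow test_list_input, the thread count is arbitrary):

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    original = [b"foo" * 4, b"bar" * 6]
    frames = [cctx.compress(d) for d in original]

    dctx = zstd.ZstdDecompressor()
    if hasattr(dctx, "multi_decompress_to_buffer"):
        # Results come back as segments over one shared output buffer.
        result = dctx.multi_decompress_to_buffer(frames, threads=2)
        assert [segment.tobytes() for segment in result] == original
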
@@ -1,485 +1,576 @@
1 1 import io
2 2 import os
3 3 import unittest
4 4
5 5 try:
6 6 import hypothesis
7 7 import hypothesis.strategies as strategies
8 8 except ImportError:
9 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest("hypothesis not available")
10 10
11 11 import zstandard as zstd
12 12
13 from . common import (
13 from .common import (
14 14 make_cffi,
15 15 NonClosingBytesIO,
16 16 random_input_data,
17 TestCase,
17 18 )
18 19
19 20
20 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
21 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
21 22 @make_cffi
22 class TestDecompressor_stream_reader_fuzzing(unittest.TestCase):
23 class TestDecompressor_stream_reader_fuzzing(TestCase):
23 24 @hypothesis.settings(
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
26 level=strategies.integers(min_value=1, max_value=5),
27 streaming=strategies.booleans(),
28 source_read_size=strategies.integers(1, 1048576),
29 read_sizes=strategies.data())
30 def test_stream_source_read_variance(self, original, level, streaming,
31 source_read_size, read_sizes):
25 suppress_health_check=[
26 hypothesis.HealthCheck.large_base_example,
27 hypothesis.HealthCheck.too_slow,
28 ]
29 )
30 @hypothesis.given(
31 original=strategies.sampled_from(random_input_data()),
32 level=strategies.integers(min_value=1, max_value=5),
33 streaming=strategies.booleans(),
34 source_read_size=strategies.integers(1, 1048576),
35 read_sizes=strategies.data(),
36 )
37 def test_stream_source_read_variance(
38 self, original, level, streaming, source_read_size, read_sizes
39 ):
32 40 cctx = zstd.ZstdCompressor(level=level)
33 41
34 42 if streaming:
35 43 source = io.BytesIO()
36 44 writer = cctx.stream_writer(source)
37 45 writer.write(original)
38 46 writer.flush(zstd.FLUSH_FRAME)
39 47 source.seek(0)
40 48 else:
41 49 frame = cctx.compress(original)
42 50 source = io.BytesIO(frame)
43 51
44 52 dctx = zstd.ZstdDecompressor()
45 53
46 54 chunks = []
47 55 with dctx.stream_reader(source, read_size=source_read_size) as reader:
48 56 while True:
49 57 read_size = read_sizes.draw(strategies.integers(-1, 131072))
50 58 chunk = reader.read(read_size)
51 59 if not chunk and read_size:
52 60 break
53 61
54 62 chunks.append(chunk)
55 63
56 self.assertEqual(b''.join(chunks), original)
64 self.assertEqual(b"".join(chunks), original)
57 65
58 66 # Similar to above except we have a constant read() size.
59 67 @hypothesis.settings(
60 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
61 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
62 level=strategies.integers(min_value=1, max_value=5),
63 streaming=strategies.booleans(),
64 source_read_size=strategies.integers(1, 1048576),
65 read_size=strategies.integers(-1, 131072))
66 def test_stream_source_read_size(self, original, level, streaming,
67 source_read_size, read_size):
68 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
69 )
70 @hypothesis.given(
71 original=strategies.sampled_from(random_input_data()),
72 level=strategies.integers(min_value=1, max_value=5),
73 streaming=strategies.booleans(),
74 source_read_size=strategies.integers(1, 1048576),
75 read_size=strategies.integers(-1, 131072),
76 )
77 def test_stream_source_read_size(
78 self, original, level, streaming, source_read_size, read_size
79 ):
68 80 if read_size == 0:
69 81 read_size = 1
70 82
71 83 cctx = zstd.ZstdCompressor(level=level)
72 84
73 85 if streaming:
74 86 source = io.BytesIO()
75 87 writer = cctx.stream_writer(source)
76 88 writer.write(original)
77 89 writer.flush(zstd.FLUSH_FRAME)
78 90 source.seek(0)
79 91 else:
80 92 frame = cctx.compress(original)
81 93 source = io.BytesIO(frame)
82 94
83 95 dctx = zstd.ZstdDecompressor()
84 96
85 97 chunks = []
86 98 reader = dctx.stream_reader(source, read_size=source_read_size)
87 99 while True:
88 100 chunk = reader.read(read_size)
89 101 if not chunk and read_size:
90 102 break
91 103
92 104 chunks.append(chunk)
93 105
94 self.assertEqual(b''.join(chunks), original)
106 self.assertEqual(b"".join(chunks), original)
95 107
96 108 @hypothesis.settings(
97 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
98 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
99 level=strategies.integers(min_value=1, max_value=5),
100 streaming=strategies.booleans(),
101 source_read_size=strategies.integers(1, 1048576),
102 read_sizes=strategies.data())
103 def test_buffer_source_read_variance(self, original, level, streaming,
104 source_read_size, read_sizes):
109 suppress_health_check=[
110 hypothesis.HealthCheck.large_base_example,
111 hypothesis.HealthCheck.too_slow,
112 ]
113 )
114 @hypothesis.given(
115 original=strategies.sampled_from(random_input_data()),
116 level=strategies.integers(min_value=1, max_value=5),
117 streaming=strategies.booleans(),
118 source_read_size=strategies.integers(1, 1048576),
119 read_sizes=strategies.data(),
120 )
121 def test_buffer_source_read_variance(
122 self, original, level, streaming, source_read_size, read_sizes
123 ):
105 124 cctx = zstd.ZstdCompressor(level=level)
106 125
107 126 if streaming:
108 127 source = io.BytesIO()
109 128 writer = cctx.stream_writer(source)
110 129 writer.write(original)
111 130 writer.flush(zstd.FLUSH_FRAME)
112 131 frame = source.getvalue()
113 132 else:
114 133 frame = cctx.compress(original)
115 134
116 135 dctx = zstd.ZstdDecompressor()
117 136 chunks = []
118 137
119 138 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
120 139 while True:
121 140 read_size = read_sizes.draw(strategies.integers(-1, 131072))
122 141 chunk = reader.read(read_size)
123 142 if not chunk and read_size:
124 143 break
125 144
126 145 chunks.append(chunk)
127 146
128 self.assertEqual(b''.join(chunks), original)
147 self.assertEqual(b"".join(chunks), original)
129 148
130 149 # Similar to above except we have a constant read() size.
131 150 @hypothesis.settings(
132 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
133 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
134 level=strategies.integers(min_value=1, max_value=5),
135 streaming=strategies.booleans(),
136 source_read_size=strategies.integers(1, 1048576),
137 read_size=strategies.integers(-1, 131072))
138 def test_buffer_source_constant_read_size(self, original, level, streaming,
139 source_read_size, read_size):
151 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
152 )
153 @hypothesis.given(
154 original=strategies.sampled_from(random_input_data()),
155 level=strategies.integers(min_value=1, max_value=5),
156 streaming=strategies.booleans(),
157 source_read_size=strategies.integers(1, 1048576),
158 read_size=strategies.integers(-1, 131072),
159 )
160 def test_buffer_source_constant_read_size(
161 self, original, level, streaming, source_read_size, read_size
162 ):
140 163 if read_size == 0:
141 164 read_size = -1
142 165
143 166 cctx = zstd.ZstdCompressor(level=level)
144 167
145 168 if streaming:
146 169 source = io.BytesIO()
147 170 writer = cctx.stream_writer(source)
148 171 writer.write(original)
149 172 writer.flush(zstd.FLUSH_FRAME)
150 173 frame = source.getvalue()
151 174 else:
152 175 frame = cctx.compress(original)
153 176
154 177 dctx = zstd.ZstdDecompressor()
155 178 chunks = []
156 179
157 180 reader = dctx.stream_reader(frame, read_size=source_read_size)
158 181 while True:
159 182 chunk = reader.read(read_size)
160 183 if not chunk and read_size:
161 184 break
162 185
163 186 chunks.append(chunk)
164 187
165 self.assertEqual(b''.join(chunks), original)
188 self.assertEqual(b"".join(chunks), original)
166 189
167 190 @hypothesis.settings(
168 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
169 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
170 level=strategies.integers(min_value=1, max_value=5),
171 streaming=strategies.booleans(),
172 source_read_size=strategies.integers(1, 1048576))
173 def test_stream_source_readall(self, original, level, streaming,
174 source_read_size):
191 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
192 )
193 @hypothesis.given(
194 original=strategies.sampled_from(random_input_data()),
195 level=strategies.integers(min_value=1, max_value=5),
196 streaming=strategies.booleans(),
197 source_read_size=strategies.integers(1, 1048576),
198 )
199 def test_stream_source_readall(self, original, level, streaming, source_read_size):
175 200 cctx = zstd.ZstdCompressor(level=level)
176 201
177 202 if streaming:
178 203 source = io.BytesIO()
179 204 writer = cctx.stream_writer(source)
180 205 writer.write(original)
181 206 writer.flush(zstd.FLUSH_FRAME)
182 207 source.seek(0)
183 208 else:
184 209 frame = cctx.compress(original)
185 210 source = io.BytesIO(frame)
186 211
187 212 dctx = zstd.ZstdDecompressor()
188 213
189 214 data = dctx.stream_reader(source, read_size=source_read_size).readall()
190 215 self.assertEqual(data, original)
191 216
192 217 @hypothesis.settings(
193 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
194 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
195 level=strategies.integers(min_value=1, max_value=5),
196 streaming=strategies.booleans(),
197 source_read_size=strategies.integers(1, 1048576),
198 read_sizes=strategies.data())
199 def test_stream_source_read1_variance(self, original, level, streaming,
200 source_read_size, read_sizes):
218 suppress_health_check=[
219 hypothesis.HealthCheck.large_base_example,
220 hypothesis.HealthCheck.too_slow,
221 ]
222 )
223 @hypothesis.given(
224 original=strategies.sampled_from(random_input_data()),
225 level=strategies.integers(min_value=1, max_value=5),
226 streaming=strategies.booleans(),
227 source_read_size=strategies.integers(1, 1048576),
228 read_sizes=strategies.data(),
229 )
230 def test_stream_source_read1_variance(
231 self, original, level, streaming, source_read_size, read_sizes
232 ):
201 233 cctx = zstd.ZstdCompressor(level=level)
202 234
203 235 if streaming:
204 236 source = io.BytesIO()
205 237 writer = cctx.stream_writer(source)
206 238 writer.write(original)
207 239 writer.flush(zstd.FLUSH_FRAME)
208 240 source.seek(0)
209 241 else:
210 242 frame = cctx.compress(original)
211 243 source = io.BytesIO(frame)
212 244
213 245 dctx = zstd.ZstdDecompressor()
214 246
215 247 chunks = []
216 248 with dctx.stream_reader(source, read_size=source_read_size) as reader:
217 249 while True:
218 250 read_size = read_sizes.draw(strategies.integers(-1, 131072))
219 251 chunk = reader.read1(read_size)
220 252 if not chunk and read_size:
221 253 break
222 254
223 255 chunks.append(chunk)
224 256
225 self.assertEqual(b''.join(chunks), original)
257 self.assertEqual(b"".join(chunks), original)
226 258
227 259 @hypothesis.settings(
228 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
229 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
230 level=strategies.integers(min_value=1, max_value=5),
231 streaming=strategies.booleans(),
232 source_read_size=strategies.integers(1, 1048576),
233 read_sizes=strategies.data())
234 def test_stream_source_readinto1_variance(self, original, level, streaming,
235 source_read_size, read_sizes):
260 suppress_health_check=[
261 hypothesis.HealthCheck.large_base_example,
262 hypothesis.HealthCheck.too_slow,
263 ]
264 )
265 @hypothesis.given(
266 original=strategies.sampled_from(random_input_data()),
267 level=strategies.integers(min_value=1, max_value=5),
268 streaming=strategies.booleans(),
269 source_read_size=strategies.integers(1, 1048576),
270 read_sizes=strategies.data(),
271 )
272 def test_stream_source_readinto1_variance(
273 self, original, level, streaming, source_read_size, read_sizes
274 ):
236 275 cctx = zstd.ZstdCompressor(level=level)
237 276
238 277 if streaming:
239 278 source = io.BytesIO()
240 279 writer = cctx.stream_writer(source)
241 280 writer.write(original)
242 281 writer.flush(zstd.FLUSH_FRAME)
243 282 source.seek(0)
244 283 else:
245 284 frame = cctx.compress(original)
246 285 source = io.BytesIO(frame)
247 286
248 287 dctx = zstd.ZstdDecompressor()
249 288
250 289 chunks = []
251 290 with dctx.stream_reader(source, read_size=source_read_size) as reader:
252 291 while True:
253 292 read_size = read_sizes.draw(strategies.integers(1, 131072))
254 293 b = bytearray(read_size)
255 294 count = reader.readinto1(b)
256 295
257 296 if not count:
258 297 break
259 298
260 299 chunks.append(bytes(b[0:count]))
261 300
262 self.assertEqual(b''.join(chunks), original)
301 self.assertEqual(b"".join(chunks), original)
263 302
264 303 @hypothesis.settings(
265 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
304 suppress_health_check=[
305 hypothesis.HealthCheck.large_base_example,
306 hypothesis.HealthCheck.too_slow,
307 ]
308 )
266 309 @hypothesis.given(
267 310 original=strategies.sampled_from(random_input_data()),
268 311 level=strategies.integers(min_value=1, max_value=5),
269 312 source_read_size=strategies.integers(1, 1048576),
270 313 seek_amounts=strategies.data(),
271 read_sizes=strategies.data())
272 def test_relative_seeks(self, original, level, source_read_size, seek_amounts,
273 read_sizes):
314 read_sizes=strategies.data(),
315 )
316 def test_relative_seeks(
317 self, original, level, source_read_size, seek_amounts, read_sizes
318 ):
274 319 cctx = zstd.ZstdCompressor(level=level)
275 320 frame = cctx.compress(original)
276 321
277 322 dctx = zstd.ZstdDecompressor()
278 323
279 324 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
280 325 while True:
281 326 amount = seek_amounts.draw(strategies.integers(0, 16384))
282 327 reader.seek(amount, os.SEEK_CUR)
283 328
284 329 offset = reader.tell()
285 330 read_amount = read_sizes.draw(strategies.integers(1, 16384))
286 331 chunk = reader.read(read_amount)
287 332
288 333 if not chunk:
289 334 break
290 335
291 self.assertEqual(original[offset:offset + len(chunk)], chunk)
336 self.assertEqual(original[offset : offset + len(chunk)], chunk)
292 337
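test_relative_seeks above depends on stream_reader() supporting forward-only relative seeks, which decompress and discard the skipped bytes. A distilled version (payload and offsets are illustrative):

    import io
    import os
    import zstandard as zstd

    original = bytes(range(256)) * 1024
    frame = zstd.ZstdCompressor(level=1).compress(original)

    dctx = zstd.ZstdDecompressor()
    with dctx.stream_reader(io.BytesIO(frame)) as reader:
        # Seeking forward decompresses and discards the intervening bytes;
        # tell() reports the position in the decompressed stream.
        reader.seek(1000, os.SEEK_CUR)
        assert reader.tell() == 1000
        assert reader.read(16) == original[1000:1016]
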
293 338 @hypothesis.settings(
294 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
339 suppress_health_check=[
340 hypothesis.HealthCheck.large_base_example,
341 hypothesis.HealthCheck.too_slow,
342 ]
343 )
295 344 @hypothesis.given(
296 345 originals=strategies.data(),
297 346 frame_count=strategies.integers(min_value=2, max_value=10),
298 347 level=strategies.integers(min_value=1, max_value=5),
299 348 source_read_size=strategies.integers(1, 1048576),
300 read_sizes=strategies.data())
301 def test_multiple_frames(self, originals, frame_count, level,
302 source_read_size, read_sizes):
349 read_sizes=strategies.data(),
350 )
351 def test_multiple_frames(
352 self, originals, frame_count, level, source_read_size, read_sizes
353 ):
303 354
304 355 cctx = zstd.ZstdCompressor(level=level)
305 356 source = io.BytesIO()
306 357 buffer = io.BytesIO()
307 358 writer = cctx.stream_writer(buffer)
308 359
309 360 for i in range(frame_count):
310 361 data = originals.draw(strategies.sampled_from(random_input_data()))
311 362 source.write(data)
312 363 writer.write(data)
313 364 writer.flush(zstd.FLUSH_FRAME)
314 365
315 366 dctx = zstd.ZstdDecompressor()
316 367 buffer.seek(0)
317 reader = dctx.stream_reader(buffer, read_size=source_read_size,
318 read_across_frames=True)
368 reader = dctx.stream_reader(
369 buffer, read_size=source_read_size, read_across_frames=True
370 )
319 371
320 372 chunks = []
321 373
322 374 while True:
323 375 read_amount = read_sizes.draw(strategies.integers(-1, 16384))
324 376 chunk = reader.read(read_amount)
325 377
326 378 if not chunk and read_amount:
327 379 break
328 380
329 381 chunks.append(chunk)
330 382
331 self.assertEqual(source.getvalue(), b''.join(chunks))
383 self.assertEqual(source.getvalue(), b"".join(chunks))
332 384
333 385
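Every fuzzing class below follows the shape of the one above: hypothesis draws a corpus sample, a compression level, and a stream of chunk sizes, and the test asserts an exact round trip. The skeleton with the size variance stripped out, as a sketch (test_round_trip and its strategy bounds are illustrative, not part of the suite):

    import io

    import hypothesis
    import hypothesis.strategies as strategies
    import zstandard as zstd

    @hypothesis.given(
        data=strategies.binary(min_size=1, max_size=8192),
        level=strategies.integers(min_value=1, max_value=5),
    )
    def test_round_trip(data, level):
        frame = zstd.ZstdCompressor(level=level).compress(data)
        reader = zstd.ZstdDecompressor().stream_reader(io.BytesIO(frame))
        assert reader.readall() == data
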
334 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
386 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
335 387 @make_cffi
336 class TestDecompressor_stream_writer_fuzzing(unittest.TestCase):
337 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
338 level=strategies.integers(min_value=1, max_value=5),
339 write_size=strategies.integers(min_value=1, max_value=8192),
340 input_sizes=strategies.data())
388 class TestDecompressor_stream_writer_fuzzing(TestCase):
389 @hypothesis.settings(
390 suppress_health_check=[
391 hypothesis.HealthCheck.large_base_example,
392 hypothesis.HealthCheck.too_slow,
393 ]
394 )
395 @hypothesis.given(
396 original=strategies.sampled_from(random_input_data()),
397 level=strategies.integers(min_value=1, max_value=5),
398 write_size=strategies.integers(min_value=1, max_value=8192),
399 input_sizes=strategies.data(),
400 )
341 401 def test_write_size_variance(self, original, level, write_size, input_sizes):
342 402 cctx = zstd.ZstdCompressor(level=level)
343 403 frame = cctx.compress(original)
344 404
345 405 dctx = zstd.ZstdDecompressor()
346 406 source = io.BytesIO(frame)
347 407 dest = NonClosingBytesIO()
348 408
349 409 with dctx.stream_writer(dest, write_size=write_size) as decompressor:
350 410 while True:
351 411 input_size = input_sizes.draw(strategies.integers(1, 4096))
352 412 chunk = source.read(input_size)
353 413 if not chunk:
354 414 break
355 415
356 416 decompressor.write(chunk)
357 417
358 418 self.assertEqual(dest.getvalue(), original)
359 419
360 420
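A minimal sketch of the stream_writer decompression path tested above. The test wraps its destination in a NonClosingBytesIO, presumably because leaving the context manager closes the inner stream, so this sketch skips the with-block (the payload is a hypothetical stand-in):

    import io
    import zstandard as zstd

    data = b"payload " * 512  # hypothetical input
    frame = zstd.ZstdCompressor(level=3).compress(data)

    dest = io.BytesIO()
    dctx = zstd.ZstdDecompressor()
    decompressor = dctx.stream_writer(dest, write_size=8192)
    decompressor.write(frame)  # decompressed bytes are written into dest
    assert dest.getvalue() == data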
361 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
421 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
362 422 @make_cffi
363 class TestDecompressor_copy_stream_fuzzing(unittest.TestCase):
364 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
365 level=strategies.integers(min_value=1, max_value=5),
366 read_size=strategies.integers(min_value=1, max_value=8192),
367 write_size=strategies.integers(min_value=1, max_value=8192))
423 class TestDecompressor_copy_stream_fuzzing(TestCase):
424 @hypothesis.settings(
425 suppress_health_check=[
426 hypothesis.HealthCheck.large_base_example,
427 hypothesis.HealthCheck.too_slow,
428 ]
429 )
430 @hypothesis.given(
431 original=strategies.sampled_from(random_input_data()),
432 level=strategies.integers(min_value=1, max_value=5),
433 read_size=strategies.integers(min_value=1, max_value=8192),
434 write_size=strategies.integers(min_value=1, max_value=8192),
435 )
368 436 def test_read_write_size_variance(self, original, level, read_size, write_size):
369 437 cctx = zstd.ZstdCompressor(level=level)
370 438 frame = cctx.compress(original)
371 439
372 440 source = io.BytesIO(frame)
373 441 dest = io.BytesIO()
374 442
375 443 dctx = zstd.ZstdDecompressor()
376 444 dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size)
377 445
378 446 self.assertEqual(dest.getvalue(), original)
379 447
380 448
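The copy_stream API fuzzed above pumps bytes between two file objects in one call; a minimal sketch with a hypothetical payload:

    import io
    import zstandard as zstd

    data = b"payload " * 256  # hypothetical input
    source = io.BytesIO(zstd.ZstdCompressor().compress(data))
    dest = io.BytesIO()

    dctx = zstd.ZstdDecompressor()
    dctx.copy_stream(source, dest, read_size=8192, write_size=8192)
    assert dest.getvalue() == data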
381 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
449 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
382 450 @make_cffi
383 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
384 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
385 level=strategies.integers(min_value=1, max_value=5),
386 chunk_sizes=strategies.data())
451 class TestDecompressor_decompressobj_fuzzing(TestCase):
452 @hypothesis.settings(
453 suppress_health_check=[
454 hypothesis.HealthCheck.large_base_example,
455 hypothesis.HealthCheck.too_slow,
456 ]
457 )
458 @hypothesis.given(
459 original=strategies.sampled_from(random_input_data()),
460 level=strategies.integers(min_value=1, max_value=5),
461 chunk_sizes=strategies.data(),
462 )
387 463 def test_random_input_sizes(self, original, level, chunk_sizes):
388 464 cctx = zstd.ZstdCompressor(level=level)
389 465 frame = cctx.compress(original)
390 466
391 467 source = io.BytesIO(frame)
392 468
393 469 dctx = zstd.ZstdDecompressor()
394 470 dobj = dctx.decompressobj()
395 471
396 472 chunks = []
397 473 while True:
398 474 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
399 475 chunk = source.read(chunk_size)
400 476 if not chunk:
401 477 break
402 478
403 479 chunks.append(dobj.decompress(chunk))
404 480
405 self.assertEqual(b''.join(chunks), original)
481 self.assertEqual(b"".join(chunks), original)
406 482
407 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
408 level=strategies.integers(min_value=1, max_value=5),
409 write_size=strategies.integers(min_value=1,
410 max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE),
411 chunk_sizes=strategies.data())
483 @hypothesis.settings(
484 suppress_health_check=[
485 hypothesis.HealthCheck.large_base_example,
486 hypothesis.HealthCheck.too_slow,
487 ]
488 )
489 @hypothesis.given(
490 original=strategies.sampled_from(random_input_data()),
491 level=strategies.integers(min_value=1, max_value=5),
492 write_size=strategies.integers(
493 min_value=1, max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
494 ),
495 chunk_sizes=strategies.data(),
496 )
412 497 def test_random_output_sizes(self, original, level, write_size, chunk_sizes):
413 498 cctx = zstd.ZstdCompressor(level=level)
414 499 frame = cctx.compress(original)
415 500
416 501 source = io.BytesIO(frame)
417 502
418 503 dctx = zstd.ZstdDecompressor()
419 504 dobj = dctx.decompressobj(write_size=write_size)
420 505
421 506 chunks = []
422 507 while True:
423 508 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
424 509 chunk = source.read(chunk_size)
425 510 if not chunk:
426 511 break
427 512
428 513 chunks.append(dobj.decompress(chunk))
429 514
430 self.assertEqual(b''.join(chunks), original)
515 self.assertEqual(b"".join(chunks), original)
431 516
432 517
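Both decompressobj tests reduce to incremental, zlib-style decompression; a minimal sketch combining them (write_size is the output-buffer hint from the second test; the payload is hypothetical):

    import io
    import zstandard as zstd

    data = b"payload " * 256  # hypothetical input
    source = io.BytesIO(zstd.ZstdCompressor().compress(data))

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj(write_size=16384)

    out = []
    while True:
        chunk = source.read(1024)
        if not chunk:
            break
        out.append(dobj.decompress(chunk))
    assert b"".join(out) == data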
433 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
518 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
434 519 @make_cffi
435 class TestDecompressor_read_to_iter_fuzzing(unittest.TestCase):
436 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
437 level=strategies.integers(min_value=1, max_value=5),
438 read_size=strategies.integers(min_value=1, max_value=4096),
439 write_size=strategies.integers(min_value=1, max_value=4096))
520 class TestDecompressor_read_to_iter_fuzzing(TestCase):
521 @hypothesis.given(
522 original=strategies.sampled_from(random_input_data()),
523 level=strategies.integers(min_value=1, max_value=5),
524 read_size=strategies.integers(min_value=1, max_value=4096),
525 write_size=strategies.integers(min_value=1, max_value=4096),
526 )
440 527 def test_read_write_size_variance(self, original, level, read_size, write_size):
441 528 cctx = zstd.ZstdCompressor(level=level)
442 529 frame = cctx.compress(original)
443 530
444 531 source = io.BytesIO(frame)
445 532
446 533 dctx = zstd.ZstdDecompressor()
447 chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size))
534 chunks = list(
535 dctx.read_to_iter(source, read_size=read_size, write_size=write_size)
536 )
448 537
449 self.assertEqual(b''.join(chunks), original)
538 self.assertEqual(b"".join(chunks), original)
450 539
451 540
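The read_to_iter API above yields decompressed chunks lazily from a source stream; a minimal sketch under the same assumptions as the previous examples:

    import io
    import zstandard as zstd

    data = b"payload " * 256  # hypothetical input
    source = io.BytesIO(zstd.ZstdCompressor().compress(data))

    dctx = zstd.ZstdDecompressor()
    chunks = list(dctx.read_to_iter(source, read_size=4096, write_size=4096))
    assert b"".join(chunks) == data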
452 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
453 class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase):
454 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
455 min_size=1, max_size=1024),
456 threads=strategies.integers(min_value=1, max_value=8),
457 use_dict=strategies.booleans())
541 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
542 class TestDecompressor_multi_decompress_to_buffer_fuzzing(TestCase):
543 @hypothesis.given(
544 original=strategies.lists(
545 strategies.sampled_from(random_input_data()), min_size=1, max_size=1024
546 ),
547 threads=strategies.integers(min_value=1, max_value=8),
548 use_dict=strategies.booleans(),
549 )
458 550 def test_data_equivalence(self, original, threads, use_dict):
459 551 kwargs = {}
460 552 if use_dict:
461 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
553 kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0])
462 554
463 cctx = zstd.ZstdCompressor(level=1,
464 write_content_size=True,
465 write_checksum=True,
466 **kwargs)
555 cctx = zstd.ZstdCompressor(
556 level=1, write_content_size=True, write_checksum=True, **kwargs
557 )
467 558
468 if not hasattr(cctx, 'multi_compress_to_buffer'):
469 self.skipTest('multi_compress_to_buffer not available')
559 if not hasattr(cctx, "multi_compress_to_buffer"):
560 self.skipTest("multi_compress_to_buffer not available")
470 561
471 562 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
472 563
473 564 dctx = zstd.ZstdDecompressor(**kwargs)
474 565 result = dctx.multi_decompress_to_buffer(frames_buffer)
475 566
476 567 self.assertEqual(len(result), len(original))
477 568 for i, frame in enumerate(result):
478 569 self.assertEqual(frame.tobytes(), original[i])
479 570
480 571 frames_list = [f.tobytes() for f in frames_buffer]
481 572 result = dctx.multi_decompress_to_buffer(frames_list)
482 573
483 574 self.assertEqual(len(result), len(original))
484 575 for i, frame in enumerate(result):
485 576 self.assertEqual(frame.tobytes(), original[i])
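The batch APIs exercised above round-trip many inputs at once; note the test guards on hasattr because multi_compress_to_buffer is not available on every backend. A minimal sketch with hypothetical inputs:

    import zstandard as zstd

    items = [b"alpha " * 100, b"beta " * 100]  # hypothetical inputs
    cctx = zstd.ZstdCompressor(level=1, write_content_size=True, write_checksum=True)

    if hasattr(cctx, "multi_compress_to_buffer"):  # not all backends provide this
        frames = cctx.multi_compress_to_buffer(items, threads=-1)
        result = zstd.ZstdDecompressor().multi_decompress_to_buffer(frames)
        assert [segment.tobytes() for segment in result] == items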
@@ -1,15 +1,15 b''
1 1 import unittest
2 2
3 3 import zstandard as zstd
4 4
5 from . common import (
5 from .common import (
6 6 make_cffi,
7 TestCase,
7 8 )
8 9
9 10
10 11 @make_cffi
11 class TestSizes(unittest.TestCase):
12 class TestSizes(TestCase):
12 13 def test_decompression_size(self):
13 14 size = zstd.estimate_decompression_context_size()
14 15 self.assertGreater(size, 100000)
15
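The function under test reports the approximate memory footprint of a decompression context; per the assertion above it is well over 100000 bytes. A one-line sketch:

    import zstandard as zstd

    print(zstd.estimate_decompression_context_size())  # bytes per ZstdDecompressor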
@@ -1,69 +1,70 b''
1 1 from __future__ import unicode_literals
2 2
3 3 import unittest
4 4
5 5 import zstandard as zstd
6 6
7 from . common import (
7 from .common import (
8 8 make_cffi,
9 TestCase,
9 10 )
10 11
11 12
12 13 @make_cffi
13 class TestModuleAttributes(unittest.TestCase):
14 class TestModuleAttributes(TestCase):
14 15 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 3))
16 self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 4))
16 17
17 self.assertEqual(zstd.__version__, '0.12.0')
18 self.assertEqual(zstd.__version__, "0.13.0")
18 19
19 20 def test_constants(self):
20 21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
21 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
22 self.assertEqual(zstd.FRAME_HEADER, b"\x28\xb5\x2f\xfd")
22 23
23 24 def test_hasattr(self):
24 25 attrs = (
25 'CONTENTSIZE_UNKNOWN',
26 'CONTENTSIZE_ERROR',
27 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
28 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
31 'MAGIC_NUMBER',
32 'FLUSH_BLOCK',
33 'FLUSH_FRAME',
34 'BLOCKSIZELOG_MAX',
35 'BLOCKSIZE_MAX',
36 'WINDOWLOG_MIN',
37 'WINDOWLOG_MAX',
38 'CHAINLOG_MIN',
39 'CHAINLOG_MAX',
40 'HASHLOG_MIN',
41 'HASHLOG_MAX',
42 'HASHLOG3_MAX',
43 'MINMATCH_MIN',
44 'MINMATCH_MAX',
45 'SEARCHLOG_MIN',
46 'SEARCHLOG_MAX',
47 'SEARCHLENGTH_MIN',
48 'SEARCHLENGTH_MAX',
49 'TARGETLENGTH_MIN',
50 'TARGETLENGTH_MAX',
51 'LDM_MINMATCH_MIN',
52 'LDM_MINMATCH_MAX',
53 'LDM_BUCKETSIZELOG_MAX',
54 'STRATEGY_FAST',
55 'STRATEGY_DFAST',
56 'STRATEGY_GREEDY',
57 'STRATEGY_LAZY',
58 'STRATEGY_LAZY2',
59 'STRATEGY_BTLAZY2',
60 'STRATEGY_BTOPT',
61 'STRATEGY_BTULTRA',
62 'STRATEGY_BTULTRA2',
63 'DICT_TYPE_AUTO',
64 'DICT_TYPE_RAWCONTENT',
65 'DICT_TYPE_FULLDICT',
26 "CONTENTSIZE_UNKNOWN",
27 "CONTENTSIZE_ERROR",
28 "COMPRESSION_RECOMMENDED_INPUT_SIZE",
29 "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
30 "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
31 "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
32 "MAGIC_NUMBER",
33 "FLUSH_BLOCK",
34 "FLUSH_FRAME",
35 "BLOCKSIZELOG_MAX",
36 "BLOCKSIZE_MAX",
37 "WINDOWLOG_MIN",
38 "WINDOWLOG_MAX",
39 "CHAINLOG_MIN",
40 "CHAINLOG_MAX",
41 "HASHLOG_MIN",
42 "HASHLOG_MAX",
43 "HASHLOG3_MAX",
44 "MINMATCH_MIN",
45 "MINMATCH_MAX",
46 "SEARCHLOG_MIN",
47 "SEARCHLOG_MAX",
48 "SEARCHLENGTH_MIN",
49 "SEARCHLENGTH_MAX",
50 "TARGETLENGTH_MIN",
51 "TARGETLENGTH_MAX",
52 "LDM_MINMATCH_MIN",
53 "LDM_MINMATCH_MAX",
54 "LDM_BUCKETSIZELOG_MAX",
55 "STRATEGY_FAST",
56 "STRATEGY_DFAST",
57 "STRATEGY_GREEDY",
58 "STRATEGY_LAZY",
59 "STRATEGY_LAZY2",
60 "STRATEGY_BTLAZY2",
61 "STRATEGY_BTOPT",
62 "STRATEGY_BTULTRA",
63 "STRATEGY_BTULTRA2",
64 "DICT_TYPE_AUTO",
65 "DICT_TYPE_RAWCONTENT",
66 "DICT_TYPE_FULLDICT",
66 67 )
67 68
68 69 for a in attrs:
69 70 self.assertTrue(hasattr(zstd, a), a)
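One practical use of the constants verified above: FRAME_HEADER is the 4-byte zstd magic, so it can cheaply identify zstd frames. A minimal sketch:

    import zstandard as zstd

    def looks_like_zstd_frame(data):
        # FRAME_HEADER == b"\x28\xb5\x2f\xfd", as asserted in the test above.
        return data[:4] == zstd.FRAME_HEADER

    assert looks_like_zstd_frame(zstd.ZstdCompressor().compress(b"hello"))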
@@ -1,89 +1,92 b''
1 1 import struct
2 2 import sys
3 3 import unittest
4 4
5 5 import zstandard as zstd
6 6
7 from . common import (
7 from .common import (
8 8 generate_samples,
9 9 make_cffi,
10 10 random_input_data,
11 TestCase,
11 12 )
12 13
13 14 if sys.version_info[0] >= 3:
14 15 int_type = int
15 16 else:
16 17 int_type = long
17 18
18 19
19 20 @make_cffi
20 class TestTrainDictionary(unittest.TestCase):
21 class TestTrainDictionary(TestCase):
21 22 def test_no_args(self):
22 23 with self.assertRaises(TypeError):
23 24 zstd.train_dictionary()
24 25
25 26 def test_bad_args(self):
26 27 with self.assertRaises(TypeError):
27 zstd.train_dictionary(8192, u'foo')
28 zstd.train_dictionary(8192, u"foo")
28 29
29 30 with self.assertRaises(ValueError):
30 zstd.train_dictionary(8192, [u'foo'])
31 zstd.train_dictionary(8192, [u"foo"])
31 32
32 33 def test_no_params(self):
33 34 d = zstd.train_dictionary(8192, random_input_data())
34 35 self.assertIsInstance(d.dict_id(), int_type)
35 36
36 37 # The dictionary ID may be different across platforms.
37 expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
38 expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id())
38 39
39 40 data = d.as_bytes()
40 41 self.assertEqual(data[0:8], expected)
41 42
42 43 def test_basic(self):
43 44 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
44 45 self.assertIsInstance(d.dict_id(), int_type)
45 46
46 47 data = d.as_bytes()
47 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
48 self.assertEqual(data[0:4], b"\x37\xa4\x30\xec")
48 49
49 50 self.assertEqual(d.k, 64)
50 51 self.assertEqual(d.d, 16)
51 52
52 53 def test_set_dict_id(self):
53 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
54 dict_id=42)
54 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, dict_id=42)
55 55 self.assertEqual(d.dict_id(), 42)
56 56
57 57 def test_optimize(self):
58 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
59 d=16)
58 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, d=16)
60 59
61 60 # This varies by platform.
62 61 self.assertIn(d.k, (50, 2000))
63 62 self.assertEqual(d.d, 16)
64 63
64
65 65 @make_cffi
66 class TestCompressionDict(unittest.TestCase):
66 class TestCompressionDict(TestCase):
67 67 def test_bad_mode(self):
68 with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
69 zstd.ZstdCompressionDict(b'foo', dict_type=42)
68 with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"):
69 zstd.ZstdCompressionDict(b"foo", dict_type=42)
70 70
71 71 def test_bad_precompute_compress(self):
72 72 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
73 73
74 with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
74 with self.assertRaisesRegex(ValueError, "must specify one of level or "):
75 75 d.precompute_compress()
76 76
77 with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
78 d.precompute_compress(level=3,
79 compression_params=zstd.CompressionParameters())
77 with self.assertRaisesRegex(ValueError, "must only specify one of level or "):
78 d.precompute_compress(
79 level=3, compression_params=zstd.CompressionParameters()
80 )
80 81
81 82 def test_precompute_compress_rawcontent(self):
82 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
83 dict_type=zstd.DICT_TYPE_RAWCONTENT)
83 d = zstd.ZstdCompressionDict(
84 b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT
85 )
84 86 d.precompute_compress(level=1)
85 87
86 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
87 dict_type=zstd.DICT_TYPE_FULLDICT)
88 with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
88 d = zstd.ZstdCompressionDict(
89 b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT
90 )
91 with self.assertRaisesRegex(zstd.ZstdError, "unable to precompute dictionary"):
89 92 d.precompute_compress(level=1)
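A minimal sketch of the raw-content dictionary path tested above. Raw-content dictionaries skip training entirely (any bytes work), and precompute_compress is legal for them, whereas precomputing a FULLDICT from non-dictionary bytes raises ZstdError, as the test shows:

    import zstandard as zstd

    d = zstd.ZstdCompressionDict(b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT)
    d.precompute_compress(level=1)  # derive tables once, reuse across compressions

    cctx = zstd.ZstdCompressor(level=1, dict_data=d)
    frame = cctx.compress(b"dictcontent plus new data")
    dctx = zstd.ZstdDecompressor(dict_data=d)
    assert dctx.decompress(frame) == b"dictcontent plus new data"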
@@ -1,65 +1,75 b''
1 1 # Copyright (c) 2017-present, Gregory Szorc
2 2 # All rights reserved.
3 3 #
4 4 # This software may be modified and distributed under the terms
5 5 # of the BSD license. See the LICENSE file for details.
6 6
7 7 """Python interface to the Zstandard (zstd) compression library."""
8 8
9 9 from __future__ import absolute_import, unicode_literals
10 10
11 11 # This module serves 2 roles:
12 12 #
13 13 # 1) Export the C or CFFI "backend" through a central module.
14 14 # 2) Implement additional functionality built on top of C or CFFI backend.
15 15
16 16 import os
17 17 import platform
18 18
19 19 # Some Python implementations don't support C extensions. That's why we have
20 20 # a CFFI implementation in the first place. The code here import one of our
21 21 # "backends" then re-exports the symbols from this module. For convenience,
22 22 # we support falling back to the CFFI backend if the C extension can't be
23 23 # imported. But for performance reasons, we only do this on unknown Python
24 24 # implementation. Notably, for CPython we require the C extension by default.
25 25 # Because someone will inevitably want special behavior, the behavior is
26 26 # configurable via an environment variable. A potentially better way to handle
27 27 # this is to import a special ``__importpolicy__`` module or something
28 28 # defining a variable and `setup.py` could write the file with whatever
29 29 # policy was specified at build time. Until someone needs it, we go with
30 30 # the hacky but simple environment variable approach.
31 _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')
31 _module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default")
32 32
33 if _module_policy == 'default':
34 if platform.python_implementation() in ('CPython',):
33 if _module_policy == "default":
34 if platform.python_implementation() in ("CPython",):
35 35 from zstd import *
36 backend = 'cext'
37 elif platform.python_implementation() in ('PyPy',):
36
37 backend = "cext"
38 elif platform.python_implementation() in ("PyPy",):
38 39 from .cffi import *
39 backend = 'cffi'
40
41 backend = "cffi"
40 42 else:
41 43 try:
42 44 from zstd import *
43 backend = 'cext'
45
46 backend = "cext"
44 47 except ImportError:
45 48 from .cffi import *
46 backend = 'cffi'
47 elif _module_policy == 'cffi_fallback':
49
50 backend = "cffi"
51 elif _module_policy == "cffi_fallback":
48 52 try:
49 53 from zstd import *
50 backend = 'cext'
54
55 backend = "cext"
51 56 except ImportError:
52 57 from .cffi import *
53 backend = 'cffi'
54 elif _module_policy == 'cext':
58
59 backend = "cffi"
60 elif _module_policy == "cext":
55 61 from zstd import *
56 backend = 'cext'
57 elif _module_policy == 'cffi':
62
63 backend = "cext"
64 elif _module_policy == "cffi":
58 65 from .cffi import *
59 backend = 'cffi'
66
67 backend = "cffi"
60 68 else:
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 'cext, or cffi' % _module_policy)
69 raise ImportError(
70 "unknown module import policy: %s; use default, cffi_fallback, "
71 "cext, or cffi" % _module_policy
72 )
63 73
64 74 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.12.0'
75 __version__ = "0.13.0"
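The import policy above is driven entirely by an environment variable read at import time; a minimal sketch of selecting and verifying a backend:

    # Choose the backend before the first import, e.g.:
    #   PYTHON_ZSTANDARD_IMPORT_POLICY=cffi python myscript.py
    import zstandard

    print(zstandard.backend)      # "cext" or "cffi"
    print(zstandard.__version__)  # "0.13.0"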
NO CONTENT: remaining modified files omitted. The requested commit or file is too big and content was truncated.