Show More
The requested changes are too big and content was truncated. Show full diff
|
1 | NO CONTENT: new file 100644 | |
The requested commit or file is too big and content was truncated. Show full diff |
@@ -1,102 +1,103 b'' | |||
|
1 | 1 | # Files that just need to be migrated to the formatter. |
|
2 | 2 | # Do not add new files here! |
|
3 | 3 | mercurial/cext/manifest.c |
|
4 | 4 | mercurial/cext/osutil.c |
|
5 | 5 | # Vendored code that we should never format: |
|
6 | 6 | contrib/python-zstandard/c-ext/bufferutil.c |
|
7 | 7 | contrib/python-zstandard/c-ext/compressionchunker.c |
|
8 | 8 | contrib/python-zstandard/c-ext/compressiondict.c |
|
9 | 9 | contrib/python-zstandard/c-ext/compressionparams.c |
|
10 | 10 | contrib/python-zstandard/c-ext/compressionreader.c |
|
11 | 11 | contrib/python-zstandard/c-ext/compressionwriter.c |
|
12 | 12 | contrib/python-zstandard/c-ext/compressobj.c |
|
13 | 13 | contrib/python-zstandard/c-ext/compressor.c |
|
14 | 14 | contrib/python-zstandard/c-ext/compressoriterator.c |
|
15 | 15 | contrib/python-zstandard/c-ext/constants.c |
|
16 | 16 | contrib/python-zstandard/c-ext/decompressionreader.c |
|
17 | 17 | contrib/python-zstandard/c-ext/decompressionwriter.c |
|
18 | 18 | contrib/python-zstandard/c-ext/decompressobj.c |
|
19 | 19 | contrib/python-zstandard/c-ext/decompressor.c |
|
20 | 20 | contrib/python-zstandard/c-ext/decompressoriterator.c |
|
21 | 21 | contrib/python-zstandard/c-ext/frameparams.c |
|
22 | 22 | contrib/python-zstandard/c-ext/python-zstandard.h |
|
23 | 23 | contrib/python-zstandard/zstd.c |
|
24 | 24 | contrib/python-zstandard/zstd/common/bitstream.h |
|
25 | 25 | contrib/python-zstandard/zstd/common/compiler.h |
|
26 | 26 | contrib/python-zstandard/zstd/common/cpu.h |
|
27 | 27 | contrib/python-zstandard/zstd/common/debug.c |
|
28 | 28 | contrib/python-zstandard/zstd/common/debug.h |
|
29 | 29 | contrib/python-zstandard/zstd/common/entropy_common.c |
|
30 | 30 | contrib/python-zstandard/zstd/common/error_private.c |
|
31 | 31 | contrib/python-zstandard/zstd/common/error_private.h |
|
32 | 32 | contrib/python-zstandard/zstd/common/fse_decompress.c |
|
33 | 33 | contrib/python-zstandard/zstd/common/fse.h |
|
34 | 34 | contrib/python-zstandard/zstd/common/huf.h |
|
35 | 35 | contrib/python-zstandard/zstd/common/mem.h |
|
36 | 36 | contrib/python-zstandard/zstd/common/pool.c |
|
37 | 37 | contrib/python-zstandard/zstd/common/pool.h |
|
38 | 38 | contrib/python-zstandard/zstd/common/threading.c |
|
39 | 39 | contrib/python-zstandard/zstd/common/threading.h |
|
40 | 40 | contrib/python-zstandard/zstd/common/xxhash.c |
|
41 | 41 | contrib/python-zstandard/zstd/common/xxhash.h |
|
42 | 42 | contrib/python-zstandard/zstd/common/zstd_common.c |
|
43 | 43 | contrib/python-zstandard/zstd/common/zstd_errors.h |
|
44 | 44 | contrib/python-zstandard/zstd/common/zstd_internal.h |
|
45 | 45 | contrib/python-zstandard/zstd/compress/fse_compress.c |
|
46 | 46 | contrib/python-zstandard/zstd/compress/hist.c |
|
47 | 47 | contrib/python-zstandard/zstd/compress/hist.h |
|
48 | 48 | contrib/python-zstandard/zstd/compress/huf_compress.c |
|
49 | 49 | contrib/python-zstandard/zstd/compress/zstd_compress.c |
|
50 | 50 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h |
|
51 | 51 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.c |
|
52 | 52 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.h |
|
53 | 53 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c |
|
54 | 54 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h |
|
55 | contrib/python-zstandard/zstd/compress/zstd_cwksp.h | |
|
55 | 56 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c |
|
56 | 57 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h |
|
57 | 58 | contrib/python-zstandard/zstd/compress/zstd_fast.c |
|
58 | 59 | contrib/python-zstandard/zstd/compress/zstd_fast.h |
|
59 | 60 | contrib/python-zstandard/zstd/compress/zstd_lazy.c |
|
60 | 61 | contrib/python-zstandard/zstd/compress/zstd_lazy.h |
|
61 | 62 | contrib/python-zstandard/zstd/compress/zstd_ldm.c |
|
62 | 63 | contrib/python-zstandard/zstd/compress/zstd_ldm.h |
|
63 | 64 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c |
|
64 | 65 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h |
|
65 | 66 | contrib/python-zstandard/zstd/compress/zstd_opt.c |
|
66 | 67 | contrib/python-zstandard/zstd/compress/zstd_opt.h |
|
67 | 68 | contrib/python-zstandard/zstd/decompress/huf_decompress.c |
|
68 | 69 | contrib/python-zstandard/zstd/decompress/zstd_ddict.c |
|
69 | 70 | contrib/python-zstandard/zstd/decompress/zstd_ddict.h |
|
70 | 71 | contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c |
|
71 | 72 | contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h |
|
72 | 73 | contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h |
|
73 | 74 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c |
|
74 | 75 | contrib/python-zstandard/zstd/deprecated/zbuff_common.c |
|
75 | 76 | contrib/python-zstandard/zstd/deprecated/zbuff_compress.c |
|
76 | 77 | contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c |
|
77 | 78 | contrib/python-zstandard/zstd/deprecated/zbuff.h |
|
78 | 79 | contrib/python-zstandard/zstd/dictBuilder/cover.c |
|
79 | 80 | contrib/python-zstandard/zstd/dictBuilder/cover.h |
|
80 | 81 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c |
|
81 | 82 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h |
|
82 | 83 | contrib/python-zstandard/zstd/dictBuilder/fastcover.c |
|
83 | 84 | contrib/python-zstandard/zstd/dictBuilder/zdict.c |
|
84 | 85 | contrib/python-zstandard/zstd/dictBuilder/zdict.h |
|
85 | 86 | contrib/python-zstandard/zstd/zstd.h |
|
86 | 87 | hgext/fsmonitor/pywatchman/bser.c |
|
87 | 88 | mercurial/thirdparty/xdiff/xdiff.h |
|
88 | 89 | mercurial/thirdparty/xdiff/xdiffi.c |
|
89 | 90 | mercurial/thirdparty/xdiff/xdiffi.h |
|
90 | 91 | mercurial/thirdparty/xdiff/xemit.c |
|
91 | 92 | mercurial/thirdparty/xdiff/xemit.h |
|
92 | 93 | mercurial/thirdparty/xdiff/xhistogram.c |
|
93 | 94 | mercurial/thirdparty/xdiff/xinclude.h |
|
94 | 95 | mercurial/thirdparty/xdiff/xmacros.h |
|
95 | 96 | mercurial/thirdparty/xdiff/xmerge.c |
|
96 | 97 | mercurial/thirdparty/xdiff/xpatience.c |
|
97 | 98 | mercurial/thirdparty/xdiff/xprepare.c |
|
98 | 99 | mercurial/thirdparty/xdiff/xprepare.h |
|
99 | 100 | mercurial/thirdparty/xdiff/xtypes.h |
|
100 | 101 | mercurial/thirdparty/xdiff/xutils.c |
|
101 | 102 | mercurial/thirdparty/xdiff/xutils.h |
|
102 | 103 | mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c |
@@ -1,702 +1,721 b'' | |||
|
1 | 1 | =============== |
|
2 | 2 | Version History |
|
3 | 3 | =============== |
|
4 | 4 | |
|
5 | 5 | 1.0.0 (not yet released) |
|
6 | 6 | ======================== |
|
7 | 7 | |
|
8 | 8 | Actions Blocking Release |
|
9 | 9 | ------------------------ |
|
10 | 10 | |
|
11 | 11 | * compression and decompression APIs that support ``io.RawIOBase`` interface |
|
12 | 12 | (#13). |
|
13 | 13 | * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface. |
|
14 | 14 | * Properly handle non-blocking I/O and partial writes for objects implementing |
|
15 | 15 | ``io.RawIOBase``. |
|
16 | 16 | * Make ``write_return_read=True`` the default for objects implementing |
|
17 | 17 | ``io.RawIOBase``. |
|
18 | 18 | * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for |
|
19 | 19 | all objects implementing ``io.RawIOBase``. Is calling ``close()`` on |
|
20 | 20 | wrapped stream acceptable, should ``__exit__`` always call ``close()``, |
|
21 | 21 | should ``close()`` imply ``flush()``, etc. |
|
22 | 22 | * Consider making reads across frames configurable behavior. |
|
23 | 23 | * Refactor module names so C and CFFI extensions live under ``zstandard`` |
|
24 | 24 | package. |
|
25 | 25 | * Overall API design review. |
|
26 | 26 | * Use Python allocator where possible. |
|
27 | 27 | * Figure out what to do about experimental APIs not implemented by CFFI. |
|
28 | 28 | * APIs for auto adjusting compression parameters based on input size. e.g. |
|
29 | 29 | clamping the window log so it isn't too large for input. |
|
30 | 30 | * Consider allowing compressor and decompressor instances to be thread safe, |
|
31 | 31 | support concurrent operations. Or track when an operation is in progress and |
|
32 | 32 | refuse to let concurrent operations use the same instance. |
|
33 | 33 | * Support for magic-less frames for all decompression operations (``decompress()`` |
|
34 | 34 | doesn't work due to sniffing the content size and the lack of a ZSTD API to |
|
35 | 35 | sniff magic-less frames - this should be fixed in 1.3.5.). |
|
36 | 36 | * Audit for complete flushing when ending compression streams. |
|
37 | 37 | * Deprecate legacy APIs. |
|
38 | 38 | * Audit for ability to control read/write sizes on all APIs. |
|
39 | 39 | * Detect memory leaks via bench.py. |
|
40 | 40 | * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and |
|
41 | 41 | require use of ``CompressionParameters``. |
|
42 | 42 | * Expose ``ZSTD_getFrameProgression()`` from more compressor types. |
|
43 | 43 | * Support modifying compression parameters mid operation when supported by |
|
44 | 44 | zstd API. |
|
45 | 45 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. |
|
46 | * Expose ``ZSTD_SRCSIZEHINT_{MIN,MAX}`` constants. | |
|
46 | 47 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. |
|
47 | * Support ``ZSTD_…`` — removed entry (remainder truncated in this diff view). | |
48 | * Support ``ZSTD_dictForceLoad`` dictionary compression parameter. | |
|
49 | * Support ``ZSTD_c_targetCBlockSize`` compression parameter. | |
|
50 | * Support ``ZSTD_c_literalCompressionMode`` compression parameter. | |
|
51 | * Support ``ZSTD_c_srcSizeHint`` compression parameter. | |
|
48 | 52 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving |
|
49 | 53 | compression parameters. |
|
50 | 54 | * Consider exposing ``ZSTDMT_toFlushNow()``. |
|
51 | 55 | * Expose ``ZDICT_trainFromBuffer_fastCover()``, |
|
52 | 56 | ``ZDICT_optimizeTrainFromBuffer_fastCover``. |
|
57 | * Expose ``ZSTD_Sequence`` struct and related ``ZSTD_getSequences()`` API. | |
|
53 | 58 | * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level. |
|
54 | 59 | * Consider a ``chunker()`` API for decompression. |
|
55 | 60 | * Consider stats for ``chunker()`` API, including finding the last consumed |
|
56 | 61 | offset of input data. |
|
57 | 62 | * Consider exposing ``ZSTD_cParam_getBounds()`` and |
|
58 | 63 | ``ZSTD_dParam_getBounds()`` APIs. |
|
59 | 64 | * Consider controls over resetting compression contexts (session only, parameters, |
|
60 | 65 | or session and parameters). |
|
61 | 66 | * Actually use the CFFI backend in fuzzing tests. |
|
62 | 67 | |
|
63 | 68 | Other Actions Not Blocking Release |
|
64 | 69 | --------------------------------------- |
|
65 | 70 | |
|
66 | 71 | * Support for block compression APIs. |
|
67 | 72 | * API for ensuring max memory ceiling isn't exceeded. |
|
68 | 73 | * Move off nose for testing. |
|
69 | 74 | |
|
75 | 0.13.0 (released 2019-12-28) | |
|
76 | ============================ | |
|
77 | ||
|
78 | Changes | |
|
79 | ------- | |
|
80 | ||
|
81 | * ``pytest-xdist`` ``pytest`` extension is now installed so tests can be | |
|
82 | run in parallel. | |
|
83 | * CI now builds ``manylinux2010`` and ``manylinux2014`` binary wheels | |
|
84 | instead of a mix of ``manylinux2010`` and ``manylinux1``. | |
|
85 | * Official support for Python 3.8 has been added. | |
|
86 | * Bundled zstandard library upgraded from 1.4.3 to 1.4.4. | |
|
87 | * Python code has been reformatted with black. | |
|
88 | ||
|
70 | 89 | 0.12.0 (released 2019-09-15) |
|
71 | 90 | ============================ |
|
72 | 91 | |
|
73 | 92 | Backwards Compatibility Notes |
|
74 | 93 | ----------------------------- |
|
75 | 94 | |
|
76 | 95 | * Support for Python 3.4 has been dropped since Python 3.4 is no longer |
|
77 | 96 | a supported Python version upstream. (But it will likely continue to |
|
78 | 97 | work until Python 2.7 support is dropped and we port to Python 3.5+ |
|
79 | 98 | APIs.) |
|
80 | 99 | |
|
81 | 100 | Bug Fixes |
|
82 | 101 | --------- |
|
83 | 102 | |
|
84 | 103 | * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91). |
|
85 | 104 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). |
|
86 | 105 | |
|
87 | 106 | Changes |
|
88 | 107 | ------- |
|
89 | 108 | |
|
90 | 109 | * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI). |
|
91 | 110 | * Switched to ``pytest`` for running tests (from ``nose``). |
|
92 | 111 | * Bundled zstandard library upgraded from 1.3.8 to 1.4.3. |
|
93 | 112 | |
|
94 | 113 | 0.11.1 (released 2019-05-14) |
|
95 | 114 | ============================ |
|
96 | 115 | |
|
97 | 116 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). |
|
98 | 117 | |
|
99 | 118 | 0.11.0 (released 2019-02-24) |
|
100 | 119 | ============================ |
|
101 | 120 | |
|
102 | 121 | Backwards Compatibility Notes |
|
103 | 122 | ----------------------------- |
|
104 | 123 | |
|
105 | 124 | * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0`` |
|
106 | 125 | and defaults to ``-1``, per the documented behavior of |
|
107 | 126 | ``io.RawIOBase.read()``. Previously, we required an argument that was |
|
108 | 127 | a positive value. |
|
109 | 128 | * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods |
|
110 | 129 | of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation`` |
|
111 | 130 | instead of ``NotImplementedError``. |
|
112 | 131 | * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames`` |
|
113 | 132 | argument. The default value will likely be changed in a future release |
|
114 | 133 | and consumers are advised to pass the argument to avoid unwanted change |
|
115 | 134 | of behavior in the future. |
|
116 | 135 | * ``setup.py`` now always disables the CFFI backend if the installed |
|
117 | 136 | CFFI package does not meet the minimum version requirements. Before, it was |
|
118 | 137 | possible for the CFFI backend to be generated and a run-time error to |
|
119 | 138 | occur. |
|
120 | 139 | * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader`` |
|
121 | 140 | were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``, |
|
122 | 141 | respectively so naming is identical to the C extension. This should have |
|
123 | 142 | no meaningful end-user impact, as instances aren't meant to be |
|
124 | 143 | constructed directly. |
|
125 | 144 | * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
126 | 145 | argument to control whether ``write()`` returns the number of bytes |
|
127 | 146 | read from the source / written to the decompressor. It defaults to off, |
|
128 | 147 | which preserves the existing behavior of returning the number of bytes |
|
129 | 148 | emitted from the decompressor. The default will change in a future release |
|
130 | 149 | so behavior aligns with the specified behavior of ``io.RawIOBase``. |
|
131 | 150 | * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This |
|
132 | 151 | will result in that stream plus the underlying stream being closed as |
|
133 | 152 | well. If this behavior is not desirable, do not use instances as |
|
134 | 153 | context managers. |
|
135 | 154 | * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
136 | 155 | argument to control whether ``write()`` returns the number of bytes read |
|
137 | 156 | from the source / written to the compressor. It defaults to off, which |
|
138 | 157 | preserves the existing behavior of returning the number of bytes emitted |
|
139 | 158 | from the compressor. The default will change in a future release so |
|
140 | 159 | behavior aligns with the specified behavior of ``io.RawIOBase``. |
|
141 | 160 | * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will |
|
142 | 161 | result in that stream plus any underlying stream being closed as well. If |
|
143 | 162 | this behavior is not desirable, do not use instances as context managers. |
|
144 | 163 | * ``ZstdDecompressionWriter`` no longer requires being used as a context |
|
145 | 164 | manager (#57). |
|
146 | 165 | * ``ZstdCompressionWriter`` no longer requires being used as a context |
|
147 | 166 | manager (#57). |
|
148 | 167 | * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances |
|
149 | 168 | has been deprecated and will be removed in a future release. The |
|
150 | 169 | ``overlap_log`` attribute should be used instead. |
|
151 | 170 | * The ``overlap_size_log`` argument to ``CompressionParameters`` has been |
|
152 | 171 | deprecated and will be removed in a future release. The ``overlap_log`` |
|
153 | 172 | argument should be used instead. |
|
154 | 173 | * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances |
|
155 | 174 | has been deprecated and will be removed in a future release. The |
|
156 | 175 | ``ldm_hash_rate_log`` attribute should be used instead. |
|
157 | 176 | * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been |
|
158 | 177 | deprecated and will be removed in a future release. The ``ldm_hash_rate_log`` |
|
159 | 178 | argument should be used instead. |
|
160 | 179 | * The ``compression_strategy`` argument to ``CompressionParameters`` has been |
|
161 | 180 | deprecated and will be removed in a future release. The ``strategy`` |
|
162 | 181 | argument should be used instead. |
|
163 | 182 | * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated |
|
164 | 183 | and will be removed in a future release. Use ``MINMATCH_MIN`` and |
|
165 | 184 | ``MINMATCH_MAX`` instead. |
|
166 | 185 | * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had |
|
167 | 186 | been documented in the ``README`` file since the ``0.9.0`` release, the |
|
168 | 187 | module should not be imported directly at its new location. Instead, |
|
169 | 188 | ``import zstandard`` to cause an appropriate backend module to be loaded |
|
170 | 189 | automatically. |
|
171 | 190 | |
|
172 | 191 | Bug Fixes |
|
173 | 192 | --------- |
|
174 | 193 | |
|
175 | 194 | * CFFI backend could encounter a failure when sending an empty chunk into |
|
176 | 195 | ``ZstdDecompressionObj.decompress()``. The issue has been fixed. |
|
177 | 196 | * CFFI backend could encounter an error when calling |
|
178 | 197 | ``ZstdDecompressionReader.read()`` if there was data remaining in an |
|
179 | 198 | internal buffer. The issue has been fixed. (#71) |
|
180 | 199 | |
|
181 | 200 | Changes |
|
182 | 201 | ------- |
|
183 | 202 | |
|
184 | 203 | * ``ZstdDecompressionObj.decompress()`` now properly handles empty inputs in |
|
185 | 204 | the CFFI backend. |
|
186 | 205 | * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``. |
|
187 | 206 | These are part of the ``io.BufferedIOBase`` interface. |
|
188 | 207 | * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading |
|
189 | 208 | compressed output into an existing buffer. |
|
190 | 209 | * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts |
|
191 | 210 | read sizes of ``-1`` and ``0``. The new behavior aligns with the documented |
|
192 | 211 | behavior of ``io.RawIOBase``. |
|
193 | 212 | * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this |
|
194 | 213 | method raised ``NotImplementedError``. |
|
195 | 214 | * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``. |
|
196 | 215 | These are part of the ``io.BufferedIOBase`` interface. |
|
197 | 216 | * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts |
|
198 | 217 | read sizes of ``-1`` and ``0``. The new behavior aligns with the documented |
|
199 | 218 | behavior of ``io.RawIOBase``. |
|
200 | 219 | * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this |
|
201 | 220 | method raised ``NotImplementedError``. |
|
202 | 221 | * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods |
|
203 | 222 | of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation`` |
|
204 | 223 | instead of ``NotImplementedError``. This reflects a decision to never |
|
205 | 224 | implement text-based I/O on (de)compressors and keep the low-level API |
|
206 | 225 | operating in the binary domain. (#13) |
|
207 | 226 | * ``README.rst`` now documented how to achieve linewise iteration using |
|
208 | 227 | an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``. |
|
209 | 228 | * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for |
|
210 | 229 | reading decompressed output into an existing buffer. This allows chaining |
|
211 | 230 | to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``. |
|
212 | 231 | * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames`` |
|
213 | 232 | argument to control behavior when the input data has multiple zstd |
|
214 | 233 | *frames*. When ``False`` (the default for backwards compatibility), a |
|
215 | 234 | ``read()`` will stop when the end of a zstd *frame* is encountered. When |
|
216 | 235 | ``True``, ``read()`` can potentially return data spanning multiple zstd |
|
217 | 236 | *frames*. The default will likely be changed to ``True`` in a future |
|
218 | 237 | release. |
|
219 | 238 | * ``setup.py`` now performs CFFI version sniffing and disables the CFFI |
|
220 | 239 | backend if CFFI is too old. Previously, we only used ``install_requires`` |
|
221 | 240 | to enforce the CFFI version and not all build modes would properly enforce |
|
222 | 241 | the minimum CFFI version. (#69) |
|
223 | 242 | * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data |
|
224 | 243 | remaining in any internal buffer. Before, repeated ``read()`` could |
|
225 | 244 | result in *random* errors. (#71) |
|
226 | 245 | * Upgraded various Python packages in CI environment. |
|
227 | 246 | * Upgrade to hypothesis 4.5.11. |
|
228 | 247 | * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader`` |
|
229 | 248 | were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``, |
|
230 | 249 | respectively. |
|
231 | 250 | * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
232 | 251 | argument to control whether ``write()`` returns the number of bytes read |
|
233 | 252 | from the source. It defaults to ``False`` to preserve backwards |
|
234 | 253 | compatibility. |
|
235 | 254 | * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase`` |
|
236 | 255 | interface and behaves as a proper stream object. |
|
237 | 256 | * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
238 | 257 | argument to control whether ``write()`` returns the number of bytes read |
|
239 | 258 | from the source. It defaults to ``False`` to preserve backwards |
|
240 | 259 | compatibility. |
|
241 | 260 | * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and |
|
242 | 261 | behaves as a proper stream object. ``close()`` will now close the stream |
|
243 | 262 | and the underlying stream (if possible). ``__exit__`` will now call |
|
244 | 263 | ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented. |
|
245 | 264 | * ``ZstdDecompressionWriter`` no longer must be used as a context manager. |
|
246 | 265 | * ``ZstdCompressionWriter`` no longer must be used as a context manager. |
|
247 | 266 | When not using as a context manager, it is important to call |
|
248 | 267 | ``flush(FLUSH_FRAME)`` or the compression stream won't be properly |
|
249 | 268 | terminated and decoders may complain about malformed input. |
|
250 | 269 | * ``ZstdCompressionWriter.flush()`` (what is returned from |
|
251 | 270 | ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the |
|
252 | 271 | flush behavior. Its value can be one of the new constants |
|
253 | 272 | ``FLUSH_BLOCK`` or ``FLUSH_FRAME``. |
|
254 | 273 | * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method. |
|
255 | 274 | This provides parity with standard library equivalent types. (#65) |
|
256 | 275 | * ``CompressionParameters`` no longer redundantly store individual compression |
|
257 | 276 | parameters on each instance. Instead, compression parameters are stored inside |
|
258 | 277 | the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining |
|
259 | 278 | parameters are now properties rather than instance variables. |
|
260 | 279 | * Exposed the ``STRATEGY_BTULTRA2`` constant. |
|
261 | 280 | * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute. |
|
262 | 281 | This behaves identically to the ``overlap_size_log`` attribute. |
|
263 | 282 | * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that |
|
264 | 283 | behaves identically to the ``overlap_size_log`` argument. An error will be |
|
265 | 284 | raised if both arguments are specified. |
|
266 | 285 | * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log`` |
|
267 | 286 | attribute. This behaves identically to the ``ldm_hash_every_log`` attribute. |
|
268 | 287 | * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that |
|
269 | 288 | behaves identically to the ``ldm_hash_every_log`` argument. An error will be |
|
270 | 289 | raised if both arguments are specified. |
|
271 | 290 | * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves |
|
272 | 291 | identically to the ``compression_strategy`` argument. An error will be raised |
|
273 | 292 | if both arguments are specified. |
|
274 | 293 | * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are |
|
275 | 294 | semantically equivalent to the old ``SEARCHLENGTH_MIN`` and |
|
276 | 295 | ``SEARCHLENGTH_MAX`` constants. |
|
277 | 296 | * Bundled zstandard library upgraded from 1.3.7 to 1.3.8. |
|
278 | 297 | * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and |
|
279 | 298 | tested in the 0.10 release). |
|
280 | 299 | * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. |
|
281 | 300 | * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid |
|
282 | 301 | allocating a new buffer for every operation. This should result in faster |
|
283 | 302 | performance in cases where ``write()`` or ``flush()`` are being called |
|
284 | 303 | frequently. (#62) |
|
285 | 304 | * Bundled zstandard library upgraded from 1.3.6 to 1.3.7. |
|
286 | 305 | |
|
287 | 306 | 0.10.2 (released 2018-11-03) |
|
288 | 307 | ============================ |
|
289 | 308 | |
|
290 | 309 | Bug Fixes |
|
291 | 310 | --------- |
|
292 | 311 | |
|
293 | 312 | * ``zstd_cffi.py`` added to ``setup.py`` (#60). |
|
294 | 313 | |
|
295 | 314 | Changes |
|
296 | 315 | ------- |
|
297 | 316 | |
|
298 | 317 | * Change some integer casts to avoid ``ssize_t`` (#61). |
|
299 | 318 | |
|
300 | 319 | 0.10.1 (released 2018-10-08) |
|
301 | 320 | ============================ |
|
302 | 321 | |
|
303 | 322 | Backwards Compatibility Notes |
|
304 | 323 | ----------------------------- |
|
305 | 324 | |
|
306 | 325 | * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a |
|
307 | 326 | method (#58). |
|
308 | 327 | * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a |
|
309 | 328 | method (#58). |
|
310 | 329 | |
|
311 | 330 | Changes |
|
312 | 331 | ------- |
|
313 | 332 | |
|
314 | 333 | * Stop attempting to package Python 3.6 for Miniconda. The latest version of |
|
315 | 334 | Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie |
|
316 | 335 | since they were built against Python 3.7. |
|
317 | 336 | * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s |
|
318 | 337 | ``closed`` attribute is now a read-only property instead of a method. This now |
|
319 | 338 | properly matches the ``IOBase`` API and allows instances to be used in more |
|
320 | 339 | places that accept ``IOBase`` instances. |
|
321 | 340 | |
|
322 | 341 | 0.10.0 (released 2018-10-08) |
|
323 | 342 | ============================ |
|
324 | 343 | |
|
325 | 344 | Backwards Compatibility Notes |
|
326 | 345 | ----------------------------- |
|
327 | 346 | |
|
328 | 347 | * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an |
|
329 | 348 | argument in both the C and CFFI backends. Before, the CFFI implementation |
|
330 | 349 | would assume a default value of ``-1``, which was later rejected. |
|
331 | 350 | * The ``compress_literals`` argument and attribute has been removed from |
|
332 | 351 | ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5 |
|
333 | 352 | API. |
|
334 | 353 | * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every |
|
335 | 354 | operation performed against ``ZstdCompressor`` instances. The reason for this |
|
336 | 355 | change is that the zstd 1.3.5 API no longer allows this without calling |
|
337 | 356 | ``ZSTD_CCtx_resetParameters()`` first. But if we called |
|
338 | 357 | ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo |
|
339 | 358 | potentially expensive setup when using dictionaries. We now call |
|
340 | 359 | ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change |
|
341 | 360 | compression parameters. |
|
342 | 361 | * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be |
|
343 | 362 | used as a context manager. The context manager interface still exists and its |
|
344 | 363 | behavior is unchanged. |
|
345 | 364 | * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be |
|
346 | 365 | used as a context manager. The context manager interface still exists and its |
|
347 | 366 | behavior is unchanged. |
|
348 | 367 | |
|
349 | 368 | Bug Fixes |
|
350 | 369 | --------- |
|
351 | 370 | |
|
352 | 371 | * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data |
|
353 | 372 | from internal buffers in more scenarios. Before, it was possible for data to |
|
354 | 373 | remain in internal buffers. This data would be emitted on a subsequent call |
|
355 | 374 | to ``decompress()``. The overall output stream would still be valid. But if |
|
356 | 375 | callers were expecting input data to exactly map to output data (say the |
|
357 | 376 | producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to |
|
358 | 377 | map input chunks to output chunks), then the previous behavior would be |
|
359 | 378 | wrong. The new behavior is such that output from |
|
360 | 379 | ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()`` |
|
361 | 380 | should produce all available compressed input. |
|
362 | 381 | * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after |
|
363 | 382 | a previous context manager resulted in error (#56). |
|
364 | 383 | * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns |
|
365 | 384 | all data necessary to flush a block. Before, it was possible for the |
|
366 | 385 | ``flush()`` to not emit all data necessary to fully represent a block. This |
|
367 | 386 | would mean decompressors wouldn't be able to decompress all data that had been |
|
368 | 387 | fed into the compressor and ``flush()``ed. (#55). |
|
369 | 388 | |
|
370 | 389 | New Features |
|
371 | 390 | ------------ |
|
372 | 391 | |
|
373 | 392 | * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``, |
|
374 | 393 | ``TARGETLENGTH_MAX`` that expose constants from libzstd. |
|
375 | 394 | * New ``ZstdCompressor.chunker()`` API for manually feeding data into a |
|
376 | 395 | compressor and emitting chunks of a fixed size. Like ``compressobj()``, the |
|
377 | 396 | API doesn't impose restrictions on the input or output types for the |
|
378 | 397 | data streams. Unlike ``compressobj()``, it ensures output chunks are of a |
|
379 | 398 | fixed size. This makes this API useful when the compressed output is being |
|
380 | 399 | fed into an I/O layer, where uniform write sizes are useful. |
|
381 | 400 | * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context |
|
382 | 401 | manager (#34). |
|
383 | 402 | * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context |
|
384 | 403 | manager (#34). |
|
385 | 404 | * Bundled zstandard library upgraded from 1.3.4 to 1.3.6. |
|
386 | 405 | |
|
387 | 406 | Changes |
|
388 | 407 | ------- |
|
389 | 408 | |
|
390 | 409 | * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``. |
|
391 | 410 | * ``zstandard.__version__`` is now defined (#50). |
|
392 | 411 | * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions. |
|
393 | 412 | * Upgrade various packages used in CI to latest versions. Notably tox (in |
|
394 | 413 | order to support Python 3.7). |
|
395 | 414 | * Use relative paths in setup.py to appease Python 3.7 (#51). |
|
396 | 415 | * Added CI for Python 3.7. |
|
397 | 416 | |
|
398 | 417 | 0.9.1 (released 2018-06-04) |
|
399 | 418 | =========================== |
|
400 | 419 | |
|
401 | 420 | * Debian packaging support. |
|
402 | 421 | * Fix typo in setup.py (#44). |
|
403 | 422 | * Support building with mingw compiler (#46). |
|
404 | 423 | |
|
405 | 424 | 0.9.0 (released 2018-04-08) |
|
406 | 425 | =========================== |
|
407 | 426 | |
|
408 | 427 | Backwards Compatibility Notes |
|
409 | 428 | ----------------------------- |
|
410 | 429 | |
|
411 | 430 | * CFFI 1.11 or newer is now required (previous requirement was 1.8). |
|
412 | 431 | * The primary module is now ``zstandard``. Please change imports of ``zstd`` |
|
413 | 432 | and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support |
|
414 | 433 | for importing the old names will be dropped in the next release. |
|
415 | 434 | * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have |
|
416 | 435 | been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new |
|
417 | 436 | name and will be deleted in a future release. |
|
418 | 437 | * Support for Python 2.6 has been removed. |
|
419 | 438 | * Support for Python 3.3 has been removed. |
|
420 | 439 | * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as |
|
421 | 440 | the feature disappeared from zstd 1.3. |
|
422 | 441 | * Support for legacy dictionaries has been removed. Cover dictionaries are now |
|
423 | 442 | the default. ``train_cover_dictionary()`` has effectively been renamed to |
|
424 | 443 | ``train_dictionary()``. |
|
425 | 444 | * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been |
|
426 | 445 | deleted and the method now allows empty inputs to be compressed by default. |
|
427 | 446 | * ``estimate_compression_context_size()`` has been removed. Use |
|
428 | 447 | ``CompressionParameters.estimated_compression_context_size()`` instead. |
|
429 | 448 | * ``get_compression_parameters()`` has been removed. Use |
|
430 | 449 | ``CompressionParameters.from_level()`` instead. |
|
431 | 450 | * The arguments to ``CompressionParameters.__init__()`` have changed. If you |
|
432 | 451 | were using positional arguments before, the positions now map to different |
|
433 | 452 | arguments. It is recommended to use keyword arguments to construct |
|
434 | 453 | ``CompressionParameters`` instances. |
|
435 | 454 | * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard |
|
436 | 455 | 1.3.4). |
|
437 | 456 | * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been |
|
438 | 457 | renamed to ``ZstdCompressor.stream_writer()`` and |
|
439 | 458 | ``ZstdDecompressor.stream_writer()``, respectively. The old names are still |
|
440 | 459 | aliased, but will be removed in the next major release. |
|
441 | 460 | * Content sizes are written into frame headers by default |
|
442 | 461 | (``ZstdCompressor(write_content_size=True)`` is now the default). |
|
443 | 462 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters`` |
|
444 | 463 | for consistency with other types. The old name is an alias and will be removed |
|
445 | 464 | in the next major release. |
|
446 | 465 | |
|
447 | 466 | Bug Fixes |
|
448 | 467 | --------- |
|
449 | 468 | |
|
450 | 469 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2). |
|
451 | 470 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2). |
|
452 | 471 | * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``. |
|
453 | 472 | |
|
454 | 473 | New Features |
|
455 | 474 | ------------ |
|
456 | 475 | |
|
457 | 476 | * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various |
|
458 | 477 | bug fixes and performance improvements. It also gives us access to newer |
|
459 | 478 | features. |
|
460 | 479 | * Support for negative compression levels. |
|
461 | 480 | * Support for *long distance matching* (facilitates compression ratios that approach |
|
462 | 481 | LZMA). |
|
463 | 482 | * Support for reading empty zstandard frames (with an embedded content size
|
464 | 483 | of 0). |
|
465 | 484 | * Support for writing and partial support for reading zstandard frames without a |
|
466 | 485 | magic header. |
|
467 | 486 | * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows |
|
468 | 487 | you to ``.read()`` from a file-like object). |
|
469 | 488 | * Several minor features, bug fixes, and performance enhancements. |
|
470 | 489 | * Wheels for Linux and macOS are now provided with releases. |
|
471 | 490 | |
|
472 | 491 | Changes |
|
473 | 492 | ------- |
|
474 | 493 | |
|
475 | 494 | * Functions accepting bytes data now use the buffer protocol and can accept |
|
476 | 495 | more types (like ``memoryview`` and ``bytearray``) (#26). |
|
477 | 496 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
478 | 497 | * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream |
|
479 | 498 | of decompressed data for a source. |
|
480 | 499 | * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of |
|
481 | 500 | compressed data for a source. |
|
482 | 501 | * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``. |
|
483 | 502 | The old name is still available. |
|
484 | 503 | * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``. |
|
485 | 504 | ``read_from()`` is still available at its old location. |
|
486 | 505 | * Introduce the ``zstandard`` module to import and re-export the C or CFFI |
|
487 | 506 | *backend* as appropriate. Behavior can be controlled via the |
|
488 | 507 | ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for |
|
489 | 508 | usage info. |
|
490 | 509 | * Vendored version of zstd upgraded to 1.3.4. |
|
491 | 510 | * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``. |
|
492 | 511 | * Add ``STRATEGY_BTULTRA`` compression strategy constant. |
|
493 | 512 | * Switch from deprecated ``ZSTD_getDecompressedSize()`` to |
|
494 | 513 | ``ZSTD_getFrameContentSize()`` replacement. |
|
495 | 514 | * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring |
|
496 | 515 | special handling. |
|
497 | 516 | * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()`` |
|
498 | 517 | method for determining the current memory utilization of the underlying zstd |
|
499 | 518 | primitive. |
|
500 | 519 | * ``train_dictionary()`` has new arguments and functionality for trying multiple |
|
501 | 520 | variations of COVER parameters and selecting the best one. |
|
502 | 521 | * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and |
|
503 | 522 | ``LDM_BUCKETSIZELOG_MAX``. |
|
504 | 523 | * Converted all consumers to the zstandard *new advanced API*, which uses |
|
505 | 524 | ``ZSTD_compress_generic()`` |
|
506 | 525 | * ``CompressionParameters.__init__`` now accepts several more arguments, |
|
507 | 526 | including support for *long distance matching*. |
|
508 | 527 | * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that |
|
509 | 528 | controls how the dictionary should be interpreted. This can be used to |
|
510 | 529 | force the use of *content-only* dictionaries or to require the presence |
|
511 | 530 | of the dictionary magic header. |
|
512 | 531 | * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the |
|
513 | 532 | compression dictionary so it can efficiently be used with multiple |
|
514 | 533 | ``ZstdCompressor`` instances. |
|
515 | 534 | * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances, |
|
516 | 535 | created automatically on first use, and automatically reused by all |
|
517 | 536 | ``ZstdDecompressor`` instances bound to that dictionary. |
|
518 | 537 | * All meaningful functions now accept keyword arguments. |
|
519 | 538 | * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument |
|
520 | 539 | to control how much work to perform on every decompressor invocation. |
|
521 | 540 | * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the |
|
522 | 541 | total number of bytes written so far. |
|
523 | 542 | * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving |
|
524 | 543 | forward in the stream. |
|
525 | 544 | * Removed ``TARGETLENGTH_MAX`` constant. |
|
526 | 545 | * Added ``frame_header_size(data)`` function. |
|
527 | 546 | * Added ``frame_content_size(data)`` function. |
|
528 | 547 | * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced |
|
529 | 548 | decompression* API. |
|
530 | 549 | * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with |
|
531 | 550 | negative compression levels. |
|
532 | 551 | * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the |
|
533 | 552 | amount of memory required for decompression operations. |
|
534 | 553 | * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with |
|
535 | 554 | the ``format`` compression parameter to control whether the frame magic |
|
536 | 555 | header is written. |
|
537 | 556 | * ``ZstdDecompressor`` now accepts a ``format`` argument to control the |
|
538 | 557 | expected frame format. |
|
539 | 558 | * ``ZstdCompressor`` now has a ``frame_progression()`` method to return |
|
540 | 559 | information about the current compression operation. |
|
541 | 560 | * Error messages in CFFI no longer have ``b''`` literals. |
|
542 | 561 | * Compiler warnings and underlying overflow issues on 32-bit platforms have been |
|
543 | 562 | fixed. |
|
544 | 563 | * Builds in CI now build with compiler warnings as errors. This should hopefully |
|
545 | 564 | fix new compiler warnings from being introduced. |
|
546 | 565 | * Make ``ZstdCompressor(write_content_size=True)`` and |
|
547 | 566 | ``CompressionParameters(write_content_size=True)`` the default. |
|
548 | 567 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``. |
|
549 | 568 | |
|
550 | 569 | 0.8.2 (released 2018-02-22) |
|
551 | 570 | --------------------------- |
|
552 | 571 | |
|
553 | 572 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40). |
|
554 | 573 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35). |
|
555 | 574 | |
|
556 | 575 | 0.8.1 (released 2017-04-08) |
|
557 | 576 | --------------------------- |
|
558 | 577 | |
|
559 | 578 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
560 | 579 | |
|
561 | 580 | 0.8.0 (released 2017-03-08) |
|
562 | 581 | =========================== |
|
563 | 582 | |
|
564 | 583 | * CompressionParameters now has a estimated_compression_context_size() method. |
|
565 | 584 | zstd.estimate_compression_context_size() is now deprecated and slated for |
|
566 | 585 | removal. |
|
567 | 586 | * Implemented a lot of fuzzing tests. |
|
568 | 587 | * CompressionParameters instances now perform extra validation by calling |
|
569 | 588 | ZSTD_checkCParams() at construction time. |
|
570 | 589 | * multi_compress_to_buffer() API for compressing multiple inputs as a |
|
571 | 590 | single operation, as efficiently as possible. |
|
572 | 591 | * ZSTD_CStream instances are now used across multiple operations on |
|
573 | 592 | ZstdCompressor instances, resulting in much better performance for |
|
574 | 593 | APIs that do streaming. |
|
575 | 594 | * ZSTD_DStream instances are now used across multiple operations on |
|
576 | 595 | ZstdDecompressor instances, resulting in much better performance for |
|
577 | 596 | APIs that do streaming. |
|
578 | 597 | * train_dictionary() now releases the GIL. |
|
579 | 598 | * Support for training dictionaries using the COVER algorithm. |
|
580 | 599 | * multi_decompress_to_buffer() API for decompressing multiple frames as a |
|
581 | 600 | single operation, as efficiently as possible. |
|
582 | 601 | * Support for multi-threaded compression. |
|
583 | 602 | * Disable deprecation warnings when compiling CFFI module. |
|
584 | 603 | * Fixed memory leak in train_dictionary(). |
|
585 | 604 | * Removed DictParameters type. |
|
586 | 605 | * train_dictionary() now accepts keyword arguments instead of a |
|
587 | 606 | DictParameters instance to control dictionary generation. |
|
588 | 607 | |
|
589 | 608 | 0.7.0 (released 2017-02-07) |
|
590 | 609 | =========================== |
|
591 | 610 | |
|
592 | 611 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. |
|
593 | 612 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient |
|
594 | 613 | decompression of *content-only dictionary chains*. |
|
595 | 614 | * CFFI module fully implemented; all tests run against both C extension and |
|
596 | 615 | CFFI implementation. |
|
597 | 616 | * Vendored version of zstd updated to 1.1.3. |
|
598 | 617 | * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
|
599 | 618 | to avoid extra memory allocation of dict data. |
|
600 | 619 | * Add function names to error messages (by using ":name" in PyArg_Parse* |
|
601 | 620 | functions). |
|
602 | 621 | * Reuse decompression context across operations. Previously, we created a |
|
603 | 622 | new ZSTD_DCtx for each decompress(). This was measured to slow down |
|
604 | 623 | decompression by 40-200MB/s. The API guarantees say ZstdDecompressor |
|
605 | 624 | is not thread safe. So we reuse the ZSTD_DCtx across operations and make |
|
606 | 625 | things faster in the process. |
|
607 | 626 | * ZstdCompressor.write_to()'s compress() and flush() methods now return number |
|
608 | 627 | of bytes written. |
|
609 | 628 | * ZstdDecompressor.write_to()'s write() method now returns the number of bytes |
|
610 | 629 | written to the underlying output object. |
|
611 | 630 | * CompressionParameters instances now expose their values as attributes. |
|
612 | 631 | * CompressionParameters instances no longer are subscriptable nor behave |
|
613 | 632 | as tuples (backwards incompatible). Use attributes to obtain values. |
|
614 | 633 | * DictParameters instances now expose their values as attributes. |
|
615 | 634 | |
|
616 | 635 | 0.6.0 (released 2017-01-14) |
|
617 | 636 | =========================== |
|
618 | 637 | |
|
619 | 638 | * Support for legacy zstd protocols (build time opt in feature). |
|
620 | 639 | * Automation improvements to test against Python 3.6, latest versions |
|
621 | 640 | of Tox, more deterministic AppVeyor behavior. |
|
622 | 641 | * CFFI "parser" improved to use a compiler preprocessor instead of rewriting |
|
623 | 642 | source code manually. |
|
624 | 643 | * Vendored version of zstd updated to 1.1.2. |
|
625 | 644 | * Documentation improvements. |
|
626 | 645 | * Introduce a bench.py script for performing (crude) benchmarks. |
|
627 | 646 | * ZSTD_CCtx instances are now reused across multiple compress() operations. |
|
628 | 647 | * ZstdCompressor.write_to() now has a flush() method. |
|
629 | 648 | * ZstdCompressor.compressobj()'s flush() method now accepts an argument to |
|
630 | 649 | flush a block (as opposed to ending the stream). |
|
631 | 650 | * Disallow compress(b'') when writing content sizes by default (issue #11). |
|
632 | 651 | |
|
633 | 652 | 0.5.2 (released 2016-11-12) |
|
634 | 653 | =========================== |
|
635 | 654 | |
|
636 | 655 | * more packaging fixes for source distribution |
|
637 | 656 | |
|
638 | 657 | 0.5.1 (released 2016-11-12) |
|
639 | 658 | =========================== |
|
640 | 659 | |
|
641 | 660 | * setup_zstd.py is included in the source distribution |
|
642 | 661 | |
|
643 | 662 | 0.5.0 (released 2016-11-10) |
|
644 | 663 | =========================== |
|
645 | 664 | |
|
646 | 665 | * Vendored version of zstd updated to 1.1.1. |
|
647 | 666 | * Continuous integration for Python 3.6 and 3.7 |
|
648 | 667 | * Continuous integration for Conda |
|
649 | 668 | * Added compression and decompression APIs providing similar interfaces |
|
650 | 669 | to the standard library ``zlib`` and ``bz2`` modules. This allows |
|
651 | 670 | coding to a common interface. |
|
652 | 671 | * ``zstd.__version__`` is now defined. 
|
653 | 672 | * ``read_from()`` on various APIs now accepts objects implementing the buffer |
|
654 | 673 | protocol. |
|
655 | 674 | * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers |
|
656 | 675 | to pass in an existing buffer with a header without having to create a |
|
657 | 676 | slice or a new object. |
|
658 | 677 | * Implemented ``ZstdCompressionDict.as_bytes()``. |
|
659 | 678 | * Python's memory allocator is now used instead of ``malloc()``. |
|
660 | 679 | * Low-level zstd data structures are reused in more instances, cutting down |
|
661 | 680 | on overhead for certain operations. |
|
662 | 681 | * ``distutils`` boilerplate for obtaining an ``Extension`` instance |
|
663 | 682 | has now been refactored into a standalone ``setup_zstd.py`` file. This |
|
664 | 683 | allows other projects with ``setup.py`` files to reuse the |
|
665 | 684 | ``distutils`` code for this project without copying code. |
|
666 | 685 | * The monolithic ``zstd.c`` file has been split into a header file defining |
|
667 | 686 | types and separate ``.c`` source files for the implementation. |
|
668 | 687 | |
|
669 | 688 | Older History |
|
670 | 689 | ============= |
|
671 | 690 | |
|
672 | 691 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a |
|
673 | 692 | Python extension for use by the Mercurial project. A very hacky prototype |
|
674 | 693 | is sent to the mercurial-devel list for RFC. |
|
675 | 694 | |
|
676 | 695 | 2016-09-03 - Most functionality from Zstandard C API implemented. Source |
|
677 | 696 | code published on https://github.com/indygreg/python-zstandard. Travis-CI |
|
678 | 697 | automation configured. 0.0.1 release on PyPI. |
|
679 | 698 | |
|
680 | 699 | 2016-09-05 - After the API was rounded out a bit and support for Python |
|
681 | 700 | 2.6 and 2.7 was added, version 0.1 was released to PyPI. |
|
682 | 701 | |
|
683 | 702 | 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2 |
|
684 | 703 | was released to PyPI. |
|
685 | 704 | |
|
686 | 705 | 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor |
|
687 | 706 | now accepts arguments controlling frame parameters. The source size can now |
|
688 | 707 | be declared when performing streaming compression. ZstdDecompressor.decompress() |
|
689 | 708 | is implemented. Compression dictionaries are now cached when using the simple |
|
690 | 709 | compression and decompression APIs. Memory size APIs added. |
|
691 | 710 | ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been |
|
692 | 711 | implemented. This rounds out the major compression/decompression APIs planned |
|
693 | 712 | by the author. |
|
694 | 713 | |
|
695 | 714 | 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully |
|
696 | 715 | decoding a zstd frame (issue #2). |
|
697 | 716 | |
|
698 | 717 | 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and |
|
699 | 718 | write buffer sizes, and a few bug fixes involving failure to read/write |
|
700 | 719 | all data when buffer sizes were too small to hold remaining data. |
|
701 | 720 | |
|
702 | 721 | 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements. |
@@ -1,1602 +1,1602 b'' | |||
|
1 | 1 | ================ |
|
2 | 2 | python-zstandard |
|
3 | 3 | ================ |
|
4 | 4 | |
|
5 | 5 | This project provides Python bindings for interfacing with the |
|
6 | 6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension |
|
7 | 7 | and CFFI interface are provided. |
|
8 | 8 | |
|
9 | 9 | The primary goal of the project is to provide a rich interface to the |
|
10 | 10 | underlying C API through a Pythonic interface while not sacrificing |
|
11 | 11 | performance. This means exposing most of the features and flexibility |
|
12 | 12 | of the C API while not sacrificing usability or safety that Python provides. |
|
13 | 13 | |
|
14 | 14 | The canonical home for this project lives in a Mercurial repository run by |
|
15 | 15 | the author. For convenience, that repository is frequently synchronized to |
|
16 | 16 | https://github.com/indygreg/python-zstandard. |
|
17 | 17 | |
|
18 | 18 | | |ci-status| |
|
19 | 19 | |
|
20 | 20 | Requirements |
|
21 | 21 | ============ |
|
22 | 22 | |
|
23 |
This extension is designed to run with Python 2.7, 3. |
|
|
23 | This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8 | |
|
24 | 24 | on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above. |
|
25 | 25 | x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS. |
|
26 | 26 | |
|
27 | 27 | Installing |
|
28 | 28 | ========== |
|
29 | 29 | |
|
30 | 30 | This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard. |
|
31 | 31 | So, to install this package:: |
|
32 | 32 | |
|
33 | 33 | $ pip install zstandard |
|
34 | 34 | |
|
35 | 35 | Binary wheels are made available for some platforms. If you need to |
|
36 | 36 | install from a source distribution, all you should need is a working C |
|
37 | 37 | compiler and the Python development headers/libraries. On many Linux |
|
38 | 38 | distributions, you can install a ``python-dev`` or ``python-devel`` |
|
39 | 39 | package to provide these dependencies. |
|
40 | 40 | |
|
41 | 41 | Packages are also uploaded to Anaconda Cloud at |
|
42 | 42 | https://anaconda.org/indygreg/zstandard. See that URL for how to install |
|
43 | 43 | this package with ``conda``. |
|
44 | 44 | |
|
45 | 45 | Performance |
|
46 | 46 | =========== |
|
47 | 47 | |
|
48 | 48 | zstandard is a highly tunable compression algorithm. In its default settings |
|
49 | 49 | (compression level 3), it will be faster at compression and decompression and |
|
50 | 50 | will have better compression ratios than zlib on most data sets. When tuned |
|
51 | 51 | for speed, it approaches lz4's speed and ratios. When tuned for compression |
|
52 | 52 | ratio, it approaches lzma ratios and compression speed, but decompression |
|
53 | 53 | speed is much faster. See the official zstandard documentation for more. |
|
54 | 54 | |
|
55 | 55 | zstandard and this library support multi-threaded compression. There is a |
|
56 | 56 | mechanism to compress large inputs using multiple threads. |
|
57 | 57 | |
|
58 | 58 | The performance of this library is usually very similar to what the zstandard |
|
59 | 59 | C API can deliver. Overhead in this library is due to general Python overhead |
|
60 | 60 | and can't easily be avoided by *any* zstandard Python binding. This library |
|
61 | 61 | exposes multiple APIs for performing compression and decompression so callers |
|
62 | 62 | can pick an API suitable for their need. Contrast with the compression |
|
63 | 63 | modules in Python's standard library (like ``zlib``), which only offer limited |
|
64 | 64 | mechanisms for performing operations. The API flexibility means consumers can |
|
65 | 65 | choose to use APIs that facilitate zero copying or minimize Python object |
|
66 | 66 | creation and garbage collection overhead. |
|
67 | 67 | |
|
68 | 68 | This library is capable of single-threaded throughputs well over 1 GB/s. For |
|
69 | 69 | exact numbers, measure yourself. The source code repository has a ``bench.py`` |
|
70 | 70 | script that can be used to measure things. |
|
71 | 71 | |
|
72 | 72 | API |
|
73 | 73 | === |
|
74 | 74 | |
|
75 | 75 | To interface with Zstandard, simply import the ``zstandard`` module:: |
|
76 | 76 | |
|
77 | 77 | import zstandard |
|
78 | 78 | |
|
79 | 79 | It is a popular convention to alias the module as a different name for |
|
80 | 80 | brevity:: |
|
81 | 81 | |
|
82 | 82 | import zstandard as zstd |
|
83 | 83 | |
|
84 | 84 | This module attempts to import and use either the C extension or CFFI |
|
85 | 85 | implementation. On Python platforms known to support C extensions (like |
|
86 | 86 | CPython), it raises an ImportError if the C extension cannot be imported. |
|
87 | 87 | On Python platforms known to not support C extensions (like PyPy), it only |
|
88 | 88 | attempts to import the CFFI implementation and raises ImportError if that |
|
89 | 89 | can't be done. On other platforms, it first tries to import the C extension |
|
90 | 90 | then falls back to CFFI if that fails and raises ImportError if CFFI fails. |
|
91 | 91 | |
|
92 | 92 | To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY`` |
|
93 | 93 | environment variable can be set. The following values are accepted: |
|
94 | 94 | |
|
95 | 95 | default |
|
96 | 96 | The behavior described above. |
|
97 | 97 | cffi_fallback |
|
98 | 98 | Always try to import the C extension then fall back to CFFI if that |
|
99 | 99 | fails. |
|
100 | 100 | cext |
|
101 | 101 | Only attempt to import the C extension. |
|
102 | 102 | cffi |
|
103 | 103 | Only attempt to import the CFFI implementation. |
|
104 | 104 | |
|
105 | 105 | In addition, the ``zstandard`` module exports a ``backend`` attribute |
|
106 | 106 | containing the string name of the backend being used. It will be one |
|
107 | 107 | of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively). |
|
108 | 108 | |
|
109 | 109 | The types, functions, and attributes exposed by the ``zstandard`` module |
|
110 | 110 | are documented in the sections below. |
|
111 | 111 | |
|
112 | 112 | .. note:: |
|
113 | 113 | |
|
114 | 114 | The documentation in this section makes references to various zstd |
|
115 | 115 | concepts and functionality. The source repository contains a |
|
116 | 116 | ``docs/concepts.rst`` file explaining these in more detail. |
|
117 | 117 | |
|
118 | 118 | ZstdCompressor |
|
119 | 119 | -------------- |
|
120 | 120 | |
|
121 | 121 | The ``ZstdCompressor`` class provides an interface for performing |
|
122 | 122 | compression operations. Each instance is essentially a wrapper around a |
|
123 | 123 | ``ZSTD_CCtx`` from the C API. |
|
124 | 124 | |
|
125 | 125 | Each instance is associated with parameters that control compression |
|
126 | 126 | behavior. These come from the following named arguments (all optional): |
|
127 | 127 | |
|
128 | 128 | level |
|
129 | 129 | Integer compression level. Valid values are between 1 and 22. |
|
130 | 130 | dict_data |
|
131 | 131 | Compression dictionary to use. |
|
132 | 132 | |
|
133 | 133 | Note: When using dictionary data and ``compress()`` is called multiple |
|
134 | 134 | times, the ``ZstdCompressionParameters`` derived from an integer |
|
135 | 135 | compression ``level`` and the first compressed data's size will be reused |
|
136 | 136 | for all subsequent operations. This may not be desirable if source data |
|
137 | 137 | size varies significantly. |
|
138 | 138 | compression_params |
|
139 | 139 | A ``ZstdCompressionParameters`` instance defining compression settings. |
|
140 | 140 | write_checksum |
|
141 | 141 | Whether a 4 byte checksum should be written with the compressed data. |
|
142 | 142 | Defaults to False. If True, the decompressor can verify that decompressed |
|
143 | 143 | data matches the original input data. |
|
144 | 144 | write_content_size |
|
145 | 145 | Whether the size of the uncompressed data will be written into the |
|
146 | 146 | header of compressed data. Defaults to True. The data will only be |
|
147 | 147 | written if the compressor knows the size of the input data. This is |
|
148 | 148 | often not true for streaming compression. |
|
149 | 149 | write_dict_id |
|
150 | 150 | Whether to write the dictionary ID into the compressed data. |
|
151 | 151 | Defaults to True. The dictionary ID is only written if a dictionary |
|
152 | 152 | is being used. |
|
153 | 153 | threads |
|
154 | 154 | Enables and sets the number of threads to use for multi-threaded compression |
|
155 | 155 | operations. Defaults to 0, which means to use single-threaded compression. |
|
156 | 156 | Negative values will resolve to the number of logical CPUs in the system. |
|
157 | 157 | Read below for more info on multi-threaded compression. This argument only |
|
158 | 158 | controls thread count for operations that operate on individual pieces of |
|
159 | 159 | data. APIs that spawn multiple threads for working on multiple pieces of |
|
160 | 160 | data have their own ``threads`` argument. |
|
161 | 161 | |
|
162 | 162 | ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``, |
|
163 | 163 | ``write_content_size``, ``write_dict_id``, and ``threads``. |
|
164 | 164 | |
|
165 | 165 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` |
|
166 | 166 | instances can be called from multiple Python threads simultaneously. In other |
|
167 | 167 | words, assume instances are not thread safe unless stated otherwise. |
|
168 | 168 | |
|
169 | 169 | Utility Methods |
|
170 | 170 | ^^^^^^^^^^^^^^^ |
|
171 | 171 | |
|
172 | 172 | ``frame_progression()`` returns a 3-tuple containing the number of bytes |
|
173 | 173 | ingested, consumed, and produced by the current compression operation. |
|
174 | 174 | |
|
175 | 175 | ``memory_size()`` obtains the memory utilization of the underlying zstd |
|
176 | 176 | compression context, in bytes.:: |
|
177 | 177 | |
|
178 | 178 | cctx = zstd.ZstdCompressor() |
|
179 | 179 | memory = cctx.memory_size() |
|
180 | 180 | |
|
181 | 181 | Simple API |
|
182 | 182 | ^^^^^^^^^^ |
|
183 | 183 | |
|
184 | 184 | ``compress(data)`` compresses and returns data as a one-shot operation.:: |
|
185 | 185 | |
|
186 | 186 | cctx = zstd.ZstdCompressor() |
|
187 | 187 | compressed = cctx.compress(b'data to compress') |
|
188 | 188 | |
|
189 | 189 | The ``data`` argument can be any object that implements the *buffer protocol*. |
|
190 | 190 | |
|
191 | 191 | Stream Reader API |
|
192 | 192 | ^^^^^^^^^^^^^^^^^ |
|
193 | 193 | |
|
194 | 194 | ``stream_reader(source)`` can be used to obtain an object conforming to the |
|
195 | 195 | ``io.RawIOBase`` interface for reading compressed output as a stream:: |
|
196 | 196 | |
|
197 | 197 | with open(path, 'rb') as fh: |
|
198 | 198 | cctx = zstd.ZstdCompressor() |
|
199 | 199 | reader = cctx.stream_reader(fh) |
|
200 | 200 | while True: |
|
201 | 201 | chunk = reader.read(16384) |
|
202 | 202 | if not chunk: |
|
203 | 203 | break |
|
204 | 204 | |
|
205 | 205 | # Do something with compressed chunk. |
|
206 | 206 | |
|
207 | 207 | Instances can also be used as context managers:: |
|
208 | 208 | |
|
209 | 209 | with open(path, 'rb') as fh: |
|
210 | 210 | with cctx.stream_reader(fh) as reader: |
|
211 | 211 | while True: |
|
212 | 212 | chunk = reader.read(16384) |
|
213 | 213 | if not chunk: |
|
214 | 214 | break |
|
215 | 215 | |
|
216 | 216 | # Do something with compressed chunk. |
|
217 | 217 | |
|
218 | 218 | When the context manager exits or ``close()`` is called, the stream is closed, |
|
219 | 219 | underlying resources are released, and future operations against the compression |
|
220 | 220 | stream will fail. |
|
221 | 221 | |
|
222 | 222 | The ``source`` argument to ``stream_reader()`` can be any object with a |
|
223 | 223 | ``read(size)`` method or any object implementing the *buffer protocol*. |
|
224 | 224 | |
|
225 | 225 | ``stream_reader()`` accepts a ``size`` argument specifying how large the input |
|
226 | 226 | stream is. This is used to adjust compression parameters so they are |
|
227 | 227 | tailored to the source size.:: |
|
228 | 228 | |
|
229 | 229 | with open(path, 'rb') as fh: |
|
230 | 230 | cctx = zstd.ZstdCompressor() |
|
231 | 231 | with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader: |
|
232 | 232 | ... |
|
233 | 233 | |
|
234 | 234 | If the ``source`` is a stream, you can specify how large ``read()`` requests |
|
235 | 235 | to that stream should be via the ``read_size`` argument. It defaults to |
|
236 | 236 | ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.:: |
|
237 | 237 | |
|
238 | 238 | with open(path, 'rb') as fh: |
|
239 | 239 | cctx = zstd.ZstdCompressor() |
|
240 | 240 | # Will perform fh.read(8192) when obtaining data to feed into the |
|
241 | 241 | # compressor. |
|
242 | 242 | with cctx.stream_reader(fh, read_size=8192) as reader: |
|
243 | 243 | ... |
|
244 | 244 | |
|
245 | 245 | The stream returned by ``stream_reader()`` is neither writable nor seekable |
|
246 | 246 | (even if the underlying source is seekable). ``readline()`` and |
|
247 | 247 | ``readlines()`` are not implemented because they don't make sense for |
|
248 | 248 | compressed data. ``tell()`` returns the number of compressed bytes |
|
249 | 249 | emitted so far. |
|
250 | 250 | |
|
251 | 251 | Streaming Input API |
|
252 | 252 | ^^^^^^^^^^^^^^^^^^^ |
|
253 | 253 | |
|
254 | 254 | ``stream_writer(fh)`` allows you to *stream* data into a compressor. |
|
255 | 255 | |
|
256 | 256 | Returned instances implement the ``io.RawIOBase`` interface. Only methods |
|
257 | 257 | that involve writing will do useful things. |
|
258 | 258 | |
|
259 | 259 | The argument to ``stream_writer()`` must have a ``write(data)`` method. As |
|
260 | 260 | compressed data is available, ``write()`` will be called with the compressed |
|
261 | 261 | data as its argument. Many common Python types implement ``write()``, including |
|
262 | 262 | open file handles and ``io.BytesIO``. |
|
263 | 263 | |
|
264 | 264 | The ``write(data)`` method is used to feed data into the compressor. |
|
265 | 265 | |
|
266 | 266 | The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever |
|
267 | 267 | data remains within the compressor's internal state into the output object. This |
|
268 | 268 | may result in 0 or more ``write()`` calls to the output object. This method |
|
269 | 269 | accepts an optional ``flush_mode`` argument to control the flushing behavior. |
|
270 | 270 | Its value can be any of the ``FLUSH_*`` constants. |
|
271 | 271 | |
|
272 | 272 | Both ``write()`` and ``flush()`` return the number of bytes written to the |
|
273 | 273 | object's ``write()``. In many cases, small inputs do not accumulate enough |
|
274 | 274 | data to cause a write and ``write()`` will return ``0``. |
|
275 | 275 | |
|
276 | 276 | Calling ``close()`` will mark the stream as closed and subsequent I/O |
|
277 | 277 | operations will raise ``ValueError`` (per the documented behavior of |
|
278 | 278 | ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying |
|
279 | 279 | stream if such a method exists. |
|
280 | 280 | |
|
281 | 281 | Typically usage is as follows:: |
|
282 | 282 | |
|
283 | 283 | cctx = zstd.ZstdCompressor(level=10) |
|
284 | 284 | compressor = cctx.stream_writer(fh) |
|
285 | 285 | |
|
286 | 286 | compressor.write(b'chunk 0\n') |
|
287 | 287 | compressor.write(b'chunk 1\n') |
|
288 | 288 | compressor.flush() |
|
289 | 289 | # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point. |
|
290 | 290 | # Receiver is also expecting more data in the zstd *frame*. |
|
291 | 291 | |
|
292 | 292 | compressor.write(b'chunk 2\n') |
|
293 | 293 | compressor.flush(zstd.FLUSH_FRAME) |
|
294 | 294 | # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``. |
|
295 | 295 | # Receiver is expecting no more data, as the zstd frame is closed. |
|
296 | 296 | # Any future calls to ``write()`` at this point will construct a new |
|
297 | 297 | # zstd frame. |
|
298 | 298 | |
|
299 | 299 | Instances can be used as context managers. Exiting the context manager is |
|
300 | 300 | the equivalent of calling ``close()``, which is equivalent to calling |
|
301 | 301 | ``flush(zstd.FLUSH_FRAME)``:: |
|
302 | 302 | |
|
303 | 303 | cctx = zstd.ZstdCompressor(level=10) |
|
304 | 304 | with cctx.stream_writer(fh) as compressor: |
|
305 | 305 | compressor.write(b'chunk 0') |
|
306 | 306 | compressor.write(b'chunk 1') |
|
307 | 307 | ... |
|
308 | 308 | |
|
309 | 309 | .. important:: |
|
310 | 310 | |
|
311 | 311 | If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute |
|
312 | 312 | a full zstd *frame* and consumers of this data may complain about malformed |
|
313 | 313 | input. It is recommended to use instances as a context manager to ensure |
|
314 | 314 | *frames* are properly finished. |
|
315 | 315 | |
|
316 | 316 | If the size of the data being fed to this streaming compressor is known, |
|
317 | 317 | you can declare it before compression begins:: |
|
318 | 318 | |
|
319 | 319 | cctx = zstd.ZstdCompressor() |
|
320 | 320 | with cctx.stream_writer(fh, size=data_len) as compressor: |
|
321 | 321 | compressor.write(chunk0) |
|
322 | 322 | compressor.write(chunk1) |
|
323 | 323 | ... |
|
324 | 324 | |
|
325 | 325 | Declaring the size of the source data allows compression parameters to |
|
326 | 326 | be tuned. And if ``write_content_size`` is used, it also results in the |
|
327 | 327 | content size being written into the frame header of the output data. |
|
328 | 328 | |
|
329 | 329 | The size of chunks being ``write()`` to the destination can be specified:: |
|
330 | 330 | |
|
331 | 331 | cctx = zstd.ZstdCompressor() |
|
332 | 332 | with cctx.stream_writer(fh, write_size=32768) as compressor: |
|
333 | 333 | ... |
|
334 | 334 | |
|
335 | 335 | To see how much memory is being used by the streaming compressor:: |
|
336 | 336 | |
|
337 | 337 | cctx = zstd.ZstdCompressor() |
|
338 | 338 | with cctx.stream_writer(fh) as compressor: |
|
339 | 339 | ... |
|
340 | 340 | byte_size = compressor.memory_size() |
|
341 | 341 | |
|
342 | 342 | The total number of bytes written so far is exposed via ``tell()``::
|
343 | 343 | |
|
344 | 344 | cctx = zstd.ZstdCompressor() |
|
345 | 345 | with cctx.stream_writer(fh) as compressor: |
|
346 | 346 | ... |
|
347 | 347 | total_written = compressor.tell() |
|
348 | 348 | |
|
349 | 349 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control |
|
350 | 350 | the return value of ``write()``. When ``False`` (the default), ``write()`` returns |
|
351 | 351 | the number of bytes that were ``write()``en to the underlying object. When |
|
352 | 352 | ``True``, ``write()`` returns the number of bytes read from the input that |
|
353 | 353 | were subsequently written to the compressor. ``True`` is the *proper* behavior |
|
354 | 354 | for ``write()`` as specified by the ``io.RawIOBase`` interface and will become |
|
355 | 355 | the default value in a future release. |
|
356 | 356 | |
|
357 | 357 | Streaming Output API |
|
358 | 358 | ^^^^^^^^^^^^^^^^^^^^ |
|
359 | 359 | |
|
360 | 360 | ``read_to_iter(reader)`` provides a mechanism to stream data out of a |
|
361 | 361 | compressor as an iterator of data chunks.:: |
|
362 | 362 | |
|
363 | 363 | cctx = zstd.ZstdCompressor() |
|
364 | 364 | for chunk in cctx.read_to_iter(fh): |
|
365 | 365 | # Do something with emitted data. |
|
366 | 366 | |
|
367 | 367 | ``read_to_iter()`` accepts an object that has a ``read(size)`` method or |
|
368 | 368 | conforms to the buffer protocol. |
|
369 | 369 | |
|
370 | 370 | Uncompressed data is fetched from the source either by calling ``read(size)`` |
|
371 | 371 | or by fetching a slice of data from the object directly (in the case where |
|
372 | 372 | the buffer protocol is being used). The returned iterator consists of chunks |
|
373 | 373 | of compressed data. |
|
374 | 374 | |
|
375 | 375 | If reading from the source via ``read()``, ``read()`` will be called until |
|
376 | 376 | it raises or returns an empty bytes (``b''``). It is perfectly valid for |
|
377 | 377 | the source to deliver fewer bytes than were requested by ``read(size)``.
|
378 | 378 | |
|
379 | 379 | Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument |
|
380 | 380 | declaring the size of the input stream:: |
|
381 | 381 | |
|
382 | 382 | cctx = zstd.ZstdCompressor() |
|
383 | 383 | for chunk in cctx.read_to_iter(fh, size=some_int): |
|
384 | 384 | pass |
|
385 | 385 | |
|
386 | 386 | You can also control the size that data is ``read()`` from the source and |
|
387 | 387 | the ideal size of output chunks:: |
|
388 | 388 | |
|
389 | 389 | cctx = zstd.ZstdCompressor() |
|
390 | 390 | for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192): |
|
391 | 391 | pass |
|
392 | 392 | |
|
393 | 393 | Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control |
|
394 | 394 | over the sizes of chunks fed into the compressor. Instead, chunk sizes will |
|
395 | 395 | be whatever the object being read from delivers. These will often be of a |
|
396 | 396 | uniform size. |
|
397 | 397 | |
|
398 | 398 | Stream Copying API |
|
399 | 399 | ^^^^^^^^^^^^^^^^^^ |
|
400 | 400 | |
|
401 | 401 | ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while |
|
402 | 402 | compressing it.:: |
|
403 | 403 | |
|
404 | 404 | cctx = zstd.ZstdCompressor() |
|
405 | 405 | cctx.copy_stream(ifh, ofh) |
|
406 | 406 | |
|
407 | 407 | For example, say you wish to compress a file:: |
|
408 | 408 | |
|
409 | 409 | cctx = zstd.ZstdCompressor() |
|
410 | 410 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
411 | 411 | cctx.copy_stream(ifh, ofh) |
|
412 | 412 | |
|
413 | 413 | It is also possible to declare the size of the source stream:: |
|
414 | 414 | |
|
415 | 415 | cctx = zstd.ZstdCompressor() |
|
416 | 416 | cctx.copy_stream(ifh, ofh, size=len_of_input) |
|
417 | 417 | |
|
418 | 418 | You can also specify how large the chunks that are ``read()`` and ``write()``

419 | 419 | from and to the streams should be::
|
420 | 420 | |
|
421 | 421 | cctx = zstd.ZstdCompressor() |
|
422 | 422 | cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384) |
|
423 | 423 | |
|
424 | 424 | The stream copier returns a 2-tuple of bytes read and written:: |
|
425 | 425 | |
|
426 | 426 | cctx = zstd.ZstdCompressor() |
|
427 | 427 | read_count, write_count = cctx.copy_stream(ifh, ofh) |
|
428 | 428 | |
|
429 | 429 | Compressor API |
|
430 | 430 | ^^^^^^^^^^^^^^ |
|
431 | 431 | |
|
432 | 432 | ``compressobj()`` returns an object that exposes ``compress(data)`` and |
|
433 | 433 | ``flush()`` methods. Each returns compressed data or an empty bytes. |
|
434 | 434 | |
|
435 | 435 | The purpose of ``compressobj()`` is to provide an API-compatible interface |
|
436 | 436 | with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to |
|
437 | 437 | swap in different compressor objects while using the same API. |
|
438 | 438 | |
|
439 | 439 | ``flush()`` accepts an optional argument indicating how to end the stream. |
|
440 | 440 | ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream. |
|
441 | 441 | Once this type of flush is performed, ``compress()`` and ``flush()`` can |
|
442 | 442 | no longer be called. This type of flush **must** be called to end the |
|
443 | 443 | compression context. If not called, returned data may be incomplete. |
|
444 | 444 | |
|
445 | 445 | A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a |
|
446 | 446 | zstd block. Flushes of this type can be performed multiple times. The next |
|
447 | 447 | call to ``compress()`` will begin a new zstd block. |
|
448 | 448 | |
|
449 | 449 | Here is how this API should be used:: |
|
450 | 450 | |
|
451 | 451 | cctx = zstd.ZstdCompressor() |
|
452 | 452 | cobj = cctx.compressobj() |
|
453 | 453 | data = cobj.compress(b'raw input 0') |
|
454 | 454 | data = cobj.compress(b'raw input 1') |
|
455 | 455 | data = cobj.flush() |
|
456 | 456 | |
|
457 | 457 | Or to flush blocks:: |
|
458 | 458 | |
|
459 | 459 |    cctx = zstd.ZstdCompressor()
|
460 | 460 | cobj = cctx.compressobj() |
|
461 | 461 | data = cobj.compress(b'chunk in first block') |
|
462 | 462 | data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
463 | 463 | data = cobj.compress(b'chunk in second block') |
|
464 | 464 | data = cobj.flush() |
|
465 | 465 | |
|
466 | 466 | For best performance results, keep input chunks under 256KB. This avoids |
|
467 | 467 | extra allocations for a large output object. |
|
468 | 468 | |
|
469 | 469 | It is possible to declare the input size of the data that will be fed into |
|
470 | 470 | the compressor:: |
|
471 | 471 | |
|
472 | 472 | cctx = zstd.ZstdCompressor() |
|
473 | 473 | cobj = cctx.compressobj(size=6) |
|
474 | 474 | data = cobj.compress(b'foobar') |
|
475 | 475 | data = cobj.flush() |
|
476 | 476 | |
|
477 | 477 | Chunker API |
|
478 | 478 | ^^^^^^^^^^^ |
|
479 | 479 | |
|
480 | 480 | ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns |
|
481 | 481 | an object that can be used to iteratively feed chunks of data into a compressor |
|
482 | 482 | and produce output chunks of a uniform size. |
|
483 | 483 | |
|
484 | 484 | The object returned by ``chunker()`` exposes the following methods: |
|
485 | 485 | |
|
486 | 486 | ``compress(data)`` |
|
487 | 487 | Feeds new input data into the compressor. |
|
488 | 488 | |
|
489 | 489 | ``flush()`` |
|
490 | 490 | Flushes all data currently in the compressor. |
|
491 | 491 | |
|
492 | 492 | ``finish()`` |
|
493 | 493 | Signals the end of input data. No new data can be compressed after this |
|
494 | 494 | method is called. |
|
495 | 495 | |
|
496 | 496 | ``compress()``, ``flush()``, and ``finish()`` all return an iterator of |
|
497 | 497 | ``bytes`` instances holding compressed data. The iterator may be empty. Callers |
|
498 | 498 | MUST iterate through all elements of the returned iterator before performing |
|
499 | 499 | another operation on the object. |
|
500 | 500 | |
|
501 | 501 | All chunks emitted by ``compress()`` will have a length of ``chunk_size``. |
|
502 | 502 | |
|
503 | 503 | ``flush()`` and ``finish()`` may return a final chunk smaller than |
|
504 | 504 | ``chunk_size``. |
|
505 | 505 | |
|
506 | 506 | Here is how the API should be used:: |
|
507 | 507 | |
|
508 | 508 | cctx = zstd.ZstdCompressor() |
|
509 | 509 | chunker = cctx.chunker(chunk_size=32768) |
|
510 | 510 | |
|
511 | 511 | with open(path, 'rb') as fh: |
|
512 | 512 | while True: |
|
513 | 513 | in_chunk = fh.read(32768) |
|
514 | 514 | if not in_chunk: |
|
515 | 515 | break |
|
516 | 516 | |
|
517 | 517 | for out_chunk in chunker.compress(in_chunk): |
|
518 | 518 | # Do something with output chunk of size 32768. |
|
519 | 519 | |
|
520 | 520 | for out_chunk in chunker.finish(): |
|
521 | 521 | # Do something with output chunks that finalize the zstd frame. |
|
522 | 522 | |
|
523 | 523 | The ``chunker()`` API is often a better alternative to ``compressobj()``. |
|
524 | 524 | |
|
525 | 525 | ``compressobj()`` will emit output data as it is available. This results in a |
|
526 | 526 | *stream* of output chunks of varying sizes. The consistency of the output chunk |
|
527 | 527 | size with ``chunker()`` is more appropriate for many usages, such as sending |
|
528 | 528 | compressed data to a socket. |
|
529 | 529 | |
|
530 | 530 | ``compressobj()`` may also perform extra memory reallocations in order to |
|
531 | 531 | dynamically adjust the sizes of the output chunks. Since ``chunker()`` output |
|
532 | 532 | chunks are all the same size (except for flushed or final chunks), there is |
|
533 | 533 | less memory allocation overhead. |
|
534 | 534 | |
|
535 | 535 | Batch Compression API |
|
536 | 536 | ^^^^^^^^^^^^^^^^^^^^^ |
|
537 | 537 | |
|
538 | 538 | (Experimental. Not yet supported in CFFI bindings.) |
|
539 | 539 | |
|
540 | 540 | ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple |
|
541 | 541 | inputs as a single operation. |
|
542 | 542 | |
|
543 | 543 | Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a |
|
544 | 544 | ``BufferWithSegments``, or a list containing byte like objects. Each element of |
|
545 | 545 | the container will be compressed individually using the configured parameters |
|
546 | 546 | on the ``ZstdCompressor`` instance. |
|
547 | 547 | |
|
548 | 548 | The ``threads`` argument controls how many threads to use for compression. The |
|
549 | 549 | default is ``0`` which means to use a single thread. Negative values use the |
|
550 | 550 | number of logical CPUs in the machine. |
|
551 | 551 | |
|
552 | 552 | The function returns a ``BufferWithSegmentsCollection``. This type represents |
|
553 | 553 | N discrete memory allocations, each holding 1 or more compressed frames.
|
554 | 554 | |
|
555 | 555 | Output data is written to shared memory buffers. This means that unlike |
|
556 | 556 | regular Python objects, a reference to *any* object within the collection |
|
557 | 557 | keeps the shared buffer and therefore memory backing it alive. This can have |
|
558 | 558 | undesirable effects on process memory usage. |
|
559 | 559 | |
|
560 | 560 | The API and behavior of this function is experimental and will likely change. |
|
561 | 561 | Known deficiencies include: |
|
562 | 562 | |
|
563 | 563 | * If asked to use multiple threads, it will always spawn that many threads, |
|
564 | 564 | even if the input is too small to use them. It should automatically lower |
|
565 | 565 | the thread count when the extra threads would just add overhead. |
|
566 | 566 | * The buffer allocation strategy is fixed. There is room to make it dynamic, |
|
567 | 567 | perhaps even to allow one output buffer per input, facilitating a variation |
|
568 | 568 | of the API to return a list without the adverse effects of shared memory |
|
569 | 569 | buffers. |
|
570 | 570 | |
|
571 | 571 | ZstdDecompressor |
|
572 | 572 | ---------------- |
|
573 | 573 | |
|
574 | 574 | The ``ZstdDecompressor`` class provides an interface for performing |
|
575 | 575 | decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from |
|
576 | 576 | the C API. |
|
577 | 577 | |
|
578 | 578 | Each instance is associated with parameters that control decompression. These |
|
579 | 579 | come from the following named arguments (all optional): |
|
580 | 580 | |
|
581 | 581 | dict_data |
|
582 | 582 | Compression dictionary to use. |
|
583 | 583 | max_window_size |
|
584 | 584 |    Sets an upper limit on the window size for decompression operations in
|
585 | 585 | kibibytes. This setting can be used to prevent large memory allocations |
|
586 | 586 | for inputs using large compression windows. |
|
587 | 587 | format |
|
588 | 588 | Set the format of data for the decoder. By default, this is |
|
589 | 589 | ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to |
|
590 | 590 | allow decoding frames without the 4 byte magic header. Not all decompression |
|
591 | 591 | APIs support this mode. |
|
592 | 592 | |
|
593 | 593 | The interface of this class is very similar to ``ZstdCompressor`` (by design). |
|
594 | 594 | |
|
595 | 595 | Unless specified otherwise, assume that no two methods of ``ZstdDecompressor`` |
|
596 | 596 | instances can be called from multiple Python threads simultaneously. In other |
|
597 | 597 | words, assume instances are not thread safe unless stated otherwise. |
|
598 | 598 | |
|
599 | 599 | Utility Methods |
|
600 | 600 | ^^^^^^^^^^^^^^^ |
|
601 | 601 | |
|
602 | 602 | ``memory_size()`` obtains the size of the underlying zstd decompression context, |
|
603 | 603 | in bytes.:: |
|
604 | 604 | |
|
605 | 605 | dctx = zstd.ZstdDecompressor() |
|
606 | 606 | size = dctx.memory_size() |
|
607 | 607 | |
|
608 | 608 | Simple API |
|
609 | 609 | ^^^^^^^^^^ |
|
610 | 610 | |
|
611 | 611 | ``decompress(data)`` can be used to decompress an entire compressed zstd |
|
612 | 612 | frame in a single operation.:: |
|
613 | 613 | |
|
614 | 614 | dctx = zstd.ZstdDecompressor() |
|
615 | 615 | decompressed = dctx.decompress(data) |
|
616 | 616 | |
|
617 | 617 | By default, ``decompress(data)`` will only work on data written with the content |
|
618 | 618 | size encoded in its header (this is the default behavior of |
|
619 | 619 | ``ZstdCompressor().compress()`` but may not be true for streaming compression). If |
|
620 | 620 | compressed data without an embedded content size is seen, ``zstd.ZstdError`` will |
|
621 | 621 | be raised. |
|
622 | 622 | |
|
623 | 623 | If the compressed data doesn't have its content size embedded within it, |
|
624 | 624 | decompression can be attempted by specifying the ``max_output_size`` |
|
625 | 625 | argument.:: |
|
626 | 626 | |
|
627 | 627 | dctx = zstd.ZstdDecompressor() |
|
628 | 628 | uncompressed = dctx.decompress(data, max_output_size=1048576) |
|
629 | 629 | |
|
630 | 630 | Ideally, ``max_output_size`` will be identical to the decompressed output |
|
631 | 631 | size. |
|
632 | 632 | |
|
633 | 633 | If ``max_output_size`` is too small to hold the decompressed data, |
|
634 | 634 | ``zstd.ZstdError`` will be raised. |
|
635 | 635 | |
|
636 | 636 | If ``max_output_size`` is larger than the decompressed data, the allocated |
|
637 | 637 | output buffer will be resized to only use the space required. |
|
638 | 638 | |
|
639 | 639 | Please note that an allocation of the requested ``max_output_size`` will be |
|
640 | 640 | performed every time the method is called. Setting to a very large value could |
|
641 | 641 | result in a lot of work for the memory allocator and may result in |
|
642 | 642 | ``MemoryError`` being raised if the allocation fails. |
|
643 | 643 | |
|
644 | 644 | .. important:: |
|
645 | 645 | |
|
646 | 646 | If the exact size of decompressed data is unknown (not passed in explicitly |
|
647 | 647 | and not stored in the zstandard frame), for performance reasons it is |
|
648 | 648 | encouraged to use a streaming API. |
|
649 | 649 | |
|
650 | 650 | Stream Reader API |
|
651 | 651 | ^^^^^^^^^^^^^^^^^ |
|
652 | 652 | |
|
653 | 653 | ``stream_reader(source)`` can be used to obtain an object conforming to the |
|
654 | 654 | ``io.RawIOBase`` interface for reading decompressed output as a stream:: |
|
655 | 655 | |
|
656 | 656 | with open(path, 'rb') as fh: |
|
657 | 657 | dctx = zstd.ZstdDecompressor() |
|
658 | 658 | reader = dctx.stream_reader(fh) |
|
659 | 659 | while True: |
|
660 | 660 | chunk = reader.read(16384) |
|
661 | 661 | if not chunk: |
|
662 | 662 | break |
|
663 | 663 | |
|
664 | 664 | # Do something with decompressed chunk. |
|
665 | 665 | |
|
666 | 666 | The stream can also be used as a context manager:: |
|
667 | 667 | |
|
668 | 668 | with open(path, 'rb') as fh: |
|
669 | 669 | dctx = zstd.ZstdDecompressor() |
|
670 | 670 | with dctx.stream_reader(fh) as reader: |
|
671 | 671 | ... |
|
672 | 672 | |
|
673 | 673 | When used as a context manager, the stream is closed and the underlying |
|
674 | 674 | resources are released when the context manager exits. Future operations against |
|
675 | 675 | the stream will fail. |
|
676 | 676 | |
|
677 | 677 | The ``source`` argument to ``stream_reader()`` can be any object with a |
|
678 | 678 | ``read(size)`` method or any object implementing the *buffer protocol*. |
|
679 | 679 | |
|
680 | 680 | If the ``source`` is a stream, you can specify how large ``read()`` requests |
|
681 | 681 | to that stream should be via the ``read_size`` argument. It defaults to |
|
682 | 682 | ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:: |
|
683 | 683 | |
|
684 | 684 | with open(path, 'rb') as fh: |
|
685 | 685 | dctx = zstd.ZstdDecompressor() |
|
686 | 686 | # Will perform fh.read(8192) when obtaining data for the decompressor. |
|
687 | 687 | with dctx.stream_reader(fh, read_size=8192) as reader: |
|
688 | 688 | ... |
|
689 | 689 | |
|
690 | 690 | The stream returned by ``stream_reader()`` is not writable. |
|
691 | 691 | |
|
692 | 692 | The stream returned by ``stream_reader()`` is *partially* seekable. |
|
693 | 693 | Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward |
|
694 | 694 | of the current position are allowed. Offsets behind the current read |
|
695 | 695 | position and offsets relative to the end of stream are not allowed and |
|
696 | 696 | will raise ``ValueError`` if attempted. |
|
697 | 697 | |
|
698 | 698 | ``tell()`` returns the number of decompressed bytes read so far. |
|
699 | 699 | |
|
700 | 700 | Not all I/O methods are implemented. Notably missing is support for |
|
701 | 701 | ``readline()``, ``readlines()``, and linewise iteration support. This is |
|
702 | 702 | because streams operate on binary data - not text data. If you want to |
|
703 | 703 | convert decompressed output to text, you can chain an ``io.TextIOWrapper`` |
|
704 | 704 | to the stream:: |
|
705 | 705 | |
|
706 | 706 | with open(path, 'rb') as fh: |
|
707 | 707 | dctx = zstd.ZstdDecompressor() |
|
708 | 708 | stream_reader = dctx.stream_reader(fh) |
|
709 | 709 | text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8') |
|
710 | 710 | |
|
711 | 711 | for line in text_stream: |
|
712 | 712 | ... |
|
713 | 713 | |
|
714 | 714 | The ``read_across_frames`` argument to ``stream_reader()`` controls the |
|
715 | 715 | behavior of read operations when the end of a zstd *frame* is encountered. |
|
716 | 716 | When ``False`` (the default), a read will complete when the end of a |
|
717 | 717 | zstd *frame* is encountered. When ``True``, a read can potentially |
|
718 | 718 | return data spanning multiple zstd *frames*. |
|
719 | 719 | |
|
720 | 720 | Streaming Input API |
|
721 | 721 | ^^^^^^^^^^^^^^^^^^^ |
|
722 | 722 | |
|
723 | 723 | ``stream_writer(fh)`` allows you to *stream* data into a decompressor. |
|
724 | 724 | |
|
725 | 725 | Returned instances implement the ``io.RawIOBase`` interface. Only methods |
|
726 | 726 | that involve writing will do useful things. |
|
727 | 727 | |
|
728 | 728 | The argument to ``stream_writer()`` is typically an object that also implements |
|
729 | 729 | ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many |
|
730 | 730 | common Python types conform to this interface, including open file handles |
|
731 | 731 | and ``io.BytesIO``. |
|
732 | 732 | |
|
733 | 733 | Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data |
|
734 | 734 | is sent to the decompressor by calling ``write(data)`` and decompressed |
|
735 | 735 | output is written to the underlying stream by calling its ``write(data)`` |
|
736 | 736 | method.:: |
|
737 | 737 | |
|
738 | 738 | dctx = zstd.ZstdDecompressor() |
|
739 | 739 | decompressor = dctx.stream_writer(fh) |
|
740 | 740 | |
|
741 | 741 | decompressor.write(compressed_data) |
|
742 | 742 | ... |
|
743 | 743 | |
|
744 | 744 | |
|
745 | 745 | Calls to ``write()`` will return the number of bytes written to the output |
|
746 | 746 | object. Not all inputs will result in bytes being written, so return values |
|
747 | 747 | of ``0`` are possible. |
|
748 | 748 | |
|
749 | 749 | Like the ``stream_writer()`` compressor, instances can be used as context |
|
750 | 750 | managers. However, context managers add no extra special behavior and offer |
|
751 | 751 | little to no benefit to being used. |
|
752 | 752 | |
|
753 | 753 | Calling ``close()`` will mark the stream as closed and subsequent I/O operations |
|
754 | 754 | will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``). |
|
755 | 755 | ``close()`` will also call ``close()`` on the underlying stream if such a |
|
756 | 756 | method exists. |
|
757 | 757 | |
|
758 | 758 | The size of chunks being ``write()`` to the destination can be specified:: |
|
759 | 759 | |
|
760 | 760 | dctx = zstd.ZstdDecompressor() |
|
761 | 761 | with dctx.stream_writer(fh, write_size=16384) as decompressor: |
|
762 | 762 | pass |
|
763 | 763 | |
|
764 | 764 | You can see how much memory is being used by the decompressor:: |
|
765 | 765 | |
|
766 | 766 | dctx = zstd.ZstdDecompressor() |
|
767 | 767 | with dctx.stream_writer(fh) as decompressor: |
|
768 | 768 | byte_size = decompressor.memory_size() |
|
769 | 769 | |
|
770 | 770 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control |
|
771 | 771 | the return value of ``write()``. When ``False`` (the default), ``write()``
|
772 | 772 | returns the number of bytes that were ``write()``en to the underlying stream. |
|
773 | 773 | When ``True``, ``write()`` returns the number of bytes read from the input. |
|
774 | 774 | ``True`` is the *proper* behavior for ``write()`` as specified by the |
|
775 | 775 | ``io.RawIOBase`` interface and will become the default in a future release. |
|
776 | 776 | |
|
777 | 777 | Streaming Output API |
|
778 | 778 | ^^^^^^^^^^^^^^^^^^^^ |
|
779 | 779 | |
|
780 | 780 | ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a |
|
781 | 781 | compressed source as an iterator of data chunks.:: |
|
782 | 782 | |
|
783 | 783 | dctx = zstd.ZstdDecompressor() |
|
784 | 784 | for chunk in dctx.read_to_iter(fh): |
|
785 | 785 | # Do something with original data. |
|
786 | 786 | |
|
787 | 787 | ``read_to_iter()`` accepts an object with a ``read(size)`` method that will |
|
788 | 788 | return compressed bytes or an object conforming to the buffer protocol that |
|
789 | 789 | can expose its data as a contiguous range of bytes. |
|
790 | 790 | |
|
791 | 791 | ``read_to_iter()`` returns an iterator whose elements are chunks of the |
|
792 | 792 | decompressed data. |
|
793 | 793 | |
|
794 | 794 | The size of requested ``read()`` from the source can be specified:: |
|
795 | 795 | |
|
796 | 796 | dctx = zstd.ZstdDecompressor() |
|
797 | 797 | for chunk in dctx.read_to_iter(fh, read_size=16384): |
|
798 | 798 | pass |
|
799 | 799 | |
|
800 | 800 | It is also possible to skip leading bytes in the input data:: |
|
801 | 801 | |
|
802 | 802 | dctx = zstd.ZstdDecompressor() |
|
803 | 803 | for chunk in dctx.read_to_iter(fh, skip_bytes=1): |
|
804 | 804 | pass |
|
805 | 805 | |
|
806 | 806 | .. tip:: |
|
807 | 807 | |
|
808 | 808 | Skipping leading bytes is useful if the source data contains extra |
|
809 | 809 | *header* data. Traditionally, you would need to create a slice or |
|
810 | 810 | ``memoryview`` of the data you want to decompress. This would create |
|
811 | 811 | overhead. It is more efficient to pass the offset into this API. |
|
812 | 812 | |
|
813 | 813 | Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator |
|
814 | 814 | controls when data is decompressed. If the iterator isn't consumed, |
|
815 | 815 | decompression is put on hold. |
|
816 | 816 | |
|
817 | 817 | When ``read_to_iter()`` is passed an object conforming to the buffer protocol, |
|
818 | 818 | the behavior may seem similar to what occurs when the simple decompression |
|
819 | 819 | API is used. However, this API works when the decompressed size is unknown. |
|
820 | 820 | Furthermore, if feeding large inputs, the decompressor will work in chunks |
|
821 | 821 | instead of performing a single operation. |
|
822 | 822 | |
|
823 | 823 | Stream Copying API |
|
824 | 824 | ^^^^^^^^^^^^^^^^^^ |
|
825 | 825 | |
|
826 | 826 | ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while |
|
827 | 827 | performing decompression.:: |
|
828 | 828 | |
|
829 | 829 | dctx = zstd.ZstdDecompressor() |
|
830 | 830 | dctx.copy_stream(ifh, ofh) |
|
831 | 831 | |
|
832 | 832 | e.g. to decompress a file to another file:: |
|
833 | 833 | |
|
834 | 834 | dctx = zstd.ZstdDecompressor() |
|
835 | 835 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
836 | 836 | dctx.copy_stream(ifh, ofh) |
|
837 | 837 | |
|
838 | 838 | The size of chunks being ``read()`` and ``write()`` from and to the streams |
|
839 | 839 | can be specified:: |
|
840 | 840 | |
|
841 | 841 | dctx = zstd.ZstdDecompressor() |
|
842 | 842 | dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384) |
|
843 | 843 | |
|
844 | 844 | Decompressor API |
|
845 | 845 | ^^^^^^^^^^^^^^^^ |
|
846 | 846 | |
|
847 | 847 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` |
|
848 | 848 | method. Compressed data chunks are fed into ``decompress(data)`` and |
|
849 | 849 | uncompressed output (or an empty bytes) is returned. Output from subsequent |
|
850 | 850 | calls needs to be concatenated to reassemble the full decompressed byte |
|
851 | 851 | sequence. |
|
852 | 852 | |
|
853 | 853 | The purpose of ``decompressobj()`` is to provide an API-compatible interface |
|
854 | 854 | with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers |
|
855 | 855 | to swap in different decompressor objects while using the same API. |
|
856 | 856 | |
|
857 | 857 | Each object is single use: once an input frame is decoded, ``decompress()`` |
|
858 | 858 | can no longer be called. |
|
859 | 859 | |
|
860 | 860 | Here is how this API should be used:: |
|
861 | 861 | |
|
862 | 862 | dctx = zstd.ZstdDecompressor() |
|
863 | 863 | dobj = dctx.decompressobj() |
|
864 | 864 | data = dobj.decompress(compressed_chunk_0) |
|
865 | 865 | data = dobj.decompress(compressed_chunk_1) |
|
866 | 866 | |
|
867 | 867 | By default, calls to ``decompress()`` write output data in chunks of size |
|
868 | 868 | ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated |
|
869 | 869 | before being returned to the caller. It is possible to define the size of |
|
870 | 870 | these temporary chunks by passing ``write_size`` to ``decompressobj()``:: |
|
871 | 871 | |
|
872 | 872 | dctx = zstd.ZstdDecompressor() |
|
873 | 873 | dobj = dctx.decompressobj(write_size=1048576) |
|
874 | 874 | |
|
875 | 875 | .. note:: |
|
876 | 876 | |
|
877 | 877 | Because calls to ``decompress()`` may need to perform multiple |
|
878 | 878 | memory (re)allocations, this streaming decompression API isn't as |
|
879 | 879 | efficient as other APIs. |
|
880 | 880 | |
|
881 | 881 | For compatibility with the standard library APIs, instances expose a |
|
882 | 882 | ``flush([length=None])`` method. This method no-ops and has no meaningful |
|
883 | 883 | side-effects, making it safe to call any time. |
|
884 | 884 | |
|
885 | 885 | Batch Decompression API |
|
886 | 886 | ^^^^^^^^^^^^^^^^^^^^^^^ |
|
887 | 887 | |
|
888 | 888 | (Experimental. Not yet supported in CFFI bindings.) |
|
889 | 889 | |
|
890 | 890 | ``multi_decompress_to_buffer()`` performs decompression of multiple |
|
891 | 891 | frames as a single operation and returns a ``BufferWithSegmentsCollection`` |
|
892 | 892 | containing decompressed data for all inputs. |
|
893 | 893 | |
|
894 | 894 | Compressed frames can be passed to the function as a ``BufferWithSegments``, |
|
895 | 895 | a ``BufferWithSegmentsCollection``, or as a list containing objects that |
|
896 | 896 | conform to the buffer protocol. For best performance, pass a |
|
897 | 897 | ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as |
|
898 | 898 | minimal input validation will be done for that type. If calling from |
|
899 | 899 | Python (as opposed to C), constructing one of these instances may add |
|
900 | 900 | overhead cancelling out the performance overhead of validation for list |
|
901 | 901 | inputs.:: |
|
902 | 902 | |
|
903 | 903 | dctx = zstd.ZstdDecompressor() |
|
904 | 904 | results = dctx.multi_decompress_to_buffer([b'...', b'...']) |
|
905 | 905 | |
|
906 | 906 | The decompressed size of each frame MUST be discoverable. It can either be |
|
907 | 907 | embedded within the zstd frame (``write_content_size=True`` argument to |
|
908 | 908 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. |
|
909 | 909 | |
|
910 | 910 | The ``decompressed_sizes`` argument is an object conforming to the buffer |
|
911 | 911 | protocol which holds an array of 64-bit unsigned integers in the machine's |
|
912 | 912 | native format defining the decompressed sizes of each frame. If this argument |
|
913 | 913 | is passed, it avoids having to scan each frame for its decompressed size. |
|
914 | 914 | This frame scanning can add noticeable overhead in some scenarios.:: |
|
915 | 915 | |
|
916 | 916 | frames = [...] |
|
917 | 917 | sizes = struct.pack('=QQQQ', len0, len1, len2, len3) |
|
918 | 918 | |
|
919 | 919 | dctx = zstd.ZstdDecompressor() |
|
920 | 920 | results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) |
|
921 | 921 | |
|
922 | 922 | The ``threads`` argument controls the number of threads to use to perform |
|
923 | 923 | decompression operations. The default (``0``) or the value ``1`` means to |
|
924 | 924 | use a single thread. Negative values use the number of logical CPUs in the |
|
925 | 925 | machine. |
|
926 | 926 | |
|
927 | 927 | .. note:: |
|
928 | 928 | |
|
929 | 929 | It is possible to pass a ``mmap.mmap()`` instance into this function by |
|
930 | 930 | wrapping it with a ``BufferWithSegments`` instance (which will define the |
|
931 | 931 | offsets of frames within the memory mapped region). |
|
932 | 932 | |
|
933 | 933 | This function is logically equivalent to performing ``dctx.decompress()`` |
|
934 | 934 | on each input frame and returning the result. |
|
935 | 935 | |
|
936 | 936 | This function exists to perform decompression on multiple frames as fast |
|
937 | 937 | as possible by having as little overhead as possible. Since decompression is |
|
938 | 938 | performed as a single operation and since the decompressed output is stored in |
|
939 | 939 | a single buffer, extra memory allocations, Python objects, and Python function |
|
940 | 940 | calls are avoided. This is ideal for scenarios where callers know up front that |
|
941 | 941 | they need to access data for multiple frames, such as when *delta chains* are |
|
942 | 942 | being used. |
|
943 | 943 | |
|
944 | 944 | Currently, the implementation always spawns multiple threads when requested, |
|
945 | 945 | even if the amount of work to do is small. In the future, it will be smarter |
|
946 | 946 | about avoiding threads and their associated overhead when the amount of |
|
947 | 947 | work to do is small. |
|
948 | 948 | |
|
949 | 949 | Prefix Dictionary Chain Decompression |
|
950 | 950 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
951 | 951 | |
|
952 | 952 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of |
|
953 | 953 | zstd frames produced using chained *prefix* dictionary compression. Such |
|
954 | 954 | a list of frames is produced by compressing discrete inputs where each |
|
955 | 955 | non-initial input is compressed with a *prefix* dictionary consisting of the |
|
956 | 956 | content of the previous input. |
|
957 | 957 | |
|
958 | 958 | For example, say you have the following inputs:: |
|
959 | 959 | |
|
960 | 960 | inputs = [b'input 1', b'input 2', b'input 3'] |
|
961 | 961 | |
|
962 | 962 | The zstd frame chain consists of: |
|
963 | 963 | |
|
964 | 964 | 1. ``b'input 1'`` compressed in standalone/discrete mode |
|
965 | 965 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary |
|
966 | 966 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary |
|
967 | 967 | |
|
968 | 968 | Each zstd frame **must** have the content size written. |
|
969 | 969 | |
|
970 | 970 | The following Python code can be used to produce a *prefix dictionary chain*:: |
|
971 | 971 | |
|
972 | 972 | def make_chain(inputs): |
|
973 | 973 | frames = [] |
|
974 | 974 | |
|
975 | 975 | # First frame is compressed in standalone/discrete mode. |
|
976 | 976 | zctx = zstd.ZstdCompressor() |
|
977 | 977 | frames.append(zctx.compress(inputs[0])) |
|
978 | 978 | |
|
979 | 979 | # Subsequent frames use the previous fulltext as a prefix dictionary |
|
980 | 980 | for i, raw in enumerate(inputs[1:]): |
|
981 | 981 | dict_data = zstd.ZstdCompressionDict( |
|
982 | 982 | inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT) |
|
983 | 983 | zctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
984 | 984 | frames.append(zctx.compress(raw)) |
|
985 | 985 | |
|
986 | 986 | return frames |
|
987 | 987 | |
|
988 | 988 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last |
|
989 | 989 | element in the input chain. |
|
990 | 990 | |
|
991 | 991 | |
|
992 | 992 | .. note:: |
|
993 | 993 | |
|
994 | 994 | It is possible to implement *prefix dictionary chain* decompression |
|
995 | 995 | on top of other APIs. However, this function will likely be faster - |
|
996 | 996 | especially for long input chains - as it avoids the overhead of instantiating |
|
997 | 997 | and passing around intermediate objects between C and Python. |
|
998 | 998 | |
|
999 | 999 | Multi-Threaded Compression |
|
1000 | 1000 | -------------------------- |
|
1001 | 1001 | |
|
1002 | 1002 | ``ZstdCompressor`` accepts a ``threads`` argument that controls the number |
|
1003 | 1003 | of threads to use for compression. The way this works is that input is split |
|
1004 | 1004 | into segments and each segment is fed into a worker pool for compression. Once |
|
1005 | 1005 | a segment is compressed, it is flushed/appended to the output. |
|
1006 | 1006 | |
|
1007 | 1007 | .. note:: |
|
1008 | 1008 | |
|
1009 | 1009 | These threads are created at the C layer and are not Python threads. So they |
|
1010 | 1010 | work outside the GIL. It is therefore possible to CPU saturate multiple cores |
|
1011 | 1011 | from Python. |
|
1012 | 1012 | |
|
1013 | 1013 | The segment size for multi-threaded compression is chosen from the window size |
|
1014 | 1014 | of the compressor. This is derived from the ``window_log`` attribute of a |
|
1015 | 1015 | ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB |
|
1016 | 1016 | range. |
|
1017 | 1017 | |
|
1018 | 1018 | If multi-threaded compression is requested and the input is smaller than the |
|
1019 | 1019 | configured segment size, only a single compression thread will be used. If the |
|
1020 | 1020 | input is smaller than the segment size multiplied by the thread pool size or |
|
1021 | 1021 | if data cannot be delivered to the compressor fast enough, not all requested |
|
1022 | 1022 | compressor threads may be active simultaneously. |
|
1023 | 1023 | |
|
1024 | 1024 | Compared to non-multi-threaded compression, multi-threaded compression has |
|
1025 | 1025 | higher per-operation overhead. This includes extra memory operations, |
|
1026 | 1026 | thread creation, lock acquisition, etc. |
|
1027 | 1027 | |
|
1028 | 1028 | Due to the nature of multi-threaded compression using *N* compression |
|
1029 | 1029 | *states*, the output from multi-threaded compression will likely be larger |
|
1030 | 1030 | than non-multi-threaded compression. The difference is usually small. But |
|
1031 | 1031 | there is a CPU/wall time versus size trade off that may warrant investigation. |
|
1032 | 1032 | |
|
1033 | 1033 | Output from multi-threaded compression does not require any special handling |
|
1034 | 1034 | on the decompression side. To the decompressor, data generated with single |
|
1035 | 1035 | threaded compressor looks the same as data generated by a multi-threaded |
|
1036 | 1036 | compressor and does not require any special handling or additional resource |
|
1037 | 1037 | requirements. |
|
1038 | 1038 | |
|
1039 | 1039 | Dictionary Creation and Management |
|
1040 | 1040 | ---------------------------------- |
|
1041 | 1041 | |
|
1042 | 1042 | Compression dictionaries are represented with the ``ZstdCompressionDict`` type. |
|
1043 | 1043 | |
|
1044 | 1044 | Instances can be constructed from bytes:: |
|
1045 | 1045 | |
|
1046 | 1046 | dict_data = zstd.ZstdCompressionDict(data) |
|
1047 | 1047 | |
|
1048 | 1048 | It is possible to construct a dictionary from *any* data. If the data doesn't |
|
1049 | 1049 | begin with a magic header, it will be treated as a *prefix* dictionary. |
|
1050 | 1050 | *Prefix* dictionaries allow compression operations to reference raw data |
|
1051 | 1051 | within the dictionary. |
|
1052 | 1052 | |
|
1053 | 1053 | It is possible to force the use of *prefix* dictionaries or to require a |
|
1054 | 1054 | dictionary header: |
|
1055 | 1055 | |
|
1056 | 1056 | dict_data = zstd.ZstdCompressionDict(data, |
|
1057 | 1057 | dict_type=zstd.DICT_TYPE_RAWCONTENT) |
|
1058 | 1058 | |
|
1059 | 1059 | dict_data = zstd.ZstdCompressionDict(data, |
|
1060 | 1060 | dict_type=zstd.DICT_TYPE_FULLDICT) |
|
1061 | 1061 | |
|
1062 | 1062 | You can see how many bytes are in the dictionary by calling ``len()``:: |
|
1063 | 1063 | |
|
1064 | 1064 | dict_data = zstd.train_dictionary(size, samples) |
|
1065 | 1065 | dict_size = len(dict_data) # will not be larger than ``size`` |
|
1066 | 1066 | |
|
1067 | 1067 | Once you have a dictionary, you can pass it to the objects performing |
|
1068 | 1068 | compression and decompression:: |
|
1069 | 1069 | |
|
1070 | 1070 | dict_data = zstd.train_dictionary(131072, samples) |
|
1071 | 1071 | |
|
1072 | 1072 | cctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
1073 | 1073 | for source_data in input_data: |
|
1074 | 1074 | compressed = cctx.compress(source_data) |
|
1075 | 1075 | # Do something with compressed data. |
|
1076 | 1076 | |
|
1077 | 1077 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) |
|
1078 | 1078 | for compressed_data in input_data: |
|
1079 | 1079 | buffer = io.BytesIO() |
|
1080 | 1080 | with dctx.stream_writer(buffer) as decompressor: |
|
1081 | 1081 | decompressor.write(compressed_data) |
|
1082 | 1082 | # Do something with raw data in ``buffer``. |
|
1083 | 1083 | |
|
1084 | 1084 | Dictionaries have unique integer IDs. You can retrieve this ID via:: |
|
1085 | 1085 | |
|
1086 | 1086 | dict_id = zstd.dictionary_id(dict_data) |
|
1087 | 1087 | |
|
1088 | 1088 | You can obtain the raw data in the dict (useful for persisting and constructing |
|
1089 | 1089 | a ``ZstdCompressionDict`` later) via ``as_bytes()``:: |
|
1090 | 1090 | |
|
1091 | 1091 | dict_data = zstd.train_dictionary(size, samples) |
|
1092 | 1092 | raw_data = dict_data.as_bytes() |
|
1093 | 1093 | |
|
1094 | 1094 | By default, when a ``ZstdCompressionDict`` is *attached* to a |
|
1095 | 1095 | ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the |
|
1096 | 1096 | dictionary for use. This is fine if only 1 compression operation is being |
|
1097 | 1097 | performed or if the ``ZstdCompressor`` is being reused for multiple operations. |
|
1098 | 1098 | But if multiple ``ZstdCompressor`` instances are being used with the dictionary, |
|
1099 | 1099 | this can add overhead. |
|
1100 | 1100 | |
|
1101 | 1101 | It is possible to *precompute* the dictionary so it can readily be consumed |
|
1102 | 1102 | by multiple ``ZstdCompressor`` instances:: |
|
1103 | 1103 | |
|
1104 | 1104 | d = zstd.ZstdCompressionDict(data) |
|
1105 | 1105 | |
|
1106 | 1106 | # Precompute for compression level 3. |
|
1107 | 1107 | d.precompute_compress(level=3) |
|
1108 | 1108 | |
|
1109 | 1109 | # Precompute with specific compression parameters. |
|
1110 | 1110 | params = zstd.ZstdCompressionParameters(...) |
|
1111 | 1111 | d.precompute_compress(compression_params=params) |
|
1112 | 1112 | |
|
1113 | 1113 | .. note:: |
|
1114 | 1114 | |
|
1115 | 1115 | When a dictionary is precomputed, the compression parameters used to |
|
1116 | 1116 | precompute the dictionary overwrite some of the compression parameters |
|
1117 | 1117 | specified to ``ZstdCompressor.__init__``. |
|
1118 | 1118 | |
|
1119 | 1119 | Training Dictionaries |
|
1120 | 1120 | ^^^^^^^^^^^^^^^^^^^^^ |
|
1121 | 1121 | |
|
1122 | 1122 | Unless using *prefix* dictionaries, dictionary data is produced by *training* |
|
1123 | 1123 | on existing data:: |
|
1124 | 1124 | |
|
1125 | 1125 | dict_data = zstd.train_dictionary(size, samples) |
|
1126 | 1126 | |
|
1127 | 1127 | This takes a target dictionary size and list of bytes instances and creates and |
|
1128 | 1128 | returns a ``ZstdCompressionDict``. |
|
1129 | 1129 | |
|
1130 | 1130 | The dictionary training mechanism is known as *cover*. More details about it are |
|
1131 | 1131 | available in the paper *Effective Construction of Relative Lempel-Ziv |
|
1132 | 1132 | Dictionaries* (authors: Liao, Petri, Moffat, Wirth). |
|
1133 | 1133 | |
|
1134 | 1134 | The cover algorithm takes parameters ``k` and ``d``. These are the |
|
1135 | 1135 | *segment size* and *dmer size*, respectively. The returned dictionary |
|
1136 | 1136 | instance created by this function has ``k`` and ``d`` attributes |
|
1137 | 1137 | containing the values for these parameters. If a ``ZstdCompressionDict`` |
|
1138 | 1138 | is constructed from raw bytes data (a content-only dictionary), the |
|
1139 | 1139 | ``k`` and ``d`` attributes will be ``0``. |
|
1140 | 1140 | |
|
1141 | 1141 | The segment and dmer size parameters to the cover algorithm can either be |
|
1142 | 1142 | specified manually or ``train_dictionary()`` can try multiple values |
|
1143 | 1143 | and pick the best one, where *best* means the smallest compressed data size. |
|
1144 | 1144 | This later mode is called *optimization* mode. |
|
1145 | 1145 | |
|
1146 | 1146 | If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``, |
|
1147 | 1147 | or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t`` |
|
1148 | 1148 | struct) are defined, *optimization* mode is used with default parameter |
|
1149 | 1149 | values. |
|
1150 | 1150 | |
|
1151 | 1151 | If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged |
|
1152 | 1152 | with explicit control over those parameters. Specifying ``threads=0`` or |
|
1153 | 1153 | ``threads=1`` can be used to engage *optimization* mode if other parameters |
|
1154 | 1154 | are not defined. |
|
1155 | 1155 | |
|
1156 | 1156 | Otherwise, non-*optimization* mode is used with the parameters specified. |
|
1157 | 1157 | |
|
1158 | 1158 | This function takes the following arguments: |
|
1159 | 1159 | |
|
1160 | 1160 | dict_size |
|
1161 | 1161 | Target size in bytes of the dictionary to generate. |
|
1162 | 1162 | samples |
|
1163 | 1163 | A list of bytes holding samples the dictionary will be trained from. |
|
1164 | 1164 | k |
|
1165 | 1165 | Parameter to cover algorithm defining the segment size. A reasonable range |
|
1166 | 1166 | is [16, 2048+]. |
|
1167 | 1167 | d |
|
1168 | 1168 | Parameter to cover algorithm defining the dmer size. A reasonable range is |
|
1169 | 1169 | [6, 16]. ``d`` must be less than or equal to ``k``. |
|
1170 | 1170 | dict_id |
|
1171 | 1171 | Integer dictionary ID for the produced dictionary. Default is 0, which uses |
|
1172 | 1172 | a random value. |
|
1173 | 1173 | steps |
|
1174 | 1174 | Number of steps through ``k`` values to perform when trying parameter |
|
1175 | 1175 | variations. |
|
1176 | 1176 | threads |
|
1177 | 1177 | Number of threads to use when trying parameter variations. Default is 0, |
|
1178 | 1178 | which means to use a single thread. A negative value can be specified to |
|
1179 | 1179 | use as many threads as there are detected logical CPUs. |
|
1180 | 1180 | level |
|
1181 | 1181 | Integer target compression level when trying parameter variations. |
|
1182 | 1182 | notifications |
|
1183 | 1183 | Controls writing of informational messages to ``stderr``. ``0`` (the |
|
1184 | 1184 | default) means to write nothing. ``1`` writes errors. ``2`` writes |
|
1185 | 1185 | progression info. ``3`` writes more details. And ``4`` writes all info. |
|
1186 | 1186 | |
|
1187 | 1187 | Explicit Compression Parameters |
|
1188 | 1188 | ------------------------------- |
|
1189 | 1189 | |
|
1190 | 1190 | Zstandard offers a high-level *compression level* that maps to lower-level |
|
1191 | 1191 | compression parameters. For many consumers, this numeric level is the only |
|
1192 | 1192 | compression setting you'll need to touch. |
|
1193 | 1193 | |
|
1194 | 1194 | But for advanced use cases, it might be desirable to tweak these lower-level |
|
1195 | 1195 | settings. |
|
1196 | 1196 | |
|
1197 | 1197 | The ``ZstdCompressionParameters`` type represents these low-level compression |
|
1198 | 1198 | settings. |
|
1199 | 1199 | |
|
1200 | 1200 | Instances of this type can be constructed from a myriad of keyword arguments |
|
1201 | 1201 | (defined below) for complete low-level control over each adjustable |
|
1202 | 1202 | compression setting. |
|
1203 | 1203 | |
|
1204 | 1204 | From a higher level, one can construct a ``ZstdCompressionParameters`` instance |
|
1205 | 1205 | given a desired compression level and target input and dictionary size |
|
1206 | 1206 | using ``ZstdCompressionParameters.from_level()``. e.g.:: |
|
1207 | 1207 | |
|
1208 | 1208 | # Derive compression settings for compression level 7. |
|
1209 | 1209 | params = zstd.ZstdCompressionParameters.from_level(7) |
|
1210 | 1210 | |
|
1211 | 1211 | # With an input size of 1MB |
|
1212 | 1212 | params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576) |
|
1213 | 1213 | |
|
1214 | 1214 | Using ``from_level()``, it is also possible to override individual compression |
|
1215 | 1215 | parameters or to define additional settings that aren't automatically derived. |
|
1216 | 1216 | e.g.:: |
|
1217 | 1217 | |
|
1218 | 1218 | params = zstd.ZstdCompressionParameters.from_level(4, window_log=10) |
|
1219 | 1219 | params = zstd.ZstdCompressionParameters.from_level(5, threads=4) |
|
1220 | 1220 | |
|
1221 | 1221 | Or you can define low-level compression settings directly:: |
|
1222 | 1222 | |
|
1223 | 1223 | params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True) |
|
1224 | 1224 | |
|
1225 | 1225 | Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to |
|
1226 | 1226 | configure a compressor:: |
|
1227 | 1227 | |
|
1228 | 1228 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1229 | 1229 | |
|
1230 | 1230 | The named arguments and attributes of ``ZstdCompressionParameters`` are as |
|
1231 | 1231 | follows: |
|
1232 | 1232 | |
|
1233 | 1233 | * format |
|
1234 | 1234 | * compression_level |
|
1235 | 1235 | * window_log |
|
1236 | 1236 | * hash_log |
|
1237 | 1237 | * chain_log |
|
1238 | 1238 | * search_log |
|
1239 | 1239 | * min_match |
|
1240 | 1240 | * target_length |
|
1241 | 1241 | * strategy |
|
1242 | 1242 | * compression_strategy (deprecated: same as ``strategy``) |
|
1243 | 1243 | * write_content_size |
|
1244 | 1244 | * write_checksum |
|
1245 | 1245 | * write_dict_id |
|
1246 | 1246 | * job_size |
|
1247 | 1247 | * overlap_log |
|
1248 | 1248 | * overlap_size_log (deprecated: same as ``overlap_log``) |
|
1249 | 1249 | * force_max_window |
|
1250 | 1250 | * enable_ldm |
|
1251 | 1251 | * ldm_hash_log |
|
1252 | 1252 | * ldm_min_match |
|
1253 | 1253 | * ldm_bucket_size_log |
|
1254 | 1254 | * ldm_hash_rate_log |
|
1255 | 1255 | * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``) |
|
1256 | 1256 | * threads |
|
1257 | 1257 | |
|
1258 | 1258 | Some of these are very low-level settings. It may help to consult the official |
|
1259 | 1259 | zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants |
|
1260 | 1260 | in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h). |
|
1261 | 1261 | |
|
1262 | 1262 | Frame Inspection |
|
1263 | 1263 | ---------------- |
|
1264 | 1264 | |
|
1265 | 1265 | Data emitted from zstd compression is encapsulated in a *frame*. This frame |
|
1266 | 1266 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing |
|
1267 | 1267 | the frame in more detail. For more info, see |
|
1268 | 1268 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. |
|
1269 | 1269 | |
|
1270 | 1270 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes |
|
1271 | 1271 | instance and returns a ``FrameParameters`` object describing the frame.
|
1272 | 1272 | |
|
1273 | 1273 | Depending on which fields are present in the frame and their values, the |
|
1274 | 1274 | length of the frame parameters varies. If insufficient bytes are passed |
|
1275 | 1275 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure |
|
1276 | 1276 | frame parameters can be parsed, pass in at least 18 bytes. |
|
1277 | 1277 | |
|
1278 | 1278 | ``FrameParameters`` instances have the following attributes: |
|
1279 | 1279 | |
|
1280 | 1280 | content_size |
|
1281 | 1281 | Integer size of original, uncompressed content. This will be ``0`` if the |
|
1282 | 1282 | original content size isn't written to the frame (controlled with the |
|
1283 | 1283 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input |
|
1284 | 1284 | content size was ``0``. |
|
1285 | 1285 | |
|
1286 | 1286 | window_size |
|
1287 | 1287 | Integer size of maximum back-reference distance in compressed data. |
|
1288 | 1288 | |
|
1289 | 1289 | dict_id |
|
1290 | 1290 | Integer of dictionary ID used for compression. ``0`` if no dictionary |
|
1291 | 1291 | ID was used or if the dictionary ID was ``0``. |
|
1292 | 1292 | |
|
1293 | 1293 | has_checksum |
|
1294 | 1294 | Bool indicating whether a 4 byte content checksum is stored at the end |
|
1295 | 1295 | of the frame. |
|
1296 | 1296 | |
|
1297 | 1297 | ``zstd.frame_header_size(data)`` returns the size of the zstandard frame |
|
1298 | 1298 | header. |
|
1299 | 1299 | |
|
1300 | 1300 | ``zstd.frame_content_size(data)`` returns the content size as parsed from |
|
1301 | 1301 | the frame header. ``-1`` means the content size is unknown. ``0`` means |
|
1302 | 1302 | an empty frame. The content size is usually correct. However, it may not |
|
1303 | 1303 | be accurate. |
|
1304 | 1304 | |
|
1305 | 1305 | Misc Functionality |
|
1306 | 1306 | ------------------ |
|
1307 | 1307 | |
|
1308 | 1308 | estimate_decompression_context_size() |
|
1309 | 1309 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1310 | 1310 | |
|
1311 | 1311 | Estimate the memory size requirements for a decompressor instance. |
|
1312 | 1312 | |
|
1313 | 1313 | Constants |
|
1314 | 1314 | --------- |
|
1315 | 1315 | |
|
1316 | 1316 | The following module constants/attributes are exposed: |
|
1317 | 1317 | |
|
1318 | 1318 | ZSTD_VERSION |
|
1319 | 1319 | This module attribute exposes a 3-tuple of the Zstandard version. e.g. |
|
1320 | 1320 | ``(1, 0, 0)`` |
|
1321 | 1321 | MAX_COMPRESSION_LEVEL |
|
1322 | 1322 | Integer max compression level accepted by compression functions |
|
1323 | 1323 | COMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1324 | 1324 | Recommended chunk size to feed to compressor functions |
|
1325 | 1325 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1326 | 1326 | Recommended chunk size for compression output |
|
1327 | 1327 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1328 | 1328 | Recommended chunk size to feed into decompressor functions
|
1329 | 1329 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1330 | 1330 | Recommended chunk size for decompression output |
|
1331 | 1331 | |
|
1332 | 1332 | FRAME_HEADER |
|
1333 | 1333 | bytes containing header of the Zstandard frame |
|
1334 | 1334 | MAGIC_NUMBER |
|
1335 | 1335 | Frame header as an integer |
|
1336 | 1336 | |
|
1337 | 1337 | FLUSH_BLOCK |
|
1338 | 1338 | Flushing behavior that denotes to flush a zstd block. A decompressor will |
|
1339 | 1339 | be able to decode all data fed into the compressor so far. |
|
1340 | 1340 | FLUSH_FRAME |
|
1341 | 1341 | Flushing behavior that denotes to end a zstd frame. Any new data fed |
|
1342 | 1342 | to the compressor will start a new frame. |
|
1343 | 1343 | |
|
1344 | 1344 | CONTENTSIZE_UNKNOWN |
|
1345 | 1345 | Value for content size when the content size is unknown. |
|
1346 | 1346 | CONTENTSIZE_ERROR |
|
1347 | 1347 | Value for content size when content size couldn't be determined. |
|
1348 | 1348 | |
|
1349 | 1349 | WINDOWLOG_MIN |
|
1350 | 1350 | Minimum value for compression parameter |
|
1351 | 1351 | WINDOWLOG_MAX |
|
1352 | 1352 | Maximum value for compression parameter |
|
1353 | 1353 | CHAINLOG_MIN |
|
1354 | 1354 | Minimum value for compression parameter |
|
1355 | 1355 | CHAINLOG_MAX |
|
1356 | 1356 | Maximum value for compression parameter |
|
1357 | 1357 | HASHLOG_MIN |
|
1358 | 1358 | Minimum value for compression parameter |
|
1359 | 1359 | HASHLOG_MAX |
|
1360 | 1360 | Maximum value for compression parameter |
|
1361 | 1361 | SEARCHLOG_MIN |
|
1362 | 1362 | Minimum value for compression parameter |
|
1363 | 1363 | SEARCHLOG_MAX |
|
1364 | 1364 | Maximum value for compression parameter |
|
1365 | 1365 | MINMATCH_MIN |
|
1366 | 1366 | Minimum value for compression parameter |
|
1367 | 1367 | MINMATCH_MAX |
|
1368 | 1368 | Maximum value for compression parameter |
|
1369 | 1369 | SEARCHLENGTH_MIN |
|
1370 | 1370 | Minimum value for compression parameter |
|
1371 | 1371 | |
|
1372 | 1372 | Deprecated: use ``MINMATCH_MIN`` |
|
1373 | 1373 | SEARCHLENGTH_MAX |
|
1374 | 1374 | Maximum value for compression parameter |
|
1375 | 1375 | |
|
1376 | 1376 | Deprecated: use ``MINMATCH_MAX`` |
|
1377 | 1377 | TARGETLENGTH_MIN |
|
1378 | 1378 | Minimum value for compression parameter |
|
1379 | 1379 | STRATEGY_FAST |
|
1380 | 1380 | Compression strategy |
|
1381 | 1381 | STRATEGY_DFAST |
|
1382 | 1382 | Compression strategy |
|
1383 | 1383 | STRATEGY_GREEDY |
|
1384 | 1384 | Compression strategy |
|
1385 | 1385 | STRATEGY_LAZY |
|
1386 | 1386 | Compression strategy |
|
1387 | 1387 | STRATEGY_LAZY2 |
|
1388 | 1388 | Compression strategy |
|
1389 | 1389 | STRATEGY_BTLAZY2 |
|
1390 | 1390 | Compression strategy |
|
1391 | 1391 | STRATEGY_BTOPT |
|
1392 | 1392 | Compression strategy |
|
1393 | 1393 | STRATEGY_BTULTRA |
|
1394 | 1394 | Compression strategy |
|
1395 | 1395 | STRATEGY_BTULTRA2 |
|
1396 | 1396 | Compression strategy |
|
1397 | 1397 | |
|
1398 | 1398 | FORMAT_ZSTD1 |
|
1399 | 1399 | Zstandard frame format |
|
1400 | 1400 | FORMAT_ZSTD1_MAGICLESS |
|
1401 | 1401 | Zstandard frame format without magic header |
|
1402 | 1402 | |
|
1403 | 1403 | Performance Considerations |
|
1404 | 1404 | -------------------------- |
|
1405 | 1405 | |
|
1406 | 1406 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a |
|
1407 | 1407 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` |
|
1408 | 1408 | or ``ZstdDecompressor`` instance for multiple operations is faster than |
|
1409 | 1409 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each |
|
1410 | 1410 | operation. The differences are magnified as the size of data decreases. For |
|
1411 | 1411 | example, the difference between *context* reuse and non-reuse for 100,000 |
|
1412 | 1412 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
|
1413 | 1413 | whereas 10 100,000,000 byte inputs will be more similar in speed (because the |
|
1414 | 1414 | time spent doing compression dwarfs time spent creating new *contexts*). |
|
1415 | 1415 | |
|
1416 | 1416 | Buffer Types |
|
1417 | 1417 | ------------ |
|
1418 | 1418 | |
|
1419 | 1419 | The API exposes a handful of custom types for interfacing with memory buffers. |
|
1420 | 1420 | The primary goal of these types is to facilitate efficient multi-object |
|
1421 | 1421 | operations. |
|
1422 | 1422 | |
|
1423 | 1423 | The essential idea is to have a single memory allocation provide backing |
|
1424 | 1424 | storage for multiple logical objects. This has 2 main advantages: fewer |
|
1425 | 1425 | allocations and optimal memory access patterns. This avoids having to allocate |
|
1426 | 1426 | a Python object for each logical object and furthermore ensures that access of |
|
1427 | 1427 | data for objects can be sequential (read: fast) in memory. |
|
1428 | 1428 | |
|
1429 | 1429 | BufferWithSegments |
|
1430 | 1430 | ^^^^^^^^^^^^^^^^^^ |
|
1431 | 1431 | |
|
1432 | 1432 | The ``BufferWithSegments`` type represents a memory buffer containing N |
|
1433 | 1433 | discrete items of known lengths (segments). It is essentially a fixed size |
|
1434 | 1434 | memory address and an array of 2-tuples of ``(offset, length)`` 64-bit |
|
1435 | 1435 | unsigned native endian integers defining the byte offset and length of each |
|
1436 | 1436 | segment within the buffer. |
|
1437 | 1437 | |
|
1438 | 1438 | Instances behave like containers. |
|
1439 | 1439 | |
|
1440 | 1440 | ``len()`` returns the number of segments within the instance. |
|
1441 | 1441 | |
|
1442 | 1442 | ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an |
|
1443 | 1443 | individual segment within the backing buffer. That returned object references |
|
1444 | 1444 | (not copies) memory. This means that iterating all objects doesn't copy |
|
1445 | 1445 | data within the buffer. |
|
1446 | 1446 | |
|
1447 | 1447 | The ``.size`` attribute contains the total size in bytes of the backing |
|
1448 | 1448 | buffer. |
|
1449 | 1449 | |
|
1450 | 1450 | Instances conform to the buffer protocol. So a reference to the backing bytes |
|
1451 | 1451 | can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also |
|
1452 | 1452 | be obtained via ``.tobytes()``. |
|
1453 | 1453 | |
|
1454 | 1454 | The ``.segments`` attribute exposes the array of ``(offset, length)`` for |
|
1455 | 1455 | segments within the buffer. It is a ``BufferSegments`` type. |
|
1456 | 1456 | |
|
1457 | 1457 | BufferSegment |
|
1458 | 1458 | ^^^^^^^^^^^^^ |
|
1459 | 1459 | |
|
1460 | 1460 | The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``. |
|
1461 | 1461 | It is essentially a reference to N bytes within a ``BufferWithSegments``. |
|
1462 | 1462 | |
|
1463 | 1463 | ``len()`` returns the length of the segment in bytes. |
|
1464 | 1464 | |
|
1465 | 1465 | ``.offset`` contains the byte offset of this segment within its parent |
|
1466 | 1466 | ``BufferWithSegments`` instance. |
|
1467 | 1467 | |
|
1468 | 1468 | The object conforms to the buffer protocol. ``.tobytes()`` can be called to |
|
1469 | 1469 | obtain a ``bytes`` instance with a copy of the backing bytes. |
|
1470 | 1470 | |
|
1471 | 1471 | BufferSegments |
|
1472 | 1472 | ^^^^^^^^^^^^^^ |
|
1473 | 1473 | |
|
1474 | 1474 | This type represents an array of ``(offset, length)`` integers defining segments |
|
1475 | 1475 | within a ``BufferWithSegments``. |
|
1476 | 1476 | |
|
1477 | 1477 | The array members are 64-bit unsigned integers using host/native bit order. |
|
1478 | 1478 | |
|
1479 | 1479 | Instances conform to the buffer protocol. |
|
1480 | 1480 | |
|
1481 | 1481 | BufferWithSegmentsCollection |
|
1482 | 1482 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1483 | 1483 | |
|
1484 | 1484 | The ``BufferWithSegmentsCollection`` type represents a virtual spanning view |
|
1485 | 1485 | of multiple ``BufferWithSegments`` instances. |
|
1486 | 1486 | |
|
1487 | 1487 | Instances are constructed from 1 or more ``BufferWithSegments`` instances. The |
|
1488 | 1488 | resulting object behaves like an ordered sequence whose members are the |
|
1489 | 1489 | segments within each ``BufferWithSegments``. |
|
1490 | 1490 | |
|
1491 | 1491 | ``len()`` returns the number of segments within all ``BufferWithSegments`` |
|
1492 | 1492 | instances. |
|
1493 | 1493 | |
|
1494 | 1494 | ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at |
|
1495 | 1495 | that offset as if all ``BufferWithSegments`` instances were a single |
|
1496 | 1496 | entity. |
|
1497 | 1497 | |
|
1498 | 1498 | If the object is composed of 2 ``BufferWithSegments`` instances with the |
|
1499 | 1499 | first having 2 segments and the second have 3 segments, then ``b[0]`` |
|
1500 | 1500 | and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``, |
|
1501 | 1501 | and ``b[4]`` access segments from the second. |
|
1502 | 1502 | |
|
1503 | 1503 | Choosing an API |
|
1504 | 1504 | =============== |
|
1505 | 1505 | |
|
1506 | 1506 | There are multiple APIs for performing compression and decompression. This is |
|
1507 | 1507 | because different applications have different needs and the library wants to |
|
1508 | 1508 | facilitate optimal use in as many use cases as possible. |
|
1509 | 1509 | |
|
1510 | 1510 | From a high-level, APIs are divided into *one-shot* and *streaming*: either you |
|
1511 | 1511 | are operating on all data at once or you operate on it piecemeal. |
|
1512 | 1512 | |
|
1513 | 1513 | The *one-shot* APIs are useful for small data, where the input or output |
|
1514 | 1514 | size is known. (The size can come from a buffer length, file size, or |
|
1515 | 1515 | stored in the zstd frame header.) A limitation of the *one-shot* APIs is that |
|
1516 | 1516 | input and output must fit in memory simultaneously. For say a 4 GB input, |
|
1517 | 1517 | this is often not feasible. |
|
1518 | 1518 | |
|
1519 | 1519 | The *one-shot* APIs also perform all work as a single operation. So, if you |
|
1520 | 1520 | feed it large input, it could take a long time for the function to return. |
|
1521 | 1521 | |
|
1522 | 1522 | The streaming APIs do not have the limitations of the simple API. But the |
|
1523 | 1523 | price you pay for this flexibility is that they are more complex than a |
|
1524 | 1524 | single function call. |
|
1525 | 1525 | |
|
1526 | 1526 | The streaming APIs put the caller in control of compression and decompression |
|
1527 | 1527 | behavior by allowing them to directly control either the input or output side |
|
1528 | 1528 | of the operation. |
|
1529 | 1529 | |
|
1530 | 1530 | With the *streaming input*, *compressor*, and *decompressor* APIs, the caller |
|
1531 | 1531 | has full control over the input to the compression or decompression stream. |
|
1532 | 1532 | They can directly choose when new data is operated on. |
|
1533 | 1533 | |
|
1534 | 1534 | With the *streaming ouput* APIs, the caller has full control over the output |
|
1535 | 1535 | of the compression or decompression stream. It can choose when to receive |
|
1536 | 1536 | new data. |
|
1537 | 1537 | |
|
1538 | 1538 | When using the *streaming* APIs that operate on file-like or stream objects, |
|
1539 | 1539 | it is important to consider what happens in that object when I/O is requested. |
|
1540 | 1540 | There is potential for long pauses as data is read or written from the |
|
1541 | 1541 | underlying stream (say from interacting with a filesystem or network). This |
|
1542 | 1542 | could add considerable overhead. |
|
1543 | 1543 | |
|
1544 | 1544 | Thread Safety |
|
1545 | 1545 | ============= |
|
1546 | 1546 | |
|
1547 | 1547 | ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees |
|
1548 | 1548 | about thread safety. Do not operate on the same ``ZstdCompressor`` and |
|
1549 | 1549 | ``ZstdDecompressor`` instance simultaneously from different threads. It is |
|
1550 | 1550 | fine to have different threads call into a single instance, just not at the |
|
1551 | 1551 | same time. |
|
1552 | 1552 | |
|
1553 | 1553 | Some operations require multiple function calls to complete. e.g. streaming |
|
1554 | 1554 | operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used |
|
1555 | 1555 | for simultaneously active operations. e.g. you must not start a streaming |
|
1556 | 1556 | operation when another streaming operation is already active. |
|
1557 | 1557 | |
|
1558 | 1558 | The C extension releases the GIL during non-trivial calls into the zstd C |
|
1559 | 1559 | API. Non-trivial calls are notably compression and decompression. Trivial |
|
1560 | 1560 | calls are things like parsing frame parameters. Where the GIL is released |
|
1561 | 1561 | is considered an implementation detail and can change in any release. |
|
1562 | 1562 | |
|
1563 | 1563 | APIs that accept bytes-like objects don't enforce that the underlying object |
|
1564 | 1564 | is read-only. However, it is assumed that the passed object is read-only for |
|
1565 | 1565 | the duration of the function call. It is possible to pass a mutable object |
|
1566 | 1566 | (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL |
|
1567 | 1567 | released, and mutate the object from another thread. Such a race condition |
|
1568 | 1568 | is a bug in the consumer of python-zstandard. Most Python data types are |
|
1569 | 1569 | immutable, so unless you are doing something fancy, you don't need to |
|
1570 | 1570 | worry about this. |
|
1571 | 1571 | |
|
1572 | 1572 | Note on Zstandard's *Experimental* API |
|
1573 | 1573 | ====================================== |
|
1574 | 1574 | |
|
1575 | 1575 | Many of the Zstandard APIs used by this module are marked as *experimental* |
|
1576 | 1576 | within the Zstandard project. |
|
1577 | 1577 | |
|
1578 | 1578 | It is unclear how Zstandard's C API will evolve over time, especially with |
|
1579 | 1579 | regards to this *experimental* functionality. We will try to maintain |
|
1580 | 1580 | backwards compatibility at the Python API level. However, we cannot |
|
1581 | 1581 | guarantee this for things not under our control. |
|
1582 | 1582 | |
|
1583 | 1583 | Since a copy of the Zstandard source code is distributed with this |
|
1584 | 1584 | module and since we compile against it, the behavior of a specific |
|
1585 | 1585 | version of this module should be constant for all of time. So if you |
|
1586 | 1586 | pin the version of this module used in your projects (which is a Python |
|
1587 | 1587 | best practice), you should be shielded from unwanted future changes. |
|
1588 | 1588 | |
|
1589 | 1589 | Donate |
|
1590 | 1590 | ====== |
|
1591 | 1591 | |
|
1592 | 1592 | A lot of time has been invested into this project by the author. |
|
1593 | 1593 | |
|
1594 | 1594 | If you find this project useful and would like to thank the author for |
|
1595 | 1595 | their work, consider donating some money. Any amount is appreciated. |
|
1596 | 1596 | |
|
1597 | 1597 | .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif |
|
1598 | 1598 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard¤cy_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted |
|
1599 | 1599 | :alt: Donate via PayPal |
|
1600 | 1600 | |
|
1601 | 1601 | .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master |
|
1602 | 1602 | :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master |
@@ -1,359 +1,359 b'' | |||
|
1 | 1 | /** |
|
2 | 2 | * Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | * All rights reserved. |
|
4 | 4 | * |
|
5 | 5 | * This software may be modified and distributed under the terms |
|
6 | 6 | * of the BSD license. See the LICENSE file for details. |
|
7 | 7 | */ |
|
8 | 8 | |
|
9 | 9 | #define PY_SSIZE_T_CLEAN |
|
10 | 10 | #include <Python.h> |
|
11 | 11 | #include "structmember.h" |
|
12 | 12 | |
|
13 | 13 | #define ZSTD_STATIC_LINKING_ONLY |
|
14 | 14 | #define ZDICT_STATIC_LINKING_ONLY |
|
15 | 15 | #include <zstd.h> |
|
16 | 16 | #include <zdict.h> |
|
17 | 17 | |
|
18 | 18 | /* Remember to change the string in zstandard/__init__ as well */ |
|
19 |
#define PYTHON_ZSTANDARD_VERSION "0.1 |
|
|
19 | #define PYTHON_ZSTANDARD_VERSION "0.13.0" | |
|
20 | 20 | |
|
21 | 21 | typedef enum { |
|
22 | 22 | compressorobj_flush_finish, |
|
23 | 23 | compressorobj_flush_block, |
|
24 | 24 | } CompressorObj_Flush; |
|
25 | 25 | |
|
26 | 26 | /* |
|
27 | 27 | Represents a ZstdCompressionParameters type. |
|
28 | 28 | |
|
29 | 29 | This type holds all the low-level compression parameters that can be set. |
|
30 | 30 | */ |
|
31 | 31 | typedef struct { |
|
32 | 32 | PyObject_HEAD |
|
33 | 33 | ZSTD_CCtx_params* params; |
|
34 | 34 | } ZstdCompressionParametersObject; |
|
35 | 35 | |
|
36 | 36 | extern PyTypeObject ZstdCompressionParametersType; |
|
37 | 37 | |
|
38 | 38 | /* |
|
39 | 39 | Represents a FrameParameters type. |
|
40 | 40 | |
|
41 | 41 | This type is basically a wrapper around ZSTD_frameParams. |
|
42 | 42 | */ |
|
43 | 43 | typedef struct { |
|
44 | 44 | PyObject_HEAD |
|
45 | 45 | unsigned long long frameContentSize; |
|
46 | 46 | unsigned long long windowSize; |
|
47 | 47 | unsigned dictID; |
|
48 | 48 | char checksumFlag; |
|
49 | 49 | } FrameParametersObject; |
|
50 | 50 | |
|
51 | 51 | extern PyTypeObject FrameParametersType; |
|
52 | 52 | |
|
53 | 53 | /* |
|
54 | 54 | Represents a ZstdCompressionDict type. |
|
55 | 55 | |
|
56 | 56 | Instances hold data used for a zstd compression dictionary. |
|
57 | 57 | */ |
|
58 | 58 | typedef struct { |
|
59 | 59 | PyObject_HEAD |
|
60 | 60 | |
|
61 | 61 | /* Pointer to dictionary data. Owned by self. */ |
|
62 | 62 | void* dictData; |
|
63 | 63 | /* Size of dictionary data. */ |
|
64 | 64 | size_t dictSize; |
|
65 | 65 | ZSTD_dictContentType_e dictType; |
|
66 | 66 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
67 | 67 | unsigned k; |
|
68 | 68 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
69 | 69 | unsigned d; |
|
70 | 70 | /* Digested dictionary, suitable for reuse. */ |
|
71 | 71 | ZSTD_CDict* cdict; |
|
72 | 72 | ZSTD_DDict* ddict; |
|
73 | 73 | } ZstdCompressionDict; |
|
74 | 74 | |
|
75 | 75 | extern PyTypeObject ZstdCompressionDictType; |
|
76 | 76 | |
|
77 | 77 | /* |
|
78 | 78 | Represents a ZstdCompressor type. |
|
79 | 79 | */ |
|
80 | 80 | typedef struct { |
|
81 | 81 | PyObject_HEAD |
|
82 | 82 | |
|
83 | 83 | /* Number of threads to use for operations. */ |
|
84 | 84 | unsigned int threads; |
|
85 | 85 | /* Pointer to compression dictionary to use. NULL if not using dictionary |
|
86 | 86 | compression. */ |
|
87 | 87 | ZstdCompressionDict* dict; |
|
88 | 88 | /* Compression context to use. Populated during object construction. */ |
|
89 | 89 | ZSTD_CCtx* cctx; |
|
90 | 90 | /* Compression parameters in use. */ |
|
91 | 91 | ZSTD_CCtx_params* params; |
|
92 | 92 | } ZstdCompressor; |
|
93 | 93 | |
|
94 | 94 | extern PyTypeObject ZstdCompressorType; |
|
95 | 95 | |
|
96 | 96 | typedef struct { |
|
97 | 97 | PyObject_HEAD |
|
98 | 98 | |
|
99 | 99 | ZstdCompressor* compressor; |
|
100 | 100 | ZSTD_outBuffer output; |
|
101 | 101 | int finished; |
|
102 | 102 | } ZstdCompressionObj; |
|
103 | 103 | |
|
104 | 104 | extern PyTypeObject ZstdCompressionObjType; |
|
105 | 105 | |
|
106 | 106 | typedef struct { |
|
107 | 107 | PyObject_HEAD |
|
108 | 108 | |
|
109 | 109 | ZstdCompressor* compressor; |
|
110 | 110 | PyObject* writer; |
|
111 | 111 | ZSTD_outBuffer output; |
|
112 | 112 | size_t outSize; |
|
113 | 113 | int entered; |
|
114 | 114 | int closed; |
|
115 | 115 | int writeReturnRead; |
|
116 | 116 | unsigned long long bytesCompressed; |
|
117 | 117 | } ZstdCompressionWriter; |
|
118 | 118 | |
|
119 | 119 | extern PyTypeObject ZstdCompressionWriterType; |
|
120 | 120 | |
|
121 | 121 | typedef struct { |
|
122 | 122 | PyObject_HEAD |
|
123 | 123 | |
|
124 | 124 | ZstdCompressor* compressor; |
|
125 | 125 | PyObject* reader; |
|
126 | 126 | Py_buffer buffer; |
|
127 | 127 | Py_ssize_t bufferOffset; |
|
128 | 128 | size_t inSize; |
|
129 | 129 | size_t outSize; |
|
130 | 130 | |
|
131 | 131 | ZSTD_inBuffer input; |
|
132 | 132 | ZSTD_outBuffer output; |
|
133 | 133 | int finishedOutput; |
|
134 | 134 | int finishedInput; |
|
135 | 135 | PyObject* readResult; |
|
136 | 136 | } ZstdCompressorIterator; |
|
137 | 137 | |
|
138 | 138 | extern PyTypeObject ZstdCompressorIteratorType; |
|
139 | 139 | |
|
140 | 140 | typedef struct { |
|
141 | 141 | PyObject_HEAD |
|
142 | 142 | |
|
143 | 143 | ZstdCompressor* compressor; |
|
144 | 144 | PyObject* reader; |
|
145 | 145 | Py_buffer buffer; |
|
146 | 146 | size_t readSize; |
|
147 | 147 | |
|
148 | 148 | int entered; |
|
149 | 149 | int closed; |
|
150 | 150 | unsigned long long bytesCompressed; |
|
151 | 151 | |
|
152 | 152 | ZSTD_inBuffer input; |
|
153 | 153 | ZSTD_outBuffer output; |
|
154 | 154 | int finishedInput; |
|
155 | 155 | int finishedOutput; |
|
156 | 156 | PyObject* readResult; |
|
157 | 157 | } ZstdCompressionReader; |
|
158 | 158 | |
|
159 | 159 | extern PyTypeObject ZstdCompressionReaderType; |
|
160 | 160 | |
|
161 | 161 | typedef struct { |
|
162 | 162 | PyObject_HEAD |
|
163 | 163 | |
|
164 | 164 | ZstdCompressor* compressor; |
|
165 | 165 | ZSTD_inBuffer input; |
|
166 | 166 | ZSTD_outBuffer output; |
|
167 | 167 | Py_buffer inBuffer; |
|
168 | 168 | int finished; |
|
169 | 169 | size_t chunkSize; |
|
170 | 170 | } ZstdCompressionChunker; |
|
171 | 171 | |
|
172 | 172 | extern PyTypeObject ZstdCompressionChunkerType; |
|
173 | 173 | |
|
174 | 174 | typedef enum { |
|
175 | 175 | compressionchunker_mode_normal, |
|
176 | 176 | compressionchunker_mode_flush, |
|
177 | 177 | compressionchunker_mode_finish, |
|
178 | 178 | } CompressionChunkerMode; |
|
179 | 179 | |
|
180 | 180 | typedef struct { |
|
181 | 181 | PyObject_HEAD |
|
182 | 182 | |
|
183 | 183 | ZstdCompressionChunker* chunker; |
|
184 | 184 | CompressionChunkerMode mode; |
|
185 | 185 | } ZstdCompressionChunkerIterator; |
|
186 | 186 | |
|
187 | 187 | extern PyTypeObject ZstdCompressionChunkerIteratorType; |
|
188 | 188 | |
|
189 | 189 | typedef struct { |
|
190 | 190 | PyObject_HEAD |
|
191 | 191 | |
|
192 | 192 | ZSTD_DCtx* dctx; |
|
193 | 193 | ZstdCompressionDict* dict; |
|
194 | 194 | size_t maxWindowSize; |
|
195 | 195 | ZSTD_format_e format; |
|
196 | 196 | } ZstdDecompressor; |
|
197 | 197 | |
|
198 | 198 | extern PyTypeObject ZstdDecompressorType; |
|
199 | 199 | |
|
200 | 200 | typedef struct { |
|
201 | 201 | PyObject_HEAD |
|
202 | 202 | |
|
203 | 203 | ZstdDecompressor* decompressor; |
|
204 | 204 | size_t outSize; |
|
205 | 205 | int finished; |
|
206 | 206 | } ZstdDecompressionObj; |
|
207 | 207 | |
|
208 | 208 | extern PyTypeObject ZstdDecompressionObjType; |
|
209 | 209 | |
|
210 | 210 | typedef struct { |
|
211 | 211 | PyObject_HEAD |
|
212 | 212 | |
|
213 | 213 | /* Parent decompressor to which this object is associated. */ |
|
214 | 214 | ZstdDecompressor* decompressor; |
|
215 | 215 | /* Object to read() from (if reading from a stream). */ |
|
216 | 216 | PyObject* reader; |
|
217 | 217 | /* Size for read() operations on reader. */ |
|
218 | 218 | size_t readSize; |
|
219 | 219 | /* Whether a read() can return data spanning multiple zstd frames. */ |
|
220 | 220 | int readAcrossFrames; |
|
221 | 221 | /* Buffer to read from (if reading from a buffer). */ |
|
222 | 222 | Py_buffer buffer; |
|
223 | 223 | |
|
224 | 224 | /* Whether the context manager is active. */ |
|
225 | 225 | int entered; |
|
226 | 226 | /* Whether we've closed the stream. */ |
|
227 | 227 | int closed; |
|
228 | 228 | |
|
229 | 229 | /* Number of bytes decompressed and returned to user. */ |
|
230 | 230 | unsigned long long bytesDecompressed; |
|
231 | 231 | |
|
232 | 232 | /* Tracks data going into decompressor. */ |
|
233 | 233 | ZSTD_inBuffer input; |
|
234 | 234 | |
|
235 | 235 | /* Holds output from read() operation on reader. */ |
|
236 | 236 | PyObject* readResult; |
|
237 | 237 | |
|
238 | 238 | /* Whether all input has been sent to the decompressor. */ |
|
239 | 239 | int finishedInput; |
|
240 | 240 | /* Whether all output has been flushed from the decompressor. */ |
|
241 | 241 | int finishedOutput; |
|
242 | 242 | } ZstdDecompressionReader; |
|
243 | 243 | |
|
244 | 244 | extern PyTypeObject ZstdDecompressionReaderType; |
|
245 | 245 | |
|
246 | 246 | typedef struct { |
|
247 | 247 | PyObject_HEAD |
|
248 | 248 | |
|
249 | 249 | ZstdDecompressor* decompressor; |
|
250 | 250 | PyObject* writer; |
|
251 | 251 | size_t outSize; |
|
252 | 252 | int entered; |
|
253 | 253 | int closed; |
|
254 | 254 | int writeReturnRead; |
|
255 | 255 | } ZstdDecompressionWriter; |
|
256 | 256 | |
|
257 | 257 | extern PyTypeObject ZstdDecompressionWriterType; |
|
258 | 258 | |
|
259 | 259 | typedef struct { |
|
260 | 260 | PyObject_HEAD |
|
261 | 261 | |
|
262 | 262 | ZstdDecompressor* decompressor; |
|
263 | 263 | PyObject* reader; |
|
264 | 264 | Py_buffer buffer; |
|
265 | 265 | Py_ssize_t bufferOffset; |
|
266 | 266 | size_t inSize; |
|
267 | 267 | size_t outSize; |
|
268 | 268 | size_t skipBytes; |
|
269 | 269 | ZSTD_inBuffer input; |
|
270 | 270 | ZSTD_outBuffer output; |
|
271 | 271 | Py_ssize_t readCount; |
|
272 | 272 | int finishedInput; |
|
273 | 273 | int finishedOutput; |
|
274 | 274 | } ZstdDecompressorIterator; |
|
275 | 275 | |
|
276 | 276 | extern PyTypeObject ZstdDecompressorIteratorType; |
|
277 | 277 | |
|
278 | 278 | typedef struct { |
|
279 | 279 | int errored; |
|
280 | 280 | PyObject* chunk; |
|
281 | 281 | } DecompressorIteratorResult; |
|
282 | 282 | |
|
283 | 283 | typedef struct { |
|
284 | 284 | /* The public API is that these are 64-bit unsigned integers. So these can't |
|
285 | 285 | * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may |
|
286 | 286 | * be nonsensical for this platform. */ |
|
287 | 287 | unsigned long long offset; |
|
288 | 288 | unsigned long long length; |
|
289 | 289 | } BufferSegment; |
|
290 | 290 | |
|
291 | 291 | typedef struct { |
|
292 | 292 | PyObject_HEAD |
|
293 | 293 | |
|
294 | 294 | PyObject* parent; |
|
295 | 295 | BufferSegment* segments; |
|
296 | 296 | Py_ssize_t segmentCount; |
|
297 | 297 | } ZstdBufferSegments; |
|
298 | 298 | |
|
299 | 299 | extern PyTypeObject ZstdBufferSegmentsType; |
|
300 | 300 | |
|
301 | 301 | typedef struct { |
|
302 | 302 | PyObject_HEAD |
|
303 | 303 | |
|
304 | 304 | PyObject* parent; |
|
305 | 305 | void* data; |
|
306 | 306 | Py_ssize_t dataSize; |
|
307 | 307 | unsigned long long offset; |
|
308 | 308 | } ZstdBufferSegment; |
|
309 | 309 | |
|
310 | 310 | extern PyTypeObject ZstdBufferSegmentType; |
|
311 | 311 | |
|
312 | 312 | typedef struct { |
|
313 | 313 | PyObject_HEAD |
|
314 | 314 | |
|
315 | 315 | Py_buffer parent; |
|
316 | 316 | void* data; |
|
317 | 317 | unsigned long long dataSize; |
|
318 | 318 | BufferSegment* segments; |
|
319 | 319 | Py_ssize_t segmentCount; |
|
320 | 320 | int useFree; |
|
321 | 321 | } ZstdBufferWithSegments; |
|
322 | 322 | |
|
323 | 323 | extern PyTypeObject ZstdBufferWithSegmentsType; |
|
324 | 324 | |
|
325 | 325 | /** |
|
326 | 326 | * An ordered collection of BufferWithSegments exposed as a squashed collection. |
|
327 | 327 | * |
|
328 | 328 | * This type provides a virtual view spanning multiple BufferWithSegments |
|
329 | 329 | * instances. It allows multiple instances to be "chained" together and |
|
330 | 330 | * exposed as a single collection. e.g. if there are 2 buffers holding |
|
331 | 331 | * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer. |
|
332 | 332 | */ |
|
333 | 333 | typedef struct { |
|
334 | 334 | PyObject_HEAD |
|
335 | 335 | |
|
336 | 336 | /* An array of buffers that should be exposed through this instance. */ |
|
337 | 337 | ZstdBufferWithSegments** buffers; |
|
338 | 338 | /* Number of elements in buffers array. */ |
|
339 | 339 | Py_ssize_t bufferCount; |
|
340 | 340 | /* Array of first offset in each buffer instance. 0th entry corresponds |
|
341 | 341 | to number of elements in the 0th buffer. 1st entry corresponds to the |
|
342 | 342 | sum of elements in 0th and 1st buffers. */ |
|
343 | 343 | Py_ssize_t* firstElements; |
|
344 | 344 | } ZstdBufferWithSegmentsCollection; |
|
345 | 345 | |
|
346 | 346 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; |
|
347 | 347 | |
|
348 | 348 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); |
|
349 | 349 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj); |
|
350 | 350 | int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams); |
|
351 | 351 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs); |
|
352 | 352 | int ensure_ddict(ZstdCompressionDict* dict); |
|
353 | 353 | int ensure_dctx(ZstdDecompressor* decompressor, int loadDict); |
|
354 | 354 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
355 | 355 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); |
|
356 | 356 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); |
|
357 | 357 | int cpu_count(void); |
|
358 | 358 | size_t roundpow2(size_t); |
|
359 | 359 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size); |
@@ -1,207 +1,225 b'' | |||
|
1 | 1 | # Copyright (c) 2016-present, Gregory Szorc |
|
2 | 2 | # All rights reserved. |
|
3 | 3 | # |
|
4 | 4 | # This software may be modified and distributed under the terms |
|
5 | 5 | # of the BSD license. See the LICENSE file for details. |
|
6 | 6 | |
|
7 | 7 | from __future__ import absolute_import |
|
8 | 8 | |
|
9 | 9 | import cffi |
|
10 | 10 | import distutils.ccompiler |
|
11 | 11 | import os |
|
12 | 12 | import re |
|
13 | 13 | import subprocess |
|
14 | 14 | import tempfile |
|
15 | 15 | |
|
16 | 16 | |
|
17 | 17 | HERE = os.path.abspath(os.path.dirname(__file__)) |
|
18 | 18 | |
|
19 | SOURCES = ['zstd/%s' % p for p in ( | |
|
20 | 'common/debug.c', | |
|
21 | 'common/entropy_common.c', | |
|
22 | 'common/error_private.c', | |
|
23 | 'common/fse_decompress.c', | |
|
24 | 'common/pool.c', | |
|
25 | 'common/threading.c', | |
|
26 |
|
|
|
27 |
|
|
|
28 | 'compress/fse_compress.c', | |
|
29 | 'compress/hist.c', | |
|
30 |
|
|
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
|
40 | 'decompress/huf_decompress.c', | |
|
41 |
|
|
|
42 |
|
|
|
43 |
|
|
|
44 | 'dictBuilder/cover.c', | |
|
45 | 'dictBuilder/fastcover.c', | |
|
46 |
|
|
|
47 |
|
|
|
48 | )] | |
|
19 | SOURCES = [ | |
|
20 | "zstd/%s" % p | |
|
21 | for p in ( | |
|
22 | "common/debug.c", | |
|
23 | "common/entropy_common.c", | |
|
24 | "common/error_private.c", | |
|
25 | "common/fse_decompress.c", | |
|
26 | "common/pool.c", | |
|
27 | "common/threading.c", | |
|
28 | "common/xxhash.c", | |
|
29 | "common/zstd_common.c", | |
|
30 | "compress/fse_compress.c", | |
|
31 | "compress/hist.c", | |
|
32 | "compress/huf_compress.c", | |
|
33 | "compress/zstd_compress.c", | |
|
34 | "compress/zstd_compress_literals.c", | |
|
35 | "compress/zstd_compress_sequences.c", | |
|
36 | "compress/zstd_double_fast.c", | |
|
37 | "compress/zstd_fast.c", | |
|
38 | "compress/zstd_lazy.c", | |
|
39 | "compress/zstd_ldm.c", | |
|
40 | "compress/zstd_opt.c", | |
|
41 | "compress/zstdmt_compress.c", | |
|
42 | "decompress/huf_decompress.c", | |
|
43 | "decompress/zstd_ddict.c", | |
|
44 | "decompress/zstd_decompress.c", | |
|
45 | "decompress/zstd_decompress_block.c", | |
|
46 | "dictBuilder/cover.c", | |
|
47 | "dictBuilder/fastcover.c", | |
|
48 | "dictBuilder/divsufsort.c", | |
|
49 | "dictBuilder/zdict.c", | |
|
50 | ) | |
|
51 | ] | |
|
49 | 52 | |
|
50 | 53 | # Headers whose preprocessed output will be fed into cdef(). |
|
51 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( | |
|
52 | ('zstd.h',), | |
|
53 | ('dictBuilder', 'zdict.h'), | |
|
54 | )] | |
|
54 | HEADERS = [ | |
|
55 | os.path.join(HERE, "zstd", *p) for p in (("zstd.h",), ("dictBuilder", "zdict.h"),) | |
|
56 | ] | |
|
55 | 57 | |
|
56 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( | |
|
57 | 'zstd', | |
|
58 | 'zstd/common', | |
|
59 | 'zstd/compress', | |
|
60 | 'zstd/decompress', | |
|
61 | 'zstd/dictBuilder', | |
|
62 | )] | |
|
58 | INCLUDE_DIRS = [ | |
|
59 | os.path.join(HERE, d) | |
|
60 | for d in ( | |
|
61 | "zstd", | |
|
62 | "zstd/common", | |
|
63 | "zstd/compress", | |
|
64 | "zstd/decompress", | |
|
65 | "zstd/dictBuilder", | |
|
66 | ) | |
|
67 | ] | |
|
63 | 68 | |
|
64 | 69 | # cffi can't parse some of the primitives in zstd.h. So we invoke the |
|
65 | 70 | # preprocessor and feed its output into cffi. |
|
66 | 71 | compiler = distutils.ccompiler.new_compiler() |
|
67 | 72 | |
|
68 | 73 | # Needed for MSVC. |
|
69 |
if hasattr(compiler, |
|
|
74 | if hasattr(compiler, "initialize"): | |
|
70 | 75 | compiler.initialize() |
|
71 | 76 | |
|
72 | 77 | # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor |
|
73 | 78 | # manually. |
|
74 |
if compiler.compiler_type == |
|
|
75 |
args = list(compiler.executables[ |
|
|
76 |
args.extend( |
|
|
77 | '-E', | |
|
78 | '-DZSTD_STATIC_LINKING_ONLY', | |
|
79 | '-DZDICT_STATIC_LINKING_ONLY', | |
|
80 | ]) | |
|
81 | elif compiler.compiler_type == 'msvc': | |
|
79 | if compiler.compiler_type == "unix": | |
|
80 | args = list(compiler.executables["compiler"]) | |
|
81 | args.extend( | |
|
82 | ["-E", "-DZSTD_STATIC_LINKING_ONLY", "-DZDICT_STATIC_LINKING_ONLY",] | |
|
83 | ) | |
|
84 | elif compiler.compiler_type == "msvc": | |
|
82 | 85 | args = [compiler.cc] |
|
83 |
args.extend( |
|
|
84 | '/EP', | |
|
85 | '/DZSTD_STATIC_LINKING_ONLY', | |
|
86 | '/DZDICT_STATIC_LINKING_ONLY', | |
|
87 | ]) | |
|
86 | args.extend( | |
|
87 | ["/EP", "/DZSTD_STATIC_LINKING_ONLY", "/DZDICT_STATIC_LINKING_ONLY",] | |
|
88 | ) | |
|
88 | 89 | else: |
|
89 |
raise Exception( |
|
|
90 | raise Exception("unsupported compiler type: %s" % compiler.compiler_type) | |
|
91 | ||
|
90 | 92 | |
|
91 | 93 | def preprocess(path): |
|
92 |
with open(path, |
|
|
94 | with open(path, "rb") as fh: | |
|
93 | 95 | lines = [] |
|
94 | 96 | it = iter(fh) |
|
95 | 97 | |
|
96 | 98 | for l in it: |
|
97 | 99 | # zstd.h includes <stddef.h>, which is also included by cffi's |
|
98 | 100 | # boilerplate. This can lead to duplicate declarations. So we strip |
|
99 | 101 | # this include from the preprocessor invocation. |
|
100 | 102 | # |
|
101 | 103 | # The same things happens for including zstd.h, so give it the same |
|
102 | 104 | # treatment. |
|
103 | 105 | # |
|
104 | 106 | # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline |
|
105 | 107 | # #define in zstdmt_compress.h and results in a compiler warning. So drop |
|
106 | 108 | # the inline #define. |
|
107 |
if l.startswith( |
|
|
108 | b'#include "zstd.h"', | |
|
109 | b'#define ZSTD_STATIC_LINKING_ONLY')): | |
|
109 | if l.startswith( | |
|
110 | ( | |
|
111 | b"#include <stddef.h>", | |
|
112 | b'#include "zstd.h"', | |
|
113 | b"#define ZSTD_STATIC_LINKING_ONLY", | |
|
114 | ) | |
|
115 | ): | |
|
110 | 116 | continue |
|
111 | 117 | |
|
118 | # The preprocessor environment on Windows doesn't define include | |
|
119 | # paths, so the #include of limits.h fails. We work around this | |
|
120 | # by removing that import and defining INT_MAX ourselves. This is | |
|
121 | # a bit hacky. But it gets the job done. | |
|
122 | # TODO make limits.h work on Windows so we ensure INT_MAX is | |
|
123 | # correct. | |
|
124 | if l.startswith(b"#include <limits.h>"): | |
|
125 | l = b"#define INT_MAX 2147483647\n" | |
|
126 | ||
|
112 | 127 | # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't |
|
113 | 128 | # important so just filter it out. |
|
114 |
if l.startswith(b |
|
|
115 |
l = l[len(b |
|
|
129 | if l.startswith(b"ZSTDLIB_API"): | |
|
130 | l = l[len(b"ZSTDLIB_API ") :] | |
|
116 | 131 | |
|
117 | 132 | lines.append(l) |
|
118 | 133 | |
|
119 |
fd, input_file = tempfile.mkstemp(suffix= |
|
|
120 |
os.write(fd, b |
|
|
134 | fd, input_file = tempfile.mkstemp(suffix=".h") | |
|
135 | os.write(fd, b"".join(lines)) | |
|
121 | 136 | os.close(fd) |
|
122 | 137 | |
|
123 | 138 | try: |
|
124 | 139 | env = dict(os.environ) |
|
125 |
if getattr(compiler, |
|
|
126 |
env[ |
|
|
127 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, | |
|
128 | env=env) | |
|
140 | if getattr(compiler, "_paths", None): | |
|
141 | env["PATH"] = compiler._paths | |
|
142 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, env=env) | |
|
129 | 143 | output = process.communicate()[0] |
|
130 | 144 | ret = process.poll() |
|
131 | 145 | if ret: |
|
132 |
raise Exception( |
|
|
146 | raise Exception("preprocessor exited with error") | |
|
133 | 147 | |
|
134 | 148 | return output |
|
135 | 149 | finally: |
|
136 | 150 | os.unlink(input_file) |
|
137 | 151 | |
|
138 | 152 | |
|
139 | 153 | def normalize_output(output): |
|
140 | 154 | lines = [] |
|
141 | 155 | for line in output.splitlines(): |
|
142 | 156 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. |
|
143 | 157 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): |
|
144 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] | |
|
158 | line = line[len(b'__attribute__ ((visibility ("default"))) ') :] | |
|
145 | 159 | |
|
146 |
if line.startswith(b |
|
|
160 | if line.startswith(b"__attribute__((deprecated("): | |
|
147 | 161 | continue |
|
148 |
elif b |
|
|
162 | elif b"__declspec(deprecated(" in line: | |
|
149 | 163 | continue |
|
150 | 164 | |
|
151 | 165 | lines.append(line) |
|
152 | 166 | |
|
153 |
return b |
|
|
167 | return b"\n".join(lines) | |
|
154 | 168 | |
|
155 | 169 | |
|
156 | 170 | ffi = cffi.FFI() |
|
157 | 171 | # zstd.h uses a possible undefined MIN(). Define it until |
|
158 | 172 | # https://github.com/facebook/zstd/issues/976 is fixed. |
|
159 | 173 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning |
|
160 | 174 | # when cffi uses the function. Since we statically link against zstd, even |
|
161 | 175 | # if we use the deprecated functions it shouldn't be a huge problem. |
|
162 |
ffi.set_source( |
|
|
176 | ffi.set_source( | |
|
177 | "_zstd_cffi", | |
|
178 | """ | |
|
163 | 179 |
|
|
164 | 180 |
|
|
165 | 181 |
|
|
166 | 182 |
|
|
167 | 183 |
|
|
168 | 184 |
|
|
169 | ''', sources=SOURCES, | |
|
170 | include_dirs=INCLUDE_DIRS, | |
|
171 | extra_compile_args=['-DZSTD_MULTITHREAD']) | |
|
185 | """, | |
|
186 | sources=SOURCES, | |
|
187 | include_dirs=INCLUDE_DIRS, | |
|
188 | extra_compile_args=["-DZSTD_MULTITHREAD"], | |
|
189 | ) | |
|
172 | 190 | |
|
173 |
DEFINE = re.compile(b |
|
|
191 | DEFINE = re.compile(b"^\\#define ([a-zA-Z0-9_]+) ") | |
|
174 | 192 | |
|
175 | 193 | sources = [] |
|
176 | 194 | |
|
177 | 195 | # Feed normalized preprocessor output for headers into the cdef parser. |
|
178 | 196 | for header in HEADERS: |
|
179 | 197 | preprocessed = preprocess(header) |
|
180 | 198 | sources.append(normalize_output(preprocessed)) |
|
181 | 199 | |
|
182 | 200 | # #define's are effectively erased as part of going through preprocessor. |
|
183 | 201 | # So perform a manual pass to re-add those to the cdef source. |
|
184 |
with open(header, |
|
|
202 | with open(header, "rb") as fh: | |
|
185 | 203 | for line in fh: |
|
186 | 204 | line = line.strip() |
|
187 | 205 | m = DEFINE.match(line) |
|
188 | 206 | if not m: |
|
189 | 207 | continue |
|
190 | 208 | |
|
191 |
if m.group(1) == b |
|
|
209 | if m.group(1) == b"ZSTD_STATIC_LINKING_ONLY": | |
|
192 | 210 | continue |
|
193 | 211 | |
|
194 | 212 | # The parser doesn't like some constants with complex values. |
|
195 |
if m.group(1) in (b |
|
|
213 | if m.group(1) in (b"ZSTD_LIB_VERSION", b"ZSTD_VERSION_STRING"): | |
|
196 | 214 | continue |
|
197 | 215 | |
|
198 | 216 | # The ... is magic syntax by the cdef parser to resolve the |
|
199 | 217 | # value at compile time. |
|
200 |
sources.append(m.group(0) + b |
|
|
218 | sources.append(m.group(0) + b" ...") | |
|
201 | 219 | |
|
202 |
cdeflines = b |
|
|
220 | cdeflines = b"\n".join(sources).splitlines() | |
|
203 | 221 | cdeflines = [l for l in cdeflines if l.strip()] |
|
204 |
ffi.cdef(b |
|
|
222 | ffi.cdef(b"\n".join(cdeflines).decode("latin1")) | |
|
205 | 223 | |
|
206 |
if __name__ == |
|
|
224 | if __name__ == "__main__": | |
|
207 | 225 | ffi.compile() |
@@ -1,112 +1,118 b'' | |||
|
1 | 1 | #!/usr/bin/env python |
|
2 | 2 | # Copyright (c) 2016-present, Gregory Szorc |
|
3 | 3 | # All rights reserved. |
|
4 | 4 | # |
|
5 | 5 | # This software may be modified and distributed under the terms |
|
6 | 6 | # of the BSD license. See the LICENSE file for details. |
|
7 | 7 | |
|
8 | 8 | from __future__ import print_function |
|
9 | 9 | |
|
10 | 10 | from distutils.version import LooseVersion |
|
11 | 11 | import os |
|
12 | 12 | import sys |
|
13 | 13 | from setuptools import setup |
|
14 | 14 | |
|
15 | 15 | # Need change in 1.10 for ffi.from_buffer() to handle all buffer types |
|
16 | 16 | # (like memoryview). |
|
17 | 17 | # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid |
|
18 | 18 | # garbage collection pitfalls. |
|
19 |
MINIMUM_CFFI_VERSION = |
|
|
19 | MINIMUM_CFFI_VERSION = "1.11" | |
|
20 | 20 | |
|
21 | 21 | try: |
|
22 | 22 | import cffi |
|
23 | 23 | |
|
24 | 24 | # PyPy (and possibly other distros) have CFFI distributed as part of |
|
25 | 25 | # them. The install_requires for CFFI below won't work. We need to sniff |
|
26 | 26 | # out the CFFI version here and reject CFFI if it is too old. |
|
27 | 27 | cffi_version = LooseVersion(cffi.__version__) |
|
28 | 28 | if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION): |
|
29 | print('CFFI 1.11 or newer required (%s found); ' | |
|
30 | 'not building CFFI backend' % cffi_version, | |
|
31 | file=sys.stderr) | |
|
29 | print( | |
|
30 | "CFFI 1.11 or newer required (%s found); " | |
|
31 | "not building CFFI backend" % cffi_version, | |
|
32 | file=sys.stderr, | |
|
33 | ) | |
|
32 | 34 | cffi = None |
|
33 | 35 | |
|
34 | 36 | except ImportError: |
|
35 | 37 | cffi = None |
|
36 | 38 | |
|
37 | 39 | import setup_zstd |
|
38 | 40 | |
|
39 | 41 | SUPPORT_LEGACY = False |
|
40 | 42 | SYSTEM_ZSTD = False |
|
41 | 43 | WARNINGS_AS_ERRORS = False |
|
42 | 44 | |
|
43 |
if os.environ.get( |
|
|
45 | if os.environ.get("ZSTD_WARNINGS_AS_ERRORS", ""): | |
|
44 | 46 | WARNINGS_AS_ERRORS = True |
|
45 | 47 | |
|
46 |
if |
|
|
48 | if "--legacy" in sys.argv: | |
|
47 | 49 | SUPPORT_LEGACY = True |
|
48 |
sys.argv.remove( |
|
|
50 | sys.argv.remove("--legacy") | |
|
49 | 51 | |
|
50 |
if |
|
|
52 | if "--system-zstd" in sys.argv: | |
|
51 | 53 | SYSTEM_ZSTD = True |
|
52 |
sys.argv.remove( |
|
|
54 | sys.argv.remove("--system-zstd") | |
|
53 | 55 | |
|
54 |
if |
|
|
56 | if "--warnings-as-errors" in sys.argv: | |
|
55 | 57 | WARNINGS_AS_ERRORS = True |
|
56 |
sys.argv.remove( |
|
|
58 | sys.argv.remove("--warning-as-errors") | |
|
57 | 59 | |
|
58 | 60 | # Code for obtaining the Extension instance is in its own module to |
|
59 | 61 | # facilitate reuse in other projects. |
|
60 | 62 | extensions = [ |
|
61 |
setup_zstd.get_c_extension( |
|
|
62 | support_legacy=SUPPORT_LEGACY, | |
|
63 | system_zstd=SYSTEM_ZSTD, | |
|
64 | warnings_as_errors=WARNINGS_AS_ERRORS), | |
|
63 | setup_zstd.get_c_extension( | |
|
64 | name="zstd", | |
|
65 | support_legacy=SUPPORT_LEGACY, | |
|
66 | system_zstd=SYSTEM_ZSTD, | |
|
67 | warnings_as_errors=WARNINGS_AS_ERRORS, | |
|
68 | ), | |
|
65 | 69 | ] |
|
66 | 70 | |
|
67 | 71 | install_requires = [] |
|
68 | 72 | |
|
69 | 73 | if cffi: |
|
70 | 74 | import make_cffi |
|
75 | ||
|
71 | 76 | extensions.append(make_cffi.ffi.distutils_extension()) |
|
72 |
install_requires.append( |
|
|
77 | install_requires.append("cffi>=%s" % MINIMUM_CFFI_VERSION) | |
|
73 | 78 | |
|
74 | 79 | version = None |
|
75 | 80 | |
|
76 |
with open( |
|
|
81 | with open("c-ext/python-zstandard.h", "r") as fh: | |
|
77 | 82 | for line in fh: |
|
78 |
if not line.startswith( |
|
|
83 | if not line.startswith("#define PYTHON_ZSTANDARD_VERSION"): | |
|
79 | 84 | continue |
|
80 | 85 | |
|
81 | 86 | version = line.split()[2][1:-1] |
|
82 | 87 | break |
|
83 | 88 | |
|
84 | 89 | if not version: |
|
85 |
raise Exception( |
|
|
86 | 'this should never happen') | |
|
90 | raise Exception("could not resolve package version; " "this should never happen") | |
|
87 | 91 | |
|
88 | 92 | setup( |
|
89 |
name= |
|
|
93 | name="zstandard", | |
|
90 | 94 | version=version, |
|
91 |
description= |
|
|
92 |
long_description=open( |
|
|
93 |
url= |
|
|
94 |
author= |
|
|
95 |
author_email= |
|
|
96 |
license= |
|
|
95 | description="Zstandard bindings for Python", | |
|
96 | long_description=open("README.rst", "r").read(), | |
|
97 | url="https://github.com/indygreg/python-zstandard", | |
|
98 | author="Gregory Szorc", | |
|
99 | author_email="gregory.szorc@gmail.com", | |
|
100 | license="BSD", | |
|
97 | 101 | classifiers=[ |
|
98 |
|
|
|
99 |
|
|
|
100 |
|
|
|
101 |
|
|
|
102 |
|
|
|
103 |
|
|
|
104 |
|
|
|
105 |
|
|
|
102 | "Development Status :: 4 - Beta", | |
|
103 | "Intended Audience :: Developers", | |
|
104 | "License :: OSI Approved :: BSD License", | |
|
105 | "Programming Language :: C", | |
|
106 | "Programming Language :: Python :: 2.7", | |
|
107 | "Programming Language :: Python :: 3.5", | |
|
108 | "Programming Language :: Python :: 3.6", | |
|
109 | "Programming Language :: Python :: 3.7", | |
|
110 | "Programming Language :: Python :: 3.8", | |
|
106 | 111 | ], |
|
107 |
keywords= |
|
|
108 |
packages=[ |
|
|
112 | keywords="zstandard zstd compression", | |
|
113 | packages=["zstandard"], | |
|
109 | 114 | ext_modules=extensions, |
|
110 |
test_suite= |
|
|
115 | test_suite="tests", | |
|
111 | 116 | install_requires=install_requires, |
|
117 | tests_require=["hypothesis"], | |
|
112 | 118 | ) |
@@ -1,192 +1,206 b'' | |||
|
1 | 1 | # Copyright (c) 2016-present, Gregory Szorc |
|
2 | 2 | # All rights reserved. |
|
3 | 3 | # |
|
4 | 4 | # This software may be modified and distributed under the terms |
|
5 | 5 | # of the BSD license. See the LICENSE file for details. |
|
6 | 6 | |
|
7 | 7 | import distutils.ccompiler |
|
8 | 8 | import os |
|
9 | 9 | |
|
10 | 10 | from distutils.extension import Extension |
|
11 | 11 | |
|
12 | 12 | |
|
13 | zstd_sources = ['zstd/%s' % p for p in ( | |
|
14 | 'common/debug.c', | |
|
15 | 'common/entropy_common.c', | |
|
16 | 'common/error_private.c', | |
|
17 | 'common/fse_decompress.c', | |
|
18 | 'common/pool.c', | |
|
19 | 'common/threading.c', | |
|
20 |
|
|
|
21 |
|
|
|
22 | 'compress/fse_compress.c', | |
|
23 | 'compress/hist.c', | |
|
24 |
|
|
|
25 | 'compress/zstd_compress_literals.c', | |
|
26 |
|
|
|
27 |
|
|
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 | 'decompress/huf_decompress.c', | |
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 | 'dictBuilder/cover.c', | |
|
39 | 'dictBuilder/divsufsort.c', | |
|
40 |
|
|
|
41 |
|
|
|
42 | )] | |
|
13 | zstd_sources = [ | |
|
14 | "zstd/%s" % p | |
|
15 | for p in ( | |
|
16 | "common/debug.c", | |
|
17 | "common/entropy_common.c", | |
|
18 | "common/error_private.c", | |
|
19 | "common/fse_decompress.c", | |
|
20 | "common/pool.c", | |
|
21 | "common/threading.c", | |
|
22 | "common/xxhash.c", | |
|
23 | "common/zstd_common.c", | |
|
24 | "compress/fse_compress.c", | |
|
25 | "compress/hist.c", | |
|
26 | "compress/huf_compress.c", | |
|
27 | "compress/zstd_compress_literals.c", | |
|
28 | "compress/zstd_compress_sequences.c", | |
|
29 | "compress/zstd_compress.c", | |
|
30 | "compress/zstd_double_fast.c", | |
|
31 | "compress/zstd_fast.c", | |
|
32 | "compress/zstd_lazy.c", | |
|
33 | "compress/zstd_ldm.c", | |
|
34 | "compress/zstd_opt.c", | |
|
35 | "compress/zstdmt_compress.c", | |
|
36 | "decompress/huf_decompress.c", | |
|
37 | "decompress/zstd_ddict.c", | |
|
38 | "decompress/zstd_decompress.c", | |
|
39 | "decompress/zstd_decompress_block.c", | |
|
40 | "dictBuilder/cover.c", | |
|
41 | "dictBuilder/divsufsort.c", | |
|
42 | "dictBuilder/fastcover.c", | |
|
43 | "dictBuilder/zdict.c", | |
|
44 | ) | |
|
45 | ] | |
|
43 | 46 | |
|
44 |
zstd_sources_legacy = [ |
|
|
45 | 'deprecated/zbuff_common.c', | |
|
46 | 'deprecated/zbuff_compress.c', | |
|
47 |
|
|
|
48 | 'legacy/zstd_v01.c', | |
|
49 | 'legacy/zstd_v02.c', | |
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 | )] | |
|
47 | zstd_sources_legacy = [ | |
|
48 | "zstd/%s" % p | |
|
49 | for p in ( | |
|
50 | "deprecated/zbuff_common.c", | |
|
51 | "deprecated/zbuff_compress.c", | |
|
52 | "deprecated/zbuff_decompress.c", | |
|
53 | "legacy/zstd_v01.c", | |
|
54 | "legacy/zstd_v02.c", | |
|
55 | "legacy/zstd_v03.c", | |
|
56 | "legacy/zstd_v04.c", | |
|
57 | "legacy/zstd_v05.c", | |
|
58 | "legacy/zstd_v06.c", | |
|
59 | "legacy/zstd_v07.c", | |
|
60 | ) | |
|
61 | ] | |
|
56 | 62 | |
|
57 | 63 | zstd_includes = [ |
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
64 | "zstd", | |
|
65 | "zstd/common", | |
|
66 | "zstd/compress", | |
|
67 | "zstd/decompress", | |
|
68 | "zstd/dictBuilder", | |
|
63 | 69 | ] |
|
64 | 70 | |
|
65 | 71 | zstd_includes_legacy = [ |
|
66 |
|
|
|
67 |
|
|
|
72 | "zstd/deprecated", | |
|
73 | "zstd/legacy", | |
|
68 | 74 | ] |
|
69 | 75 | |
|
70 | 76 | ext_includes = [ |
|
71 |
|
|
|
72 |
|
|
|
77 | "c-ext", | |
|
78 | "zstd/common", | |
|
73 | 79 | ] |
|
74 | 80 | |
|
75 | 81 | ext_sources = [ |
|
76 |
|
|
|
77 |
|
|
|
78 | 'zstd.c', | |
|
79 | 'c-ext/bufferutil.c', | |
|
80 | 'c-ext/compressiondict.c', | |
|
81 | 'c-ext/compressobj.c', | |
|
82 |
|
|
|
83 |
|
|
|
84 |
|
|
|
85 |
|
|
|
86 |
|
|
|
87 |
|
|
|
88 |
|
|
|
89 |
|
|
|
90 |
|
|
|
91 |
|
|
|
92 |
|
|
|
93 |
|
|
|
94 | 'c-ext/frameparams.c', | |
|
82 | "zstd/common/error_private.c", | |
|
83 | "zstd/common/pool.c", | |
|
84 | "zstd/common/threading.c", | |
|
85 | "zstd/common/zstd_common.c", | |
|
86 | "zstd.c", | |
|
87 | "c-ext/bufferutil.c", | |
|
88 | "c-ext/compressiondict.c", | |
|
89 | "c-ext/compressobj.c", | |
|
90 | "c-ext/compressor.c", | |
|
91 | "c-ext/compressoriterator.c", | |
|
92 | "c-ext/compressionchunker.c", | |
|
93 | "c-ext/compressionparams.c", | |
|
94 | "c-ext/compressionreader.c", | |
|
95 | "c-ext/compressionwriter.c", | |
|
96 | "c-ext/constants.c", | |
|
97 | "c-ext/decompressobj.c", | |
|
98 | "c-ext/decompressor.c", | |
|
99 | "c-ext/decompressoriterator.c", | |
|
100 | "c-ext/decompressionreader.c", | |
|
101 | "c-ext/decompressionwriter.c", | |
|
102 | "c-ext/frameparams.c", | |
|
95 | 103 | ] |
|
96 | 104 | |
|
97 | 105 | zstd_depends = [ |
|
98 |
|
|
|
106 | "c-ext/python-zstandard.h", | |
|
99 | 107 | ] |
|
100 | 108 | |
|
101 | 109 | |
|
102 | def get_c_extension(support_legacy=False, system_zstd=False, name='zstd', | |
|
103 | warnings_as_errors=False, root=None): | |
|
110 | def get_c_extension( | |
|
111 | support_legacy=False, | |
|
112 | system_zstd=False, | |
|
113 | name="zstd", | |
|
114 | warnings_as_errors=False, | |
|
115 | root=None, | |
|
116 | ): | |
|
104 | 117 | """Obtain a distutils.extension.Extension for the C extension. |
|
105 | 118 | |
|
106 | 119 | ``support_legacy`` controls whether to compile in legacy zstd format support. |
|
107 | 120 | |
|
108 | 121 | ``system_zstd`` controls whether to compile against the system zstd library. |
|
109 | 122 | For this to work, the system zstd library and headers must match what |
|
110 | 123 | python-zstandard is coded against exactly. |
|
111 | 124 | |
|
112 | 125 | ``name`` is the module name of the C extension to produce. |
|
113 | 126 | |
|
114 | 127 | ``warnings_as_errors`` controls whether compiler warnings are turned into |
|
115 | 128 | compiler errors. |
|
116 | 129 | |
|
117 | 130 | ``root`` defines a root path that source should be computed as relative |
|
118 | 131 | to. This should be the directory with the main ``setup.py`` that is |
|
119 | 132 | being invoked. If not defined, paths will be relative to this file. |
|
120 | 133 | """ |
|
121 | 134 | actual_root = os.path.abspath(os.path.dirname(__file__)) |
|
122 | 135 | root = root or actual_root |
|
123 | 136 | |
|
124 | 137 | sources = set([os.path.join(actual_root, p) for p in ext_sources]) |
|
125 | 138 | if not system_zstd: |
|
126 | 139 | sources.update([os.path.join(actual_root, p) for p in zstd_sources]) |
|
127 | 140 | if support_legacy: |
|
128 | sources.update([os.path.join(actual_root, p) | |
|
129 | for p in zstd_sources_legacy]) | |
|
141 | sources.update([os.path.join(actual_root, p) for p in zstd_sources_legacy]) | |
|
130 | 142 | sources = list(sources) |
|
131 | 143 | |
|
132 | 144 | include_dirs = set([os.path.join(actual_root, d) for d in ext_includes]) |
|
133 | 145 | if not system_zstd: |
|
134 | include_dirs.update([os.path.join(actual_root, d) | |
|
135 | for d in zstd_includes]) | |
|
146 | include_dirs.update([os.path.join(actual_root, d) for d in zstd_includes]) | |
|
136 | 147 | if support_legacy: |
|
137 |
include_dirs.update( |
|
|
138 |
|
|
|
148 | include_dirs.update( | |
|
149 | [os.path.join(actual_root, d) for d in zstd_includes_legacy] | |
|
150 | ) | |
|
139 | 151 | include_dirs = list(include_dirs) |
|
140 | 152 | |
|
141 | 153 | depends = [os.path.join(actual_root, p) for p in zstd_depends] |
|
142 | 154 | |
|
143 | 155 | compiler = distutils.ccompiler.new_compiler() |
|
144 | 156 | |
|
145 | 157 | # Needed for MSVC. |
|
146 |
if hasattr(compiler, |
|
|
158 | if hasattr(compiler, "initialize"): | |
|
147 | 159 | compiler.initialize() |
|
148 | 160 | |
|
149 |
if compiler.compiler_type == |
|
|
150 |
compiler_type = |
|
|
151 |
elif compiler.compiler_type == |
|
|
152 |
compiler_type = |
|
|
153 |
elif compiler.compiler_type == |
|
|
154 |
compiler_type = |
|
|
161 | if compiler.compiler_type == "unix": | |
|
162 | compiler_type = "unix" | |
|
163 | elif compiler.compiler_type == "msvc": | |
|
164 | compiler_type = "msvc" | |
|
165 | elif compiler.compiler_type == "mingw32": | |
|
166 | compiler_type = "mingw32" | |
|
155 | 167 | else: |
|
156 |
raise Exception( |
|
|
157 | compiler.compiler_type) | |
|
168 | raise Exception("unhandled compiler type: %s" % compiler.compiler_type) | |
|
158 | 169 | |
|
159 |
extra_args = [ |
|
|
170 | extra_args = ["-DZSTD_MULTITHREAD"] | |
|
160 | 171 | |
|
161 | 172 | if not system_zstd: |
|
162 |
extra_args.append( |
|
|
163 |
extra_args.append( |
|
|
164 |
extra_args.append( |
|
|
173 | extra_args.append("-DZSTDLIB_VISIBILITY=") | |
|
174 | extra_args.append("-DZDICTLIB_VISIBILITY=") | |
|
175 | extra_args.append("-DZSTDERRORLIB_VISIBILITY=") | |
|
165 | 176 | |
|
166 |
if compiler_type == |
|
|
167 |
extra_args.append( |
|
|
177 | if compiler_type == "unix": | |
|
178 | extra_args.append("-fvisibility=hidden") | |
|
168 | 179 | |
|
169 | 180 | if not system_zstd and support_legacy: |
|
170 |
extra_args.append( |
|
|
181 | extra_args.append("-DZSTD_LEGACY_SUPPORT=1") | |
|
171 | 182 | |
|
172 | 183 | if warnings_as_errors: |
|
173 |
if compiler_type in ( |
|
|
174 |
extra_args.append( |
|
|
175 |
elif compiler_type == |
|
|
176 |
extra_args.append( |
|
|
184 | if compiler_type in ("unix", "mingw32"): | |
|
185 | extra_args.append("-Werror") | |
|
186 | elif compiler_type == "msvc": | |
|
187 | extra_args.append("/WX") | |
|
177 | 188 | else: |
|
178 | 189 | assert False |
|
179 | 190 | |
|
180 |
libraries = [ |
|
|
191 | libraries = ["zstd"] if system_zstd else [] | |
|
181 | 192 | |
|
182 | 193 | # Python 3.7 doesn't like absolute paths. So normalize to relative. |
|
183 | 194 | sources = [os.path.relpath(p, root) for p in sources] |
|
184 | 195 | include_dirs = [os.path.relpath(p, root) for p in include_dirs] |
|
185 | 196 | depends = [os.path.relpath(p, root) for p in depends] |
|
186 | 197 | |
|
187 | 198 | # TODO compile with optimizations. |
|
188 |
return Extension( |
|
|
189 | include_dirs=include_dirs, | |
|
190 | depends=depends, | |
|
191 | extra_compile_args=extra_args, | |
|
192 | libraries=libraries) | |
|
199 | return Extension( | |
|
200 | name, | |
|
201 | sources, | |
|
202 | include_dirs=include_dirs, | |
|
203 | depends=depends, | |
|
204 | extra_compile_args=extra_args, | |
|
205 | libraries=libraries, | |
|
206 | ) |
@@ -1,185 +1,197 b'' | |||
|
1 | 1 | import imp |
|
2 | 2 | import inspect |
|
3 | 3 | import io |
|
4 | 4 | import os |
|
5 | 5 | import types |
|
6 | import unittest | |
|
6 | 7 | |
|
7 | 8 | try: |
|
8 | 9 | import hypothesis |
|
9 | 10 | except ImportError: |
|
10 | 11 | hypothesis = None |
|
11 | 12 | |
|
12 | 13 | |
|
14 | class TestCase(unittest.TestCase): | |
|
15 | if not getattr(unittest.TestCase, "assertRaisesRegex", False): | |
|
16 | assertRaisesRegex = unittest.TestCase.assertRaisesRegexp | |
|
17 | ||
|
18 | ||
|
13 | 19 | def make_cffi(cls): |
|
14 | 20 | """Decorator to add CFFI versions of each test method.""" |
|
15 | 21 | |
|
16 | 22 | # The module containing this class definition should |
|
17 | 23 | # `import zstandard as zstd`. Otherwise things may blow up. |
|
18 | 24 | mod = inspect.getmodule(cls) |
|
19 |
if not hasattr(mod, |
|
|
25 | if not hasattr(mod, "zstd"): | |
|
20 | 26 | raise Exception('test module does not contain "zstd" symbol') |
|
21 | 27 | |
|
22 |
if not hasattr(mod.zstd, |
|
|
23 | raise Exception('zstd symbol does not have "backend" attribute; did ' | |
|
24 | 'you `import zstandard as zstd`?') | |
|
28 | if not hasattr(mod.zstd, "backend"): | |
|
29 | raise Exception( | |
|
30 | 'zstd symbol does not have "backend" attribute; did ' | |
|
31 | "you `import zstandard as zstd`?" | |
|
32 | ) | |
|
25 | 33 | |
|
26 | 34 | # If `import zstandard` already chose the cffi backend, there is nothing |
|
27 | 35 | # for us to do: we only add the cffi variation if the default backend |
|
28 | 36 | # is the C extension. |
|
29 |
if mod.zstd.backend == |
|
|
37 | if mod.zstd.backend == "cffi": | |
|
30 | 38 | return cls |
|
31 | 39 | |
|
32 | 40 | old_env = dict(os.environ) |
|
33 |
os.environ[ |
|
|
41 | os.environ["PYTHON_ZSTANDARD_IMPORT_POLICY"] = "cffi" | |
|
34 | 42 | try: |
|
35 | 43 | try: |
|
36 |
mod_info = imp.find_module( |
|
|
37 |
mod = imp.load_module( |
|
|
44 | mod_info = imp.find_module("zstandard") | |
|
45 | mod = imp.load_module("zstandard_cffi", *mod_info) | |
|
38 | 46 | except ImportError: |
|
39 | 47 | return cls |
|
40 | 48 | finally: |
|
41 | 49 | os.environ.clear() |
|
42 | 50 | os.environ.update(old_env) |
|
43 | 51 | |
|
44 |
if mod.backend != |
|
|
45 |
raise Exception( |
|
|
52 | if mod.backend != "cffi": | |
|
53 | raise Exception("got the zstandard %s backend instead of cffi" % mod.backend) | |
|
46 | 54 | |
|
47 | 55 | # If CFFI version is available, dynamically construct test methods |
|
48 | 56 | # that use it. |
|
49 | 57 | |
|
50 | 58 | for attr in dir(cls): |
|
51 | 59 | fn = getattr(cls, attr) |
|
52 | 60 | if not inspect.ismethod(fn) and not inspect.isfunction(fn): |
|
53 | 61 | continue |
|
54 | 62 | |
|
55 |
if not fn.__name__.startswith( |
|
|
63 | if not fn.__name__.startswith("test_"): | |
|
56 | 64 | continue |
|
57 | 65 | |
|
58 |
name = |
|
|
66 | name = "%s_cffi" % fn.__name__ | |
|
59 | 67 | |
|
60 | 68 | # Replace the "zstd" symbol with the CFFI module instance. Then copy |
|
61 | 69 | # the function object and install it in a new attribute. |
|
62 | 70 | if isinstance(fn, types.FunctionType): |
|
63 | 71 | globs = dict(fn.__globals__) |
|
64 |
globs[ |
|
|
65 |
new_fn = types.FunctionType( |
|
|
66 |
|
|
|
72 | globs["zstd"] = mod | |
|
73 | new_fn = types.FunctionType( | |
|
74 | fn.__code__, globs, name, fn.__defaults__, fn.__closure__ | |
|
75 | ) | |
|
67 | 76 | new_method = new_fn |
|
68 | 77 | else: |
|
69 | 78 | globs = dict(fn.__func__.func_globals) |
|
70 |
globs[ |
|
|
71 |
new_fn = types.FunctionType( |
|
|
72 |
|
|
|
73 | fn.__func__.func_closure) | |
|
74 | new_method = types.UnboundMethodType(new_fn, fn.im_self, | |
|
75 | fn.im_class) | |
|
79 | globs["zstd"] = mod | |
|
80 | new_fn = types.FunctionType( | |
|
81 | fn.__func__.func_code, | |
|
82 | globs, | |
|
83 | name, | |
|
84 | fn.__func__.func_defaults, | |
|
85 | fn.__func__.func_closure, | |
|
86 | ) | |
|
87 | new_method = types.UnboundMethodType(new_fn, fn.im_self, fn.im_class) | |
|
76 | 88 | |
|
77 | 89 | setattr(cls, name, new_method) |
|
78 | 90 | |
|
79 | 91 | return cls |
|
80 | 92 | |
|
81 | 93 | |
|
82 | 94 | class NonClosingBytesIO(io.BytesIO): |
|
83 | 95 | """BytesIO that saves the underlying buffer on close(). |
|
84 | 96 | |
|
85 | 97 | This allows us to access written data after close(). |
|
86 | 98 | """ |
|
99 | ||
|
87 | 100 | def __init__(self, *args, **kwargs): |
|
88 | 101 | super(NonClosingBytesIO, self).__init__(*args, **kwargs) |
|
89 | 102 | self._saved_buffer = None |
|
90 | 103 | |
|
91 | 104 | def close(self): |
|
92 | 105 | self._saved_buffer = self.getvalue() |
|
93 | 106 | return super(NonClosingBytesIO, self).close() |
|
94 | 107 | |
|
95 | 108 | def getvalue(self): |
|
96 | 109 | if self.closed: |
|
97 | 110 | return self._saved_buffer |
|
98 | 111 | else: |
|
99 | 112 | return super(NonClosingBytesIO, self).getvalue() |
|
100 | 113 | |
|
101 | 114 | |
|
102 | 115 | class OpCountingBytesIO(NonClosingBytesIO): |
|
103 | 116 | def __init__(self, *args, **kwargs): |
|
104 | 117 | self._flush_count = 0 |
|
105 | 118 | self._read_count = 0 |
|
106 | 119 | self._write_count = 0 |
|
107 | 120 | return super(OpCountingBytesIO, self).__init__(*args, **kwargs) |
|
108 | 121 | |
|
109 | 122 | def flush(self): |
|
110 | 123 | self._flush_count += 1 |
|
111 | 124 | return super(OpCountingBytesIO, self).flush() |
|
112 | 125 | |
|
113 | 126 | def read(self, *args): |
|
114 | 127 | self._read_count += 1 |
|
115 | 128 | return super(OpCountingBytesIO, self).read(*args) |
|
116 | 129 | |
|
117 | 130 | def write(self, data): |
|
118 | 131 | self._write_count += 1 |
|
119 | 132 | return super(OpCountingBytesIO, self).write(data) |
|
120 | 133 | |
|
121 | 134 | |
|
122 | 135 | _source_files = [] |
|
123 | 136 | |
|
124 | 137 | |
|
125 | 138 | def random_input_data(): |
|
126 | 139 | """Obtain the raw content of source files. |
|
127 | 140 | |
|
128 | 141 | This is used for generating "random" data to feed into fuzzing, since it is |
|
129 | 142 | faster than random content generation. |
|
130 | 143 | """ |
|
131 | 144 | if _source_files: |
|
132 | 145 | return _source_files |
|
133 | 146 | |
|
134 | 147 | for root, dirs, files in os.walk(os.path.dirname(__file__)): |
|
135 | 148 | dirs[:] = list(sorted(dirs)) |
|
136 | 149 | for f in sorted(files): |
|
137 | 150 | try: |
|
138 |
with open(os.path.join(root, f), |
|
|
151 | with open(os.path.join(root, f), "rb") as fh: | |
|
139 | 152 | data = fh.read() |
|
140 | 153 | if data: |
|
141 | 154 | _source_files.append(data) |
|
142 | 155 | except OSError: |
|
143 | 156 | pass |
|
144 | 157 | |
|
145 | 158 | # Also add some actual random data. |
|
146 | 159 | _source_files.append(os.urandom(100)) |
|
147 | 160 | _source_files.append(os.urandom(1000)) |
|
148 | 161 | _source_files.append(os.urandom(10000)) |
|
149 | 162 | _source_files.append(os.urandom(100000)) |
|
150 | 163 | _source_files.append(os.urandom(1000000)) |
|
151 | 164 | |
|
152 | 165 | return _source_files |
|
153 | 166 | |
|
154 | 167 | |
|
155 | 168 | def generate_samples(): |
|
156 | 169 | inputs = [ |
|
157 |
b |
|
|
158 |
b |
|
|
159 |
b |
|
|
160 |
b |
|
|
161 |
b |
|
|
170 | b"foo", | |
|
171 | b"bar", | |
|
172 | b"abcdef", | |
|
173 | b"sometext", | |
|
174 | b"baz", | |
|
162 | 175 | ] |
|
163 | 176 | |
|
164 | 177 | samples = [] |
|
165 | 178 | |
|
166 | 179 | for i in range(128): |
|
167 | 180 | samples.append(inputs[i % 5]) |
|
168 | 181 | samples.append(inputs[i % 5] * (i + 3)) |
|
169 | 182 | samples.append(inputs[-(i % 5)] * (i + 2)) |
|
170 | 183 | |
|
171 | 184 | return samples |
|
172 | 185 | |
|
173 | 186 | |
|
174 | 187 | if hypothesis: |
|
175 | 188 | default_settings = hypothesis.settings(deadline=10000) |
|
176 |
hypothesis.settings.register_profile( |
|
|
189 | hypothesis.settings.register_profile("default", default_settings) | |
|
177 | 190 | |
|
178 | 191 | ci_settings = hypothesis.settings(deadline=20000, max_examples=1000) |
|
179 |
hypothesis.settings.register_profile( |
|
|
192 | hypothesis.settings.register_profile("ci", ci_settings) | |
|
180 | 193 | |
|
181 | 194 | expensive_settings = hypothesis.settings(deadline=None, max_examples=10000) |
|
182 |
hypothesis.settings.register_profile( |
|
|
195 | hypothesis.settings.register_profile("expensive", expensive_settings) | |
|
183 | 196 | |
|
184 | hypothesis.settings.load_profile( | |
|
185 | os.environ.get('HYPOTHESIS_PROFILE', 'default')) | |
|
197 | hypothesis.settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default")) |
@@ -1,135 +1,146 b'' | |||
|
1 | 1 | import struct |
|
2 | 2 | import unittest |
|
3 | 3 | |
|
4 | 4 | import zstandard as zstd |
|
5 | 5 | |
|
6 | ss = struct.Struct('=QQ') | |
|
6 | from .common import TestCase | |
|
7 | ||
|
8 | ss = struct.Struct("=QQ") | |
|
7 | 9 | |
|
8 | 10 | |
|
9 |
class TestBufferWithSegments( |
|
|
11 | class TestBufferWithSegments(TestCase): | |
|
10 | 12 | def test_arguments(self): |
|
11 |
if not hasattr(zstd, |
|
|
12 |
self.skipTest( |
|
|
13 | if not hasattr(zstd, "BufferWithSegments"): | |
|
14 | self.skipTest("BufferWithSegments not available") | |
|
13 | 15 | |
|
14 | 16 | with self.assertRaises(TypeError): |
|
15 | 17 | zstd.BufferWithSegments() |
|
16 | 18 | |
|
17 | 19 | with self.assertRaises(TypeError): |
|
18 |
zstd.BufferWithSegments(b |
|
|
20 | zstd.BufferWithSegments(b"foo") | |
|
19 | 21 | |
|
20 | 22 | # Segments data should be a multiple of 16. |
|
21 | with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'): | |
|
22 | zstd.BufferWithSegments(b'foo', b'\x00\x00') | |
|
23 | with self.assertRaisesRegex( | |
|
24 | ValueError, "segments array size is not a multiple of 16" | |
|
25 | ): | |
|
26 | zstd.BufferWithSegments(b"foo", b"\x00\x00") | |
|
23 | 27 | |
|
24 | 28 | def test_invalid_offset(self): |
|
25 |
if not hasattr(zstd, |
|
|
26 |
self.skipTest( |
|
|
29 | if not hasattr(zstd, "BufferWithSegments"): | |
|
30 | self.skipTest("BufferWithSegments not available") | |
|
27 | 31 | |
|
28 | with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'): | |
|
29 | zstd.BufferWithSegments(b'foo', ss.pack(0, 4)) | |
|
32 | with self.assertRaisesRegex( | |
|
33 | ValueError, "offset within segments array references memory" | |
|
34 | ): | |
|
35 | zstd.BufferWithSegments(b"foo", ss.pack(0, 4)) | |
|
30 | 36 | |
|
31 | 37 | def test_invalid_getitem(self): |
|
32 |
if not hasattr(zstd, |
|
|
33 |
self.skipTest( |
|
|
38 | if not hasattr(zstd, "BufferWithSegments"): | |
|
39 | self.skipTest("BufferWithSegments not available") | |
|
34 | 40 | |
|
35 |
b = zstd.BufferWithSegments(b |
|
|
41 | b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
36 | 42 | |
|
37 |
with self.assertRaisesRegex |
|
|
43 | with self.assertRaisesRegex(IndexError, "offset must be non-negative"): | |
|
38 | 44 | test = b[-10] |
|
39 | 45 | |
|
40 |
with self.assertRaisesRegex |
|
|
46 | with self.assertRaisesRegex(IndexError, "offset must be less than 1"): | |
|
41 | 47 | test = b[1] |
|
42 | 48 | |
|
43 |
with self.assertRaisesRegex |
|
|
49 | with self.assertRaisesRegex(IndexError, "offset must be less than 1"): | |
|
44 | 50 | test = b[2] |
|
45 | 51 | |
|
46 | 52 | def test_single(self): |
|
47 |
if not hasattr(zstd, |
|
|
48 |
self.skipTest( |
|
|
53 | if not hasattr(zstd, "BufferWithSegments"): | |
|
54 | self.skipTest("BufferWithSegments not available") | |
|
49 | 55 | |
|
50 |
b = zstd.BufferWithSegments(b |
|
|
56 | b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
51 | 57 | self.assertEqual(len(b), 1) |
|
52 | 58 | self.assertEqual(b.size, 3) |
|
53 |
self.assertEqual(b.tobytes(), b |
|
|
59 | self.assertEqual(b.tobytes(), b"foo") | |
|
54 | 60 | |
|
55 | 61 | self.assertEqual(len(b[0]), 3) |
|
56 | 62 | self.assertEqual(b[0].offset, 0) |
|
57 |
self.assertEqual(b[0].tobytes(), b |
|
|
63 | self.assertEqual(b[0].tobytes(), b"foo") | |
|
58 | 64 | |
|
59 | 65 | def test_multiple(self): |
|
60 |
if not hasattr(zstd, |
|
|
61 |
self.skipTest( |
|
|
66 | if not hasattr(zstd, "BufferWithSegments"): | |
|
67 | self.skipTest("BufferWithSegments not available") | |
|
62 | 68 | |
|
63 |
b = zstd.BufferWithSegments( |
|
|
64 | ss.pack(3, 4), | |
|
65 | ss.pack(7, 5)])) | |
|
69 | b = zstd.BufferWithSegments( | |
|
70 | b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)]) | |
|
71 | ) | |
|
66 | 72 | self.assertEqual(len(b), 3) |
|
67 | 73 | self.assertEqual(b.size, 12) |
|
68 |
self.assertEqual(b.tobytes(), b |
|
|
74 | self.assertEqual(b.tobytes(), b"foofooxfooxy") | |
|
69 | 75 | |
|
70 |
self.assertEqual(b[0].tobytes(), b |
|
|
71 |
self.assertEqual(b[1].tobytes(), b |
|
|
72 |
self.assertEqual(b[2].tobytes(), b |
|
|
76 | self.assertEqual(b[0].tobytes(), b"foo") | |
|
77 | self.assertEqual(b[1].tobytes(), b"foox") | |
|
78 | self.assertEqual(b[2].tobytes(), b"fooxy") | |
|
73 | 79 | |
|
74 | 80 | |
|
75 |
class TestBufferWithSegmentsCollection( |
|
|
81 | class TestBufferWithSegmentsCollection(TestCase): | |
|
76 | 82 | def test_empty_constructor(self): |
|
77 |
if not hasattr(zstd, |
|
|
78 |
self.skipTest( |
|
|
83 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
84 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
79 | 85 | |
|
80 |
with self.assertRaisesRegex |
|
|
86 | with self.assertRaisesRegex(ValueError, "must pass at least 1 argument"): | |
|
81 | 87 | zstd.BufferWithSegmentsCollection() |
|
82 | 88 | |
|
83 | 89 | def test_argument_validation(self): |
|
84 |
if not hasattr(zstd, |
|
|
85 |
self.skipTest( |
|
|
90 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
91 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
86 | 92 | |
|
87 |
with self.assertRaisesRegex |
|
|
93 | with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): | |
|
88 | 94 | zstd.BufferWithSegmentsCollection(None) |
|
89 | 95 | |
|
90 |
with self.assertRaisesRegex |
|
|
91 |
zstd.BufferWithSegmentsCollection( |
|
|
92 | None) | |
|
96 | with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): | |
|
97 | zstd.BufferWithSegmentsCollection( | |
|
98 | zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None | |
|
99 | ) | |
|
93 | 100 | |
|
94 | with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'): | |
|
95 | zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b'')) | |
|
101 | with self.assertRaisesRegex( | |
|
102 | ValueError, "ZstdBufferWithSegments cannot be empty" | |
|
103 | ): | |
|
104 | zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b"", b"")) | |
|
96 | 105 | |
|
97 | 106 | def test_length(self): |
|
98 |
if not hasattr(zstd, |
|
|
99 |
self.skipTest( |
|
|
107 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
108 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
100 | 109 | |
|
101 |
b1 = zstd.BufferWithSegments(b |
|
|
102 |
b2 = zstd.BufferWithSegments( |
|
|
103 | ss.pack(3, 3)])) | |
|
110 | b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
111 | b2 = zstd.BufferWithSegments( | |
|
112 | b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]) | |
|
113 | ) | |
|
104 | 114 | |
|
105 | 115 | c = zstd.BufferWithSegmentsCollection(b1) |
|
106 | 116 | self.assertEqual(len(c), 1) |
|
107 | 117 | self.assertEqual(c.size(), 3) |
|
108 | 118 | |
|
109 | 119 | c = zstd.BufferWithSegmentsCollection(b2) |
|
110 | 120 | self.assertEqual(len(c), 2) |
|
111 | 121 | self.assertEqual(c.size(), 6) |
|
112 | 122 | |
|
113 | 123 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
114 | 124 | self.assertEqual(len(c), 3) |
|
115 | 125 | self.assertEqual(c.size(), 9) |
|
116 | 126 | |
|
117 | 127 | def test_getitem(self): |
|
118 |
if not hasattr(zstd, |
|
|
119 |
self.skipTest( |
|
|
128 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
|
129 | self.skipTest("BufferWithSegmentsCollection not available") | |
|
120 | 130 | |
|
121 |
b1 = zstd.BufferWithSegments(b |
|
|
122 |
b2 = zstd.BufferWithSegments( |
|
|
123 | ss.pack(3, 3)])) | |
|
131 | b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
|
132 | b2 = zstd.BufferWithSegments( | |
|
133 | b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]) | |
|
134 | ) | |
|
124 | 135 | |
|
125 | 136 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
126 | 137 | |
|
127 |
with self.assertRaisesRegex |
|
|
138 | with self.assertRaisesRegex(IndexError, "offset must be less than 3"): | |
|
128 | 139 | c[3] |
|
129 | 140 | |
|
130 |
with self.assertRaisesRegex |
|
|
141 | with self.assertRaisesRegex(IndexError, "offset must be less than 3"): | |
|
131 | 142 | c[4] |
|
132 | 143 | |
|
133 |
self.assertEqual(c[0].tobytes(), b |
|
|
134 |
self.assertEqual(c[1].tobytes(), b |
|
|
135 |
self.assertEqual(c[2].tobytes(), b |
|
|
144 | self.assertEqual(c[0].tobytes(), b"foo") | |
|
145 | self.assertEqual(c[1].tobytes(), b"bar") | |
|
146 | self.assertEqual(c[2].tobytes(), b"baz") |
This diff has been collapsed as it changes many lines, (811 lines changed) Show them Hide them | |||
@@ -1,1735 +1,1770 b'' | |||
|
1 | 1 | import hashlib |
|
2 | 2 | import io |
|
3 | 3 | import os |
|
4 | 4 | import struct |
|
5 | 5 | import sys |
|
6 | 6 | import tarfile |
|
7 | 7 | import tempfile |
|
8 | 8 | import unittest |
|
9 | 9 | |
|
10 | 10 | import zstandard as zstd |
|
11 | 11 | |
|
12 | 12 | from .common import ( |
|
13 | 13 | make_cffi, |
|
14 | 14 | NonClosingBytesIO, |
|
15 | 15 | OpCountingBytesIO, |
|
16 | TestCase, | |
|
16 | 17 | ) |
|
17 | 18 | |
|
18 | 19 | |
|
19 | 20 | if sys.version_info[0] >= 3: |
|
20 | 21 | next = lambda it: it.__next__() |
|
21 | 22 | else: |
|
22 | 23 | next = lambda it: it.next() |
|
23 | 24 | |
|
24 | 25 | |
|
25 | 26 | def multithreaded_chunk_size(level, source_size=0): |
|
26 | params = zstd.ZstdCompressionParameters.from_level(level, | |
|
27 | source_size=source_size) | |
|
27 | params = zstd.ZstdCompressionParameters.from_level(level, source_size=source_size) | |
|
28 | 28 | |
|
29 | 29 | return 1 << (params.window_log + 2) |
|
30 | 30 | |
|
31 | 31 | |
|
32 | 32 | @make_cffi |
|
33 |
class TestCompressor( |
|
|
33 | class TestCompressor(TestCase): | |
|
34 | 34 | def test_level_bounds(self): |
|
35 | 35 | with self.assertRaises(ValueError): |
|
36 | 36 | zstd.ZstdCompressor(level=23) |
|
37 | 37 | |
|
38 | 38 | def test_memory_size(self): |
|
39 | 39 | cctx = zstd.ZstdCompressor(level=1) |
|
40 | 40 | self.assertGreater(cctx.memory_size(), 100) |
|
41 | 41 | |
|
42 | 42 | |
|
43 | 43 | @make_cffi |
|
44 |
class TestCompressor_compress( |
|
|
44 | class TestCompressor_compress(TestCase): | |
|
45 | 45 | def test_compress_empty(self): |
|
46 | 46 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
47 |
result = cctx.compress(b |
|
|
48 |
self.assertEqual(result, b |
|
|
47 | result = cctx.compress(b"") | |
|
48 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
49 | 49 | params = zstd.get_frame_parameters(result) |
|
50 | 50 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
51 | 51 | self.assertEqual(params.window_size, 524288) |
|
52 | 52 | self.assertEqual(params.dict_id, 0) |
|
53 | 53 | self.assertFalse(params.has_checksum, 0) |
|
54 | 54 | |
|
55 | 55 | cctx = zstd.ZstdCompressor() |
|
56 |
result = cctx.compress(b |
|
|
57 |
self.assertEqual(result, b |
|
|
56 | result = cctx.compress(b"") | |
|
57 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00") | |
|
58 | 58 | params = zstd.get_frame_parameters(result) |
|
59 | 59 | self.assertEqual(params.content_size, 0) |
|
60 | 60 | |
|
61 | 61 | def test_input_types(self): |
|
62 | 62 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
63 |
expected = b |
|
|
63 | expected = b"\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f" | |
|
64 | 64 | |
|
65 | 65 | mutable_array = bytearray(3) |
|
66 |
mutable_array[:] = b |
|
|
66 | mutable_array[:] = b"foo" | |
|
67 | 67 | |
|
68 | 68 | sources = [ |
|
69 |
memoryview(b |
|
|
70 |
bytearray(b |
|
|
69 | memoryview(b"foo"), | |
|
70 | bytearray(b"foo"), | |
|
71 | 71 | mutable_array, |
|
72 | 72 | ] |
|
73 | 73 | |
|
74 | 74 | for source in sources: |
|
75 | 75 | self.assertEqual(cctx.compress(source), expected) |
|
76 | 76 | |
|
77 | 77 | def test_compress_large(self): |
|
78 | 78 | chunks = [] |
|
79 | 79 | for i in range(255): |
|
80 |
chunks.append(struct.Struct( |
|
|
80 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
|
81 | 81 | |
|
82 | 82 | cctx = zstd.ZstdCompressor(level=3, write_content_size=False) |
|
83 |
result = cctx.compress(b |
|
|
83 | result = cctx.compress(b"".join(chunks)) | |
|
84 | 84 | self.assertEqual(len(result), 999) |
|
85 |
self.assertEqual(result[0:4], b |
|
|
85 | self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") | |
|
86 | 86 | |
|
87 | 87 | # This matches the test for read_to_iter() below. |
|
88 | 88 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
89 |
result = cctx.compress(b |
|
|
90 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' | |
|
91 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' | |
|
92 | b'\x02\x09\x00\x00\x6f') | |
|
89 | result = cctx.compress(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b"o") | |
|
90 | self.assertEqual( | |
|
91 | result, | |
|
92 | b"\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00" | |
|
93 | b"\x10\x66\x66\x01\x00\xfb\xff\x39\xc0" | |
|
94 | b"\x02\x09\x00\x00\x6f", | |
|
95 | ) | |
|
93 | 96 | |
|
94 | 97 | def test_negative_level(self): |
|
95 | 98 | cctx = zstd.ZstdCompressor(level=-4) |
|
96 |
result = cctx.compress(b |
|
|
99 | result = cctx.compress(b"foo" * 256) | |
|
97 | 100 | |
|
98 | 101 | def test_no_magic(self): |
|
99 | params = zstd.ZstdCompressionParameters.from_level( | |
|
100 | 1, format=zstd.FORMAT_ZSTD1) | |
|
102 | params = zstd.ZstdCompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1) | |
|
101 | 103 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
102 |
magic = cctx.compress(b |
|
|
104 | magic = cctx.compress(b"foobar") | |
|
103 | 105 | |
|
104 | 106 | params = zstd.ZstdCompressionParameters.from_level( |
|
105 |
1, format=zstd.FORMAT_ZSTD1_MAGICLESS |
|
|
107 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS | |
|
108 | ) | |
|
106 | 109 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
107 |
no_magic = cctx.compress(b |
|
|
110 | no_magic = cctx.compress(b"foobar") | |
|
108 | 111 | |
|
109 |
self.assertEqual(magic[0:4], b |
|
|
112 | self.assertEqual(magic[0:4], b"\x28\xb5\x2f\xfd") | |
|
110 | 113 | self.assertEqual(magic[4:], no_magic) |
|
111 | 114 | |
|
112 | 115 | def test_write_checksum(self): |
|
113 | 116 | cctx = zstd.ZstdCompressor(level=1) |
|
114 |
no_checksum = cctx.compress(b |
|
|
117 | no_checksum = cctx.compress(b"foobar") | |
|
115 | 118 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
116 |
with_checksum = cctx.compress(b |
|
|
119 | with_checksum = cctx.compress(b"foobar") | |
|
117 | 120 | |
|
118 | 121 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
119 | 122 | |
|
120 | 123 | no_params = zstd.get_frame_parameters(no_checksum) |
|
121 | 124 | with_params = zstd.get_frame_parameters(with_checksum) |
|
122 | 125 | |
|
123 | 126 | self.assertFalse(no_params.has_checksum) |
|
124 | 127 | self.assertTrue(with_params.has_checksum) |
|
125 | 128 | |
|
126 | 129 | def test_write_content_size(self): |
|
127 | 130 | cctx = zstd.ZstdCompressor(level=1) |
|
128 |
with_size = cctx.compress(b |
|
|
131 | with_size = cctx.compress(b"foobar" * 256) | |
|
129 | 132 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
130 |
no_size = cctx.compress(b |
|
|
133 | no_size = cctx.compress(b"foobar" * 256) | |
|
131 | 134 | |
|
132 | 135 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
133 | 136 | |
|
134 | 137 | no_params = zstd.get_frame_parameters(no_size) |
|
135 | 138 | with_params = zstd.get_frame_parameters(with_size) |
|
136 | 139 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
137 | 140 | self.assertEqual(with_params.content_size, 1536) |
|
138 | 141 | |
|
139 | 142 | def test_no_dict_id(self): |
|
140 | 143 | samples = [] |
|
141 | 144 | for i in range(128): |
|
142 |
samples.append(b |
|
|
143 |
samples.append(b |
|
|
144 |
samples.append(b |
|
|
145 | samples.append(b"foo" * 64) | |
|
146 | samples.append(b"bar" * 64) | |
|
147 | samples.append(b"foobar" * 64) | |
|
145 | 148 | |
|
146 | 149 | d = zstd.train_dictionary(1024, samples) |
|
147 | 150 | |
|
148 | 151 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
149 |
with_dict_id = cctx.compress(b |
|
|
152 | with_dict_id = cctx.compress(b"foobarfoobar") | |
|
150 | 153 | |
|
151 | 154 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
152 |
no_dict_id = cctx.compress(b |
|
|
155 | no_dict_id = cctx.compress(b"foobarfoobar") | |
|
153 | 156 | |
|
154 | 157 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) |
|
155 | 158 | |
|
156 | 159 | no_params = zstd.get_frame_parameters(no_dict_id) |
|
157 | 160 | with_params = zstd.get_frame_parameters(with_dict_id) |
|
158 | 161 | self.assertEqual(no_params.dict_id, 0) |
|
159 | 162 | self.assertEqual(with_params.dict_id, 1880053135) |
|
160 | 163 | |
|
161 | 164 | def test_compress_dict_multiple(self): |
|
162 | 165 | samples = [] |
|
163 | 166 | for i in range(128): |
|
164 |
samples.append(b |
|
|
165 |
samples.append(b |
|
|
166 |
samples.append(b |
|
|
167 | samples.append(b"foo" * 64) | |
|
168 | samples.append(b"bar" * 64) | |
|
169 | samples.append(b"foobar" * 64) | |
|
167 | 170 | |
|
168 | 171 | d = zstd.train_dictionary(8192, samples) |
|
169 | 172 | |
|
170 | 173 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
171 | 174 | |
|
172 | 175 | for i in range(32): |
|
173 |
cctx.compress(b |
|
|
176 | cctx.compress(b"foo bar foobar foo bar foobar") | |
|
174 | 177 | |
|
175 | 178 | def test_dict_precompute(self): |
|
176 | 179 | samples = [] |
|
177 | 180 | for i in range(128): |
|
178 |
samples.append(b |
|
|
179 |
samples.append(b |
|
|
180 |
samples.append(b |
|
|
181 | samples.append(b"foo" * 64) | |
|
182 | samples.append(b"bar" * 64) | |
|
183 | samples.append(b"foobar" * 64) | |
|
181 | 184 | |
|
182 | 185 | d = zstd.train_dictionary(8192, samples) |
|
183 | 186 | d.precompute_compress(level=1) |
|
184 | 187 | |
|
185 | 188 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
186 | 189 | |
|
187 | 190 | for i in range(32): |
|
188 |
cctx.compress(b |
|
|
191 | cctx.compress(b"foo bar foobar foo bar foobar") | |
|
189 | 192 | |
|
190 | 193 | def test_multithreaded(self): |
|
191 | 194 | chunk_size = multithreaded_chunk_size(1) |
|
192 |
source = b |
|
|
195 | source = b"".join([b"x" * chunk_size, b"y" * chunk_size]) | |
|
193 | 196 | |
|
194 | 197 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
|
195 | 198 | compressed = cctx.compress(source) |
|
196 | 199 | |
|
197 | 200 | params = zstd.get_frame_parameters(compressed) |
|
198 | 201 | self.assertEqual(params.content_size, chunk_size * 2) |
|
199 | 202 | self.assertEqual(params.dict_id, 0) |
|
200 | 203 | self.assertFalse(params.has_checksum) |
|
201 | 204 | |
|
202 | 205 | dctx = zstd.ZstdDecompressor() |
|
203 | 206 | self.assertEqual(dctx.decompress(compressed), source) |
|
204 | 207 | |
|
205 | 208 | def test_multithreaded_dict(self): |
|
206 | 209 | samples = [] |
|
207 | 210 | for i in range(128): |
|
208 |
samples.append(b |
|
|
209 |
samples.append(b |
|
|
210 |
samples.append(b |
|
|
211 | samples.append(b"foo" * 64) | |
|
212 | samples.append(b"bar" * 64) | |
|
213 | samples.append(b"foobar" * 64) | |
|
211 | 214 | |
|
212 | 215 | d = zstd.train_dictionary(1024, samples) |
|
213 | 216 | |
|
214 | 217 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) |
|
215 | 218 | |
|
216 |
result = cctx.compress(b |
|
|
217 |
params = zstd.get_frame_parameters(result) |
|
|
218 |
self.assertEqual(params.content_size, 3) |
|
|
219 | result = cctx.compress(b"foo") | |
|
220 | params = zstd.get_frame_parameters(result) | |
|
221 | self.assertEqual(params.content_size, 3) | |
|
219 | 222 | self.assertEqual(params.dict_id, d.dict_id()) |
|
220 | 223 | |
|
221 |
self.assertEqual( |
|
|
222 | b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00' | |
|
223 | b'\x66\x6f\x6f') | |
|
224 | self.assertEqual( | |
|
225 | result, | |
|
226 | b"\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00" b"\x66\x6f\x6f", | |
|
227 | ) | |
|
224 | 228 | |
|
225 | 229 | def test_multithreaded_compression_params(self): |
|
226 | 230 | params = zstd.ZstdCompressionParameters.from_level(0, threads=2) |
|
227 | 231 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
228 | 232 | |
|
229 |
result = cctx.compress(b |
|
|
230 |
params = zstd.get_frame_parameters(result) |
|
|
231 |
self.assertEqual(params.content_size, 3) |
|
|
233 | result = cctx.compress(b"foo") | |
|
234 | params = zstd.get_frame_parameters(result) | |
|
235 | self.assertEqual(params.content_size, 3) | |
|
232 | 236 | |
|
233 | self.assertEqual(result, | |
|
234 | b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') | |
|
237 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f") | |
|
235 | 238 | |
|
236 | 239 | |
|
237 | 240 | @make_cffi |
|
238 |
class TestCompressor_compressobj( |
|
|
241 | class TestCompressor_compressobj(TestCase): | |
|
239 | 242 | def test_compressobj_empty(self): |
|
240 | 243 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
241 | 244 | cobj = cctx.compressobj() |
|
242 |
self.assertEqual(cobj.compress(b |
|
|
243 | self.assertEqual(cobj.flush(), | |
|
244 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
|
245 | self.assertEqual(cobj.compress(b""), b"") | |
|
246 | self.assertEqual(cobj.flush(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
245 | 247 | |
|
246 | 248 | def test_input_types(self): |
|
247 |
expected = b |
|
|
249 | expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" | |
|
248 | 250 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
249 | 251 | |
|
250 | 252 | mutable_array = bytearray(3) |
|
251 |
mutable_array[:] = b |
|
|
253 | mutable_array[:] = b"foo" | |
|
252 | 254 | |
|
253 | 255 | sources = [ |
|
254 |
memoryview(b |
|
|
255 |
bytearray(b |
|
|
256 | memoryview(b"foo"), | |
|
257 | bytearray(b"foo"), | |
|
256 | 258 | mutable_array, |
|
257 | 259 | ] |
|
258 | 260 | |
|
259 | 261 | for source in sources: |
|
260 | 262 | cobj = cctx.compressobj() |
|
261 |
self.assertEqual(cobj.compress(source), b |
|
|
263 | self.assertEqual(cobj.compress(source), b"") | |
|
262 | 264 | self.assertEqual(cobj.flush(), expected) |
|
263 | 265 | |
|
264 | 266 | def test_compressobj_large(self): |
|
265 | 267 | chunks = [] |
|
266 | 268 | for i in range(255): |
|
267 |
chunks.append(struct.Struct( |
|
|
269 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
|
268 | 270 | |
|
269 | 271 | cctx = zstd.ZstdCompressor(level=3) |
|
270 | 272 | cobj = cctx.compressobj() |
|
271 | 273 | |
|
272 |
result = cobj.compress(b |
|
|
274 | result = cobj.compress(b"".join(chunks)) + cobj.flush() | |
|
273 | 275 | self.assertEqual(len(result), 999) |
|
274 |
self.assertEqual(result[0:4], b |
|
|
276 | self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") | |
|
275 | 277 | |
|
276 | 278 | params = zstd.get_frame_parameters(result) |
|
277 | 279 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
278 | 280 | self.assertEqual(params.window_size, 2097152) |
|
279 | 281 | self.assertEqual(params.dict_id, 0) |
|
280 | 282 | self.assertFalse(params.has_checksum) |
|
281 | 283 | |
|
282 | 284 | def test_write_checksum(self): |
|
283 | 285 | cctx = zstd.ZstdCompressor(level=1) |
|
284 | 286 | cobj = cctx.compressobj() |
|
285 |
no_checksum = cobj.compress(b |
|
|
287 | no_checksum = cobj.compress(b"foobar") + cobj.flush() | |
|
286 | 288 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
287 | 289 | cobj = cctx.compressobj() |
|
288 |
with_checksum = cobj.compress(b |
|
|
290 | with_checksum = cobj.compress(b"foobar") + cobj.flush() | |
|
289 | 291 | |
|
290 | 292 | no_params = zstd.get_frame_parameters(no_checksum) |
|
291 | 293 | with_params = zstd.get_frame_parameters(with_checksum) |
|
292 | 294 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
293 | 295 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
294 | 296 | self.assertEqual(no_params.dict_id, 0) |
|
295 | 297 | self.assertEqual(with_params.dict_id, 0) |
|
296 | 298 | self.assertFalse(no_params.has_checksum) |
|
297 | 299 | self.assertTrue(with_params.has_checksum) |
|
298 | 300 | |
|
299 | 301 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
300 | 302 | |
|
301 | 303 | def test_write_content_size(self): |
|
302 | 304 | cctx = zstd.ZstdCompressor(level=1) |
|
303 |
cobj = cctx.compressobj(size=len(b |
|
|
304 |
with_size = cobj.compress(b |
|
|
305 | cobj = cctx.compressobj(size=len(b"foobar" * 256)) | |
|
306 | with_size = cobj.compress(b"foobar" * 256) + cobj.flush() | |
|
305 | 307 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
306 |
cobj = cctx.compressobj(size=len(b |
|
|
307 |
no_size = cobj.compress(b |
|
|
308 | cobj = cctx.compressobj(size=len(b"foobar" * 256)) | |
|
309 | no_size = cobj.compress(b"foobar" * 256) + cobj.flush() | |
|
308 | 310 | |
|
309 | 311 | no_params = zstd.get_frame_parameters(no_size) |
|
310 | 312 | with_params = zstd.get_frame_parameters(with_size) |
|
311 | 313 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
312 | 314 | self.assertEqual(with_params.content_size, 1536) |
|
313 | 315 | self.assertEqual(no_params.dict_id, 0) |
|
314 | 316 | self.assertEqual(with_params.dict_id, 0) |
|
315 | 317 | self.assertFalse(no_params.has_checksum) |
|
316 | 318 | self.assertFalse(with_params.has_checksum) |
|
317 | 319 | |
|
318 | 320 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
319 | 321 | |
|
320 | 322 | def test_compress_after_finished(self): |
|
321 | 323 | cctx = zstd.ZstdCompressor() |
|
322 | 324 | cobj = cctx.compressobj() |
|
323 | 325 | |
|
324 |
cobj.compress(b |
|
|
326 | cobj.compress(b"foo") | |
|
325 | 327 | cobj.flush() |
|
326 | 328 | |
|
327 | with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'): | |
|
328 | cobj.compress(b'foo') | |
|
329 | with self.assertRaisesRegex( | |
|
330 | zstd.ZstdError, r"cannot call compress\(\) after compressor" | |
|
331 | ): | |
|
332 | cobj.compress(b"foo") | |
|
329 | 333 | |
|
330 | with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'): | |
|
334 | with self.assertRaisesRegex( | |
|
335 | zstd.ZstdError, "compressor object already finished" | |
|
336 | ): | |
|
331 | 337 | cobj.flush() |
|
332 | 338 | |
|
333 | 339 | def test_flush_block_repeated(self): |
|
334 | 340 | cctx = zstd.ZstdCompressor(level=1) |
|
335 | 341 | cobj = cctx.compressobj() |
|
336 | 342 | |
|
337 |
self.assertEqual(cobj.compress(b |
|
|
338 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
|
339 | b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') | |
|
340 | self.assertEqual(cobj.compress(b'bar'), b'') | |
|
343 | self.assertEqual(cobj.compress(b"foo"), b"") | |
|
344 | self.assertEqual( | |
|
345 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
|
346 | b"\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo", | |
|
347 | ) | |
|
348 | self.assertEqual(cobj.compress(b"bar"), b"") | |
|
341 | 349 | # 3 byte header plus content. |
|
342 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
|
343 | b'\x18\x00\x00bar') | |
|
344 | self.assertEqual(cobj.flush(), b'\x01\x00\x00') | |
|
350 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"\x18\x00\x00bar") | |
|
351 | self.assertEqual(cobj.flush(), b"\x01\x00\x00") | |
|
345 | 352 | |
|
346 | 353 | def test_flush_empty_block(self): |
|
347 | 354 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
348 | 355 | cobj = cctx.compressobj() |
|
349 | 356 | |
|
350 |
cobj.compress(b |
|
|
357 | cobj.compress(b"foobar") | |
|
351 | 358 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
352 | 359 | # No-op if no block is active (this is internal to zstd). |
|
353 |
self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b |
|
|
360 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"") | |
|
354 | 361 | |
|
355 | 362 | trailing = cobj.flush() |
|
356 | 363 | # 3 bytes block header + 4 bytes frame checksum |
|
357 | 364 | self.assertEqual(len(trailing), 7) |
|
358 | 365 | header = trailing[0:3] |
|
359 |
self.assertEqual(header, b |
|
|
366 | self.assertEqual(header, b"\x01\x00\x00") | |
|
360 | 367 | |
|
361 | 368 | def test_multithreaded(self): |
|
362 | 369 | source = io.BytesIO() |
|
363 |
source.write(b |
|
|
364 |
source.write(b |
|
|
365 |
source.write(b |
|
|
370 | source.write(b"a" * 1048576) | |
|
371 | source.write(b"b" * 1048576) | |
|
372 | source.write(b"c" * 1048576) | |
|
366 | 373 | source.seek(0) |
|
367 | 374 | |
|
368 | 375 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
|
369 | 376 | cobj = cctx.compressobj() |
|
370 | 377 | |
|
371 | 378 | chunks = [] |
|
372 | 379 | while True: |
|
373 | 380 | d = source.read(8192) |
|
374 | 381 | if not d: |
|
375 | 382 | break |
|
376 | 383 | |
|
377 | 384 | chunks.append(cobj.compress(d)) |
|
378 | 385 | |
|
379 | 386 | chunks.append(cobj.flush()) |
|
380 | 387 | |
|
381 |
compressed = b |
|
|
388 | compressed = b"".join(chunks) | |
|
382 | 389 | |
|
383 |
self.assertEqual(len(compressed), |
|
|
390 | self.assertEqual(len(compressed), 119) | |
|
384 | 391 | |
|
385 | 392 | def test_frame_progression(self): |
|
386 | 393 | cctx = zstd.ZstdCompressor() |
|
387 | 394 | |
|
388 | 395 | self.assertEqual(cctx.frame_progression(), (0, 0, 0)) |
|
389 | 396 | |
|
390 | 397 | cobj = cctx.compressobj() |
|
391 | 398 | |
|
392 |
cobj.compress(b |
|
|
399 | cobj.compress(b"foobar") | |
|
393 | 400 | self.assertEqual(cctx.frame_progression(), (6, 0, 0)) |
|
394 | 401 | |
|
395 | 402 | cobj.flush() |
|
396 | 403 | self.assertEqual(cctx.frame_progression(), (6, 6, 15)) |
|
397 | 404 | |
|
398 | 405 | def test_bad_size(self): |
|
399 | 406 | cctx = zstd.ZstdCompressor() |
|
400 | 407 | |
|
401 | 408 | cobj = cctx.compressobj(size=2) |
|
402 |
with self.assertRaisesRegex |
|
|
403 |
cobj.compress(b |
|
|
409 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
410 | cobj.compress(b"foo") | |
|
404 | 411 | |
|
405 | 412 | # Try another operation on this instance. |
|
406 |
with self.assertRaisesRegex |
|
|
407 |
cobj.compress(b |
|
|
413 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
414 | cobj.compress(b"aa") | |
|
408 | 415 | |
|
409 | 416 | # Try another operation on the compressor. |
|
410 | 417 | cctx.compressobj(size=4) |
|
411 |
cctx.compress(b |
|
|
418 | cctx.compress(b"foobar") | |
|
412 | 419 | |
|
413 | 420 | |
|
414 | 421 | @make_cffi |
|
415 |
class TestCompressor_copy_stream( |
|
|
422 | class TestCompressor_copy_stream(TestCase): | |
|
416 | 423 | def test_no_read(self): |
|
417 | 424 | source = object() |
|
418 | 425 | dest = io.BytesIO() |
|
419 | 426 | |
|
420 | 427 | cctx = zstd.ZstdCompressor() |
|
421 | 428 | with self.assertRaises(ValueError): |
|
422 | 429 | cctx.copy_stream(source, dest) |
|
423 | 430 | |
|
424 | 431 | def test_no_write(self): |
|
425 | 432 | source = io.BytesIO() |
|
426 | 433 | dest = object() |
|
427 | 434 | |
|
428 | 435 | cctx = zstd.ZstdCompressor() |
|
429 | 436 | with self.assertRaises(ValueError): |
|
430 | 437 | cctx.copy_stream(source, dest) |
|
431 | 438 | |
|
432 | 439 | def test_empty(self): |
|
433 | 440 | source = io.BytesIO() |
|
434 | 441 | dest = io.BytesIO() |
|
435 | 442 | |
|
436 | 443 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
437 | 444 | r, w = cctx.copy_stream(source, dest) |
|
438 | 445 | self.assertEqual(int(r), 0) |
|
439 | 446 | self.assertEqual(w, 9) |
|
440 | 447 | |
|
441 | self.assertEqual(dest.getvalue(), | |
|
442 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') | |
|
448 | self.assertEqual(dest.getvalue(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
443 | 449 | |
|
444 | 450 | def test_large_data(self): |
|
445 | 451 | source = io.BytesIO() |
|
446 | 452 | for i in range(255): |
|
447 |
source.write(struct.Struct( |
|
|
453 | source.write(struct.Struct(">B").pack(i) * 16384) | |
|
448 | 454 | source.seek(0) |
|
449 | 455 | |
|
450 | 456 | dest = io.BytesIO() |
|
451 | 457 | cctx = zstd.ZstdCompressor() |
|
452 | 458 | r, w = cctx.copy_stream(source, dest) |
|
453 | 459 | |
|
454 | 460 | self.assertEqual(r, 255 * 16384) |
|
455 | 461 | self.assertEqual(w, 999) |
|
456 | 462 | |
|
457 | 463 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
458 | 464 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
459 | 465 | self.assertEqual(params.window_size, 2097152) |
|
460 | 466 | self.assertEqual(params.dict_id, 0) |
|
461 | 467 | self.assertFalse(params.has_checksum) |
|
462 | 468 | |
|
463 | 469 | def test_write_checksum(self): |
|
464 |
source = io.BytesIO(b |
|
|
470 | source = io.BytesIO(b"foobar") | |
|
465 | 471 | no_checksum = io.BytesIO() |
|
466 | 472 | |
|
467 | 473 | cctx = zstd.ZstdCompressor(level=1) |
|
468 | 474 | cctx.copy_stream(source, no_checksum) |
|
469 | 475 | |
|
470 | 476 | source.seek(0) |
|
471 | 477 | with_checksum = io.BytesIO() |
|
472 | 478 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
473 | 479 | cctx.copy_stream(source, with_checksum) |
|
474 | 480 | |
|
475 | self.assertEqual(len(with_checksum.getvalue()), | |
|
476 | len(no_checksum.getvalue()) + 4) | |
|
481 | self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) | |
|
477 | 482 | |
|
478 | 483 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
479 | 484 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
480 | 485 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
481 | 486 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
482 | 487 | self.assertEqual(no_params.dict_id, 0) |
|
483 | 488 | self.assertEqual(with_params.dict_id, 0) |
|
484 | 489 | self.assertFalse(no_params.has_checksum) |
|
485 | 490 | self.assertTrue(with_params.has_checksum) |
|
486 | 491 | |
|
487 | 492 | def test_write_content_size(self): |
|
488 |
source = io.BytesIO(b |
|
|
493 | source = io.BytesIO(b"foobar" * 256) | |
|
489 | 494 | no_size = io.BytesIO() |
|
490 | 495 | |
|
491 | 496 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
492 | 497 | cctx.copy_stream(source, no_size) |
|
493 | 498 | |
|
494 | 499 | source.seek(0) |
|
495 | 500 | with_size = io.BytesIO() |
|
496 | 501 | cctx = zstd.ZstdCompressor(level=1) |
|
497 | 502 | cctx.copy_stream(source, with_size) |
|
498 | 503 | |
|
499 | 504 | # Source content size is unknown, so no content size written. |
|
500 | self.assertEqual(len(with_size.getvalue()), | |
|
501 | len(no_size.getvalue())) | |
|
505 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) | |
|
502 | 506 | |
|
503 | 507 | source.seek(0) |
|
504 | 508 | with_size = io.BytesIO() |
|
505 | 509 | cctx.copy_stream(source, with_size, size=len(source.getvalue())) |
|
506 | 510 | |
|
507 | 511 | # We specified source size, so content size header is present. |
|
508 | self.assertEqual(len(with_size.getvalue()), | |
|
509 | len(no_size.getvalue()) + 1) | |
|
512 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) | |
|
510 | 513 | |
|
511 | 514 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
512 | 515 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
513 | 516 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
514 | 517 | self.assertEqual(with_params.content_size, 1536) |
|
515 | 518 | self.assertEqual(no_params.dict_id, 0) |
|
516 | 519 | self.assertEqual(with_params.dict_id, 0) |
|
517 | 520 | self.assertFalse(no_params.has_checksum) |
|
518 | 521 | self.assertFalse(with_params.has_checksum) |
|
519 | 522 | |
|
520 | 523 | def test_read_write_size(self): |
|
521 |
source = OpCountingBytesIO(b |
|
|
524 | source = OpCountingBytesIO(b"foobarfoobar") | |
|
522 | 525 | dest = OpCountingBytesIO() |
|
523 | 526 | cctx = zstd.ZstdCompressor() |
|
524 | 527 | r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
525 | 528 | |
|
526 | 529 | self.assertEqual(r, len(source.getvalue())) |
|
527 | 530 | self.assertEqual(w, 21) |
|
528 | 531 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
529 | 532 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
530 | 533 | |
|
531 | 534 | def test_multithreaded(self): |
|
532 | 535 | source = io.BytesIO() |
|
533 |
source.write(b |
|
|
534 |
source.write(b |
|
|
535 |
source.write(b |
|
|
536 | source.write(b"a" * 1048576) | |
|
537 | source.write(b"b" * 1048576) | |
|
538 | source.write(b"c" * 1048576) | |
|
536 | 539 | source.seek(0) |
|
537 | 540 | |
|
538 | 541 | dest = io.BytesIO() |
|
539 | 542 | cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) |
|
540 | 543 | r, w = cctx.copy_stream(source, dest) |
|
541 | 544 | self.assertEqual(r, 3145728) |
|
542 |
self.assertEqual(w, |
|
|
545 | self.assertEqual(w, 111) | |
|
543 | 546 | |
|
544 | 547 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
545 | 548 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
546 | 549 | self.assertEqual(params.dict_id, 0) |
|
547 | 550 | self.assertFalse(params.has_checksum) |
|
548 | 551 | |
|
549 | 552 | # Writing content size and checksum works. |
|
550 | 553 | cctx = zstd.ZstdCompressor(threads=2, write_checksum=True) |
|
551 | 554 | dest = io.BytesIO() |
|
552 | 555 | source.seek(0) |
|
553 | 556 | cctx.copy_stream(source, dest, size=len(source.getvalue())) |
|
554 | 557 | |
|
555 | 558 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
556 | 559 | self.assertEqual(params.content_size, 3145728) |
|
557 | 560 | self.assertEqual(params.dict_id, 0) |
|
558 | 561 | self.assertTrue(params.has_checksum) |
|
559 | 562 | |
|
560 | 563 | def test_bad_size(self): |
|
561 | 564 | source = io.BytesIO() |
|
562 |
source.write(b |
|
|
563 |
source.write(b |
|
|
565 | source.write(b"a" * 32768) | |
|
566 | source.write(b"b" * 32768) | |
|
564 | 567 | source.seek(0) |
|
565 | 568 | |
|
566 | 569 | dest = io.BytesIO() |
|
567 | 570 | |
|
568 | 571 | cctx = zstd.ZstdCompressor() |
|
569 | 572 | |
|
570 |
with self.assertRaisesRegex |
|
|
573 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
571 | 574 | cctx.copy_stream(source, dest, size=42) |
|
572 | 575 | |
|
573 | 576 | # Try another operation on this compressor. |
|
574 | 577 | source.seek(0) |
|
575 | 578 | dest = io.BytesIO() |
|
576 | 579 | cctx.copy_stream(source, dest) |
|
577 | 580 | |
|
578 | 581 | |
|
579 | 582 | @make_cffi |
|
580 |
class TestCompressor_stream_reader( |
|
|
583 | class TestCompressor_stream_reader(TestCase): | |
|
581 | 584 | def test_context_manager(self): |
|
582 | 585 | cctx = zstd.ZstdCompressor() |
|
583 | 586 | |
|
584 |
with cctx.stream_reader(b |
|
|
585 |
with self.assertRaisesRegex |
|
|
587 | with cctx.stream_reader(b"foo") as reader: | |
|
588 | with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): | |
|
586 | 589 | with reader as reader2: |
|
587 | 590 | pass |
|
588 | 591 | |
|
589 | 592 | def test_no_context_manager(self): |
|
590 | 593 | cctx = zstd.ZstdCompressor() |
|
591 | 594 | |
|
592 |
reader = cctx.stream_reader(b |
|
|
595 | reader = cctx.stream_reader(b"foo") | |
|
593 | 596 | reader.read(4) |
|
594 | 597 | self.assertFalse(reader.closed) |
|
595 | 598 | |
|
596 | 599 | reader.close() |
|
597 | 600 | self.assertTrue(reader.closed) |
|
598 |
with self.assertRaisesRegex |
|
|
601 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
599 | 602 | reader.read(1) |
|
600 | 603 | |
|
601 | 604 | def test_not_implemented(self): |
|
602 | 605 | cctx = zstd.ZstdCompressor() |
|
603 | 606 | |
|
604 |
with cctx.stream_reader(b |
|
|
607 | with cctx.stream_reader(b"foo" * 60) as reader: | |
|
605 | 608 | with self.assertRaises(io.UnsupportedOperation): |
|
606 | 609 | reader.readline() |
|
607 | 610 | |
|
608 | 611 | with self.assertRaises(io.UnsupportedOperation): |
|
609 | 612 | reader.readlines() |
|
610 | 613 | |
|
611 | 614 | with self.assertRaises(io.UnsupportedOperation): |
|
612 | 615 | iter(reader) |
|
613 | 616 | |
|
614 | 617 | with self.assertRaises(io.UnsupportedOperation): |
|
615 | 618 | next(reader) |
|
616 | 619 | |
|
617 | 620 | with self.assertRaises(OSError): |
|
618 | 621 | reader.writelines([]) |
|
619 | 622 | |
|
620 | 623 | with self.assertRaises(OSError): |
|
621 |
reader.write(b |
|
|
624 | reader.write(b"foo") | |
|
622 | 625 | |
|
623 | 626 | def test_constant_methods(self): |
|
624 | 627 | cctx = zstd.ZstdCompressor() |
|
625 | 628 | |
|
626 |
with cctx.stream_reader(b |
|
|
629 | with cctx.stream_reader(b"boo") as reader: | |
|
627 | 630 | self.assertTrue(reader.readable()) |
|
628 | 631 | self.assertFalse(reader.writable()) |
|
629 | 632 | self.assertFalse(reader.seekable()) |
|
630 | 633 | self.assertFalse(reader.isatty()) |
|
631 | 634 | self.assertFalse(reader.closed) |
|
632 | 635 | self.assertIsNone(reader.flush()) |
|
633 | 636 | self.assertFalse(reader.closed) |
|
634 | 637 | |
|
635 | 638 | self.assertTrue(reader.closed) |
|
636 | 639 | |
|
637 | 640 | def test_read_closed(self): |
|
638 | 641 | cctx = zstd.ZstdCompressor() |
|
639 | 642 | |
|
640 |
with cctx.stream_reader(b |
|
|
643 | with cctx.stream_reader(b"foo" * 60) as reader: | |
|
641 | 644 | reader.close() |
|
642 | 645 | self.assertTrue(reader.closed) |
|
643 |
with self.assertRaisesRegex |
|
|
646 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
644 | 647 | reader.read(10) |
|
645 | 648 | |
|
646 | 649 | def test_read_sizes(self): |
|
647 | 650 | cctx = zstd.ZstdCompressor() |
|
648 |
foo = cctx.compress(b |
|
|
651 | foo = cctx.compress(b"foo") | |
|
649 | 652 | |
|
650 |
with cctx.stream_reader(b |
|
|
651 | with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): | |
|
653 | with cctx.stream_reader(b"foo") as reader: | |
|
654 | with self.assertRaisesRegex( | |
|
655 | ValueError, "cannot read negative amounts less than -1" | |
|
656 | ): | |
|
652 | 657 | reader.read(-2) |
|
653 | 658 | |
|
654 |
self.assertEqual(reader.read(0), b |
|
|
659 | self.assertEqual(reader.read(0), b"") | |
|
655 | 660 | self.assertEqual(reader.read(), foo) |
|
656 | 661 | |
|
657 | 662 | def test_read_buffer(self): |
|
658 | 663 | cctx = zstd.ZstdCompressor() |
|
659 | 664 | |
|
660 |
source = b |
|
|
665 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
661 | 666 | frame = cctx.compress(source) |
|
662 | 667 | |
|
663 | 668 | with cctx.stream_reader(source) as reader: |
|
664 | 669 | self.assertEqual(reader.tell(), 0) |
|
665 | 670 | |
|
666 | 671 | # We should get entire frame in one read. |
|
667 | 672 | result = reader.read(8192) |
|
668 | 673 | self.assertEqual(result, frame) |
|
669 | 674 | self.assertEqual(reader.tell(), len(result)) |
|
670 |
self.assertEqual(reader.read(), b |
|
|
675 | self.assertEqual(reader.read(), b"") | |
|
671 | 676 | self.assertEqual(reader.tell(), len(result)) |
|
672 | 677 | |
|
673 | 678 | def test_read_buffer_small_chunks(self): |
|
674 | 679 | cctx = zstd.ZstdCompressor() |
|
675 | 680 | |
|
676 |
source = b |
|
|
681 | source = b"foo" * 60 | |
|
677 | 682 | chunks = [] |
|
678 | 683 | |
|
679 | 684 | with cctx.stream_reader(source) as reader: |
|
680 | 685 | self.assertEqual(reader.tell(), 0) |
|
681 | 686 | |
|
682 | 687 | while True: |
|
683 | 688 | chunk = reader.read(1) |
|
684 | 689 | if not chunk: |
|
685 | 690 | break |
|
686 | 691 | |
|
687 | 692 | chunks.append(chunk) |
|
688 | 693 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
689 | 694 | |
|
690 |
self.assertEqual(b |
|
|
695 | self.assertEqual(b"".join(chunks), cctx.compress(source)) | |
|
691 | 696 | |
|
692 | 697 | def test_read_stream(self): |
|
693 | 698 | cctx = zstd.ZstdCompressor() |
|
694 | 699 | |
|
695 |
source = b |
|
|
700 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
696 | 701 | frame = cctx.compress(source) |
|
697 | 702 | |
|
698 | 703 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
|
699 | 704 | self.assertEqual(reader.tell(), 0) |
|
700 | 705 | |
|
701 | 706 | chunk = reader.read(8192) |
|
702 | 707 | self.assertEqual(chunk, frame) |
|
703 | 708 | self.assertEqual(reader.tell(), len(chunk)) |
|
704 |
self.assertEqual(reader.read(), b |
|
|
709 | self.assertEqual(reader.read(), b"") | |
|
705 | 710 | self.assertEqual(reader.tell(), len(chunk)) |
|
706 | 711 | |
|
707 | 712 | def test_read_stream_small_chunks(self): |
|
708 | 713 | cctx = zstd.ZstdCompressor() |
|
709 | 714 | |
|
710 |
source = b |
|
|
715 | source = b"foo" * 60 | |
|
711 | 716 | chunks = [] |
|
712 | 717 | |
|
713 | 718 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
|
714 | 719 | self.assertEqual(reader.tell(), 0) |
|
715 | 720 | |
|
716 | 721 | while True: |
|
717 | 722 | chunk = reader.read(1) |
|
718 | 723 | if not chunk: |
|
719 | 724 | break |
|
720 | 725 | |
|
721 | 726 | chunks.append(chunk) |
|
722 | 727 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
723 | 728 | |
|
724 |
self.assertEqual(b |
|
|
729 | self.assertEqual(b"".join(chunks), cctx.compress(source)) | |
|
725 | 730 | |
|
726 | 731 | def test_read_after_exit(self): |
|
727 | 732 | cctx = zstd.ZstdCompressor() |
|
728 | 733 | |
|
729 |
with cctx.stream_reader(b |
|
|
734 | with cctx.stream_reader(b"foo" * 60) as reader: | |
|
730 | 735 | while reader.read(8192): |
|
731 | 736 | pass |
|
732 | 737 | |
|
733 |
with self.assertRaisesRegex |
|
|
738 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
734 | 739 | reader.read(10) |
|
735 | 740 | |
|
736 | 741 | def test_bad_size(self): |
|
737 | 742 | cctx = zstd.ZstdCompressor() |
|
738 | 743 | |
|
739 |
source = io.BytesIO(b |
|
|
744 | source = io.BytesIO(b"foobar") | |
|
740 | 745 | |
|
741 | 746 | with cctx.stream_reader(source, size=2) as reader: |
|
742 |
with self.assertRaisesRegex |
|
|
747 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
743 | 748 | reader.read(10) |
|
744 | 749 | |
|
745 | 750 | # Try another compression operation. |
|
746 | 751 | with cctx.stream_reader(source, size=42): |
|
747 | 752 | pass |
|
748 | 753 | |
|
749 | 754 | def test_readall(self): |
|
750 | 755 | cctx = zstd.ZstdCompressor() |
|
751 |
frame = cctx.compress(b |
|
|
756 | frame = cctx.compress(b"foo" * 1024) | |
|
752 | 757 | |
|
753 |
reader = cctx.stream_reader(b |
|
|
758 | reader = cctx.stream_reader(b"foo" * 1024) | |
|
754 | 759 | self.assertEqual(reader.readall(), frame) |
|
755 | 760 | |
|
756 | 761 | def test_readinto(self): |
|
757 | 762 | cctx = zstd.ZstdCompressor() |
|
758 |
foo = cctx.compress(b |
|
|
763 | foo = cctx.compress(b"foo") | |
|
759 | 764 | |
|
760 |
reader = cctx.stream_reader(b |
|
|
765 | reader = cctx.stream_reader(b"foo") | |
|
761 | 766 | with self.assertRaises(Exception): |
|
762 |
reader.readinto(b |
|
|
767 | reader.readinto(b"foobar") | |
|
763 | 768 | |
|
764 | 769 | # readinto() with sufficiently large destination. |
|
765 | 770 | b = bytearray(1024) |
|
766 |
reader = cctx.stream_reader(b |
|
|
771 | reader = cctx.stream_reader(b"foo") | |
|
767 | 772 | self.assertEqual(reader.readinto(b), len(foo)) |
|
768 | self.assertEqual(b[0:len(foo)], foo) | |
|
773 | self.assertEqual(b[0 : len(foo)], foo) | |
|
769 | 774 | self.assertEqual(reader.readinto(b), 0) |
|
770 | self.assertEqual(b[0:len(foo)], foo) | |
|
775 | self.assertEqual(b[0 : len(foo)], foo) | |
|
771 | 776 | |
|
772 | 777 | # readinto() with small reads. |
|
773 | 778 | b = bytearray(1024) |
|
774 |
reader = cctx.stream_reader(b |
|
|
779 | reader = cctx.stream_reader(b"foo", read_size=1) | |
|
775 | 780 | self.assertEqual(reader.readinto(b), len(foo)) |
|
776 | self.assertEqual(b[0:len(foo)], foo) | |
|
781 | self.assertEqual(b[0 : len(foo)], foo) | |
|
777 | 782 | |
|
778 | 783 | # Too small destination buffer. |
|
779 | 784 | b = bytearray(2) |
|
780 |
reader = cctx.stream_reader(b |
|
|
785 | reader = cctx.stream_reader(b"foo") | |
|
781 | 786 | self.assertEqual(reader.readinto(b), 2) |
|
782 | 787 | self.assertEqual(b[:], foo[0:2]) |
|
783 | 788 | self.assertEqual(reader.readinto(b), 2) |
|
784 | 789 | self.assertEqual(b[:], foo[2:4]) |
|
785 | 790 | self.assertEqual(reader.readinto(b), 2) |
|
786 | 791 | self.assertEqual(b[:], foo[4:6]) |
|
787 | 792 | |
|
788 | 793 | def test_readinto1(self): |
|
789 | 794 | cctx = zstd.ZstdCompressor() |
|
790 |
foo = b |
|
|
795 | foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) | |
|
791 | 796 | |
|
792 |
reader = cctx.stream_reader(b |
|
|
797 | reader = cctx.stream_reader(b"foo") | |
|
793 | 798 | with self.assertRaises(Exception): |
|
794 |
reader.readinto1(b |
|
|
799 | reader.readinto1(b"foobar") | |
|
795 | 800 | |
|
796 | 801 | b = bytearray(1024) |
|
797 |
source = OpCountingBytesIO(b |
|
|
802 | source = OpCountingBytesIO(b"foo") | |
|
798 | 803 | reader = cctx.stream_reader(source) |
|
799 | 804 | self.assertEqual(reader.readinto1(b), len(foo)) |
|
800 | self.assertEqual(b[0:len(foo)], foo) | |
|
805 | self.assertEqual(b[0 : len(foo)], foo) | |
|
801 | 806 | self.assertEqual(source._read_count, 2) |
|
802 | 807 | |
|
803 | 808 | # readinto1() with small reads. |
|
804 | 809 | b = bytearray(1024) |
|
805 |
source = OpCountingBytesIO(b |
|
|
810 | source = OpCountingBytesIO(b"foo") | |
|
806 | 811 | reader = cctx.stream_reader(source, read_size=1) |
|
807 | 812 | self.assertEqual(reader.readinto1(b), len(foo)) |
|
808 | self.assertEqual(b[0:len(foo)], foo) | |
|
813 | self.assertEqual(b[0 : len(foo)], foo) | |
|
809 | 814 | self.assertEqual(source._read_count, 4) |
|
810 | 815 | |
|
811 | 816 | def test_read1(self): |
|
812 | 817 | cctx = zstd.ZstdCompressor() |
|
813 |
foo = b |
|
|
818 | foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) | |
|
814 | 819 | |
|
815 |
b = OpCountingBytesIO(b |
|
|
820 | b = OpCountingBytesIO(b"foo") | |
|
816 | 821 | reader = cctx.stream_reader(b) |
|
817 | 822 | |
|
818 | 823 | self.assertEqual(reader.read1(), foo) |
|
819 | 824 | self.assertEqual(b._read_count, 2) |
|
820 | 825 | |
|
821 |
b = OpCountingBytesIO(b |
|
|
826 | b = OpCountingBytesIO(b"foo") | |
|
822 | 827 | reader = cctx.stream_reader(b) |
|
823 | 828 | |
|
824 |
self.assertEqual(reader.read1(0), b |
|
|
829 | self.assertEqual(reader.read1(0), b"") | |
|
825 | 830 | self.assertEqual(reader.read1(2), foo[0:2]) |
|
826 | 831 | self.assertEqual(b._read_count, 2) |
|
827 | 832 | self.assertEqual(reader.read1(2), foo[2:4]) |
|
828 | 833 | self.assertEqual(reader.read1(1024), foo[4:]) |
|
829 | 834 | |
|
830 | 835 | |
|
831 | 836 | @make_cffi |
|
832 |
class TestCompressor_stream_writer( |
|
|
837 | class TestCompressor_stream_writer(TestCase): | |
|
833 | 838 | def test_io_api(self): |
|
834 | 839 | buffer = io.BytesIO() |
|
835 | 840 | cctx = zstd.ZstdCompressor() |
|
836 | 841 | writer = cctx.stream_writer(buffer) |
|
837 | 842 | |
|
838 | 843 | self.assertFalse(writer.isatty()) |
|
839 | 844 | self.assertFalse(writer.readable()) |
|
840 | 845 | |
|
841 | 846 | with self.assertRaises(io.UnsupportedOperation): |
|
842 | 847 | writer.readline() |
|
843 | 848 | |
|
844 | 849 | with self.assertRaises(io.UnsupportedOperation): |
|
845 | 850 | writer.readline(42) |
|
846 | 851 | |
|
847 | 852 | with self.assertRaises(io.UnsupportedOperation): |
|
848 | 853 | writer.readline(size=42) |
|
849 | 854 | |
|
850 | 855 | with self.assertRaises(io.UnsupportedOperation): |
|
851 | 856 | writer.readlines() |
|
852 | 857 | |
|
853 | 858 | with self.assertRaises(io.UnsupportedOperation): |
|
854 | 859 | writer.readlines(42) |
|
855 | 860 | |
|
856 | 861 | with self.assertRaises(io.UnsupportedOperation): |
|
857 | 862 | writer.readlines(hint=42) |
|
858 | 863 | |
|
859 | 864 | with self.assertRaises(io.UnsupportedOperation): |
|
860 | 865 | writer.seek(0) |
|
861 | 866 | |
|
862 | 867 | with self.assertRaises(io.UnsupportedOperation): |
|
863 | 868 | writer.seek(10, os.SEEK_SET) |
|
864 | 869 | |
|
865 | 870 | self.assertFalse(writer.seekable()) |
|
866 | 871 | |
|
867 | 872 | with self.assertRaises(io.UnsupportedOperation): |
|
868 | 873 | writer.truncate() |
|
869 | 874 | |
|
870 | 875 | with self.assertRaises(io.UnsupportedOperation): |
|
871 | 876 | writer.truncate(42) |
|
872 | 877 | |
|
873 | 878 | with self.assertRaises(io.UnsupportedOperation): |
|
874 | 879 | writer.truncate(size=42) |
|
875 | 880 | |
|
876 | 881 | self.assertTrue(writer.writable()) |
|
877 | 882 | |
|
878 | 883 | with self.assertRaises(NotImplementedError): |
|
879 | 884 | writer.writelines([]) |
|
880 | 885 | |
|
881 | 886 | with self.assertRaises(io.UnsupportedOperation): |
|
882 | 887 | writer.read() |
|
883 | 888 | |
|
884 | 889 | with self.assertRaises(io.UnsupportedOperation): |
|
885 | 890 | writer.read(42) |
|
886 | 891 | |
|
887 | 892 | with self.assertRaises(io.UnsupportedOperation): |
|
888 | 893 | writer.read(size=42) |
|
889 | 894 | |
|
890 | 895 | with self.assertRaises(io.UnsupportedOperation): |
|
891 | 896 | writer.readall() |
|
892 | 897 | |
|
893 | 898 | with self.assertRaises(io.UnsupportedOperation): |
|
894 | 899 | writer.readinto(None) |
|
895 | 900 | |
|
896 | 901 | with self.assertRaises(io.UnsupportedOperation): |
|
897 | 902 | writer.fileno() |
|
898 | 903 | |
|
899 | 904 | self.assertFalse(writer.closed) |
|
900 | 905 | |
|
901 | 906 | def test_fileno_file(self): |
|
902 |
with tempfile.TemporaryFile( |
|
|
907 | with tempfile.TemporaryFile("wb") as tf: | |
|
903 | 908 | cctx = zstd.ZstdCompressor() |
|
904 | 909 | writer = cctx.stream_writer(tf) |
|
905 | 910 | |
|
906 | 911 | self.assertEqual(writer.fileno(), tf.fileno()) |
|
907 | 912 | |
|
908 | 913 | def test_close(self): |
|
909 | 914 | buffer = NonClosingBytesIO() |
|
910 | 915 | cctx = zstd.ZstdCompressor(level=1) |
|
911 | 916 | writer = cctx.stream_writer(buffer) |
|
912 | 917 | |
|
913 |
writer.write(b |
|
|
918 | writer.write(b"foo" * 1024) | |
|
914 | 919 | self.assertFalse(writer.closed) |
|
915 | 920 | self.assertFalse(buffer.closed) |
|
916 | 921 | writer.close() |
|
917 | 922 | self.assertTrue(writer.closed) |
|
918 | 923 | self.assertTrue(buffer.closed) |
|
919 | 924 | |
|
920 |
with self.assertRaisesRegex |
|
|
921 |
writer.write(b |
|
|
925 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
926 | writer.write(b"foo") | |
|
922 | 927 | |
|
923 |
with self.assertRaisesRegex |
|
|
928 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
924 | 929 | writer.flush() |
|
925 | 930 | |
|
926 |
with self.assertRaisesRegex |
|
|
931 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
927 | 932 | with writer: |
|
928 | 933 | pass |
|
929 | 934 | |
|
930 |
self.assertEqual( |
|
|
931 | b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f' | |
|
932 | b'\x6f\x01\x00\xfa\xd3\x77\x43') | |
|
935 | self.assertEqual( | |
|
936 | buffer.getvalue(), | |
|
937 | b"\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f" | |
|
938 | b"\x6f\x01\x00\xfa\xd3\x77\x43", | |
|
939 | ) | |
|
933 | 940 | |
|
934 | 941 | # Context manager exit should close stream. |
|
935 | 942 | buffer = io.BytesIO() |
|
936 | 943 | writer = cctx.stream_writer(buffer) |
|
937 | 944 | |
|
938 | 945 | with writer: |
|
939 |
writer.write(b |
|
|
946 | writer.write(b"foo") | |
|
940 | 947 | |
|
941 | 948 | self.assertTrue(writer.closed) |
|
942 | 949 | |
|
943 | 950 | def test_empty(self): |
|
944 | 951 | buffer = NonClosingBytesIO() |
|
945 | 952 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
946 | 953 | with cctx.stream_writer(buffer) as compressor: |
|
947 |
compressor.write(b |
|
|
954 | compressor.write(b"") | |
|
948 | 955 | |
|
949 | 956 | result = buffer.getvalue() |
|
950 |
self.assertEqual(result, b |
|
|
957 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
951 | 958 | |
|
952 | 959 | params = zstd.get_frame_parameters(result) |
|
953 | 960 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
954 | 961 | self.assertEqual(params.window_size, 524288) |
|
955 | 962 | self.assertEqual(params.dict_id, 0) |
|
956 | 963 | self.assertFalse(params.has_checksum) |
|
957 | 964 | |
|
958 | 965 | # Test without context manager. |
|
959 | 966 | buffer = io.BytesIO() |
|
960 | 967 | compressor = cctx.stream_writer(buffer) |
|
961 |
self.assertEqual(compressor.write(b |
|
|
962 |
self.assertEqual(buffer.getvalue(), b |
|
|
968 | self.assertEqual(compressor.write(b""), 0) | |
|
969 | self.assertEqual(buffer.getvalue(), b"") | |
|
963 | 970 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9) |
|
964 | 971 | result = buffer.getvalue() |
|
965 |
self.assertEqual(result, b |
|
|
972 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
966 | 973 | |
|
967 | 974 | params = zstd.get_frame_parameters(result) |
|
968 | 975 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
969 | 976 | self.assertEqual(params.window_size, 524288) |
|
970 | 977 | self.assertEqual(params.dict_id, 0) |
|
971 | 978 | self.assertFalse(params.has_checksum) |
|
972 | 979 | |
|
973 | 980 | # Test write_return_read=True |
|
974 | 981 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
975 |
self.assertEqual(compressor.write(b |
|
|
982 | self.assertEqual(compressor.write(b""), 0) | |
|
976 | 983 | |
|
977 | 984 | def test_input_types(self): |
|
978 |
expected = b |
|
|
985 | expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" | |
|
979 | 986 | cctx = zstd.ZstdCompressor(level=1) |
|
980 | 987 | |
|
981 | 988 | mutable_array = bytearray(3) |
|
982 |
mutable_array[:] = b |
|
|
989 | mutable_array[:] = b"foo" | |
|
983 | 990 | |
|
984 | 991 | sources = [ |
|
985 |
memoryview(b |
|
|
986 |
bytearray(b |
|
|
992 | memoryview(b"foo"), | |
|
993 | bytearray(b"foo"), | |
|
987 | 994 | mutable_array, |
|
988 | 995 | ] |
|
989 | 996 | |
|
990 | 997 | for source in sources: |
|
991 | 998 | buffer = NonClosingBytesIO() |
|
992 | 999 | with cctx.stream_writer(buffer) as compressor: |
|
993 | 1000 | compressor.write(source) |
|
994 | 1001 | |
|
995 | 1002 | self.assertEqual(buffer.getvalue(), expected) |
|
996 | 1003 | |
|
997 | 1004 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
998 | 1005 | self.assertEqual(compressor.write(source), len(source)) |
|
999 | 1006 | |
|
1000 | 1007 | def test_multiple_compress(self): |
|
1001 | 1008 | buffer = NonClosingBytesIO() |
|
1002 | 1009 | cctx = zstd.ZstdCompressor(level=5) |
|
1003 | 1010 | with cctx.stream_writer(buffer) as compressor: |
|
1004 |
self.assertEqual(compressor.write(b |
|
|
1005 |
self.assertEqual(compressor.write(b |
|
|
1006 |
self.assertEqual(compressor.write(b |
|
|
1011 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1012 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1013 | self.assertEqual(compressor.write(b"x" * 8192), 0) | |
|
1007 | 1014 | |
|
1008 | 1015 | result = buffer.getvalue() |
|
1009 |
self.assertEqual( |
|
|
1010 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' | |
|
1011 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') | |
|
1016 | self.assertEqual( | |
|
1017 | result, | |
|
1018 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" | |
|
1019 | b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", | |
|
1020 | ) | |
|
1012 | 1021 | |
|
1013 | 1022 | # Test without context manager. |
|
1014 | 1023 | buffer = io.BytesIO() |
|
1015 | 1024 | compressor = cctx.stream_writer(buffer) |
|
1016 |
self.assertEqual(compressor.write(b |
|
|
1017 |
self.assertEqual(compressor.write(b |
|
|
1018 |
self.assertEqual(compressor.write(b |
|
|
1025 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1026 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1027 | self.assertEqual(compressor.write(b"x" * 8192), 0) | |
|
1019 | 1028 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) |
|
1020 | 1029 | result = buffer.getvalue() |
|
1021 |
self.assertEqual( |
|
|
1022 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' | |
|
1023 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') | |
|
1030 | self.assertEqual( | |
|
1031 | result, | |
|
1032 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" | |
|
1033 | b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", | |
|
1034 | ) | |
|
1024 | 1035 | |
|
1025 | 1036 | # Test with write_return_read=True. |
|
1026 | 1037 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
1027 |
self.assertEqual(compressor.write(b |
|
|
1028 |
self.assertEqual(compressor.write(b |
|
|
1029 |
self.assertEqual(compressor.write(b |
|
|
1038 | self.assertEqual(compressor.write(b"foo"), 3) | |
|
1039 | self.assertEqual(compressor.write(b"barbiz"), 6) | |
|
1040 | self.assertEqual(compressor.write(b"x" * 8192), 8192) | |
|
1030 | 1041 | |
|
1031 | 1042 | def test_dictionary(self): |
|
1032 | 1043 | samples = [] |
|
1033 | 1044 | for i in range(128): |
|
1034 |
samples.append(b |
|
|
1035 |
samples.append(b |
|
|
1036 |
samples.append(b |
|
|
1045 | samples.append(b"foo" * 64) | |
|
1046 | samples.append(b"bar" * 64) | |
|
1047 | samples.append(b"foobar" * 64) | |
|
1037 | 1048 | |
|
1038 | 1049 | d = zstd.train_dictionary(8192, samples) |
|
1039 | 1050 | |
|
1040 | 1051 | h = hashlib.sha1(d.as_bytes()).hexdigest() |
|
1041 |
self.assertEqual(h, |
|
|
1052 | self.assertEqual(h, "7a2e59a876db958f74257141045af8f912e00d4e") | |
|
1042 | 1053 | |
|
1043 | 1054 | buffer = NonClosingBytesIO() |
|
1044 | 1055 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
1045 | 1056 | with cctx.stream_writer(buffer) as compressor: |
|
1046 |
self.assertEqual(compressor.write(b |
|
|
1047 |
self.assertEqual(compressor.write(b |
|
|
1048 |
self.assertEqual(compressor.write(b |
|
|
1057 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1058 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1059 | self.assertEqual(compressor.write(b"foo" * 16384), 0) | |
|
1049 | 1060 | |
|
1050 | 1061 | compressed = buffer.getvalue() |
|
1051 | 1062 | |
|
1052 | 1063 | params = zstd.get_frame_parameters(compressed) |
|
1053 | 1064 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1054 | 1065 | self.assertEqual(params.window_size, 2097152) |
|
1055 | 1066 | self.assertEqual(params.dict_id, d.dict_id()) |
|
1056 | 1067 | self.assertFalse(params.has_checksum) |
|
1057 | 1068 | |
|
1058 | 1069 | h = hashlib.sha1(compressed).hexdigest() |
|
1059 |
self.assertEqual(h, |
|
|
1070 | self.assertEqual(h, "0a7c05635061f58039727cdbe76388c6f4cfef06") | |
|
1060 | 1071 | |
|
1061 |
source = b |
|
|
1072 | source = b"foo" + b"bar" + (b"foo" * 16384) | |
|
1062 | 1073 | |
|
1063 | 1074 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1064 | 1075 | |
|
1065 | self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)), | |
|
1066 | source) | |
|
1076 | self.assertEqual( | |
|
1077 | dctx.decompress(compressed, max_output_size=len(source)), source | |
|
1078 | ) | |
|
1067 | 1079 | |
|
1068 | 1080 | def test_compression_params(self): |
|
1069 | 1081 | params = zstd.ZstdCompressionParameters( |
|
1070 | 1082 | window_log=20, |
|
1071 | 1083 | chain_log=6, |
|
1072 | 1084 | hash_log=12, |
|
1073 | 1085 | min_match=5, |
|
1074 | 1086 | search_log=4, |
|
1075 | 1087 | target_length=10, |
|
1076 |
strategy=zstd.STRATEGY_FAST |
|
|
1088 | strategy=zstd.STRATEGY_FAST, | |
|
1089 | ) | |
|
1077 | 1090 | |
|
1078 | 1091 | buffer = NonClosingBytesIO() |
|
1079 | 1092 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1080 | 1093 | with cctx.stream_writer(buffer) as compressor: |
|
1081 |
self.assertEqual(compressor.write(b |
|
|
1082 |
self.assertEqual(compressor.write(b |
|
|
1083 |
self.assertEqual(compressor.write(b |
|
|
1094 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1095 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1096 | self.assertEqual(compressor.write(b"foobar" * 16384), 0) | |
|
1084 | 1097 | |
|
1085 | 1098 | compressed = buffer.getvalue() |
|
1086 | 1099 | |
|
1087 | 1100 | params = zstd.get_frame_parameters(compressed) |
|
1088 | 1101 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1089 | 1102 | self.assertEqual(params.window_size, 1048576) |
|
1090 | 1103 | self.assertEqual(params.dict_id, 0) |
|
1091 | 1104 | self.assertFalse(params.has_checksum) |
|
1092 | 1105 | |
|
1093 | 1106 | h = hashlib.sha1(compressed).hexdigest() |
|
1094 |
self.assertEqual(h, |
|
|
1107 | self.assertEqual(h, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b") | |
|
1095 | 1108 | |
|
1096 | 1109 | def test_write_checksum(self): |
|
1097 | 1110 | no_checksum = NonClosingBytesIO() |
|
1098 | 1111 | cctx = zstd.ZstdCompressor(level=1) |
|
1099 | 1112 | with cctx.stream_writer(no_checksum) as compressor: |
|
1100 |
self.assertEqual(compressor.write(b |
|
|
1113 | self.assertEqual(compressor.write(b"foobar"), 0) | |
|
1101 | 1114 | |
|
1102 | 1115 | with_checksum = NonClosingBytesIO() |
|
1103 | 1116 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
1104 | 1117 | with cctx.stream_writer(with_checksum) as compressor: |
|
1105 |
self.assertEqual(compressor.write(b |
|
|
1118 | self.assertEqual(compressor.write(b"foobar"), 0) | |
|
1106 | 1119 | |
|
1107 | 1120 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
1108 | 1121 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
1109 | 1122 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1110 | 1123 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1111 | 1124 | self.assertEqual(no_params.dict_id, 0) |
|
1112 | 1125 | self.assertEqual(with_params.dict_id, 0) |
|
1113 | 1126 | self.assertFalse(no_params.has_checksum) |
|
1114 | 1127 | self.assertTrue(with_params.has_checksum) |
|
1115 | 1128 | |
|
1116 | self.assertEqual(len(with_checksum.getvalue()), | |
|
1117 | len(no_checksum.getvalue()) + 4) | |
|
1129 | self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) | |
|
1118 | 1130 | |
|
1119 | 1131 | def test_write_content_size(self): |
|
1120 | 1132 | no_size = NonClosingBytesIO() |
|
1121 | 1133 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1122 | 1134 | with cctx.stream_writer(no_size) as compressor: |
|
1123 |
self.assertEqual(compressor.write(b |
|
|
1135 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
|
1124 | 1136 | |
|
1125 | 1137 | with_size = NonClosingBytesIO() |
|
1126 | 1138 | cctx = zstd.ZstdCompressor(level=1) |
|
1127 | 1139 | with cctx.stream_writer(with_size) as compressor: |
|
1128 |
self.assertEqual(compressor.write(b |
|
|
1140 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
|
1129 | 1141 | |
|
1130 | 1142 | # Source size is not known in streaming mode, so header not |
|
1131 | 1143 | # written. |
|
1132 | self.assertEqual(len(with_size.getvalue()), | |
|
1133 | len(no_size.getvalue())) | |
|
1144 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) | |
|
1134 | 1145 | |
|
1135 | 1146 | # Declaring size will write the header. |
|
1136 | 1147 | with_size = NonClosingBytesIO() |
|
1137 |
with cctx.stream_writer(with_size, size=len(b |
|
|
1138 |
self.assertEqual(compressor.write(b |
|
|
1148 | with cctx.stream_writer(with_size, size=len(b"foobar" * 256)) as compressor: | |
|
1149 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
|
1139 | 1150 | |
|
1140 | 1151 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
1141 | 1152 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
1142 | 1153 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1143 | 1154 | self.assertEqual(with_params.content_size, 1536) |
|
1144 | 1155 | self.assertEqual(no_params.dict_id, 0) |
|
1145 | 1156 | self.assertEqual(with_params.dict_id, 0) |
|
1146 | 1157 | self.assertFalse(no_params.has_checksum) |
|
1147 | 1158 | self.assertFalse(with_params.has_checksum) |
|
1148 | 1159 | |
|
1149 | self.assertEqual(len(with_size.getvalue()), | |
|
1150 | len(no_size.getvalue()) + 1) | |
|
1160 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) | |
|
1151 | 1161 | |
|
1152 | 1162 | def test_no_dict_id(self): |
|
1153 | 1163 | samples = [] |
|
1154 | 1164 | for i in range(128): |
|
1155 |
samples.append(b |
|
|
1156 |
samples.append(b |
|
|
1157 |
samples.append(b |
|
|
1165 | samples.append(b"foo" * 64) | |
|
1166 | samples.append(b"bar" * 64) | |
|
1167 | samples.append(b"foobar" * 64) | |
|
1158 | 1168 | |
|
1159 | 1169 | d = zstd.train_dictionary(1024, samples) |
|
1160 | 1170 | |
|
1161 | 1171 | with_dict_id = NonClosingBytesIO() |
|
1162 | 1172 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
1163 | 1173 | with cctx.stream_writer(with_dict_id) as compressor: |
|
1164 |
self.assertEqual(compressor.write(b |
|
|
1174 | self.assertEqual(compressor.write(b"foobarfoobar"), 0) | |
|
1165 | 1175 | |
|
1166 |
self.assertEqual(with_dict_id.getvalue()[4:5], b |
|
|
1176 | self.assertEqual(with_dict_id.getvalue()[4:5], b"\x03") | |
|
1167 | 1177 | |
|
1168 | 1178 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
1169 | 1179 | no_dict_id = NonClosingBytesIO() |
|
1170 | 1180 | with cctx.stream_writer(no_dict_id) as compressor: |
|
1171 |
self.assertEqual(compressor.write(b |
|
|
1181 | self.assertEqual(compressor.write(b"foobarfoobar"), 0) | |
|
1172 | 1182 | |
|
1173 |
self.assertEqual(no_dict_id.getvalue()[4:5], b |
|
|
1183 | self.assertEqual(no_dict_id.getvalue()[4:5], b"\x00") | |
|
1174 | 1184 | |
|
1175 | 1185 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) |
|
1176 | 1186 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) |
|
1177 | 1187 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1178 | 1188 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1179 | 1189 | self.assertEqual(no_params.dict_id, 0) |
|
1180 | 1190 | self.assertEqual(with_params.dict_id, d.dict_id()) |
|
1181 | 1191 | self.assertFalse(no_params.has_checksum) |
|
1182 | 1192 | self.assertFalse(with_params.has_checksum) |
|
1183 | 1193 | |
|
1184 | self.assertEqual(len(with_dict_id.getvalue()), | |
|
1185 | len(no_dict_id.getvalue()) + 4) | |
|
1194 | self.assertEqual(len(with_dict_id.getvalue()), len(no_dict_id.getvalue()) + 4) | |
|
1186 | 1195 | |
|
1187 | 1196 | def test_memory_size(self): |
|
1188 | 1197 | cctx = zstd.ZstdCompressor(level=3) |
|
1189 | 1198 | buffer = io.BytesIO() |
|
1190 | 1199 | with cctx.stream_writer(buffer) as compressor: |
|
1191 |
compressor.write(b |
|
|
1200 | compressor.write(b"foo") | |
|
1192 | 1201 | size = compressor.memory_size() |
|
1193 | 1202 | |
|
1194 | 1203 | self.assertGreater(size, 100000) |
|
1195 | 1204 | |
|
1196 | 1205 | def test_write_size(self): |
|
1197 | 1206 | cctx = zstd.ZstdCompressor(level=3) |
|
1198 | 1207 | dest = OpCountingBytesIO() |
|
1199 | 1208 | with cctx.stream_writer(dest, write_size=1) as compressor: |
|
1200 |
self.assertEqual(compressor.write(b |
|
|
1201 |
self.assertEqual(compressor.write(b |
|
|
1202 |
self.assertEqual(compressor.write(b |
|
|
1209 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1210 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1211 | self.assertEqual(compressor.write(b"foobar"), 0) | |
|
1203 | 1212 | |
|
1204 | 1213 | self.assertEqual(len(dest.getvalue()), dest._write_count) |
|
1205 | 1214 | |
|
1206 | 1215 | def test_flush_repeated(self): |
|
1207 | 1216 | cctx = zstd.ZstdCompressor(level=3) |
|
1208 | 1217 | dest = OpCountingBytesIO() |
|
1209 | 1218 | with cctx.stream_writer(dest) as compressor: |
|
1210 |
self.assertEqual(compressor.write(b |
|
|
1219 | self.assertEqual(compressor.write(b"foo"), 0) | |
|
1211 | 1220 | self.assertEqual(dest._write_count, 0) |
|
1212 | 1221 | self.assertEqual(compressor.flush(), 12) |
|
1213 | 1222 | self.assertEqual(dest._write_count, 1) |
|
1214 |
self.assertEqual(compressor.write(b |
|
|
1223 | self.assertEqual(compressor.write(b"bar"), 0) | |
|
1215 | 1224 | self.assertEqual(dest._write_count, 1) |
|
1216 | 1225 | self.assertEqual(compressor.flush(), 6) |
|
1217 | 1226 | self.assertEqual(dest._write_count, 2) |
|
1218 |
self.assertEqual(compressor.write(b |
|
|
1227 | self.assertEqual(compressor.write(b"baz"), 0) | |
|
1219 | 1228 | |
|
1220 | 1229 | self.assertEqual(dest._write_count, 3) |
|
1221 | 1230 | |
|
1222 | 1231 | def test_flush_empty_block(self): |
|
1223 | 1232 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
1224 | 1233 | dest = OpCountingBytesIO() |
|
1225 | 1234 | with cctx.stream_writer(dest) as compressor: |
|
1226 |
self.assertEqual(compressor.write(b |
|
|
1235 | self.assertEqual(compressor.write(b"foobar" * 8192), 0) | |
|
1227 | 1236 | count = dest._write_count |
|
1228 | 1237 | offset = dest.tell() |
|
1229 | 1238 | self.assertEqual(compressor.flush(), 23) |
|
1230 | 1239 | self.assertGreater(dest._write_count, count) |
|
1231 | 1240 | self.assertGreater(dest.tell(), offset) |
|
1232 | 1241 | offset = dest.tell() |
|
1233 | 1242 | # Ending the write here should cause an empty block to be written |
|
1234 | 1243 | # to denote end of frame. |
|
1235 | 1244 | |
|
1236 | 1245 | trailing = dest.getvalue()[offset:] |
|
1237 | 1246 | # 3 bytes block header + 4 bytes frame checksum |
|
1238 | 1247 | self.assertEqual(len(trailing), 7) |
|
1239 | 1248 | |
|
1240 | 1249 | header = trailing[0:3] |
|
1241 |
self.assertEqual(header, b |
|
|
1250 | self.assertEqual(header, b"\x01\x00\x00") | |
|
1242 | 1251 | |
|
1243 | 1252 | def test_flush_frame(self): |
|
1244 | 1253 | cctx = zstd.ZstdCompressor(level=3) |
|
1245 | 1254 | dest = OpCountingBytesIO() |
|
1246 | 1255 | |
|
1247 | 1256 | with cctx.stream_writer(dest) as compressor: |
|
1248 |
self.assertEqual(compressor.write(b |
|
|
1257 | self.assertEqual(compressor.write(b"foobar" * 8192), 0) | |
|
1249 | 1258 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) |
|
1250 |
compressor.write(b |
|
|
1259 | compressor.write(b"biz" * 16384) | |
|
1251 | 1260 | |
|
1252 |
self.assertEqual( |
|
|
1253 | # Frame 1. | |
|
1254 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f' | |
|
1255 | b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08' | |
|
1256 | # Frame 2. | |
|
1257 | b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a' | |
|
1258 | b'\x01\x00\xfa\x3f\x75\x37\x04') | |
|
1261 | self.assertEqual( | |
|
1262 | dest.getvalue(), | |
|
1263 | # Frame 1. | |
|
1264 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f" | |
|
1265 | b"\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08" | |
|
1266 | # Frame 2. | |
|
1267 | b"\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a" | |
|
1268 | b"\x01\x00\xfa\x3f\x75\x37\x04", | |
|
1269 | ) | |
|
1259 | 1270 | |
|
1260 | 1271 | def test_bad_flush_mode(self): |
|
1261 | 1272 | cctx = zstd.ZstdCompressor() |
|
1262 | 1273 | dest = io.BytesIO() |
|
1263 | 1274 | with cctx.stream_writer(dest) as compressor: |
|
1264 |
with self.assertRaisesRegex |
|
|
1275 | with self.assertRaisesRegex(ValueError, "unknown flush_mode: 42"): | |
|
1265 | 1276 | compressor.flush(flush_mode=42) |
|
1266 | 1277 | |
|
1267 | 1278 | def test_multithreaded(self): |
|
1268 | 1279 | dest = NonClosingBytesIO() |
|
1269 | 1280 | cctx = zstd.ZstdCompressor(threads=2) |
|
1270 | 1281 | with cctx.stream_writer(dest) as compressor: |
|
1271 |
compressor.write(b |
|
|
1272 |
compressor.write(b |
|
|
1273 |
compressor.write(b |
|
|
1282 | compressor.write(b"a" * 1048576) | |
|
1283 | compressor.write(b"b" * 1048576) | |
|
1284 | compressor.write(b"c" * 1048576) | |
|
1274 | 1285 | |
|
1275 |
self.assertEqual(len(dest.getvalue()), |
|
|
1286 | self.assertEqual(len(dest.getvalue()), 111) | |
|
1276 | 1287 | |
|
1277 | 1288 | def test_tell(self): |
|
1278 | 1289 | dest = io.BytesIO() |
|
1279 | 1290 | cctx = zstd.ZstdCompressor() |
|
1280 | 1291 | with cctx.stream_writer(dest) as compressor: |
|
1281 | 1292 | self.assertEqual(compressor.tell(), 0) |
|
1282 | 1293 | |
|
1283 | 1294 | for i in range(256): |
|
1284 |
compressor.write(b |
|
|
1295 | compressor.write(b"foo" * (i + 1)) | |
|
1285 | 1296 | self.assertEqual(compressor.tell(), dest.tell()) |
|
1286 | 1297 | |
|
1287 | 1298 | def test_bad_size(self): |
|
1288 | 1299 | cctx = zstd.ZstdCompressor() |
|
1289 | 1300 | |
|
1290 | 1301 | dest = io.BytesIO() |
|
1291 | 1302 | |
|
1292 |
with self.assertRaisesRegex |
|
|
1303 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
1293 | 1304 | with cctx.stream_writer(dest, size=2) as compressor: |
|
1294 |
compressor.write(b |
|
|
1305 | compressor.write(b"foo") | |
|
1295 | 1306 | |
|
1296 | 1307 | # Test another operation. |
|
1297 | 1308 | with cctx.stream_writer(dest, size=42): |
|
1298 | 1309 | pass |
|
1299 | 1310 | |
|
1300 | 1311 | def test_tarfile_compat(self): |
|
1301 | 1312 | dest = NonClosingBytesIO() |
|
1302 | 1313 | cctx = zstd.ZstdCompressor() |
|
1303 | 1314 | with cctx.stream_writer(dest) as compressor: |
|
1304 |
with tarfile.open( |
|
|
1305 |
tf.add(__file__, |
|
|
1315 | with tarfile.open("tf", mode="w|", fileobj=compressor) as tf: | |
|
1316 | tf.add(__file__, "test_compressor.py") | |
|
1306 | 1317 | |
|
1307 | 1318 | dest = io.BytesIO(dest.getvalue()) |
|
1308 | 1319 | |
|
1309 | 1320 | dctx = zstd.ZstdDecompressor() |
|
1310 | 1321 | with dctx.stream_reader(dest) as reader: |
|
1311 |
with tarfile.open(mode= |
|
|
1322 | with tarfile.open(mode="r|", fileobj=reader) as tf: | |
|
1312 | 1323 | for member in tf: |
|
1313 |
self.assertEqual(member.name, |
|
|
1324 | self.assertEqual(member.name, "test_compressor.py") | |
|
1314 | 1325 | |
|
1315 | 1326 | |
|
1316 | 1327 | @make_cffi |
|
1317 |
class TestCompressor_read_to_iter( |
|
|
1328 | class TestCompressor_read_to_iter(TestCase): | |
|
1318 | 1329 | def test_type_validation(self): |
|
1319 | 1330 | cctx = zstd.ZstdCompressor() |
|
1320 | 1331 | |
|
1321 | 1332 | # Object with read() works. |
|
1322 | 1333 | for chunk in cctx.read_to_iter(io.BytesIO()): |
|
1323 | 1334 | pass |
|
1324 | 1335 | |
|
1325 | 1336 | # Buffer protocol works. |
|
1326 |
for chunk in cctx.read_to_iter(b |
|
|
1337 | for chunk in cctx.read_to_iter(b"foobar"): | |
|
1327 | 1338 | pass |
|
1328 | 1339 | |
|
1329 |
with self.assertRaisesRegex |
|
|
1340 | with self.assertRaisesRegex(ValueError, "must pass an object with a read"): | |
|
1330 | 1341 | for chunk in cctx.read_to_iter(True): |
|
1331 | 1342 | pass |
|
1332 | 1343 | |
|
1333 | 1344 | def test_read_empty(self): |
|
1334 | 1345 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1335 | 1346 | |
|
1336 | 1347 | source = io.BytesIO() |
|
1337 | 1348 | it = cctx.read_to_iter(source) |
|
1338 | 1349 | chunks = list(it) |
|
1339 | 1350 | self.assertEqual(len(chunks), 1) |
|
1340 |
compressed = b |
|
|
1341 |
self.assertEqual(compressed, b |
|
|
1351 | compressed = b"".join(chunks) | |
|
1352 | self.assertEqual(compressed, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
|
1342 | 1353 | |
|
1343 | 1354 | # And again with the buffer protocol. |
|
1344 |
it = cctx.read_to_iter(b |
|
|
1355 | it = cctx.read_to_iter(b"") | |
|
1345 | 1356 | chunks = list(it) |
|
1346 | 1357 | self.assertEqual(len(chunks), 1) |
|
1347 |
compressed2 = b |
|
|
1358 | compressed2 = b"".join(chunks) | |
|
1348 | 1359 | self.assertEqual(compressed2, compressed) |
|
1349 | 1360 | |
|
1350 | 1361 | def test_read_large(self): |
|
1351 | 1362 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1352 | 1363 | |
|
1353 | 1364 | source = io.BytesIO() |
|
1354 |
source.write(b |
|
|
1355 |
source.write(b |
|
|
1365 | source.write(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
|
1366 | source.write(b"o") | |
|
1356 | 1367 | source.seek(0) |
|
1357 | 1368 | |
|
1358 | 1369 | # Creating an iterator should not perform any compression until |
|
1359 | 1370 | # first read. |
|
1360 | 1371 | it = cctx.read_to_iter(source, size=len(source.getvalue())) |
|
1361 | 1372 | self.assertEqual(source.tell(), 0) |
|
1362 | 1373 | |
|
1363 | 1374 | # We should have exactly 2 output chunks. |
|
1364 | 1375 | chunks = [] |
|
1365 | 1376 | chunk = next(it) |
|
1366 | 1377 | self.assertIsNotNone(chunk) |
|
1367 | 1378 | self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
1368 | 1379 | chunks.append(chunk) |
|
1369 | 1380 | chunk = next(it) |
|
1370 | 1381 | self.assertIsNotNone(chunk) |
|
1371 | 1382 | chunks.append(chunk) |
|
1372 | 1383 | |
|
1373 | 1384 | self.assertEqual(source.tell(), len(source.getvalue())) |
|
1374 | 1385 | |
|
1375 | 1386 | with self.assertRaises(StopIteration): |
|
1376 | 1387 | next(it) |
|
1377 | 1388 | |
|
1378 | 1389 | # And again for good measure. |
|
1379 | 1390 | with self.assertRaises(StopIteration): |
|
1380 | 1391 | next(it) |
|
1381 | 1392 | |
|
1382 | 1393 | # We should get the same output as the one-shot compression mechanism. |
|
1383 |
self.assertEqual(b |
|
|
1394 | self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) | |
|
1384 | 1395 | |
|
1385 |
params = zstd.get_frame_parameters(b |
|
|
1396 | params = zstd.get_frame_parameters(b"".join(chunks)) | |
|
1386 | 1397 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1387 | 1398 | self.assertEqual(params.window_size, 262144) |
|
1388 | 1399 | self.assertEqual(params.dict_id, 0) |
|
1389 | 1400 | self.assertFalse(params.has_checksum) |
|
1390 | 1401 | |
|
1391 | 1402 | # Now check the buffer protocol. |
|
1392 | 1403 | it = cctx.read_to_iter(source.getvalue()) |
|
1393 | 1404 | chunks = list(it) |
|
1394 | 1405 | self.assertEqual(len(chunks), 2) |
|
1395 | 1406 | |
|
1396 |
params = zstd.get_frame_parameters(b |
|
|
1407 | params = zstd.get_frame_parameters(b"".join(chunks)) | |
|
1397 | 1408 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1398 | #self.assertEqual(params.window_size, 262144) | |
|
1409 | # self.assertEqual(params.window_size, 262144) | |
|
1399 | 1410 | self.assertEqual(params.dict_id, 0) |
|
1400 | 1411 | self.assertFalse(params.has_checksum) |
|
1401 | 1412 | |
|
1402 |
self.assertEqual(b |
|
|
1413 | self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) | |
|
1403 | 1414 | |
|
1404 | 1415 | def test_read_write_size(self): |
|
1405 |
source = OpCountingBytesIO(b |
|
|
1416 | source = OpCountingBytesIO(b"foobarfoobar") | |
|
1406 | 1417 | cctx = zstd.ZstdCompressor(level=3) |
|
1407 | 1418 | for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): |
|
1408 | 1419 | self.assertEqual(len(chunk), 1) |
|
1409 | 1420 | |
|
1410 | 1421 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
1411 | 1422 | |
|
1412 | 1423 | def test_multithreaded(self): |
|
1413 | 1424 | source = io.BytesIO() |
|
1414 |
source.write(b |
|
|
1415 |
source.write(b |
|
|
1416 |
source.write(b |
|
|
1425 | source.write(b"a" * 1048576) | |
|
1426 | source.write(b"b" * 1048576) | |
|
1427 | source.write(b"c" * 1048576) | |
|
1417 | 1428 | source.seek(0) |
|
1418 | 1429 | |
|
1419 | 1430 | cctx = zstd.ZstdCompressor(threads=2) |
|
1420 | 1431 | |
|
1421 |
compressed = b |
|
|
1422 |
self.assertEqual(len(compressed), |
|
|
1432 | compressed = b"".join(cctx.read_to_iter(source)) | |
|
1433 | self.assertEqual(len(compressed), 111) | |
|
1423 | 1434 | |
|
1424 | 1435 | def test_bad_size(self): |
|
1425 | 1436 | cctx = zstd.ZstdCompressor() |
|
1426 | 1437 | |
|
1427 |
source = io.BytesIO(b |
|
|
1438 | source = io.BytesIO(b"a" * 42) | |
|
1428 | 1439 | |
|
1429 |
with self.assertRaisesRegex |
|
|
1430 |
b |
|
|
1440 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
|
1441 | b"".join(cctx.read_to_iter(source, size=2)) | |
|
1431 | 1442 | |
|
1432 | 1443 | # Test another operation on errored compressor. |
|
1433 |
b |
|
|
1444 | b"".join(cctx.read_to_iter(source)) | |
|
1434 | 1445 | |
|
1435 | 1446 | |
|
1436 | 1447 | @make_cffi |
|
1437 |
class TestCompressor_chunker( |
|
|
1448 | class TestCompressor_chunker(TestCase): | |
|
1438 | 1449 | def test_empty(self): |
|
1439 | 1450 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1440 | 1451 | chunker = cctx.chunker() |
|
1441 | 1452 | |
|
1442 |
it = chunker.compress(b |
|
|
1453 | it = chunker.compress(b"") | |
|
1443 | 1454 | |
|
1444 | 1455 | with self.assertRaises(StopIteration): |
|
1445 | 1456 | next(it) |
|
1446 | 1457 | |
|
1447 | 1458 | it = chunker.finish() |
|
1448 | 1459 | |
|
1449 |
self.assertEqual(next(it), b |
|
|
1460 | self.assertEqual(next(it), b"\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00") | |
|
1450 | 1461 | |
|
1451 | 1462 | with self.assertRaises(StopIteration): |
|
1452 | 1463 | next(it) |
|
1453 | 1464 | |
|
1454 | 1465 | def test_simple_input(self): |
|
1455 | 1466 | cctx = zstd.ZstdCompressor() |
|
1456 | 1467 | chunker = cctx.chunker() |
|
1457 | 1468 | |
|
1458 |
it = chunker.compress(b |
|
|
1469 | it = chunker.compress(b"foobar") | |
|
1459 | 1470 | |
|
1460 | 1471 | with self.assertRaises(StopIteration): |
|
1461 | 1472 | next(it) |
|
1462 | 1473 | |
|
1463 |
it = chunker.compress(b |
|
|
1474 | it = chunker.compress(b"baz" * 30) | |
|
1464 | 1475 | |
|
1465 | 1476 | with self.assertRaises(StopIteration): |
|
1466 | 1477 | next(it) |
|
1467 | 1478 | |
|
1468 | 1479 | it = chunker.finish() |
|
1469 | 1480 | |
|
1470 |
self.assertEqual( |
|
|
1471 | b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f' | |
|
1472 | b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e') | |
|
1481 | self.assertEqual( | |
|
1482 | next(it), | |
|
1483 | b"\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f" | |
|
1484 | b"\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e", | |
|
1485 | ) | |
|
1473 | 1486 | |
|
1474 | 1487 | with self.assertRaises(StopIteration): |
|
1475 | 1488 | next(it) |
|
1476 | 1489 | |
|
1477 | 1490 | def test_input_size(self): |
|
1478 | 1491 | cctx = zstd.ZstdCompressor() |
|
1479 | 1492 | chunker = cctx.chunker(size=1024) |
|
1480 | 1493 | |
|
1481 |
it = chunker.compress(b |
|
|
1494 | it = chunker.compress(b"x" * 1000) | |
|
1482 | 1495 | |
|
1483 | 1496 | with self.assertRaises(StopIteration): |
|
1484 | 1497 | next(it) |
|
1485 | 1498 | |
|
1486 |
it = chunker.compress(b |
|
|
1499 | it = chunker.compress(b"y" * 24) | |
|
1487 | 1500 | |
|
1488 | 1501 | with self.assertRaises(StopIteration): |
|
1489 | 1502 | next(it) |
|
1490 | 1503 | |
|
1491 | 1504 | chunks = list(chunker.finish()) |
|
1492 | 1505 | |
|
1493 |
self.assertEqual( |
|
|
1494 | b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00' | |
|
1495 | b'\xa0\x16\xe3\x2b\x80\x05' | |
|
1496 | ]) | |
|
1506 | self.assertEqual( | |
|
1507 | chunks, | |
|
1508 | [ | |
|
1509 | b"\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00" | |
|
1510 | b"\xa0\x16\xe3\x2b\x80\x05" | |
|
1511 | ], | |
|
1512 | ) | |
|
1497 | 1513 | |
|
1498 | 1514 | dctx = zstd.ZstdDecompressor() |
|
1499 | 1515 | |
|
1500 |
self.assertEqual(dctx.decompress(b |
|
|
1501 | (b'x' * 1000) + (b'y' * 24)) | |
|
1516 | self.assertEqual(dctx.decompress(b"".join(chunks)), (b"x" * 1000) + (b"y" * 24)) | |
|
1502 | 1517 | |
|
1503 | 1518 | def test_small_chunk_size(self): |
|
1504 | 1519 | cctx = zstd.ZstdCompressor() |
|
1505 | 1520 | chunker = cctx.chunker(chunk_size=1) |
|
1506 | 1521 | |
|
1507 |
chunks = list(chunker.compress(b |
|
|
1522 | chunks = list(chunker.compress(b"foo" * 1024)) | |
|
1508 | 1523 | self.assertEqual(chunks, []) |
|
1509 | 1524 | |
|
1510 | 1525 | chunks = list(chunker.finish()) |
|
1511 | 1526 | self.assertTrue(all(len(chunk) == 1 for chunk in chunks)) |
|
1512 | 1527 | |
|
1513 | 1528 | self.assertEqual( |
|
1514 |
b |
|
|
1515 |
b |
|
|
1516 |
b |
|
|
1529 | b"".join(chunks), | |
|
1530 | b"\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00" | |
|
1531 | b"\xfa\xd3\x77\x43", | |
|
1532 | ) | |
|
1517 | 1533 | |
|
1518 | 1534 | dctx = zstd.ZstdDecompressor() |
|
1519 |
self.assertEqual( |
|
|
1520 | max_output_size=10000), | |
|
1521 | b'foo' * 1024) | |
|
1535 | self.assertEqual( | |
|
1536 | dctx.decompress(b"".join(chunks), max_output_size=10000), b"foo" * 1024 | |
|
1537 | ) | |
|
1522 | 1538 | |
|
1523 | 1539 | def test_input_types(self): |
|
1524 | 1540 | cctx = zstd.ZstdCompressor() |
|
1525 | 1541 | |
|
1526 | 1542 | mutable_array = bytearray(3) |
|
1527 |
mutable_array[:] = b |
|
|
1543 | mutable_array[:] = b"foo" | |
|
1528 | 1544 | |
|
1529 | 1545 | sources = [ |
|
1530 |
memoryview(b |
|
|
1531 |
bytearray(b |
|
|
1546 | memoryview(b"foo"), | |
|
1547 | bytearray(b"foo"), | |
|
1532 | 1548 | mutable_array, |
|
1533 | 1549 | ] |
|
1534 | 1550 | |
|
1535 | 1551 | for source in sources: |
|
1536 | 1552 | chunker = cctx.chunker() |
|
1537 | 1553 | |
|
1538 | 1554 | self.assertEqual(list(chunker.compress(source)), []) |
|
1539 |
self.assertEqual( |
|
|
1540 | b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f' | |
|
1541 | ]) | |
|
1555 | self.assertEqual( | |
|
1556 | list(chunker.finish()), | |
|
1557 | [b"\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f"], | |
|
1558 | ) | |
|
1542 | 1559 | |
|
1543 | 1560 | def test_flush(self): |
|
1544 | 1561 | cctx = zstd.ZstdCompressor() |
|
1545 | 1562 | chunker = cctx.chunker() |
|
1546 | 1563 | |
|
1547 |
self.assertEqual(list(chunker.compress(b |
|
|
1548 |
self.assertEqual(list(chunker.compress(b |
|
|
1564 | self.assertEqual(list(chunker.compress(b"foo" * 1024)), []) | |
|
1565 | self.assertEqual(list(chunker.compress(b"bar" * 1024)), []) | |
|
1549 | 1566 | |
|
1550 | 1567 | chunks1 = list(chunker.flush()) |
|
1551 | 1568 | |
|
1552 |
self.assertEqual( |
|
|
1553 | b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72' | |
|
1554 | b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02' | |
|
1555 | ]) | |
|
1569 | self.assertEqual( | |
|
1570 | chunks1, | |
|
1571 | [ | |
|
1572 | b"\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72" | |
|
1573 | b"\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02" | |
|
1574 | ], | |
|
1575 | ) | |
|
1556 | 1576 | |
|
1557 | 1577 | self.assertEqual(list(chunker.flush()), []) |
|
1558 | 1578 | self.assertEqual(list(chunker.flush()), []) |
|
1559 | 1579 | |
|
1560 |
self.assertEqual(list(chunker.compress(b |
|
|
1580 | self.assertEqual(list(chunker.compress(b"baz" * 1024)), []) | |
|
1561 | 1581 | |
|
1562 | 1582 | chunks2 = list(chunker.flush()) |
|
1563 | 1583 | self.assertEqual(len(chunks2), 1) |
|
1564 | 1584 | |
|
1565 | 1585 | chunks3 = list(chunker.finish()) |
|
1566 | 1586 | self.assertEqual(len(chunks2), 1) |
|
1567 | 1587 | |
|
1568 | 1588 | dctx = zstd.ZstdDecompressor() |
|
1569 | 1589 | |
|
1570 | self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3), | |
|
1571 | max_output_size=10000), | |
|
1572 | (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024)) | |
|
1590 | self.assertEqual( | |
|
1591 | dctx.decompress( | |
|
1592 | b"".join(chunks1 + chunks2 + chunks3), max_output_size=10000 | |
|
1593 | ), | |
|
1594 | (b"foo" * 1024) + (b"bar" * 1024) + (b"baz" * 1024), | |
|
1595 | ) | |
|
1573 | 1596 | |
|
1574 | 1597 | def test_compress_after_finish(self): |
|
1575 | 1598 | cctx = zstd.ZstdCompressor() |
|
1576 | 1599 | chunker = cctx.chunker() |
|
1577 | 1600 | |
|
1578 |
list(chunker.compress(b |
|
|
1601 | list(chunker.compress(b"foo")) | |
|
1579 | 1602 | list(chunker.finish()) |
|
1580 | 1603 | |
|
1581 |
with self.assertRaisesRegex |
|
|
1582 | zstd.ZstdError, | |
|
1583 | r'cannot call compress\(\) after compression finished'): | |
|
1584 |
list(chunker.compress(b |
|
|
1604 | with self.assertRaisesRegex( | |
|
1605 | zstd.ZstdError, r"cannot call compress\(\) after compression finished" | |
|
1606 | ): | |
|
1607 | list(chunker.compress(b"foo")) | |
|
1585 | 1608 | |
|
1586 | 1609 | def test_flush_after_finish(self): |
|
1587 | 1610 | cctx = zstd.ZstdCompressor() |
|
1588 | 1611 | chunker = cctx.chunker() |
|
1589 | 1612 | |
|
1590 |
list(chunker.compress(b |
|
|
1613 | list(chunker.compress(b"foo")) | |
|
1591 | 1614 | list(chunker.finish()) |
|
1592 | 1615 | |
|
1593 |
with self.assertRaisesRegex |
|
|
1594 | zstd.ZstdError, | |
|
1595 | r'cannot call flush\(\) after compression finished'): | |
|
1616 | with self.assertRaisesRegex( | |
|
1617 | zstd.ZstdError, r"cannot call flush\(\) after compression finished" | |
|
1618 | ): | |
|
1596 | 1619 | list(chunker.flush()) |
|
1597 | 1620 | |
|
1598 | 1621 | def test_finish_after_finish(self): |
|
1599 | 1622 | cctx = zstd.ZstdCompressor() |
|
1600 | 1623 | chunker = cctx.chunker() |
|
1601 | 1624 | |
|
1602 |
list(chunker.compress(b |
|
|
1625 | list(chunker.compress(b"foo")) | |
|
1603 | 1626 | list(chunker.finish()) |
|
1604 | 1627 | |
|
1605 |
with self.assertRaisesRegex |
|
|
1606 | zstd.ZstdError, | |
|
1607 | r'cannot call finish\(\) after compression finished'): | |
|
1628 | with self.assertRaisesRegex( | |
|
1629 | zstd.ZstdError, r"cannot call finish\(\) after compression finished" | |
|
1630 | ): | |
|
1608 | 1631 | list(chunker.finish()) |
|
1609 | 1632 | |
|
1610 | 1633 | |
|
1611 |
class TestCompressor_multi_compress_to_buffer( |
|
|
1634 | class TestCompressor_multi_compress_to_buffer(TestCase): | |
|
1612 | 1635 | def test_invalid_inputs(self): |
|
1613 | 1636 | cctx = zstd.ZstdCompressor() |
|
1614 | 1637 | |
|
1615 |
if not hasattr(cctx, |
|
|
1616 |
self.skipTest( |
|
|
1638 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1639 | self.skipTest("multi_compress_to_buffer not available") | |
|
1617 | 1640 | |
|
1618 | 1641 | with self.assertRaises(TypeError): |
|
1619 | 1642 | cctx.multi_compress_to_buffer(True) |
|
1620 | 1643 | |
|
1621 | 1644 | with self.assertRaises(TypeError): |
|
1622 | 1645 | cctx.multi_compress_to_buffer((1, 2)) |
|
1623 | 1646 | |
|
1624 |
with self.assertRaisesRegex |
|
|
1625 |
cctx.multi_compress_to_buffer([u |
|
|
1647 | with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): | |
|
1648 | cctx.multi_compress_to_buffer([u"foo"]) | |
|
1626 | 1649 | |
|
1627 | 1650 | def test_empty_input(self): |
|
1628 | 1651 | cctx = zstd.ZstdCompressor() |
|
1629 | 1652 | |
|
1630 |
if not hasattr(cctx, |
|
|
1631 |
self.skipTest( |
|
|
1653 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1654 | self.skipTest("multi_compress_to_buffer not available") | |
|
1632 | 1655 | |
|
1633 |
with self.assertRaisesRegex |
|
|
1656 | with self.assertRaisesRegex(ValueError, "no source elements found"): | |
|
1634 | 1657 | cctx.multi_compress_to_buffer([]) |
|
1635 | 1658 | |
|
1636 |
with self.assertRaisesRegex |
|
|
1637 |
cctx.multi_compress_to_buffer([b |
|
|
1659 | with self.assertRaisesRegex(ValueError, "source elements are empty"): | |
|
1660 | cctx.multi_compress_to_buffer([b"", b"", b""]) | |
|
1638 | 1661 | |
|
1639 | 1662 | def test_list_input(self): |
|
1640 | 1663 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1641 | 1664 | |
|
1642 |
if not hasattr(cctx, |
|
|
1643 |
self.skipTest( |
|
|
1665 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1666 | self.skipTest("multi_compress_to_buffer not available") | |
|
1644 | 1667 | |
|
1645 |
original = [b |
|
|
1668 | original = [b"foo" * 12, b"bar" * 6] | |
|
1646 | 1669 | frames = [cctx.compress(c) for c in original] |
|
1647 | 1670 | b = cctx.multi_compress_to_buffer(original) |
|
1648 | 1671 | |
|
1649 | 1672 | self.assertIsInstance(b, zstd.BufferWithSegmentsCollection) |
|
1650 | 1673 | |
|
1651 | 1674 | self.assertEqual(len(b), 2) |
|
1652 | 1675 | self.assertEqual(b.size(), 44) |
|
1653 | 1676 | |
|
1654 | 1677 | self.assertEqual(b[0].tobytes(), frames[0]) |
|
1655 | 1678 | self.assertEqual(b[1].tobytes(), frames[1]) |
|
1656 | 1679 | |
|
1657 | 1680 | def test_buffer_with_segments_input(self): |
|
1658 | 1681 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1659 | 1682 | |
|
1660 |
if not hasattr(cctx, |
|
|
1661 |
self.skipTest( |
|
|
1683 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1684 | self.skipTest("multi_compress_to_buffer not available") | |
|
1662 | 1685 | |
|
1663 |
original = [b |
|
|
1686 | original = [b"foo" * 4, b"bar" * 6] | |
|
1664 | 1687 | frames = [cctx.compress(c) for c in original] |
|
1665 | 1688 | |
|
1666 |
offsets = struct.pack( |
|
|
1667 |
|
|
|
1668 | segments = zstd.BufferWithSegments(b''.join(original), offsets) | |
|
1689 | offsets = struct.pack( | |
|
1690 | "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) | |
|
1691 | ) | |
|
1692 | segments = zstd.BufferWithSegments(b"".join(original), offsets) | |
|
1669 | 1693 | |
|
1670 | 1694 | result = cctx.multi_compress_to_buffer(segments) |
|
1671 | 1695 | |
|
1672 | 1696 | self.assertEqual(len(result), 2) |
|
1673 | 1697 | self.assertEqual(result.size(), 47) |
|
1674 | 1698 | |
|
1675 | 1699 | self.assertEqual(result[0].tobytes(), frames[0]) |
|
1676 | 1700 | self.assertEqual(result[1].tobytes(), frames[1]) |
|
1677 | 1701 | |
|
1678 | 1702 | def test_buffer_with_segments_collection_input(self): |
|
1679 | 1703 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1680 | 1704 | |
|
1681 |
if not hasattr(cctx, |
|
|
1682 |
self.skipTest( |
|
|
1705 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1706 | self.skipTest("multi_compress_to_buffer not available") | |
|
1683 | 1707 | |
|
1684 | 1708 | original = [ |
|
1685 |
b |
|
|
1686 |
b |
|
|
1687 |
b |
|
|
1688 |
b |
|
|
1689 |
b |
|
|
1709 | b"foo1", | |
|
1710 | b"foo2" * 2, | |
|
1711 | b"foo3" * 3, | |
|
1712 | b"foo4" * 4, | |
|
1713 | b"foo5" * 5, | |
|
1690 | 1714 | ] |
|
1691 | 1715 | |
|
1692 | 1716 | frames = [cctx.compress(c) for c in original] |
|
1693 | 1717 | |
|
1694 |
b = b |
|
|
1695 |
b1 = zstd.BufferWithSegments( |
|
|
1696 | 0, len(original[0]), | |
|
1697 | len(original[0]), len(original[1]))) | |
|
1698 |
|
|
|
1699 | b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', | |
|
1700 | 0, len(original[2]), | |
|
1701 | len(original[2]), len(original[3]), | |
|
1702 | len(original[2]) + len(original[3]), len(original[4]))) | |
|
1718 | b = b"".join([original[0], original[1]]) | |
|
1719 | b1 = zstd.BufferWithSegments( | |
|
1720 | b, | |
|
1721 | struct.pack( | |
|
1722 | "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) | |
|
1723 | ), | |
|
1724 | ) | |
|
1725 | b = b"".join([original[2], original[3], original[4]]) | |
|
1726 | b2 = zstd.BufferWithSegments( | |
|
1727 | b, | |
|
1728 | struct.pack( | |
|
1729 | "=QQQQQQ", | |
|
1730 | 0, | |
|
1731 | len(original[2]), | |
|
1732 | len(original[2]), | |
|
1733 | len(original[3]), | |
|
1734 | len(original[2]) + len(original[3]), | |
|
1735 | len(original[4]), | |
|
1736 | ), | |
|
1737 | ) | |
|
1703 | 1738 | |
|
1704 | 1739 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1705 | 1740 | |
|
1706 | 1741 | result = cctx.multi_compress_to_buffer(c) |
|
1707 | 1742 | |
|
1708 | 1743 | self.assertEqual(len(result), len(frames)) |
|
1709 | 1744 | |
|
1710 | 1745 | for i, frame in enumerate(frames): |
|
1711 | 1746 | self.assertEqual(result[i].tobytes(), frame) |
|
1712 | 1747 | |
|
1713 | 1748 | def test_multiple_threads(self): |
|
1714 | 1749 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will |
|
1715 | 1750 | # make output different. |
|
1716 | 1751 | refcctx = zstd.ZstdCompressor(write_checksum=True) |
|
1717 |
reference = [refcctx.compress(b |
|
|
1752 | reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)] | |
|
1718 | 1753 | |
|
1719 | 1754 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1720 | 1755 | |
|
1721 |
if not hasattr(cctx, |
|
|
1722 |
self.skipTest( |
|
|
1756 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1757 | self.skipTest("multi_compress_to_buffer not available") | |
|
1723 | 1758 | |
|
1724 | 1759 | frames = [] |
|
1725 |
frames.extend(b |
|
|
1726 |
frames.extend(b |
|
|
1760 | frames.extend(b"x" * 64 for i in range(256)) | |
|
1761 | frames.extend(b"y" * 64 for i in range(256)) | |
|
1727 | 1762 | |
|
1728 | 1763 | result = cctx.multi_compress_to_buffer(frames, threads=-1) |
|
1729 | 1764 | |
|
1730 | 1765 | self.assertEqual(len(result), 512) |
|
1731 | 1766 | for i in range(512): |
|
1732 | 1767 | if i < 256: |
|
1733 | 1768 | self.assertEqual(result[i].tobytes(), reference[0]) |
|
1734 | 1769 | else: |
|
1735 | 1770 | self.assertEqual(result[i].tobytes(), reference[1]) |
This diff has been collapsed as it changes many lines, (631 lines changed) Show them Hide them | |||
@@ -1,711 +1,836 b'' | |||
|
1 | 1 | import io |
|
2 | 2 | import os |
|
3 | 3 | import unittest |
|
4 | 4 | |
|
5 | 5 | try: |
|
6 | 6 | import hypothesis |
|
7 | 7 | import hypothesis.strategies as strategies |
|
8 | 8 | except ImportError: |
|
9 |
raise unittest.SkipTest( |
|
|
9 | raise unittest.SkipTest("hypothesis not available") | |
|
10 | 10 | |
|
11 | 11 | import zstandard as zstd |
|
12 | 12 | |
|
13 |
from . |
|
|
13 | from .common import ( | |
|
14 | 14 | make_cffi, |
|
15 | 15 | NonClosingBytesIO, |
|
16 | 16 | random_input_data, |
|
17 | TestCase, | |
|
17 | 18 | ) |
|
18 | 19 | |
|
19 | 20 | |
|
20 |
@unittest.skipUnless( |
|
|
21 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
21 | 22 | @make_cffi |
|
22 |
class TestCompressor_stream_reader_fuzzing( |
|
|
23 | class TestCompressor_stream_reader_fuzzing(TestCase): | |
|
23 | 24 | @hypothesis.settings( |
|
24 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
25 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
26 | level=strategies.integers(min_value=1, max_value=5), | |
|
27 | source_read_size=strategies.integers(1, 16384), | |
|
28 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
29 | def test_stream_source_read(self, original, level, source_read_size, | |
|
30 | read_size): | |
|
25 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
26 | ) | |
|
27 | @hypothesis.given( | |
|
28 | original=strategies.sampled_from(random_input_data()), | |
|
29 | level=strategies.integers(min_value=1, max_value=5), | |
|
30 | source_read_size=strategies.integers(1, 16384), | |
|
31 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
32 | ) | |
|
33 | def test_stream_source_read(self, original, level, source_read_size, read_size): | |
|
31 | 34 | if read_size == 0: |
|
32 | 35 | read_size = -1 |
|
33 | 36 | |
|
34 | 37 | refctx = zstd.ZstdCompressor(level=level) |
|
35 | 38 | ref_frame = refctx.compress(original) |
|
36 | 39 | |
|
37 | 40 | cctx = zstd.ZstdCompressor(level=level) |
|
38 |
with cctx.stream_reader( |
|
|
39 | read_size=source_read_size) as reader: | |
|
41 | with cctx.stream_reader( | |
|
42 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
43 | ) as reader: | |
|
40 | 44 | chunks = [] |
|
41 | 45 | while True: |
|
42 | 46 | chunk = reader.read(read_size) |
|
43 | 47 | if not chunk: |
|
44 | 48 | break |
|
45 | 49 | |
|
46 | 50 | chunks.append(chunk) |
|
47 | 51 | |
|
48 |
self.assertEqual(b |
|
|
52 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
49 | 53 | |
|
50 | 54 | @hypothesis.settings( |
|
51 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
52 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
53 | level=strategies.integers(min_value=1, max_value=5), | |
|
54 | source_read_size=strategies.integers(1, 16384), | |
|
55 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
56 | def test_buffer_source_read(self, original, level, source_read_size, | |
|
57 | read_size): | |
|
55 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
56 | ) | |
|
57 | @hypothesis.given( | |
|
58 | original=strategies.sampled_from(random_input_data()), | |
|
59 | level=strategies.integers(min_value=1, max_value=5), | |
|
60 | source_read_size=strategies.integers(1, 16384), | |
|
61 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
62 | ) | |
|
63 | def test_buffer_source_read(self, original, level, source_read_size, read_size): | |
|
58 | 64 | if read_size == 0: |
|
59 | 65 | read_size = -1 |
|
60 | 66 | |
|
61 | 67 | refctx = zstd.ZstdCompressor(level=level) |
|
62 | 68 | ref_frame = refctx.compress(original) |
|
63 | 69 | |
|
64 | 70 | cctx = zstd.ZstdCompressor(level=level) |
|
65 |
with cctx.stream_reader( |
|
|
66 | read_size=source_read_size) as reader: | |
|
71 | with cctx.stream_reader( | |
|
72 | original, size=len(original), read_size=source_read_size | |
|
73 | ) as reader: | |
|
67 | 74 | chunks = [] |
|
68 | 75 | while True: |
|
69 | 76 | chunk = reader.read(read_size) |
|
70 | 77 | if not chunk: |
|
71 | 78 | break |
|
72 | 79 | |
|
73 | 80 | chunks.append(chunk) |
|
74 | 81 | |
|
75 |
self.assertEqual(b |
|
|
82 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
76 | 83 | |
|
77 | 84 | @hypothesis.settings( |
|
78 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
79 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
80 | level=strategies.integers(min_value=1, max_value=5), | |
|
81 | source_read_size=strategies.integers(1, 16384), | |
|
82 | read_sizes=strategies.data()) | |
|
83 | def test_stream_source_read_variance(self, original, level, source_read_size, | |
|
84 | read_sizes): | |
|
85 | suppress_health_check=[ | |
|
86 | hypothesis.HealthCheck.large_base_example, | |
|
87 | hypothesis.HealthCheck.too_slow, | |
|
88 | ] | |
|
89 | ) | |
|
90 | @hypothesis.given( | |
|
91 | original=strategies.sampled_from(random_input_data()), | |
|
92 | level=strategies.integers(min_value=1, max_value=5), | |
|
93 | source_read_size=strategies.integers(1, 16384), | |
|
94 | read_sizes=strategies.data(), | |
|
95 | ) | |
|
96 | def test_stream_source_read_variance( | |
|
97 | self, original, level, source_read_size, read_sizes | |
|
98 | ): | |
|
85 | 99 | refctx = zstd.ZstdCompressor(level=level) |
|
86 | 100 | ref_frame = refctx.compress(original) |
|
87 | 101 | |
|
88 | 102 | cctx = zstd.ZstdCompressor(level=level) |
|
89 |
with cctx.stream_reader( |
|
|
90 | read_size=source_read_size) as reader: | |
|
103 | with cctx.stream_reader( | |
|
104 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
105 | ) as reader: | |
|
91 | 106 | chunks = [] |
|
92 | 107 | while True: |
|
93 | 108 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
94 | 109 | chunk = reader.read(read_size) |
|
95 | 110 | if not chunk and read_size: |
|
96 | 111 | break |
|
97 | 112 | |
|
98 | 113 | chunks.append(chunk) |
|
99 | 114 | |
|
100 |
self.assertEqual(b |
|
|
115 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
101 | 116 | |
|
102 | 117 | @hypothesis.settings( |
|
103 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
104 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
105 | level=strategies.integers(min_value=1, max_value=5), | |
|
106 | source_read_size=strategies.integers(1, 16384), | |
|
107 | read_sizes=strategies.data()) | |
|
108 | def test_buffer_source_read_variance(self, original, level, source_read_size, | |
|
109 | read_sizes): | |
|
118 | suppress_health_check=[ | |
|
119 | hypothesis.HealthCheck.large_base_example, | |
|
120 | hypothesis.HealthCheck.too_slow, | |
|
121 | ] | |
|
122 | ) | |
|
123 | @hypothesis.given( | |
|
124 | original=strategies.sampled_from(random_input_data()), | |
|
125 | level=strategies.integers(min_value=1, max_value=5), | |
|
126 | source_read_size=strategies.integers(1, 16384), | |
|
127 | read_sizes=strategies.data(), | |
|
128 | ) | |
|
129 | def test_buffer_source_read_variance( | |
|
130 | self, original, level, source_read_size, read_sizes | |
|
131 | ): | |
|
110 | 132 | |
|
111 | 133 | refctx = zstd.ZstdCompressor(level=level) |
|
112 | 134 | ref_frame = refctx.compress(original) |
|
113 | 135 | |
|
114 | 136 | cctx = zstd.ZstdCompressor(level=level) |
|
115 |
with cctx.stream_reader( |
|
|
116 | read_size=source_read_size) as reader: | |
|
137 | with cctx.stream_reader( | |
|
138 | original, size=len(original), read_size=source_read_size | |
|
139 | ) as reader: | |
|
117 | 140 | chunks = [] |
|
118 | 141 | while True: |
|
119 | 142 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
120 | 143 | chunk = reader.read(read_size) |
|
121 | 144 | if not chunk and read_size: |
|
122 | 145 | break |
|
123 | 146 | |
|
124 | 147 | chunks.append(chunk) |
|
125 | 148 | |
|
126 |
self.assertEqual(b |
|
|
149 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
127 | 150 | |
|
128 | 151 | @hypothesis.settings( |
|
129 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
130 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
131 | level=strategies.integers(min_value=1, max_value=5), | |
|
132 | source_read_size=strategies.integers(1, 16384), | |
|
133 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
134 | def test_stream_source_readinto(self, original, level, | |
|
135 | source_read_size, read_size): | |
|
152 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
153 | ) | |
|
154 | @hypothesis.given( | |
|
155 | original=strategies.sampled_from(random_input_data()), | |
|
156 | level=strategies.integers(min_value=1, max_value=5), | |
|
157 | source_read_size=strategies.integers(1, 16384), | |
|
158 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
159 | ) | |
|
160 | def test_stream_source_readinto(self, original, level, source_read_size, read_size): | |
|
136 | 161 | refctx = zstd.ZstdCompressor(level=level) |
|
137 | 162 | ref_frame = refctx.compress(original) |
|
138 | 163 | |
|
139 | 164 | cctx = zstd.ZstdCompressor(level=level) |
|
140 |
with cctx.stream_reader( |
|
|
141 | read_size=source_read_size) as reader: | |
|
165 | with cctx.stream_reader( | |
|
166 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
167 | ) as reader: | |
|
142 | 168 | chunks = [] |
|
143 | 169 | while True: |
|
144 | 170 | b = bytearray(read_size) |
|
145 | 171 | count = reader.readinto(b) |
|
146 | 172 | |
|
147 | 173 | if not count: |
|
148 | 174 | break |
|
149 | 175 | |
|
150 | 176 | chunks.append(bytes(b[0:count])) |
|
151 | 177 | |
|
152 |
self.assertEqual(b |
|
|
178 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
153 | 179 | |
|
154 | 180 | @hypothesis.settings( |
|
155 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
156 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
157 | level=strategies.integers(min_value=1, max_value=5), | |
|
158 | source_read_size=strategies.integers(1, 16384), | |
|
159 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
160 | def test_buffer_source_readinto(self, original, level, | |
|
161 | source_read_size, read_size): | |
|
181 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
182 | ) | |
|
183 | @hypothesis.given( | |
|
184 | original=strategies.sampled_from(random_input_data()), | |
|
185 | level=strategies.integers(min_value=1, max_value=5), | |
|
186 | source_read_size=strategies.integers(1, 16384), | |
|
187 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
188 | ) | |
|
189 | def test_buffer_source_readinto(self, original, level, source_read_size, read_size): | |
|
162 | 190 | |
|
163 | 191 | refctx = zstd.ZstdCompressor(level=level) |
|
164 | 192 | ref_frame = refctx.compress(original) |
|
165 | 193 | |
|
166 | 194 | cctx = zstd.ZstdCompressor(level=level) |
|
167 |
with cctx.stream_reader( |
|
|
168 | read_size=source_read_size) as reader: | |
|
195 | with cctx.stream_reader( | |
|
196 | original, size=len(original), read_size=source_read_size | |
|
197 | ) as reader: | |
|
169 | 198 | chunks = [] |
|
170 | 199 | while True: |
|
171 | 200 | b = bytearray(read_size) |
|
172 | 201 | count = reader.readinto(b) |
|
173 | 202 | |
|
174 | 203 | if not count: |
|
175 | 204 | break |
|
176 | 205 | |
|
177 | 206 | chunks.append(bytes(b[0:count])) |
|
178 | 207 | |
|
179 |
self.assertEqual(b |
|
|
208 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
180 | 209 | |
|
181 | 210 | @hypothesis.settings( |
|
182 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
183 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
184 | level=strategies.integers(min_value=1, max_value=5), | |
|
185 | source_read_size=strategies.integers(1, 16384), | |
|
186 | read_sizes=strategies.data()) | |
|
187 | def test_stream_source_readinto_variance(self, original, level, | |
|
188 | source_read_size, read_sizes): | |
|
211 | suppress_health_check=[ | |
|
212 | hypothesis.HealthCheck.large_base_example, | |
|
213 | hypothesis.HealthCheck.too_slow, | |
|
214 | ] | |
|
215 | ) | |
|
216 | @hypothesis.given( | |
|
217 | original=strategies.sampled_from(random_input_data()), | |
|
218 | level=strategies.integers(min_value=1, max_value=5), | |
|
219 | source_read_size=strategies.integers(1, 16384), | |
|
220 | read_sizes=strategies.data(), | |
|
221 | ) | |
|
222 | def test_stream_source_readinto_variance( | |
|
223 | self, original, level, source_read_size, read_sizes | |
|
224 | ): | |
|
189 | 225 | refctx = zstd.ZstdCompressor(level=level) |
|
190 | 226 | ref_frame = refctx.compress(original) |
|
191 | 227 | |
|
192 | 228 | cctx = zstd.ZstdCompressor(level=level) |
|
193 |
with cctx.stream_reader( |
|
|
194 | read_size=source_read_size) as reader: | |
|
229 | with cctx.stream_reader( | |
|
230 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
231 | ) as reader: | |
|
195 | 232 | chunks = [] |
|
196 | 233 | while True: |
|
197 | 234 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
198 | 235 | b = bytearray(read_size) |
|
199 | 236 | count = reader.readinto(b) |
|
200 | 237 | |
|
201 | 238 | if not count: |
|
202 | 239 | break |
|
203 | 240 | |
|
204 | 241 | chunks.append(bytes(b[0:count])) |
|
205 | 242 | |
|
206 |
self.assertEqual(b |
|
|
243 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
207 | 244 | |
|
208 | 245 | @hypothesis.settings( |
|
209 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
210 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
211 | level=strategies.integers(min_value=1, max_value=5), | |
|
212 | source_read_size=strategies.integers(1, 16384), | |
|
213 | read_sizes=strategies.data()) | |
|
214 | def test_buffer_source_readinto_variance(self, original, level, | |
|
215 | source_read_size, read_sizes): | |
|
246 | suppress_health_check=[ | |
|
247 | hypothesis.HealthCheck.large_base_example, | |
|
248 | hypothesis.HealthCheck.too_slow, | |
|
249 | ] | |
|
250 | ) | |
|
251 | @hypothesis.given( | |
|
252 | original=strategies.sampled_from(random_input_data()), | |
|
253 | level=strategies.integers(min_value=1, max_value=5), | |
|
254 | source_read_size=strategies.integers(1, 16384), | |
|
255 | read_sizes=strategies.data(), | |
|
256 | ) | |
|
257 | def test_buffer_source_readinto_variance( | |
|
258 | self, original, level, source_read_size, read_sizes | |
|
259 | ): | |
|
216 | 260 | |
|
217 | 261 | refctx = zstd.ZstdCompressor(level=level) |
|
218 | 262 | ref_frame = refctx.compress(original) |
|
219 | 263 | |
|
220 | 264 | cctx = zstd.ZstdCompressor(level=level) |
|
221 |
with cctx.stream_reader( |
|
|
222 | read_size=source_read_size) as reader: | |
|
265 | with cctx.stream_reader( | |
|
266 | original, size=len(original), read_size=source_read_size | |
|
267 | ) as reader: | |
|
223 | 268 | chunks = [] |
|
224 | 269 | while True: |
|
225 | 270 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
226 | 271 | b = bytearray(read_size) |
|
227 | 272 | count = reader.readinto(b) |
|
228 | 273 | |
|
229 | 274 | if not count: |
|
230 | 275 | break |
|
231 | 276 | |
|
232 | 277 | chunks.append(bytes(b[0:count])) |
|
233 | 278 | |
|
234 |
self.assertEqual(b |
|
|
279 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
235 | 280 | |
|
236 | 281 | @hypothesis.settings( |
|
237 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
238 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
239 | level=strategies.integers(min_value=1, max_value=5), | |
|
240 | source_read_size=strategies.integers(1, 16384), | |
|
241 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
242 | def test_stream_source_read1(self, original, level, source_read_size, | |
|
243 | read_size): | |
|
282 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
283 | ) | |
|
284 | @hypothesis.given( | |
|
285 | original=strategies.sampled_from(random_input_data()), | |
|
286 | level=strategies.integers(min_value=1, max_value=5), | |
|
287 | source_read_size=strategies.integers(1, 16384), | |
|
288 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
289 | ) | |
|
290 | def test_stream_source_read1(self, original, level, source_read_size, read_size): | |
|
244 | 291 | if read_size == 0: |
|
245 | 292 | read_size = -1 |
|
246 | 293 | |
|
247 | 294 | refctx = zstd.ZstdCompressor(level=level) |
|
248 | 295 | ref_frame = refctx.compress(original) |
|
249 | 296 | |
|
250 | 297 | cctx = zstd.ZstdCompressor(level=level) |
|
251 |
with cctx.stream_reader( |
|
|
252 | read_size=source_read_size) as reader: | |
|
298 | with cctx.stream_reader( | |
|
299 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
300 | ) as reader: | |
|
253 | 301 | chunks = [] |
|
254 | 302 | while True: |
|
255 | 303 | chunk = reader.read1(read_size) |
|
256 | 304 | if not chunk: |
|
257 | 305 | break |
|
258 | 306 | |
|
259 | 307 | chunks.append(chunk) |
|
260 | 308 | |
|
261 |
self.assertEqual(b |
|
|
309 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
262 | 310 | |
|
263 | 311 | @hypothesis.settings( |
|
264 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
265 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
266 | level=strategies.integers(min_value=1, max_value=5), | |
|
267 | source_read_size=strategies.integers(1, 16384), | |
|
268 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
269 | def test_buffer_source_read1(self, original, level, source_read_size, | |
|
270 | read_size): | |
|
312 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
313 | ) | |
|
314 | @hypothesis.given( | |
|
315 | original=strategies.sampled_from(random_input_data()), | |
|
316 | level=strategies.integers(min_value=1, max_value=5), | |
|
317 | source_read_size=strategies.integers(1, 16384), | |
|
318 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
319 | ) | |
|
320 | def test_buffer_source_read1(self, original, level, source_read_size, read_size): | |
|
271 | 321 | if read_size == 0: |
|
272 | 322 | read_size = -1 |
|
273 | 323 | |
|
274 | 324 | refctx = zstd.ZstdCompressor(level=level) |
|
275 | 325 | ref_frame = refctx.compress(original) |
|
276 | 326 | |
|
277 | 327 | cctx = zstd.ZstdCompressor(level=level) |
|
278 |
with cctx.stream_reader( |
|
|
279 | read_size=source_read_size) as reader: | |
|
328 | with cctx.stream_reader( | |
|
329 | original, size=len(original), read_size=source_read_size | |
|
330 | ) as reader: | |
|
280 | 331 | chunks = [] |
|
281 | 332 | while True: |
|
282 | 333 | chunk = reader.read1(read_size) |
|
283 | 334 | if not chunk: |
|
284 | 335 | break |
|
285 | 336 | |
|
286 | 337 | chunks.append(chunk) |
|
287 | 338 | |
|
288 |
self.assertEqual(b |
|
|
339 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
289 | 340 | |
|
290 | 341 | @hypothesis.settings( |
|
291 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
292 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
293 | level=strategies.integers(min_value=1, max_value=5), | |
|
294 | source_read_size=strategies.integers(1, 16384), | |
|
295 | read_sizes=strategies.data()) | |
|
296 | def test_stream_source_read1_variance(self, original, level, source_read_size, | |
|
297 | read_sizes): | |
|
342 | suppress_health_check=[ | |
|
343 | hypothesis.HealthCheck.large_base_example, | |
|
344 | hypothesis.HealthCheck.too_slow, | |
|
345 | ] | |
|
346 | ) | |
|
347 | @hypothesis.given( | |
|
348 | original=strategies.sampled_from(random_input_data()), | |
|
349 | level=strategies.integers(min_value=1, max_value=5), | |
|
350 | source_read_size=strategies.integers(1, 16384), | |
|
351 | read_sizes=strategies.data(), | |
|
352 | ) | |
|
353 | def test_stream_source_read1_variance( | |
|
354 | self, original, level, source_read_size, read_sizes | |
|
355 | ): | |
|
298 | 356 | refctx = zstd.ZstdCompressor(level=level) |
|
299 | 357 | ref_frame = refctx.compress(original) |
|
300 | 358 | |
|
301 | 359 | cctx = zstd.ZstdCompressor(level=level) |
|
302 |
with cctx.stream_reader( |
|
|
303 | read_size=source_read_size) as reader: | |
|
360 | with cctx.stream_reader( | |
|
361 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
362 | ) as reader: | |
|
304 | 363 | chunks = [] |
|
305 | 364 | while True: |
|
306 | 365 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
307 | 366 | chunk = reader.read1(read_size) |
|
308 | 367 | if not chunk and read_size: |
|
309 | 368 | break |
|
310 | 369 | |
|
311 | 370 | chunks.append(chunk) |
|
312 | 371 | |
|
313 |
self.assertEqual(b |
|
|
372 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
314 | 373 | |
|
315 | 374 | @hypothesis.settings( |
|
316 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
317 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
318 | level=strategies.integers(min_value=1, max_value=5), | |
|
319 | source_read_size=strategies.integers(1, 16384), | |
|
320 | read_sizes=strategies.data()) | |
|
321 | def test_buffer_source_read1_variance(self, original, level, source_read_size, | |
|
322 | read_sizes): | |
|
375 | suppress_health_check=[ | |
|
376 | hypothesis.HealthCheck.large_base_example, | |
|
377 | hypothesis.HealthCheck.too_slow, | |
|
378 | ] | |
|
379 | ) | |
|
380 | @hypothesis.given( | |
|
381 | original=strategies.sampled_from(random_input_data()), | |
|
382 | level=strategies.integers(min_value=1, max_value=5), | |
|
383 | source_read_size=strategies.integers(1, 16384), | |
|
384 | read_sizes=strategies.data(), | |
|
385 | ) | |
|
386 | def test_buffer_source_read1_variance( | |
|
387 | self, original, level, source_read_size, read_sizes | |
|
388 | ): | |
|
323 | 389 | |
|
324 | 390 | refctx = zstd.ZstdCompressor(level=level) |
|
325 | 391 | ref_frame = refctx.compress(original) |
|
326 | 392 | |
|
327 | 393 | cctx = zstd.ZstdCompressor(level=level) |
|
328 |
with cctx.stream_reader( |
|
|
329 | read_size=source_read_size) as reader: | |
|
394 | with cctx.stream_reader( | |
|
395 | original, size=len(original), read_size=source_read_size | |
|
396 | ) as reader: | |
|
330 | 397 | chunks = [] |
|
331 | 398 | while True: |
|
332 | 399 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
333 | 400 | chunk = reader.read1(read_size) |
|
334 | 401 | if not chunk and read_size: |
|
335 | 402 | break |
|
336 | 403 | |
|
337 | 404 | chunks.append(chunk) |
|
338 | 405 | |
|
339 |
self.assertEqual(b |
|
|
340 | ||
|
406 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
341 | 407 | |
|
342 | 408 | @hypothesis.settings( |
|
343 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
344 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
345 | level=strategies.integers(min_value=1, max_value=5), | |
|
346 | source_read_size=strategies.integers(1, 16384), | |
|
347 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
348 | def test_stream_source_readinto1(self, original, level, source_read_size, | |
|
349 | read_size): | |
|
409 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
410 | ) | |
|
411 | @hypothesis.given( | |
|
412 | original=strategies.sampled_from(random_input_data()), | |
|
413 | level=strategies.integers(min_value=1, max_value=5), | |
|
414 | source_read_size=strategies.integers(1, 16384), | |
|
415 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
416 | ) | |
|
417 | def test_stream_source_readinto1( | |
|
418 | self, original, level, source_read_size, read_size | |
|
419 | ): | |
|
350 | 420 | if read_size == 0: |
|
351 | 421 | read_size = -1 |
|
352 | 422 | |
|
353 | 423 | refctx = zstd.ZstdCompressor(level=level) |
|
354 | 424 | ref_frame = refctx.compress(original) |
|
355 | 425 | |
|
356 | 426 | cctx = zstd.ZstdCompressor(level=level) |
|
357 |
with cctx.stream_reader( |
|
|
358 | read_size=source_read_size) as reader: | |
|
427 | with cctx.stream_reader( | |
|
428 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
429 | ) as reader: | |
|
359 | 430 | chunks = [] |
|
360 | 431 | while True: |
|
361 | 432 | b = bytearray(read_size) |
|
362 | 433 | count = reader.readinto1(b) |
|
363 | 434 | |
|
364 | 435 | if not count: |
|
365 | 436 | break |
|
366 | 437 | |
|
367 | 438 | chunks.append(bytes(b[0:count])) |
|
368 | 439 | |
|
369 |
self.assertEqual(b |
|
|
440 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
370 | 441 | |
|
371 | 442 | @hypothesis.settings( |
|
372 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
373 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
374 | level=strategies.integers(min_value=1, max_value=5), | |
|
375 | source_read_size=strategies.integers(1, 16384), | |
|
376 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) | |
|
377 | def test_buffer_source_readinto1(self, original, level, source_read_size, | |
|
378 | read_size): | |
|
443 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
444 | ) | |
|
445 | @hypothesis.given( | |
|
446 | original=strategies.sampled_from(random_input_data()), | |
|
447 | level=strategies.integers(min_value=1, max_value=5), | |
|
448 | source_read_size=strategies.integers(1, 16384), | |
|
449 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
450 | ) | |
|
451 | def test_buffer_source_readinto1( | |
|
452 | self, original, level, source_read_size, read_size | |
|
453 | ): | |
|
379 | 454 | if read_size == 0: |
|
380 | 455 | read_size = -1 |
|
381 | 456 | |
|
382 | 457 | refctx = zstd.ZstdCompressor(level=level) |
|
383 | 458 | ref_frame = refctx.compress(original) |
|
384 | 459 | |
|
385 | 460 | cctx = zstd.ZstdCompressor(level=level) |
|
386 |
with cctx.stream_reader( |
|
|
387 | read_size=source_read_size) as reader: | |
|
461 | with cctx.stream_reader( | |
|
462 | original, size=len(original), read_size=source_read_size | |
|
463 | ) as reader: | |
|
388 | 464 | chunks = [] |
|
389 | 465 | while True: |
|
390 | 466 | b = bytearray(read_size) |
|
391 | 467 | count = reader.readinto1(b) |
|
392 | 468 | |
|
393 | 469 | if not count: |
|
394 | 470 | break |
|
395 | 471 | |
|
396 | 472 | chunks.append(bytes(b[0:count])) |
|
397 | 473 | |
|
398 |
self.assertEqual(b |
|
|
474 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
399 | 475 | |
|
400 | 476 | @hypothesis.settings( |
|
401 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
402 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
403 | level=strategies.integers(min_value=1, max_value=5), | |
|
404 | source_read_size=strategies.integers(1, 16384), | |
|
405 | read_sizes=strategies.data()) | |
|
406 | def test_stream_source_readinto1_variance(self, original, level, source_read_size, | |
|
407 | read_sizes): | |
|
477 | suppress_health_check=[ | |
|
478 | hypothesis.HealthCheck.large_base_example, | |
|
479 | hypothesis.HealthCheck.too_slow, | |
|
480 | ] | |
|
481 | ) | |
|
482 | @hypothesis.given( | |
|
483 | original=strategies.sampled_from(random_input_data()), | |
|
484 | level=strategies.integers(min_value=1, max_value=5), | |
|
485 | source_read_size=strategies.integers(1, 16384), | |
|
486 | read_sizes=strategies.data(), | |
|
487 | ) | |
|
488 | def test_stream_source_readinto1_variance( | |
|
489 | self, original, level, source_read_size, read_sizes | |
|
490 | ): | |
|
408 | 491 | refctx = zstd.ZstdCompressor(level=level) |
|
409 | 492 | ref_frame = refctx.compress(original) |
|
410 | 493 | |
|
411 | 494 | cctx = zstd.ZstdCompressor(level=level) |
|
412 |
with cctx.stream_reader( |
|
|
413 | read_size=source_read_size) as reader: | |
|
495 | with cctx.stream_reader( | |
|
496 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
497 | ) as reader: | |
|
414 | 498 | chunks = [] |
|
415 | 499 | while True: |
|
416 | 500 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
417 | 501 | b = bytearray(read_size) |
|
418 | 502 | count = reader.readinto1(b) |
|
419 | 503 | |
|
420 | 504 | if not count: |
|
421 | 505 | break |
|
422 | 506 | |
|
423 | 507 | chunks.append(bytes(b[0:count])) |
|
424 | 508 | |
|
425 |
self.assertEqual(b |
|
|
509 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
426 | 510 | |
|
427 | 511 | @hypothesis.settings( |
|
428 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
429 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
430 | level=strategies.integers(min_value=1, max_value=5), | |
|
431 | source_read_size=strategies.integers(1, 16384), | |
|
432 | read_sizes=strategies.data()) | |
|
433 | def test_buffer_source_readinto1_variance(self, original, level, source_read_size, | |
|
434 | read_sizes): | |
|
512 | suppress_health_check=[ | |
|
513 | hypothesis.HealthCheck.large_base_example, | |
|
514 | hypothesis.HealthCheck.too_slow, | |
|
515 | ] | |
|
516 | ) | |
|
517 | @hypothesis.given( | |
|
518 | original=strategies.sampled_from(random_input_data()), | |
|
519 | level=strategies.integers(min_value=1, max_value=5), | |
|
520 | source_read_size=strategies.integers(1, 16384), | |
|
521 | read_sizes=strategies.data(), | |
|
522 | ) | |
|
523 | def test_buffer_source_readinto1_variance( | |
|
524 | self, original, level, source_read_size, read_sizes | |
|
525 | ): | |
|
435 | 526 | |
|
436 | 527 | refctx = zstd.ZstdCompressor(level=level) |
|
437 | 528 | ref_frame = refctx.compress(original) |
|
438 | 529 | |
|
439 | 530 | cctx = zstd.ZstdCompressor(level=level) |
|
440 |
with cctx.stream_reader( |
|
|
441 | read_size=source_read_size) as reader: | |
|
531 | with cctx.stream_reader( | |
|
532 | original, size=len(original), read_size=source_read_size | |
|
533 | ) as reader: | |
|
442 | 534 | chunks = [] |
|
443 | 535 | while True: |
|
444 | 536 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
445 | 537 | b = bytearray(read_size) |
|
446 | 538 | count = reader.readinto1(b) |
|
447 | 539 | |
|
448 | 540 | if not count: |
|
449 | 541 | break |
|
450 | 542 | |
|
451 | 543 | chunks.append(bytes(b[0:count])) |
|
452 | 544 | |
|
453 |
self.assertEqual(b |
|
|
454 | ||
|
545 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
455 | 546 | |
|
456 | 547 | |
|
457 |
@unittest.skipUnless( |
|
|
548 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
458 | 549 | @make_cffi |
|
459 |
class TestCompressor_stream_writer_fuzzing( |
|
|
460 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
461 | level=strategies.integers(min_value=1, max_value=5), | |
|
462 |
|
|
|
550 | class TestCompressor_stream_writer_fuzzing(TestCase): | |
|
551 | @hypothesis.given( | |
|
552 | original=strategies.sampled_from(random_input_data()), | |
|
553 | level=strategies.integers(min_value=1, max_value=5), | |
|
554 | write_size=strategies.integers(min_value=1, max_value=1048576), | |
|
555 | ) | |
|
463 | 556 | def test_write_size_variance(self, original, level, write_size): |
|
464 | 557 | refctx = zstd.ZstdCompressor(level=level) |
|
465 | 558 | ref_frame = refctx.compress(original) |
|
466 | 559 | |
|
467 | 560 | cctx = zstd.ZstdCompressor(level=level) |
|
468 | 561 | b = NonClosingBytesIO() |
|
469 | with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor: | |
|
562 | with cctx.stream_writer( | |
|
563 | b, size=len(original), write_size=write_size | |
|
564 | ) as compressor: | |
|
470 | 565 | compressor.write(original) |
|
471 | 566 | |
|
472 | 567 | self.assertEqual(b.getvalue(), ref_frame) |
|
473 | 568 | |
|
474 | 569 | |
|
475 |
@unittest.skipUnless( |
|
|
570 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
476 | 571 | @make_cffi |
|
477 |
class TestCompressor_copy_stream_fuzzing( |
|
|
478 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
479 | level=strategies.integers(min_value=1, max_value=5), | |
|
480 |
|
|
|
481 |
|
|
|
572 | class TestCompressor_copy_stream_fuzzing(TestCase): | |
|
573 | @hypothesis.given( | |
|
574 | original=strategies.sampled_from(random_input_data()), | |
|
575 | level=strategies.integers(min_value=1, max_value=5), | |
|
576 | read_size=strategies.integers(min_value=1, max_value=1048576), | |
|
577 | write_size=strategies.integers(min_value=1, max_value=1048576), | |
|
578 | ) | |
|
482 | 579 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
483 | 580 | refctx = zstd.ZstdCompressor(level=level) |
|
484 | 581 | ref_frame = refctx.compress(original) |
|
485 | 582 | |
|
486 | 583 | cctx = zstd.ZstdCompressor(level=level) |
|
487 | 584 | source = io.BytesIO(original) |
|
488 | 585 | dest = io.BytesIO() |
|
489 | 586 | |
|
490 | cctx.copy_stream(source, dest, size=len(original), read_size=read_size, | |
|
491 | write_size=write_size) | |
|
587 | cctx.copy_stream( | |
|
588 | source, dest, size=len(original), read_size=read_size, write_size=write_size | |
|
589 | ) | |
|
492 | 590 | |
|
493 | 591 | self.assertEqual(dest.getvalue(), ref_frame) |
|
494 | 592 | |
|
495 | 593 | |
|
496 |
@unittest.skipUnless( |
|
|
594 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
497 | 595 | @make_cffi |
|
498 |
class TestCompressor_compressobj_fuzzing( |
|
|
596 | class TestCompressor_compressobj_fuzzing(TestCase): | |
|
499 | 597 | @hypothesis.settings( |
|
500 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
501 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
502 | level=strategies.integers(min_value=1, max_value=5), | |
|
503 | chunk_sizes=strategies.data()) | |
|
598 | suppress_health_check=[ | |
|
599 | hypothesis.HealthCheck.large_base_example, | |
|
600 | hypothesis.HealthCheck.too_slow, | |
|
601 | ] | |
|
602 | ) | |
|
603 | @hypothesis.given( | |
|
604 | original=strategies.sampled_from(random_input_data()), | |
|
605 | level=strategies.integers(min_value=1, max_value=5), | |
|
606 | chunk_sizes=strategies.data(), | |
|
607 | ) | |
|
504 | 608 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
505 | 609 | refctx = zstd.ZstdCompressor(level=level) |
|
506 | 610 | ref_frame = refctx.compress(original) |
|
507 | 611 | |
|
508 | 612 | cctx = zstd.ZstdCompressor(level=level) |
|
509 | 613 | cobj = cctx.compressobj(size=len(original)) |
|
510 | 614 | |
|
511 | 615 | chunks = [] |
|
512 | 616 | i = 0 |
|
513 | 617 | while True: |
|
514 | 618 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
515 | source = original[i:i + chunk_size] | |
|
619 | source = original[i : i + chunk_size] | |
|
516 | 620 | if not source: |
|
517 | 621 | break |
|
518 | 622 | |
|
519 | 623 | chunks.append(cobj.compress(source)) |
|
520 | 624 | i += chunk_size |
|
521 | 625 | |
|
522 | 626 | chunks.append(cobj.flush()) |
|
523 | 627 | |
|
524 |
self.assertEqual(b |
|
|
628 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
525 | 629 | |
|
526 | 630 | @hypothesis.settings( |
|
527 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
528 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
529 | level=strategies.integers(min_value=1, max_value=5), | |
|
530 | chunk_sizes=strategies.data(), | |
|
531 | flushes=strategies.data()) | |
|
631 | suppress_health_check=[ | |
|
632 | hypothesis.HealthCheck.large_base_example, | |
|
633 | hypothesis.HealthCheck.too_slow, | |
|
634 | ] | |
|
635 | ) | |
|
636 | @hypothesis.given( | |
|
637 | original=strategies.sampled_from(random_input_data()), | |
|
638 | level=strategies.integers(min_value=1, max_value=5), | |
|
639 | chunk_sizes=strategies.data(), | |
|
640 | flushes=strategies.data(), | |
|
641 | ) | |
|
532 | 642 | def test_flush_block(self, original, level, chunk_sizes, flushes): |
|
533 | 643 | cctx = zstd.ZstdCompressor(level=level) |
|
534 | 644 | cobj = cctx.compressobj() |
|
535 | 645 | |
|
536 | 646 | dctx = zstd.ZstdDecompressor() |
|
537 | 647 | dobj = dctx.decompressobj() |
|
538 | 648 | |
|
539 | 649 | compressed_chunks = [] |
|
540 | 650 | decompressed_chunks = [] |
|
541 | 651 | i = 0 |
|
542 | 652 | while True: |
|
543 | 653 | input_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
544 | source = original[i:i + input_size] | |
|
654 | source = original[i : i + input_size] | |
|
545 | 655 | if not source: |
|
546 | 656 | break |
|
547 | 657 | |
|
548 | 658 | i += input_size |
|
549 | 659 | |
|
550 | 660 | chunk = cobj.compress(source) |
|
551 | 661 | compressed_chunks.append(chunk) |
|
552 | 662 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
553 | 663 | |
|
554 | 664 | if not flushes.draw(strategies.booleans()): |
|
555 | 665 | continue |
|
556 | 666 | |
|
557 | 667 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
558 | 668 | compressed_chunks.append(chunk) |
|
559 | 669 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
560 | 670 | |
|
561 |
self.assertEqual(b |
|
|
671 | self.assertEqual(b"".join(decompressed_chunks), original[0:i]) | |
|
562 | 672 | |
|
563 | 673 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH) |
|
564 | 674 | compressed_chunks.append(chunk) |
|
565 | 675 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
566 | 676 | |
|
567 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), | |
|
568 | max_output_size=len(original)), | |
|
569 |
|
|
|
570 | self.assertEqual(b''.join(decompressed_chunks), original) | |
|
677 | self.assertEqual( | |
|
678 | dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), | |
|
679 | original, | |
|
680 | ) | |
|
681 | self.assertEqual(b"".join(decompressed_chunks), original) | |
|
682 | ||
|
571 | 683 | |
|
572 |
@unittest.skipUnless( |
|
|
684 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
573 | 685 | @make_cffi |
|
574 |
class TestCompressor_read_to_iter_fuzzing( |
|
|
575 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
576 | level=strategies.integers(min_value=1, max_value=5), | |
|
577 |
|
|
|
578 |
|
|
|
686 | class TestCompressor_read_to_iter_fuzzing(TestCase): | |
|
687 | @hypothesis.given( | |
|
688 | original=strategies.sampled_from(random_input_data()), | |
|
689 | level=strategies.integers(min_value=1, max_value=5), | |
|
690 | read_size=strategies.integers(min_value=1, max_value=4096), | |
|
691 | write_size=strategies.integers(min_value=1, max_value=4096), | |
|
692 | ) | |
|
579 | 693 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
580 | 694 | refcctx = zstd.ZstdCompressor(level=level) |
|
581 | 695 | ref_frame = refcctx.compress(original) |
|
582 | 696 | |
|
583 | 697 | source = io.BytesIO(original) |
|
584 | 698 | |
|
585 | 699 | cctx = zstd.ZstdCompressor(level=level) |
|
586 | chunks = list(cctx.read_to_iter(source, size=len(original), | |
|
587 | read_size=read_size, | |
|
588 | write_size=write_size)) | |
|
700 | chunks = list( | |
|
701 | cctx.read_to_iter( | |
|
702 | source, size=len(original), read_size=read_size, write_size=write_size | |
|
703 | ) | |
|
704 | ) | |
|
589 | 705 | |
|
590 |
self.assertEqual(b |
|
|
706 | self.assertEqual(b"".join(chunks), ref_frame) | |
|
591 | 707 | |
|
592 | 708 | |
|
593 |
@unittest.skipUnless( |
|
|
594 |
class TestCompressor_multi_compress_to_buffer_fuzzing( |
|
|
595 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), | |
|
596 | min_size=1, max_size=1024), | |
|
597 | threads=strategies.integers(min_value=1, max_value=8), | |
|
598 | use_dict=strategies.booleans()) | |
|
709 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
710 | class TestCompressor_multi_compress_to_buffer_fuzzing(TestCase): | |
|
711 | @hypothesis.given( | |
|
712 | original=strategies.lists( | |
|
713 | strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 | |
|
714 | ), | |
|
715 | threads=strategies.integers(min_value=1, max_value=8), | |
|
716 | use_dict=strategies.booleans(), | |
|
717 | ) | |
|
599 | 718 | def test_data_equivalence(self, original, threads, use_dict): |
|
600 | 719 | kwargs = {} |
|
601 | 720 | |
|
602 | 721 | # Use a content dictionary because it is cheap to create. |
|
603 | 722 | if use_dict: |
|
604 |
kwargs[ |
|
|
723 | kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) | |
|
605 | 724 | |
|
606 | cctx = zstd.ZstdCompressor(level=1, | |
|
607 | write_checksum=True, | |
|
608 | **kwargs) | |
|
725 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs) | |
|
609 | 726 | |
|
610 |
if not hasattr(cctx, |
|
|
611 |
self.skipTest( |
|
|
727 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
728 | self.skipTest("multi_compress_to_buffer not available") | |
|
612 | 729 | |
|
613 | 730 | result = cctx.multi_compress_to_buffer(original, threads=-1) |
|
614 | 731 | |
|
615 | 732 | self.assertEqual(len(result), len(original)) |
|
616 | 733 | |
|
617 | 734 | # The frame produced via the batch APIs may not be bit identical to that |
|
618 | 735 | # produced by compress() because compression parameters are adjusted |
|
619 | 736 | # from the first input in batch mode. So the only thing we can do is |
|
620 | 737 | # verify the decompressed data matches the input. |
|
621 | 738 | dctx = zstd.ZstdDecompressor(**kwargs) |
|
622 | 739 | |
|
623 | 740 | for i, frame in enumerate(result): |
|
624 | 741 | self.assertEqual(dctx.decompress(frame), original[i]) |
|
625 | 742 | |
|
626 | 743 | |
|
627 |
@unittest.skipUnless( |
|
|
744 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
628 | 745 | @make_cffi |
|
629 |
class TestCompressor_chunker_fuzzing( |
|
|
746 | class TestCompressor_chunker_fuzzing(TestCase): | |
|
630 | 747 | @hypothesis.settings( |
|
631 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
632 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
633 | level=strategies.integers(min_value=1, max_value=5), | |
|
634 | chunk_size=strategies.integers( | |
|
635 | min_value=1, | |
|
636 | max_value=32 * 1048576), | |
|
637 | input_sizes=strategies.data()) | |
|
748 | suppress_health_check=[ | |
|
749 | hypothesis.HealthCheck.large_base_example, | |
|
750 | hypothesis.HealthCheck.too_slow, | |
|
751 | ] | |
|
752 | ) | |
|
753 | @hypothesis.given( | |
|
754 | original=strategies.sampled_from(random_input_data()), | |
|
755 | level=strategies.integers(min_value=1, max_value=5), | |
|
756 | chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), | |
|
757 | input_sizes=strategies.data(), | |
|
758 | ) | |
|
638 | 759 | def test_random_input_sizes(self, original, level, chunk_size, input_sizes): |
|
639 | 760 | cctx = zstd.ZstdCompressor(level=level) |
|
640 | 761 | chunker = cctx.chunker(chunk_size=chunk_size) |
|
641 | 762 | |
|
642 | 763 | chunks = [] |
|
643 | 764 | i = 0 |
|
644 | 765 | while True: |
|
645 | 766 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
646 | source = original[i:i + input_size] | |
|
767 | source = original[i : i + input_size] | |
|
647 | 768 | if not source: |
|
648 | 769 | break |
|
649 | 770 | |
|
650 | 771 | chunks.extend(chunker.compress(source)) |
|
651 | 772 | i += input_size |
|
652 | 773 | |
|
653 | 774 | chunks.extend(chunker.finish()) |
|
654 | 775 | |
|
655 | 776 | dctx = zstd.ZstdDecompressor() |
|
656 | 777 | |
|
657 |
self.assertEqual( |
|
|
658 | max_output_size=len(original)), | |
|
659 | original) | |
|
778 | self.assertEqual( | |
|
779 | dctx.decompress(b"".join(chunks), max_output_size=len(original)), original | |
|
780 | ) | |
|
660 | 781 | |
|
661 | 782 | self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1])) |
|
662 | 783 | |
|
663 | 784 | @hypothesis.settings( |
|
664 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
665 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
666 | level=strategies.integers(min_value=1, max_value=5), | |
|
667 | chunk_size=strategies.integers( | |
|
668 | min_value=1, | |
|
669 | max_value=32 * 1048576), | |
|
670 | input_sizes=strategies.data(), | |
|
671 | flushes=strategies.data()) | |
|
672 | def test_flush_block(self, original, level, chunk_size, input_sizes, | |
|
673 | flushes): | |
|
785 | suppress_health_check=[ | |
|
786 | hypothesis.HealthCheck.large_base_example, | |
|
787 | hypothesis.HealthCheck.too_slow, | |
|
788 | ] | |
|
789 | ) | |
|
790 | @hypothesis.given( | |
|
791 | original=strategies.sampled_from(random_input_data()), | |
|
792 | level=strategies.integers(min_value=1, max_value=5), | |
|
793 | chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), | |
|
794 | input_sizes=strategies.data(), | |
|
795 | flushes=strategies.data(), | |
|
796 | ) | |
|
797 | def test_flush_block(self, original, level, chunk_size, input_sizes, flushes): | |
|
674 | 798 | cctx = zstd.ZstdCompressor(level=level) |
|
675 | 799 | chunker = cctx.chunker(chunk_size=chunk_size) |
|
676 | 800 | |
|
677 | 801 | dctx = zstd.ZstdDecompressor() |
|
678 | 802 | dobj = dctx.decompressobj() |
|
679 | 803 | |
|
680 | 804 | compressed_chunks = [] |
|
681 | 805 | decompressed_chunks = [] |
|
682 | 806 | i = 0 |
|
683 | 807 | while True: |
|
684 | 808 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
685 | source = original[i:i + input_size] | |
|
809 | source = original[i : i + input_size] | |
|
686 | 810 | if not source: |
|
687 | 811 | break |
|
688 | 812 | |
|
689 | 813 | i += input_size |
|
690 | 814 | |
|
691 | 815 | chunks = list(chunker.compress(source)) |
|
692 | 816 | compressed_chunks.extend(chunks) |
|
693 |
decompressed_chunks.append(dobj.decompress(b |
|
|
817 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
|
694 | 818 | |
|
695 | 819 | if not flushes.draw(strategies.booleans()): |
|
696 | 820 | continue |
|
697 | 821 | |
|
698 | 822 | chunks = list(chunker.flush()) |
|
699 | 823 | compressed_chunks.extend(chunks) |
|
700 |
decompressed_chunks.append(dobj.decompress(b |
|
|
824 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
|
701 | 825 | |
|
702 |
self.assertEqual(b |
|
|
826 | self.assertEqual(b"".join(decompressed_chunks), original[0:i]) | |
|
703 | 827 | |
|
704 | 828 | chunks = list(chunker.finish()) |
|
705 | 829 | compressed_chunks.extend(chunks) |
|
706 |
decompressed_chunks.append(dobj.decompress(b |
|
|
830 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
|
707 | 831 | |
|
708 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), | |
|
709 | max_output_size=len(original)), | |
|
710 |
|
|
|
711 | self.assertEqual(b''.join(decompressed_chunks), original) No newline at end of file | |
|
832 | self.assertEqual( | |
|
833 | dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), | |
|
834 | original, | |
|
835 | ) | |
|
836 | self.assertEqual(b"".join(decompressed_chunks), original) |
@@ -1,228 +1,241 b'' | |||
|
1 | 1 | import sys |
|
2 | 2 | import unittest |
|
3 | 3 | |
|
4 | 4 | import zstandard as zstd |
|
5 | 5 | |
|
6 |
from . |
|
|
6 | from .common import ( | |
|
7 | 7 | make_cffi, |
|
8 | TestCase, | |
|
8 | 9 | ) |
|
9 | 10 | |
|
10 | 11 | |
|
11 | 12 | @make_cffi |
|
12 |
class TestCompressionParameters( |
|
|
13 | class TestCompressionParameters(TestCase): | |
|
13 | 14 | def test_bounds(self): |
|
14 |
zstd.ZstdCompressionParameters( |
|
|
15 |
|
|
|
16 |
|
|
|
17 |
|
|
|
18 | min_match=zstd.MINMATCH_MIN + 1, | |
|
19 | target_length=zstd.TARGETLENGTH_MIN, | |
|
20 | strategy=zstd.STRATEGY_FAST) | |
|
15 | zstd.ZstdCompressionParameters( | |
|
16 | window_log=zstd.WINDOWLOG_MIN, | |
|
17 | chain_log=zstd.CHAINLOG_MIN, | |
|
18 | hash_log=zstd.HASHLOG_MIN, | |
|
19 | search_log=zstd.SEARCHLOG_MIN, | |
|
20 | min_match=zstd.MINMATCH_MIN + 1, | |
|
21 | target_length=zstd.TARGETLENGTH_MIN, | |
|
22 | strategy=zstd.STRATEGY_FAST, | |
|
23 | ) | |
|
21 | 24 | |
|
22 |
zstd.ZstdCompressionParameters( |
|
|
23 |
|
|
|
24 |
|
|
|
25 |
|
|
|
26 | min_match=zstd.MINMATCH_MAX - 1, | |
|
27 | target_length=zstd.TARGETLENGTH_MAX, | |
|
28 | strategy=zstd.STRATEGY_BTULTRA2) | |
|
25 | zstd.ZstdCompressionParameters( | |
|
26 | window_log=zstd.WINDOWLOG_MAX, | |
|
27 | chain_log=zstd.CHAINLOG_MAX, | |
|
28 | hash_log=zstd.HASHLOG_MAX, | |
|
29 | search_log=zstd.SEARCHLOG_MAX, | |
|
30 | min_match=zstd.MINMATCH_MAX - 1, | |
|
31 | target_length=zstd.TARGETLENGTH_MAX, | |
|
32 | strategy=zstd.STRATEGY_BTULTRA2, | |
|
33 | ) | |
|
29 | 34 | |
|
30 | 35 | def test_from_level(self): |
|
31 | 36 | p = zstd.ZstdCompressionParameters.from_level(1) |
|
32 | 37 | self.assertIsInstance(p, zstd.CompressionParameters) |
|
33 | 38 | |
|
34 | 39 | self.assertEqual(p.window_log, 19) |
|
35 | 40 | |
|
36 | 41 | p = zstd.ZstdCompressionParameters.from_level(-4) |
|
37 | 42 | self.assertEqual(p.window_log, 19) |
|
38 | 43 | |
|
39 | 44 | def test_members(self): |
|
40 |
p = zstd.ZstdCompressionParameters( |
|
|
41 | chain_log=6, | |
|
42 | hash_log=7, | |
|
43 | search_log=4, | |
|
44 | min_match=5, | |
|
45 | target_length=8, | |
|
46 | strategy=1) | |
|
45 | p = zstd.ZstdCompressionParameters( | |
|
46 | window_log=10, | |
|
47 | chain_log=6, | |
|
48 | hash_log=7, | |
|
49 | search_log=4, | |
|
50 | min_match=5, | |
|
51 | target_length=8, | |
|
52 | strategy=1, | |
|
53 | ) | |
|
47 | 54 | self.assertEqual(p.window_log, 10) |
|
48 | 55 | self.assertEqual(p.chain_log, 6) |
|
49 | 56 | self.assertEqual(p.hash_log, 7) |
|
50 | 57 | self.assertEqual(p.search_log, 4) |
|
51 | 58 | self.assertEqual(p.min_match, 5) |
|
52 | 59 | self.assertEqual(p.target_length, 8) |
|
53 | 60 | self.assertEqual(p.compression_strategy, 1) |
|
54 | 61 | |
|
55 | 62 | p = zstd.ZstdCompressionParameters(compression_level=2) |
|
56 | 63 | self.assertEqual(p.compression_level, 2) |
|
57 | 64 | |
|
58 | 65 | p = zstd.ZstdCompressionParameters(threads=4) |
|
59 | 66 | self.assertEqual(p.threads, 4) |
|
60 | 67 | |
|
61 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, | |
|
62 | overlap_log=6) | |
|
68 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6) | |
|
63 | 69 | self.assertEqual(p.threads, 2) |
|
64 | 70 | self.assertEqual(p.job_size, 1048576) |
|
65 | 71 | self.assertEqual(p.overlap_log, 6) |
|
66 | 72 | self.assertEqual(p.overlap_size_log, 6) |
|
67 | 73 | |
|
68 | 74 | p = zstd.ZstdCompressionParameters(compression_level=-1) |
|
69 | 75 | self.assertEqual(p.compression_level, -1) |
|
70 | 76 | |
|
71 | 77 | p = zstd.ZstdCompressionParameters(compression_level=-2) |
|
72 | 78 | self.assertEqual(p.compression_level, -2) |
|
73 | 79 | |
|
74 | 80 | p = zstd.ZstdCompressionParameters(force_max_window=True) |
|
75 | 81 | self.assertEqual(p.force_max_window, 1) |
|
76 | 82 | |
|
77 | 83 | p = zstd.ZstdCompressionParameters(enable_ldm=True) |
|
78 | 84 | self.assertEqual(p.enable_ldm, 1) |
|
79 | 85 | |
|
80 | 86 | p = zstd.ZstdCompressionParameters(ldm_hash_log=7) |
|
81 | 87 | self.assertEqual(p.ldm_hash_log, 7) |
|
82 | 88 | |
|
83 | 89 | p = zstd.ZstdCompressionParameters(ldm_min_match=6) |
|
84 | 90 | self.assertEqual(p.ldm_min_match, 6) |
|
85 | 91 | |
|
86 | 92 | p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7) |
|
87 | 93 | self.assertEqual(p.ldm_bucket_size_log, 7) |
|
88 | 94 | |
|
89 | 95 | p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) |
|
90 | 96 | self.assertEqual(p.ldm_hash_every_log, 8) |
|
91 | 97 | self.assertEqual(p.ldm_hash_rate_log, 8) |
|
92 | 98 | |
|
93 | 99 | def test_estimated_compression_context_size(self): |
|
94 |
p = zstd.ZstdCompressionParameters( |
|
|
95 | chain_log=16, | |
|
96 | hash_log=17, | |
|
97 | search_log=1, | |
|
98 | min_match=5, | |
|
99 | target_length=16, | |
|
100 | strategy=zstd.STRATEGY_DFAST) | |
|
100 | p = zstd.ZstdCompressionParameters( | |
|
101 | window_log=20, | |
|
102 | chain_log=16, | |
|
103 | hash_log=17, | |
|
104 | search_log=1, | |
|
105 | min_match=5, | |
|
106 | target_length=16, | |
|
107 | strategy=zstd.STRATEGY_DFAST, | |
|
108 | ) | |
|
101 | 109 | |
|
102 | 110 | # 32-bit has slightly different values from 64-bit. |
|
103 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144, | |
|
104 | delta=250) | |
|
111 | self.assertAlmostEqual( | |
|
112 | p.estimated_compression_context_size(), 1294464, delta=400 | |
|
113 | ) | |
|
105 | 114 | |
|
106 | 115 | def test_strategy(self): |
|
107 | with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'): | |
|
116 | with self.assertRaisesRegex( | |
|
117 | ValueError, "cannot specify both compression_strategy" | |
|
118 | ): | |
|
108 | 119 | zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0) |
|
109 | 120 | |
|
110 | 121 | p = zstd.ZstdCompressionParameters(strategy=2) |
|
111 | 122 | self.assertEqual(p.compression_strategy, 2) |
|
112 | 123 | |
|
113 | 124 | p = zstd.ZstdCompressionParameters(strategy=3) |
|
114 | 125 | self.assertEqual(p.compression_strategy, 3) |
|
115 | 126 | |
|
116 | 127 | def test_ldm_hash_rate_log(self): |
|
117 | with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'): | |
|
128 | with self.assertRaisesRegex( | |
|
129 | ValueError, "cannot specify both ldm_hash_rate_log" | |
|
130 | ): | |
|
118 | 131 | zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4) |
|
119 | 132 | |
|
120 | 133 | p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) |
|
121 | 134 | self.assertEqual(p.ldm_hash_every_log, 8) |
|
122 | 135 | |
|
123 | 136 | p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16) |
|
124 | 137 | self.assertEqual(p.ldm_hash_every_log, 16) |
|
125 | 138 | |
|
126 | 139 | def test_overlap_log(self): |
|
127 |
with self.assertRaisesRegex |
|
|
140 | with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"): | |
|
128 | 141 | zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9) |
|
129 | 142 | |
|
130 | 143 | p = zstd.ZstdCompressionParameters(overlap_log=2) |
|
131 | 144 | self.assertEqual(p.overlap_log, 2) |
|
132 | 145 | self.assertEqual(p.overlap_size_log, 2) |
|
133 | 146 | |
|
134 | 147 | p = zstd.ZstdCompressionParameters(overlap_size_log=4) |
|
135 | 148 | self.assertEqual(p.overlap_log, 4) |
|
136 | 149 | self.assertEqual(p.overlap_size_log, 4) |
|
137 | 150 | |
|
138 | 151 | |
|
139 | 152 | @make_cffi |
|
140 |
class TestFrameParameters( |
|
|
153 | class TestFrameParameters(TestCase): | |
|
141 | 154 | def test_invalid_type(self): |
|
142 | 155 | with self.assertRaises(TypeError): |
|
143 | 156 | zstd.get_frame_parameters(None) |
|
144 | 157 | |
|
145 | 158 | # Python 3 doesn't appear to convert unicode to Py_buffer. |
|
146 | 159 | if sys.version_info[0] >= 3: |
|
147 | 160 | with self.assertRaises(TypeError): |
|
148 |
zstd.get_frame_parameters(u |
|
|
161 | zstd.get_frame_parameters(u"foobarbaz") | |
|
149 | 162 | else: |
|
150 | 163 | # CPython will convert unicode to Py_buffer. But CFFI won't. |
|
151 |
if zstd.backend == |
|
|
164 | if zstd.backend == "cffi": | |
|
152 | 165 | with self.assertRaises(TypeError): |
|
153 |
zstd.get_frame_parameters(u |
|
|
166 | zstd.get_frame_parameters(u"foobarbaz") | |
|
154 | 167 | else: |
|
155 | 168 | with self.assertRaises(zstd.ZstdError): |
|
156 |
zstd.get_frame_parameters(u |
|
|
169 | zstd.get_frame_parameters(u"foobarbaz") | |
|
157 | 170 | |
|
158 | 171 | def test_invalid_input_sizes(self): |
|
159 |
with self.assertRaisesRegex |
|
|
160 |
zstd.get_frame_parameters(b |
|
|
172 | with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): | |
|
173 | zstd.get_frame_parameters(b"") | |
|
161 | 174 | |
|
162 |
with self.assertRaisesRegex |
|
|
175 | with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): | |
|
163 | 176 | zstd.get_frame_parameters(zstd.FRAME_HEADER) |
|
164 | 177 | |
|
165 | 178 | def test_invalid_frame(self): |
|
166 |
with self.assertRaisesRegex |
|
|
167 |
zstd.get_frame_parameters(b |
|
|
179 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
|
180 | zstd.get_frame_parameters(b"foobarbaz") | |
|
168 | 181 | |
|
169 | 182 | def test_attributes(self): |
|
170 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
183 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00") | |
|
171 | 184 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
172 | 185 | self.assertEqual(params.window_size, 1024) |
|
173 | 186 | self.assertEqual(params.dict_id, 0) |
|
174 | 187 | self.assertFalse(params.has_checksum) |
|
175 | 188 | |
|
176 | 189 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. |
|
177 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
190 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff") | |
|
178 | 191 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
179 | 192 | self.assertEqual(params.window_size, 1024) |
|
180 | 193 | self.assertEqual(params.dict_id, 255) |
|
181 | 194 | self.assertFalse(params.has_checksum) |
|
182 | 195 | |
|
183 | 196 | # Lowest 3rd bit indicates if checksum is present. |
|
184 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
197 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00") | |
|
185 | 198 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
186 | 199 | self.assertEqual(params.window_size, 1024) |
|
187 | 200 | self.assertEqual(params.dict_id, 0) |
|
188 | 201 | self.assertTrue(params.has_checksum) |
|
189 | 202 | |
|
190 | 203 | # Upper 2 bits indicate content size. |
|
191 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
204 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00") | |
|
192 | 205 | self.assertEqual(params.content_size, 511) |
|
193 | 206 | self.assertEqual(params.window_size, 1024) |
|
194 | 207 | self.assertEqual(params.dict_id, 0) |
|
195 | 208 | self.assertFalse(params.has_checksum) |
|
196 | 209 | |
|
197 | 210 | # Window descriptor is 2nd byte after frame header. |
|
198 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
211 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40") | |
|
199 | 212 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
200 | 213 | self.assertEqual(params.window_size, 262144) |
|
201 | 214 | self.assertEqual(params.dict_id, 0) |
|
202 | 215 | self.assertFalse(params.has_checksum) |
|
203 | 216 | |
|
204 | 217 | # Set multiple things. |
|
205 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
|
218 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00") | |
|
206 | 219 | self.assertEqual(params.content_size, 272) |
|
207 | 220 | self.assertEqual(params.window_size, 262144) |
|
208 | 221 | self.assertEqual(params.dict_id, 15) |
|
209 | 222 | self.assertTrue(params.has_checksum) |
|
210 | 223 | |
|
211 | 224 | def test_input_types(self): |
|
212 |
v = zstd.FRAME_HEADER + b |
|
|
225 | v = zstd.FRAME_HEADER + b"\x00\x00" | |
|
213 | 226 | |
|
214 | 227 | mutable_array = bytearray(len(v)) |
|
215 | 228 | mutable_array[:] = v |
|
216 | 229 | |
|
217 | 230 | sources = [ |
|
218 | 231 | memoryview(v), |
|
219 | 232 | bytearray(v), |
|
220 | 233 | mutable_array, |
|
221 | 234 | ] |
|
222 | 235 | |
|
223 | 236 | for source in sources: |
|
224 | 237 | params = zstd.get_frame_parameters(source) |
|
225 | 238 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
226 | 239 | self.assertEqual(params.window_size, 1024) |
|
227 | 240 | self.assertEqual(params.dict_id, 0) |
|
228 | 241 | self.assertFalse(params.has_checksum) |
@@ -1,76 +1,105 b'' | |||
|
1 | 1 | import io |
|
2 | 2 | import os |
|
3 | 3 | import sys |
|
4 | 4 | import unittest |
|
5 | 5 | |
|
6 | 6 | try: |
|
7 | 7 | import hypothesis |
|
8 | 8 | import hypothesis.strategies as strategies |
|
9 | 9 | except ImportError: |
|
10 |
raise unittest.SkipTest( |
|
|
10 | raise unittest.SkipTest("hypothesis not available") | |
|
11 | 11 | |
|
12 | 12 | import zstandard as zstd |
|
13 | 13 | |
|
14 | 14 | from .common import ( |
|
15 | 15 | make_cffi, |
|
16 | TestCase, | |
|
17 | ) | |
|
18 | ||
|
19 | ||
|
20 | s_windowlog = strategies.integers( | |
|
21 | min_value=zstd.WINDOWLOG_MIN, max_value=zstd.WINDOWLOG_MAX | |
|
22 | ) | |
|
23 | s_chainlog = strategies.integers( | |
|
24 | min_value=zstd.CHAINLOG_MIN, max_value=zstd.CHAINLOG_MAX | |
|
25 | ) | |
|
26 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, max_value=zstd.HASHLOG_MAX) | |
|
27 | s_searchlog = strategies.integers( | |
|
28 | min_value=zstd.SEARCHLOG_MIN, max_value=zstd.SEARCHLOG_MAX | |
|
29 | ) | |
|
30 | s_minmatch = strategies.integers( | |
|
31 | min_value=zstd.MINMATCH_MIN, max_value=zstd.MINMATCH_MAX | |
|
32 | ) | |
|
33 | s_targetlength = strategies.integers( | |
|
34 | min_value=zstd.TARGETLENGTH_MIN, max_value=zstd.TARGETLENGTH_MAX | |
|
35 | ) | |
|
36 | s_strategy = strategies.sampled_from( | |
|
37 | ( | |
|
38 | zstd.STRATEGY_FAST, | |
|
39 | zstd.STRATEGY_DFAST, | |
|
40 | zstd.STRATEGY_GREEDY, | |
|
41 | zstd.STRATEGY_LAZY, | |
|
42 | zstd.STRATEGY_LAZY2, | |
|
43 | zstd.STRATEGY_BTLAZY2, | |
|
44 | zstd.STRATEGY_BTOPT, | |
|
45 | zstd.STRATEGY_BTULTRA, | |
|
46 | zstd.STRATEGY_BTULTRA2, | |
|
47 | ) | |
|
16 | 48 | ) |
|
17 | 49 | |
|
18 | 50 | |
|
19 | s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, | |
|
20 | max_value=zstd.WINDOWLOG_MAX) | |
|
21 | s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN, | |
|
22 | max_value=zstd.CHAINLOG_MAX) | |
|
23 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, | |
|
24 | max_value=zstd.HASHLOG_MAX) | |
|
25 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, | |
|
26 | max_value=zstd.SEARCHLOG_MAX) | |
|
27 | s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN, | |
|
28 | max_value=zstd.MINMATCH_MAX) | |
|
29 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, | |
|
30 | max_value=zstd.TARGETLENGTH_MAX) | |
|
31 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, | |
|
32 | zstd.STRATEGY_DFAST, | |
|
33 | zstd.STRATEGY_GREEDY, | |
|
34 | zstd.STRATEGY_LAZY, | |
|
35 | zstd.STRATEGY_LAZY2, | |
|
36 | zstd.STRATEGY_BTLAZY2, | |
|
37 | zstd.STRATEGY_BTOPT, | |
|
38 | zstd.STRATEGY_BTULTRA, | |
|
39 | zstd.STRATEGY_BTULTRA2)) | |
|
40 | ||
|
51 | @make_cffi | |
|
52 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
53 | class TestCompressionParametersHypothesis(TestCase): | |
|
54 | @hypothesis.given( | |
|
55 | s_windowlog, | |
|
56 | s_chainlog, | |
|
57 | s_hashlog, | |
|
58 | s_searchlog, | |
|
59 | s_minmatch, | |
|
60 | s_targetlength, | |
|
61 | s_strategy, | |
|
62 | ) | |
|
63 | def test_valid_init( | |
|
64 | self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy | |
|
65 | ): | |
|
66 | zstd.ZstdCompressionParameters( | |
|
67 | window_log=windowlog, | |
|
68 | chain_log=chainlog, | |
|
69 | hash_log=hashlog, | |
|
70 | search_log=searchlog, | |
|
71 | min_match=minmatch, | |
|
72 | target_length=targetlength, | |
|
73 | strategy=strategy, | |
|
74 | ) | |
|
41 | 75 | |
|
42 | @make_cffi | |
|
43 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') | |
|
44 | class TestCompressionParametersHypothesis(unittest.TestCase): | |
|
45 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |
|
46 | s_minmatch, s_targetlength, s_strategy) | |
|
47 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, | |
|
48 | minmatch, targetlength, strategy): | |
|
49 | zstd.ZstdCompressionParameters(window_log=windowlog, | |
|
50 | chain_log=chainlog, | |
|
51 | hash_log=hashlog, | |
|
52 | search_log=searchlog, | |
|
53 | min_match=minmatch, | |
|
54 | target_length=targetlength, | |
|
55 | strategy=strategy) | |
|
56 | ||
|
57 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, | |
|
58 | s_minmatch, s_targetlength, s_strategy) | |
|
59 | def test_estimated_compression_context_size(self, windowlog, chainlog, | |
|
60 | hashlog, searchlog, | |
|
61 | minmatch, targetlength, | |
|
62 | strategy): | |
|
63 | if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): | |
|
76 | @hypothesis.given( | |
|
77 | s_windowlog, | |
|
78 | s_chainlog, | |
|
79 | s_hashlog, | |
|
80 | s_searchlog, | |
|
81 | s_minmatch, | |
|
82 | s_targetlength, | |
|
83 | s_strategy, | |
|
84 | ) | |
|
85 | def test_estimated_compression_context_size( | |
|
86 | self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy | |
|
87 | ): | |
|
88 | if minmatch == zstd.MINMATCH_MIN and strategy in ( | |
|
89 | zstd.STRATEGY_FAST, | |
|
90 | zstd.STRATEGY_GREEDY, | |
|
91 | ): | |
|
64 | 92 | minmatch += 1 |
|
65 | 93 | elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST: |
|
66 | 94 | minmatch -= 1 |
|
67 | 95 | |
|
68 |
p = zstd.ZstdCompressionParameters( |
|
|
69 | chain_log=chainlog, | |
|
70 | hash_log=hashlog, | |
|
71 | search_log=searchlog, | |
|
72 | min_match=minmatch, | |
|
73 | target_length=targetlength, | |
|
74 | strategy=strategy) | |
|
96 | p = zstd.ZstdCompressionParameters( | |
|
97 | window_log=windowlog, | |
|
98 | chain_log=chainlog, | |
|
99 | hash_log=hashlog, | |
|
100 | search_log=searchlog, | |
|
101 | min_match=minmatch, | |
|
102 | target_length=targetlength, | |
|
103 | strategy=strategy, | |
|
104 | ) | |
|
75 | 105 | size = p.estimated_compression_context_size() |
|
76 |
This diff has been collapsed as it changes many lines, (729 lines changed) Show them Hide them | |||
@@ -1,1611 +1,1670 b'' | |||
|
1 | 1 | import io |
|
2 | 2 | import os |
|
3 | 3 | import random |
|
4 | 4 | import struct |
|
5 | 5 | import sys |
|
6 | 6 | import tempfile |
|
7 | 7 | import unittest |
|
8 | 8 | |
|
9 | 9 | import zstandard as zstd |
|
10 | 10 | |
|
11 | 11 | from .common import ( |
|
12 | 12 | generate_samples, |
|
13 | 13 | make_cffi, |
|
14 | 14 | NonClosingBytesIO, |
|
15 | 15 | OpCountingBytesIO, |
|
16 | TestCase, | |
|
16 | 17 | ) |
|
17 | 18 | |
|
18 | 19 | |
|
19 | 20 | if sys.version_info[0] >= 3: |
|
20 | 21 | next = lambda it: it.__next__() |
|
21 | 22 | else: |
|
22 | 23 | next = lambda it: it.next() |
|
23 | 24 | |
|
24 | 25 | |
|
25 | 26 | @make_cffi |
|
26 |
class TestFrameHeaderSize( |
|
|
27 | class TestFrameHeaderSize(TestCase): | |
|
27 | 28 | def test_empty(self): |
|
28 |
with self.assertRaisesRegex |
|
|
29 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |
|
30 | 'is incorrect'): | |
|
31 | zstd.frame_header_size(b'') | |
|
29 | with self.assertRaisesRegex( | |
|
30 | zstd.ZstdError, | |
|
31 | "could not determine frame header size: Src size " "is incorrect", | |
|
32 | ): | |
|
33 | zstd.frame_header_size(b"") | |
|
32 | 34 | |
|
33 | 35 | def test_too_small(self): |
|
34 |
with self.assertRaisesRegex |
|
|
35 | zstd.ZstdError, 'could not determine frame header size: Src size ' | |
|
36 | 'is incorrect'): | |
|
37 | zstd.frame_header_size(b'foob') | |
|
36 | with self.assertRaisesRegex( | |
|
37 | zstd.ZstdError, | |
|
38 | "could not determine frame header size: Src size " "is incorrect", | |
|
39 | ): | |
|
40 | zstd.frame_header_size(b"foob") | |
|
38 | 41 | |
|
39 | 42 | def test_basic(self): |
|
40 | 43 | # It doesn't matter that it isn't a valid frame. |
|
41 |
self.assertEqual(zstd.frame_header_size(b |
|
|
44 | self.assertEqual(zstd.frame_header_size(b"long enough but no magic"), 6) | |
|
42 | 45 | |
|
43 | 46 | |
|
44 | 47 | @make_cffi |
|
45 |
class TestFrameContentSize( |
|
|
48 | class TestFrameContentSize(TestCase): | |
|
46 | 49 | def test_empty(self): |
|
47 |
with self.assertRaisesRegex |
|
|
48 |
|
|
|
49 | zstd.frame_content_size(b'') | |
|
50 | with self.assertRaisesRegex( | |
|
51 | zstd.ZstdError, "error when determining content size" | |
|
52 | ): | |
|
53 | zstd.frame_content_size(b"") | |
|
50 | 54 | |
|
51 | 55 | def test_too_small(self): |
|
52 |
with self.assertRaisesRegex |
|
|
53 |
|
|
|
54 | zstd.frame_content_size(b'foob') | |
|
56 | with self.assertRaisesRegex( | |
|
57 | zstd.ZstdError, "error when determining content size" | |
|
58 | ): | |
|
59 | zstd.frame_content_size(b"foob") | |
|
55 | 60 | |
|
56 | 61 | def test_bad_frame(self): |
|
57 |
with self.assertRaisesRegex |
|
|
58 |
|
|
|
59 | zstd.frame_content_size(b'invalid frame header') | |
|
62 | with self.assertRaisesRegex( | |
|
63 | zstd.ZstdError, "error when determining content size" | |
|
64 | ): | |
|
65 | zstd.frame_content_size(b"invalid frame header") | |
|
60 | 66 | |
|
61 | 67 | def test_unknown(self): |
|
62 | 68 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
63 |
frame = cctx.compress(b |
|
|
69 | frame = cctx.compress(b"foobar") | |
|
64 | 70 | |
|
65 | 71 | self.assertEqual(zstd.frame_content_size(frame), -1) |
|
66 | 72 | |
|
67 | 73 | def test_empty(self): |
|
68 | 74 | cctx = zstd.ZstdCompressor() |
|
69 |
frame = cctx.compress(b |
|
|
75 | frame = cctx.compress(b"") | |
|
70 | 76 | |
|
71 | 77 | self.assertEqual(zstd.frame_content_size(frame), 0) |
|
72 | 78 | |
|
73 | 79 | def test_basic(self): |
|
74 | 80 | cctx = zstd.ZstdCompressor() |
|
75 |
frame = cctx.compress(b |
|
|
81 | frame = cctx.compress(b"foobar") | |
|
76 | 82 | |
|
77 | 83 | self.assertEqual(zstd.frame_content_size(frame), 6) |
|
78 | 84 | |
|
79 | 85 | |
|
80 | 86 | @make_cffi |
|
81 |
class TestDecompressor( |
|
|
87 | class TestDecompressor(TestCase): | |
|
82 | 88 | def test_memory_size(self): |
|
83 | 89 | dctx = zstd.ZstdDecompressor() |
|
84 | 90 | |
|
85 | 91 | self.assertGreater(dctx.memory_size(), 100) |
|
86 | 92 | |
|
87 | 93 | |
|
88 | 94 | @make_cffi |
|
89 |
class TestDecompressor_decompress( |
|
|
95 | class TestDecompressor_decompress(TestCase): | |
|
90 | 96 | def test_empty_input(self): |
|
91 | 97 | dctx = zstd.ZstdDecompressor() |
|
92 | 98 | |
|
93 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
|
94 | dctx.decompress(b'') | |
|
99 | with self.assertRaisesRegex( | |
|
100 | zstd.ZstdError, "error determining content size from frame header" | |
|
101 | ): | |
|
102 | dctx.decompress(b"") | |
|
95 | 103 | |
|
96 | 104 | def test_invalid_input(self): |
|
97 | 105 | dctx = zstd.ZstdDecompressor() |
|
98 | 106 | |
|
99 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): | |
|
100 | dctx.decompress(b'foobar') | |
|
107 | with self.assertRaisesRegex( | |
|
108 | zstd.ZstdError, "error determining content size from frame header" | |
|
109 | ): | |
|
110 | dctx.decompress(b"foobar") | |
|
101 | 111 | |
|
102 | 112 | def test_input_types(self): |
|
103 | 113 | cctx = zstd.ZstdCompressor(level=1) |
|
104 |
compressed = cctx.compress(b |
|
|
114 | compressed = cctx.compress(b"foo") | |
|
105 | 115 | |
|
106 | 116 | mutable_array = bytearray(len(compressed)) |
|
107 | 117 | mutable_array[:] = compressed |
|
108 | 118 | |
|
109 | 119 | sources = [ |
|
110 | 120 | memoryview(compressed), |
|
111 | 121 | bytearray(compressed), |
|
112 | 122 | mutable_array, |
|
113 | 123 | ] |
|
114 | 124 | |
|
115 | 125 | dctx = zstd.ZstdDecompressor() |
|
116 | 126 | for source in sources: |
|
117 |
self.assertEqual(dctx.decompress(source), b |
|
|
127 | self.assertEqual(dctx.decompress(source), b"foo") | |
|
118 | 128 | |
|
119 | 129 | def test_no_content_size_in_frame(self): |
|
120 | 130 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
121 |
compressed = cctx.compress(b |
|
|
131 | compressed = cctx.compress(b"foobar") | |
|
122 | 132 | |
|
123 | 133 | dctx = zstd.ZstdDecompressor() |
|
124 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): | |
|
134 | with self.assertRaisesRegex( | |
|
135 | zstd.ZstdError, "could not determine content size in frame header" | |
|
136 | ): | |
|
125 | 137 | dctx.decompress(compressed) |
|
126 | 138 | |
|
127 | 139 | def test_content_size_present(self): |
|
128 | 140 | cctx = zstd.ZstdCompressor() |
|
129 |
compressed = cctx.compress(b |
|
|
141 | compressed = cctx.compress(b"foobar") | |
|
130 | 142 | |
|
131 | 143 | dctx = zstd.ZstdDecompressor() |
|
132 | 144 | decompressed = dctx.decompress(compressed) |
|
133 |
self.assertEqual(decompressed, b |
|
|
145 | self.assertEqual(decompressed, b"foobar") | |
|
134 | 146 | |
|
135 | 147 | def test_empty_roundtrip(self): |
|
136 | 148 | cctx = zstd.ZstdCompressor() |
|
137 |
compressed = cctx.compress(b |
|
|
149 | compressed = cctx.compress(b"") | |
|
138 | 150 | |
|
139 | 151 | dctx = zstd.ZstdDecompressor() |
|
140 | 152 | decompressed = dctx.decompress(compressed) |
|
141 | 153 | |
|
142 |
self.assertEqual(decompressed, b |
|
|
154 | self.assertEqual(decompressed, b"") | |
|
143 | 155 | |
|
144 | 156 | def test_max_output_size(self): |
|
145 | 157 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
146 |
source = b |
|
|
158 | source = b"foobar" * 256 | |
|
147 | 159 | compressed = cctx.compress(source) |
|
148 | 160 | |
|
149 | 161 | dctx = zstd.ZstdDecompressor() |
|
150 | 162 | # Will fit into buffer exactly the size of input. |
|
151 | 163 | decompressed = dctx.decompress(compressed, max_output_size=len(source)) |
|
152 | 164 | self.assertEqual(decompressed, source) |
|
153 | 165 | |
|
154 | 166 | # Input size - 1 fails |
|
155 |
with self.assertRaisesRegex |
|
|
156 |
|
|
|
167 | with self.assertRaisesRegex( | |
|
168 | zstd.ZstdError, "decompression error: did not decompress full frame" | |
|
169 | ): | |
|
157 | 170 | dctx.decompress(compressed, max_output_size=len(source) - 1) |
|
158 | 171 | |
|
159 | 172 | # Input size + 1 works |
|
160 | 173 | decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1) |
|
161 | 174 | self.assertEqual(decompressed, source) |
|
162 | 175 | |
|
163 | 176 | # A much larger buffer works. |
|
164 | 177 | decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64) |
|
165 | 178 | self.assertEqual(decompressed, source) |
|
166 | 179 | |
|
167 | 180 | def test_stupidly_large_output_buffer(self): |
|
168 | 181 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
169 |
compressed = cctx.compress(b |
|
|
182 | compressed = cctx.compress(b"foobar" * 256) | |
|
170 | 183 | dctx = zstd.ZstdDecompressor() |
|
171 | 184 | |
|
172 | 185 | # Will get OverflowError on some Python distributions that can't |
|
173 | 186 | # handle really large integers. |
|
174 | 187 | with self.assertRaises((MemoryError, OverflowError)): |
|
175 | dctx.decompress(compressed, max_output_size=2**62) | |
|
188 | dctx.decompress(compressed, max_output_size=2 ** 62) | |
|
176 | 189 | |
|
177 | 190 | def test_dictionary(self): |
|
178 | 191 | samples = [] |
|
179 | 192 | for i in range(128): |
|
180 |
samples.append(b |
|
|
181 |
samples.append(b |
|
|
182 |
samples.append(b |
|
|
193 | samples.append(b"foo" * 64) | |
|
194 | samples.append(b"bar" * 64) | |
|
195 | samples.append(b"foobar" * 64) | |
|
183 | 196 | |
|
184 | 197 | d = zstd.train_dictionary(8192, samples) |
|
185 | 198 | |
|
186 |
orig = b |
|
|
199 | orig = b"foobar" * 16384 | |
|
187 | 200 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
188 | 201 | compressed = cctx.compress(orig) |
|
189 | 202 | |
|
190 | 203 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
191 | 204 | decompressed = dctx.decompress(compressed) |
|
192 | 205 | |
|
193 | 206 | self.assertEqual(decompressed, orig) |
|
194 | 207 | |
|
195 | 208 | def test_dictionary_multiple(self): |
|
196 | 209 | samples = [] |
|
197 | 210 | for i in range(128): |
|
198 |
samples.append(b |
|
|
199 |
samples.append(b |
|
|
200 |
samples.append(b |
|
|
211 | samples.append(b"foo" * 64) | |
|
212 | samples.append(b"bar" * 64) | |
|
213 | samples.append(b"foobar" * 64) | |
|
201 | 214 | |
|
202 | 215 | d = zstd.train_dictionary(8192, samples) |
|
203 | 216 | |
|
204 |
sources = (b |
|
|
217 | sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192) | |
|
205 | 218 | compressed = [] |
|
206 | 219 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
207 | 220 | for source in sources: |
|
208 | 221 | compressed.append(cctx.compress(source)) |
|
209 | 222 | |
|
210 | 223 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
211 | 224 | for i in range(len(sources)): |
|
212 | 225 | decompressed = dctx.decompress(compressed[i]) |
|
213 | 226 | self.assertEqual(decompressed, sources[i]) |
|
214 | 227 | |
|
215 | 228 | def test_max_window_size(self): |
|
216 |
with open(__file__, |
|
|
229 | with open(__file__, "rb") as fh: | |
|
217 | 230 | source = fh.read() |
|
218 | 231 | |
|
219 | 232 | # If we write a content size, the decompressor engages single pass |
|
220 | 233 | # mode and the window size doesn't come into play. |
|
221 | 234 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
222 | 235 | frame = cctx.compress(source) |
|
223 | 236 | |
|
224 | dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN) | |
|
237 | dctx = zstd.ZstdDecompressor(max_window_size=2 ** zstd.WINDOWLOG_MIN) | |
|
225 | 238 | |
|
226 |
with self.assertRaisesRegex |
|
|
227 |
zstd.ZstdError, |
|
|
239 | with self.assertRaisesRegex( | |
|
240 | zstd.ZstdError, "decompression error: Frame requires too much memory" | |
|
241 | ): | |
|
228 | 242 | dctx.decompress(frame, max_output_size=len(source)) |
|
229 | 243 | |
|
230 | 244 | |
|
231 | 245 | @make_cffi |
|
232 |
class TestDecompressor_copy_stream( |
|
|
246 | class TestDecompressor_copy_stream(TestCase): | |
|
233 | 247 | def test_no_read(self): |
|
234 | 248 | source = object() |
|
235 | 249 | dest = io.BytesIO() |
|
236 | 250 | |
|
237 | 251 | dctx = zstd.ZstdDecompressor() |
|
238 | 252 | with self.assertRaises(ValueError): |
|
239 | 253 | dctx.copy_stream(source, dest) |
|
240 | 254 | |
|
241 | 255 | def test_no_write(self): |
|
242 | 256 | source = io.BytesIO() |
|
243 | 257 | dest = object() |
|
244 | 258 | |
|
245 | 259 | dctx = zstd.ZstdDecompressor() |
|
246 | 260 | with self.assertRaises(ValueError): |
|
247 | 261 | dctx.copy_stream(source, dest) |
|
248 | 262 | |
|
249 | 263 | def test_empty(self): |
|
250 | 264 | source = io.BytesIO() |
|
251 | 265 | dest = io.BytesIO() |
|
252 | 266 | |
|
253 | 267 | dctx = zstd.ZstdDecompressor() |
|
254 | 268 | # TODO should this raise an error? |
|
255 | 269 | r, w = dctx.copy_stream(source, dest) |
|
256 | 270 | |
|
257 | 271 | self.assertEqual(r, 0) |
|
258 | 272 | self.assertEqual(w, 0) |
|
259 |
self.assertEqual(dest.getvalue(), b |
|
|
273 | self.assertEqual(dest.getvalue(), b"") | |
|
260 | 274 | |
|
261 | 275 | def test_large_data(self): |
|
262 | 276 | source = io.BytesIO() |
|
263 | 277 | for i in range(255): |
|
264 |
source.write(struct.Struct( |
|
|
278 | source.write(struct.Struct(">B").pack(i) * 16384) | |
|
265 | 279 | source.seek(0) |
|
266 | 280 | |
|
267 | 281 | compressed = io.BytesIO() |
|
268 | 282 | cctx = zstd.ZstdCompressor() |
|
269 | 283 | cctx.copy_stream(source, compressed) |
|
270 | 284 | |
|
271 | 285 | compressed.seek(0) |
|
272 | 286 | dest = io.BytesIO() |
|
273 | 287 | dctx = zstd.ZstdDecompressor() |
|
274 | 288 | r, w = dctx.copy_stream(compressed, dest) |
|
275 | 289 | |
|
276 | 290 | self.assertEqual(r, len(compressed.getvalue())) |
|
277 | 291 | self.assertEqual(w, len(source.getvalue())) |
|
278 | 292 | |
|
279 | 293 | def test_read_write_size(self): |
|
280 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress( | |
|
281 | b'foobarfoobar')) | |
|
294 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) | |
|
282 | 295 | |
|
283 | 296 | dest = OpCountingBytesIO() |
|
284 | 297 | dctx = zstd.ZstdDecompressor() |
|
285 | 298 | r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
286 | 299 | |
|
287 | 300 | self.assertEqual(r, len(source.getvalue())) |
|
288 |
self.assertEqual(w, len(b |
|
|
301 | self.assertEqual(w, len(b"foobarfoobar")) | |
|
289 | 302 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
290 | 303 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
291 | 304 | |
|
292 | 305 | |
|
293 | 306 | @make_cffi |
|
294 |
class TestDecompressor_stream_reader( |
|
|
307 | class TestDecompressor_stream_reader(TestCase): | |
|
295 | 308 | def test_context_manager(self): |
|
296 | 309 | dctx = zstd.ZstdDecompressor() |
|
297 | 310 | |
|
298 |
with dctx.stream_reader(b |
|
|
299 |
with self.assertRaisesRegex |
|
|
311 | with dctx.stream_reader(b"foo") as reader: | |
|
312 | with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): | |
|
300 | 313 | with reader as reader2: |
|
301 | 314 | pass |
|
302 | 315 | |
|
303 | 316 | def test_not_implemented(self): |
|
304 | 317 | dctx = zstd.ZstdDecompressor() |
|
305 | 318 | |
|
306 |
with dctx.stream_reader(b |
|
|
319 | with dctx.stream_reader(b"foo") as reader: | |
|
307 | 320 | with self.assertRaises(io.UnsupportedOperation): |
|
308 | 321 | reader.readline() |
|
309 | 322 | |
|
310 | 323 | with self.assertRaises(io.UnsupportedOperation): |
|
311 | 324 | reader.readlines() |
|
312 | 325 | |
|
313 | 326 | with self.assertRaises(io.UnsupportedOperation): |
|
314 | 327 | iter(reader) |
|
315 | 328 | |
|
316 | 329 | with self.assertRaises(io.UnsupportedOperation): |
|
317 | 330 | next(reader) |
|
318 | 331 | |
|
319 | 332 | with self.assertRaises(io.UnsupportedOperation): |
|
320 |
reader.write(b |
|
|
333 | reader.write(b"foo") | |
|
321 | 334 | |
|
322 | 335 | with self.assertRaises(io.UnsupportedOperation): |
|
323 | 336 | reader.writelines([]) |
|
324 | 337 | |
|
325 | 338 | def test_constant_methods(self): |
|
326 | 339 | dctx = zstd.ZstdDecompressor() |
|
327 | 340 | |
|
328 |
with dctx.stream_reader(b |
|
|
341 | with dctx.stream_reader(b"foo") as reader: | |
|
329 | 342 | self.assertFalse(reader.closed) |
|
330 | 343 | self.assertTrue(reader.readable()) |
|
331 | 344 | self.assertFalse(reader.writable()) |
|
332 | 345 | self.assertTrue(reader.seekable()) |
|
333 | 346 | self.assertFalse(reader.isatty()) |
|
334 | 347 | self.assertFalse(reader.closed) |
|
335 | 348 | self.assertIsNone(reader.flush()) |
|
336 | 349 | self.assertFalse(reader.closed) |
|
337 | 350 | |
|
338 | 351 | self.assertTrue(reader.closed) |
|
339 | 352 | |
|
340 | 353 | def test_read_closed(self): |
|
341 | 354 | dctx = zstd.ZstdDecompressor() |
|
342 | 355 | |
|
343 |
with dctx.stream_reader(b |
|
|
356 | with dctx.stream_reader(b"foo") as reader: | |
|
344 | 357 | reader.close() |
|
345 | 358 | self.assertTrue(reader.closed) |
|
346 |
with self.assertRaisesRegex |
|
|
359 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
347 | 360 | reader.read(1) |
|
348 | 361 | |
|
349 | 362 | def test_read_sizes(self): |
|
350 | 363 | cctx = zstd.ZstdCompressor() |
|
351 |
foo = cctx.compress(b |
|
|
364 | foo = cctx.compress(b"foo") | |
|
352 | 365 | |
|
353 | 366 | dctx = zstd.ZstdDecompressor() |
|
354 | 367 | |
|
355 | 368 | with dctx.stream_reader(foo) as reader: |
|
356 | with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): | |
|
369 | with self.assertRaisesRegex( | |
|
370 | ValueError, "cannot read negative amounts less than -1" | |
|
371 | ): | |
|
357 | 372 | reader.read(-2) |
|
358 | 373 | |
|
359 |
self.assertEqual(reader.read(0), b |
|
|
360 |
self.assertEqual(reader.read(), b |
|
|
374 | self.assertEqual(reader.read(0), b"") | |
|
375 | self.assertEqual(reader.read(), b"foo") | |
|
361 | 376 | |
|
362 | 377 | def test_read_buffer(self): |
|
363 | 378 | cctx = zstd.ZstdCompressor() |
|
364 | 379 | |
|
365 |
source = b |
|
|
380 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
366 | 381 | frame = cctx.compress(source) |
|
367 | 382 | |
|
368 | 383 | dctx = zstd.ZstdDecompressor() |
|
369 | 384 | |
|
370 | 385 | with dctx.stream_reader(frame) as reader: |
|
371 | 386 | self.assertEqual(reader.tell(), 0) |
|
372 | 387 | |
|
373 | 388 | # We should get entire frame in one read. |
|
374 | 389 | result = reader.read(8192) |
|
375 | 390 | self.assertEqual(result, source) |
|
376 | 391 | self.assertEqual(reader.tell(), len(source)) |
|
377 | 392 | |
|
378 | 393 | # Read after EOF should return empty bytes. |
|
379 |
self.assertEqual(reader.read(1), b |
|
|
394 | self.assertEqual(reader.read(1), b"") | |
|
380 | 395 | self.assertEqual(reader.tell(), len(result)) |
|
381 | 396 | |
|
382 | 397 | self.assertTrue(reader.closed) |
|
383 | 398 | |
|
384 | 399 | def test_read_buffer_small_chunks(self): |
|
385 | 400 | cctx = zstd.ZstdCompressor() |
|
386 |
source = b |
|
|
401 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
387 | 402 | frame = cctx.compress(source) |
|
388 | 403 | |
|
389 | 404 | dctx = zstd.ZstdDecompressor() |
|
390 | 405 | chunks = [] |
|
391 | 406 | |
|
392 | 407 | with dctx.stream_reader(frame, read_size=1) as reader: |
|
393 | 408 | while True: |
|
394 | 409 | chunk = reader.read(1) |
|
395 | 410 | if not chunk: |
|
396 | 411 | break |
|
397 | 412 | |
|
398 | 413 | chunks.append(chunk) |
|
399 | 414 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
400 | 415 | |
|
401 |
self.assertEqual(b |
|
|
416 | self.assertEqual(b"".join(chunks), source) | |
|
402 | 417 | |
|
403 | 418 | def test_read_stream(self): |
|
404 | 419 | cctx = zstd.ZstdCompressor() |
|
405 |
source = b |
|
|
420 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
406 | 421 | frame = cctx.compress(source) |
|
407 | 422 | |
|
408 | 423 | dctx = zstd.ZstdDecompressor() |
|
409 | 424 | with dctx.stream_reader(io.BytesIO(frame)) as reader: |
|
410 | 425 | self.assertEqual(reader.tell(), 0) |
|
411 | 426 | |
|
412 | 427 | chunk = reader.read(8192) |
|
413 | 428 | self.assertEqual(chunk, source) |
|
414 | 429 | self.assertEqual(reader.tell(), len(source)) |
|
415 |
self.assertEqual(reader.read(1), b |
|
|
430 | self.assertEqual(reader.read(1), b"") | |
|
416 | 431 | self.assertEqual(reader.tell(), len(source)) |
|
417 | 432 | self.assertFalse(reader.closed) |
|
418 | 433 | |
|
419 | 434 | self.assertTrue(reader.closed) |
|
420 | 435 | |
|
421 | 436 | def test_read_stream_small_chunks(self): |
|
422 | 437 | cctx = zstd.ZstdCompressor() |
|
423 |
source = b |
|
|
438 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
|
424 | 439 | frame = cctx.compress(source) |
|
425 | 440 | |
|
426 | 441 | dctx = zstd.ZstdDecompressor() |
|
427 | 442 | chunks = [] |
|
428 | 443 | |
|
429 | 444 | with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader: |
|
430 | 445 | while True: |
|
431 | 446 | chunk = reader.read(1) |
|
432 | 447 | if not chunk: |
|
433 | 448 | break |
|
434 | 449 | |
|
435 | 450 | chunks.append(chunk) |
|
436 | 451 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
437 | 452 | |
|
438 |
self.assertEqual(b |
|
|
453 | self.assertEqual(b"".join(chunks), source) | |
|
439 | 454 | |
|
440 | 455 | def test_read_after_exit(self): |
|
441 | 456 | cctx = zstd.ZstdCompressor() |
|
442 |
frame = cctx.compress(b |
|
|
457 | frame = cctx.compress(b"foo" * 60) | |
|
443 | 458 | |
|
444 | 459 | dctx = zstd.ZstdDecompressor() |
|
445 | 460 | |
|
446 | 461 | with dctx.stream_reader(frame) as reader: |
|
447 | 462 | while reader.read(16): |
|
448 | 463 | pass |
|
449 | 464 | |
|
450 | 465 | self.assertTrue(reader.closed) |
|
451 | 466 | |
|
452 |
with self.assertRaisesRegex |
|
|
467 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
453 | 468 | reader.read(10) |
|
454 | 469 | |
|
455 | 470 | def test_illegal_seeks(self): |
|
456 | 471 | cctx = zstd.ZstdCompressor() |
|
457 |
frame = cctx.compress(b |
|
|
472 | frame = cctx.compress(b"foo" * 60) | |
|
458 | 473 | |
|
459 | 474 | dctx = zstd.ZstdDecompressor() |
|
460 | 475 | |
|
461 | 476 | with dctx.stream_reader(frame) as reader: |
|
462 |
with self.assertRaisesRegex |
|
|
463 | 'cannot seek to negative position'): | |
|
477 | with self.assertRaisesRegex(ValueError, "cannot seek to negative position"): | |
|
464 | 478 | reader.seek(-1, os.SEEK_SET) |
|
465 | 479 | |
|
466 | 480 | reader.read(1) |
|
467 | 481 | |
|
468 |
with self.assertRaisesRegex |
|
|
469 |
ValueError, |
|
|
482 | with self.assertRaisesRegex( | |
|
483 | ValueError, "cannot seek zstd decompression stream backwards" | |
|
484 | ): | |
|
470 | 485 | reader.seek(0, os.SEEK_SET) |
|
471 | 486 | |
|
472 |
with self.assertRaisesRegex |
|
|
473 |
ValueError, |
|
|
487 | with self.assertRaisesRegex( | |
|
488 | ValueError, "cannot seek zstd decompression stream backwards" | |
|
489 | ): | |
|
474 | 490 | reader.seek(-1, os.SEEK_CUR) |
|
475 | 491 | |
|
476 |
with self.assertRaisesRegex |
|
|
477 | ValueError, | |
|
478 | 'zstd decompression streams cannot be seeked with SEEK_END'): | |
|
492 | with self.assertRaisesRegex( | |
|
493 | ValueError, "zstd decompression streams cannot be seeked with SEEK_END" | |
|
494 | ): | |
|
479 | 495 | reader.seek(0, os.SEEK_END) |
|
480 | 496 | |
|
481 | 497 | reader.close() |
|
482 | 498 | |
|
483 |
with self.assertRaisesRegex |
|
|
499 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
484 | 500 | reader.seek(4, os.SEEK_SET) |
|
485 | 501 | |
|
486 |
with self.assertRaisesRegex |
|
|
502 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
487 | 503 | reader.seek(0) |
|
488 | 504 | |
|
489 | 505 | def test_seek(self): |
|
490 |
source = b |
|
|
506 | source = b"foobar" * 60 | |
|
491 | 507 | cctx = zstd.ZstdCompressor() |
|
492 | 508 | frame = cctx.compress(source) |
|
493 | 509 | |
|
494 | 510 | dctx = zstd.ZstdDecompressor() |
|
495 | 511 | |
|
496 | 512 | with dctx.stream_reader(frame) as reader: |
|
497 | 513 | reader.seek(3) |
|
498 |
self.assertEqual(reader.read(3), b |
|
|
514 | self.assertEqual(reader.read(3), b"bar") | |
|
499 | 515 | |
|
500 | 516 | reader.seek(4, os.SEEK_CUR) |
|
501 |
self.assertEqual(reader.read(2), b |
|
|
517 | self.assertEqual(reader.read(2), b"ar") | |
|
502 | 518 | |
|
503 | 519 | def test_no_context_manager(self): |
|
504 |
source = b |
|
|
520 | source = b"foobar" * 60 | |
|
505 | 521 | cctx = zstd.ZstdCompressor() |
|
506 | 522 | frame = cctx.compress(source) |
|
507 | 523 | |
|
508 | 524 | dctx = zstd.ZstdDecompressor() |
|
509 | 525 | reader = dctx.stream_reader(frame) |
|
510 | 526 | |
|
511 |
self.assertEqual(reader.read(6), b |
|
|
512 |
self.assertEqual(reader.read(18), b |
|
|
527 | self.assertEqual(reader.read(6), b"foobar") | |
|
528 | self.assertEqual(reader.read(18), b"foobar" * 3) | |
|
513 | 529 | self.assertFalse(reader.closed) |
|
514 | 530 | |
|
515 | 531 | # Calling close prevents subsequent use. |
|
516 | 532 | reader.close() |
|
517 | 533 | self.assertTrue(reader.closed) |
|
518 | 534 | |
|
519 |
with self.assertRaisesRegex |
|
|
535 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
520 | 536 | reader.read(6) |
|
521 | 537 | |
|
522 | 538 | def test_read_after_error(self): |
|
523 |
source = io.BytesIO(b |
|
|
539 | source = io.BytesIO(b"") | |
|
524 | 540 | dctx = zstd.ZstdDecompressor() |
|
525 | 541 | |
|
526 | 542 | reader = dctx.stream_reader(source) |
|
527 | 543 | |
|
528 | 544 | with reader: |
|
529 | 545 | reader.read(0) |
|
530 | 546 | |
|
531 | 547 | with reader: |
|
532 |
with self.assertRaisesRegex |
|
|
548 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
533 | 549 | reader.read(100) |
|
534 | 550 | |
|
535 | 551 | def test_partial_read(self): |
|
536 | 552 | # Inspired by https://github.com/indygreg/python-zstandard/issues/71. |
|
537 | 553 | buffer = io.BytesIO() |
|
538 | 554 | cctx = zstd.ZstdCompressor() |
|
539 | 555 | writer = cctx.stream_writer(buffer) |
|
540 | 556 | writer.write(bytearray(os.urandom(1000000))) |
|
541 | 557 | writer.flush(zstd.FLUSH_FRAME) |
|
542 | 558 | buffer.seek(0) |
|
543 | 559 | |
|
544 | 560 | dctx = zstd.ZstdDecompressor() |
|
545 | 561 | reader = dctx.stream_reader(buffer) |
|
546 | 562 | |
|
547 | 563 | while True: |
|
548 | 564 | chunk = reader.read(8192) |
|
549 | 565 | if not chunk: |
|
550 | 566 | break |
|
551 | 567 | |
|
552 | 568 | def test_read_multiple_frames(self): |
|
553 | 569 | cctx = zstd.ZstdCompressor() |
|
554 | 570 | source = io.BytesIO() |
|
555 | 571 | writer = cctx.stream_writer(source) |
|
556 |
writer.write(b |
|
|
572 | writer.write(b"foo") | |
|
557 | 573 | writer.flush(zstd.FLUSH_FRAME) |
|
558 |
writer.write(b |
|
|
574 | writer.write(b"bar") | |
|
559 | 575 | writer.flush(zstd.FLUSH_FRAME) |
|
560 | 576 | |
|
561 | 577 | dctx = zstd.ZstdDecompressor() |
|
562 | 578 | |
|
563 | 579 | reader = dctx.stream_reader(source.getvalue()) |
|
564 |
self.assertEqual(reader.read(2), b |
|
|
565 |
self.assertEqual(reader.read(2), b |
|
|
566 |
self.assertEqual(reader.read(2), b |
|
|
567 |
self.assertEqual(reader.read(2), b |
|
|
580 | self.assertEqual(reader.read(2), b"fo") | |
|
581 | self.assertEqual(reader.read(2), b"o") | |
|
582 | self.assertEqual(reader.read(2), b"ba") | |
|
583 | self.assertEqual(reader.read(2), b"r") | |
|
568 | 584 | |
|
569 | 585 | source.seek(0) |
|
570 | 586 | reader = dctx.stream_reader(source) |
|
571 |
self.assertEqual(reader.read(2), b |
|
|
572 |
self.assertEqual(reader.read(2), b |
|
|
573 |
self.assertEqual(reader.read(2), b |
|
|
574 |
self.assertEqual(reader.read(2), b |
|
|
587 | self.assertEqual(reader.read(2), b"fo") | |
|
588 | self.assertEqual(reader.read(2), b"o") | |
|
589 | self.assertEqual(reader.read(2), b"ba") | |
|
590 | self.assertEqual(reader.read(2), b"r") | |
|
575 | 591 | |
|
576 | 592 | reader = dctx.stream_reader(source.getvalue()) |
|
577 |
self.assertEqual(reader.read(3), b |
|
|
578 |
self.assertEqual(reader.read(3), b |
|
|
593 | self.assertEqual(reader.read(3), b"foo") | |
|
594 | self.assertEqual(reader.read(3), b"bar") | |
|
579 | 595 | |
|
580 | 596 | source.seek(0) |
|
581 | 597 | reader = dctx.stream_reader(source) |
|
582 |
self.assertEqual(reader.read(3), b |
|
|
583 |
self.assertEqual(reader.read(3), b |
|
|
598 | self.assertEqual(reader.read(3), b"foo") | |
|
599 | self.assertEqual(reader.read(3), b"bar") | |
|
584 | 600 | |
|
585 | 601 | reader = dctx.stream_reader(source.getvalue()) |
|
586 |
self.assertEqual(reader.read(4), b |
|
|
587 |
self.assertEqual(reader.read(4), b |
|
|
602 | self.assertEqual(reader.read(4), b"foo") | |
|
603 | self.assertEqual(reader.read(4), b"bar") | |
|
588 | 604 | |
|
589 | 605 | source.seek(0) |
|
590 | 606 | reader = dctx.stream_reader(source) |
|
591 |
self.assertEqual(reader.read(4), b |
|
|
592 |
self.assertEqual(reader.read(4), b |
|
|
607 | self.assertEqual(reader.read(4), b"foo") | |
|
608 | self.assertEqual(reader.read(4), b"bar") | |
|
593 | 609 | |
|
594 | 610 | reader = dctx.stream_reader(source.getvalue()) |
|
595 |
self.assertEqual(reader.read(128), b |
|
|
596 |
self.assertEqual(reader.read(128), b |
|
|
611 | self.assertEqual(reader.read(128), b"foo") | |
|
612 | self.assertEqual(reader.read(128), b"bar") | |
|
597 | 613 | |
|
598 | 614 | source.seek(0) |
|
599 | 615 | reader = dctx.stream_reader(source) |
|
600 |
self.assertEqual(reader.read(128), b |
|
|
601 |
self.assertEqual(reader.read(128), b |
|
|
616 | self.assertEqual(reader.read(128), b"foo") | |
|
617 | self.assertEqual(reader.read(128), b"bar") | |
|
602 | 618 | |
|
603 | 619 | # Now tests for reads spanning frames. |
|
604 | 620 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
605 |
self.assertEqual(reader.read(3), b |
|
|
606 |
self.assertEqual(reader.read(3), b |
|
|
621 | self.assertEqual(reader.read(3), b"foo") | |
|
622 | self.assertEqual(reader.read(3), b"bar") | |
|
607 | 623 | |
|
608 | 624 | source.seek(0) |
|
609 | 625 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
610 |
self.assertEqual(reader.read(3), b |
|
|
611 |
self.assertEqual(reader.read(3), b |
|
|
626 | self.assertEqual(reader.read(3), b"foo") | |
|
627 | self.assertEqual(reader.read(3), b"bar") | |
|
612 | 628 | |
|
613 | 629 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
614 |
self.assertEqual(reader.read(6), b |
|
|
630 | self.assertEqual(reader.read(6), b"foobar") | |
|
615 | 631 | |
|
616 | 632 | source.seek(0) |
|
617 | 633 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
618 |
self.assertEqual(reader.read(6), b |
|
|
634 | self.assertEqual(reader.read(6), b"foobar") | |
|
619 | 635 | |
|
620 | 636 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
621 |
self.assertEqual(reader.read(7), b |
|
|
637 | self.assertEqual(reader.read(7), b"foobar") | |
|
622 | 638 | |
|
623 | 639 | source.seek(0) |
|
624 | 640 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
625 |
self.assertEqual(reader.read(7), b |
|
|
641 | self.assertEqual(reader.read(7), b"foobar") | |
|
626 | 642 | |
|
627 | 643 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
628 |
self.assertEqual(reader.read(128), b |
|
|
644 | self.assertEqual(reader.read(128), b"foobar") | |
|
629 | 645 | |
|
630 | 646 | source.seek(0) |
|
631 | 647 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
632 |
self.assertEqual(reader.read(128), b |
|
|
648 | self.assertEqual(reader.read(128), b"foobar") | |
|
633 | 649 | |
|
634 | 650 | def test_readinto(self): |
|
635 | 651 | cctx = zstd.ZstdCompressor() |
|
636 |
foo = cctx.compress(b |
|
|
652 | foo = cctx.compress(b"foo") | |
|
637 | 653 | |
|
638 | 654 | dctx = zstd.ZstdDecompressor() |
|
639 | 655 | |
|
640 | 656 | # Attempting to readinto() a non-writable buffer fails. |
|
641 | 657 | # The exact exception varies based on the backend. |
|
642 | 658 | reader = dctx.stream_reader(foo) |
|
643 | 659 | with self.assertRaises(Exception): |
|
644 |
reader.readinto(b |
|
|
660 | reader.readinto(b"foobar") | |
|
645 | 661 | |
|
646 | 662 | # readinto() with sufficiently large destination. |
|
647 | 663 | b = bytearray(1024) |
|
648 | 664 | reader = dctx.stream_reader(foo) |
|
649 | 665 | self.assertEqual(reader.readinto(b), 3) |
|
650 |
self.assertEqual(b[0:3], b |
|
|
666 | self.assertEqual(b[0:3], b"foo") | |
|
651 | 667 | self.assertEqual(reader.readinto(b), 0) |
|
652 |
self.assertEqual(b[0:3], b |
|
|
668 | self.assertEqual(b[0:3], b"foo") | |
|
653 | 669 | |
|
654 | 670 | # readinto() with small reads. |
|
655 | 671 | b = bytearray(1024) |
|
656 | 672 | reader = dctx.stream_reader(foo, read_size=1) |
|
657 | 673 | self.assertEqual(reader.readinto(b), 3) |
|
658 |
self.assertEqual(b[0:3], b |
|
|
674 | self.assertEqual(b[0:3], b"foo") | |
|
659 | 675 | |
|
660 | 676 | # Too small destination buffer. |
|
661 | 677 | b = bytearray(2) |
|
662 | 678 | reader = dctx.stream_reader(foo) |
|
663 | 679 | self.assertEqual(reader.readinto(b), 2) |
|
664 |
self.assertEqual(b[:], b |
|
|
680 | self.assertEqual(b[:], b"fo") | |
|
665 | 681 | |
|
666 | 682 | def test_readinto1(self): |
|
667 | 683 | cctx = zstd.ZstdCompressor() |
|
668 |
foo = cctx.compress(b |
|
|
684 | foo = cctx.compress(b"foo") | |
|
669 | 685 | |
|
670 | 686 | dctx = zstd.ZstdDecompressor() |
|
671 | 687 | |
|
672 | 688 | reader = dctx.stream_reader(foo) |
|
673 | 689 | with self.assertRaises(Exception): |
|
674 |
reader.readinto1(b |
|
|
690 | reader.readinto1(b"foobar") | |
|
675 | 691 | |
|
676 | 692 | # Sufficiently large destination. |
|
677 | 693 | b = bytearray(1024) |
|
678 | 694 | reader = dctx.stream_reader(foo) |
|
679 | 695 | self.assertEqual(reader.readinto1(b), 3) |
|
680 |
self.assertEqual(b[0:3], b |
|
|
696 | self.assertEqual(b[0:3], b"foo") | |
|
681 | 697 | self.assertEqual(reader.readinto1(b), 0) |
|
682 |
self.assertEqual(b[0:3], b |
|
|
698 | self.assertEqual(b[0:3], b"foo") | |
|
683 | 699 | |
|
684 | 700 | # readinto() with small reads. |
|
685 | 701 | b = bytearray(1024) |
|
686 | 702 | reader = dctx.stream_reader(foo, read_size=1) |
|
687 | 703 | self.assertEqual(reader.readinto1(b), 3) |
|
688 |
self.assertEqual(b[0:3], b |
|
|
704 | self.assertEqual(b[0:3], b"foo") | |
|
689 | 705 | |
|
690 | 706 | # Too small destination buffer. |
|
691 | 707 | b = bytearray(2) |
|
692 | 708 | reader = dctx.stream_reader(foo) |
|
693 | 709 | self.assertEqual(reader.readinto1(b), 2) |
|
694 |
self.assertEqual(b[:], b |
|
|
710 | self.assertEqual(b[:], b"fo") | |
|
695 | 711 | |
|
696 | 712 | def test_readall(self): |
|
697 | 713 | cctx = zstd.ZstdCompressor() |
|
698 |
foo = cctx.compress(b |
|
|
714 | foo = cctx.compress(b"foo") | |
|
699 | 715 | |
|
700 | 716 | dctx = zstd.ZstdDecompressor() |
|
701 | 717 | reader = dctx.stream_reader(foo) |
|
702 | 718 | |
|
703 |
self.assertEqual(reader.readall(), b |
|
|
719 | self.assertEqual(reader.readall(), b"foo") | |
|
704 | 720 | |
|
705 | 721 | def test_read1(self): |
|
706 | 722 | cctx = zstd.ZstdCompressor() |
|
707 |
foo = cctx.compress(b |
|
|
723 | foo = cctx.compress(b"foo") | |
|
708 | 724 | |
|
709 | 725 | dctx = zstd.ZstdDecompressor() |
|
710 | 726 | |
|
711 | 727 | b = OpCountingBytesIO(foo) |
|
712 | 728 | reader = dctx.stream_reader(b) |
|
713 | 729 | |
|
714 |
self.assertEqual(reader.read1(), b |
|
|
730 | self.assertEqual(reader.read1(), b"foo") | |
|
715 | 731 | self.assertEqual(b._read_count, 1) |
|
716 | 732 | |
|
717 | 733 | b = OpCountingBytesIO(foo) |
|
718 | 734 | reader = dctx.stream_reader(b) |
|
719 | 735 | |
|
720 |
self.assertEqual(reader.read1(0), b |
|
|
721 |
self.assertEqual(reader.read1(2), b |
|
|
736 | self.assertEqual(reader.read1(0), b"") | |
|
737 | self.assertEqual(reader.read1(2), b"fo") | |
|
722 | 738 | self.assertEqual(b._read_count, 1) |
|
723 |
self.assertEqual(reader.read1(1), b |
|
|
739 | self.assertEqual(reader.read1(1), b"o") | |
|
724 | 740 | self.assertEqual(b._read_count, 1) |
|
725 |
self.assertEqual(reader.read1(1), b |
|
|
741 | self.assertEqual(reader.read1(1), b"") | |
|
726 | 742 | self.assertEqual(b._read_count, 2) |
|
727 | 743 | |
|
728 | 744 | def test_read_lines(self): |
|
729 | 745 | cctx = zstd.ZstdCompressor() |
|
730 |
source = b |
|
|
746 | source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024)) | |
|
731 | 747 | |
|
732 | 748 | frame = cctx.compress(source) |
|
733 | 749 | |
|
734 | 750 | dctx = zstd.ZstdDecompressor() |
|
735 | 751 | reader = dctx.stream_reader(frame) |
|
736 |
tr = io.TextIOWrapper(reader, encoding= |
|
|
752 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
|
737 | 753 | |
|
738 | 754 | lines = [] |
|
739 | 755 | for line in tr: |
|
740 |
lines.append(line.encode( |
|
|
756 | lines.append(line.encode("utf-8")) | |
|
741 | 757 | |
|
742 | 758 | self.assertEqual(len(lines), 1024) |
|
743 |
self.assertEqual(b |
|
|
759 | self.assertEqual(b"".join(lines), source) | |
|
744 | 760 | |
|
745 | 761 | reader = dctx.stream_reader(frame) |
|
746 |
tr = io.TextIOWrapper(reader, encoding= |
|
|
762 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
|
747 | 763 | |
|
748 | 764 | lines = tr.readlines() |
|
749 | 765 | self.assertEqual(len(lines), 1024) |
|
750 |
self.assertEqual( |
|
|
766 | self.assertEqual("".join(lines).encode("utf-8"), source) | |
|
751 | 767 | |
|
752 | 768 | reader = dctx.stream_reader(frame) |
|
753 |
tr = io.TextIOWrapper(reader, encoding= |
|
|
769 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
|
754 | 770 | |
|
755 | 771 | lines = [] |
|
756 | 772 | while True: |
|
757 | 773 | line = tr.readline() |
|
758 | 774 | if not line: |
|
759 | 775 | break |
|
760 | 776 | |
|
761 |
lines.append(line.encode( |
|
|
777 | lines.append(line.encode("utf-8")) | |
|
762 | 778 | |
|
763 | 779 | self.assertEqual(len(lines), 1024) |
|
764 |
self.assertEqual(b |
|
|
780 | self.assertEqual(b"".join(lines), source) | |
|
765 | 781 | |
|
766 | 782 | |
|
767 | 783 | @make_cffi |
|
768 |
class TestDecompressor_decompressobj( |
|
|
784 | class TestDecompressor_decompressobj(TestCase): | |
|
769 | 785 | def test_simple(self): |
|
770 |
data = zstd.ZstdCompressor(level=1).compress(b |
|
|
786 | data = zstd.ZstdCompressor(level=1).compress(b"foobar") | |
|
771 | 787 | |
|
772 | 788 | dctx = zstd.ZstdDecompressor() |
|
773 | 789 | dobj = dctx.decompressobj() |
|
774 |
self.assertEqual(dobj.decompress(data), b |
|
|
790 | self.assertEqual(dobj.decompress(data), b"foobar") | |
|
775 | 791 | self.assertIsNone(dobj.flush()) |
|
776 | 792 | self.assertIsNone(dobj.flush(10)) |
|
777 | 793 | self.assertIsNone(dobj.flush(length=100)) |
|
778 | 794 | |
|
779 | 795 | def test_input_types(self): |
|
780 |
compressed = zstd.ZstdCompressor(level=1).compress(b |
|
|
796 | compressed = zstd.ZstdCompressor(level=1).compress(b"foo") | |
|
781 | 797 | |
|
782 | 798 | dctx = zstd.ZstdDecompressor() |
|
783 | 799 | |
|
784 | 800 | mutable_array = bytearray(len(compressed)) |
|
785 | 801 | mutable_array[:] = compressed |
|
786 | 802 | |
|
787 | 803 | sources = [ |
|
788 | 804 | memoryview(compressed), |
|
789 | 805 | bytearray(compressed), |
|
790 | 806 | mutable_array, |
|
791 | 807 | ] |
|
792 | 808 | |
|
793 | 809 | for source in sources: |
|
794 | 810 | dobj = dctx.decompressobj() |
|
795 | 811 | self.assertIsNone(dobj.flush()) |
|
796 | 812 | self.assertIsNone(dobj.flush(10)) |
|
797 | 813 | self.assertIsNone(dobj.flush(length=100)) |
|
798 |
self.assertEqual(dobj.decompress(source), b |
|
|
814 | self.assertEqual(dobj.decompress(source), b"foo") | |
|
799 | 815 | self.assertIsNone(dobj.flush()) |
|
800 | 816 | |
|
801 | 817 | def test_reuse(self): |
|
802 |
data = zstd.ZstdCompressor(level=1).compress(b |
|
|
818 | data = zstd.ZstdCompressor(level=1).compress(b"foobar") | |
|
803 | 819 | |
|
804 | 820 | dctx = zstd.ZstdDecompressor() |
|
805 | 821 | dobj = dctx.decompressobj() |
|
806 | 822 | dobj.decompress(data) |
|
807 | 823 | |
|
808 |
with self.assertRaisesRegex |
|
|
824 | with self.assertRaisesRegex(zstd.ZstdError, "cannot use a decompressobj"): | |
|
809 | 825 | dobj.decompress(data) |
|
810 | 826 | self.assertIsNone(dobj.flush()) |
|
811 | 827 | |
|
812 | 828 | def test_bad_write_size(self): |
|
813 | 829 | dctx = zstd.ZstdDecompressor() |
|
814 | 830 | |
|
815 |
with self.assertRaisesRegex |
|
|
831 | with self.assertRaisesRegex(ValueError, "write_size must be positive"): | |
|
816 | 832 | dctx.decompressobj(write_size=0) |
|
817 | 833 | |
|
818 | 834 | def test_write_size(self): |
|
819 |
source = b |
|
|
835 | source = b"foo" * 64 + b"bar" * 128 | |
|
820 | 836 | data = zstd.ZstdCompressor(level=1).compress(source) |
|
821 | 837 | |
|
822 | 838 | dctx = zstd.ZstdDecompressor() |
|
823 | 839 | |
|
824 | 840 | for i in range(128): |
|
825 | 841 | dobj = dctx.decompressobj(write_size=i + 1) |
|
826 | 842 | self.assertEqual(dobj.decompress(data), source) |
|
827 | 843 | |
|
828 | 844 | |
|
829 | 845 | def decompress_via_writer(data): |
|
830 | 846 | buffer = io.BytesIO() |
|
831 | 847 | dctx = zstd.ZstdDecompressor() |
|
832 | 848 | decompressor = dctx.stream_writer(buffer) |
|
833 | 849 | decompressor.write(data) |
|
834 | 850 | |
|
835 | 851 | return buffer.getvalue() |
|
836 | 852 | |
|
837 | 853 | |
|
838 | 854 | @make_cffi |
|
839 |
class TestDecompressor_stream_writer( |
|
|
855 | class TestDecompressor_stream_writer(TestCase): | |
|
840 | 856 | def test_io_api(self): |
|
841 | 857 | buffer = io.BytesIO() |
|
842 | 858 | dctx = zstd.ZstdDecompressor() |
|
843 | 859 | writer = dctx.stream_writer(buffer) |
|
844 | 860 | |
|
845 | 861 | self.assertFalse(writer.closed) |
|
846 | 862 | self.assertFalse(writer.isatty()) |
|
847 | 863 | self.assertFalse(writer.readable()) |
|
848 | 864 | |
|
849 | 865 | with self.assertRaises(io.UnsupportedOperation): |
|
850 | 866 | writer.readline() |
|
851 | 867 | |
|
852 | 868 | with self.assertRaises(io.UnsupportedOperation): |
|
853 | 869 | writer.readline(42) |
|
854 | 870 | |
|
855 | 871 | with self.assertRaises(io.UnsupportedOperation): |
|
856 | 872 | writer.readline(size=42) |
|
857 | 873 | |
|
858 | 874 | with self.assertRaises(io.UnsupportedOperation): |
|
859 | 875 | writer.readlines() |
|
860 | 876 | |
|
861 | 877 | with self.assertRaises(io.UnsupportedOperation): |
|
862 | 878 | writer.readlines(42) |
|
863 | 879 | |
|
864 | 880 | with self.assertRaises(io.UnsupportedOperation): |
|
865 | 881 | writer.readlines(hint=42) |
|
866 | 882 | |
|
867 | 883 | with self.assertRaises(io.UnsupportedOperation): |
|
868 | 884 | writer.seek(0) |
|
869 | 885 | |
|
870 | 886 | with self.assertRaises(io.UnsupportedOperation): |
|
871 | 887 | writer.seek(10, os.SEEK_SET) |
|
872 | 888 | |
|
873 | 889 | self.assertFalse(writer.seekable()) |
|
874 | 890 | |
|
875 | 891 | with self.assertRaises(io.UnsupportedOperation): |
|
876 | 892 | writer.tell() |
|
877 | 893 | |
|
878 | 894 | with self.assertRaises(io.UnsupportedOperation): |
|
879 | 895 | writer.truncate() |
|
880 | 896 | |
|
881 | 897 | with self.assertRaises(io.UnsupportedOperation): |
|
882 | 898 | writer.truncate(42) |
|
883 | 899 | |
|
884 | 900 | with self.assertRaises(io.UnsupportedOperation): |
|
885 | 901 | writer.truncate(size=42) |
|
886 | 902 | |
|
887 | 903 | self.assertTrue(writer.writable()) |
|
888 | 904 | |
|
889 | 905 | with self.assertRaises(io.UnsupportedOperation): |
|
890 | 906 | writer.writelines([]) |
|
891 | 907 | |
|
892 | 908 | with self.assertRaises(io.UnsupportedOperation): |
|
893 | 909 | writer.read() |
|
894 | 910 | |
|
895 | 911 | with self.assertRaises(io.UnsupportedOperation): |
|
896 | 912 | writer.read(42) |
|
897 | 913 | |
|
898 | 914 | with self.assertRaises(io.UnsupportedOperation): |
|
899 | 915 | writer.read(size=42) |
|
900 | 916 | |
|
901 | 917 | with self.assertRaises(io.UnsupportedOperation): |
|
902 | 918 | writer.readall() |
|
903 | 919 | |
|
904 | 920 | with self.assertRaises(io.UnsupportedOperation): |
|
905 | 921 | writer.readinto(None) |
|
906 | 922 | |
|
907 | 923 | with self.assertRaises(io.UnsupportedOperation): |
|
908 | 924 | writer.fileno() |
|
909 | 925 | |
|
910 | 926 | def test_fileno_file(self): |
|
911 |
with tempfile.TemporaryFile( |
|
|
927 | with tempfile.TemporaryFile("wb") as tf: | |
|
912 | 928 | dctx = zstd.ZstdDecompressor() |
|
913 | 929 | writer = dctx.stream_writer(tf) |
|
914 | 930 | |
|
915 | 931 | self.assertEqual(writer.fileno(), tf.fileno()) |
|
916 | 932 | |
|
917 | 933 | def test_close(self): |
|
918 |
foo = zstd.ZstdCompressor().compress(b |
|
|
934 | foo = zstd.ZstdCompressor().compress(b"foo") | |
|
919 | 935 | |
|
920 | 936 | buffer = NonClosingBytesIO() |
|
921 | 937 | dctx = zstd.ZstdDecompressor() |
|
922 | 938 | writer = dctx.stream_writer(buffer) |
|
923 | 939 | |
|
924 | 940 | writer.write(foo) |
|
925 | 941 | self.assertFalse(writer.closed) |
|
926 | 942 | self.assertFalse(buffer.closed) |
|
927 | 943 | writer.close() |
|
928 | 944 | self.assertTrue(writer.closed) |
|
929 | 945 | self.assertTrue(buffer.closed) |
|
930 | 946 | |
|
931 |
with self.assertRaisesRegex |
|
|
932 |
writer.write(b |
|
|
947 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
948 | writer.write(b"") | |
|
933 | 949 | |
|
934 |
with self.assertRaisesRegex |
|
|
950 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
935 | 951 | writer.flush() |
|
936 | 952 | |
|
937 |
with self.assertRaisesRegex |
|
|
953 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
|
938 | 954 | with writer: |
|
939 | 955 | pass |
|
940 | 956 | |
|
941 |
self.assertEqual(buffer.getvalue(), b |
|
|
957 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
942 | 958 | |
|
943 | 959 | # Context manager exit should close stream. |
|
944 | 960 | buffer = NonClosingBytesIO() |
|
945 | 961 | writer = dctx.stream_writer(buffer) |
|
946 | 962 | |
|
947 | 963 | with writer: |
|
948 | 964 | writer.write(foo) |
|
949 | 965 | |
|
950 | 966 | self.assertTrue(writer.closed) |
|
951 |
self.assertEqual(buffer.getvalue(), b |
|
|
967 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
952 | 968 | |
|
953 | 969 | def test_flush(self): |
|
954 | 970 | buffer = OpCountingBytesIO() |
|
955 | 971 | dctx = zstd.ZstdDecompressor() |
|
956 | 972 | writer = dctx.stream_writer(buffer) |
|
957 | 973 | |
|
958 | 974 | writer.flush() |
|
959 | 975 | self.assertEqual(buffer._flush_count, 1) |
|
960 | 976 | writer.flush() |
|
961 | 977 | self.assertEqual(buffer._flush_count, 2) |
|
962 | 978 | |
|
963 | 979 | def test_empty_roundtrip(self): |
|
964 | 980 | cctx = zstd.ZstdCompressor() |
|
965 |
empty = cctx.compress(b |
|
|
966 |
self.assertEqual(decompress_via_writer(empty), b |
|
|
981 | empty = cctx.compress(b"") | |
|
982 | self.assertEqual(decompress_via_writer(empty), b"") | |
|
967 | 983 | |
|
968 | 984 | def test_input_types(self): |
|
969 | 985 | cctx = zstd.ZstdCompressor(level=1) |
|
970 |
compressed = cctx.compress(b |
|
|
986 | compressed = cctx.compress(b"foo") | |
|
971 | 987 | |
|
972 | 988 | mutable_array = bytearray(len(compressed)) |
|
973 | 989 | mutable_array[:] = compressed |
|
974 | 990 | |
|
975 | 991 | sources = [ |
|
976 | 992 | memoryview(compressed), |
|
977 | 993 | bytearray(compressed), |
|
978 | 994 | mutable_array, |
|
979 | 995 | ] |
|
980 | 996 | |
|
981 | 997 | dctx = zstd.ZstdDecompressor() |
|
982 | 998 | for source in sources: |
|
983 | 999 | buffer = io.BytesIO() |
|
984 | 1000 | |
|
985 | 1001 | decompressor = dctx.stream_writer(buffer) |
|
986 | 1002 | decompressor.write(source) |
|
987 |
self.assertEqual(buffer.getvalue(), b |
|
|
1003 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
988 | 1004 | |
|
989 | 1005 | buffer = NonClosingBytesIO() |
|
990 | 1006 | |
|
991 | 1007 | with dctx.stream_writer(buffer) as decompressor: |
|
992 | 1008 | self.assertEqual(decompressor.write(source), 3) |
|
993 | 1009 | |
|
994 |
self.assertEqual(buffer.getvalue(), b |
|
|
1010 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
995 | 1011 | |
|
996 | 1012 | buffer = io.BytesIO() |
|
997 | 1013 | writer = dctx.stream_writer(buffer, write_return_read=True) |
|
998 | 1014 | self.assertEqual(writer.write(source), len(source)) |
|
999 |
self.assertEqual(buffer.getvalue(), b |
|
|
1015 | self.assertEqual(buffer.getvalue(), b"foo") | |
|
1000 | 1016 | |
|
1001 | 1017 | def test_large_roundtrip(self): |
|
1002 | 1018 | chunks = [] |
|
1003 | 1019 | for i in range(255): |
|
1004 |
chunks.append(struct.Struct( |
|
|
1005 |
orig = b |
|
|
1020 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
|
1021 | orig = b"".join(chunks) | |
|
1006 | 1022 | cctx = zstd.ZstdCompressor() |
|
1007 | 1023 | compressed = cctx.compress(orig) |
|
1008 | 1024 | |
|
1009 | 1025 | self.assertEqual(decompress_via_writer(compressed), orig) |
|
1010 | 1026 | |
|
1011 | 1027 | def test_multiple_calls(self): |
|
1012 | 1028 | chunks = [] |
|
1013 | 1029 | for i in range(255): |
|
1014 | 1030 | for j in range(255): |
|
1015 |
chunks.append(struct.Struct( |
|
|
1031 | chunks.append(struct.Struct(">B").pack(j) * i) | |
|
1016 | 1032 | |
|
1017 |
orig = b |
|
|
1033 | orig = b"".join(chunks) | |
|
1018 | 1034 | cctx = zstd.ZstdCompressor() |
|
1019 | 1035 | compressed = cctx.compress(orig) |
|
1020 | 1036 | |
|
1021 | 1037 | buffer = NonClosingBytesIO() |
|
1022 | 1038 | dctx = zstd.ZstdDecompressor() |
|
1023 | 1039 | with dctx.stream_writer(buffer) as decompressor: |
|
1024 | 1040 | pos = 0 |
|
1025 | 1041 | while pos < len(compressed): |
|
1026 | 1042 | pos2 = pos + 8192 |
|
1027 | 1043 | decompressor.write(compressed[pos:pos2]) |
|
1028 | 1044 | pos += 8192 |
|
1029 | 1045 | self.assertEqual(buffer.getvalue(), orig) |
|
1030 | 1046 | |
|
1031 | 1047 | # Again with write_return_read=True |
|
1032 | 1048 | buffer = io.BytesIO() |
|
1033 | 1049 | writer = dctx.stream_writer(buffer, write_return_read=True) |
|
1034 | 1050 | pos = 0 |
|
1035 | 1051 | while pos < len(compressed): |
|
1036 | 1052 | pos2 = pos + 8192 |
|
1037 | 1053 | chunk = compressed[pos:pos2] |
|
1038 | 1054 | self.assertEqual(writer.write(chunk), len(chunk)) |
|
1039 | 1055 | pos += 8192 |
|
1040 | 1056 | self.assertEqual(buffer.getvalue(), orig) |
|
1041 | 1057 | |
|
1042 | 1058 | def test_dictionary(self): |
|
1043 | 1059 | samples = [] |
|
1044 | 1060 | for i in range(128): |
|
1045 |
samples.append(b |
|
|
1046 |
samples.append(b |
|
|
1047 |
samples.append(b |
|
|
1061 | samples.append(b"foo" * 64) | |
|
1062 | samples.append(b"bar" * 64) | |
|
1063 | samples.append(b"foobar" * 64) | |
|
1048 | 1064 | |
|
1049 | 1065 | d = zstd.train_dictionary(8192, samples) |
|
1050 | 1066 | |
|
1051 |
orig = b |
|
|
1067 | orig = b"foobar" * 16384 | |
|
1052 | 1068 | buffer = NonClosingBytesIO() |
|
1053 | 1069 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
1054 | 1070 | with cctx.stream_writer(buffer) as compressor: |
|
1055 | 1071 | self.assertEqual(compressor.write(orig), 0) |
|
1056 | 1072 | |
|
1057 | 1073 | compressed = buffer.getvalue() |
|
1058 | 1074 | buffer = io.BytesIO() |
|
1059 | 1075 | |
|
1060 | 1076 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1061 | 1077 | decompressor = dctx.stream_writer(buffer) |
|
1062 | 1078 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
1063 | 1079 | self.assertEqual(buffer.getvalue(), orig) |
|
1064 | 1080 | |
|
1065 | 1081 | buffer = NonClosingBytesIO() |
|
1066 | 1082 | |
|
1067 | 1083 | with dctx.stream_writer(buffer) as decompressor: |
|
1068 | 1084 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
1069 | 1085 | |
|
1070 | 1086 | self.assertEqual(buffer.getvalue(), orig) |
|
1071 | 1087 | |
|
1072 | 1088 | def test_memory_size(self): |
|
1073 | 1089 | dctx = zstd.ZstdDecompressor() |
|
1074 | 1090 | buffer = io.BytesIO() |
|
1075 | 1091 | |
|
1076 | 1092 | decompressor = dctx.stream_writer(buffer) |
|
1077 | 1093 | size = decompressor.memory_size() |
|
1078 | 1094 | self.assertGreater(size, 100000) |
|
1079 | 1095 | |
|
1080 | 1096 | with dctx.stream_writer(buffer) as decompressor: |
|
1081 | 1097 | size = decompressor.memory_size() |
|
1082 | 1098 | |
|
1083 | 1099 | self.assertGreater(size, 100000) |
|
1084 | 1100 | |
|
1085 | 1101 | def test_write_size(self): |
|
1086 |
source = zstd.ZstdCompressor().compress(b |
|
|
1102 | source = zstd.ZstdCompressor().compress(b"foobarfoobar") | |
|
1087 | 1103 | dest = OpCountingBytesIO() |
|
1088 | 1104 | dctx = zstd.ZstdDecompressor() |
|
1089 | 1105 | with dctx.stream_writer(dest, write_size=1) as decompressor: |
|
1090 |
s = struct.Struct( |
|
|
1106 | s = struct.Struct(">B") | |
|
1091 | 1107 | for c in source: |
|
1092 | 1108 | if not isinstance(c, str): |
|
1093 | 1109 | c = s.pack(c) |
|
1094 | 1110 | decompressor.write(c) |
|
1095 | 1111 | |
|
1096 |
self.assertEqual(dest.getvalue(), b |
|
|
1112 | self.assertEqual(dest.getvalue(), b"foobarfoobar") | |
|
1097 | 1113 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
1098 | 1114 | |
|
1099 | 1115 | |
|
1100 | 1116 | @make_cffi |
|
1101 |
class TestDecompressor_read_to_iter( |
|
|
1117 | class TestDecompressor_read_to_iter(TestCase): | |
|
1102 | 1118 | def test_type_validation(self): |
|
1103 | 1119 | dctx = zstd.ZstdDecompressor() |
|
1104 | 1120 | |
|
1105 | 1121 | # Object with read() works. |
|
1106 | 1122 | dctx.read_to_iter(io.BytesIO()) |
|
1107 | 1123 | |
|
1108 | 1124 | # Buffer protocol works. |
|
1109 |
dctx.read_to_iter(b |
|
|
1125 | dctx.read_to_iter(b"foobar") | |
|
1110 | 1126 | |
|
1111 |
with self.assertRaisesRegex |
|
|
1112 |
b |
|
|
1127 | with self.assertRaisesRegex(ValueError, "must pass an object with a read"): | |
|
1128 | b"".join(dctx.read_to_iter(True)) | |
|
1113 | 1129 | |
|
1114 | 1130 | def test_empty_input(self): |
|
1115 | 1131 | dctx = zstd.ZstdDecompressor() |
|
1116 | 1132 | |
|
1117 | 1133 | source = io.BytesIO() |
|
1118 | 1134 | it = dctx.read_to_iter(source) |
|
1119 | 1135 | # TODO this is arguably wrong. Should get an error about missing frame foo. |
|
1120 | 1136 | with self.assertRaises(StopIteration): |
|
1121 | 1137 | next(it) |
|
1122 | 1138 | |
|
1123 |
it = dctx.read_to_iter(b |
|
|
1139 | it = dctx.read_to_iter(b"") | |
|
1124 | 1140 | with self.assertRaises(StopIteration): |
|
1125 | 1141 | next(it) |
|
1126 | 1142 | |
|
1127 | 1143 | def test_invalid_input(self): |
|
1128 | 1144 | dctx = zstd.ZstdDecompressor() |
|
1129 | 1145 | |
|
1130 |
source = io.BytesIO(b |
|
|
1146 | source = io.BytesIO(b"foobar") | |
|
1131 | 1147 | it = dctx.read_to_iter(source) |
|
1132 |
with self.assertRaisesRegex |
|
|
1148 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
|
1133 | 1149 | next(it) |
|
1134 | 1150 | |
|
1135 |
it = dctx.read_to_iter(b |
|
|
1136 |
with self.assertRaisesRegex |
|
|
1151 | it = dctx.read_to_iter(b"foobar") | |
|
1152 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
|
1137 | 1153 | next(it) |
|
1138 | 1154 | |
|
1139 | 1155 | def test_empty_roundtrip(self): |
|
1140 | 1156 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1141 |
empty = cctx.compress(b |
|
|
1157 | empty = cctx.compress(b"") | |
|
1142 | 1158 | |
|
1143 | 1159 | source = io.BytesIO(empty) |
|
1144 | 1160 | source.seek(0) |
|
1145 | 1161 | |
|
1146 | 1162 | dctx = zstd.ZstdDecompressor() |
|
1147 | 1163 | it = dctx.read_to_iter(source) |
|
1148 | 1164 | |
|
1149 | 1165 | # No chunks should be emitted since there is no data. |
|
1150 | 1166 | with self.assertRaises(StopIteration): |
|
1151 | 1167 | next(it) |
|
1152 | 1168 | |
|
1153 | 1169 | # Again for good measure. |
|
1154 | 1170 | with self.assertRaises(StopIteration): |
|
1155 | 1171 | next(it) |
|
1156 | 1172 | |
|
1157 | 1173 | def test_skip_bytes_too_large(self): |
|
1158 | 1174 | dctx = zstd.ZstdDecompressor() |
|
1159 | 1175 | |
|
1160 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): | |
|
1161 | b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) | |
|
1176 | with self.assertRaisesRegex( | |
|
1177 | ValueError, "skip_bytes must be smaller than read_size" | |
|
1178 | ): | |
|
1179 | b"".join(dctx.read_to_iter(b"", skip_bytes=1, read_size=1)) | |
|
1162 | 1180 | |
|
1163 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): | |
|
1164 | b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) | |
|
1181 | with self.assertRaisesRegex( | |
|
1182 | ValueError, "skip_bytes larger than first input chunk" | |
|
1183 | ): | |
|
1184 | b"".join(dctx.read_to_iter(b"foobar", skip_bytes=10)) | |
|
1165 | 1185 | |
|
1166 | 1186 | def test_skip_bytes(self): |
|
1167 | 1187 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1168 |
compressed = cctx.compress(b |
|
|
1188 | compressed = cctx.compress(b"foobar") | |
|
1169 | 1189 | |
|
1170 | 1190 | dctx = zstd.ZstdDecompressor() |
|
1171 |
output = b |
|
|
1172 |
self.assertEqual(output, b |
|
|
1191 | output = b"".join(dctx.read_to_iter(b"hdr" + compressed, skip_bytes=3)) | |
|
1192 | self.assertEqual(output, b"foobar") | |
|
1173 | 1193 | |
|
1174 | 1194 | def test_large_output(self): |
|
1175 | 1195 | source = io.BytesIO() |
|
1176 |
source.write(b |
|
|
1177 |
source.write(b |
|
|
1196 | source.write(b"f" * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |
|
1197 | source.write(b"o") | |
|
1178 | 1198 | source.seek(0) |
|
1179 | 1199 | |
|
1180 | 1200 | cctx = zstd.ZstdCompressor(level=1) |
|
1181 | 1201 | compressed = io.BytesIO(cctx.compress(source.getvalue())) |
|
1182 | 1202 | compressed.seek(0) |
|
1183 | 1203 | |
|
1184 | 1204 | dctx = zstd.ZstdDecompressor() |
|
1185 | 1205 | it = dctx.read_to_iter(compressed) |
|
1186 | 1206 | |
|
1187 | 1207 | chunks = [] |
|
1188 | 1208 | chunks.append(next(it)) |
|
1189 | 1209 | chunks.append(next(it)) |
|
1190 | 1210 | |
|
1191 | 1211 | with self.assertRaises(StopIteration): |
|
1192 | 1212 | next(it) |
|
1193 | 1213 | |
|
1194 |
decompressed = b |
|
|
1214 | decompressed = b"".join(chunks) | |
|
1195 | 1215 | self.assertEqual(decompressed, source.getvalue()) |
|
1196 | 1216 | |
|
1197 | 1217 | # And again with buffer protocol. |
|
1198 | 1218 | it = dctx.read_to_iter(compressed.getvalue()) |
|
1199 | 1219 | chunks = [] |
|
1200 | 1220 | chunks.append(next(it)) |
|
1201 | 1221 | chunks.append(next(it)) |
|
1202 | 1222 | |
|
1203 | 1223 | with self.assertRaises(StopIteration): |
|
1204 | 1224 | next(it) |
|
1205 | 1225 | |
|
1206 |
decompressed = b |
|
|
1226 | decompressed = b"".join(chunks) | |
|
1207 | 1227 | self.assertEqual(decompressed, source.getvalue()) |
|
1208 | 1228 | |
|
1209 |
@unittest.skipUnless( |
|
|
1229 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
1210 | 1230 | def test_large_input(self): |
|
1211 |
bytes = list(struct.Struct( |
|
|
1231 | bytes = list(struct.Struct(">B").pack(i) for i in range(256)) | |
|
1212 | 1232 | compressed = NonClosingBytesIO() |
|
1213 | 1233 | input_size = 0 |
|
1214 | 1234 | cctx = zstd.ZstdCompressor(level=1) |
|
1215 | 1235 | with cctx.stream_writer(compressed) as compressor: |
|
1216 | 1236 | while True: |
|
1217 | 1237 | compressor.write(random.choice(bytes)) |
|
1218 | 1238 | input_size += 1 |
|
1219 | 1239 | |
|
1220 | have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1240 | have_compressed = ( | |
|
1241 | len(compressed.getvalue()) | |
|
1242 | > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1243 | ) | |
|
1221 | 1244 | have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 |
|
1222 | 1245 | if have_compressed and have_raw: |
|
1223 | 1246 | break |
|
1224 | 1247 | |
|
1225 | 1248 | compressed = io.BytesIO(compressed.getvalue()) |
|
1226 |
self.assertGreater( |
|
|
1227 |
|
|
|
1249 | self.assertGreater( | |
|
1250 | len(compressed.getvalue()), zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1251 | ) | |
|
1228 | 1252 | |
|
1229 | 1253 | dctx = zstd.ZstdDecompressor() |
|
1230 | 1254 | it = dctx.read_to_iter(compressed) |
|
1231 | 1255 | |
|
1232 | 1256 | chunks = [] |
|
1233 | 1257 | chunks.append(next(it)) |
|
1234 | 1258 | chunks.append(next(it)) |
|
1235 | 1259 | chunks.append(next(it)) |
|
1236 | 1260 | |
|
1237 | 1261 | with self.assertRaises(StopIteration): |
|
1238 | 1262 | next(it) |
|
1239 | 1263 | |
|
1240 |
decompressed = b |
|
|
1264 | decompressed = b"".join(chunks) | |
|
1241 | 1265 | self.assertEqual(len(decompressed), input_size) |
|
1242 | 1266 | |
|
1243 | 1267 | # And again with buffer protocol. |
|
1244 | 1268 | it = dctx.read_to_iter(compressed.getvalue()) |
|
1245 | 1269 | |
|
1246 | 1270 | chunks = [] |
|
1247 | 1271 | chunks.append(next(it)) |
|
1248 | 1272 | chunks.append(next(it)) |
|
1249 | 1273 | chunks.append(next(it)) |
|
1250 | 1274 | |
|
1251 | 1275 | with self.assertRaises(StopIteration): |
|
1252 | 1276 | next(it) |
|
1253 | 1277 | |
|
1254 |
decompressed = b |
|
|
1278 | decompressed = b"".join(chunks) | |
|
1255 | 1279 | self.assertEqual(len(decompressed), input_size) |
|
1256 | 1280 | |
|
1257 | 1281 | def test_interesting(self): |
|
1258 | 1282 | # Found this edge case via fuzzing. |
|
1259 | 1283 | cctx = zstd.ZstdCompressor(level=1) |
|
1260 | 1284 | |
|
1261 | 1285 | source = io.BytesIO() |
|
1262 | 1286 | |
|
1263 | 1287 | compressed = NonClosingBytesIO() |
|
1264 | 1288 | with cctx.stream_writer(compressed) as compressor: |
|
1265 | 1289 | for i in range(256): |
|
1266 |
chunk = b |
|
|
1290 | chunk = b"\0" * 1024 | |
|
1267 | 1291 | compressor.write(chunk) |
|
1268 | 1292 | source.write(chunk) |
|
1269 | 1293 | |
|
1270 | 1294 | dctx = zstd.ZstdDecompressor() |
|
1271 | 1295 | |
|
1272 |
simple = dctx.decompress( |
|
|
1273 |
|
|
|
1296 | simple = dctx.decompress( | |
|
1297 | compressed.getvalue(), max_output_size=len(source.getvalue()) | |
|
1298 | ) | |
|
1274 | 1299 | self.assertEqual(simple, source.getvalue()) |
|
1275 | 1300 | |
|
1276 | 1301 | compressed = io.BytesIO(compressed.getvalue()) |
|
1277 |
streamed = b |
|
|
1302 | streamed = b"".join(dctx.read_to_iter(compressed)) | |
|
1278 | 1303 | self.assertEqual(streamed, source.getvalue()) |
|
1279 | 1304 | |
|
1280 | 1305 | def test_read_write_size(self): |
|
1281 |
source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b |
|
|
1306 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) | |
|
1282 | 1307 | dctx = zstd.ZstdDecompressor() |
|
1283 | 1308 | for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): |
|
1284 | 1309 | self.assertEqual(len(chunk), 1) |
|
1285 | 1310 | |
|
1286 | 1311 | self.assertEqual(source._read_count, len(source.getvalue())) |
|
1287 | 1312 | |
|
1288 | 1313 | def test_magic_less(self): |
|
1289 | 1314 | params = zstd.CompressionParameters.from_level( |
|
1290 |
1, format=zstd.FORMAT_ZSTD1_MAGICLESS |
|
|
1315 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS | |
|
1316 | ) | |
|
1291 | 1317 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1292 |
frame = cctx.compress(b |
|
|
1318 | frame = cctx.compress(b"foobar") | |
|
1293 | 1319 | |
|
1294 |
self.assertNotEqual(frame[0:4], b |
|
|
1320 | self.assertNotEqual(frame[0:4], b"\x28\xb5\x2f\xfd") | |
|
1295 | 1321 | |
|
1296 | 1322 | dctx = zstd.ZstdDecompressor() |
|
1297 |
with self.assertRaisesRegex |
|
|
1298 |
zstd.ZstdError, |
|
|
1323 | with self.assertRaisesRegex( | |
|
1324 | zstd.ZstdError, "error determining content size from frame header" | |
|
1325 | ): | |
|
1299 | 1326 | dctx.decompress(frame) |
|
1300 | 1327 | |
|
1301 | 1328 | dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) |
|
1302 |
res = b |
|
|
1303 |
self.assertEqual(res, b |
|
|
1329 | res = b"".join(dctx.read_to_iter(frame)) | |
|
1330 | self.assertEqual(res, b"foobar") | |
|
1304 | 1331 | |
|
1305 | 1332 | |
|
1306 | 1333 | @make_cffi |
|
1307 |
class TestDecompressor_content_dict_chain( |
|
|
1334 | class TestDecompressor_content_dict_chain(TestCase): | |
|
1308 | 1335 | def test_bad_inputs_simple(self): |
|
1309 | 1336 | dctx = zstd.ZstdDecompressor() |
|
1310 | 1337 | |
|
1311 | 1338 | with self.assertRaises(TypeError): |
|
1312 |
dctx.decompress_content_dict_chain(b |
|
|
1339 | dctx.decompress_content_dict_chain(b"foo") | |
|
1313 | 1340 | |
|
1314 | 1341 | with self.assertRaises(TypeError): |
|
1315 |
dctx.decompress_content_dict_chain((b |
|
|
1342 | dctx.decompress_content_dict_chain((b"foo", b"bar")) | |
|
1316 | 1343 | |
|
1317 |
with self.assertRaisesRegex |
|
|
1344 | with self.assertRaisesRegex(ValueError, "empty input chain"): | |
|
1318 | 1345 | dctx.decompress_content_dict_chain([]) |
|
1319 | 1346 | |
|
1320 |
with self.assertRaisesRegex |
|
|
1321 |
dctx.decompress_content_dict_chain([u |
|
|
1347 | with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): | |
|
1348 | dctx.decompress_content_dict_chain([u"foo"]) | |
|
1322 | 1349 | |
|
1323 |
with self.assertRaisesRegex |
|
|
1350 | with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): | |
|
1324 | 1351 | dctx.decompress_content_dict_chain([True]) |
|
1325 | 1352 | |
|
1326 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): | |
|
1353 | with self.assertRaisesRegex( | |
|
1354 | ValueError, "chunk 0 is too small to contain a zstd frame" | |
|
1355 | ): | |
|
1327 | 1356 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) |
|
1328 | 1357 | |
|
1329 |
with self.assertRaisesRegex |
|
|
1330 |
dctx.decompress_content_dict_chain([b |
|
|
1358 | with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd frame"): | |
|
1359 | dctx.decompress_content_dict_chain([b"foo" * 8]) | |
|
1331 | 1360 | |
|
1332 |
no_size = zstd.ZstdCompressor(write_content_size=False).compress(b |
|
|
1361 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) | |
|
1333 | 1362 | |
|
1334 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): | |
|
1363 | with self.assertRaisesRegex( | |
|
1364 | ValueError, "chunk 0 missing content size in frame" | |
|
1365 | ): | |
|
1335 | 1366 | dctx.decompress_content_dict_chain([no_size]) |
|
1336 | 1367 | |
|
1337 | 1368 | # Corrupt first frame. |
|
1338 |
frame = zstd.ZstdCompressor().compress(b |
|
|
1369 | frame = zstd.ZstdCompressor().compress(b"foo" * 64) | |
|
1339 | 1370 | frame = frame[0:12] + frame[15:] |
|
1340 |
with self.assertRaisesRegex |
|
|
1341 |
|
|
|
1371 | with self.assertRaisesRegex( | |
|
1372 | zstd.ZstdError, "chunk 0 did not decompress full frame" | |
|
1373 | ): | |
|
1342 | 1374 | dctx.decompress_content_dict_chain([frame]) |
|
1343 | 1375 | |
|
1344 | 1376 | def test_bad_subsequent_input(self): |
|
1345 |
initial = zstd.ZstdCompressor().compress(b |
|
|
1377 | initial = zstd.ZstdCompressor().compress(b"foo" * 64) | |
|
1346 | 1378 | |
|
1347 | 1379 | dctx = zstd.ZstdDecompressor() |
|
1348 | 1380 | |
|
1349 |
with self.assertRaisesRegex |
|
|
1350 |
dctx.decompress_content_dict_chain([initial, u |
|
|
1381 | with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): | |
|
1382 | dctx.decompress_content_dict_chain([initial, u"foo"]) | |
|
1351 | 1383 | |
|
1352 |
with self.assertRaisesRegex |
|
|
1384 | with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): | |
|
1353 | 1385 | dctx.decompress_content_dict_chain([initial, None]) |
|
1354 | 1386 | |
|
1355 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): | |
|
1387 | with self.assertRaisesRegex( | |
|
1388 | ValueError, "chunk 1 is too small to contain a zstd frame" | |
|
1389 | ): | |
|
1356 | 1390 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) |
|
1357 | 1391 | |
|
1358 |
with self.assertRaisesRegex |
|
|
1359 |
dctx.decompress_content_dict_chain([initial, b |
|
|
1392 | with self.assertRaisesRegex(ValueError, "chunk 1 is not a valid zstd frame"): | |
|
1393 | dctx.decompress_content_dict_chain([initial, b"foo" * 8]) | |
|
1360 | 1394 | |
|
1361 |
no_size = zstd.ZstdCompressor(write_content_size=False).compress(b |
|
|
1395 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) | |
|
1362 | 1396 | |
|
1363 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): | |
|
1397 | with self.assertRaisesRegex( | |
|
1398 | ValueError, "chunk 1 missing content size in frame" | |
|
1399 | ): | |
|
1364 | 1400 | dctx.decompress_content_dict_chain([initial, no_size]) |
|
1365 | 1401 | |
|
1366 | 1402 | # Corrupt second frame. |
|
1367 |
cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b |
|
|
1368 |
frame = cctx.compress(b |
|
|
1403 | cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b"foo" * 64)) | |
|
1404 | frame = cctx.compress(b"bar" * 64) | |
|
1369 | 1405 | frame = frame[0:12] + frame[15:] |
|
1370 | 1406 | |
|
1371 | with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): | |
|
1407 | with self.assertRaisesRegex( | |
|
1408 | zstd.ZstdError, "chunk 1 did not decompress full frame" | |
|
1409 | ): | |
|
1372 | 1410 | dctx.decompress_content_dict_chain([initial, frame]) |
|
1373 | 1411 | |
|
1374 | 1412 | def test_simple(self): |
|
1375 | 1413 | original = [ |
|
1376 |
b |
|
|
1377 |
b |
|
|
1378 |
b |
|
|
1379 |
b |
|
|
1380 |
b |
|
|
1414 | b"foo" * 64, | |
|
1415 | b"foobar" * 64, | |
|
1416 | b"baz" * 64, | |
|
1417 | b"foobaz" * 64, | |
|
1418 | b"foobarbaz" * 64, | |
|
1381 | 1419 | ] |
|
1382 | 1420 | |
|
1383 | 1421 | chunks = [] |
|
1384 | 1422 | chunks.append(zstd.ZstdCompressor().compress(original[0])) |
|
1385 | 1423 | for i, chunk in enumerate(original[1:]): |
|
1386 | 1424 | d = zstd.ZstdCompressionDict(original[i]) |
|
1387 | 1425 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
1388 | 1426 | chunks.append(cctx.compress(chunk)) |
|
1389 | 1427 | |
|
1390 | 1428 | for i in range(1, len(original)): |
|
1391 | 1429 | chain = chunks[0:i] |
|
1392 | 1430 | expected = original[i - 1] |
|
1393 | 1431 | dctx = zstd.ZstdDecompressor() |
|
1394 | 1432 | decompressed = dctx.decompress_content_dict_chain(chain) |
|
1395 | 1433 | self.assertEqual(decompressed, expected) |
|
1396 | 1434 | |
|
1397 | 1435 | |
|
1398 | 1436 | # TODO enable for CFFI |
|
1399 |
class TestDecompressor_multi_decompress_to_buffer( |
|
|
1437 | class TestDecompressor_multi_decompress_to_buffer(TestCase): | |
|
1400 | 1438 | def test_invalid_inputs(self): |
|
1401 | 1439 | dctx = zstd.ZstdDecompressor() |
|
1402 | 1440 | |
|
1403 |
if not hasattr(dctx, |
|
|
1404 |
self.skipTest( |
|
|
1441 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1442 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1405 | 1443 | |
|
1406 | 1444 | with self.assertRaises(TypeError): |
|
1407 | 1445 | dctx.multi_decompress_to_buffer(True) |
|
1408 | 1446 | |
|
1409 | 1447 | with self.assertRaises(TypeError): |
|
1410 | 1448 | dctx.multi_decompress_to_buffer((1, 2)) |
|
1411 | 1449 | |
|
1412 |
with self.assertRaisesRegex |
|
|
1413 |
dctx.multi_decompress_to_buffer([u |
|
|
1450 | with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): | |
|
1451 | dctx.multi_decompress_to_buffer([u"foo"]) | |
|
1414 | 1452 | |
|
1415 | with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'): | |
|
1416 | dctx.multi_decompress_to_buffer([b'foobarbaz']) | |
|
1453 | with self.assertRaisesRegex( | |
|
1454 | ValueError, "could not determine decompressed size of item 0" | |
|
1455 | ): | |
|
1456 | dctx.multi_decompress_to_buffer([b"foobarbaz"]) | |
|
1417 | 1457 | |
|
1418 | 1458 | def test_list_input(self): |
|
1419 | 1459 | cctx = zstd.ZstdCompressor() |
|
1420 | 1460 | |
|
1421 |
original = [b |
|
|
1461 | original = [b"foo" * 4, b"bar" * 6] | |
|
1422 | 1462 | frames = [cctx.compress(d) for d in original] |
|
1423 | 1463 | |
|
1424 | 1464 | dctx = zstd.ZstdDecompressor() |
|
1425 | 1465 | |
|
1426 |
if not hasattr(dctx, |
|
|
1427 |
self.skipTest( |
|
|
1466 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1467 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1428 | 1468 | |
|
1429 | 1469 | result = dctx.multi_decompress_to_buffer(frames) |
|
1430 | 1470 | |
|
1431 | 1471 | self.assertEqual(len(result), len(frames)) |
|
1432 | 1472 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1433 | 1473 | |
|
1434 | 1474 | for i, data in enumerate(original): |
|
1435 | 1475 | self.assertEqual(result[i].tobytes(), data) |
|
1436 | 1476 | |
|
1437 | 1477 | self.assertEqual(result[0].offset, 0) |
|
1438 | 1478 | self.assertEqual(len(result[0]), 12) |
|
1439 | 1479 | self.assertEqual(result[1].offset, 12) |
|
1440 | 1480 | self.assertEqual(len(result[1]), 18) |
|
1441 | 1481 | |
|
1442 | 1482 | def test_list_input_frame_sizes(self): |
|
1443 | 1483 | cctx = zstd.ZstdCompressor() |
|
1444 | 1484 | |
|
1445 |
original = [b |
|
|
1485 | original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] | |
|
1446 | 1486 | frames = [cctx.compress(d) for d in original] |
|
1447 |
sizes = struct.pack( |
|
|
1487 | sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) | |
|
1448 | 1488 | |
|
1449 | 1489 | dctx = zstd.ZstdDecompressor() |
|
1450 | 1490 | |
|
1451 |
if not hasattr(dctx, |
|
|
1452 |
self.skipTest( |
|
|
1491 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1492 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1453 | 1493 | |
|
1454 | 1494 | result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) |
|
1455 | 1495 | |
|
1456 | 1496 | self.assertEqual(len(result), len(frames)) |
|
1457 | 1497 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1458 | 1498 | |
|
1459 | 1499 | for i, data in enumerate(original): |
|
1460 | 1500 | self.assertEqual(result[i].tobytes(), data) |
|
1461 | 1501 | |
|
1462 | 1502 | def test_buffer_with_segments_input(self): |
|
1463 | 1503 | cctx = zstd.ZstdCompressor() |
|
1464 | 1504 | |
|
1465 |
original = [b |
|
|
1505 | original = [b"foo" * 4, b"bar" * 6] | |
|
1466 | 1506 | frames = [cctx.compress(d) for d in original] |
|
1467 | 1507 | |
|
1468 | 1508 | dctx = zstd.ZstdDecompressor() |
|
1469 | 1509 | |
|
1470 |
if not hasattr(dctx, |
|
|
1471 |
self.skipTest( |
|
|
1510 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1511 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1472 | 1512 | |
|
1473 | segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1])) | |
|
1474 | b = zstd.BufferWithSegments(b''.join(frames), segments) | |
|
1513 | segments = struct.pack( | |
|
1514 | "=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1]) | |
|
1515 | ) | |
|
1516 | b = zstd.BufferWithSegments(b"".join(frames), segments) | |
|
1475 | 1517 | |
|
1476 | 1518 | result = dctx.multi_decompress_to_buffer(b) |
|
1477 | 1519 | |
|
1478 | 1520 | self.assertEqual(len(result), len(frames)) |
|
1479 | 1521 | self.assertEqual(result[0].offset, 0) |
|
1480 | 1522 | self.assertEqual(len(result[0]), 12) |
|
1481 | 1523 | self.assertEqual(result[1].offset, 12) |
|
1482 | 1524 | self.assertEqual(len(result[1]), 18) |
|
1483 | 1525 | |
|
1484 | 1526 | def test_buffer_with_segments_sizes(self): |
|
1485 | 1527 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1486 |
original = [b |
|
|
1528 | original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] | |
|
1487 | 1529 | frames = [cctx.compress(d) for d in original] |
|
1488 |
sizes = struct.pack( |
|
|
1530 | sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) | |
|
1489 | 1531 | |
|
1490 | 1532 | dctx = zstd.ZstdDecompressor() |
|
1491 | 1533 | |
|
1492 |
if not hasattr(dctx, |
|
|
1493 |
self.skipTest( |
|
|
1534 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1535 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1494 | 1536 | |
|
1495 |
segments = struct.pack( |
|
|
1496 | len(frames[0]), len(frames[1]), | |
|
1497 | len(frames[0]) + len(frames[1]), len(frames[2])) | |
|
1498 | b = zstd.BufferWithSegments(b''.join(frames), segments) | |
|
1537 | segments = struct.pack( | |
|
1538 | "=QQQQQQ", | |
|
1539 | 0, | |
|
1540 | len(frames[0]), | |
|
1541 | len(frames[0]), | |
|
1542 | len(frames[1]), | |
|
1543 | len(frames[0]) + len(frames[1]), | |
|
1544 | len(frames[2]), | |
|
1545 | ) | |
|
1546 | b = zstd.BufferWithSegments(b"".join(frames), segments) | |
|
1499 | 1547 | |
|
1500 | 1548 | result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes) |
|
1501 | 1549 | |
|
1502 | 1550 | self.assertEqual(len(result), len(frames)) |
|
1503 | 1551 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1504 | 1552 | |
|
1505 | 1553 | for i, data in enumerate(original): |
|
1506 | 1554 | self.assertEqual(result[i].tobytes(), data) |
|
1507 | 1555 | |
|
1508 | 1556 | def test_buffer_with_segments_collection_input(self): |
|
1509 | 1557 | cctx = zstd.ZstdCompressor() |
|
1510 | 1558 | |
|
1511 | 1559 | original = [ |
|
1512 |
b |
|
|
1513 |
b |
|
|
1514 |
b |
|
|
1515 |
b |
|
|
1516 |
b |
|
|
1560 | b"foo0" * 2, | |
|
1561 | b"foo1" * 3, | |
|
1562 | b"foo2" * 4, | |
|
1563 | b"foo3" * 5, | |
|
1564 | b"foo4" * 6, | |
|
1517 | 1565 | ] |
|
1518 | 1566 | |
|
1519 |
if not hasattr(cctx, |
|
|
1520 |
self.skipTest( |
|
|
1567 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
1568 | self.skipTest("multi_compress_to_buffer not available") | |
|
1521 | 1569 | |
|
1522 | 1570 | frames = cctx.multi_compress_to_buffer(original) |
|
1523 | 1571 | |
|
1524 | 1572 | # Check round trip. |
|
1525 | 1573 | dctx = zstd.ZstdDecompressor() |
|
1526 | 1574 | |
|
1527 | 1575 | decompressed = dctx.multi_decompress_to_buffer(frames, threads=3) |
|
1528 | 1576 | |
|
1529 | 1577 | self.assertEqual(len(decompressed), len(original)) |
|
1530 | 1578 | |
|
1531 | 1579 | for i, data in enumerate(original): |
|
1532 | 1580 | self.assertEqual(data, decompressed[i].tobytes()) |
|
1533 | 1581 | |
|
1534 | 1582 | # And a manual mode. |
|
1535 |
b = b |
|
|
1536 |
b1 = zstd.BufferWithSegments( |
|
|
1537 | 0, len(frames[0]), | |
|
1538 | len(frames[0]), len(frames[1]))) | |
|
1583 | b = b"".join([frames[0].tobytes(), frames[1].tobytes()]) | |
|
1584 | b1 = zstd.BufferWithSegments( | |
|
1585 | b, struct.pack("=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1])) | |
|
1586 | ) | |
|
1539 | 1587 | |
|
1540 |
b = b |
|
|
1541 |
b2 = zstd.BufferWithSegments( |
|
|
1542 | 0, len(frames[2]), | |
|
1543 | len(frames[2]), len(frames[3]), | |
|
1544 | len(frames[2]) + len(frames[3]), len(frames[4]))) | |
|
1588 | b = b"".join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()]) | |
|
1589 | b2 = zstd.BufferWithSegments( | |
|
1590 | b, | |
|
1591 | struct.pack( | |
|
1592 | "=QQQQQQ", | |
|
1593 | 0, | |
|
1594 | len(frames[2]), | |
|
1595 | len(frames[2]), | |
|
1596 | len(frames[3]), | |
|
1597 | len(frames[2]) + len(frames[3]), | |
|
1598 | len(frames[4]), | |
|
1599 | ), | |
|
1600 | ) | |
|
1545 | 1601 | |
|
1546 | 1602 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1547 | 1603 | |
|
1548 | 1604 | dctx = zstd.ZstdDecompressor() |
|
1549 | 1605 | decompressed = dctx.multi_decompress_to_buffer(c) |
|
1550 | 1606 | |
|
1551 | 1607 | self.assertEqual(len(decompressed), 5) |
|
1552 | 1608 | for i in range(5): |
|
1553 | 1609 | self.assertEqual(decompressed[i].tobytes(), original[i]) |
|
1554 | 1610 | |
|
1555 | 1611 | def test_dict(self): |
|
1556 | 1612 | d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16) |
|
1557 | 1613 | |
|
1558 | 1614 | cctx = zstd.ZstdCompressor(dict_data=d, level=1) |
|
1559 | 1615 | frames = [cctx.compress(s) for s in generate_samples()] |
|
1560 | 1616 | |
|
1561 | 1617 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1562 | 1618 | |
|
1563 |
if not hasattr(dctx, |
|
|
1564 |
self.skipTest( |
|
|
1619 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1620 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1565 | 1621 | |
|
1566 | 1622 | result = dctx.multi_decompress_to_buffer(frames) |
|
1567 | 1623 | |
|
1568 | 1624 | self.assertEqual([o.tobytes() for o in result], generate_samples()) |
|
1569 | 1625 | |
|
1570 | 1626 | def test_multiple_threads(self): |
|
1571 | 1627 | cctx = zstd.ZstdCompressor() |
|
1572 | 1628 | |
|
1573 | 1629 | frames = [] |
|
1574 |
frames.extend(cctx.compress(b |
|
|
1575 |
frames.extend(cctx.compress(b |
|
|
1630 | frames.extend(cctx.compress(b"x" * 64) for i in range(256)) | |
|
1631 | frames.extend(cctx.compress(b"y" * 64) for i in range(256)) | |
|
1576 | 1632 | |
|
1577 | 1633 | dctx = zstd.ZstdDecompressor() |
|
1578 | 1634 | |
|
1579 |
if not hasattr(dctx, |
|
|
1580 |
self.skipTest( |
|
|
1635 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1636 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1581 | 1637 | |
|
1582 | 1638 | result = dctx.multi_decompress_to_buffer(frames, threads=-1) |
|
1583 | 1639 | |
|
1584 | 1640 | self.assertEqual(len(result), len(frames)) |
|
1585 | 1641 | self.assertEqual(result.size(), 2 * 64 * 256) |
|
1586 |
self.assertEqual(result[0].tobytes(), b |
|
|
1587 |
self.assertEqual(result[256].tobytes(), b |
|
|
1642 | self.assertEqual(result[0].tobytes(), b"x" * 64) | |
|
1643 | self.assertEqual(result[256].tobytes(), b"y" * 64) | |
|
1588 | 1644 | |
|
1589 | 1645 | def test_item_failure(self): |
|
1590 | 1646 | cctx = zstd.ZstdCompressor() |
|
1591 |
frames = [cctx.compress(b |
|
|
1647 | frames = [cctx.compress(b"x" * 128), cctx.compress(b"y" * 128)] | |
|
1592 | 1648 | |
|
1593 |
frames[1] = frames[1][0:15] + b |
|
|
1649 | frames[1] = frames[1][0:15] + b"extra" + frames[1][15:] | |
|
1594 | 1650 | |
|
1595 | 1651 | dctx = zstd.ZstdDecompressor() |
|
1596 | 1652 | |
|
1597 |
if not hasattr(dctx, |
|
|
1598 |
self.skipTest( |
|
|
1653 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
|
1654 | self.skipTest("multi_decompress_to_buffer not available") | |
|
1599 | 1655 | |
|
1600 |
with self.assertRaisesRegex |
|
|
1601 | 'error decompressing item 1: (' | |
|
1602 | 'Corrupted block|' | |
|
1603 | 'Destination buffer is too small)'): | |
|
1656 | with self.assertRaisesRegex( | |
|
1657 | zstd.ZstdError, | |
|
1658 | "error decompressing item 1: (" | |
|
1659 | "Corrupted block|" | |
|
1660 | "Destination buffer is too small)", | |
|
1661 | ): | |
|
1604 | 1662 | dctx.multi_decompress_to_buffer(frames) |
|
1605 | 1663 | |
|
1606 |
with self.assertRaisesRegex |
|
|
1607 | 'error decompressing item 1: (' | |
|
1608 | 'Corrupted block|' | |
|
1609 | 'Destination buffer is too small)'): | |
|
1664 | with self.assertRaisesRegex( | |
|
1665 | zstd.ZstdError, | |
|
1666 | "error decompressing item 1: (" | |
|
1667 | "Corrupted block|" | |
|
1668 | "Destination buffer is too small)", | |
|
1669 | ): | |
|
1610 | 1670 | dctx.multi_decompress_to_buffer(frames, threads=2) |
|
1611 |
@@ -1,485 +1,576 b'' | |||
|
1 | 1 | import io |
|
2 | 2 | import os |
|
3 | 3 | import unittest |
|
4 | 4 | |
|
5 | 5 | try: |
|
6 | 6 | import hypothesis |
|
7 | 7 | import hypothesis.strategies as strategies |
|
8 | 8 | except ImportError: |
|
9 |
raise unittest.SkipTest( |
|
|
9 | raise unittest.SkipTest("hypothesis not available") | |
|
10 | 10 | |
|
11 | 11 | import zstandard as zstd |
|
12 | 12 | |
|
13 |
from . |
|
|
13 | from .common import ( | |
|
14 | 14 | make_cffi, |
|
15 | 15 | NonClosingBytesIO, |
|
16 | 16 | random_input_data, |
|
17 | TestCase, | |
|
17 | 18 | ) |
|
18 | 19 | |
|
19 | 20 | |
|
20 |
@unittest.skipUnless( |
|
|
21 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
21 | 22 | @make_cffi |
|
22 |
class TestDecompressor_stream_reader_fuzzing( |
|
|
23 | class TestDecompressor_stream_reader_fuzzing(TestCase): | |
|
23 | 24 | @hypothesis.settings( |
|
24 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
25 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
26 | level=strategies.integers(min_value=1, max_value=5), | |
|
27 | streaming=strategies.booleans(), | |
|
28 | source_read_size=strategies.integers(1, 1048576), | |
|
29 | read_sizes=strategies.data()) | |
|
30 | def test_stream_source_read_variance(self, original, level, streaming, | |
|
31 | source_read_size, read_sizes): | |
|
25 | suppress_health_check=[ | |
|
26 | hypothesis.HealthCheck.large_base_example, | |
|
27 | hypothesis.HealthCheck.too_slow, | |
|
28 | ] | |
|
29 | ) | |
|
30 | @hypothesis.given( | |
|
31 | original=strategies.sampled_from(random_input_data()), | |
|
32 | level=strategies.integers(min_value=1, max_value=5), | |
|
33 | streaming=strategies.booleans(), | |
|
34 | source_read_size=strategies.integers(1, 1048576), | |
|
35 | read_sizes=strategies.data(), | |
|
36 | ) | |
|
37 | def test_stream_source_read_variance( | |
|
38 | self, original, level, streaming, source_read_size, read_sizes | |
|
39 | ): | |
|
32 | 40 | cctx = zstd.ZstdCompressor(level=level) |
|
33 | 41 | |
|
34 | 42 | if streaming: |
|
35 | 43 | source = io.BytesIO() |
|
36 | 44 | writer = cctx.stream_writer(source) |
|
37 | 45 | writer.write(original) |
|
38 | 46 | writer.flush(zstd.FLUSH_FRAME) |
|
39 | 47 | source.seek(0) |
|
40 | 48 | else: |
|
41 | 49 | frame = cctx.compress(original) |
|
42 | 50 | source = io.BytesIO(frame) |
|
43 | 51 | |
|
44 | 52 | dctx = zstd.ZstdDecompressor() |
|
45 | 53 | |
|
46 | 54 | chunks = [] |
|
47 | 55 | with dctx.stream_reader(source, read_size=source_read_size) as reader: |
|
48 | 56 | while True: |
|
49 | 57 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) |
|
50 | 58 | chunk = reader.read(read_size) |
|
51 | 59 | if not chunk and read_size: |
|
52 | 60 | break |
|
53 | 61 | |
|
54 | 62 | chunks.append(chunk) |
|
55 | 63 | |
|
56 |
self.assertEqual(b |
|
|
64 | self.assertEqual(b"".join(chunks), original) | |
|
57 | 65 | |
|
58 | 66 | # Similar to above except we have a constant read() size. |
|
59 | 67 | @hypothesis.settings( |
|
60 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
61 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
62 | level=strategies.integers(min_value=1, max_value=5), | |
|
63 | streaming=strategies.booleans(), | |
|
64 | source_read_size=strategies.integers(1, 1048576), | |
|
65 | read_size=strategies.integers(-1, 131072)) | |
|
66 | def test_stream_source_read_size(self, original, level, streaming, | |
|
67 | source_read_size, read_size): | |
|
68 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
69 | ) | |
|
70 | @hypothesis.given( | |
|
71 | original=strategies.sampled_from(random_input_data()), | |
|
72 | level=strategies.integers(min_value=1, max_value=5), | |
|
73 | streaming=strategies.booleans(), | |
|
74 | source_read_size=strategies.integers(1, 1048576), | |
|
75 | read_size=strategies.integers(-1, 131072), | |
|
76 | ) | |
|
77 | def test_stream_source_read_size( | |
|
78 | self, original, level, streaming, source_read_size, read_size | |
|
79 | ): | |
|
68 | 80 | if read_size == 0: |
|
69 | 81 | read_size = 1 |
|
70 | 82 | |
|
71 | 83 | cctx = zstd.ZstdCompressor(level=level) |
|
72 | 84 | |
|
73 | 85 | if streaming: |
|
74 | 86 | source = io.BytesIO() |
|
75 | 87 | writer = cctx.stream_writer(source) |
|
76 | 88 | writer.write(original) |
|
77 | 89 | writer.flush(zstd.FLUSH_FRAME) |
|
78 | 90 | source.seek(0) |
|
79 | 91 | else: |
|
80 | 92 | frame = cctx.compress(original) |
|
81 | 93 | source = io.BytesIO(frame) |
|
82 | 94 | |
|
83 | 95 | dctx = zstd.ZstdDecompressor() |
|
84 | 96 | |
|
85 | 97 | chunks = [] |
|
86 | 98 | reader = dctx.stream_reader(source, read_size=source_read_size) |
|
87 | 99 | while True: |
|
88 | 100 | chunk = reader.read(read_size) |
|
89 | 101 | if not chunk and read_size: |
|
90 | 102 | break |
|
91 | 103 | |
|
92 | 104 | chunks.append(chunk) |
|
93 | 105 | |
|
94 |
self.assertEqual(b |
|
|
106 | self.assertEqual(b"".join(chunks), original) | |
|
95 | 107 | |
|
96 | 108 | @hypothesis.settings( |
|
97 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
98 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
99 | level=strategies.integers(min_value=1, max_value=5), | |
|
100 | streaming=strategies.booleans(), | |
|
101 | source_read_size=strategies.integers(1, 1048576), | |
|
102 | read_sizes=strategies.data()) | |
|
103 | def test_buffer_source_read_variance(self, original, level, streaming, | |
|
104 | source_read_size, read_sizes): | |
|
109 | suppress_health_check=[ | |
|
110 | hypothesis.HealthCheck.large_base_example, | |
|
111 | hypothesis.HealthCheck.too_slow, | |
|
112 | ] | |
|
113 | ) | |
|
114 | @hypothesis.given( | |
|
115 | original=strategies.sampled_from(random_input_data()), | |
|
116 | level=strategies.integers(min_value=1, max_value=5), | |
|
117 | streaming=strategies.booleans(), | |
|
118 | source_read_size=strategies.integers(1, 1048576), | |
|
119 | read_sizes=strategies.data(), | |
|
120 | ) | |
|
121 | def test_buffer_source_read_variance( | |
|
122 | self, original, level, streaming, source_read_size, read_sizes | |
|
123 | ): | |
|
105 | 124 | cctx = zstd.ZstdCompressor(level=level) |
|
106 | 125 | |
|
107 | 126 | if streaming: |
|
108 | 127 | source = io.BytesIO() |
|
109 | 128 | writer = cctx.stream_writer(source) |
|
110 | 129 | writer.write(original) |
|
111 | 130 | writer.flush(zstd.FLUSH_FRAME) |
|
112 | 131 | frame = source.getvalue() |
|
113 | 132 | else: |
|
114 | 133 | frame = cctx.compress(original) |
|
115 | 134 | |
|
116 | 135 | dctx = zstd.ZstdDecompressor() |
|
117 | 136 | chunks = [] |
|
118 | 137 | |
|
119 | 138 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: |
|
120 | 139 | while True: |
|
121 | 140 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) |
|
122 | 141 | chunk = reader.read(read_size) |
|
123 | 142 | if not chunk and read_size: |
|
124 | 143 | break |
|
125 | 144 | |
|
126 | 145 | chunks.append(chunk) |
|
127 | 146 | |
|
128 |
self.assertEqual(b |
|
|
147 | self.assertEqual(b"".join(chunks), original) | |
|
129 | 148 | |
|
130 | 149 | # Similar to above except we have a constant read() size. |
|
131 | 150 | @hypothesis.settings( |
|
132 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
133 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
134 | level=strategies.integers(min_value=1, max_value=5), | |
|
135 | streaming=strategies.booleans(), | |
|
136 | source_read_size=strategies.integers(1, 1048576), | |
|
137 | read_size=strategies.integers(-1, 131072)) | |
|
138 | def test_buffer_source_constant_read_size(self, original, level, streaming, | |
|
139 | source_read_size, read_size): | |
|
151 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
152 | ) | |
|
153 | @hypothesis.given( | |
|
154 | original=strategies.sampled_from(random_input_data()), | |
|
155 | level=strategies.integers(min_value=1, max_value=5), | |
|
156 | streaming=strategies.booleans(), | |
|
157 | source_read_size=strategies.integers(1, 1048576), | |
|
158 | read_size=strategies.integers(-1, 131072), | |
|
159 | ) | |
|
160 | def test_buffer_source_constant_read_size( | |
|
161 | self, original, level, streaming, source_read_size, read_size | |
|
162 | ): | |
|
140 | 163 | if read_size == 0: |
|
141 | 164 | read_size = -1 |
|
142 | 165 | |
|
143 | 166 | cctx = zstd.ZstdCompressor(level=level) |
|
144 | 167 | |
|
145 | 168 | if streaming: |
|
146 | 169 | source = io.BytesIO() |
|
147 | 170 | writer = cctx.stream_writer(source) |
|
148 | 171 | writer.write(original) |
|
149 | 172 | writer.flush(zstd.FLUSH_FRAME) |
|
150 | 173 | frame = source.getvalue() |
|
151 | 174 | else: |
|
152 | 175 | frame = cctx.compress(original) |
|
153 | 176 | |
|
154 | 177 | dctx = zstd.ZstdDecompressor() |
|
155 | 178 | chunks = [] |
|
156 | 179 | |
|
157 | 180 | reader = dctx.stream_reader(frame, read_size=source_read_size) |
|
158 | 181 | while True: |
|
159 | 182 | chunk = reader.read(read_size) |
|
160 | 183 | if not chunk and read_size: |
|
161 | 184 | break |
|
162 | 185 | |
|
163 | 186 | chunks.append(chunk) |
|
164 | 187 | |
|
165 |
self.assertEqual(b |
|
|
188 | self.assertEqual(b"".join(chunks), original) | |
|
166 | 189 | |
|
167 | 190 | @hypothesis.settings( |
|
168 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
|
169 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
170 | level=strategies.integers(min_value=1, max_value=5), | |
|
171 | streaming=strategies.booleans(), | |
|
172 | source_read_size=strategies.integers(1, 1048576)) | |
|
173 | def test_stream_source_readall(self, original, level, streaming, | |
|
174 | source_read_size): | |
|
191 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
|
192 | ) | |
|
193 | @hypothesis.given( | |
|
194 | original=strategies.sampled_from(random_input_data()), | |
|
195 | level=strategies.integers(min_value=1, max_value=5), | |
|
196 | streaming=strategies.booleans(), | |
|
197 | source_read_size=strategies.integers(1, 1048576), | |
|
198 | ) | |
|
199 | def test_stream_source_readall(self, original, level, streaming, source_read_size): | |
|
175 | 200 | cctx = zstd.ZstdCompressor(level=level) |
|
176 | 201 | |
|
177 | 202 | if streaming: |
|
178 | 203 | source = io.BytesIO() |
|
179 | 204 | writer = cctx.stream_writer(source) |
|
180 | 205 | writer.write(original) |
|
181 | 206 | writer.flush(zstd.FLUSH_FRAME) |
|
182 | 207 | source.seek(0) |
|
183 | 208 | else: |
|
184 | 209 | frame = cctx.compress(original) |
|
185 | 210 | source = io.BytesIO(frame) |
|
186 | 211 | |
|
187 | 212 | dctx = zstd.ZstdDecompressor() |
|
188 | 213 | |
|
189 | 214 | data = dctx.stream_reader(source, read_size=source_read_size).readall() |
|
190 | 215 | self.assertEqual(data, original) |
|
191 | 216 | |
|
192 | 217 | @hypothesis.settings( |
|
193 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
194 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
195 | level=strategies.integers(min_value=1, max_value=5), | |
|
196 | streaming=strategies.booleans(), | |
|
197 | source_read_size=strategies.integers(1, 1048576), | |
|
198 | read_sizes=strategies.data()) | |
|
199 | def test_stream_source_read1_variance(self, original, level, streaming, | |
|
200 | source_read_size, read_sizes): | |
|
218 | suppress_health_check=[ | |
|
219 | hypothesis.HealthCheck.large_base_example, | |
|
220 | hypothesis.HealthCheck.too_slow, | |
|
221 | ] | |
|
222 | ) | |
|
223 | @hypothesis.given( | |
|
224 | original=strategies.sampled_from(random_input_data()), | |
|
225 | level=strategies.integers(min_value=1, max_value=5), | |
|
226 | streaming=strategies.booleans(), | |
|
227 | source_read_size=strategies.integers(1, 1048576), | |
|
228 | read_sizes=strategies.data(), | |
|
229 | ) | |
|
230 | def test_stream_source_read1_variance( | |
|
231 | self, original, level, streaming, source_read_size, read_sizes | |
|
232 | ): | |
|
201 | 233 | cctx = zstd.ZstdCompressor(level=level) |
|
202 | 234 | |
|
203 | 235 | if streaming: |
|
204 | 236 | source = io.BytesIO() |
|
205 | 237 | writer = cctx.stream_writer(source) |
|
206 | 238 | writer.write(original) |
|
207 | 239 | writer.flush(zstd.FLUSH_FRAME) |
|
208 | 240 | source.seek(0) |
|
209 | 241 | else: |
|
210 | 242 | frame = cctx.compress(original) |
|
211 | 243 | source = io.BytesIO(frame) |
|
212 | 244 | |
|
213 | 245 | dctx = zstd.ZstdDecompressor() |
|
214 | 246 | |
|
215 | 247 | chunks = [] |
|
216 | 248 | with dctx.stream_reader(source, read_size=source_read_size) as reader: |
|
217 | 249 | while True: |
|
218 | 250 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) |
|
219 | 251 | chunk = reader.read1(read_size) |
|
220 | 252 | if not chunk and read_size: |
|
221 | 253 | break |
|
222 | 254 | |
|
223 | 255 | chunks.append(chunk) |
|
224 | 256 | |
|
225 |
self.assertEqual(b |
|
|
257 | self.assertEqual(b"".join(chunks), original) | |
|
226 | 258 | |
|
227 | 259 | @hypothesis.settings( |
|
228 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
229 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
230 | level=strategies.integers(min_value=1, max_value=5), | |
|
231 | streaming=strategies.booleans(), | |
|
232 | source_read_size=strategies.integers(1, 1048576), | |
|
233 | read_sizes=strategies.data()) | |
|
234 | def test_stream_source_readinto1_variance(self, original, level, streaming, | |
|
235 | source_read_size, read_sizes): | |
|
260 | suppress_health_check=[ | |
|
261 | hypothesis.HealthCheck.large_base_example, | |
|
262 | hypothesis.HealthCheck.too_slow, | |
|
263 | ] | |
|
264 | ) | |
|
265 | @hypothesis.given( | |
|
266 | original=strategies.sampled_from(random_input_data()), | |
|
267 | level=strategies.integers(min_value=1, max_value=5), | |
|
268 | streaming=strategies.booleans(), | |
|
269 | source_read_size=strategies.integers(1, 1048576), | |
|
270 | read_sizes=strategies.data(), | |
|
271 | ) | |
|
272 | def test_stream_source_readinto1_variance( | |
|
273 | self, original, level, streaming, source_read_size, read_sizes | |
|
274 | ): | |
|
236 | 275 | cctx = zstd.ZstdCompressor(level=level) |
|
237 | 276 | |
|
238 | 277 | if streaming: |
|
239 | 278 | source = io.BytesIO() |
|
240 | 279 | writer = cctx.stream_writer(source) |
|
241 | 280 | writer.write(original) |
|
242 | 281 | writer.flush(zstd.FLUSH_FRAME) |
|
243 | 282 | source.seek(0) |
|
244 | 283 | else: |
|
245 | 284 | frame = cctx.compress(original) |
|
246 | 285 | source = io.BytesIO(frame) |
|
247 | 286 | |
|
248 | 287 | dctx = zstd.ZstdDecompressor() |
|
249 | 288 | |
|
250 | 289 | chunks = [] |
|
251 | 290 | with dctx.stream_reader(source, read_size=source_read_size) as reader: |
|
252 | 291 | while True: |
|
253 | 292 | read_size = read_sizes.draw(strategies.integers(1, 131072)) |
|
254 | 293 | b = bytearray(read_size) |
|
255 | 294 | count = reader.readinto1(b) |
|
256 | 295 | |
|
257 | 296 | if not count: |
|
258 | 297 | break |
|
259 | 298 | |
|
260 | 299 | chunks.append(bytes(b[0:count])) |
|
261 | 300 | |
|
262 |
self.assertEqual(b |
|
|
301 | self.assertEqual(b"".join(chunks), original) | |
|
263 | 302 | |
|
264 | 303 | @hypothesis.settings( |
|
265 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
304 | suppress_health_check=[ | |
|
305 | hypothesis.HealthCheck.large_base_example, | |
|
306 | hypothesis.HealthCheck.too_slow, | |
|
307 | ] | |
|
308 | ) | |
|
266 | 309 | @hypothesis.given( |
|
267 | 310 | original=strategies.sampled_from(random_input_data()), |
|
268 | 311 | level=strategies.integers(min_value=1, max_value=5), |
|
269 | 312 | source_read_size=strategies.integers(1, 1048576), |
|
270 | 313 | seek_amounts=strategies.data(), |
|
271 |
read_sizes=strategies.data() |
|
|
272 | def test_relative_seeks(self, original, level, source_read_size, seek_amounts, | |
|
273 | read_sizes): | |
|
314 | read_sizes=strategies.data(), | |
|
315 | ) | |
|
316 | def test_relative_seeks( | |
|
317 | self, original, level, source_read_size, seek_amounts, read_sizes | |
|
318 | ): | |
|
274 | 319 | cctx = zstd.ZstdCompressor(level=level) |
|
275 | 320 | frame = cctx.compress(original) |
|
276 | 321 | |
|
277 | 322 | dctx = zstd.ZstdDecompressor() |
|
278 | 323 | |
|
279 | 324 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: |
|
280 | 325 | while True: |
|
281 | 326 | amount = seek_amounts.draw(strategies.integers(0, 16384)) |
|
282 | 327 | reader.seek(amount, os.SEEK_CUR) |
|
283 | 328 | |
|
284 | 329 | offset = reader.tell() |
|
285 | 330 | read_amount = read_sizes.draw(strategies.integers(1, 16384)) |
|
286 | 331 | chunk = reader.read(read_amount) |
|
287 | 332 | |
|
288 | 333 | if not chunk: |
|
289 | 334 | break |
|
290 | 335 | |
|
291 | self.assertEqual(original[offset:offset + len(chunk)], chunk) | |
|
336 | self.assertEqual(original[offset : offset + len(chunk)], chunk) | |
|
292 | 337 | |
|
293 | 338 | @hypothesis.settings( |
|
294 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) | |
|
339 | suppress_health_check=[ | |
|
340 | hypothesis.HealthCheck.large_base_example, | |
|
341 | hypothesis.HealthCheck.too_slow, | |
|
342 | ] | |
|
343 | ) | |
|
295 | 344 | @hypothesis.given( |
|
296 | 345 | originals=strategies.data(), |
|
297 | 346 | frame_count=strategies.integers(min_value=2, max_value=10), |
|
298 | 347 | level=strategies.integers(min_value=1, max_value=5), |
|
299 | 348 | source_read_size=strategies.integers(1, 1048576), |
|
300 |
read_sizes=strategies.data() |
|
|
301 | def test_multiple_frames(self, originals, frame_count, level, | |
|
302 | source_read_size, read_sizes): | |
|
349 | read_sizes=strategies.data(), | |
|
350 | ) | |
|
351 | def test_multiple_frames( | |
|
352 | self, originals, frame_count, level, source_read_size, read_sizes | |
|
353 | ): | |
|
303 | 354 | |
|
304 | 355 | cctx = zstd.ZstdCompressor(level=level) |
|
305 | 356 | source = io.BytesIO() |
|
306 | 357 | buffer = io.BytesIO() |
|
307 | 358 | writer = cctx.stream_writer(buffer) |
|
308 | 359 | |
|
309 | 360 | for i in range(frame_count): |
|
310 | 361 | data = originals.draw(strategies.sampled_from(random_input_data())) |
|
311 | 362 | source.write(data) |
|
312 | 363 | writer.write(data) |
|
313 | 364 | writer.flush(zstd.FLUSH_FRAME) |
|
314 | 365 | |
|
315 | 366 | dctx = zstd.ZstdDecompressor() |
|
316 | 367 | buffer.seek(0) |
|
317 |
reader = dctx.stream_reader( |
|
|
318 | read_across_frames=True) | |
|
368 | reader = dctx.stream_reader( | |
|
369 | buffer, read_size=source_read_size, read_across_frames=True | |
|
370 | ) | |
|
319 | 371 | |
|
320 | 372 | chunks = [] |
|
321 | 373 | |
|
322 | 374 | while True: |
|
323 | 375 | read_amount = read_sizes.draw(strategies.integers(-1, 16384)) |
|
324 | 376 | chunk = reader.read(read_amount) |
|
325 | 377 | |
|
326 | 378 | if not chunk and read_amount: |
|
327 | 379 | break |
|
328 | 380 | |
|
329 | 381 | chunks.append(chunk) |
|
330 | 382 | |
|
331 |
self.assertEqual(source.getvalue(), b |
|
|
383 | self.assertEqual(source.getvalue(), b"".join(chunks)) | |
|
332 | 384 | |
|
333 | 385 | |
|
334 |
@unittest.skipUnless( |
|
|
386 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
335 | 387 | @make_cffi |
|
336 |
class TestDecompressor_stream_writer_fuzzing( |
|
|
337 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
338 | level=strategies.integers(min_value=1, max_value=5), | |
|
339 | write_size=strategies.integers(min_value=1, max_value=8192), | |
|
340 | input_sizes=strategies.data()) | |
|
388 | class TestDecompressor_stream_writer_fuzzing(TestCase): | |
|
389 | @hypothesis.settings( | |
|
390 | suppress_health_check=[ | |
|
391 | hypothesis.HealthCheck.large_base_example, | |
|
392 | hypothesis.HealthCheck.too_slow, | |
|
393 | ] | |
|
394 | ) | |
|
395 | @hypothesis.given( | |
|
396 | original=strategies.sampled_from(random_input_data()), | |
|
397 | level=strategies.integers(min_value=1, max_value=5), | |
|
398 | write_size=strategies.integers(min_value=1, max_value=8192), | |
|
399 | input_sizes=strategies.data(), | |
|
400 | ) | |
|
341 | 401 | def test_write_size_variance(self, original, level, write_size, input_sizes): |
|
342 | 402 | cctx = zstd.ZstdCompressor(level=level) |
|
343 | 403 | frame = cctx.compress(original) |
|
344 | 404 | |
|
345 | 405 | dctx = zstd.ZstdDecompressor() |
|
346 | 406 | source = io.BytesIO(frame) |
|
347 | 407 | dest = NonClosingBytesIO() |
|
348 | 408 | |
|
349 | 409 | with dctx.stream_writer(dest, write_size=write_size) as decompressor: |
|
350 | 410 | while True: |
|
351 | 411 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
352 | 412 | chunk = source.read(input_size) |
|
353 | 413 | if not chunk: |
|
354 | 414 | break |
|
355 | 415 | |
|
356 | 416 | decompressor.write(chunk) |
|
357 | 417 | |
|
358 | 418 | self.assertEqual(dest.getvalue(), original) |
|
359 | 419 | |
|
360 | 420 | |
|
361 |
@unittest.skipUnless( |
|
|
421 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
362 | 422 | @make_cffi |
|
363 |
class TestDecompressor_copy_stream_fuzzing( |
|
|
364 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
365 | level=strategies.integers(min_value=1, max_value=5), | |
|
366 | read_size=strategies.integers(min_value=1, max_value=8192), | |
|
367 | write_size=strategies.integers(min_value=1, max_value=8192)) | |
|
423 | class TestDecompressor_copy_stream_fuzzing(TestCase): | |
|
424 | @hypothesis.settings( | |
|
425 | suppress_health_check=[ | |
|
426 | hypothesis.HealthCheck.large_base_example, | |
|
427 | hypothesis.HealthCheck.too_slow, | |
|
428 | ] | |
|
429 | ) | |
|
430 | @hypothesis.given( | |
|
431 | original=strategies.sampled_from(random_input_data()), | |
|
432 | level=strategies.integers(min_value=1, max_value=5), | |
|
433 | read_size=strategies.integers(min_value=1, max_value=8192), | |
|
434 | write_size=strategies.integers(min_value=1, max_value=8192), | |
|
435 | ) | |
|
368 | 436 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
369 | 437 | cctx = zstd.ZstdCompressor(level=level) |
|
370 | 438 | frame = cctx.compress(original) |
|
371 | 439 | |
|
372 | 440 | source = io.BytesIO(frame) |
|
373 | 441 | dest = io.BytesIO() |
|
374 | 442 | |
|
375 | 443 | dctx = zstd.ZstdDecompressor() |
|
376 | 444 | dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size) |
|
377 | 445 | |
|
378 | 446 | self.assertEqual(dest.getvalue(), original) |
|
379 | 447 | |
|
380 | 448 | |
|
381 |
@unittest.skipUnless( |
|
|
449 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
382 | 450 | @make_cffi |
|
383 |
class TestDecompressor_decompressobj_fuzzing( |
|
|
384 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
385 | level=strategies.integers(min_value=1, max_value=5), | |
|
386 | chunk_sizes=strategies.data()) | |
|
451 | class TestDecompressor_decompressobj_fuzzing(TestCase): | |
|
452 | @hypothesis.settings( | |
|
453 | suppress_health_check=[ | |
|
454 | hypothesis.HealthCheck.large_base_example, | |
|
455 | hypothesis.HealthCheck.too_slow, | |
|
456 | ] | |
|
457 | ) | |
|
458 | @hypothesis.given( | |
|
459 | original=strategies.sampled_from(random_input_data()), | |
|
460 | level=strategies.integers(min_value=1, max_value=5), | |
|
461 | chunk_sizes=strategies.data(), | |
|
462 | ) | |
|
387 | 463 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
388 | 464 | cctx = zstd.ZstdCompressor(level=level) |
|
389 | 465 | frame = cctx.compress(original) |
|
390 | 466 | |
|
391 | 467 | source = io.BytesIO(frame) |
|
392 | 468 | |
|
393 | 469 | dctx = zstd.ZstdDecompressor() |
|
394 | 470 | dobj = dctx.decompressobj() |
|
395 | 471 | |
|
396 | 472 | chunks = [] |
|
397 | 473 | while True: |
|
398 | 474 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
399 | 475 | chunk = source.read(chunk_size) |
|
400 | 476 | if not chunk: |
|
401 | 477 | break |
|
402 | 478 | |
|
403 | 479 | chunks.append(dobj.decompress(chunk)) |
|
404 | 480 | |
|
405 |
self.assertEqual(b |
|
|
481 | self.assertEqual(b"".join(chunks), original) | |
|
406 | 482 | |
|
407 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
408 | level=strategies.integers(min_value=1, max_value=5), | |
|
409 | write_size=strategies.integers(min_value=1, | |
|
410 | max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
411 | chunk_sizes=strategies.data()) | |
|
483 | @hypothesis.settings( | |
|
484 | suppress_health_check=[ | |
|
485 | hypothesis.HealthCheck.large_base_example, | |
|
486 | hypothesis.HealthCheck.too_slow, | |
|
487 | ] | |
|
488 | ) | |
|
489 | @hypothesis.given( | |
|
490 | original=strategies.sampled_from(random_input_data()), | |
|
491 | level=strategies.integers(min_value=1, max_value=5), | |
|
492 | write_size=strategies.integers( | |
|
493 | min_value=1, max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
|
494 | ), | |
|
495 | chunk_sizes=strategies.data(), | |
|
496 | ) | |
|
412 | 497 | def test_random_output_sizes(self, original, level, write_size, chunk_sizes): |
|
413 | 498 | cctx = zstd.ZstdCompressor(level=level) |
|
414 | 499 | frame = cctx.compress(original) |
|
415 | 500 | |
|
416 | 501 | source = io.BytesIO(frame) |
|
417 | 502 | |
|
418 | 503 | dctx = zstd.ZstdDecompressor() |
|
419 | 504 | dobj = dctx.decompressobj(write_size=write_size) |
|
420 | 505 | |
|
421 | 506 | chunks = [] |
|
422 | 507 | while True: |
|
423 | 508 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
424 | 509 | chunk = source.read(chunk_size) |
|
425 | 510 | if not chunk: |
|
426 | 511 | break |
|
427 | 512 | |
|
428 | 513 | chunks.append(dobj.decompress(chunk)) |
|
429 | 514 | |
|
430 |
self.assertEqual(b |
|
|
515 | self.assertEqual(b"".join(chunks), original) | |
|
431 | 516 | |
|
432 | 517 | |
|
433 |
@unittest.skipUnless( |
|
|
518 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
434 | 519 | @make_cffi |
|
435 |
class TestDecompressor_read_to_iter_fuzzing( |
|
|
436 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), | |
|
437 | level=strategies.integers(min_value=1, max_value=5), | |
|
438 |
|
|
|
439 |
|
|
|
520 | class TestDecompressor_read_to_iter_fuzzing(TestCase): | |
|
521 | @hypothesis.given( | |
|
522 | original=strategies.sampled_from(random_input_data()), | |
|
523 | level=strategies.integers(min_value=1, max_value=5), | |
|
524 | read_size=strategies.integers(min_value=1, max_value=4096), | |
|
525 | write_size=strategies.integers(min_value=1, max_value=4096), | |
|
526 | ) | |
|
440 | 527 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
441 | 528 | cctx = zstd.ZstdCompressor(level=level) |
|
442 | 529 | frame = cctx.compress(original) |
|
443 | 530 | |
|
444 | 531 | source = io.BytesIO(frame) |
|
445 | 532 | |
|
446 | 533 | dctx = zstd.ZstdDecompressor() |
|
447 | chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size)) | |
|
534 | chunks = list( | |
|
535 | dctx.read_to_iter(source, read_size=read_size, write_size=write_size) | |
|
536 | ) | |
|
448 | 537 | |
|
449 |
self.assertEqual(b |
|
|
538 | self.assertEqual(b"".join(chunks), original) | |
|
450 | 539 | |
|
451 | 540 | |
|
452 |
@unittest.skipUnless( |
|
|
453 |
class TestDecompressor_multi_decompress_to_buffer_fuzzing( |
|
|
454 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), | |
|
455 | min_size=1, max_size=1024), | |
|
456 | threads=strategies.integers(min_value=1, max_value=8), | |
|
457 | use_dict=strategies.booleans()) | |
|
541 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
|
542 | class TestDecompressor_multi_decompress_to_buffer_fuzzing(TestCase): | |
|
543 | @hypothesis.given( | |
|
544 | original=strategies.lists( | |
|
545 | strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 | |
|
546 | ), | |
|
547 | threads=strategies.integers(min_value=1, max_value=8), | |
|
548 | use_dict=strategies.booleans(), | |
|
549 | ) | |
|
458 | 550 | def test_data_equivalence(self, original, threads, use_dict): |
|
459 | 551 | kwargs = {} |
|
460 | 552 | if use_dict: |
|
461 |
kwargs[ |
|
|
553 | kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) | |
|
462 | 554 | |
|
463 |
cctx = zstd.ZstdCompressor( |
|
|
464 | write_content_size=True, | |
|
465 | write_checksum=True, | |
|
466 | **kwargs) | |
|
555 | cctx = zstd.ZstdCompressor( | |
|
556 | level=1, write_content_size=True, write_checksum=True, **kwargs | |
|
557 | ) | |
|
467 | 558 | |
|
468 |
if not hasattr(cctx, |
|
|
469 |
self.skipTest( |
|
|
559 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
|
560 | self.skipTest("multi_compress_to_buffer not available") | |
|
470 | 561 | |
|
471 | 562 | frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1) |
|
472 | 563 | |
|
473 | 564 | dctx = zstd.ZstdDecompressor(**kwargs) |
|
474 | 565 | result = dctx.multi_decompress_to_buffer(frames_buffer) |
|
475 | 566 | |
|
476 | 567 | self.assertEqual(len(result), len(original)) |
|
477 | 568 | for i, frame in enumerate(result): |
|
478 | 569 | self.assertEqual(frame.tobytes(), original[i]) |
|
479 | 570 | |
|
480 | 571 | frames_list = [f.tobytes() for f in frames_buffer] |
|
481 | 572 | result = dctx.multi_decompress_to_buffer(frames_list) |
|
482 | 573 | |
|
483 | 574 | self.assertEqual(len(result), len(original)) |
|
484 | 575 | for i, frame in enumerate(result): |
|
485 | 576 | self.assertEqual(frame.tobytes(), original[i]) |
@@ -1,15 +1,15 b'' | |||
|
1 | 1 | import unittest |
|
2 | 2 | |
|
3 | 3 | import zstandard as zstd |
|
4 | 4 | |
|
5 |
from . |
|
|
5 | from .common import ( | |
|
6 | 6 | make_cffi, |
|
7 | TestCase, | |
|
7 | 8 | ) |
|
8 | 9 | |
|
9 | 10 | |
|
10 | 11 | @make_cffi |
|
11 |
class TestSizes( |
|
|
12 | class TestSizes(TestCase): | |
|
12 | 13 | def test_decompression_size(self): |
|
13 | 14 | size = zstd.estimate_decompression_context_size() |
|
14 | 15 | self.assertGreater(size, 100000) |
|
15 |
@@ -1,69 +1,70 b'' | |||
|
1 | 1 | from __future__ import unicode_literals |
|
2 | 2 | |
|
3 | 3 | import unittest |
|
4 | 4 | |
|
5 | 5 | import zstandard as zstd |
|
6 | 6 | |
|
7 |
from . |
|
|
7 | from .common import ( | |
|
8 | 8 | make_cffi, |
|
9 | TestCase, | |
|
9 | 10 | ) |
|
10 | 11 | |
|
11 | 12 | |
|
12 | 13 | @make_cffi |
|
13 |
class TestModuleAttributes( |
|
|
14 | class TestModuleAttributes(TestCase): | |
|
14 | 15 | def test_version(self): |
|
15 |
self.assertEqual(zstd.ZSTD_VERSION, (1, 4, |
|
|
16 | self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 4)) | |
|
16 | 17 | |
|
17 |
self.assertEqual(zstd.__version__, |
|
|
18 | self.assertEqual(zstd.__version__, "0.13.0") | |
|
18 | 19 | |
|
19 | 20 | def test_constants(self): |
|
20 | 21 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
21 |
self.assertEqual(zstd.FRAME_HEADER, b |
|
|
22 | self.assertEqual(zstd.FRAME_HEADER, b"\x28\xb5\x2f\xfd") | |
|
22 | 23 | |
|
23 | 24 | def test_hasattr(self): |
|
24 | 25 | attrs = ( |
|
25 |
|
|
|
26 |
|
|
|
27 |
|
|
|
28 |
|
|
|
29 |
|
|
|
30 |
|
|
|
31 |
|
|
|
32 |
|
|
|
33 |
|
|
|
34 |
|
|
|
35 |
|
|
|
36 |
|
|
|
37 |
|
|
|
38 |
|
|
|
39 |
|
|
|
40 |
|
|
|
41 |
|
|
|
42 |
|
|
|
43 |
|
|
|
44 |
|
|
|
45 |
|
|
|
46 |
|
|
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
|
50 |
|
|
|
51 |
|
|
|
52 |
|
|
|
53 |
|
|
|
54 |
|
|
|
55 |
|
|
|
56 |
|
|
|
57 |
|
|
|
58 |
|
|
|
59 |
|
|
|
60 |
|
|
|
61 |
|
|
|
62 |
|
|
|
63 |
|
|
|
64 |
|
|
|
65 |
|
|
|
26 | "CONTENTSIZE_UNKNOWN", | |
|
27 | "CONTENTSIZE_ERROR", | |
|
28 | "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
29 | "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
30 | "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", | |
|
31 | "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
|
32 | "MAGIC_NUMBER", | |
|
33 | "FLUSH_BLOCK", | |
|
34 | "FLUSH_FRAME", | |
|
35 | "BLOCKSIZELOG_MAX", | |
|
36 | "BLOCKSIZE_MAX", | |
|
37 | "WINDOWLOG_MIN", | |
|
38 | "WINDOWLOG_MAX", | |
|
39 | "CHAINLOG_MIN", | |
|
40 | "CHAINLOG_MAX", | |
|
41 | "HASHLOG_MIN", | |
|
42 | "HASHLOG_MAX", | |
|
43 | "HASHLOG3_MAX", | |
|
44 | "MINMATCH_MIN", | |
|
45 | "MINMATCH_MAX", | |
|
46 | "SEARCHLOG_MIN", | |
|
47 | "SEARCHLOG_MAX", | |
|
48 | "SEARCHLENGTH_MIN", | |
|
49 | "SEARCHLENGTH_MAX", | |
|
50 | "TARGETLENGTH_MIN", | |
|
51 | "TARGETLENGTH_MAX", | |
|
52 | "LDM_MINMATCH_MIN", | |
|
53 | "LDM_MINMATCH_MAX", | |
|
54 | "LDM_BUCKETSIZELOG_MAX", | |
|
55 | "STRATEGY_FAST", | |
|
56 | "STRATEGY_DFAST", | |
|
57 | "STRATEGY_GREEDY", | |
|
58 | "STRATEGY_LAZY", | |
|
59 | "STRATEGY_LAZY2", | |
|
60 | "STRATEGY_BTLAZY2", | |
|
61 | "STRATEGY_BTOPT", | |
|
62 | "STRATEGY_BTULTRA", | |
|
63 | "STRATEGY_BTULTRA2", | |
|
64 | "DICT_TYPE_AUTO", | |
|
65 | "DICT_TYPE_RAWCONTENT", | |
|
66 | "DICT_TYPE_FULLDICT", | |
|
66 | 67 | ) |
|
67 | 68 | |
|
68 | 69 | for a in attrs: |
|
69 | 70 | self.assertTrue(hasattr(zstd, a), a) |
@@ -1,89 +1,92 b'' | |||
|
1 | 1 | import struct |
|
2 | 2 | import sys |
|
3 | 3 | import unittest |
|
4 | 4 | |
|
5 | 5 | import zstandard as zstd |
|
6 | 6 | |
|
7 |
from . |
|
|
7 | from .common import ( | |
|
8 | 8 | generate_samples, |
|
9 | 9 | make_cffi, |
|
10 | 10 | random_input_data, |
|
11 | TestCase, | |
|
11 | 12 | ) |
|
12 | 13 | |
|
13 | 14 | if sys.version_info[0] >= 3: |
|
14 | 15 | int_type = int |
|
15 | 16 | else: |
|
16 | 17 | int_type = long |
|
17 | 18 | |
|
18 | 19 | |
|
19 | 20 | @make_cffi |
|
20 |
class TestTrainDictionary( |
|
|
21 | class TestTrainDictionary(TestCase): | |
|
21 | 22 | def test_no_args(self): |
|
22 | 23 | with self.assertRaises(TypeError): |
|
23 | 24 | zstd.train_dictionary() |
|
24 | 25 | |
|
25 | 26 | def test_bad_args(self): |
|
26 | 27 | with self.assertRaises(TypeError): |
|
27 |
zstd.train_dictionary(8192, u |
|
|
28 | zstd.train_dictionary(8192, u"foo") | |
|
28 | 29 | |
|
29 | 30 | with self.assertRaises(ValueError): |
|
30 |
zstd.train_dictionary(8192, [u |
|
|
31 | zstd.train_dictionary(8192, [u"foo"]) | |
|
31 | 32 | |
|
32 | 33 | def test_no_params(self): |
|
33 | 34 | d = zstd.train_dictionary(8192, random_input_data()) |
|
34 | 35 | self.assertIsInstance(d.dict_id(), int_type) |
|
35 | 36 | |
|
36 | 37 | # The dictionary ID may be different across platforms. |
|
37 |
expected = b |
|
|
38 | expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id()) | |
|
38 | 39 | |
|
39 | 40 | data = d.as_bytes() |
|
40 | 41 | self.assertEqual(data[0:8], expected) |
|
41 | 42 | |
|
42 | 43 | def test_basic(self): |
|
43 | 44 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) |
|
44 | 45 | self.assertIsInstance(d.dict_id(), int_type) |
|
45 | 46 | |
|
46 | 47 | data = d.as_bytes() |
|
47 |
self.assertEqual(data[0:4], b |
|
|
48 | self.assertEqual(data[0:4], b"\x37\xa4\x30\xec") | |
|
48 | 49 | |
|
49 | 50 | self.assertEqual(d.k, 64) |
|
50 | 51 | self.assertEqual(d.d, 16) |
|
51 | 52 | |
|
52 | 53 | def test_set_dict_id(self): |
|
53 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, | |
|
54 | dict_id=42) | |
|
54 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, dict_id=42) | |
|
55 | 55 | self.assertEqual(d.dict_id(), 42) |
|
56 | 56 | |
|
57 | 57 | def test_optimize(self): |
|
58 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, | |
|
59 | d=16) | |
|
58 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, d=16) | |
|
60 | 59 | |
|
61 | 60 | # This varies by platform. |
|
62 | 61 | self.assertIn(d.k, (50, 2000)) |
|
63 | 62 | self.assertEqual(d.d, 16) |
|
64 | 63 | |
|
64 | ||
|
65 | 65 | @make_cffi |
|
66 |
class TestCompressionDict( |
|
|
66 | class TestCompressionDict(TestCase): | |
|
67 | 67 | def test_bad_mode(self): |
|
68 |
with self.assertRaisesRegex |
|
|
69 |
zstd.ZstdCompressionDict(b |
|
|
68 | with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"): | |
|
69 | zstd.ZstdCompressionDict(b"foo", dict_type=42) | |
|
70 | 70 | |
|
71 | 71 | def test_bad_precompute_compress(self): |
|
72 | 72 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) |
|
73 | 73 | |
|
74 |
with self.assertRaisesRegex |
|
|
74 | with self.assertRaisesRegex(ValueError, "must specify one of level or "): | |
|
75 | 75 | d.precompute_compress() |
|
76 | 76 | |
|
77 |
with self.assertRaisesRegex |
|
|
78 |
d.precompute_compress( |
|
|
79 |
|
|
|
77 | with self.assertRaisesRegex(ValueError, "must only specify one of level or "): | |
|
78 | d.precompute_compress( | |
|
79 | level=3, compression_params=zstd.CompressionParameters() | |
|
80 | ) | |
|
80 | 81 | |
|
81 | 82 | def test_precompute_compress_rawcontent(self): |
|
82 |
d = zstd.ZstdCompressionDict( |
|
|
83 |
|
|
|
83 | d = zstd.ZstdCompressionDict( | |
|
84 | b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT | |
|
85 | ) | |
|
84 | 86 | d.precompute_compress(level=1) |
|
85 | 87 | |
|
86 |
d = zstd.ZstdCompressionDict( |
|
|
87 |
|
|
|
88 | with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'): | |
|
88 | d = zstd.ZstdCompressionDict( | |
|
89 | b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT | |
|
90 | ) | |
|
91 | with self.assertRaisesRegex(zstd.ZstdError, "unable to precompute dictionary"): | |
|
89 | 92 | d.precompute_compress(level=1) |
@@ -1,65 +1,75 b'' | |||
|
1 | 1 | # Copyright (c) 2017-present, Gregory Szorc |
|
2 | 2 | # All rights reserved. |
|
3 | 3 | # |
|
4 | 4 | # This software may be modified and distributed under the terms |
|
5 | 5 | # of the BSD license. See the LICENSE file for details. |
|
6 | 6 | |
|
7 | 7 | """Python interface to the Zstandard (zstd) compression library.""" |
|
8 | 8 | |
|
9 | 9 | from __future__ import absolute_import, unicode_literals |
|
10 | 10 | |
|
11 | 11 | # This module serves 2 roles: |
|
12 | 12 | # |
|
13 | 13 | # 1) Export the C or CFFI "backend" through a central module. |
|
14 | 14 | # 2) Implement additional functionality built on top of C or CFFI backend. |
|
15 | 15 | |
|
16 | 16 | import os |
|
17 | 17 | import platform |
|
18 | 18 | |
|
19 | 19 | # Some Python implementations don't support C extensions. That's why we have |
|
20 | 20 | # a CFFI implementation in the first place. The code here import one of our |
|
21 | 21 | # "backends" then re-exports the symbols from this module. For convenience, |
|
22 | 22 | # we support falling back to the CFFI backend if the C extension can't be |
|
23 | 23 | # imported. But for performance reasons, we only do this on unknown Python |
|
24 | 24 | # implementation. Notably, for CPython we require the C extension by default. |
|
25 | 25 | # Because someone will inevitably want special behavior, the behavior is |
|
26 | 26 | # configurable via an environment variable. A potentially better way to handle |
|
27 | 27 | # this is to import a special ``__importpolicy__`` module or something |
|
28 | 28 | # defining a variable and `setup.py` could write the file with whatever |
|
29 | 29 | # policy was specified at build time. Until someone needs it, we go with |
|
30 | 30 | # the hacky but simple environment variable approach. |
|
31 |
_module_policy = os.environ.get( |
|
|
31 | _module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default") | |
|
32 | 32 | |
|
33 |
if _module_policy == |
|
|
34 |
if platform.python_implementation() in ( |
|
|
33 | if _module_policy == "default": | |
|
34 | if platform.python_implementation() in ("CPython",): | |
|
35 | 35 | from zstd import * |
|
36 | backend = 'cext' | |
|
37 | elif platform.python_implementation() in ('PyPy',): | |
|
36 | ||
|
37 | backend = "cext" | |
|
38 | elif platform.python_implementation() in ("PyPy",): | |
|
38 | 39 | from .cffi import * |
|
39 | backend = 'cffi' | |
|
40 | ||
|
41 | backend = "cffi" | |
|
40 | 42 | else: |
|
41 | 43 | try: |
|
42 | 44 | from zstd import * |
|
43 | backend = 'cext' | |
|
45 | ||
|
46 | backend = "cext" | |
|
44 | 47 | except ImportError: |
|
45 | 48 | from .cffi import * |
|
46 | backend = 'cffi' | |
|
47 | elif _module_policy == 'cffi_fallback': | |
|
49 | ||
|
50 | backend = "cffi" | |
|
51 | elif _module_policy == "cffi_fallback": | |
|
48 | 52 | try: |
|
49 | 53 | from zstd import * |
|
50 | backend = 'cext' | |
|
54 | ||
|
55 | backend = "cext" | |
|
51 | 56 | except ImportError: |
|
52 | 57 | from .cffi import * |
|
53 | backend = 'cffi' | |
|
54 | elif _module_policy == 'cext': | |
|
58 | ||
|
59 | backend = "cffi" | |
|
60 | elif _module_policy == "cext": | |
|
55 | 61 | from zstd import * |
|
56 | backend = 'cext' | |
|
57 | elif _module_policy == 'cffi': | |
|
62 | ||
|
63 | backend = "cext" | |
|
64 | elif _module_policy == "cffi": | |
|
58 | 65 | from .cffi import * |
|
59 | backend = 'cffi' | |
|
66 | ||
|
67 | backend = "cffi" | |
|
60 | 68 | else: |
|
61 | raise ImportError('unknown module import policy: %s; use default, cffi_fallback, ' | |
|
62 | 'cext, or cffi' % _module_policy) | |
|
69 | raise ImportError( | |
|
70 | "unknown module import policy: %s; use default, cffi_fallback, " | |
|
71 | "cext, or cffi" % _module_policy | |
|
72 | ) | |
|
63 | 73 | |
|
64 | 74 | # Keep this in sync with python-zstandard.h. |
|
65 |
__version__ = |
|
|
75 | __version__ = "0.13.0" |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
|
1 | NO CONTENT: modified file | |
The requested commit or file is too big and content was truncated. Show full diff |
General Comments 0
You need to be logged in to leave comments.
Login now