##// END OF EJS Templates
zstandard: vendor python-zstandard 0.13.0...
Gregory Szorc -
r44446:de783805 default
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
@@ -1,102 +1,103 b''
1 # Files that just need to be migrated to the formatter.
1 # Files that just need to be migrated to the formatter.
2 # Do not add new files here!
2 # Do not add new files here!
3 mercurial/cext/manifest.c
3 mercurial/cext/manifest.c
4 mercurial/cext/osutil.c
4 mercurial/cext/osutil.c
5 # Vendored code that we should never format:
5 # Vendored code that we should never format:
6 contrib/python-zstandard/c-ext/bufferutil.c
6 contrib/python-zstandard/c-ext/bufferutil.c
7 contrib/python-zstandard/c-ext/compressionchunker.c
7 contrib/python-zstandard/c-ext/compressionchunker.c
8 contrib/python-zstandard/c-ext/compressiondict.c
8 contrib/python-zstandard/c-ext/compressiondict.c
9 contrib/python-zstandard/c-ext/compressionparams.c
9 contrib/python-zstandard/c-ext/compressionparams.c
10 contrib/python-zstandard/c-ext/compressionreader.c
10 contrib/python-zstandard/c-ext/compressionreader.c
11 contrib/python-zstandard/c-ext/compressionwriter.c
11 contrib/python-zstandard/c-ext/compressionwriter.c
12 contrib/python-zstandard/c-ext/compressobj.c
12 contrib/python-zstandard/c-ext/compressobj.c
13 contrib/python-zstandard/c-ext/compressor.c
13 contrib/python-zstandard/c-ext/compressor.c
14 contrib/python-zstandard/c-ext/compressoriterator.c
14 contrib/python-zstandard/c-ext/compressoriterator.c
15 contrib/python-zstandard/c-ext/constants.c
15 contrib/python-zstandard/c-ext/constants.c
16 contrib/python-zstandard/c-ext/decompressionreader.c
16 contrib/python-zstandard/c-ext/decompressionreader.c
17 contrib/python-zstandard/c-ext/decompressionwriter.c
17 contrib/python-zstandard/c-ext/decompressionwriter.c
18 contrib/python-zstandard/c-ext/decompressobj.c
18 contrib/python-zstandard/c-ext/decompressobj.c
19 contrib/python-zstandard/c-ext/decompressor.c
19 contrib/python-zstandard/c-ext/decompressor.c
20 contrib/python-zstandard/c-ext/decompressoriterator.c
20 contrib/python-zstandard/c-ext/decompressoriterator.c
21 contrib/python-zstandard/c-ext/frameparams.c
21 contrib/python-zstandard/c-ext/frameparams.c
22 contrib/python-zstandard/c-ext/python-zstandard.h
22 contrib/python-zstandard/c-ext/python-zstandard.h
23 contrib/python-zstandard/zstd.c
23 contrib/python-zstandard/zstd.c
24 contrib/python-zstandard/zstd/common/bitstream.h
24 contrib/python-zstandard/zstd/common/bitstream.h
25 contrib/python-zstandard/zstd/common/compiler.h
25 contrib/python-zstandard/zstd/common/compiler.h
26 contrib/python-zstandard/zstd/common/cpu.h
26 contrib/python-zstandard/zstd/common/cpu.h
27 contrib/python-zstandard/zstd/common/debug.c
27 contrib/python-zstandard/zstd/common/debug.c
28 contrib/python-zstandard/zstd/common/debug.h
28 contrib/python-zstandard/zstd/common/debug.h
29 contrib/python-zstandard/zstd/common/entropy_common.c
29 contrib/python-zstandard/zstd/common/entropy_common.c
30 contrib/python-zstandard/zstd/common/error_private.c
30 contrib/python-zstandard/zstd/common/error_private.c
31 contrib/python-zstandard/zstd/common/error_private.h
31 contrib/python-zstandard/zstd/common/error_private.h
32 contrib/python-zstandard/zstd/common/fse_decompress.c
32 contrib/python-zstandard/zstd/common/fse_decompress.c
33 contrib/python-zstandard/zstd/common/fse.h
33 contrib/python-zstandard/zstd/common/fse.h
34 contrib/python-zstandard/zstd/common/huf.h
34 contrib/python-zstandard/zstd/common/huf.h
35 contrib/python-zstandard/zstd/common/mem.h
35 contrib/python-zstandard/zstd/common/mem.h
36 contrib/python-zstandard/zstd/common/pool.c
36 contrib/python-zstandard/zstd/common/pool.c
37 contrib/python-zstandard/zstd/common/pool.h
37 contrib/python-zstandard/zstd/common/pool.h
38 contrib/python-zstandard/zstd/common/threading.c
38 contrib/python-zstandard/zstd/common/threading.c
39 contrib/python-zstandard/zstd/common/threading.h
39 contrib/python-zstandard/zstd/common/threading.h
40 contrib/python-zstandard/zstd/common/xxhash.c
40 contrib/python-zstandard/zstd/common/xxhash.c
41 contrib/python-zstandard/zstd/common/xxhash.h
41 contrib/python-zstandard/zstd/common/xxhash.h
42 contrib/python-zstandard/zstd/common/zstd_common.c
42 contrib/python-zstandard/zstd/common/zstd_common.c
43 contrib/python-zstandard/zstd/common/zstd_errors.h
43 contrib/python-zstandard/zstd/common/zstd_errors.h
44 contrib/python-zstandard/zstd/common/zstd_internal.h
44 contrib/python-zstandard/zstd/common/zstd_internal.h
45 contrib/python-zstandard/zstd/compress/fse_compress.c
45 contrib/python-zstandard/zstd/compress/fse_compress.c
46 contrib/python-zstandard/zstd/compress/hist.c
46 contrib/python-zstandard/zstd/compress/hist.c
47 contrib/python-zstandard/zstd/compress/hist.h
47 contrib/python-zstandard/zstd/compress/hist.h
48 contrib/python-zstandard/zstd/compress/huf_compress.c
48 contrib/python-zstandard/zstd/compress/huf_compress.c
49 contrib/python-zstandard/zstd/compress/zstd_compress.c
49 contrib/python-zstandard/zstd/compress/zstd_compress.c
50 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
50 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
51 contrib/python-zstandard/zstd/compress/zstd_compress_literals.c
51 contrib/python-zstandard/zstd/compress/zstd_compress_literals.c
52 contrib/python-zstandard/zstd/compress/zstd_compress_literals.h
52 contrib/python-zstandard/zstd/compress/zstd_compress_literals.h
53 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c
53 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c
54 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h
54 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h
55 contrib/python-zstandard/zstd/compress/zstd_cwksp.h
55 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
56 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
56 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
57 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
57 contrib/python-zstandard/zstd/compress/zstd_fast.c
58 contrib/python-zstandard/zstd/compress/zstd_fast.c
58 contrib/python-zstandard/zstd/compress/zstd_fast.h
59 contrib/python-zstandard/zstd/compress/zstd_fast.h
59 contrib/python-zstandard/zstd/compress/zstd_lazy.c
60 contrib/python-zstandard/zstd/compress/zstd_lazy.c
60 contrib/python-zstandard/zstd/compress/zstd_lazy.h
61 contrib/python-zstandard/zstd/compress/zstd_lazy.h
61 contrib/python-zstandard/zstd/compress/zstd_ldm.c
62 contrib/python-zstandard/zstd/compress/zstd_ldm.c
62 contrib/python-zstandard/zstd/compress/zstd_ldm.h
63 contrib/python-zstandard/zstd/compress/zstd_ldm.h
63 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
64 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
64 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
65 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
65 contrib/python-zstandard/zstd/compress/zstd_opt.c
66 contrib/python-zstandard/zstd/compress/zstd_opt.c
66 contrib/python-zstandard/zstd/compress/zstd_opt.h
67 contrib/python-zstandard/zstd/compress/zstd_opt.h
67 contrib/python-zstandard/zstd/decompress/huf_decompress.c
68 contrib/python-zstandard/zstd/decompress/huf_decompress.c
68 contrib/python-zstandard/zstd/decompress/zstd_ddict.c
69 contrib/python-zstandard/zstd/decompress/zstd_ddict.c
69 contrib/python-zstandard/zstd/decompress/zstd_ddict.h
70 contrib/python-zstandard/zstd/decompress/zstd_ddict.h
70 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c
71 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c
71 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h
72 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h
72 contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h
73 contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h
73 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
74 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
74 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
75 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
75 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
76 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
76 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
77 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
77 contrib/python-zstandard/zstd/deprecated/zbuff.h
78 contrib/python-zstandard/zstd/deprecated/zbuff.h
78 contrib/python-zstandard/zstd/dictBuilder/cover.c
79 contrib/python-zstandard/zstd/dictBuilder/cover.c
79 contrib/python-zstandard/zstd/dictBuilder/cover.h
80 contrib/python-zstandard/zstd/dictBuilder/cover.h
80 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
81 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
81 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
82 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
82 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
83 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
83 contrib/python-zstandard/zstd/dictBuilder/zdict.c
84 contrib/python-zstandard/zstd/dictBuilder/zdict.c
84 contrib/python-zstandard/zstd/dictBuilder/zdict.h
85 contrib/python-zstandard/zstd/dictBuilder/zdict.h
85 contrib/python-zstandard/zstd/zstd.h
86 contrib/python-zstandard/zstd/zstd.h
86 hgext/fsmonitor/pywatchman/bser.c
87 hgext/fsmonitor/pywatchman/bser.c
87 mercurial/thirdparty/xdiff/xdiff.h
88 mercurial/thirdparty/xdiff/xdiff.h
88 mercurial/thirdparty/xdiff/xdiffi.c
89 mercurial/thirdparty/xdiff/xdiffi.c
89 mercurial/thirdparty/xdiff/xdiffi.h
90 mercurial/thirdparty/xdiff/xdiffi.h
90 mercurial/thirdparty/xdiff/xemit.c
91 mercurial/thirdparty/xdiff/xemit.c
91 mercurial/thirdparty/xdiff/xemit.h
92 mercurial/thirdparty/xdiff/xemit.h
92 mercurial/thirdparty/xdiff/xhistogram.c
93 mercurial/thirdparty/xdiff/xhistogram.c
93 mercurial/thirdparty/xdiff/xinclude.h
94 mercurial/thirdparty/xdiff/xinclude.h
94 mercurial/thirdparty/xdiff/xmacros.h
95 mercurial/thirdparty/xdiff/xmacros.h
95 mercurial/thirdparty/xdiff/xmerge.c
96 mercurial/thirdparty/xdiff/xmerge.c
96 mercurial/thirdparty/xdiff/xpatience.c
97 mercurial/thirdparty/xdiff/xpatience.c
97 mercurial/thirdparty/xdiff/xprepare.c
98 mercurial/thirdparty/xdiff/xprepare.c
98 mercurial/thirdparty/xdiff/xprepare.h
99 mercurial/thirdparty/xdiff/xprepare.h
99 mercurial/thirdparty/xdiff/xtypes.h
100 mercurial/thirdparty/xdiff/xtypes.h
100 mercurial/thirdparty/xdiff/xutils.c
101 mercurial/thirdparty/xdiff/xutils.c
101 mercurial/thirdparty/xdiff/xutils.h
102 mercurial/thirdparty/xdiff/xutils.h
102 mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c
103 mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c
@@ -1,702 +1,721 b''
1 ===============
1 ===============
2 Version History
2 Version History
3 ===============
3 ===============
4
4
5 1.0.0 (not yet released)
5 1.0.0 (not yet released)
6 ========================
6 ========================
7
7
8 Actions Blocking Release
8 Actions Blocking Release
9 ------------------------
9 ------------------------
10
10
11 * compression and decompression APIs that support ``io.RawIOBase`` interface
11 * compression and decompression APIs that support ``io.RawIOBase`` interface
12 (#13).
12 (#13).
13 * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface.
13 * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface.
14 * Properly handle non-blocking I/O and partial writes for objects implementing
14 * Properly handle non-blocking I/O and partial writes for objects implementing
15 ``io.RawIOBase``.
15 ``io.RawIOBase``.
16 * Make ``write_return_read=True`` the default for objects implementing
16 * Make ``write_return_read=True`` the default for objects implementing
17 ``io.RawIOBase``.
17 ``io.RawIOBase``.
18 * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for
18 * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for
19 all objects implementing ``io.RawIOBase``. Is calling ``close()`` on
19 all objects implementing ``io.RawIOBase``. Is calling ``close()`` on
20 wrapped stream acceptable, should ``__exit__`` always call ``close()``,
20 wrapped stream acceptable, should ``__exit__`` always call ``close()``,
21 should ``close()`` imply ``flush()``, etc.
21 should ``close()`` imply ``flush()``, etc.
22 * Consider making reads across frames configurable behavior.
22 * Consider making reads across frames configurable behavior.
23 * Refactor module names so C and CFFI extensions live under ``zstandard``
23 * Refactor module names so C and CFFI extensions live under ``zstandard``
24 package.
24 package.
25 * Overall API design review.
25 * Overall API design review.
26 * Use Python allocator where possible.
26 * Use Python allocator where possible.
27 * Figure out what to do about experimental APIs not implemented by CFFI.
27 * Figure out what to do about experimental APIs not implemented by CFFI.
28 * APIs for auto adjusting compression parameters based on input size. e.g.
28 * APIs for auto adjusting compression parameters based on input size. e.g.
29 clamping the window log so it isn't too large for input.
29 clamping the window log so it isn't too large for input.
30 * Consider allowing compressor and decompressor instances to be thread safe,
30 * Consider allowing compressor and decompressor instances to be thread safe,
31 support concurrent operations. Or track when an operation is in progress and
31 support concurrent operations. Or track when an operation is in progress and
32 refuse to let concurrent operations use the same instance.
32 refuse to let concurrent operations use the same instance.
33 * Support for magic-less frames for all decompression operations (``decompress()``
33 * Support for magic-less frames for all decompression operations (``decompress()``
34 doesn't work due to sniffing the content size and the lack of a ZSTD API to
34 doesn't work due to sniffing the content size and the lack of a ZSTD API to
35 sniff magic-less frames - this should be fixed in 1.3.5.).
35 sniff magic-less frames - this should be fixed in 1.3.5.).
36 * Audit for complete flushing when ending compression streams.
36 * Audit for complete flushing when ending compression streams.
37 * Deprecate legacy APIs.
37 * Deprecate legacy APIs.
38 * Audit for ability to control read/write sizes on all APIs.
38 * Audit for ability to control read/write sizes on all APIs.
39 * Detect memory leaks via bench.py.
39 * Detect memory leaks via bench.py.
40 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
40 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
41 require use of ``CompressionParameters``.
41 require use of ``CompressionParameters``.
42 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
42 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
43 * Support modifying compression parameters mid operation when supported by
43 * Support modifying compression parameters mid operation when supported by
44 zstd API.
44 zstd API.
45 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
45 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
46 * Expose ``ZSTD_SRCSIZEHINT_{MIN,MAX}`` constants.
46 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
47 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
47 * Support ``ZSTD_c_literalCompressionMode `` compression parameter.
48 * Support ``ZSTD_dictForceLoad`` dictionary compression parameter.
49 * Support ``ZSTD_c_targetCBlockSize`` compression parameter.
50 * Support ``ZSTD_c_literalCompressionMode`` compression parameter.
51 * Support ``ZSTD_c_srcSizeHint`` compression parameter.
48 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
52 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
49 compression parameters.
53 compression parameters.
50 * Consider exposing ``ZSTDMT_toFlushNow()``.
54 * Consider exposing ``ZSTDMT_toFlushNow()``.
51 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
55 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
52 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
56 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
57 * Expose ``ZSTD_Sequence`` struct and related ``ZSTD_getSequences()`` API.
53 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
58 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
54 * Consider a ``chunker()`` API for decompression.
59 * Consider a ``chunker()`` API for decompression.
55 * Consider stats for ``chunker()`` API, including finding the last consumed
60 * Consider stats for ``chunker()`` API, including finding the last consumed
56 offset of input data.
61 offset of input data.
57 * Consider exposing ``ZSTD_cParam_getBounds()`` and
62 * Consider exposing ``ZSTD_cParam_getBounds()`` and
58 ``ZSTD_dParam_getBounds()`` APIs.
63 ``ZSTD_dParam_getBounds()`` APIs.
59 * Consider controls over resetting compression contexts (session only, parameters,
64 * Consider controls over resetting compression contexts (session only, parameters,
60 or session and parameters).
65 or session and parameters).
61 * Actually use the CFFI backend in fuzzing tests.
66 * Actually use the CFFI backend in fuzzing tests.
62
67
63 Other Actions Not Blocking Release
68 Other Actions Not Blocking Release
64 ---------------------------------------
69 ---------------------------------------
65
70
66 * Support for block compression APIs.
71 * Support for block compression APIs.
67 * API for ensuring max memory ceiling isn't exceeded.
72 * API for ensuring max memory ceiling isn't exceeded.
68 * Move off nose for testing.
73 * Move off nose for testing.
69
74
75 0.13.0 (released 2019-12-28)
76 ============================
77
78 Changes
79 -------
80
81 * ``pytest-xdist`` ``pytest`` extension is now installed so tests can be
82 run in parallel.
83 * CI now builds ``manylinux2010`` and ``manylinux2014`` binary wheels
84 instead of a mix of ``manylinux2010`` and ``manylinux1``.
85 * Official support for Python 3.8 has been added.
86 * Bundled zstandard library upgraded from 1.4.3 to 1.4.4.
87 * Python code has been reformatted with black.
88
70 0.12.0 (released 2019-09-15)
89 0.12.0 (released 2019-09-15)
71 ============================
90 ============================
72
91
73 Backwards Compatibility Notes
92 Backwards Compatibility Notes
74 -----------------------------
93 -----------------------------
75
94
76 * Support for Python 3.4 has been dropped since Python 3.4 is no longer
95 * Support for Python 3.4 has been dropped since Python 3.4 is no longer
77 a supported Python version upstream. (But it will likely continue to
96 a supported Python version upstream. (But it will likely continue to
78 work until Python 2.7 support is dropped and we port to Python 3.5+
97 work until Python 2.7 support is dropped and we port to Python 3.5+
79 APIs.)
98 APIs.)
80
99
81 Bug Fixes
100 Bug Fixes
82 ---------
101 ---------
83
102
84 * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91).
103 * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91).
85 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
104 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
86
105
87 Changes
106 Changes
88 -------
107 -------
89
108
90 * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI).
109 * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI).
91 * Switched to ``pytest`` for running tests (from ``nose``).
110 * Switched to ``pytest`` for running tests (from ``nose``).
92 * Bundled zstandard library upgraded from 1.3.8 to 1.4.3.
111 * Bundled zstandard library upgraded from 1.3.8 to 1.4.3.
93
112
94 0.11.1 (released 2019-05-14)
113 0.11.1 (released 2019-05-14)
95 ============================
114 ============================
96
115
97 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
116 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
98
117
99 0.11.0 (released 2019-02-24)
118 0.11.0 (released 2019-02-24)
100 ============================
119 ============================
101
120
102 Backwards Compatibility Notes
121 Backwards Compatibility Notes
103 -----------------------------
122 -----------------------------
104
123
105 * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0``
124 * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0``
106 and defaults to ``-1``, per the documented behavior of
125 and defaults to ``-1``, per the documented behavior of
107 ``io.RawIOBase.read()``. Previously, we required an argument that was
126 ``io.RawIOBase.read()``. Previously, we required an argument that was
108 a positive value.
127 a positive value.
109 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
128 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
110 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
129 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
111 instead of ``NotImplementedError``.
130 instead of ``NotImplementedError``.
112 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
131 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
113 argument. The default value will likely be changed in a future release
132 argument. The default value will likely be changed in a future release
114 and consumers are advised to pass the argument to avoid unwanted change
133 and consumers are advised to pass the argument to avoid unwanted change
115 of behavior in the future.
134 of behavior in the future.
116 * ``setup.py`` now always disables the CFFI backend if the installed
135 * ``setup.py`` now always disables the CFFI backend if the installed
117 CFFI package does not meet the minimum version requirements. Before, it was
136 CFFI package does not meet the minimum version requirements. Before, it was
118 possible for the CFFI backend to be generated and a run-time error to
137 possible for the CFFI backend to be generated and a run-time error to
119 occur.
138 occur.
120 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
139 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
121 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
140 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
122 respectively so naming is identical to the C extension. This should have
141 respectively so naming is identical to the C extension. This should have
123 no meaningful end-user impact, as instances aren't meant to be
142 no meaningful end-user impact, as instances aren't meant to be
124 constructed directly.
143 constructed directly.
125 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
144 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
126 argument to control whether ``write()`` returns the number of bytes
145 argument to control whether ``write()`` returns the number of bytes
127 read from the source / written to the decompressor. It defaults to off,
146 read from the source / written to the decompressor. It defaults to off,
128 which preserves the existing behavior of returning the number of bytes
147 which preserves the existing behavior of returning the number of bytes
129 emitted from the decompressor. The default will change in a future release
148 emitted from the decompressor. The default will change in a future release
130 so behavior aligns with the specified behavior of ``io.RawIOBase``.
149 so behavior aligns with the specified behavior of ``io.RawIOBase``.
131 * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This
150 * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This
132 will result in that stream plus the underlying stream being closed as
151 will result in that stream plus the underlying stream being closed as
133 well. If this behavior is not desirable, do not use instances as
152 well. If this behavior is not desirable, do not use instances as
134 context managers.
153 context managers.
135 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
154 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
136 argument to control whether ``write()`` returns the number of bytes read
155 argument to control whether ``write()`` returns the number of bytes read
137 from the source / written to the compressor. It defaults to off, which
156 from the source / written to the compressor. It defaults to off, which
138 preserves the existing behavior of returning the number of bytes emitted
157 preserves the existing behavior of returning the number of bytes emitted
139 from the compressor. The default will change in a future release so
158 from the compressor. The default will change in a future release so
140 behavior aligns with the specified behavior of ``io.RawIOBase``.
159 behavior aligns with the specified behavior of ``io.RawIOBase``.
141 * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will
160 * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will
142 result in that stream plus any underlying stream being closed as well. If
161 result in that stream plus any underlying stream being closed as well. If
143 this behavior is not desirable, do not use instances as context managers.
162 this behavior is not desirable, do not use instances as context managers.
144 * ``ZstdDecompressionWriter`` no longer requires being used as a context
163 * ``ZstdDecompressionWriter`` no longer requires being used as a context
145 manager (#57).
164 manager (#57).
146 * ``ZstdCompressionWriter`` no longer requires being used as a context
165 * ``ZstdCompressionWriter`` no longer requires being used as a context
147 manager (#57).
166 manager (#57).
148 * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances
167 * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances
149 has been deprecated and will be removed in a future release. The
168 has been deprecated and will be removed in a future release. The
150 ``overlap_log`` attribute should be used instead.
169 ``overlap_log`` attribute should be used instead.
151 * The ``overlap_size_log`` argument to ``CompressionParameters`` has been
170 * The ``overlap_size_log`` argument to ``CompressionParameters`` has been
152 deprecated and will be removed in a future release. The ``overlap_log``
171 deprecated and will be removed in a future release. The ``overlap_log``
153 argument should be used instead.
172 argument should be used instead.
154 * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances
173 * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances
155 has been deprecated and will be removed in a future release. The
174 has been deprecated and will be removed in a future release. The
156 ``ldm_hash_rate_log`` attribute should be used instead.
175 ``ldm_hash_rate_log`` attribute should be used instead.
157 * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been
176 * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been
158 deprecated and will be removed in a future release. The ``ldm_hash_rate_log``
177 deprecated and will be removed in a future release. The ``ldm_hash_rate_log``
159 argument should be used instead.
178 argument should be used instead.
160 * The ``compression_strategy`` argument to ``CompressionParameters`` has been
179 * The ``compression_strategy`` argument to ``CompressionParameters`` has been
161 deprecated and will be removed in a future release. The ``strategy``
180 deprecated and will be removed in a future release. The ``strategy``
162 argument should be used instead.
181 argument should be used instead.
163 * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated
182 * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated
164 and will be removed in a future release. Use ``MINMATCH_MIN`` and
183 and will be removed in a future release. Use ``MINMATCH_MIN`` and
165 ``MINMATCH_MAX`` instead.
184 ``MINMATCH_MAX`` instead.
166 * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had
185 * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had
167 been documented in the ``README`` file since the ``0.9.0`` release, the
186 been documented in the ``README`` file since the ``0.9.0`` release, the
168 module should not be imported directly at its new location. Instead,
187 module should not be imported directly at its new location. Instead,
169 ``import zstandard`` to cause an appropriate backend module to be loaded
188 ``import zstandard`` to cause an appropriate backend module to be loaded
170 automatically.
189 automatically.
171
190
172 Bug Fixes
191 Bug Fixes
173 ---------
192 ---------
174
193
175 * CFFI backend could encounter a failure when sending an empty chunk into
194 * CFFI backend could encounter a failure when sending an empty chunk into
176 ``ZstdDecompressionObj.decompress()``. The issue has been fixed.
195 ``ZstdDecompressionObj.decompress()``. The issue has been fixed.
177 * CFFI backend could encounter an error when calling
196 * CFFI backend could encounter an error when calling
178 ``ZstdDecompressionReader.read()`` if there was data remaining in an
197 ``ZstdDecompressionReader.read()`` if there was data remaining in an
179 internal buffer. The issue has been fixed. (#71)
198 internal buffer. The issue has been fixed. (#71)
180
199
181 Changes
200 Changes
182 -------
201 -------
183
202
184 * ``ZstDecompressionObj.decompress()`` now properly handles empty inputs in
203 * ``ZstDecompressionObj.decompress()`` now properly handles empty inputs in
185 the CFFI backend.
204 the CFFI backend.
186 * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``.
205 * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``.
187 These are part of the ``io.BufferedIOBase`` interface.
206 These are part of the ``io.BufferedIOBase`` interface.
188 * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading
207 * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading
189 compressed output into an existing buffer.
208 compressed output into an existing buffer.
190 * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts
209 * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts
191 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
210 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
192 behavior of ``io.RawIOBase``.
211 behavior of ``io.RawIOBase``.
193 * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this
212 * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this
194 method raised ``NotImplementedError``.
213 method raised ``NotImplementedError``.
195 * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``.
214 * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``.
196 These are part of the ``io.BufferedIOBase`` interface.
215 These are part of the ``io.BufferedIOBase`` interface.
197 * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts
216 * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts
198 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
217 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
199 behavior of ``io.RawIOBase``.
218 behavior of ``io.RawIOBase``.
200 * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this
219 * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this
201 method raised ``NotImplementedError``.
220 method raised ``NotImplementedError``.
202 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
221 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
203 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
222 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
204 instead of ``NotImplementedError``. This reflects a decision to never
223 instead of ``NotImplementedError``. This reflects a decision to never
205 implement text-based I/O on (de)compressors and keep the low-level API
224 implement text-based I/O on (de)compressors and keep the low-level API
206 operating in the binary domain. (#13)
225 operating in the binary domain. (#13)
207 * ``README.rst`` now documents how to achieve linewise iteration using
226 * ``README.rst`` now documents how to achieve linewise iteration using
208 an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``.
227 an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``.
209 * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for
228 * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for
210 reading decompressed output into an existing buffer. This allows chaining
229 reading decompressed output into an existing buffer. This allows chaining
211 to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``.
230 to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``.
212 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
231 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
213 argument to control behavior when the input data has multiple zstd
232 argument to control behavior when the input data has multiple zstd
214 *frames*. When ``False`` (the default for backwards compatibility), a
233 *frames*. When ``False`` (the default for backwards compatibility), a
215 ``read()`` will stop when the end of a zstd *frame* is encountered. When
234 ``read()`` will stop when the end of a zstd *frame* is encountered. When
216 ``True``, ``read()`` can potentially return data spanning multiple zstd
235 ``True``, ``read()`` can potentially return data spanning multiple zstd
217 *frames*. The default will likely be changed to ``True`` in a future
236 *frames*. The default will likely be changed to ``True`` in a future
218 release.
237 release.
219 * ``setup.py`` now performs CFFI version sniffing and disables the CFFI
238 * ``setup.py`` now performs CFFI version sniffing and disables the CFFI
220 backend if CFFI is too old. Previously, we only used ``install_requires``
239 backend if CFFI is too old. Previously, we only used ``install_requires``
221 to enforce the CFFI version and not all build modes would properly enforce
240 to enforce the CFFI version and not all build modes would properly enforce
222 the minimum CFFI version. (#69)
241 the minimum CFFI version. (#69)
223 * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data
242 * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data
224 remaining in any internal buffer. Before, repeated ``read()`` could
243 remaining in any internal buffer. Before, repeated ``read()`` could
225 result in *random* errors. (#71)
244 result in *random* errors. (#71)
226 * Upgraded various Python packages in CI environment.
245 * Upgraded various Python packages in CI environment.
227 * Upgrade to hypothesis 4.5.11.
246 * Upgrade to hypothesis 4.5.11.
228 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
247 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
229 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
248 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
230 respectively.
249 respectively.
231 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
250 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
232 argument to control whether ``write()`` returns the number of bytes read
251 argument to control whether ``write()`` returns the number of bytes read
233 from the source. It defaults to ``False`` to preserve backwards
252 from the source. It defaults to ``False`` to preserve backwards
234 compatibility.
253 compatibility.
235 * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase``
254 * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase``
236 interface and behaves as a proper stream object.
255 interface and behaves as a proper stream object.
237 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
256 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
238 argument to control whether ``write()`` returns the number of bytes read
257 argument to control whether ``write()`` returns the number of bytes read
239 from the source. It defaults to ``False`` to preserve backwards
258 from the source. It defaults to ``False`` to preserve backwards
240 compatibility.
259 compatibility.
241 * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and
260 * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and
242 behaves as a proper stream object. ``close()`` will now close the stream
261 behaves as a proper stream object. ``close()`` will now close the stream
243 and the underlying stream (if possible). ``__exit__`` will now call
262 and the underlying stream (if possible). ``__exit__`` will now call
244 ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented.
263 ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented.
245 * ``ZstdDecompressionWriter`` no longer must be used as a context manager.
264 * ``ZstdDecompressionWriter`` no longer must be used as a context manager.
246 * ``ZstdCompressionWriter`` no longer must be used as a context manager.
265 * ``ZstdCompressionWriter`` no longer must be used as a context manager.
247 When not using as a context manager, it is important to call
266 When not using as a context manager, it is important to call
248 ``flush(FLUSH_FRAME)`` or the compression stream won't be properly
267 ``flush(FLUSH_FRAME)`` or the compression stream won't be properly
249 terminated and decoders may complain about malformed input.
268 terminated and decoders may complain about malformed input.
250 * ``ZstdCompressionWriter.flush()`` (what is returned from
269 * ``ZstdCompressionWriter.flush()`` (what is returned from
251 ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the
270 ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the
252 flush behavior. Its value can be one of the new constants
271 flush behavior. Its value can be one of the new constants
253 ``FLUSH_BLOCK`` or ``FLUSH_FRAME``.
272 ``FLUSH_BLOCK`` or ``FLUSH_FRAME``.
254 * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method.
273 * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method.
255 This provides parity with standard library equivalent types. (#65)
274 This provides parity with standard library equivalent types. (#65)
256 * ``CompressionParameters`` no longer redundantly store individual compression
275 * ``CompressionParameters`` no longer redundantly store individual compression
257 parameters on each instance. Instead, compression parameters are stored inside
276 parameters on each instance. Instead, compression parameters are stored inside
258 the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining
277 the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining
259 parameters are now properties rather than instance variables.
278 parameters are now properties rather than instance variables.
260 * Exposed the ``STRATEGY_BTULTRA2`` constant.
279 * Exposed the ``STRATEGY_BTULTRA2`` constant.
261 * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute.
280 * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute.
262 This behaves identically to the ``overlap_size_log`` attribute.
281 This behaves identically to the ``overlap_size_log`` attribute.
263 * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that
282 * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that
264 behaves identically to the ``overlap_size_log`` argument. An error will be
283 behaves identically to the ``overlap_size_log`` argument. An error will be
265 raised if both arguments are specified.
284 raised if both arguments are specified.
266 * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log``
285 * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log``
267 attribute. This behaves identically to the ``ldm_hash_every_log`` attribute.
286 attribute. This behaves identically to the ``ldm_hash_every_log`` attribute.
268 * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that
287 * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that
269 behaves identically to the ``ldm_hash_every_log`` argument. An error will be
288 behaves identically to the ``ldm_hash_every_log`` argument. An error will be
270 raised if both arguments are specified.
289 raised if both arguments are specified.
271 * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves
290 * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves
272 identically to the ``compression_strategy`` argument. An error will be raised
291 identically to the ``compression_strategy`` argument. An error will be raised
273 if both arguments are specified.
292 if both arguments are specified.
274 * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are
293 * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are
275 semantically equivalent to the old ``SEARCHLENGTH_MIN`` and
294 semantically equivalent to the old ``SEARCHLENGTH_MIN`` and
276 ``SEARCHLENGTH_MAX`` constants.
295 ``SEARCHLENGTH_MAX`` constants.
277 * Bundled zstandard library upgraded from 1.3.7 to 1.3.8.
296 * Bundled zstandard library upgraded from 1.3.7 to 1.3.8.
278 * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and
297 * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and
279 tested in the 0.10 release).
298 tested in the 0.10 release).
280 * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``.
299 * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``.
281 * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid
300 * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid
282 allocating a new buffer for every operation. This should result in faster
301 allocating a new buffer for every operation. This should result in faster
283 performance in cases where ``write()`` or ``flush()`` are being called
302 performance in cases where ``write()`` or ``flush()`` are being called
284 frequently. (#62)
303 frequently. (#62)
285 * Bundled zstandard library upgraded from 1.3.6 to 1.3.7.
304 * Bundled zstandard library upgraded from 1.3.6 to 1.3.7.
286
305
287 0.10.2 (released 2018-11-03)
306 0.10.2 (released 2018-11-03)
288 ============================
307 ============================
289
308
290 Bug Fixes
309 Bug Fixes
291 ---------
310 ---------
292
311
293 * ``zstd_cffi.py`` added to ``setup.py`` (#60).
312 * ``zstd_cffi.py`` added to ``setup.py`` (#60).
294
313
295 Changes
314 Changes
296 -------
315 -------
297
316
298 * Change some integer casts to avoid ``ssize_t`` (#61).
317 * Change some integer casts to avoid ``ssize_t`` (#61).
299
318
300 0.10.1 (released 2018-10-08)
319 0.10.1 (released 2018-10-08)
301 ============================
320 ============================
302
321
303 Backwards Compatibility Notes
322 Backwards Compatibility Notes
304 -----------------------------
323 -----------------------------
305
324
306 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
325 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
307 method (#58).
326 method (#58).
308 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
327 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
309 method (#58).
328 method (#58).
310
329
311 Changes
330 Changes
312 -------
331 -------
313
332
314 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
333 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
315 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
334 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
316 since they were built against Python 3.7.
335 since they were built against Python 3.7.
317 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
336 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
318 ``closed`` attribute is now a read-only property instead of a method. This now
337 ``closed`` attribute is now a read-only property instead of a method. This now
319 properly matches the ``IOBase`` API and allows instances to be used in more
338 properly matches the ``IOBase`` API and allows instances to be used in more
320 places that accept ``IOBase`` instances.
339 places that accept ``IOBase`` instances.
321
340
322 0.10.0 (released 2018-10-08)
341 0.10.0 (released 2018-10-08)
323 ============================
342 ============================
324
343
325 Backwards Compatibility Notes
344 Backwards Compatibility Notes
326 -----------------------------
345 -----------------------------
327
346
328 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
347 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
329 argument in both the C and CFFI backends. Before, the CFFI implementation
348 argument in both the C and CFFI backends. Before, the CFFI implementation
330 would assume a default value of ``-1``, which was later rejected.
349 would assume a default value of ``-1``, which was later rejected.
331 * The ``compress_literals`` argument and attribute has been removed from
350 * The ``compress_literals`` argument and attribute has been removed from
332 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
351 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
333 API.
352 API.
334 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
353 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
335 operation performed against ``ZstdCompressor`` instances. The reason for this
354 operation performed against ``ZstdCompressor`` instances. The reason for this
336 change is that the zstd 1.3.5 API no longer allows this without calling
355 change is that the zstd 1.3.5 API no longer allows this without calling
337 ``ZSTD_CCtx_resetParameters()`` first. But if we called
356 ``ZSTD_CCtx_resetParameters()`` first. But if we called
338 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
357 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
339 potentially expensive setup when using dictionaries. We now call
358 potentially expensive setup when using dictionaries. We now call
340 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
359 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
341 compression parameters.
360 compression parameters.
342 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
361 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
343 used as a context manager. The context manager interface still exists and its
362 used as a context manager. The context manager interface still exists and its
344 behavior is unchanged.
363 behavior is unchanged.
345 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
364 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
346 used as a context manager. The context manager interface still exists and its
365 used as a context manager. The context manager interface still exists and its
347 behavior is unchanged.
366 behavior is unchanged.
348
367
349 Bug Fixes
368 Bug Fixes
350 ---------
369 ---------
351
370
352 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
371 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
353 from internal buffers in more scenarios. Before, it was possible for data to
372 from internal buffers in more scenarios. Before, it was possible for data to
354 remain in internal buffers. This data would be emitted on a subsequent call
373 remain in internal buffers. This data would be emitted on a subsequent call
355 to ``decompress()``. The overall output stream would still be valid. But if
374 to ``decompress()``. The overall output stream would still be valid. But if
356 callers were expecting input data to exactly map to output data (say the
375 callers were expecting input data to exactly map to output data (say the
357 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
376 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
358 map input chunks to output chunks), then the previous behavior would be
377 map input chunks to output chunks), then the previous behavior would be
359 wrong. The new behavior is such that output from
378 wrong. The new behavior is such that output from
360 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
379 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
361 should produce all available compressed input.
380 should produce all available compressed input.
362 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
381 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
363 a previous context manager resulted in error (#56).
382 a previous context manager resulted in error (#56).
364 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
383 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
365 all data necessary to flush a block. Before, it was possible for the
384 all data necessary to flush a block. Before, it was possible for the
366 ``flush()`` to not emit all data necessary to fully represent a block. This
385 ``flush()`` to not emit all data necessary to fully represent a block. This
367 would mean decompressors wouldn't be able to decompress all data that had been
386 would mean decompressors wouldn't be able to decompress all data that had been
368 fed into the compressor and ``flush()``ed. (#55).
387 fed into the compressor and ``flush()``ed. (#55).
369
388
370 New Features
389 New Features
371 ------------
390 ------------
372
391
373 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
392 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
374 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
393 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
375 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
394 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
376 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
395 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
377 API doesn't impose restrictions on the input or output types for the
396 API doesn't impose restrictions on the input or output types for the
378 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
397 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
379 fixed size. This makes this API useful when the compressed output is being
398 fixed size. This makes this API useful when the compressed output is being
380 fed into an I/O layer, where uniform write sizes are useful.
399 fed into an I/O layer, where uniform write sizes are useful.
381 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
400 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
382 manager (#34).
401 manager (#34).
383 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
402 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
384 manager (#34).
403 manager (#34).
385 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
404 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
386
405
387 Changes
406 Changes
388 -------
407 -------
389
408
390 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
409 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
391 * ``zstandard.__version__`` is now defined (#50).
410 * ``zstandard.__version__`` is now defined (#50).
392 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
411 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
393 * Upgrade various packages used in CI to latest versions. Notably tox (in
412 * Upgrade various packages used in CI to latest versions. Notably tox (in
394 order to support Python 3.7).
413 order to support Python 3.7).
395 * Use relative paths in setup.py to appease Python 3.7 (#51).
414 * Use relative paths in setup.py to appease Python 3.7 (#51).
396 * Added CI for Python 3.7.
415 * Added CI for Python 3.7.
397
416
398 0.9.1 (released 2018-06-04)
417 0.9.1 (released 2018-06-04)
399 ===========================
418 ===========================
400
419
401 * Debian packaging support.
420 * Debian packaging support.
402 * Fix typo in setup.py (#44).
421 * Fix typo in setup.py (#44).
403 * Support building with mingw compiler (#46).
422 * Support building with mingw compiler (#46).
404
423
405 0.9.0 (released 2018-04-08)
424 0.9.0 (released 2018-04-08)
406 ===========================
425 ===========================
407
426
408 Backwards Compatibility Notes
427 Backwards Compatibility Notes
409 -----------------------------
428 -----------------------------
410
429
411 * CFFI 1.11 or newer is now required (previous requirement was 1.8).
430 * CFFI 1.11 or newer is now required (previous requirement was 1.8).
412 * The primary module is now ``zstandard``. Please change imports of ``zstd``
431 * The primary module is now ``zstandard``. Please change imports of ``zstd``
413 and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support
432 and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support
414 for importing the old names will be dropped in the next release.
433 for importing the old names will be dropped in the next release.
415 * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have
434 * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have
416 been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new
435 been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new
417 name and will be deleted in a future release.
436 name and will be deleted in a future release.
418 * Support for Python 2.6 has been removed.
437 * Support for Python 2.6 has been removed.
419 * Support for Python 3.3 has been removed.
438 * Support for Python 3.3 has been removed.
420 * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as
439 * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as
421 the feature disappeared from zstd 1.3.
440 the feature disappeared from zstd 1.3.
422 * Support for legacy dictionaries has been removed. Cover dictionaries are now
441 * Support for legacy dictionaries has been removed. Cover dictionaries are now
423 the default. ``train_cover_dictionary()`` has effectively been renamed to
442 the default. ``train_cover_dictionary()`` has effectively been renamed to
424 ``train_dictionary()``.
443 ``train_dictionary()``.
425 * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been
444 * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been
426 deleted and the method now allows empty inputs to be compressed by default.
445 deleted and the method now allows empty inputs to be compressed by default.
427 * ``estimate_compression_context_size()`` has been removed. Use
446 * ``estimate_compression_context_size()`` has been removed. Use
428 ``CompressionParameters.estimated_compression_context_size()`` instead.
447 ``CompressionParameters.estimated_compression_context_size()`` instead.
429 * ``get_compression_parameters()`` has been removed. Use
448 * ``get_compression_parameters()`` has been removed. Use
430 ``CompressionParameters.from_level()`` instead.
449 ``CompressionParameters.from_level()`` instead.
431 * The arguments to ``CompressionParameters.__init__()`` have changed. If you
450 * The arguments to ``CompressionParameters.__init__()`` have changed. If you
432 were using positional arguments before, the positions now map to different
451 were using positional arguments before, the positions now map to different
433 arguments. It is recommended to use keyword arguments to construct
452 arguments. It is recommended to use keyword arguments to construct
434 ``CompressionParameters`` instances.
453 ``CompressionParameters`` instances.
435 * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard
454 * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard
436 1.3.4).
455 1.3.4).
437 * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been
456 * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been
438 renamed to ``ZstdCompressor.stream_writer()`` and
457 renamed to ``ZstdCompressor.stream_writer()`` and
439 ``ZstdDecompressor.stream_writer()``, respectively. The old names are still
458 ``ZstdDecompressor.stream_writer()``, respectively. The old names are still
440 aliased, but will be removed in the next major release.
459 aliased, but will be removed in the next major release.
441 * Content sizes are written into frame headers by default
460 * Content sizes are written into frame headers by default
442 (``ZstdCompressor(write_content_size=True)`` is now the default).
461 (``ZstdCompressor(write_content_size=True)`` is now the default).
443 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``
462 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``
444 for consistency with other types. The old name is an alias and will be removed
463 for consistency with other types. The old name is an alias and will be removed
445 in the next major release.
464 in the next major release.
446
465
447 Bug Fixes
466 Bug Fixes
448 ---------
467 ---------
449
468
450 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2).
469 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2).
451 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2).
470 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2).
452 * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``.
471 * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``.
453
472
454 New Features
473 New Features
455 ------------
474 ------------
456
475
457 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
476 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
458 bug fixes and performance improvements. It also gives us access to newer
477 bug fixes and performance improvements. It also gives us access to newer
459 features.
478 features.
460 * Support for negative compression levels.
479 * Support for negative compression levels.
461 * Support for *long distance matching* (facilitates compression ratios that approach
480 * Support for *long distance matching* (facilitates compression ratios that approach
462 LZMA).
481 LZMA).
463 * Support for reading empty zstandard frames (with an embedded content size
482 * Support for reading empty zstandard frames (with an embedded content size
464 of 0).
483 of 0).
465 * Support for writing and partial support for reading zstandard frames without a
484 * Support for writing and partial support for reading zstandard frames without a
466 magic header.
485 magic header.
467 * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows
486 * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows
468 you to ``.read()`` from a file-like object).
487 you to ``.read()`` from a file-like object).
469 * Several minor features, bug fixes, and performance enhancements.
488 * Several minor features, bug fixes, and performance enhancements.
470 * Wheels for Linux and macOS are now provided with releases.
489 * Wheels for Linux and macOS are now provided with releases.
471
490
472 Changes
491 Changes
473 -------
492 -------
474
493
475 * Functions accepting bytes data now use the buffer protocol and can accept
494 * Functions accepting bytes data now use the buffer protocol and can accept
476 more types (like ``memoryview`` and ``bytearray``) (#26).
495 more types (like ``memoryview`` and ``bytearray``) (#26).
477 * Add #includes so compilation on OS X and BSDs works (#20).
496 * Add #includes so compilation on OS X and BSDs works (#20).
478 * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream
497 * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream
479 of decompressed data for a source.
498 of decompressed data for a source.
480 * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of
499 * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of
481 compressed data for a source.
500 compressed data for a source.
482 * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``.
501 * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``.
483 The old name is still available.
502 The old name is still available.
484 * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``.
503 * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``.
485 ``read_from()`` is still available at its old location.
504 ``read_from()`` is still available at its old location.
486 * Introduce the ``zstandard`` module to import and re-export the C or CFFI
505 * Introduce the ``zstandard`` module to import and re-export the C or CFFI
487 *backend* as appropriate. Behavior can be controlled via the
506 *backend* as appropriate. Behavior can be controlled via the
488 ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for
507 ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for
489 usage info.
508 usage info.
490 * Vendored version of zstd upgraded to 1.3.4.
509 * Vendored version of zstd upgraded to 1.3.4.
491 * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``.
510 * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``.
492 * Add ``STRATEGY_BTULTRA`` compression strategy constant.
511 * Add ``STRATEGY_BTULTRA`` compression strategy constant.
493 * Switch from deprecated ``ZSTD_getDecompressedSize()`` to
512 * Switch from deprecated ``ZSTD_getDecompressedSize()`` to
494 ``ZSTD_getFrameContentSize()`` replacement.
513 ``ZSTD_getFrameContentSize()`` replacement.
495 * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring
514 * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring
496 special handling.
515 special handling.
497 * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()``
516 * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()``
498 method for determining the current memory utilization of the underlying zstd
517 method for determining the current memory utilization of the underlying zstd
499 primitive.
518 primitive.
500 * ``train_dictionary()`` has new arguments and functionality for trying multiple
519 * ``train_dictionary()`` has new arguments and functionality for trying multiple
501 variations of COVER parameters and selecting the best one.
520 variations of COVER parameters and selecting the best one.
502 * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and
521 * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and
503 ``LDM_BUCKETSIZELOG_MAX``.
522 ``LDM_BUCKETSIZELOG_MAX``.
504 * Converted all consumers to the zstandard *new advanced API*, which uses
523 * Converted all consumers to the zstandard *new advanced API*, which uses
505 ``ZSTD_compress_generic()``
524 ``ZSTD_compress_generic()``
506 * ``CompressionParameters.__init__`` now accepts several more arguments,
525 * ``CompressionParameters.__init__`` now accepts several more arguments,
507 including support for *long distance matching*.
526 including support for *long distance matching*.
508 * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that
527 * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that
509 controls how the dictionary should be interpreted. This can be used to
528 controls how the dictionary should be interpreted. This can be used to
510 force the use of *content-only* dictionaries or to require the presence
529 force the use of *content-only* dictionaries or to require the presence
511 of the dictionary magic header.
530 of the dictionary magic header.
512 * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the
531 * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the
513 compression dictionary so it can efficiently be used with multiple
532 compression dictionary so it can efficiently be used with multiple
514 ``ZstdCompressor`` instances.
533 ``ZstdCompressor`` instances.
515 * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances,
534 * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances,
516 created automatically on first use, and automatically reused by all
535 created automatically on first use, and automatically reused by all
517 ``ZstdDecompressor`` instances bound to that dictionary.
536 ``ZstdDecompressor`` instances bound to that dictionary.
518 * All meaningful functions now accept keyword arguments.
537 * All meaningful functions now accept keyword arguments.
519 * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument
538 * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument
520 to control how much work to perform on every decompressor invocation.
539 to control how much work to perform on every decompressor invocation.
521 * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the
540 * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the
522 total number of bytes written so far.
541 total number of bytes written so far.
523 * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving
542 * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving
524 forward in the stream.
543 forward in the stream.
525 * Removed ``TARGETLENGTH_MAX`` constant.
544 * Removed ``TARGETLENGTH_MAX`` constant.
526 * Added ``frame_header_size(data)`` function.
545 * Added ``frame_header_size(data)`` function.
527 * Added ``frame_content_size(data)`` function.
546 * Added ``frame_content_size(data)`` function.
528 * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced
547 * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced
529 decompression* API.
548 decompression* API.
530 * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with
549 * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with
531 negative compression levels.
550 negative compression levels.
532 * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the
551 * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the
533 amount of memory required for decompression operations.
552 amount of memory required for decompression operations.
534 * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with
553 * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with
535 the ``format`` compression parameter to control whether the frame magic
554 the ``format`` compression parameter to control whether the frame magic
536 header is written.
555 header is written.
537 * ``ZstdDecompressor`` now accepts a ``format`` argument to control the
556 * ``ZstdDecompressor`` now accepts a ``format`` argument to control the
538 expected frame format.
557 expected frame format.
539 * ``ZstdCompressor`` now has a ``frame_progression()`` method to return
558 * ``ZstdCompressor`` now has a ``frame_progression()`` method to return
540 information about the current compression operation.
559 information about the current compression operation.
541 * Error messages in CFFI no longer have ``b''`` literals.
560 * Error messages in CFFI no longer have ``b''`` literals.
542 * Compiler warnings and underlying overflow issues on 32-bit platforms have been
561 * Compiler warnings and underlying overflow issues on 32-bit platforms have been
543 fixed.
562 fixed.
544 * Builds in CI now build with compiler warnings as errors. This should hopefully
563 * Builds in CI now build with compiler warnings as errors. This should hopefully
545 fix new compiler warnings from being introduced.
564 fix new compiler warnings from being introduced.
546 * Make ``ZstdCompressor(write_content_size=True)`` and
565 * Make ``ZstdCompressor(write_content_size=True)`` and
547 ``CompressionParameters(write_content_size=True)`` the default.
566 ``CompressionParameters(write_content_size=True)`` the default.
548 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``.
567 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``.
549
568
550 0.8.2 (released 2018-02-22)
569 0.8.2 (released 2018-02-22)
551 ---------------------------
570 ---------------------------
552
571
553 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40).
572 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40).
554 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35).
573 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35).
555
574
556 0.8.1 (released 2017-04-08)
575 0.8.1 (released 2017-04-08)
557 ---------------------------
576 ---------------------------
558
577
559 * Add #includes so compilation on OS X and BSDs works (#20).
578 * Add #includes so compilation on OS X and BSDs works (#20).
560
579
561 0.8.0 (released 2017-03-08)
580 0.8.0 (released 2017-03-08)
562 ===========================
581 ===========================
563
582
564 * CompressionParameters now has an estimated_compression_context_size() method.
583 * CompressionParameters now has an estimated_compression_context_size() method.
565 zstd.estimate_compression_context_size() is now deprecated and slated for
584 zstd.estimate_compression_context_size() is now deprecated and slated for
566 removal.
585 removal.
567 * Implemented a lot of fuzzing tests.
586 * Implemented a lot of fuzzing tests.
568 * CompressionParameters instances now perform extra validation by calling
587 * CompressionParameters instances now perform extra validation by calling
569 ZSTD_checkCParams() at construction time.
588 ZSTD_checkCParams() at construction time.
570 * multi_compress_to_buffer() API for compressing multiple inputs as a
589 * multi_compress_to_buffer() API for compressing multiple inputs as a
571 single operation, as efficiently as possible.
590 single operation, as efficiently as possible.
572 * ZSTD_CStream instances are now used across multiple operations on
591 * ZSTD_CStream instances are now used across multiple operations on
573 ZstdCompressor instances, resulting in much better performance for
592 ZstdCompressor instances, resulting in much better performance for
574 APIs that do streaming.
593 APIs that do streaming.
575 * ZSTD_DStream instances are now used across multiple operations on
594 * ZSTD_DStream instances are now used across multiple operations on
576 ZstdDecompressor instances, resulting in much better performance for
595 ZstdDecompressor instances, resulting in much better performance for
577 APIs that do streaming.
596 APIs that do streaming.
578 * train_dictionary() now releases the GIL.
597 * train_dictionary() now releases the GIL.
579 * Support for training dictionaries using the COVER algorithm.
598 * Support for training dictionaries using the COVER algorithm.
580 * multi_decompress_to_buffer() API for decompressing multiple frames as a
599 * multi_decompress_to_buffer() API for decompressing multiple frames as a
581 single operation, as efficiently as possible.
600 single operation, as efficiently as possible.
582 * Support for multi-threaded compression.
601 * Support for multi-threaded compression.
583 * Disable deprecation warnings when compiling CFFI module.
602 * Disable deprecation warnings when compiling CFFI module.
584 * Fixed memory leak in train_dictionary().
603 * Fixed memory leak in train_dictionary().
585 * Removed DictParameters type.
604 * Removed DictParameters type.
586 * train_dictionary() now accepts keyword arguments instead of a
605 * train_dictionary() now accepts keyword arguments instead of a
587 DictParameters instance to control dictionary generation.
606 DictParameters instance to control dictionary generation.
588
607
589 0.7.0 (released 2017-02-07)
608 0.7.0 (released 2017-02-07)
590 ===========================
609 ===========================
591
610
592 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
611 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
593 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
612 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
594 decompression of *content-only dictionary chains*.
613 decompression of *content-only dictionary chains*.
595 * CFFI module fully implemented; all tests run against both C extension and
614 * CFFI module fully implemented; all tests run against both C extension and
596 CFFI implementation.
615 CFFI implementation.
597 * Vendored version of zstd updated to 1.1.3.
616 * Vendored version of zstd updated to 1.1.3.
598 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
617 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
599 to avoid extra memory allocation of dict data.
618 to avoid extra memory allocation of dict data.
600 * Add function names to error messages (by using ":name" in PyArg_Parse*
619 * Add function names to error messages (by using ":name" in PyArg_Parse*
601 functions).
620 functions).
602 * Reuse decompression context across operations. Previously, we created a
621 * Reuse decompression context across operations. Previously, we created a
603 new ZSTD_DCtx for each decompress(). This was measured to slow down
622 new ZSTD_DCtx for each decompress(). This was measured to slow down
604 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
623 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
605 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
624 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
606 things faster in the process.
625 things faster in the process.
607 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
626 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
608 of bytes written.
627 of bytes written.
609 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
628 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
610 written to the underlying output object.
629 written to the underlying output object.
611 * CompressionParameters instances now expose their values as attributes.
630 * CompressionParameters instances now expose their values as attributes.
612 * CompressionParameters instances no longer are subscriptable nor behave
631 * CompressionParameters instances no longer are subscriptable nor behave
613 as tuples (backwards incompatible). Use attributes to obtain values.
632 as tuples (backwards incompatible). Use attributes to obtain values.
614 * DictParameters instances now expose their values as attributes.
633 * DictParameters instances now expose their values as attributes.
615
634
616 0.6.0 (released 2017-01-14)
635 0.6.0 (released 2017-01-14)
617 ===========================
636 ===========================
618
637
619 * Support for legacy zstd protocols (build time opt in feature).
638 * Support for legacy zstd protocols (build time opt in feature).
620 * Automation improvements to test against Python 3.6, latest versions
639 * Automation improvements to test against Python 3.6, latest versions
621 of Tox, more deterministic AppVeyor behavior.
640 of Tox, more deterministic AppVeyor behavior.
622 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
641 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
623 source code manually.
642 source code manually.
624 * Vendored version of zstd updated to 1.1.2.
643 * Vendored version of zstd updated to 1.1.2.
625 * Documentation improvements.
644 * Documentation improvements.
626 * Introduce a bench.py script for performing (crude) benchmarks.
645 * Introduce a bench.py script for performing (crude) benchmarks.
627 * ZSTD_CCtx instances are now reused across multiple compress() operations.
646 * ZSTD_CCtx instances are now reused across multiple compress() operations.
628 * ZstdCompressor.write_to() now has a flush() method.
647 * ZstdCompressor.write_to() now has a flush() method.
629 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
648 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
630 flush a block (as opposed to ending the stream).
649 flush a block (as opposed to ending the stream).
631 * Disallow compress(b'') when writing content sizes by default (issue #11).
650 * Disallow compress(b'') when writing content sizes by default (issue #11).
632
651
633 0.5.2 (released 2016-11-12)
652 0.5.2 (released 2016-11-12)
634 ===========================
653 ===========================
635
654
636 * more packaging fixes for source distribution
655 * more packaging fixes for source distribution
637
656
638 0.5.1 (released 2016-11-12)
657 0.5.1 (released 2016-11-12)
639 ===========================
658 ===========================
640
659
641 * setup_zstd.py is included in the source distribution
660 * setup_zstd.py is included in the source distribution
642
661
643 0.5.0 (released 2016-11-10)
662 0.5.0 (released 2016-11-10)
644 ===========================
663 ===========================
645
664
646 * Vendored version of zstd updated to 1.1.1.
665 * Vendored version of zstd updated to 1.1.1.
647 * Continuous integration for Python 3.6 and 3.7
666 * Continuous integration for Python 3.6 and 3.7
648 * Continuous integration for Conda
667 * Continuous integration for Conda
649 * Added compression and decompression APIs providing similar interfaces
668 * Added compression and decompression APIs providing similar interfaces
650 to the standard library ``zlib`` and ``bz2`` modules. This allows
669 to the standard library ``zlib`` and ``bz2`` modules. This allows
651 coding to a common interface.
670 coding to a common interface.
652 * ``zstd.__version__`` is now defined.
671 * ``zstd.__version__`` is now defined.
653 * ``read_from()`` on various APIs now accepts objects implementing the buffer
672 * ``read_from()`` on various APIs now accepts objects implementing the buffer
654 protocol.
673 protocol.
655 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
674 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
656 to pass in an existing buffer with a header without having to create a
675 to pass in an existing buffer with a header without having to create a
657 slice or a new object.
676 slice or a new object.
658 * Implemented ``ZstdCompressionDict.as_bytes()``.
677 * Implemented ``ZstdCompressionDict.as_bytes()``.
659 * Python's memory allocator is now used instead of ``malloc()``.
678 * Python's memory allocator is now used instead of ``malloc()``.
660 * Low-level zstd data structures are reused in more instances, cutting down
679 * Low-level zstd data structures are reused in more instances, cutting down
661 on overhead for certain operations.
680 on overhead for certain operations.
662 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
681 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
663 has now been refactored into a standalone ``setup_zstd.py`` file. This
682 has now been refactored into a standalone ``setup_zstd.py`` file. This
664 allows other projects with ``setup.py`` files to reuse the
683 allows other projects with ``setup.py`` files to reuse the
665 ``distutils`` code for this project without copying code.
684 ``distutils`` code for this project without copying code.
666 * The monolithic ``zstd.c`` file has been split into a header file defining
685 * The monolithic ``zstd.c`` file has been split into a header file defining
667 types and separate ``.c`` source files for the implementation.
686 types and separate ``.c`` source files for the implementation.
668
687
669 Older History
688 Older History
670 =============
689 =============
671
690
672 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
691 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
673 Python extension for use by the Mercurial project. A very hacky prototype
692 Python extension for use by the Mercurial project. A very hacky prototype
674 is sent to the mercurial-devel list for RFC.
693 is sent to the mercurial-devel list for RFC.
675
694
676 2016-09-03 - Most functionality from Zstandard C API implemented. Source
695 2016-09-03 - Most functionality from Zstandard C API implemented. Source
677 code published on https://github.com/indygreg/python-zstandard. Travis-CI
696 code published on https://github.com/indygreg/python-zstandard. Travis-CI
678 automation configured. 0.0.1 release on PyPI.
697 automation configured. 0.0.1 release on PyPI.
679
698
680 2016-09-05 - After the API was rounded out a bit and support for Python
699 2016-09-05 - After the API was rounded out a bit and support for Python
681 2.6 and 2.7 was added, version 0.1 was released to PyPI.
700 2.6 and 2.7 was added, version 0.1 was released to PyPI.
682
701
683 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
702 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
684 was released to PyPI.
703 was released to PyPI.
685
704
686 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
705 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
687 now accepts arguments controlling frame parameters. The source size can now
706 now accepts arguments controlling frame parameters. The source size can now
688 be declared when performing streaming compression. ZstdDecompressor.decompress()
707 be declared when performing streaming compression. ZstdDecompressor.decompress()
689 is implemented. Compression dictionaries are now cached when using the simple
708 is implemented. Compression dictionaries are now cached when using the simple
690 compression and decompression APIs. Memory size APIs added.
709 compression and decompression APIs. Memory size APIs added.
691 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
710 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
692 implemented. This rounds out the major compression/decompression APIs planned
711 implemented. This rounds out the major compression/decompression APIs planned
693 by the author.
712 by the author.
694
713
695 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
714 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
696 decoding a zstd frame (issue #2).
715 decoding a zstd frame (issue #2).
697
716
698 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
717 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
699 write buffer sizes, and a few bug fixes involving failure to read/write
718 write buffer sizes, and a few bug fixes involving failure to read/write
700 all data when buffer sizes were too small to hold remaining data.
719 all data when buffer sizes were too small to hold remaining data.
701
720
702 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
721 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
@@ -1,1602 +1,1602 b''
1 ================
1 ================
2 python-zstandard
2 python-zstandard
3 ================
3 ================
4
4
5 This project provides Python bindings for interfacing with the
5 This project provides Python bindings for interfacing with the
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface are provided.
7 and CFFI interface are provided.
8
8
9 The primary goal of the project is to provide a rich interface to the
9 The primary goal of the project is to provide a rich interface to the
10 underlying C API through a Pythonic interface while not sacrificing
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 performance. This means exposing most of the features and flexibility
12 of the C API while not sacrificing usability or safety that Python provides.
12 of the C API while not sacrificing usability or safety that Python provides.
13
13
14 The canonical home for this project lives in a Mercurial repository run by
14 The canonical home for this project lives in a Mercurial repository run by
15 the author. For convenience, that repository is frequently synchronized to
15 the author. For convenience, that repository is frequently synchronized to
16 https://github.com/indygreg/python-zstandard.
16 https://github.com/indygreg/python-zstandard.
17
17
18 | |ci-status|
18 | |ci-status|
19
19
20 Requirements
20 Requirements
21 ============
21 ============
22
22
23 This extension is designed to run with Python 2.7, 3.4, 3.5, 3.6, and 3.7
23 This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8
24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
26
26
27 Installing
27 Installing
28 ==========
28 ==========
29
29
30 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
30 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
31 So, to install this package::
31 So, to install this package::
32
32
33 $ pip install zstandard
33 $ pip install zstandard
34
34
35 Binary wheels are made available for some platforms. If you need to
35 Binary wheels are made available for some platforms. If you need to
36 install from a source distribution, all you should need is a working C
36 install from a source distribution, all you should need is a working C
37 compiler and the Python development headers/libraries. On many Linux
37 compiler and the Python development headers/libraries. On many Linux
38 distributions, you can install a ``python-dev`` or ``python-devel``
38 distributions, you can install a ``python-dev`` or ``python-devel``
39 package to provide these dependencies.
39 package to provide these dependencies.
40
40
41 Packages are also uploaded to Anaconda Cloud at
41 Packages are also uploaded to Anaconda Cloud at
42 https://anaconda.org/indygreg/zstandard. See that URL for how to install
42 https://anaconda.org/indygreg/zstandard. See that URL for how to install
43 this package with ``conda``.
43 this package with ``conda``.
44
44
45 Performance
45 Performance
46 ===========
46 ===========
47
47
48 zstandard is a highly tunable compression algorithm. In its default settings
48 zstandard is a highly tunable compression algorithm. In its default settings
49 (compression level 3), it will be faster at compression and decompression and
49 (compression level 3), it will be faster at compression and decompression and
50 will have better compression ratios than zlib on most data sets. When tuned
50 will have better compression ratios than zlib on most data sets. When tuned
51 for speed, it approaches lz4's speed and ratios. When tuned for compression
51 for speed, it approaches lz4's speed and ratios. When tuned for compression
52 ratio, it approaches lzma ratios and compression speed, but decompression
52 ratio, it approaches lzma ratios and compression speed, but decompression
53 speed is much faster. See the official zstandard documentation for more.
53 speed is much faster. See the official zstandard documentation for more.
54
54
55 zstandard and this library support multi-threaded compression. There is a
55 zstandard and this library support multi-threaded compression. There is a
56 mechanism to compress large inputs using multiple threads.
56 mechanism to compress large inputs using multiple threads.
57
57
58 The performance of this library is usually very similar to what the zstandard
58 The performance of this library is usually very similar to what the zstandard
59 C API can deliver. Overhead in this library is due to general Python overhead
59 C API can deliver. Overhead in this library is due to general Python overhead
60 and can't easily be avoided by *any* zstandard Python binding. This library
60 and can't easily be avoided by *any* zstandard Python binding. This library
61 exposes multiple APIs for performing compression and decompression so callers
61 exposes multiple APIs for performing compression and decompression so callers
62 can pick an API suitable for their need. Contrast with the compression
62 can pick an API suitable for their need. Contrast with the compression
63 modules in Python's standard library (like ``zlib``), which only offer limited
63 modules in Python's standard library (like ``zlib``), which only offer limited
64 mechanisms for performing operations. The API flexibility means consumers can
64 mechanisms for performing operations. The API flexibility means consumers can
65 choose to use APIs that facilitate zero copying or minimize Python object
65 choose to use APIs that facilitate zero copying or minimize Python object
66 creation and garbage collection overhead.
66 creation and garbage collection overhead.
67
67
68 This library is capable of single-threaded throughputs well over 1 GB/s. For
68 This library is capable of single-threaded throughputs well over 1 GB/s. For
69 exact numbers, measure yourself. The source code repository has a ``bench.py``
69 exact numbers, measure yourself. The source code repository has a ``bench.py``
70 script that can be used to measure things.
70 script that can be used to measure things.
71
71
72 API
72 API
73 ===
73 ===
74
74
75 To interface with Zstandard, simply import the ``zstandard`` module::
75 To interface with Zstandard, simply import the ``zstandard`` module::
76
76
77 import zstandard
77 import zstandard
78
78
79 It is a popular convention to alias the module as a different name for
79 It is a popular convention to alias the module as a different name for
80 brevity::
80 brevity::
81
81
82 import zstandard as zstd
82 import zstandard as zstd
83
83
84 This module attempts to import and use either the C extension or CFFI
84 This module attempts to import and use either the C extension or CFFI
85 implementation. On Python platforms known to support C extensions (like
85 implementation. On Python platforms known to support C extensions (like
86 CPython), it raises an ImportError if the C extension cannot be imported.
86 CPython), it raises an ImportError if the C extension cannot be imported.
87 On Python platforms known to not support C extensions (like PyPy), it only
87 On Python platforms known to not support C extensions (like PyPy), it only
88 attempts to import the CFFI implementation and raises ImportError if that
88 attempts to import the CFFI implementation and raises ImportError if that
89 can't be done. On other platforms, it first tries to import the C extension
89 can't be done. On other platforms, it first tries to import the C extension
90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
91
91
92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
93 environment variable can be set. The following values are accepted:
93 environment variable can be set. The following values are accepted:
94
94
95 default
95 default
96 The behavior described above.
96 The behavior described above.
97 cffi_fallback
97 cffi_fallback
98 Always try to import the C extension then fall back to CFFI if that
98 Always try to import the C extension then fall back to CFFI if that
99 fails.
99 fails.
100 cext
100 cext
101 Only attempt to import the C extension.
101 Only attempt to import the C extension.
102 cffi
102 cffi
103 Only attempt to import the CFFI implementation.
103 Only attempt to import the CFFI implementation.
104
104
105 In addition, the ``zstandard`` module exports a ``backend`` attribute
105 In addition, the ``zstandard`` module exports a ``backend`` attribute
106 containing the string name of the backend being used. It will be one
106 containing the string name of the backend being used. It will be one
107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
108
108
109 The types, functions, and attributes exposed by the ``zstandard`` module
109 The types, functions, and attributes exposed by the ``zstandard`` module
110 are documented in the sections below.
110 are documented in the sections below.
111
111
112 .. note::
112 .. note::
113
113
114 The documentation in this section makes references to various zstd
114 The documentation in this section makes references to various zstd
115 concepts and functionality. The source repository contains a
115 concepts and functionality. The source repository contains a
116 ``docs/concepts.rst`` file explaining these in more detail.
116 ``docs/concepts.rst`` file explaining these in more detail.
117
117
118 ZstdCompressor
118 ZstdCompressor
119 --------------
119 --------------
120
120
121 The ``ZstdCompressor`` class provides an interface for performing
121 The ``ZstdCompressor`` class provides an interface for performing
122 compression operations. Each instance is essentially a wrapper around a
122 compression operations. Each instance is essentially a wrapper around a
123 ``ZSTD_CCtx`` from the C API.
123 ``ZSTD_CCtx`` from the C API.
124
124
125 Each instance is associated with parameters that control compression
125 Each instance is associated with parameters that control compression
126 behavior. These come from the following named arguments (all optional):
126 behavior. These come from the following named arguments (all optional):
127
127
128 level
128 level
129 Integer compression level. Valid values are between 1 and 22.
129 Integer compression level. Valid values are between 1 and 22.
130 dict_data
130 dict_data
131 Compression dictionary to use.
131 Compression dictionary to use.
132
132
133 Note: When using dictionary data and ``compress()`` is called multiple
133 Note: When using dictionary data and ``compress()`` is called multiple
134 times, the ``ZstdCompressionParameters`` derived from an integer
134 times, the ``ZstdCompressionParameters`` derived from an integer
135 compression ``level`` and the first compressed data's size will be reused
135 compression ``level`` and the first compressed data's size will be reused
136 for all subsequent operations. This may not be desirable if source data
136 for all subsequent operations. This may not be desirable if source data
137 size varies significantly.
137 size varies significantly.
138 compression_params
138 compression_params
139 A ``ZstdCompressionParameters`` instance defining compression settings.
139 A ``ZstdCompressionParameters`` instance defining compression settings.
140 write_checksum
140 write_checksum
141 Whether a 4 byte checksum should be written with the compressed data.
141 Whether a 4 byte checksum should be written with the compressed data.
142 Defaults to False. If True, the decompressor can verify that decompressed
142 Defaults to False. If True, the decompressor can verify that decompressed
143 data matches the original input data.
143 data matches the original input data.
144 write_content_size
144 write_content_size
145 Whether the size of the uncompressed data will be written into the
145 Whether the size of the uncompressed data will be written into the
146 header of compressed data. Defaults to True. The data will only be
146 header of compressed data. Defaults to True. The data will only be
147 written if the compressor knows the size of the input data. This is
147 written if the compressor knows the size of the input data. This is
148 often not true for streaming compression.
148 often not true for streaming compression.
149 write_dict_id
149 write_dict_id
150 Whether to write the dictionary ID into the compressed data.
150 Whether to write the dictionary ID into the compressed data.
151 Defaults to True. The dictionary ID is only written if a dictionary
151 Defaults to True. The dictionary ID is only written if a dictionary
152 is being used.
152 is being used.
153 threads
153 threads
154 Enables and sets the number of threads to use for multi-threaded compression
154 Enables and sets the number of threads to use for multi-threaded compression
155 operations. Defaults to 0, which means to use single-threaded compression.
155 operations. Defaults to 0, which means to use single-threaded compression.
156 Negative values will resolve to the number of logical CPUs in the system.
156 Negative values will resolve to the number of logical CPUs in the system.
157 Read below for more info on multi-threaded compression. This argument only
157 Read below for more info on multi-threaded compression. This argument only
158 controls thread count for operations that operate on individual pieces of
158 controls thread count for operations that operate on individual pieces of
159 data. APIs that spawn multiple threads for working on multiple pieces of
159 data. APIs that spawn multiple threads for working on multiple pieces of
160 data have their own ``threads`` argument.
160 data have their own ``threads`` argument.
161
161
162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
163 ``write_content_size``, ``write_dict_id``, and ``threads``.
163 ``write_content_size``, ``write_dict_id``, and ``threads``.
164
164
165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
166 instances can be called from multiple Python threads simultaneously. In other
166 instances can be called from multiple Python threads simultaneously. In other
167 words, assume instances are not thread safe unless stated otherwise.
167 words, assume instances are not thread safe unless stated otherwise.
168
168
169 Utility Methods
169 Utility Methods
170 ^^^^^^^^^^^^^^^
170 ^^^^^^^^^^^^^^^
171
171
172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
173 ingested, consumed, and produced by the current compression operation.
173 ingested, consumed, and produced by the current compression operation.
174
174
175 ``memory_size()`` obtains the memory utilization of the underlying zstd
175 ``memory_size()`` obtains the memory utilization of the underlying zstd
176 compression context, in bytes.::
176 compression context, in bytes.::
177
177
178 cctx = zstd.ZstdCompressor()
178 cctx = zstd.ZstdCompressor()
179 memory = cctx.memory_size()
179 memory = cctx.memory_size()
180
180
181 Simple API
181 Simple API
182 ^^^^^^^^^^
182 ^^^^^^^^^^
183
183
184 ``compress(data)`` compresses and returns data as a one-shot operation.::
184 ``compress(data)`` compresses and returns data as a one-shot operation.::
185
185
186 cctx = zstd.ZstdCompressor()
186 cctx = zstd.ZstdCompressor()
187 compressed = cctx.compress(b'data to compress')
187 compressed = cctx.compress(b'data to compress')
188
188
189 The ``data`` argument can be any object that implements the *buffer protocol*.
189 The ``data`` argument can be any object that implements the *buffer protocol*.
190
190
191 Stream Reader API
191 Stream Reader API
192 ^^^^^^^^^^^^^^^^^
192 ^^^^^^^^^^^^^^^^^
193
193
194 ``stream_reader(source)`` can be used to obtain an object conforming to the
194 ``stream_reader(source)`` can be used to obtain an object conforming to the
195 ``io.RawIOBase`` interface for reading compressed output as a stream::
195 ``io.RawIOBase`` interface for reading compressed output as a stream::
196
196
197 with open(path, 'rb') as fh:
197 with open(path, 'rb') as fh:
198 cctx = zstd.ZstdCompressor()
198 cctx = zstd.ZstdCompressor()
199 reader = cctx.stream_reader(fh)
199 reader = cctx.stream_reader(fh)
200 while True:
200 while True:
201 chunk = reader.read(16384)
201 chunk = reader.read(16384)
202 if not chunk:
202 if not chunk:
203 break
203 break
204
204
205 # Do something with compressed chunk.
205 # Do something with compressed chunk.
206
206
207 Instances can also be used as context managers::
207 Instances can also be used as context managers::
208
208
209 with open(path, 'rb') as fh:
209 with open(path, 'rb') as fh:
210 with cctx.stream_reader(fh) as reader:
210 with cctx.stream_reader(fh) as reader:
211 while True:
211 while True:
212 chunk = reader.read(16384)
212 chunk = reader.read(16384)
213 if not chunk:
213 if not chunk:
214 break
214 break
215
215
216 # Do something with compressed chunk.
216 # Do something with compressed chunk.
217
217
218 When the context manager exits or ``close()`` is called, the stream is closed,
218 When the context manager exits or ``close()`` is called, the stream is closed,
219 underlying resources are released, and future operations against the compression
219 underlying resources are released, and future operations against the compression
220 stream will fail.
220 stream will fail.
221
221
222 The ``source`` argument to ``stream_reader()`` can be any object with a
222 The ``source`` argument to ``stream_reader()`` can be any object with a
223 ``read(size)`` method or any object implementing the *buffer protocol*.
223 ``read(size)`` method or any object implementing the *buffer protocol*.
224
224
225 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
225 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
226 stream is. This is used to adjust compression parameters so they are
226 stream is. This is used to adjust compression parameters so they are
227 tailored to the source size.::
227 tailored to the source size.::
228
228
229 with open(path, 'rb') as fh:
229 with open(path, 'rb') as fh:
230 cctx = zstd.ZstdCompressor()
230 cctx = zstd.ZstdCompressor()
231 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
231 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
232 ...
232 ...
233
233
234 If the ``source`` is a stream, you can specify how large ``read()`` requests
234 If the ``source`` is a stream, you can specify how large ``read()`` requests
235 to that stream should be via the ``read_size`` argument. It defaults to
235 to that stream should be via the ``read_size`` argument. It defaults to
236 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
236 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
237
237
238 with open(path, 'rb') as fh:
238 with open(path, 'rb') as fh:
239 cctx = zstd.ZstdCompressor()
239 cctx = zstd.ZstdCompressor()
240 # Will perform fh.read(8192) when obtaining data to feed into the
240 # Will perform fh.read(8192) when obtaining data to feed into the
241 # compressor.
241 # compressor.
242 with cctx.stream_reader(fh, read_size=8192) as reader:
242 with cctx.stream_reader(fh, read_size=8192) as reader:
243 ...
243 ...
244
244
245 The stream returned by ``stream_reader()`` is neither writable nor seekable
245 The stream returned by ``stream_reader()`` is neither writable nor seekable
246 (even if the underlying source is seekable). ``readline()`` and
246 (even if the underlying source is seekable). ``readline()`` and
247 ``readlines()`` are not implemented because they don't make sense for
247 ``readlines()`` are not implemented because they don't make sense for
248 compressed data. ``tell()`` returns the number of compressed bytes
248 compressed data. ``tell()`` returns the number of compressed bytes
249 emitted so far.
249 emitted so far.
250
250
251 Streaming Input API
251 Streaming Input API
252 ^^^^^^^^^^^^^^^^^^^
252 ^^^^^^^^^^^^^^^^^^^
253
253
254 ``stream_writer(fh)`` allows you to *stream* data into a compressor.
254 ``stream_writer(fh)`` allows you to *stream* data into a compressor.
255
255
256 Returned instances implement the ``io.RawIOBase`` interface. Only methods
256 Returned instances implement the ``io.RawIOBase`` interface. Only methods
257 that involve writing will do useful things.
257 that involve writing will do useful things.
258
258
259 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
259 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
260 compressed data is available, ``write()`` will be called with the compressed
260 compressed data is available, ``write()`` will be called with the compressed
261 data as its argument. Many common Python types implement ``write()``, including
261 data as its argument. Many common Python types implement ``write()``, including
262 open file handles and ``io.BytesIO``.
262 open file handles and ``io.BytesIO``.
263
263
264 The ``write(data)`` method is used to feed data into the compressor.
264 The ``write(data)`` method is used to feed data into the compressor.
265
265
266 The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
266 The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
267 data remains within the compressor's internal state into the output object. This
267 data remains within the compressor's internal state into the output object. This
268 may result in 0 or more ``write()`` calls to the output object. This method
268 may result in 0 or more ``write()`` calls to the output object. This method
269 accepts an optional ``flush_mode`` argument to control the flushing behavior.
269 accepts an optional ``flush_mode`` argument to control the flushing behavior.
270 Its value can be any of the ``FLUSH_*`` constants.
270 Its value can be any of the ``FLUSH_*`` constants.
271
271
272 Both ``write()`` and ``flush()`` return the number of bytes written to the
272 Both ``write()`` and ``flush()`` return the number of bytes written to the
273 object's ``write()``. In many cases, small inputs do not accumulate enough
273 object's ``write()``. In many cases, small inputs do not accumulate enough
274 data to cause a write and ``write()`` will return ``0``.
274 data to cause a write and ``write()`` will return ``0``.
275
275
276 Calling ``close()`` will mark the stream as closed and subsequent I/O
276 Calling ``close()`` will mark the stream as closed and subsequent I/O
277 operations will raise ``ValueError`` (per the documented behavior of
277 operations will raise ``ValueError`` (per the documented behavior of
278 ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
278 ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
279 stream if such a method exists.
279 stream if such a method exists.
280
280
281 Typically usage is as follows::
281 Typically usage is as follows::
282
282
283 cctx = zstd.ZstdCompressor(level=10)
283 cctx = zstd.ZstdCompressor(level=10)
284 compressor = cctx.stream_writer(fh)
284 compressor = cctx.stream_writer(fh)
285
285
286 compressor.write(b'chunk 0\n')
286 compressor.write(b'chunk 0\n')
287 compressor.write(b'chunk 1\n')
287 compressor.write(b'chunk 1\n')
288 compressor.flush()
288 compressor.flush()
289 # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
289 # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
290 # Receiver is also expecting more data in the zstd *frame*.
290 # Receiver is also expecting more data in the zstd *frame*.
291
291
292 compressor.write(b'chunk 2\n')
292 compressor.write(b'chunk 2\n')
293 compressor.flush(zstd.FLUSH_FRAME)
293 compressor.flush(zstd.FLUSH_FRAME)
294 # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``.
294 # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``.
295 # Receiver is expecting no more data, as the zstd frame is closed.
295 # Receiver is expecting no more data, as the zstd frame is closed.
296 # Any future calls to ``write()`` at this point will construct a new
296 # Any future calls to ``write()`` at this point will construct a new
297 # zstd frame.
297 # zstd frame.
298
298
299 Instances can be used as context managers. Exiting the context manager is
299 Instances can be used as context managers. Exiting the context manager is
300 the equivalent of calling ``close()``, which is equivalent to calling
300 the equivalent of calling ``close()``, which is equivalent to calling
301 ``flush(zstd.FLUSH_FRAME)``::
301 ``flush(zstd.FLUSH_FRAME)``::
302
302
303 cctx = zstd.ZstdCompressor(level=10)
303 cctx = zstd.ZstdCompressor(level=10)
304 with cctx.stream_writer(fh) as compressor:
304 with cctx.stream_writer(fh) as compressor:
305 compressor.write(b'chunk 0')
305 compressor.write(b'chunk 0')
306 compressor.write(b'chunk 1')
306 compressor.write(b'chunk 1')
307 ...
307 ...
308
308
309 .. important::
309 .. important::
310
310
311 If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
311 If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
312 a full zstd *frame* and consumers of this data may complain about malformed
312 a full zstd *frame* and consumers of this data may complain about malformed
313 input. It is recommended to use instances as a context manager to ensure
313 input. It is recommended to use instances as a context manager to ensure
314 *frames* are properly finished.
314 *frames* are properly finished.
315
315
316 If the size of the data being fed to this streaming compressor is known,
316 If the size of the data being fed to this streaming compressor is known,
317 you can declare it before compression begins::
317 you can declare it before compression begins::
318
318
319 cctx = zstd.ZstdCompressor()
319 cctx = zstd.ZstdCompressor()
320 with cctx.stream_writer(fh, size=data_len) as compressor:
320 with cctx.stream_writer(fh, size=data_len) as compressor:
321 compressor.write(chunk0)
321 compressor.write(chunk0)
322 compressor.write(chunk1)
322 compressor.write(chunk1)
323 ...
323 ...
324
324
325 Declaring the size of the source data allows compression parameters to
325 Declaring the size of the source data allows compression parameters to
326 be tuned. And if ``write_content_size`` is used, it also results in the
326 be tuned. And if ``write_content_size`` is used, it also results in the
327 content size being written into the frame header of the output data.
327 content size being written into the frame header of the output data.
328
328
329 The size of chunks being ``write()`` to the destination can be specified::
329 The size of chunks being ``write()`` to the destination can be specified::
330
330
331 cctx = zstd.ZstdCompressor()
331 cctx = zstd.ZstdCompressor()
332 with cctx.stream_writer(fh, write_size=32768) as compressor:
332 with cctx.stream_writer(fh, write_size=32768) as compressor:
333 ...
333 ...
334
334
335 To see how much memory is being used by the streaming compressor::
335 To see how much memory is being used by the streaming compressor::
336
336
337 cctx = zstd.ZstdCompressor()
337 cctx = zstd.ZstdCompressor()
338 with cctx.stream_writer(fh) as compressor:
338 with cctx.stream_writer(fh) as compressor:
339 ...
339 ...
340 byte_size = compressor.memory_size()
340 byte_size = compressor.memory_size()
341
341
342 The total number of bytes written so far is exposed via ``tell()``::
342 The total number of bytes written so far is exposed via ``tell()``::
343
343
344 cctx = zstd.ZstdCompressor()
344 cctx = zstd.ZstdCompressor()
345 with cctx.stream_writer(fh) as compressor:
345 with cctx.stream_writer(fh) as compressor:
346 ...
346 ...
347 total_written = compressor.tell()
347 total_written = compressor.tell()
348
348
349 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
349 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
350 the return value of ``write()``. When ``False`` (the default), ``write()`` returns
350 the return value of ``write()``. When ``False`` (the default), ``write()`` returns
351 the number of bytes that were ``write()``en to the underlying object. When
351 the number of bytes that were ``write()``en to the underlying object. When
352 ``True``, ``write()`` returns the number of bytes read from the input that
352 ``True``, ``write()`` returns the number of bytes read from the input that
353 were subsequently written to the compressor. ``True`` is the *proper* behavior
353 were subsequently written to the compressor. ``True`` is the *proper* behavior
354 for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
354 for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
355 the default value in a future release.
355 the default value in a future release.
356
356
357 Streaming Output API
357 Streaming Output API
358 ^^^^^^^^^^^^^^^^^^^^
358 ^^^^^^^^^^^^^^^^^^^^
359
359
360 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
360 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
361 compressor as an iterator of data chunks.::
361 compressor as an iterator of data chunks.::
362
362
363 cctx = zstd.ZstdCompressor()
363 cctx = zstd.ZstdCompressor()
364 for chunk in cctx.read_to_iter(fh):
364 for chunk in cctx.read_to_iter(fh):
365 # Do something with emitted data.
365 # Do something with emitted data.
366
366
367 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
367 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
368 conforms to the buffer protocol.
368 conforms to the buffer protocol.
369
369
370 Uncompressed data is fetched from the source either by calling ``read(size)``
370 Uncompressed data is fetched from the source either by calling ``read(size)``
371 or by fetching a slice of data from the object directly (in the case where
371 or by fetching a slice of data from the object directly (in the case where
372 the buffer protocol is being used). The returned iterator consists of chunks
372 the buffer protocol is being used). The returned iterator consists of chunks
373 of compressed data.
373 of compressed data.
374
374
375 If reading from the source via ``read()``, ``read()`` will be called until
375 If reading from the source via ``read()``, ``read()`` will be called until
376 it raises or returns an empty bytes (``b''``). It is perfectly valid for
376 it raises or returns an empty bytes (``b''``). It is perfectly valid for
377 the source to deliver fewer bytes than were requested by ``read(size)``.
377 the source to deliver fewer bytes than were requested by ``read(size)``.
378
378
379 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
379 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
380 declaring the size of the input stream::
380 declaring the size of the input stream::
381
381
382 cctx = zstd.ZstdCompressor()
382 cctx = zstd.ZstdCompressor()
383 for chunk in cctx.read_to_iter(fh, size=some_int):
383 for chunk in cctx.read_to_iter(fh, size=some_int):
384 pass
384 pass
385
385
386 You can also control the size that data is ``read()`` from the source and
386 You can also control the size that data is ``read()`` from the source and
387 the ideal size of output chunks::
387 the ideal size of output chunks::
388
388
389 cctx = zstd.ZstdCompressor()
389 cctx = zstd.ZstdCompressor()
390 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
390 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
391 pass
391 pass
392
392
393 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
393 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
394 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
394 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
395 be whatever the object being read from delivers. These will often be of a
395 be whatever the object being read from delivers. These will often be of a
396 uniform size.
396 uniform size.
397
397
398 Stream Copying API
398 Stream Copying API
399 ^^^^^^^^^^^^^^^^^^
399 ^^^^^^^^^^^^^^^^^^
400
400
401 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
401 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
402 compressing it.::
402 compressing it.::
403
403
404 cctx = zstd.ZstdCompressor()
404 cctx = zstd.ZstdCompressor()
405 cctx.copy_stream(ifh, ofh)
405 cctx.copy_stream(ifh, ofh)
406
406
407 For example, say you wish to compress a file::
407 For example, say you wish to compress a file::
408
408
409 cctx = zstd.ZstdCompressor()
409 cctx = zstd.ZstdCompressor()
410 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
410 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
411 cctx.copy_stream(ifh, ofh)
411 cctx.copy_stream(ifh, ofh)
412
412
413 It is also possible to declare the size of the source stream::
413 It is also possible to declare the size of the source stream::
414
414
415 cctx = zstd.ZstdCompressor()
415 cctx = zstd.ZstdCompressor()
416 cctx.copy_stream(ifh, ofh, size=len_of_input)
416 cctx.copy_stream(ifh, ofh, size=len_of_input)
417
417
418 You can also specify how large the chunks that are ``read()`` and ``write()``
418 You can also specify how large the chunks that are ``read()`` and ``write()``
419 from and to the streams::
419 from and to the streams::
420
420
421 cctx = zstd.ZstdCompressor()
421 cctx = zstd.ZstdCompressor()
422 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
422 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
423
423
424 The stream copier returns a 2-tuple of bytes read and written::
424 The stream copier returns a 2-tuple of bytes read and written::
425
425
426 cctx = zstd.ZstdCompressor()
426 cctx = zstd.ZstdCompressor()
427 read_count, write_count = cctx.copy_stream(ifh, ofh)
427 read_count, write_count = cctx.copy_stream(ifh, ofh)
428
428
429 Compressor API
429 Compressor API
430 ^^^^^^^^^^^^^^
430 ^^^^^^^^^^^^^^
431
431
432 ``compressobj()`` returns an object that exposes ``compress(data)`` and
432 ``compressobj()`` returns an object that exposes ``compress(data)`` and
433 ``flush()`` methods. Each returns compressed data or an empty bytes.
433 ``flush()`` methods. Each returns compressed data or an empty bytes.
434
434
435 The purpose of ``compressobj()`` is to provide an API-compatible interface
435 The purpose of ``compressobj()`` is to provide an API-compatible interface
436 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
436 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
437 swap in different compressor objects while using the same API.
437 swap in different compressor objects while using the same API.
438
438
439 ``flush()`` accepts an optional argument indicating how to end the stream.
439 ``flush()`` accepts an optional argument indicating how to end the stream.
440 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
440 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
441 Once this type of flush is performed, ``compress()`` and ``flush()`` can
441 Once this type of flush is performed, ``compress()`` and ``flush()`` can
442 no longer be called. This type of flush **must** be called to end the
442 no longer be called. This type of flush **must** be called to end the
443 compression context. If not called, returned data may be incomplete.
443 compression context. If not called, returned data may be incomplete.
444
444
445 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
445 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
446 zstd block. Flushes of this type can be performed multiple times. The next
446 zstd block. Flushes of this type can be performed multiple times. The next
447 call to ``compress()`` will begin a new zstd block.
447 call to ``compress()`` will begin a new zstd block.
448
448
449 Here is how this API should be used::
449 Here is how this API should be used::
450
450
451 cctx = zstd.ZstdCompressor()
451 cctx = zstd.ZstdCompressor()
452 cobj = cctx.compressobj()
452 cobj = cctx.compressobj()
453 data = cobj.compress(b'raw input 0')
453 data = cobj.compress(b'raw input 0')
454 data = cobj.compress(b'raw input 1')
454 data = cobj.compress(b'raw input 1')
455 data = cobj.flush()
455 data = cobj.flush()
456
456
457 Or to flush blocks::
457 Or to flush blocks::
458
458
459 cctx = zstd.ZstdCompressor()
459 cctx = zstd.ZstdCompressor()
460 cobj = cctx.compressobj()
460 cobj = cctx.compressobj()
461 data = cobj.compress(b'chunk in first block')
461 data = cobj.compress(b'chunk in first block')
462 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
462 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
463 data = cobj.compress(b'chunk in second block')
463 data = cobj.compress(b'chunk in second block')
464 data = cobj.flush()
464 data = cobj.flush()
465
465
466 For best performance results, keep input chunks under 256KB. This avoids
466 For best performance results, keep input chunks under 256KB. This avoids
467 extra allocations for a large output object.
467 extra allocations for a large output object.
468
468
469 It is possible to declare the input size of the data that will be fed into
469 It is possible to declare the input size of the data that will be fed into
470 the compressor::
470 the compressor::
471
471
472 cctx = zstd.ZstdCompressor()
472 cctx = zstd.ZstdCompressor()
473 cobj = cctx.compressobj(size=6)
473 cobj = cctx.compressobj(size=6)
474 data = cobj.compress(b'foobar')
474 data = cobj.compress(b'foobar')
475 data = cobj.flush()
475 data = cobj.flush()
476
476
477 Chunker API
477 Chunker API
478 ^^^^^^^^^^^
478 ^^^^^^^^^^^
479
479
480 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
480 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
481 an object that can be used to iteratively feed chunks of data into a compressor
481 an object that can be used to iteratively feed chunks of data into a compressor
482 and produce output chunks of a uniform size.
482 and produce output chunks of a uniform size.
483
483
484 The object returned by ``chunker()`` exposes the following methods:
484 The object returned by ``chunker()`` exposes the following methods:
485
485
486 ``compress(data)``
486 ``compress(data)``
487 Feeds new input data into the compressor.
487 Feeds new input data into the compressor.
488
488
489 ``flush()``
489 ``flush()``
490 Flushes all data currently in the compressor.
490 Flushes all data currently in the compressor.
491
491
492 ``finish()``
492 ``finish()``
493 Signals the end of input data. No new data can be compressed after this
493 Signals the end of input data. No new data can be compressed after this
494 method is called.
494 method is called.
495
495
496 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
496 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
497 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
497 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
498 MUST iterate through all elements of the returned iterator before performing
498 MUST iterate through all elements of the returned iterator before performing
499 another operation on the object.
499 another operation on the object.
500
500
501 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
501 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
502
502
503 ``flush()`` and ``finish()`` may return a final chunk smaller than
503 ``flush()`` and ``finish()`` may return a final chunk smaller than
504 ``chunk_size``.
504 ``chunk_size``.
505
505
506 Here is how the API should be used::
506 Here is how the API should be used::
507
507
508 cctx = zstd.ZstdCompressor()
508 cctx = zstd.ZstdCompressor()
509 chunker = cctx.chunker(chunk_size=32768)
509 chunker = cctx.chunker(chunk_size=32768)
510
510
511 with open(path, 'rb') as fh:
511 with open(path, 'rb') as fh:
512 while True:
512 while True:
513 in_chunk = fh.read(32768)
513 in_chunk = fh.read(32768)
514 if not in_chunk:
514 if not in_chunk:
515 break
515 break
516
516
517 for out_chunk in chunker.compress(in_chunk):
517 for out_chunk in chunker.compress(in_chunk):
518 # Do something with output chunk of size 32768.
518 # Do something with output chunk of size 32768.
519
519
520 for out_chunk in chunker.finish():
520 for out_chunk in chunker.finish():
521 # Do something with output chunks that finalize the zstd frame.
521 # Do something with output chunks that finalize the zstd frame.
522
522
523 The ``chunker()`` API is often a better alternative to ``compressobj()``.
523 The ``chunker()`` API is often a better alternative to ``compressobj()``.
524
524
525 ``compressobj()`` will emit output data as it is available. This results in a
525 ``compressobj()`` will emit output data as it is available. This results in a
526 *stream* of output chunks of varying sizes. The consistency of the output chunk
526 *stream* of output chunks of varying sizes. The consistency of the output chunk
527 size with ``chunker()`` is more appropriate for many usages, such as sending
527 size with ``chunker()`` is more appropriate for many usages, such as sending
528 compressed data to a socket.
528 compressed data to a socket.
529
529
530 ``compressobj()`` may also perform extra memory reallocations in order to
530 ``compressobj()`` may also perform extra memory reallocations in order to
531 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
531 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
532 chunks are all the same size (except for flushed or final chunks), there is
532 chunks are all the same size (except for flushed or final chunks), there is
533 less memory allocation overhead.
533 less memory allocation overhead.
534
534
535 Batch Compression API
535 Batch Compression API
536 ^^^^^^^^^^^^^^^^^^^^^
536 ^^^^^^^^^^^^^^^^^^^^^
537
537
538 (Experimental. Not yet supported in CFFI bindings.)
538 (Experimental. Not yet supported in CFFI bindings.)
539
539
540 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
540 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
541 inputs as a single operation.
541 inputs as a single operation.
542
542
543 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
543 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
544 ``BufferWithSegments``, or a list containing byte like objects. Each element of
544 ``BufferWithSegments``, or a list containing byte like objects. Each element of
545 the container will be compressed individually using the configured parameters
545 the container will be compressed individually using the configured parameters
546 on the ``ZstdCompressor`` instance.
546 on the ``ZstdCompressor`` instance.
547
547
548 The ``threads`` argument controls how many threads to use for compression. The
548 The ``threads`` argument controls how many threads to use for compression. The
549 default is ``0`` which means to use a single thread. Negative values use the
549 default is ``0`` which means to use a single thread. Negative values use the
550 number of logical CPUs in the machine.
550 number of logical CPUs in the machine.
551
551
552 The function returns a ``BufferWithSegmentsCollection``. This type represents
552 The function returns a ``BufferWithSegmentsCollection``. This type represents
553 N discrete memory allocations, each holding 1 or more compressed frames.
553 N discrete memory allocations, each holding 1 or more compressed frames.
554
554
555 Output data is written to shared memory buffers. This means that unlike
555 Output data is written to shared memory buffers. This means that unlike
556 regular Python objects, a reference to *any* object within the collection
556 regular Python objects, a reference to *any* object within the collection
557 keeps the shared buffer and therefore memory backing it alive. This can have
557 keeps the shared buffer and therefore memory backing it alive. This can have
558 undesirable effects on process memory usage.
558 undesirable effects on process memory usage.
559
559
560 The API and behavior of this function is experimental and will likely change.
560 The API and behavior of this function is experimental and will likely change.
561 Known deficiencies include:
561 Known deficiencies include:
562
562
563 * If asked to use multiple threads, it will always spawn that many threads,
563 * If asked to use multiple threads, it will always spawn that many threads,
564 even if the input is too small to use them. It should automatically lower
564 even if the input is too small to use them. It should automatically lower
565 the thread count when the extra threads would just add overhead.
565 the thread count when the extra threads would just add overhead.
566 * The buffer allocation strategy is fixed. There is room to make it dynamic,
566 * The buffer allocation strategy is fixed. There is room to make it dynamic,
567 perhaps even to allow one output buffer per input, facilitating a variation
567 perhaps even to allow one output buffer per input, facilitating a variation
568 of the API to return a list without the adverse effects of shared memory
568 of the API to return a list without the adverse effects of shared memory
569 buffers.
569 buffers.
570
570
571 ZstdDecompressor
571 ZstdDecompressor
572 ----------------
572 ----------------
573
573
574 The ``ZstdDecompressor`` class provides an interface for performing
574 The ``ZstdDecompressor`` class provides an interface for performing
575 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
575 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
576 the C API.
576 the C API.
577
577
578 Each instance is associated with parameters that control decompression. These
578 Each instance is associated with parameters that control decompression. These
579 come from the following named arguments (all optional):
579 come from the following named arguments (all optional):
580
580
581 dict_data
581 dict_data
582 Compression dictionary to use.
582 Compression dictionary to use.
583 max_window_size
583 max_window_size
584 Sets an upper limit on the window size for decompression operations in
584 Sets an upper limit on the window size for decompression operations in
585 kibibytes. This setting can be used to prevent large memory allocations
585 kibibytes. This setting can be used to prevent large memory allocations
586 for inputs using large compression windows.
586 for inputs using large compression windows.
587 format
587 format
588 Set the format of data for the decoder. By default, this is
588 Set the format of data for the decoder. By default, this is
589 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
589 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
590 allow decoding frames without the 4 byte magic header. Not all decompression
590 allow decoding frames without the 4 byte magic header. Not all decompression
591 APIs support this mode.
591 APIs support this mode.
592
592
593 The interface of this class is very similar to ``ZstdCompressor`` (by design).
593 The interface of this class is very similar to ``ZstdCompressor`` (by design).
594
594
595 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
595 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
596 instances can be called from multiple Python threads simultaneously. In other
596 instances can be called from multiple Python threads simultaneously. In other
597 words, assume instances are not thread safe unless stated otherwise.
597 words, assume instances are not thread safe unless stated otherwise.
598
598
599 Utility Methods
599 Utility Methods
600 ^^^^^^^^^^^^^^^
600 ^^^^^^^^^^^^^^^
601
601
602 ``memory_size()`` obtains the size of the underlying zstd decompression context,
602 ``memory_size()`` obtains the size of the underlying zstd decompression context,
603 in bytes.::
603 in bytes.::
604
604
605 dctx = zstd.ZstdDecompressor()
605 dctx = zstd.ZstdDecompressor()
606 size = dctx.memory_size()
606 size = dctx.memory_size()
607
607
608 Simple API
608 Simple API
609 ^^^^^^^^^^
609 ^^^^^^^^^^
610
610
611 ``decompress(data)`` can be used to decompress an entire compressed zstd
611 ``decompress(data)`` can be used to decompress an entire compressed zstd
612 frame in a single operation.::
612 frame in a single operation.::
613
613
614 dctx = zstd.ZstdDecompressor()
614 dctx = zstd.ZstdDecompressor()
615 decompressed = dctx.decompress(data)
615 decompressed = dctx.decompress(data)
616
616
617 By default, ``decompress(data)`` will only work on data written with the content
617 By default, ``decompress(data)`` will only work on data written with the content
618 size encoded in its header (this is the default behavior of
618 size encoded in its header (this is the default behavior of
619 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
619 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
620 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
620 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
621 be raised.
621 be raised.
622
622
623 If the compressed data doesn't have its content size embedded within it,
623 If the compressed data doesn't have its content size embedded within it,
624 decompression can be attempted by specifying the ``max_output_size``
624 decompression can be attempted by specifying the ``max_output_size``
625 argument.::
625 argument.::
626
626
627 dctx = zstd.ZstdDecompressor()
627 dctx = zstd.ZstdDecompressor()
628 uncompressed = dctx.decompress(data, max_output_size=1048576)
628 uncompressed = dctx.decompress(data, max_output_size=1048576)
629
629
630 Ideally, ``max_output_size`` will be identical to the decompressed output
630 Ideally, ``max_output_size`` will be identical to the decompressed output
631 size.
631 size.
632
632
633 If ``max_output_size`` is too small to hold the decompressed data,
633 If ``max_output_size`` is too small to hold the decompressed data,
634 ``zstd.ZstdError`` will be raised.
634 ``zstd.ZstdError`` will be raised.
635
635
636 If ``max_output_size`` is larger than the decompressed data, the allocated
636 If ``max_output_size`` is larger than the decompressed data, the allocated
637 output buffer will be resized to only use the space required.
637 output buffer will be resized to only use the space required.
638
638
639 Please note that an allocation of the requested ``max_output_size`` will be
639 Please note that an allocation of the requested ``max_output_size`` will be
640 performed every time the method is called. Setting to a very large value could
640 performed every time the method is called. Setting to a very large value could
641 result in a lot of work for the memory allocator and may result in
641 result in a lot of work for the memory allocator and may result in
642 ``MemoryError`` being raised if the allocation fails.
642 ``MemoryError`` being raised if the allocation fails.
643
643
644 .. important::
644 .. important::
645
645
646 If the exact size of decompressed data is unknown (not passed in explicitly
646 If the exact size of decompressed data is unknown (not passed in explicitly
647 and not stored in the zstandard frame), for performance reasons it is
647 and not stored in the zstandard frame), for performance reasons it is
648 encouraged to use a streaming API.
648 encouraged to use a streaming API.
649
649
650 Stream Reader API
650 Stream Reader API
651 ^^^^^^^^^^^^^^^^^
651 ^^^^^^^^^^^^^^^^^
652
652
653 ``stream_reader(source)`` can be used to obtain an object conforming to the
653 ``stream_reader(source)`` can be used to obtain an object conforming to the
654 ``io.RawIOBase`` interface for reading decompressed output as a stream::
654 ``io.RawIOBase`` interface for reading decompressed output as a stream::
655
655
656 with open(path, 'rb') as fh:
656 with open(path, 'rb') as fh:
657 dctx = zstd.ZstdDecompressor()
657 dctx = zstd.ZstdDecompressor()
658 reader = dctx.stream_reader(fh)
658 reader = dctx.stream_reader(fh)
659 while True:
659 while True:
660 chunk = reader.read(16384)
660 chunk = reader.read(16384)
661 if not chunk:
661 if not chunk:
662 break
662 break
663
663
664 # Do something with decompressed chunk.
664 # Do something with decompressed chunk.
665
665
666 The stream can also be used as a context manager::
666 The stream can also be used as a context manager::
667
667
668 with open(path, 'rb') as fh:
668 with open(path, 'rb') as fh:
669 dctx = zstd.ZstdDecompressor()
669 dctx = zstd.ZstdDecompressor()
670 with dctx.stream_reader(fh) as reader:
670 with dctx.stream_reader(fh) as reader:
671 ...
671 ...
672
672
673 When used as a context manager, the stream is closed and the underlying
673 When used as a context manager, the stream is closed and the underlying
674 resources are released when the context manager exits. Future operations against
674 resources are released when the context manager exits. Future operations against
675 the stream will fail.
675 the stream will fail.
676
676
677 The ``source`` argument to ``stream_reader()`` can be any object with a
677 The ``source`` argument to ``stream_reader()`` can be any object with a
678 ``read(size)`` method or any object implementing the *buffer protocol*.
678 ``read(size)`` method or any object implementing the *buffer protocol*.
679
679
680 If the ``source`` is a stream, you can specify how large ``read()`` requests
680 If the ``source`` is a stream, you can specify how large ``read()`` requests
681 to that stream should be via the ``read_size`` argument. It defaults to
681 to that stream should be via the ``read_size`` argument. It defaults to
682 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
682 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
683
683
684 with open(path, 'rb') as fh:
684 with open(path, 'rb') as fh:
685 dctx = zstd.ZstdDecompressor()
685 dctx = zstd.ZstdDecompressor()
686 # Will perform fh.read(8192) when obtaining data for the decompressor.
686 # Will perform fh.read(8192) when obtaining data for the decompressor.
687 with dctx.stream_reader(fh, read_size=8192) as reader:
687 with dctx.stream_reader(fh, read_size=8192) as reader:
688 ...
688 ...
689
689
690 The stream returned by ``stream_reader()`` is not writable.
690 The stream returned by ``stream_reader()`` is not writable.
691
691
692 The stream returned by ``stream_reader()`` is *partially* seekable.
692 The stream returned by ``stream_reader()`` is *partially* seekable.
693 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
693 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
694 of the current position are allowed. Offsets behind the current read
694 of the current position are allowed. Offsets behind the current read
695 position and offsets relative to the end of stream are not allowed and
695 position and offsets relative to the end of stream are not allowed and
696 will raise ``ValueError`` if attempted.
696 will raise ``ValueError`` if attempted.
697
697
698 ``tell()`` returns the number of decompressed bytes read so far.
698 ``tell()`` returns the number of decompressed bytes read so far.
699
699
700 Not all I/O methods are implemented. Notably missing is support for
700 Not all I/O methods are implemented. Notably missing is support for
701 ``readline()``, ``readlines()``, and linewise iteration support. This is
701 ``readline()``, ``readlines()``, and linewise iteration support. This is
702 because streams operate on binary data - not text data. If you want to
702 because streams operate on binary data - not text data. If you want to
703 convert decompressed output to text, you can chain an ``io.TextIOWrapper``
703 convert decompressed output to text, you can chain an ``io.TextIOWrapper``
704 to the stream::
704 to the stream::
705
705
706 with open(path, 'rb') as fh:
706 with open(path, 'rb') as fh:
707 dctx = zstd.ZstdDecompressor()
707 dctx = zstd.ZstdDecompressor()
708 stream_reader = dctx.stream_reader(fh)
708 stream_reader = dctx.stream_reader(fh)
709 text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
709 text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
710
710
711 for line in text_stream:
711 for line in text_stream:
712 ...
712 ...
713
713
714 The ``read_across_frames`` argument to ``stream_reader()`` controls the
714 The ``read_across_frames`` argument to ``stream_reader()`` controls the
715 behavior of read operations when the end of a zstd *frame* is encountered.
715 behavior of read operations when the end of a zstd *frame* is encountered.
716 When ``False`` (the default), a read will complete when the end of a
716 When ``False`` (the default), a read will complete when the end of a
717 zstd *frame* is encountered. When ``True``, a read can potentially
717 zstd *frame* is encountered. When ``True``, a read can potentially
718 return data spanning multiple zstd *frames*.
718 return data spanning multiple zstd *frames*.
719
719
720 Streaming Input API
720 Streaming Input API
721 ^^^^^^^^^^^^^^^^^^^
721 ^^^^^^^^^^^^^^^^^^^
722
722
723 ``stream_writer(fh)`` allows you to *stream* data into a decompressor.
723 ``stream_writer(fh)`` allows you to *stream* data into a decompressor.
724
724
725 Returned instances implement the ``io.RawIOBase`` interface. Only methods
725 Returned instances implement the ``io.RawIOBase`` interface. Only methods
726 that involve writing will do useful things.
726 that involve writing will do useful things.
727
727
728 The argument to ``stream_writer()`` is typically an object that also implements
728 The argument to ``stream_writer()`` is typically an object that also implements
729 ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
729 ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
730 common Python types conform to this interface, including open file handles
730 common Python types conform to this interface, including open file handles
731 and ``io.BytesIO``.
731 and ``io.BytesIO``.
732
732
733 Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
733 Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
734 is sent to the decompressor by calling ``write(data)`` and decompressed
734 is sent to the decompressor by calling ``write(data)`` and decompressed
735 output is written to the underlying stream by calling its ``write(data)``
735 output is written to the underlying stream by calling its ``write(data)``
736 method.::
736 method.::
737
737
738 dctx = zstd.ZstdDecompressor()
738 dctx = zstd.ZstdDecompressor()
739 decompressor = dctx.stream_writer(fh)
739 decompressor = dctx.stream_writer(fh)
740
740
741 decompressor.write(compressed_data)
741 decompressor.write(compressed_data)
742 ...
742 ...
743
743
744
744
745 Calls to ``write()`` will return the number of bytes written to the output
745 Calls to ``write()`` will return the number of bytes written to the output
746 object. Not all inputs will result in bytes being written, so return values
746 object. Not all inputs will result in bytes being written, so return values
747 of ``0`` are possible.
747 of ``0`` are possible.
748
748
749 Like the ``stream_writer()`` compressor, instances can be used as context
749 Like the ``stream_writer()`` compressor, instances can be used as context
750 managers. However, context managers add no extra special behavior and offer
750 managers. However, context managers add no extra special behavior and offer
751 little to no benefit to being used.
751 little to no benefit to being used.
752
752
753 Calling ``close()`` will mark the stream as closed and subsequent I/O operations
753 Calling ``close()`` will mark the stream as closed and subsequent I/O operations
754 will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
754 will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
755 ``close()`` will also call ``close()`` on the underlying stream if such a
755 ``close()`` will also call ``close()`` on the underlying stream if such a
756 method exists.
756 method exists.
757
757
758 The size of chunks being ``write()`` to the destination can be specified::
758 The size of chunks being ``write()`` to the destination can be specified::
759
759
760 dctx = zstd.ZstdDecompressor()
760 dctx = zstd.ZstdDecompressor()
761 with dctx.stream_writer(fh, write_size=16384) as decompressor:
761 with dctx.stream_writer(fh, write_size=16384) as decompressor:
762 pass
762 pass
763
763
764 You can see how much memory is being used by the decompressor::
764 You can see how much memory is being used by the decompressor::
765
765
766 dctx = zstd.ZstdDecompressor()
766 dctx = zstd.ZstdDecompressor()
767 with dctx.stream_writer(fh) as decompressor:
767 with dctx.stream_writer(fh) as decompressor:
768 byte_size = decompressor.memory_size()
768 byte_size = decompressor.memory_size()
769
769
770 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
770 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
771 the return value of ``write()``. When ``False`` (the default), ``write()``
771 the return value of ``write()``. When ``False`` (the default), ``write()``
772 returns the number of bytes that were ``write()``en to the underlying stream.
772 returns the number of bytes that were ``write()``en to the underlying stream.
773 When ``True``, ``write()`` returns the number of bytes read from the input.
773 When ``True``, ``write()`` returns the number of bytes read from the input.
774 ``True`` is the *proper* behavior for ``write()`` as specified by the
774 ``True`` is the *proper* behavior for ``write()`` as specified by the
775 ``io.RawIOBase`` interface and will become the default in a future release.
775 ``io.RawIOBase`` interface and will become the default in a future release.
776
776
777 Streaming Output API
777 Streaming Output API
778 ^^^^^^^^^^^^^^^^^^^^
778 ^^^^^^^^^^^^^^^^^^^^
779
779
780 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
780 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
781 compressed source as an iterator of data chunks.::
781 compressed source as an iterator of data chunks.::
782
782
783 dctx = zstd.ZstdDecompressor()
783 dctx = zstd.ZstdDecompressor()
784 for chunk in dctx.read_to_iter(fh):
784 for chunk in dctx.read_to_iter(fh):
785 # Do something with original data.
785 # Do something with original data.
786
786
787 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
787 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
788 return compressed bytes or an object conforming to the buffer protocol that
788 return compressed bytes or an object conforming to the buffer protocol that
789 can expose its data as a contiguous range of bytes.
789 can expose its data as a contiguous range of bytes.
790
790
791 ``read_to_iter()`` returns an iterator whose elements are chunks of the
791 ``read_to_iter()`` returns an iterator whose elements are chunks of the
792 decompressed data.
792 decompressed data.
793
793
794 The size of requested ``read()`` from the source can be specified::
794 The size of requested ``read()`` from the source can be specified::
795
795
796 dctx = zstd.ZstdDecompressor()
796 dctx = zstd.ZstdDecompressor()
797 for chunk in dctx.read_to_iter(fh, read_size=16384):
797 for chunk in dctx.read_to_iter(fh, read_size=16384):
798 pass
798 pass
799
799
800 It is also possible to skip leading bytes in the input data::
800 It is also possible to skip leading bytes in the input data::
801
801
802 dctx = zstd.ZstdDecompressor()
802 dctx = zstd.ZstdDecompressor()
803 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
803 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
804 pass
804 pass
805
805
806 .. tip::
806 .. tip::
807
807
808 Skipping leading bytes is useful if the source data contains extra
808 Skipping leading bytes is useful if the source data contains extra
809 *header* data. Traditionally, you would need to create a slice or
809 *header* data. Traditionally, you would need to create a slice or
810 ``memoryview`` of the data you want to decompress. This would create
810 ``memoryview`` of the data you want to decompress. This would create
811 overhead. It is more efficient to pass the offset into this API.
811 overhead. It is more efficient to pass the offset into this API.
812
812
813 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
813 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
814 controls when data is decompressed. If the iterator isn't consumed,
814 controls when data is decompressed. If the iterator isn't consumed,
815 decompression is put on hold.
815 decompression is put on hold.
816
816
817 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
817 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
818 the behavior may seem similar to what occurs when the simple decompression
818 the behavior may seem similar to what occurs when the simple decompression
819 API is used. However, this API works when the decompressed size is unknown.
819 API is used. However, this API works when the decompressed size is unknown.
820 Furthermore, if feeding large inputs, the decompressor will work in chunks
820 Furthermore, if feeding large inputs, the decompressor will work in chunks
821 instead of performing a single operation.
821 instead of performing a single operation.
822
822
823 Stream Copying API
823 Stream Copying API
824 ^^^^^^^^^^^^^^^^^^
824 ^^^^^^^^^^^^^^^^^^
825
825
826 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
826 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
827 performing decompression.::
827 performing decompression.::
828
828
829 dctx = zstd.ZstdDecompressor()
829 dctx = zstd.ZstdDecompressor()
830 dctx.copy_stream(ifh, ofh)
830 dctx.copy_stream(ifh, ofh)
831
831
832 e.g. to decompress a file to another file::
832 e.g. to decompress a file to another file::
833
833
834 dctx = zstd.ZstdDecompressor()
834 dctx = zstd.ZstdDecompressor()
835 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
835 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
836 dctx.copy_stream(ifh, ofh)
836 dctx.copy_stream(ifh, ofh)
837
837
838 The size of chunks being ``read()`` and ``write()`` from and to the streams
838 The size of chunks being ``read()`` and ``write()`` from and to the streams
839 can be specified::
839 can be specified::
840
840
841 dctx = zstd.ZstdDecompressor()
841 dctx = zstd.ZstdDecompressor()
842 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
842 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
843
843
844 Decompressor API
844 Decompressor API
845 ^^^^^^^^^^^^^^^^
845 ^^^^^^^^^^^^^^^^
846
846
847 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
847 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
848 method. Compressed data chunks are fed into ``decompress(data)`` and
848 method. Compressed data chunks are fed into ``decompress(data)`` and
849 uncompressed output (or an empty bytes) is returned. Output from subsequent
849 uncompressed output (or an empty bytes) is returned. Output from subsequent
850 calls needs to be concatenated to reassemble the full decompressed byte
850 calls needs to be concatenated to reassemble the full decompressed byte
851 sequence.
851 sequence.
852
852
853 The purpose of ``decompressobj()`` is to provide an API-compatible interface
853 The purpose of ``decompressobj()`` is to provide an API-compatible interface
854 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
854 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
855 to swap in different decompressor objects while using the same API.
855 to swap in different decompressor objects while using the same API.
856
856
857 Each object is single use: once an input frame is decoded, ``decompress()``
857 Each object is single use: once an input frame is decoded, ``decompress()``
858 can no longer be called.
858 can no longer be called.
859
859
860 Here is how this API should be used::
860 Here is how this API should be used::
861
861
862 dctx = zstd.ZstdDecompressor()
862 dctx = zstd.ZstdDecompressor()
863 dobj = dctx.decompressobj()
863 dobj = dctx.decompressobj()
864 data = dobj.decompress(compressed_chunk_0)
864 data = dobj.decompress(compressed_chunk_0)
865 data = dobj.decompress(compressed_chunk_1)
865 data = dobj.decompress(compressed_chunk_1)
866
866
867 By default, calls to ``decompress()`` write output data in chunks of size
867 By default, calls to ``decompress()`` write output data in chunks of size
868 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
868 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
869 before being returned to the caller. It is possible to define the size of
869 before being returned to the caller. It is possible to define the size of
870 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
870 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
871
871
872 dctx = zstd.ZstdDecompressor()
872 dctx = zstd.ZstdDecompressor()
873 dobj = dctx.decompressobj(write_size=1048576)
873 dobj = dctx.decompressobj(write_size=1048576)
874
874
875 .. note::
875 .. note::
876
876
877 Because calls to ``decompress()`` may need to perform multiple
877 Because calls to ``decompress()`` may need to perform multiple
878 memory (re)allocations, this streaming decompression API isn't as
878 memory (re)allocations, this streaming decompression API isn't as
879 efficient as other APIs.
879 efficient as other APIs.
880
880
881 For compatibility with the standard library APIs, instances expose a
881 For compatibility with the standard library APIs, instances expose a
882 ``flush([length=None])`` method. This method no-ops and has no meaningful
882 ``flush([length=None])`` method. This method no-ops and has no meaningful
883 side-effects, making it safe to call any time.
883 side-effects, making it safe to call any time.
884
884
885 Batch Decompression API
885 Batch Decompression API
886 ^^^^^^^^^^^^^^^^^^^^^^^
886 ^^^^^^^^^^^^^^^^^^^^^^^
887
887
888 (Experimental. Not yet supported in CFFI bindings.)
888 (Experimental. Not yet supported in CFFI bindings.)
889
889
890 ``multi_decompress_to_buffer()`` performs decompression of multiple
890 ``multi_decompress_to_buffer()`` performs decompression of multiple
891 frames as a single operation and returns a ``BufferWithSegmentsCollection``
891 frames as a single operation and returns a ``BufferWithSegmentsCollection``
892 containing decompressed data for all inputs.
892 containing decompressed data for all inputs.
893
893
894 Compressed frames can be passed to the function as a ``BufferWithSegments``,
894 Compressed frames can be passed to the function as a ``BufferWithSegments``,
895 a ``BufferWithSegmentsCollection``, or as a list containing objects that
895 a ``BufferWithSegmentsCollection``, or as a list containing objects that
896 conform to the buffer protocol. For best performance, pass a
896 conform to the buffer protocol. For best performance, pass a
897 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
897 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
898 minimal input validation will be done for that type. If calling from
898 minimal input validation will be done for that type. If calling from
899 Python (as opposed to C), constructing one of these instances may add
899 Python (as opposed to C), constructing one of these instances may add
900 overhead cancelling out the performance overhead of validation for list
900 overhead cancelling out the performance overhead of validation for list
901 inputs.::
901 inputs.::
902
902
903 dctx = zstd.ZstdDecompressor()
903 dctx = zstd.ZstdDecompressor()
904 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
904 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
905
905
906 The decompressed size of each frame MUST be discoverable. It can either be
906 The decompressed size of each frame MUST be discoverable. It can either be
907 embedded within the zstd frame (``write_content_size=True`` argument to
907 embedded within the zstd frame (``write_content_size=True`` argument to
908 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
908 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
909
909
910 The ``decompressed_sizes`` argument is an object conforming to the buffer
910 The ``decompressed_sizes`` argument is an object conforming to the buffer
911 protocol which holds an array of 64-bit unsigned integers in the machine's
911 protocol which holds an array of 64-bit unsigned integers in the machine's
912 native format defining the decompressed sizes of each frame. If this argument
912 native format defining the decompressed sizes of each frame. If this argument
913 is passed, it avoids having to scan each frame for its decompressed size.
913 is passed, it avoids having to scan each frame for its decompressed size.
914 This frame scanning can add noticeable overhead in some scenarios.::
914 This frame scanning can add noticeable overhead in some scenarios.::
915
915
916 frames = [...]
916 frames = [...]
917 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
917 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
918
918
919 dctx = zstd.ZstdDecompressor()
919 dctx = zstd.ZstdDecompressor()
920 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
920 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
921
921
922 The ``threads`` argument controls the number of threads to use to perform
922 The ``threads`` argument controls the number of threads to use to perform
923 decompression operations. The default (``0``) or the value ``1`` means to
923 decompression operations. The default (``0``) or the value ``1`` means to
924 use a single thread. Negative values use the number of logical CPUs in the
924 use a single thread. Negative values use the number of logical CPUs in the
925 machine.
925 machine.
926
926
927 .. note::
927 .. note::
928
928
929 It is possible to pass a ``mmap.mmap()`` instance into this function by
929 It is possible to pass a ``mmap.mmap()`` instance into this function by
930 wrapping it with a ``BufferWithSegments`` instance (which will define the
930 wrapping it with a ``BufferWithSegments`` instance (which will define the
931 offsets of frames within the memory mapped region).
931 offsets of frames within the memory mapped region).
932
932
933 This function is logically equivalent to performing ``dctx.decompress()``
933 This function is logically equivalent to performing ``dctx.decompress()``
934 on each input frame and returning the result.
934 on each input frame and returning the result.
935
935
936 This function exists to perform decompression on multiple frames as fast
936 This function exists to perform decompression on multiple frames as fast
937 as possible by having as little overhead as possible. Since decompression is
937 as possible by having as little overhead as possible. Since decompression is
938 performed as a single operation and since the decompressed output is stored in
938 performed as a single operation and since the decompressed output is stored in
939 a single buffer, extra memory allocations, Python objects, and Python function
939 a single buffer, extra memory allocations, Python objects, and Python function
940 calls are avoided. This is ideal for scenarios where callers know up front that
940 calls are avoided. This is ideal for scenarios where callers know up front that
941 they need to access data for multiple frames, such as when *delta chains* are
941 they need to access data for multiple frames, such as when *delta chains* are
942 being used.
942 being used.
943
943
944 Currently, the implementation always spawns multiple threads when requested,
944 Currently, the implementation always spawns multiple threads when requested,
945 even if the amount of work to do is small. In the future, it will be smarter
945 even if the amount of work to do is small. In the future, it will be smarter
946 about avoiding threads and their associated overhead when the amount of
946 about avoiding threads and their associated overhead when the amount of
947 work to do is small.
947 work to do is small.
948
948
949 Prefix Dictionary Chain Decompression
949 Prefix Dictionary Chain Decompression
950 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
950 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
951
951
952 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
952 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
953 zstd frames produced using chained *prefix* dictionary compression. Such
953 zstd frames produced using chained *prefix* dictionary compression. Such
954 a list of frames is produced by compressing discrete inputs where each
954 a list of frames is produced by compressing discrete inputs where each
955 non-initial input is compressed with a *prefix* dictionary consisting of the
955 non-initial input is compressed with a *prefix* dictionary consisting of the
956 content of the previous input.
956 content of the previous input.
957
957
958 For example, say you have the following inputs::
958 For example, say you have the following inputs::
959
959
960 inputs = [b'input 1', b'input 2', b'input 3']
960 inputs = [b'input 1', b'input 2', b'input 3']
961
961
962 The zstd frame chain consists of:
962 The zstd frame chain consists of:
963
963
964 1. ``b'input 1'`` compressed in standalone/discrete mode
964 1. ``b'input 1'`` compressed in standalone/discrete mode
965 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
965 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
966 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
966 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
967
967
968 Each zstd frame **must** have the content size written.
968 Each zstd frame **must** have the content size written.
969
969
970 The following Python code can be used to produce a *prefix dictionary chain*::
970 The following Python code can be used to produce a *prefix dictionary chain*::
971
971
972 def make_chain(inputs):
972 def make_chain(inputs):
973 frames = []
973 frames = []
974
974
975 # First frame is compressed in standalone/discrete mode.
975 # First frame is compressed in standalone/discrete mode.
976 zctx = zstd.ZstdCompressor()
976 zctx = zstd.ZstdCompressor()
977 frames.append(zctx.compress(inputs[0]))
977 frames.append(zctx.compress(inputs[0]))
978
978
979 # Subsequent frames use the previous fulltext as a prefix dictionary
979 # Subsequent frames use the previous fulltext as a prefix dictionary
980 for i, raw in enumerate(inputs[1:]):
980 for i, raw in enumerate(inputs[1:]):
981 dict_data = zstd.ZstdCompressionDict(
981 dict_data = zstd.ZstdCompressionDict(
982 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
982 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
983 zctx = zstd.ZstdCompressor(dict_data=dict_data)
983 zctx = zstd.ZstdCompressor(dict_data=dict_data)
984 frames.append(zctx.compress(raw))
984 frames.append(zctx.compress(raw))
985
985
986 return frames
986 return frames
987
987
988 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
988 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
989 element in the input chain.
989 element in the input chain.
990
990
991
991
992 .. note::
992 .. note::
993
993
994 It is possible to implement *prefix dictionary chain* decompression
994 It is possible to implement *prefix dictionary chain* decompression
995 on top of other APIs. However, this function will likely be faster -
995 on top of other APIs. However, this function will likely be faster -
996 especially for long input chains - as it avoids the overhead of instantiating
996 especially for long input chains - as it avoids the overhead of instantiating
997 and passing around intermediate objects between C and Python.
997 and passing around intermediate objects between C and Python.
998
998
999 Multi-Threaded Compression
999 Multi-Threaded Compression
1000 --------------------------
1000 --------------------------
1001
1001
1002 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
1002 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
1003 of threads to use for compression. The way this works is that input is split
1003 of threads to use for compression. The way this works is that input is split
1004 into segments and each segment is fed into a worker pool for compression. Once
1004 into segments and each segment is fed into a worker pool for compression. Once
1005 a segment is compressed, it is flushed/appended to the output.
1005 a segment is compressed, it is flushed/appended to the output.
1006
1006
1007 .. note::
1007 .. note::
1008
1008
1009 These threads are created at the C layer and are not Python threads. So they
1009 These threads are created at the C layer and are not Python threads. So they
1010 work outside the GIL. It is therefore possible to CPU saturate multiple cores
1010 work outside the GIL. It is therefore possible to CPU saturate multiple cores
1011 from Python.
1011 from Python.
1012
1012
1013 The segment size for multi-threaded compression is chosen from the window size
1013 The segment size for multi-threaded compression is chosen from the window size
1014 of the compressor. This is derived from the ``window_log`` attribute of a
1014 of the compressor. This is derived from the ``window_log`` attribute of a
1015 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
1015 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
1016 range.
1016 range.
1017
1017
1018 If multi-threaded compression is requested and the input is smaller than the
1018 If multi-threaded compression is requested and the input is smaller than the
1019 configured segment size, only a single compression thread will be used. If the
1019 configured segment size, only a single compression thread will be used. If the
1020 input is smaller than the segment size multiplied by the thread pool size or
1020 input is smaller than the segment size multiplied by the thread pool size or
1021 if data cannot be delivered to the compressor fast enough, not all requested
1021 if data cannot be delivered to the compressor fast enough, not all requested
1022 compressor threads may be active simultaneously.
1022 compressor threads may be active simultaneously.
1023
1023
1024 Compared to non-multi-threaded compression, multi-threaded compression has
1024 Compared to non-multi-threaded compression, multi-threaded compression has
1025 higher per-operation overhead. This includes extra memory operations,
1025 higher per-operation overhead. This includes extra memory operations,
1026 thread creation, lock acquisition, etc.
1026 thread creation, lock acquisition, etc.
1027
1027
1028 Due to the nature of multi-threaded compression using *N* compression
1028 Due to the nature of multi-threaded compression using *N* compression
1029 *states*, the output from multi-threaded compression will likely be larger
1029 *states*, the output from multi-threaded compression will likely be larger
1030 than non-multi-threaded compression. The difference is usually small. But
1030 than non-multi-threaded compression. The difference is usually small. But
1031 there is a CPU/wall time versus size trade off that may warrant investigation.
1031 there is a CPU/wall time versus size trade off that may warrant investigation.
1032
1032
1033 Output from multi-threaded compression does not require any special handling
1033 Output from multi-threaded compression does not require any special handling
1034 on the decompression side. To the decompressor, data generated with single
1034 on the decompression side. To the decompressor, data generated with single
1035 threaded compressor looks the same as data generated by a multi-threaded
1035 threaded compressor looks the same as data generated by a multi-threaded
1036 compressor and does not require any special handling or additional resource
1036 compressor and does not require any special handling or additional resource
1037 requirements.
1037 requirements.
1038
1038
1039 Dictionary Creation and Management
1039 Dictionary Creation and Management
1040 ----------------------------------
1040 ----------------------------------
1041
1041
1042 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
1042 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
1043
1043
1044 Instances can be constructed from bytes::
1044 Instances can be constructed from bytes::
1045
1045
1046 dict_data = zstd.ZstdCompressionDict(data)
1046 dict_data = zstd.ZstdCompressionDict(data)
1047
1047
1048 It is possible to construct a dictionary from *any* data. If the data doesn't
1048 It is possible to construct a dictionary from *any* data. If the data doesn't
1049 begin with a magic header, it will be treated as a *prefix* dictionary.
1049 begin with a magic header, it will be treated as a *prefix* dictionary.
1050 *Prefix* dictionaries allow compression operations to reference raw data
1050 *Prefix* dictionaries allow compression operations to reference raw data
1051 within the dictionary.
1051 within the dictionary.
1052
1052
1053 It is possible to force the use of *prefix* dictionaries or to require a
1053 It is possible to force the use of *prefix* dictionaries or to require a
1054 dictionary header:
1054 dictionary header:
1055
1055
1056 dict_data = zstd.ZstdCompressionDict(data,
1056 dict_data = zstd.ZstdCompressionDict(data,
1057 dict_type=zstd.DICT_TYPE_RAWCONTENT)
1057 dict_type=zstd.DICT_TYPE_RAWCONTENT)
1058
1058
1059 dict_data = zstd.ZstdCompressionDict(data,
1059 dict_data = zstd.ZstdCompressionDict(data,
1060 dict_type=zstd.DICT_TYPE_FULLDICT)
1060 dict_type=zstd.DICT_TYPE_FULLDICT)
1061
1061
1062 You can see how many bytes are in the dictionary by calling ``len()``::
1062 You can see how many bytes are in the dictionary by calling ``len()``::
1063
1063
1064 dict_data = zstd.train_dictionary(size, samples)
1064 dict_data = zstd.train_dictionary(size, samples)
1065 dict_size = len(dict_data) # will not be larger than ``size``
1065 dict_size = len(dict_data) # will not be larger than ``size``
1066
1066
1067 Once you have a dictionary, you can pass it to the objects performing
1067 Once you have a dictionary, you can pass it to the objects performing
1068 compression and decompression::
1068 compression and decompression::
1069
1069
1070 dict_data = zstd.train_dictionary(131072, samples)
1070 dict_data = zstd.train_dictionary(131072, samples)
1071
1071
1072 cctx = zstd.ZstdCompressor(dict_data=dict_data)
1072 cctx = zstd.ZstdCompressor(dict_data=dict_data)
1073 for source_data in input_data:
1073 for source_data in input_data:
1074 compressed = cctx.compress(source_data)
1074 compressed = cctx.compress(source_data)
1075 # Do something with compressed data.
1075 # Do something with compressed data.
1076
1076
1077 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
1077 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
1078 for compressed_data in input_data:
1078 for compressed_data in input_data:
1079 buffer = io.BytesIO()
1079 buffer = io.BytesIO()
1080 with dctx.stream_writer(buffer) as decompressor:
1080 with dctx.stream_writer(buffer) as decompressor:
1081 decompressor.write(compressed_data)
1081 decompressor.write(compressed_data)
1082 # Do something with raw data in ``buffer``.
1082 # Do something with raw data in ``buffer``.
1083
1083
1084 Dictionaries have unique integer IDs. You can retrieve this ID via::
1084 Dictionaries have unique integer IDs. You can retrieve this ID via::
1085
1085
1086 dict_id = zstd.dictionary_id(dict_data)
1086 dict_id = zstd.dictionary_id(dict_data)
1087
1087
1088 You can obtain the raw data in the dict (useful for persisting and constructing
1088 You can obtain the raw data in the dict (useful for persisting and constructing
1089 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
1089 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
1090
1090
1091 dict_data = zstd.train_dictionary(size, samples)
1091 dict_data = zstd.train_dictionary(size, samples)
1092 raw_data = dict_data.as_bytes()
1092 raw_data = dict_data.as_bytes()
1093
1093
1094 By default, when a ``ZstdCompressionDict`` is *attached* to a
1094 By default, when a ``ZstdCompressionDict`` is *attached* to a
1095 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
1095 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
1096 dictionary for use. This is fine if only 1 compression operation is being
1096 dictionary for use. This is fine if only 1 compression operation is being
1097 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
1097 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
1098 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
1098 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
1099 this can add overhead.
1099 this can add overhead.
1100
1100
1101 It is possible to *precompute* the dictionary so it can readily be consumed
1101 It is possible to *precompute* the dictionary so it can readily be consumed
1102 by multiple ``ZstdCompressor`` instances::
1102 by multiple ``ZstdCompressor`` instances::
1103
1103
1104 d = zstd.ZstdCompressionDict(data)
1104 d = zstd.ZstdCompressionDict(data)
1105
1105
1106 # Precompute for compression level 3.
1106 # Precompute for compression level 3.
1107 d.precompute_compress(level=3)
1107 d.precompute_compress(level=3)
1108
1108
1109 # Precompute with specific compression parameters.
1109 # Precompute with specific compression parameters.
1110 params = zstd.ZstdCompressionParameters(...)
1110 params = zstd.ZstdCompressionParameters(...)
1111 d.precompute_compress(compression_params=params)
1111 d.precompute_compress(compression_params=params)
1112
1112
1113 .. note::
1113 .. note::
1114
1114
1115 When a dictionary is precomputed, the compression parameters used to
1115 When a dictionary is precomputed, the compression parameters used to
1116 precompute the dictionary overwrite some of the compression parameters
1116 precompute the dictionary overwrite some of the compression parameters
1117 specified to ``ZstdCompressor.__init__``.
1117 specified to ``ZstdCompressor.__init__``.
1118
1118
1119 Training Dictionaries
1119 Training Dictionaries
1120 ^^^^^^^^^^^^^^^^^^^^^
1120 ^^^^^^^^^^^^^^^^^^^^^
1121
1121
1122 Unless using *prefix* dictionaries, dictionary data is produced by *training*
1122 Unless using *prefix* dictionaries, dictionary data is produced by *training*
1123 on existing data::
1123 on existing data::
1124
1124
1125 dict_data = zstd.train_dictionary(size, samples)
1125 dict_data = zstd.train_dictionary(size, samples)
1126
1126
1127 This takes a target dictionary size and list of bytes instances and creates and
1127 This takes a target dictionary size and list of bytes instances and creates and
1128 returns a ``ZstdCompressionDict``.
1128 returns a ``ZstdCompressionDict``.
1129
1129
1130 The dictionary training mechanism is known as *cover*. More details about it are
1130 The dictionary training mechanism is known as *cover*. More details about it are
1131 available in the paper *Effective Construction of Relative Lempel-Ziv
1131 available in the paper *Effective Construction of Relative Lempel-Ziv
1132 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
1132 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
1133
1133
1134 The cover algorithm takes parameters ``k`` and ``d``. These are the
1134 The cover algorithm takes parameters ``k`` and ``d``. These are the
1135 *segment size* and *dmer size*, respectively. The returned dictionary
1135 *segment size* and *dmer size*, respectively. The returned dictionary
1136 instance created by this function has ``k`` and ``d`` attributes
1136 instance created by this function has ``k`` and ``d`` attributes
1137 containing the values for these parameters. If a ``ZstdCompressionDict``
1137 containing the values for these parameters. If a ``ZstdCompressionDict``
1138 is constructed from raw bytes data (a content-only dictionary), the
1138 is constructed from raw bytes data (a content-only dictionary), the
1139 ``k`` and ``d`` attributes will be ``0``.
1139 ``k`` and ``d`` attributes will be ``0``.
1140
1140
1141 The segment and dmer size parameters to the cover algorithm can either be
1141 The segment and dmer size parameters to the cover algorithm can either be
1142 specified manually or ``train_dictionary()`` can try multiple values
1142 specified manually or ``train_dictionary()`` can try multiple values
1143 and pick the best one, where *best* means the smallest compressed data size.
1143 and pick the best one, where *best* means the smallest compressed data size.
1144 This latter mode is called *optimization* mode.
1144 This latter mode is called *optimization* mode.
1145
1145
1146 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
1146 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
1147 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
1147 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
1148 struct) are defined, *optimization* mode is used with default parameter
1148 struct) are defined, *optimization* mode is used with default parameter
1149 values.
1149 values.
1150
1150
1151 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
1151 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
1152 with explicit control over those parameters. Specifying ``threads=0`` or
1152 with explicit control over those parameters. Specifying ``threads=0`` or
1153 ``threads=1`` can be used to engage *optimization* mode if other parameters
1153 ``threads=1`` can be used to engage *optimization* mode if other parameters
1154 are not defined.
1154 are not defined.
1155
1155
1156 Otherwise, non-*optimization* mode is used with the parameters specified.
1156 Otherwise, non-*optimization* mode is used with the parameters specified.
1157
1157
1158 This function takes the following arguments:
1158 This function takes the following arguments:
1159
1159
1160 dict_size
1160 dict_size
1161 Target size in bytes of the dictionary to generate.
1161 Target size in bytes of the dictionary to generate.
1162 samples
1162 samples
1163 A list of bytes holding samples the dictionary will be trained from.
1163 A list of bytes holding samples the dictionary will be trained from.
1164 k
1164 k
1165 Parameter to cover algorithm defining the segment size. A reasonable range
1165 Parameter to cover algorithm defining the segment size. A reasonable range
1166 is [16, 2048+].
1166 is [16, 2048+].
1167 d
1167 d
1168 Parameter to cover algorithm defining the dmer size. A reasonable range is
1168 Parameter to cover algorithm defining the dmer size. A reasonable range is
1169 [6, 16]. ``d`` must be less than or equal to ``k``.
1169 [6, 16]. ``d`` must be less than or equal to ``k``.
1170 dict_id
1170 dict_id
1171 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1171 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1172 a random value.
1172 a random value.
1173 steps
1173 steps
1174 Number of steps through ``k`` values to perform when trying parameter
1174 Number of steps through ``k`` values to perform when trying parameter
1175 variations.
1175 variations.
1176 threads
1176 threads
1177 Number of threads to use when trying parameter variations. Default is 0,
1177 Number of threads to use when trying parameter variations. Default is 0,
1178 which means to use a single thread. A negative value can be specified to
1178 which means to use a single thread. A negative value can be specified to
1179 use as many threads as there are detected logical CPUs.
1179 use as many threads as there are detected logical CPUs.
1180 level
1180 level
1181 Integer target compression level when trying parameter variations.
1181 Integer target compression level when trying parameter variations.
1182 notifications
1182 notifications
1183 Controls writing of informational messages to ``stderr``. ``0`` (the
1183 Controls writing of informational messages to ``stderr``. ``0`` (the
1184 default) means to write nothing. ``1`` writes errors. ``2`` writes
1184 default) means to write nothing. ``1`` writes errors. ``2`` writes
1185 progression info. ``3`` writes more details. And ``4`` writes all info.
1185 progression info. ``3`` writes more details. And ``4`` writes all info.
1186
1186
1187 Explicit Compression Parameters
1187 Explicit Compression Parameters
1188 -------------------------------
1188 -------------------------------
1189
1189
1190 Zstandard offers a high-level *compression level* that maps to lower-level
1190 Zstandard offers a high-level *compression level* that maps to lower-level
1191 compression parameters. For many consumers, this numeric level is the only
1191 compression parameters. For many consumers, this numeric level is the only
1192 compression setting you'll need to touch.
1192 compression setting you'll need to touch.
1193
1193
1194 But for advanced use cases, it might be desirable to tweak these lower-level
1194 But for advanced use cases, it might be desirable to tweak these lower-level
1195 settings.
1195 settings.
1196
1196
1197 The ``ZstdCompressionParameters`` type represents these low-level compression
1197 The ``ZstdCompressionParameters`` type represents these low-level compression
1198 settings.
1198 settings.
1199
1199
1200 Instances of this type can be constructed from a myriad of keyword arguments
1200 Instances of this type can be constructed from a myriad of keyword arguments
1201 (defined below) for complete low-level control over each adjustable
1201 (defined below) for complete low-level control over each adjustable
1202 compression setting.
1202 compression setting.
1203
1203
1204 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1204 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1205 given a desired compression level and target input and dictionary size
1205 given a desired compression level and target input and dictionary size
1206 using ``ZstdCompressionParameters.from_level()``. e.g.::
1206 using ``ZstdCompressionParameters.from_level()``. e.g.::
1207
1207
1208 # Derive compression settings for compression level 7.
1208 # Derive compression settings for compression level 7.
1209 params = zstd.ZstdCompressionParameters.from_level(7)
1209 params = zstd.ZstdCompressionParameters.from_level(7)
1210
1210
1211 # With an input size of 1MB
1211 # With an input size of 1MB
1212 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1212 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1213
1213
1214 Using ``from_level()``, it is also possible to override individual compression
1214 Using ``from_level()``, it is also possible to override individual compression
1215 parameters or to define additional settings that aren't automatically derived.
1215 parameters or to define additional settings that aren't automatically derived.
1216 e.g.::
1216 e.g.::
1217
1217
1218 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1218 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1219 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1219 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1220
1220
1221 Or you can define low-level compression settings directly::
1221 Or you can define low-level compression settings directly::
1222
1222
1223 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
1223 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
1224
1224
1225 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1225 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1226 configure a compressor::
1226 configure a compressor::
1227
1227
1228 cctx = zstd.ZstdCompressor(compression_params=params)
1228 cctx = zstd.ZstdCompressor(compression_params=params)
1229
1229
1230 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1230 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1231 follows:
1231 follows:
1232
1232
1233 * format
1233 * format
1234 * compression_level
1234 * compression_level
1235 * window_log
1235 * window_log
1236 * hash_log
1236 * hash_log
1237 * chain_log
1237 * chain_log
1238 * search_log
1238 * search_log
1239 * min_match
1239 * min_match
1240 * target_length
1240 * target_length
1241 * strategy
1241 * strategy
1242 * compression_strategy (deprecated: same as ``strategy``)
1242 * compression_strategy (deprecated: same as ``strategy``)
1243 * write_content_size
1243 * write_content_size
1244 * write_checksum
1244 * write_checksum
1245 * write_dict_id
1245 * write_dict_id
1246 * job_size
1246 * job_size
1247 * overlap_log
1247 * overlap_log
1248 * overlap_size_log (deprecated: same as ``overlap_log``)
1248 * overlap_size_log (deprecated: same as ``overlap_log``)
1249 * force_max_window
1249 * force_max_window
1250 * enable_ldm
1250 * enable_ldm
1251 * ldm_hash_log
1251 * ldm_hash_log
1252 * ldm_min_match
1252 * ldm_min_match
1253 * ldm_bucket_size_log
1253 * ldm_bucket_size_log
1254 * ldm_hash_rate_log
1254 * ldm_hash_rate_log
1255 * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
1255 * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
1256 * threads
1256 * threads
1257
1257
1258 Some of these are very low-level settings. It may help to consult the official
1258 Some of these are very low-level settings. It may help to consult the official
1259 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
1259 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
1260 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
1260 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
1261
1261
1262 Frame Inspection
1262 Frame Inspection
1263 ----------------
1263 ----------------
1264
1264
1265 Data emitted from zstd compression is encapsulated in a *frame*. This frame
1265 Data emitted from zstd compression is encapsulated in a *frame*. This frame
1266 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
1266 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
1267 the frame in more detail. For more info, see
1267 the frame in more detail. For more info, see
1268 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
1268 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
1269
1269
1270 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
1270 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
1271 instance and returns a ``FrameParameters`` object describing the frame.
1271 instance and returns a ``FrameParameters`` object describing the frame.
1272
1272
1273 Depending on which fields are present in the frame and their values, the
1273 Depending on which fields are present in the frame and their values, the
1274 length of the frame parameters varies. If insufficient bytes are passed
1274 length of the frame parameters varies. If insufficient bytes are passed
1275 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
1275 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
1276 frame parameters can be parsed, pass in at least 18 bytes.
1276 frame parameters can be parsed, pass in at least 18 bytes.
1277
1277
1278 ``FrameParameters`` instances have the following attributes:
1278 ``FrameParameters`` instances have the following attributes:
1279
1279
1280 content_size
1280 content_size
1281 Integer size of original, uncompressed content. This will be ``0`` if the
1281 Integer size of original, uncompressed content. This will be ``0`` if the
1282 original content size isn't written to the frame (controlled with the
1282 original content size isn't written to the frame (controlled with the
1283 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
1283 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
1284 content size was ``0``.
1284 content size was ``0``.
1285
1285
1286 window_size
1286 window_size
1287 Integer size of maximum back-reference distance in compressed data.
1287 Integer size of maximum back-reference distance in compressed data.
1288
1288
1289 dict_id
1289 dict_id
1290 Integer of dictionary ID used for compression. ``0`` if no dictionary
1290 Integer of dictionary ID used for compression. ``0`` if no dictionary
1291 ID was used or if the dictionary ID was ``0``.
1291 ID was used or if the dictionary ID was ``0``.
1292
1292
1293 has_checksum
1293 has_checksum
1294 Bool indicating whether a 4 byte content checksum is stored at the end
1294 Bool indicating whether a 4 byte content checksum is stored at the end
1295 of the frame.
1295 of the frame.
1296
1296
1297 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1297 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1298 header.
1298 header.
1299
1299
1300 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1300 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1301 the frame header. ``-1`` means the content size is unknown. ``0`` means
1301 the frame header. ``-1`` means the content size is unknown. ``0`` means
1302 an empty frame. The content size is usually correct. However, it may not
1302 an empty frame. The content size is usually correct. However, it may not
1303 be accurate.
1303 be accurate.
1304
1304
1305 Misc Functionality
1305 Misc Functionality
1306 ------------------
1306 ------------------
1307
1307
1308 estimate_decompression_context_size()
1308 estimate_decompression_context_size()
1309 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1309 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1310
1310
1311 Estimate the memory size requirements for a decompressor instance.
1311 Estimate the memory size requirements for a decompressor instance.
1312
1312
1313 Constants
1313 Constants
1314 ---------
1314 ---------
1315
1315
1316 The following module constants/attributes are exposed:
1316 The following module constants/attributes are exposed:
1317
1317
1318 ZSTD_VERSION
1318 ZSTD_VERSION
1319 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1319 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1320 ``(1, 0, 0)``
1320 ``(1, 0, 0)``
1321 MAX_COMPRESSION_LEVEL
1321 MAX_COMPRESSION_LEVEL
1322 Integer max compression level accepted by compression functions
1322 Integer max compression level accepted by compression functions
1323 COMPRESSION_RECOMMENDED_INPUT_SIZE
1323 COMPRESSION_RECOMMENDED_INPUT_SIZE
1324 Recommended chunk size to feed to compressor functions
1324 Recommended chunk size to feed to compressor functions
1325 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1325 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1326 Recommended chunk size for compression output
1326 Recommended chunk size for compression output
1327 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1327 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1328 Recommended chunk size to feed into decompressor functions
1328 Recommended chunk size to feed into decompressor functions
1329 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1329 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1330 Recommended chunk size for decompression output
1330 Recommended chunk size for decompression output
1331
1331
1332 FRAME_HEADER
1332 FRAME_HEADER
1333 bytes containing header of the Zstandard frame
1333 bytes containing header of the Zstandard frame
1334 MAGIC_NUMBER
1334 MAGIC_NUMBER
1335 Frame header as an integer
1335 Frame header as an integer
1336
1336
1337 FLUSH_BLOCK
1337 FLUSH_BLOCK
1338 Flushing behavior that denotes to flush a zstd block. A decompressor will
1338 Flushing behavior that denotes to flush a zstd block. A decompressor will
1339 be able to decode all data fed into the compressor so far.
1339 be able to decode all data fed into the compressor so far.
1340 FLUSH_FRAME
1340 FLUSH_FRAME
1341 Flushing behavior that denotes to end a zstd frame. Any new data fed
1341 Flushing behavior that denotes to end a zstd frame. Any new data fed
1342 to the compressor will start a new frame.
1342 to the compressor will start a new frame.
1343
1343
1344 CONTENTSIZE_UNKNOWN
1344 CONTENTSIZE_UNKNOWN
1345 Value for content size when the content size is unknown.
1345 Value for content size when the content size is unknown.
1346 CONTENTSIZE_ERROR
1346 CONTENTSIZE_ERROR
1347 Value for content size when content size couldn't be determined.
1347 Value for content size when content size couldn't be determined.
1348
1348
1349 WINDOWLOG_MIN
1349 WINDOWLOG_MIN
1350 Minimum value for compression parameter
1350 Minimum value for compression parameter
1351 WINDOWLOG_MAX
1351 WINDOWLOG_MAX
1352 Maximum value for compression parameter
1352 Maximum value for compression parameter
1353 CHAINLOG_MIN
1353 CHAINLOG_MIN
1354 Minimum value for compression parameter
1354 Minimum value for compression parameter
1355 CHAINLOG_MAX
1355 CHAINLOG_MAX
1356 Maximum value for compression parameter
1356 Maximum value for compression parameter
1357 HASHLOG_MIN
1357 HASHLOG_MIN
1358 Minimum value for compression parameter
1358 Minimum value for compression parameter
1359 HASHLOG_MAX
1359 HASHLOG_MAX
1360 Maximum value for compression parameter
1360 Maximum value for compression parameter
1361 SEARCHLOG_MIN
1361 SEARCHLOG_MIN
1362 Minimum value for compression parameter
1362 Minimum value for compression parameter
1363 SEARCHLOG_MAX
1363 SEARCHLOG_MAX
1364 Maximum value for compression parameter
1364 Maximum value for compression parameter
1365 MINMATCH_MIN
1365 MINMATCH_MIN
1366 Minimum value for compression parameter
1366 Minimum value for compression parameter
1367 MINMATCH_MAX
1367 MINMATCH_MAX
1368 Maximum value for compression parameter
1368 Maximum value for compression parameter
1369 SEARCHLENGTH_MIN
1369 SEARCHLENGTH_MIN
1370 Minimum value for compression parameter
1370 Minimum value for compression parameter
1371
1371
1372 Deprecated: use ``MINMATCH_MIN``
1372 Deprecated: use ``MINMATCH_MIN``
1373 SEARCHLENGTH_MAX
1373 SEARCHLENGTH_MAX
1374 Maximum value for compression parameter
1374 Maximum value for compression parameter
1375
1375
1376 Deprecated: use ``MINMATCH_MAX``
1376 Deprecated: use ``MINMATCH_MAX``
1377 TARGETLENGTH_MIN
1377 TARGETLENGTH_MIN
1378 Minimum value for compression parameter
1378 Minimum value for compression parameter
1379 STRATEGY_FAST
1379 STRATEGY_FAST
1380 Compression strategy
1380 Compression strategy
1381 STRATEGY_DFAST
1381 STRATEGY_DFAST
1382 Compression strategy
1382 Compression strategy
1383 STRATEGY_GREEDY
1383 STRATEGY_GREEDY
1384 Compression strategy
1384 Compression strategy
1385 STRATEGY_LAZY
1385 STRATEGY_LAZY
1386 Compression strategy
1386 Compression strategy
1387 STRATEGY_LAZY2
1387 STRATEGY_LAZY2
1388 Compression strategy
1388 Compression strategy
1389 STRATEGY_BTLAZY2
1389 STRATEGY_BTLAZY2
1390 Compression strategy
1390 Compression strategy
1391 STRATEGY_BTOPT
1391 STRATEGY_BTOPT
1392 Compression strategy
1392 Compression strategy
1393 STRATEGY_BTULTRA
1393 STRATEGY_BTULTRA
1394 Compression strategy
1394 Compression strategy
1395 STRATEGY_BTULTRA2
1395 STRATEGY_BTULTRA2
1396 Compression strategy
1396 Compression strategy
1397
1397
1398 FORMAT_ZSTD1
1398 FORMAT_ZSTD1
1399 Zstandard frame format
1399 Zstandard frame format
1400 FORMAT_ZSTD1_MAGICLESS
1400 FORMAT_ZSTD1_MAGICLESS
1401 Zstandard frame format without magic header
1401 Zstandard frame format without magic header
1402
1402
1403 Performance Considerations
1403 Performance Considerations
1404 --------------------------
1404 --------------------------
1405
1405
1406 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1406 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1407 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1407 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1408 or ``ZstdDecompressor`` instance for multiple operations is faster than
1408 or ``ZstdDecompressor`` instance for multiple operations is faster than
1409 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1409 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1410 operation. The differences are magnified as the size of data decreases. For
1410 operation. The differences are magnified as the size of data decreases. For
1411 example, the difference between *context* reuse and non-reuse for 100,000
1411 example, the difference between *context* reuse and non-reuse for 100,000
1412 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1412 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
1413 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1413 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1414 time spent doing compression dwarfs time spent creating new *contexts*).
1414 time spent doing compression dwarfs time spent creating new *contexts*).
1415
1415
1416 Buffer Types
1416 Buffer Types
1417 ------------
1417 ------------
1418
1418
1419 The API exposes a handful of custom types for interfacing with memory buffers.
1419 The API exposes a handful of custom types for interfacing with memory buffers.
1420 The primary goal of these types is to facilitate efficient multi-object
1420 The primary goal of these types is to facilitate efficient multi-object
1421 operations.
1421 operations.
1422
1422
1423 The essential idea is to have a single memory allocation provide backing
1423 The essential idea is to have a single memory allocation provide backing
1424 storage for multiple logical objects. This has 2 main advantages: fewer
1424 storage for multiple logical objects. This has 2 main advantages: fewer
1425 allocations and optimal memory access patterns. This avoids having to allocate
1425 allocations and optimal memory access patterns. This avoids having to allocate
1426 a Python object for each logical object and furthermore ensures that access of
1426 a Python object for each logical object and furthermore ensures that access of
1427 data for objects can be sequential (read: fast) in memory.
1427 data for objects can be sequential (read: fast) in memory.
1428
1428
1429 BufferWithSegments
1429 BufferWithSegments
1430 ^^^^^^^^^^^^^^^^^^
1430 ^^^^^^^^^^^^^^^^^^
1431
1431
1432 The ``BufferWithSegments`` type represents a memory buffer containing N
1432 The ``BufferWithSegments`` type represents a memory buffer containing N
1433 discrete items of known lengths (segments). It is essentially a fixed size
1433 discrete items of known lengths (segments). It is essentially a fixed size
1434 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1434 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1435 unsigned native endian integers defining the byte offset and length of each
1435 unsigned native endian integers defining the byte offset and length of each
1436 segment within the buffer.
1436 segment within the buffer.
1437
1437
1438 Instances behave like containers.
1438 Instances behave like containers.
1439
1439
1440 ``len()`` returns the number of segments within the instance.
1440 ``len()`` returns the number of segments within the instance.
1441
1441
1442 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1442 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1443 individual segment within the backing buffer. That returned object references
1443 individual segment within the backing buffer. That returned object references
1444 (not copies) memory. This means that iterating all objects doesn't copy
1444 (not copies) memory. This means that iterating all objects doesn't copy
1445 data within the buffer.
1445 data within the buffer.
1446
1446
1447 The ``.size`` attribute contains the total size in bytes of the backing
1447 The ``.size`` attribute contains the total size in bytes of the backing
1448 buffer.
1448 buffer.
1449
1449
1450 Instances conform to the buffer protocol. So a reference to the backing bytes
1450 Instances conform to the buffer protocol. So a reference to the backing bytes
1451 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1451 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1452 be obtained via ``.tobytes()``.
1452 be obtained via ``.tobytes()``.
1453
1453
1454 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1454 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1455 segments within the buffer. It is a ``BufferSegments`` type.
1455 segments within the buffer. It is a ``BufferSegments`` type.
1456
1456
1457 BufferSegment
1457 BufferSegment
1458 ^^^^^^^^^^^^^
1458 ^^^^^^^^^^^^^
1459
1459
1460 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1460 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1461 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1461 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1462
1462
1463 ``len()`` returns the length of the segment in bytes.
1463 ``len()`` returns the length of the segment in bytes.
1464
1464
1465 ``.offset`` contains the byte offset of this segment within its parent
1465 ``.offset`` contains the byte offset of this segment within its parent
1466 ``BufferWithSegments`` instance.
1466 ``BufferWithSegments`` instance.
1467
1467
1468 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1468 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1469 obtain a ``bytes`` instance with a copy of the backing bytes.
1469 obtain a ``bytes`` instance with a copy of the backing bytes.
1470
1470
1471 BufferSegments
1471 BufferSegments
1472 ^^^^^^^^^^^^^^
1472 ^^^^^^^^^^^^^^
1473
1473
1474 This type represents an array of ``(offset, length)`` integers defining segments
1474 This type represents an array of ``(offset, length)`` integers defining segments
1475 within a ``BufferWithSegments``.
1475 within a ``BufferWithSegments``.
1476
1476
1477 The array members are 64-bit unsigned integers using host/native bit order.
1477 The array members are 64-bit unsigned integers using host/native bit order.
1478
1478
1479 Instances conform to the buffer protocol.
1479 Instances conform to the buffer protocol.
1480
1480
1481 BufferWithSegmentsCollection
1481 BufferWithSegmentsCollection
1482 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1482 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1483
1483
1484 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1484 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1485 of multiple ``BufferWithSegments`` instances.
1485 of multiple ``BufferWithSegments`` instances.
1486
1486
1487 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1487 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1488 resulting object behaves like an ordered sequence whose members are the
1488 resulting object behaves like an ordered sequence whose members are the
1489 segments within each ``BufferWithSegments``.
1489 segments within each ``BufferWithSegments``.
1490
1490
1491 ``len()`` returns the number of segments within all ``BufferWithSegments``
1491 ``len()`` returns the number of segments within all ``BufferWithSegments``
1492 instances.
1492 instances.
1493
1493
1494 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1494 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1495 that offset as if all ``BufferWithSegments`` instances were a single
1495 that offset as if all ``BufferWithSegments`` instances were a single
1496 entity.
1496 entity.
1497
1497
1498 If the object is composed of 2 ``BufferWithSegments`` instances with the
1498 If the object is composed of 2 ``BufferWithSegments`` instances with the
1499 first having 2 segments and the second have 3 segments, then ``b[0]``
1499 first having 2 segments and the second have 3 segments, then ``b[0]``
1500 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1500 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1501 and ``b[4]`` access segments from the second.
1501 and ``b[4]`` access segments from the second.
1502
1502
1503 Choosing an API
1503 Choosing an API
1504 ===============
1504 ===============
1505
1505
1506 There are multiple APIs for performing compression and decompression. This is
1506 There are multiple APIs for performing compression and decompression. This is
1507 because different applications have different needs and the library wants to
1507 because different applications have different needs and the library wants to
1508 facilitate optimal use in as many use cases as possible.
1508 facilitate optimal use in as many use cases as possible.
1509
1509
1510 From a high-level, APIs are divided into *one-shot* and *streaming*: either you
1510 From a high-level, APIs are divided into *one-shot* and *streaming*: either you
1511 are operating on all data at once or you operate on it piecemeal.
1511 are operating on all data at once or you operate on it piecemeal.
1512
1512
1513 The *one-shot* APIs are useful for small data, where the input or output
1513 The *one-shot* APIs are useful for small data, where the input or output
1514 size is known. (The size can come from a buffer length, file size, or
1514 size is known. (The size can come from a buffer length, file size, or
1515 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1515 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1516 input and output must fit in memory simultaneously. For say a 4 GB input,
1516 input and output must fit in memory simultaneously. For say a 4 GB input,
1517 this is often not feasible.
1517 this is often not feasible.
1518
1518
1519 The *one-shot* APIs also perform all work as a single operation. So, if you
1519 The *one-shot* APIs also perform all work as a single operation. So, if you
1520 feed it large input, it could take a long time for the function to return.
1520 feed it large input, it could take a long time for the function to return.
1521
1521
1522 The streaming APIs do not have the limitations of the simple API. But the
1522 The streaming APIs do not have the limitations of the simple API. But the
1523 price you pay for this flexibility is that they are more complex than a
1523 price you pay for this flexibility is that they are more complex than a
1524 single function call.
1524 single function call.
1525
1525
1526 The streaming APIs put the caller in control of compression and decompression
1526 The streaming APIs put the caller in control of compression and decompression
1527 behavior by allowing them to directly control either the input or output side
1527 behavior by allowing them to directly control either the input or output side
1528 of the operation.
1528 of the operation.
1529
1529
1530 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1530 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1531 has full control over the input to the compression or decompression stream.
1531 has full control over the input to the compression or decompression stream.
1532 They can directly choose when new data is operated on.
1532 They can directly choose when new data is operated on.
1533
1533
1534 With the *streaming output* APIs, the caller has full control over the output
1534 With the *streaming output* APIs, the caller has full control over the output
1535 of the compression or decompression stream. It can choose when to receive
1535 of the compression or decompression stream. It can choose when to receive
1536 new data.
1536 new data.
1537
1537
1538 When using the *streaming* APIs that operate on file-like or stream objects,
1538 When using the *streaming* APIs that operate on file-like or stream objects,
1539 it is important to consider what happens in that object when I/O is requested.
1539 it is important to consider what happens in that object when I/O is requested.
1540 There is potential for long pauses as data is read or written from the
1540 There is potential for long pauses as data is read or written from the
1541 underlying stream (say from interacting with a filesystem or network). This
1541 underlying stream (say from interacting with a filesystem or network). This
1542 could add considerable overhead.
1542 could add considerable overhead.
1543
1543
1544 Thread Safety
1544 Thread Safety
1545 =============
1545 =============
1546
1546
1547 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1547 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1548 about thread safety. Do not operate on the same ``ZstdCompressor`` and
1548 about thread safety. Do not operate on the same ``ZstdCompressor`` and
1549 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1549 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1550 fine to have different threads call into a single instance, just not at the
1550 fine to have different threads call into a single instance, just not at the
1551 same time.
1551 same time.
1552
1552
1553 Some operations require multiple function calls to complete. e.g. streaming
1553 Some operations require multiple function calls to complete. e.g. streaming
1554 operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1554 operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1555 for simultaneously active operations. e.g. you must not start a streaming
1555 for simultaneously active operations. e.g. you must not start a streaming
1556 operation when another streaming operation is already active.
1556 operation when another streaming operation is already active.
1557
1557
1558 The C extension releases the GIL during non-trivial calls into the zstd C
1558 The C extension releases the GIL during non-trivial calls into the zstd C
1559 API. Non-trivial calls are notably compression and decompression. Trivial
1559 API. Non-trivial calls are notably compression and decompression. Trivial
1560 calls are things like parsing frame parameters. Where the GIL is released
1560 calls are things like parsing frame parameters. Where the GIL is released
1561 is considered an implementation detail and can change in any release.
1561 is considered an implementation detail and can change in any release.
1562
1562
1563 APIs that accept bytes-like objects don't enforce that the underlying object
1563 APIs that accept bytes-like objects don't enforce that the underlying object
1564 is read-only. However, it is assumed that the passed object is read-only for
1564 is read-only. However, it is assumed that the passed object is read-only for
1565 the duration of the function call. It is possible to pass a mutable object
1565 the duration of the function call. It is possible to pass a mutable object
1566 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1566 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1567 released, and mutate the object from another thread. Such a race condition
1567 released, and mutate the object from another thread. Such a race condition
1568 is a bug in the consumer of python-zstandard. Most Python data types are
1568 is a bug in the consumer of python-zstandard. Most Python data types are
1569 immutable, so unless you are doing something fancy, you don't need to
1569 immutable, so unless you are doing something fancy, you don't need to
1570 worry about this.
1570 worry about this.
1571
1571
1572 Note on Zstandard's *Experimental* API
1572 Note on Zstandard's *Experimental* API
1573 ======================================
1573 ======================================
1574
1574
1575 Many of the Zstandard APIs used by this module are marked as *experimental*
1575 Many of the Zstandard APIs used by this module are marked as *experimental*
1576 within the Zstandard project.
1576 within the Zstandard project.
1577
1577
1578 It is unclear how Zstandard's C API will evolve over time, especially with
1578 It is unclear how Zstandard's C API will evolve over time, especially with
1579 regards to this *experimental* functionality. We will try to maintain
1579 regards to this *experimental* functionality. We will try to maintain
1580 backwards compatibility at the Python API level. However, we cannot
1580 backwards compatibility at the Python API level. However, we cannot
1581 guarantee this for things not under our control.
1581 guarantee this for things not under our control.
1582
1582
1583 Since a copy of the Zstandard source code is distributed with this
1583 Since a copy of the Zstandard source code is distributed with this
1584 module and since we compile against it, the behavior of a specific
1584 module and since we compile against it, the behavior of a specific
1585 version of this module should be constant for all of time. So if you
1585 version of this module should be constant for all of time. So if you
1586 pin the version of this module used in your projects (which is a Python
1586 pin the version of this module used in your projects (which is a Python
1587 best practice), you should be shielded from unwanted future changes.
1587 best practice), you should be shielded from unwanted future changes.
1588
1588
1589 Donate
1589 Donate
1590 ======
1590 ======
1591
1591
1592 A lot of time has been invested into this project by the author.
1592 A lot of time has been invested into this project by the author.
1593
1593
1594 If you find this project useful and would like to thank the author for
1594 If you find this project useful and would like to thank the author for
1595 their work, consider donating some money. Any amount is appreciated.
1595 their work, consider donating some money. Any amount is appreciated.
1596
1596
1597 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1597 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1598 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1598 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1599 :alt: Donate via PayPal
1599 :alt: Donate via PayPal
1600
1600
1601 .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
1601 .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
1602 :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
1602 :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
@@ -1,359 +1,359 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #define PY_SSIZE_T_CLEAN
9 #define PY_SSIZE_T_CLEAN
10 #include <Python.h>
10 #include <Python.h>
11 #include "structmember.h"
11 #include "structmember.h"
12
12
13 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZSTD_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
15 #include <zstd.h>
15 #include <zstd.h>
16 #include <zdict.h>
16 #include <zdict.h>
17
17
18 /* Remember to change the string in zstandard/__init__ as well */
18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.12.0"
19 #define PYTHON_ZSTANDARD_VERSION "0.13.0"
20
20
21 typedef enum {
21 typedef enum {
22 compressorobj_flush_finish,
22 compressorobj_flush_finish,
23 compressorobj_flush_block,
23 compressorobj_flush_block,
24 } CompressorObj_Flush;
24 } CompressorObj_Flush;
25
25
26 /*
26 /*
27 Represents a ZstdCompressionParameters type.
27 Represents a ZstdCompressionParameters type.
28
28
29 This type holds all the low-level compression parameters that can be set.
29 This type holds all the low-level compression parameters that can be set.
30 */
30 */
31 typedef struct {
31 typedef struct {
32 PyObject_HEAD
32 PyObject_HEAD
33 ZSTD_CCtx_params* params;
33 ZSTD_CCtx_params* params;
34 } ZstdCompressionParametersObject;
34 } ZstdCompressionParametersObject;
35
35
36 extern PyTypeObject ZstdCompressionParametersType;
36 extern PyTypeObject ZstdCompressionParametersType;
37
37
38 /*
38 /*
39 Represents a FrameParameters type.
39 Represents a FrameParameters type.
40
40
41 This type is basically a wrapper around ZSTD_frameParams.
41 This type is basically a wrapper around ZSTD_frameParams.
42 */
42 */
43 typedef struct {
43 typedef struct {
44 PyObject_HEAD
44 PyObject_HEAD
45 unsigned long long frameContentSize;
45 unsigned long long frameContentSize;
46 unsigned long long windowSize;
46 unsigned long long windowSize;
47 unsigned dictID;
47 unsigned dictID;
48 char checksumFlag;
48 char checksumFlag;
49 } FrameParametersObject;
49 } FrameParametersObject;
50
50
51 extern PyTypeObject FrameParametersType;
51 extern PyTypeObject FrameParametersType;
52
52
53 /*
53 /*
54 Represents a ZstdCompressionDict type.
54 Represents a ZstdCompressionDict type.
55
55
56 Instances hold data used for a zstd compression dictionary.
56 Instances hold data used for a zstd compression dictionary.
57 */
57 */
58 typedef struct {
58 typedef struct {
59 PyObject_HEAD
59 PyObject_HEAD
60
60
61 /* Pointer to dictionary data. Owned by self. */
61 /* Pointer to dictionary data. Owned by self. */
62 void* dictData;
62 void* dictData;
63 /* Size of dictionary data. */
63 /* Size of dictionary data. */
64 size_t dictSize;
64 size_t dictSize;
65 ZSTD_dictContentType_e dictType;
65 ZSTD_dictContentType_e dictType;
66 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
66 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
67 unsigned k;
67 unsigned k;
68 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
68 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
69 unsigned d;
69 unsigned d;
70 /* Digested dictionary, suitable for reuse. */
70 /* Digested dictionary, suitable for reuse. */
71 ZSTD_CDict* cdict;
71 ZSTD_CDict* cdict;
72 ZSTD_DDict* ddict;
72 ZSTD_DDict* ddict;
73 } ZstdCompressionDict;
73 } ZstdCompressionDict;
74
74
75 extern PyTypeObject ZstdCompressionDictType;
75 extern PyTypeObject ZstdCompressionDictType;
76
76
77 /*
77 /*
78 Represents a ZstdCompressor type.
78 Represents a ZstdCompressor type.
79 */
79 */
80 typedef struct {
80 typedef struct {
81 PyObject_HEAD
81 PyObject_HEAD
82
82
83 /* Number of threads to use for operations. */
83 /* Number of threads to use for operations. */
84 unsigned int threads;
84 unsigned int threads;
85 /* Pointer to compression dictionary to use. NULL if not using dictionary
85 /* Pointer to compression dictionary to use. NULL if not using dictionary
86 compression. */
86 compression. */
87 ZstdCompressionDict* dict;
87 ZstdCompressionDict* dict;
88 /* Compression context to use. Populated during object construction. */
88 /* Compression context to use. Populated during object construction. */
89 ZSTD_CCtx* cctx;
89 ZSTD_CCtx* cctx;
90 /* Compression parameters in use. */
90 /* Compression parameters in use. */
91 ZSTD_CCtx_params* params;
91 ZSTD_CCtx_params* params;
92 } ZstdCompressor;
92 } ZstdCompressor;
93
93
94 extern PyTypeObject ZstdCompressorType;
94 extern PyTypeObject ZstdCompressorType;
95
95
96 typedef struct {
96 typedef struct {
97 PyObject_HEAD
97 PyObject_HEAD
98
98
99 ZstdCompressor* compressor;
99 ZstdCompressor* compressor;
100 ZSTD_outBuffer output;
100 ZSTD_outBuffer output;
101 int finished;
101 int finished;
102 } ZstdCompressionObj;
102 } ZstdCompressionObj;
103
103
104 extern PyTypeObject ZstdCompressionObjType;
104 extern PyTypeObject ZstdCompressionObjType;
105
105
106 typedef struct {
106 typedef struct {
107 PyObject_HEAD
107 PyObject_HEAD
108
108
109 ZstdCompressor* compressor;
109 ZstdCompressor* compressor;
110 PyObject* writer;
110 PyObject* writer;
111 ZSTD_outBuffer output;
111 ZSTD_outBuffer output;
112 size_t outSize;
112 size_t outSize;
113 int entered;
113 int entered;
114 int closed;
114 int closed;
115 int writeReturnRead;
115 int writeReturnRead;
116 unsigned long long bytesCompressed;
116 unsigned long long bytesCompressed;
117 } ZstdCompressionWriter;
117 } ZstdCompressionWriter;
118
118
119 extern PyTypeObject ZstdCompressionWriterType;
119 extern PyTypeObject ZstdCompressionWriterType;
120
120
121 typedef struct {
121 typedef struct {
122 PyObject_HEAD
122 PyObject_HEAD
123
123
124 ZstdCompressor* compressor;
124 ZstdCompressor* compressor;
125 PyObject* reader;
125 PyObject* reader;
126 Py_buffer buffer;
126 Py_buffer buffer;
127 Py_ssize_t bufferOffset;
127 Py_ssize_t bufferOffset;
128 size_t inSize;
128 size_t inSize;
129 size_t outSize;
129 size_t outSize;
130
130
131 ZSTD_inBuffer input;
131 ZSTD_inBuffer input;
132 ZSTD_outBuffer output;
132 ZSTD_outBuffer output;
133 int finishedOutput;
133 int finishedOutput;
134 int finishedInput;
134 int finishedInput;
135 PyObject* readResult;
135 PyObject* readResult;
136 } ZstdCompressorIterator;
136 } ZstdCompressorIterator;
137
137
138 extern PyTypeObject ZstdCompressorIteratorType;
138 extern PyTypeObject ZstdCompressorIteratorType;
139
139
140 typedef struct {
140 typedef struct {
141 PyObject_HEAD
141 PyObject_HEAD
142
142
143 ZstdCompressor* compressor;
143 ZstdCompressor* compressor;
144 PyObject* reader;
144 PyObject* reader;
145 Py_buffer buffer;
145 Py_buffer buffer;
146 size_t readSize;
146 size_t readSize;
147
147
148 int entered;
148 int entered;
149 int closed;
149 int closed;
150 unsigned long long bytesCompressed;
150 unsigned long long bytesCompressed;
151
151
152 ZSTD_inBuffer input;
152 ZSTD_inBuffer input;
153 ZSTD_outBuffer output;
153 ZSTD_outBuffer output;
154 int finishedInput;
154 int finishedInput;
155 int finishedOutput;
155 int finishedOutput;
156 PyObject* readResult;
156 PyObject* readResult;
157 } ZstdCompressionReader;
157 } ZstdCompressionReader;
158
158
159 extern PyTypeObject ZstdCompressionReaderType;
159 extern PyTypeObject ZstdCompressionReaderType;
160
160
161 typedef struct {
161 typedef struct {
162 PyObject_HEAD
162 PyObject_HEAD
163
163
164 ZstdCompressor* compressor;
164 ZstdCompressor* compressor;
165 ZSTD_inBuffer input;
165 ZSTD_inBuffer input;
166 ZSTD_outBuffer output;
166 ZSTD_outBuffer output;
167 Py_buffer inBuffer;
167 Py_buffer inBuffer;
168 int finished;
168 int finished;
169 size_t chunkSize;
169 size_t chunkSize;
170 } ZstdCompressionChunker;
170 } ZstdCompressionChunker;
171
171
172 extern PyTypeObject ZstdCompressionChunkerType;
172 extern PyTypeObject ZstdCompressionChunkerType;
173
173
174 typedef enum {
174 typedef enum {
175 compressionchunker_mode_normal,
175 compressionchunker_mode_normal,
176 compressionchunker_mode_flush,
176 compressionchunker_mode_flush,
177 compressionchunker_mode_finish,
177 compressionchunker_mode_finish,
178 } CompressionChunkerMode;
178 } CompressionChunkerMode;
179
179
180 typedef struct {
180 typedef struct {
181 PyObject_HEAD
181 PyObject_HEAD
182
182
183 ZstdCompressionChunker* chunker;
183 ZstdCompressionChunker* chunker;
184 CompressionChunkerMode mode;
184 CompressionChunkerMode mode;
185 } ZstdCompressionChunkerIterator;
185 } ZstdCompressionChunkerIterator;
186
186
187 extern PyTypeObject ZstdCompressionChunkerIteratorType;
187 extern PyTypeObject ZstdCompressionChunkerIteratorType;
188
188
189 typedef struct {
189 typedef struct {
190 PyObject_HEAD
190 PyObject_HEAD
191
191
192 ZSTD_DCtx* dctx;
192 ZSTD_DCtx* dctx;
193 ZstdCompressionDict* dict;
193 ZstdCompressionDict* dict;
194 size_t maxWindowSize;
194 size_t maxWindowSize;
195 ZSTD_format_e format;
195 ZSTD_format_e format;
196 } ZstdDecompressor;
196 } ZstdDecompressor;
197
197
198 extern PyTypeObject ZstdDecompressorType;
198 extern PyTypeObject ZstdDecompressorType;
199
199
200 typedef struct {
200 typedef struct {
201 PyObject_HEAD
201 PyObject_HEAD
202
202
203 ZstdDecompressor* decompressor;
203 ZstdDecompressor* decompressor;
204 size_t outSize;
204 size_t outSize;
205 int finished;
205 int finished;
206 } ZstdDecompressionObj;
206 } ZstdDecompressionObj;
207
207
208 extern PyTypeObject ZstdDecompressionObjType;
208 extern PyTypeObject ZstdDecompressionObjType;
209
209
210 typedef struct {
210 typedef struct {
211 PyObject_HEAD
211 PyObject_HEAD
212
212
213 /* Parent decompressor to which this object is associated. */
213 /* Parent decompressor to which this object is associated. */
214 ZstdDecompressor* decompressor;
214 ZstdDecompressor* decompressor;
215 /* Object to read() from (if reading from a stream). */
215 /* Object to read() from (if reading from a stream). */
216 PyObject* reader;
216 PyObject* reader;
217 /* Size for read() operations on reader. */
217 /* Size for read() operations on reader. */
218 size_t readSize;
218 size_t readSize;
219 /* Whether a read() can return data spanning multiple zstd frames. */
219 /* Whether a read() can return data spanning multiple zstd frames. */
220 int readAcrossFrames;
220 int readAcrossFrames;
221 /* Buffer to read from (if reading from a buffer). */
221 /* Buffer to read from (if reading from a buffer). */
222 Py_buffer buffer;
222 Py_buffer buffer;
223
223
224 /* Whether the context manager is active. */
224 /* Whether the context manager is active. */
225 int entered;
225 int entered;
226 /* Whether we've closed the stream. */
226 /* Whether we've closed the stream. */
227 int closed;
227 int closed;
228
228
229 /* Number of bytes decompressed and returned to user. */
229 /* Number of bytes decompressed and returned to user. */
230 unsigned long long bytesDecompressed;
230 unsigned long long bytesDecompressed;
231
231
232 /* Tracks data going into decompressor. */
232 /* Tracks data going into decompressor. */
233 ZSTD_inBuffer input;
233 ZSTD_inBuffer input;
234
234
235 /* Holds output from read() operation on reader. */
235 /* Holds output from read() operation on reader. */
236 PyObject* readResult;
236 PyObject* readResult;
237
237
238 /* Whether all input has been sent to the decompressor. */
238 /* Whether all input has been sent to the decompressor. */
239 int finishedInput;
239 int finishedInput;
240 /* Whether all output has been flushed from the decompressor. */
240 /* Whether all output has been flushed from the decompressor. */
241 int finishedOutput;
241 int finishedOutput;
242 } ZstdDecompressionReader;
242 } ZstdDecompressionReader;
243
243
244 extern PyTypeObject ZstdDecompressionReaderType;
244 extern PyTypeObject ZstdDecompressionReaderType;
245
245
246 typedef struct {
246 typedef struct {
247 PyObject_HEAD
247 PyObject_HEAD
248
248
249 ZstdDecompressor* decompressor;
249 ZstdDecompressor* decompressor;
250 PyObject* writer;
250 PyObject* writer;
251 size_t outSize;
251 size_t outSize;
252 int entered;
252 int entered;
253 int closed;
253 int closed;
254 int writeReturnRead;
254 int writeReturnRead;
255 } ZstdDecompressionWriter;
255 } ZstdDecompressionWriter;
256
256
257 extern PyTypeObject ZstdDecompressionWriterType;
257 extern PyTypeObject ZstdDecompressionWriterType;
258
258
259 typedef struct {
259 typedef struct {
260 PyObject_HEAD
260 PyObject_HEAD
261
261
262 ZstdDecompressor* decompressor;
262 ZstdDecompressor* decompressor;
263 PyObject* reader;
263 PyObject* reader;
264 Py_buffer buffer;
264 Py_buffer buffer;
265 Py_ssize_t bufferOffset;
265 Py_ssize_t bufferOffset;
266 size_t inSize;
266 size_t inSize;
267 size_t outSize;
267 size_t outSize;
268 size_t skipBytes;
268 size_t skipBytes;
269 ZSTD_inBuffer input;
269 ZSTD_inBuffer input;
270 ZSTD_outBuffer output;
270 ZSTD_outBuffer output;
271 Py_ssize_t readCount;
271 Py_ssize_t readCount;
272 int finishedInput;
272 int finishedInput;
273 int finishedOutput;
273 int finishedOutput;
274 } ZstdDecompressorIterator;
274 } ZstdDecompressorIterator;
275
275
276 extern PyTypeObject ZstdDecompressorIteratorType;
276 extern PyTypeObject ZstdDecompressorIteratorType;
277
277
278 typedef struct {
278 typedef struct {
279 int errored;
279 int errored;
280 PyObject* chunk;
280 PyObject* chunk;
281 } DecompressorIteratorResult;
281 } DecompressorIteratorResult;
282
282
283 typedef struct {
283 typedef struct {
284 /* The public API is that these are 64-bit unsigned integers. So these can't
284 /* The public API is that these are 64-bit unsigned integers. So these can't
285 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
285 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
286 * be nonsensical for this platform. */
286 * be nonsensical for this platform. */
287 unsigned long long offset;
287 unsigned long long offset;
288 unsigned long long length;
288 unsigned long long length;
289 } BufferSegment;
289 } BufferSegment;
290
290
291 typedef struct {
291 typedef struct {
292 PyObject_HEAD
292 PyObject_HEAD
293
293
294 PyObject* parent;
294 PyObject* parent;
295 BufferSegment* segments;
295 BufferSegment* segments;
296 Py_ssize_t segmentCount;
296 Py_ssize_t segmentCount;
297 } ZstdBufferSegments;
297 } ZstdBufferSegments;
298
298
299 extern PyTypeObject ZstdBufferSegmentsType;
299 extern PyTypeObject ZstdBufferSegmentsType;
300
300
301 typedef struct {
301 typedef struct {
302 PyObject_HEAD
302 PyObject_HEAD
303
303
304 PyObject* parent;
304 PyObject* parent;
305 void* data;
305 void* data;
306 Py_ssize_t dataSize;
306 Py_ssize_t dataSize;
307 unsigned long long offset;
307 unsigned long long offset;
308 } ZstdBufferSegment;
308 } ZstdBufferSegment;
309
309
310 extern PyTypeObject ZstdBufferSegmentType;
310 extern PyTypeObject ZstdBufferSegmentType;
311
311
312 typedef struct {
312 typedef struct {
313 PyObject_HEAD
313 PyObject_HEAD
314
314
315 Py_buffer parent;
315 Py_buffer parent;
316 void* data;
316 void* data;
317 unsigned long long dataSize;
317 unsigned long long dataSize;
318 BufferSegment* segments;
318 BufferSegment* segments;
319 Py_ssize_t segmentCount;
319 Py_ssize_t segmentCount;
320 int useFree;
320 int useFree;
321 } ZstdBufferWithSegments;
321 } ZstdBufferWithSegments;
322
322
323 extern PyTypeObject ZstdBufferWithSegmentsType;
323 extern PyTypeObject ZstdBufferWithSegmentsType;
324
324
325 /**
325 /**
326 * An ordered collection of BufferWithSegments exposed as a squashed collection.
326 * An ordered collection of BufferWithSegments exposed as a squashed collection.
327 *
327 *
328 * This type provides a virtual view spanning multiple BufferWithSegments
328 * This type provides a virtual view spanning multiple BufferWithSegments
329 * instances. It allows multiple instances to be "chained" together and
329 * instances. It allows multiple instances to be "chained" together and
330 * exposed as a single collection. e.g. if there are 2 buffers holding
330 * exposed as a single collection. e.g. if there are 2 buffers holding
331 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
331 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
332 */
332 */
333 typedef struct {
333 typedef struct {
334 PyObject_HEAD
334 PyObject_HEAD
335
335
336 /* An array of buffers that should be exposed through this instance. */
336 /* An array of buffers that should be exposed through this instance. */
337 ZstdBufferWithSegments** buffers;
337 ZstdBufferWithSegments** buffers;
338 /* Number of elements in buffers array. */
338 /* Number of elements in buffers array. */
339 Py_ssize_t bufferCount;
339 Py_ssize_t bufferCount;
340 /* Array of first offset in each buffer instance. 0th entry corresponds
340 /* Array of first offset in each buffer instance. 0th entry corresponds
341 to number of elements in the 0th buffer. 1st entry corresponds to the
341 to number of elements in the 0th buffer. 1st entry corresponds to the
342 sum of elements in 0th and 1st buffers. */
342 sum of elements in 0th and 1st buffers. */
343 Py_ssize_t* firstElements;
343 Py_ssize_t* firstElements;
344 } ZstdBufferWithSegmentsCollection;
344 } ZstdBufferWithSegmentsCollection;
345
345
346 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
346 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
347
347
348 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
348 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
349 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
349 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
350 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
350 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
351 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
351 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
352 int ensure_ddict(ZstdCompressionDict* dict);
352 int ensure_ddict(ZstdCompressionDict* dict);
353 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
353 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
354 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
354 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
355 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
355 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
356 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
356 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
357 int cpu_count(void);
357 int cpu_count(void);
358 size_t roundpow2(size_t);
358 size_t roundpow2(size_t);
359 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
359 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
@@ -1,207 +1,225 b''
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import cffi
9 import cffi
10 import distutils.ccompiler
10 import distutils.ccompiler
11 import os
11 import os
12 import re
12 import re
13 import subprocess
13 import subprocess
14 import tempfile
14 import tempfile
15
15
16
16
17 HERE = os.path.abspath(os.path.dirname(__file__))
17 HERE = os.path.abspath(os.path.dirname(__file__))
18
18
19 SOURCES = ['zstd/%s' % p for p in (
19 SOURCES = [
20 'common/debug.c',
20 "zstd/%s" % p
21 'common/entropy_common.c',
21 for p in (
22 'common/error_private.c',
22 "common/debug.c",
23 'common/fse_decompress.c',
23 "common/entropy_common.c",
24 'common/pool.c',
24 "common/error_private.c",
25 'common/threading.c',
25 "common/fse_decompress.c",
26 'common/xxhash.c',
26 "common/pool.c",
27 'common/zstd_common.c',
27 "common/threading.c",
28 'compress/fse_compress.c',
28 "common/xxhash.c",
29 'compress/hist.c',
29 "common/zstd_common.c",
30 'compress/huf_compress.c',
30 "compress/fse_compress.c",
31 'compress/zstd_compress.c',
31 "compress/hist.c",
32 'compress/zstd_compress_literals.c',
32 "compress/huf_compress.c",
33 'compress/zstd_compress_sequences.c',
33 "compress/zstd_compress.c",
34 'compress/zstd_double_fast.c',
34 "compress/zstd_compress_literals.c",
35 'compress/zstd_fast.c',
35 "compress/zstd_compress_sequences.c",
36 'compress/zstd_lazy.c',
36 "compress/zstd_double_fast.c",
37 'compress/zstd_ldm.c',
37 "compress/zstd_fast.c",
38 'compress/zstd_opt.c',
38 "compress/zstd_lazy.c",
39 'compress/zstdmt_compress.c',
39 "compress/zstd_ldm.c",
40 'decompress/huf_decompress.c',
40 "compress/zstd_opt.c",
41 'decompress/zstd_ddict.c',
41 "compress/zstdmt_compress.c",
42 'decompress/zstd_decompress.c',
42 "decompress/huf_decompress.c",
43 'decompress/zstd_decompress_block.c',
43 "decompress/zstd_ddict.c",
44 'dictBuilder/cover.c',
44 "decompress/zstd_decompress.c",
45 'dictBuilder/fastcover.c',
45 "decompress/zstd_decompress_block.c",
46 'dictBuilder/divsufsort.c',
46 "dictBuilder/cover.c",
47 'dictBuilder/zdict.c',
47 "dictBuilder/fastcover.c",
48 )]
48 "dictBuilder/divsufsort.c",
49 "dictBuilder/zdict.c",
50 )
51 ]
49
52
50 # Headers whose preprocessed output will be fed into cdef().
53 # Headers whose preprocessed output will be fed into cdef().
51 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
54 HEADERS = [
52 ('zstd.h',),
55 os.path.join(HERE, "zstd", *p) for p in (("zstd.h",), ("dictBuilder", "zdict.h"),)
53 ('dictBuilder', 'zdict.h'),
56 ]
54 )]
55
57
56 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
58 INCLUDE_DIRS = [
57 'zstd',
59 os.path.join(HERE, d)
58 'zstd/common',
60 for d in (
59 'zstd/compress',
61 "zstd",
60 'zstd/decompress',
62 "zstd/common",
61 'zstd/dictBuilder',
63 "zstd/compress",
62 )]
64 "zstd/decompress",
65 "zstd/dictBuilder",
66 )
67 ]
63
68
64 # cffi can't parse some of the primitives in zstd.h. So we invoke the
69 # cffi can't parse some of the primitives in zstd.h. So we invoke the
65 # preprocessor and feed its output into cffi.
70 # preprocessor and feed its output into cffi.
66 compiler = distutils.ccompiler.new_compiler()
71 compiler = distutils.ccompiler.new_compiler()
67
72
68 # Needed for MSVC.
73 # Needed for MSVC.
69 if hasattr(compiler, 'initialize'):
74 if hasattr(compiler, "initialize"):
70 compiler.initialize()
75 compiler.initialize()
71
76
72 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
77 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
73 # manually.
78 # manually.
74 if compiler.compiler_type == 'unix':
79 if compiler.compiler_type == "unix":
75 args = list(compiler.executables['compiler'])
80 args = list(compiler.executables["compiler"])
76 args.extend([
81 args.extend(
77 '-E',
82 ["-E", "-DZSTD_STATIC_LINKING_ONLY", "-DZDICT_STATIC_LINKING_ONLY",]
78 '-DZSTD_STATIC_LINKING_ONLY',
83 )
79 '-DZDICT_STATIC_LINKING_ONLY',
84 elif compiler.compiler_type == "msvc":
80 ])
81 elif compiler.compiler_type == 'msvc':
82 args = [compiler.cc]
85 args = [compiler.cc]
83 args.extend([
86 args.extend(
84 '/EP',
87 ["/EP", "/DZSTD_STATIC_LINKING_ONLY", "/DZDICT_STATIC_LINKING_ONLY",]
85 '/DZSTD_STATIC_LINKING_ONLY',
88 )
86 '/DZDICT_STATIC_LINKING_ONLY',
87 ])
88 else:
89 else:
89 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
90 raise Exception("unsupported compiler type: %s" % compiler.compiler_type)
91
90
92
91 def preprocess(path):
93 def preprocess(path):
92 with open(path, 'rb') as fh:
94 with open(path, "rb") as fh:
93 lines = []
95 lines = []
94 it = iter(fh)
96 it = iter(fh)
95
97
96 for l in it:
98 for l in it:
97 # zstd.h includes <stddef.h>, which is also included by cffi's
99 # zstd.h includes <stddef.h>, which is also included by cffi's
98 # boilerplate. This can lead to duplicate declarations. So we strip
100 # boilerplate. This can lead to duplicate declarations. So we strip
99 # this include from the preprocessor invocation.
101 # this include from the preprocessor invocation.
100 #
102 #
101 # The same things happens for including zstd.h, so give it the same
103 # The same things happens for including zstd.h, so give it the same
102 # treatment.
104 # treatment.
103 #
105 #
104 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
106 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
105 # #define in zstdmt_compress.h and results in a compiler warning. So drop
107 # #define in zstdmt_compress.h and results in a compiler warning. So drop
106 # the inline #define.
108 # the inline #define.
107 if l.startswith((b'#include <stddef.h>',
109 if l.startswith(
108 b'#include "zstd.h"',
110 (
109 b'#define ZSTD_STATIC_LINKING_ONLY')):
111 b"#include <stddef.h>",
112 b'#include "zstd.h"',
113 b"#define ZSTD_STATIC_LINKING_ONLY",
114 )
115 ):
110 continue
116 continue
111
117
118 # The preprocessor environment on Windows doesn't define include
119 # paths, so the #include of limits.h fails. We work around this
120 # by removing that import and defining INT_MAX ourselves. This is
121 # a bit hacky. But it gets the job done.
122 # TODO make limits.h work on Windows so we ensure INT_MAX is
123 # correct.
124 if l.startswith(b"#include <limits.h>"):
125 l = b"#define INT_MAX 2147483647\n"
126
112 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
127 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
113 # important so just filter it out.
128 # important so just filter it out.
114 if l.startswith(b'ZSTDLIB_API'):
129 if l.startswith(b"ZSTDLIB_API"):
115 l = l[len(b'ZSTDLIB_API '):]
130 l = l[len(b"ZSTDLIB_API ") :]
116
131
117 lines.append(l)
132 lines.append(l)
118
133
119 fd, input_file = tempfile.mkstemp(suffix='.h')
134 fd, input_file = tempfile.mkstemp(suffix=".h")
120 os.write(fd, b''.join(lines))
135 os.write(fd, b"".join(lines))
121 os.close(fd)
136 os.close(fd)
122
137
123 try:
138 try:
124 env = dict(os.environ)
139 env = dict(os.environ)
125 if getattr(compiler, '_paths', None):
140 if getattr(compiler, "_paths", None):
126 env['PATH'] = compiler._paths
141 env["PATH"] = compiler._paths
127 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE,
142 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, env=env)
128 env=env)
129 output = process.communicate()[0]
143 output = process.communicate()[0]
130 ret = process.poll()
144 ret = process.poll()
131 if ret:
145 if ret:
132 raise Exception('preprocessor exited with error')
146 raise Exception("preprocessor exited with error")
133
147
134 return output
148 return output
135 finally:
149 finally:
136 os.unlink(input_file)
150 os.unlink(input_file)
137
151
138
152
139 def normalize_output(output):
153 def normalize_output(output):
140 lines = []
154 lines = []
141 for line in output.splitlines():
155 for line in output.splitlines():
142 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
156 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
143 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
157 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
144 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
158 line = line[len(b'__attribute__ ((visibility ("default"))) ') :]
145
159
146 if line.startswith(b'__attribute__((deprecated('):
160 if line.startswith(b"__attribute__((deprecated("):
147 continue
161 continue
148 elif b'__declspec(deprecated(' in line:
162 elif b"__declspec(deprecated(" in line:
149 continue
163 continue
150
164
151 lines.append(line)
165 lines.append(line)
152
166
153 return b'\n'.join(lines)
167 return b"\n".join(lines)
154
168
155
169
156 ffi = cffi.FFI()
170 ffi = cffi.FFI()
157 # zstd.h uses a possible undefined MIN(). Define it until
171 # zstd.h uses a possible undefined MIN(). Define it until
158 # https://github.com/facebook/zstd/issues/976 is fixed.
172 # https://github.com/facebook/zstd/issues/976 is fixed.
159 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
173 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
160 # when cffi uses the function. Since we statically link against zstd, even
174 # when cffi uses the function. Since we statically link against zstd, even
161 # if we use the deprecated functions it shouldn't be a huge problem.
175 # if we use the deprecated functions it shouldn't be a huge problem.
162 ffi.set_source('_zstd_cffi', '''
176 ffi.set_source(
177 "_zstd_cffi",
178 """
163 #define MIN(a,b) ((a)<(b) ? (a) : (b))
179 #define MIN(a,b) ((a)<(b) ? (a) : (b))
164 #define ZSTD_STATIC_LINKING_ONLY
180 #define ZSTD_STATIC_LINKING_ONLY
165 #include <zstd.h>
181 #include <zstd.h>
166 #define ZDICT_STATIC_LINKING_ONLY
182 #define ZDICT_STATIC_LINKING_ONLY
167 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
183 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
168 #include <zdict.h>
184 #include <zdict.h>
169 ''', sources=SOURCES,
185 """,
170 include_dirs=INCLUDE_DIRS,
186 sources=SOURCES,
171 extra_compile_args=['-DZSTD_MULTITHREAD'])
187 include_dirs=INCLUDE_DIRS,
188 extra_compile_args=["-DZSTD_MULTITHREAD"],
189 )
172
190
173 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
191 DEFINE = re.compile(b"^\\#define ([a-zA-Z0-9_]+) ")
174
192
175 sources = []
193 sources = []
176
194
177 # Feed normalized preprocessor output for headers into the cdef parser.
195 # Feed normalized preprocessor output for headers into the cdef parser.
178 for header in HEADERS:
196 for header in HEADERS:
179 preprocessed = preprocess(header)
197 preprocessed = preprocess(header)
180 sources.append(normalize_output(preprocessed))
198 sources.append(normalize_output(preprocessed))
181
199
182 # #define's are effectively erased as part of going through preprocessor.
200 # #define's are effectively erased as part of going through preprocessor.
183 # So perform a manual pass to re-add those to the cdef source.
201 # So perform a manual pass to re-add those to the cdef source.
184 with open(header, 'rb') as fh:
202 with open(header, "rb") as fh:
185 for line in fh:
203 for line in fh:
186 line = line.strip()
204 line = line.strip()
187 m = DEFINE.match(line)
205 m = DEFINE.match(line)
188 if not m:
206 if not m:
189 continue
207 continue
190
208
191 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
209 if m.group(1) == b"ZSTD_STATIC_LINKING_ONLY":
192 continue
210 continue
193
211
194 # The parser doesn't like some constants with complex values.
212 # The parser doesn't like some constants with complex values.
195 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
213 if m.group(1) in (b"ZSTD_LIB_VERSION", b"ZSTD_VERSION_STRING"):
196 continue
214 continue
197
215
198 # The ... is magic syntax by the cdef parser to resolve the
216 # The ... is magic syntax by the cdef parser to resolve the
199 # value at compile time.
217 # value at compile time.
200 sources.append(m.group(0) + b' ...')
218 sources.append(m.group(0) + b" ...")
201
219
202 cdeflines = b'\n'.join(sources).splitlines()
220 cdeflines = b"\n".join(sources).splitlines()
203 cdeflines = [l for l in cdeflines if l.strip()]
221 cdeflines = [l for l in cdeflines if l.strip()]
204 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
222 ffi.cdef(b"\n".join(cdeflines).decode("latin1"))
205
223
206 if __name__ == '__main__':
224 if __name__ == "__main__":
207 ffi.compile()
225 ffi.compile()
@@ -1,112 +1,118 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # Copyright (c) 2016-present, Gregory Szorc
2 # Copyright (c) 2016-present, Gregory Szorc
3 # All rights reserved.
3 # All rights reserved.
4 #
4 #
5 # This software may be modified and distributed under the terms
5 # This software may be modified and distributed under the terms
6 # of the BSD license. See the LICENSE file for details.
6 # of the BSD license. See the LICENSE file for details.
7
7
8 from __future__ import print_function
8 from __future__ import print_function
9
9
10 from distutils.version import LooseVersion
10 from distutils.version import LooseVersion
11 import os
11 import os
12 import sys
12 import sys
13 from setuptools import setup
13 from setuptools import setup
14
14
15 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
15 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
16 # (like memoryview).
16 # (like memoryview).
17 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
17 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
18 # garbage collection pitfalls.
18 # garbage collection pitfalls.
19 MINIMUM_CFFI_VERSION = '1.11'
19 MINIMUM_CFFI_VERSION = "1.11"
20
20
21 try:
21 try:
22 import cffi
22 import cffi
23
23
24 # PyPy (and possibly other distros) have CFFI distributed as part of
24 # PyPy (and possibly other distros) have CFFI distributed as part of
25 # them. The install_requires for CFFI below won't work. We need to sniff
25 # them. The install_requires for CFFI below won't work. We need to sniff
26 # out the CFFI version here and reject CFFI if it is too old.
26 # out the CFFI version here and reject CFFI if it is too old.
27 cffi_version = LooseVersion(cffi.__version__)
27 cffi_version = LooseVersion(cffi.__version__)
28 if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION):
28 if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION):
29 print('CFFI 1.11 or newer required (%s found); '
29 print(
30 'not building CFFI backend' % cffi_version,
30 "CFFI 1.11 or newer required (%s found); "
31 file=sys.stderr)
31 "not building CFFI backend" % cffi_version,
32 file=sys.stderr,
33 )
32 cffi = None
34 cffi = None
33
35
34 except ImportError:
36 except ImportError:
35 cffi = None
37 cffi = None
36
38
37 import setup_zstd
39 import setup_zstd
38
40
39 SUPPORT_LEGACY = False
41 SUPPORT_LEGACY = False
40 SYSTEM_ZSTD = False
42 SYSTEM_ZSTD = False
41 WARNINGS_AS_ERRORS = False
43 WARNINGS_AS_ERRORS = False
42
44
43 if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''):
45 if os.environ.get("ZSTD_WARNINGS_AS_ERRORS", ""):
44 WARNINGS_AS_ERRORS = True
46 WARNINGS_AS_ERRORS = True
45
47
46 if '--legacy' in sys.argv:
48 if "--legacy" in sys.argv:
47 SUPPORT_LEGACY = True
49 SUPPORT_LEGACY = True
48 sys.argv.remove('--legacy')
50 sys.argv.remove("--legacy")
49
51
50 if '--system-zstd' in sys.argv:
52 if "--system-zstd" in sys.argv:
51 SYSTEM_ZSTD = True
53 SYSTEM_ZSTD = True
52 sys.argv.remove('--system-zstd')
54 sys.argv.remove("--system-zstd")
53
55
54 if '--warnings-as-errors' in sys.argv:
56 if "--warnings-as-errors" in sys.argv:
55 WARNINGS_AS_ERRORS = True
57 WARNINGS_AS_ERRORS = True
56 sys.argv.remove('--warning-as-errors')
58 sys.argv.remove("--warning-as-errors")
57
59
58 # Code for obtaining the Extension instance is in its own module to
60 # Code for obtaining the Extension instance is in its own module to
59 # facilitate reuse in other projects.
61 # facilitate reuse in other projects.
60 extensions = [
62 extensions = [
61 setup_zstd.get_c_extension(name='zstd',
63 setup_zstd.get_c_extension(
62 support_legacy=SUPPORT_LEGACY,
64 name="zstd",
63 system_zstd=SYSTEM_ZSTD,
65 support_legacy=SUPPORT_LEGACY,
64 warnings_as_errors=WARNINGS_AS_ERRORS),
66 system_zstd=SYSTEM_ZSTD,
67 warnings_as_errors=WARNINGS_AS_ERRORS,
68 ),
65 ]
69 ]
66
70
67 install_requires = []
71 install_requires = []
68
72
69 if cffi:
73 if cffi:
70 import make_cffi
74 import make_cffi
75
71 extensions.append(make_cffi.ffi.distutils_extension())
76 extensions.append(make_cffi.ffi.distutils_extension())
72 install_requires.append('cffi>=%s' % MINIMUM_CFFI_VERSION)
77 install_requires.append("cffi>=%s" % MINIMUM_CFFI_VERSION)
73
78
74 version = None
79 version = None
75
80
76 with open('c-ext/python-zstandard.h', 'r') as fh:
81 with open("c-ext/python-zstandard.h", "r") as fh:
77 for line in fh:
82 for line in fh:
78 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
83 if not line.startswith("#define PYTHON_ZSTANDARD_VERSION"):
79 continue
84 continue
80
85
81 version = line.split()[2][1:-1]
86 version = line.split()[2][1:-1]
82 break
87 break
83
88
84 if not version:
89 if not version:
85 raise Exception('could not resolve package version; '
90 raise Exception("could not resolve package version; " "this should never happen")
86 'this should never happen')
87
91
88 setup(
92 setup(
89 name='zstandard',
93 name="zstandard",
90 version=version,
94 version=version,
91 description='Zstandard bindings for Python',
95 description="Zstandard bindings for Python",
92 long_description=open('README.rst', 'r').read(),
96 long_description=open("README.rst", "r").read(),
93 url='https://github.com/indygreg/python-zstandard',
97 url="https://github.com/indygreg/python-zstandard",
94 author='Gregory Szorc',
98 author="Gregory Szorc",
95 author_email='gregory.szorc@gmail.com',
99 author_email="gregory.szorc@gmail.com",
96 license='BSD',
100 license="BSD",
97 classifiers=[
101 classifiers=[
98 'Development Status :: 4 - Beta',
102 "Development Status :: 4 - Beta",
99 'Intended Audience :: Developers',
103 "Intended Audience :: Developers",
100 'License :: OSI Approved :: BSD License',
104 "License :: OSI Approved :: BSD License",
101 'Programming Language :: C',
105 "Programming Language :: C",
102 'Programming Language :: Python :: 2.7',
106 "Programming Language :: Python :: 2.7",
103 'Programming Language :: Python :: 3.5',
107 "Programming Language :: Python :: 3.5",
104 'Programming Language :: Python :: 3.6',
108 "Programming Language :: Python :: 3.6",
105 'Programming Language :: Python :: 3.7',
109 "Programming Language :: Python :: 3.7",
110 "Programming Language :: Python :: 3.8",
106 ],
111 ],
107 keywords='zstandard zstd compression',
112 keywords="zstandard zstd compression",
108 packages=['zstandard'],
113 packages=["zstandard"],
109 ext_modules=extensions,
114 ext_modules=extensions,
110 test_suite='tests',
115 test_suite="tests",
111 install_requires=install_requires,
116 install_requires=install_requires,
117 tests_require=["hypothesis"],
112 )
118 )
@@ -1,192 +1,206 b''
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 import distutils.ccompiler
7 import distutils.ccompiler
8 import os
8 import os
9
9
10 from distutils.extension import Extension
10 from distutils.extension import Extension
11
11
12
12
13 zstd_sources = ['zstd/%s' % p for p in (
13 zstd_sources = [
14 'common/debug.c',
14 "zstd/%s" % p
15 'common/entropy_common.c',
15 for p in (
16 'common/error_private.c',
16 "common/debug.c",
17 'common/fse_decompress.c',
17 "common/entropy_common.c",
18 'common/pool.c',
18 "common/error_private.c",
19 'common/threading.c',
19 "common/fse_decompress.c",
20 'common/xxhash.c',
20 "common/pool.c",
21 'common/zstd_common.c',
21 "common/threading.c",
22 'compress/fse_compress.c',
22 "common/xxhash.c",
23 'compress/hist.c',
23 "common/zstd_common.c",
24 'compress/huf_compress.c',
24 "compress/fse_compress.c",
25 'compress/zstd_compress_literals.c',
25 "compress/hist.c",
26 'compress/zstd_compress_sequences.c',
26 "compress/huf_compress.c",
27 'compress/zstd_compress.c',
27 "compress/zstd_compress_literals.c",
28 'compress/zstd_double_fast.c',
28 "compress/zstd_compress_sequences.c",
29 'compress/zstd_fast.c',
29 "compress/zstd_compress.c",
30 'compress/zstd_lazy.c',
30 "compress/zstd_double_fast.c",
31 'compress/zstd_ldm.c',
31 "compress/zstd_fast.c",
32 'compress/zstd_opt.c',
32 "compress/zstd_lazy.c",
33 'compress/zstdmt_compress.c',
33 "compress/zstd_ldm.c",
34 'decompress/huf_decompress.c',
34 "compress/zstd_opt.c",
35 'decompress/zstd_ddict.c',
35 "compress/zstdmt_compress.c",
36 'decompress/zstd_decompress.c',
36 "decompress/huf_decompress.c",
37 'decompress/zstd_decompress_block.c',
37 "decompress/zstd_ddict.c",
38 'dictBuilder/cover.c',
38 "decompress/zstd_decompress.c",
39 'dictBuilder/divsufsort.c',
39 "decompress/zstd_decompress_block.c",
40 'dictBuilder/fastcover.c',
40 "dictBuilder/cover.c",
41 'dictBuilder/zdict.c',
41 "dictBuilder/divsufsort.c",
42 )]
42 "dictBuilder/fastcover.c",
43 "dictBuilder/zdict.c",
44 )
45 ]
43
46
44 zstd_sources_legacy = ['zstd/%s' % p for p in (
47 zstd_sources_legacy = [
45 'deprecated/zbuff_common.c',
48 "zstd/%s" % p
46 'deprecated/zbuff_compress.c',
49 for p in (
47 'deprecated/zbuff_decompress.c',
50 "deprecated/zbuff_common.c",
48 'legacy/zstd_v01.c',
51 "deprecated/zbuff_compress.c",
49 'legacy/zstd_v02.c',
52 "deprecated/zbuff_decompress.c",
50 'legacy/zstd_v03.c',
53 "legacy/zstd_v01.c",
51 'legacy/zstd_v04.c',
54 "legacy/zstd_v02.c",
52 'legacy/zstd_v05.c',
55 "legacy/zstd_v03.c",
53 'legacy/zstd_v06.c',
56 "legacy/zstd_v04.c",
54 'legacy/zstd_v07.c'
57 "legacy/zstd_v05.c",
55 )]
58 "legacy/zstd_v06.c",
59 "legacy/zstd_v07.c",
60 )
61 ]
56
62
57 zstd_includes = [
63 zstd_includes = [
58 'zstd',
64 "zstd",
59 'zstd/common',
65 "zstd/common",
60 'zstd/compress',
66 "zstd/compress",
61 'zstd/decompress',
67 "zstd/decompress",
62 'zstd/dictBuilder',
68 "zstd/dictBuilder",
63 ]
69 ]
64
70
65 zstd_includes_legacy = [
71 zstd_includes_legacy = [
66 'zstd/deprecated',
72 "zstd/deprecated",
67 'zstd/legacy',
73 "zstd/legacy",
68 ]
74 ]
69
75
70 ext_includes = [
76 ext_includes = [
71 'c-ext',
77 "c-ext",
72 'zstd/common',
78 "zstd/common",
73 ]
79 ]
74
80
75 ext_sources = [
81 ext_sources = [
76 'zstd/common/pool.c',
82 "zstd/common/error_private.c",
77 'zstd/common/threading.c',
83 "zstd/common/pool.c",
78 'zstd.c',
84 "zstd/common/threading.c",
79 'c-ext/bufferutil.c',
85 "zstd/common/zstd_common.c",
80 'c-ext/compressiondict.c',
86 "zstd.c",
81 'c-ext/compressobj.c',
87 "c-ext/bufferutil.c",
82 'c-ext/compressor.c',
88 "c-ext/compressiondict.c",
83 'c-ext/compressoriterator.c',
89 "c-ext/compressobj.c",
84 'c-ext/compressionchunker.c',
90 "c-ext/compressor.c",
85 'c-ext/compressionparams.c',
91 "c-ext/compressoriterator.c",
86 'c-ext/compressionreader.c',
92 "c-ext/compressionchunker.c",
87 'c-ext/compressionwriter.c',
93 "c-ext/compressionparams.c",
88 'c-ext/constants.c',
94 "c-ext/compressionreader.c",
89 'c-ext/decompressobj.c',
95 "c-ext/compressionwriter.c",
90 'c-ext/decompressor.c',
96 "c-ext/constants.c",
91 'c-ext/decompressoriterator.c',
97 "c-ext/decompressobj.c",
92 'c-ext/decompressionreader.c',
98 "c-ext/decompressor.c",
93 'c-ext/decompressionwriter.c',
99 "c-ext/decompressoriterator.c",
94 'c-ext/frameparams.c',
100 "c-ext/decompressionreader.c",
101 "c-ext/decompressionwriter.c",
102 "c-ext/frameparams.c",
95 ]
103 ]
96
104
97 zstd_depends = [
105 zstd_depends = [
98 'c-ext/python-zstandard.h',
106 "c-ext/python-zstandard.h",
99 ]
107 ]
100
108
101
109
102 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
110 def get_c_extension(
103 warnings_as_errors=False, root=None):
111 support_legacy=False,
112 system_zstd=False,
113 name="zstd",
114 warnings_as_errors=False,
115 root=None,
116 ):
104 """Obtain a distutils.extension.Extension for the C extension.
117 """Obtain a distutils.extension.Extension for the C extension.
105
118
106 ``support_legacy`` controls whether to compile in legacy zstd format support.
119 ``support_legacy`` controls whether to compile in legacy zstd format support.
107
120
108 ``system_zstd`` controls whether to compile against the system zstd library.
121 ``system_zstd`` controls whether to compile against the system zstd library.
109 For this to work, the system zstd library and headers must match what
122 For this to work, the system zstd library and headers must match what
110 python-zstandard is coded against exactly.
123 python-zstandard is coded against exactly.
111
124
112 ``name`` is the module name of the C extension to produce.
125 ``name`` is the module name of the C extension to produce.
113
126
114 ``warnings_as_errors`` controls whether compiler warnings are turned into
127 ``warnings_as_errors`` controls whether compiler warnings are turned into
115 compiler errors.
128 compiler errors.
116
129
117 ``root`` defines a root path that source should be computed as relative
130 ``root`` defines a root path that source should be computed as relative
118 to. This should be the directory with the main ``setup.py`` that is
131 to. This should be the directory with the main ``setup.py`` that is
119 being invoked. If not defined, paths will be relative to this file.
132 being invoked. If not defined, paths will be relative to this file.
120 """
133 """
121 actual_root = os.path.abspath(os.path.dirname(__file__))
134 actual_root = os.path.abspath(os.path.dirname(__file__))
122 root = root or actual_root
135 root = root or actual_root
123
136
124 sources = set([os.path.join(actual_root, p) for p in ext_sources])
137 sources = set([os.path.join(actual_root, p) for p in ext_sources])
125 if not system_zstd:
138 if not system_zstd:
126 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
139 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
127 if support_legacy:
140 if support_legacy:
128 sources.update([os.path.join(actual_root, p)
141 sources.update([os.path.join(actual_root, p) for p in zstd_sources_legacy])
129 for p in zstd_sources_legacy])
130 sources = list(sources)
142 sources = list(sources)
131
143
132 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
144 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
133 if not system_zstd:
145 if not system_zstd:
134 include_dirs.update([os.path.join(actual_root, d)
146 include_dirs.update([os.path.join(actual_root, d) for d in zstd_includes])
135 for d in zstd_includes])
136 if support_legacy:
147 if support_legacy:
137 include_dirs.update([os.path.join(actual_root, d)
148 include_dirs.update(
138 for d in zstd_includes_legacy])
149 [os.path.join(actual_root, d) for d in zstd_includes_legacy]
150 )
139 include_dirs = list(include_dirs)
151 include_dirs = list(include_dirs)
140
152
141 depends = [os.path.join(actual_root, p) for p in zstd_depends]
153 depends = [os.path.join(actual_root, p) for p in zstd_depends]
142
154
143 compiler = distutils.ccompiler.new_compiler()
155 compiler = distutils.ccompiler.new_compiler()
144
156
145 # Needed for MSVC.
157 # Needed for MSVC.
146 if hasattr(compiler, 'initialize'):
158 if hasattr(compiler, "initialize"):
147 compiler.initialize()
159 compiler.initialize()
148
160
149 if compiler.compiler_type == 'unix':
161 if compiler.compiler_type == "unix":
150 compiler_type = 'unix'
162 compiler_type = "unix"
151 elif compiler.compiler_type == 'msvc':
163 elif compiler.compiler_type == "msvc":
152 compiler_type = 'msvc'
164 compiler_type = "msvc"
153 elif compiler.compiler_type == 'mingw32':
165 elif compiler.compiler_type == "mingw32":
154 compiler_type = 'mingw32'
166 compiler_type = "mingw32"
155 else:
167 else:
156 raise Exception('unhandled compiler type: %s' %
168 raise Exception("unhandled compiler type: %s" % compiler.compiler_type)
157 compiler.compiler_type)
158
169
159 extra_args = ['-DZSTD_MULTITHREAD']
170 extra_args = ["-DZSTD_MULTITHREAD"]
160
171
161 if not system_zstd:
172 if not system_zstd:
162 extra_args.append('-DZSTDLIB_VISIBILITY=')
173 extra_args.append("-DZSTDLIB_VISIBILITY=")
163 extra_args.append('-DZDICTLIB_VISIBILITY=')
174 extra_args.append("-DZDICTLIB_VISIBILITY=")
164 extra_args.append('-DZSTDERRORLIB_VISIBILITY=')
175 extra_args.append("-DZSTDERRORLIB_VISIBILITY=")
165
176
166 if compiler_type == 'unix':
177 if compiler_type == "unix":
167 extra_args.append('-fvisibility=hidden')
178 extra_args.append("-fvisibility=hidden")
168
179
169 if not system_zstd and support_legacy:
180 if not system_zstd and support_legacy:
170 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
181 extra_args.append("-DZSTD_LEGACY_SUPPORT=1")
171
182
172 if warnings_as_errors:
183 if warnings_as_errors:
173 if compiler_type in ('unix', 'mingw32'):
184 if compiler_type in ("unix", "mingw32"):
174 extra_args.append('-Werror')
185 extra_args.append("-Werror")
175 elif compiler_type == 'msvc':
186 elif compiler_type == "msvc":
176 extra_args.append('/WX')
187 extra_args.append("/WX")
177 else:
188 else:
178 assert False
189 assert False
179
190
180 libraries = ['zstd'] if system_zstd else []
191 libraries = ["zstd"] if system_zstd else []
181
192
182 # Python 3.7 doesn't like absolute paths. So normalize to relative.
193 # Python 3.7 doesn't like absolute paths. So normalize to relative.
183 sources = [os.path.relpath(p, root) for p in sources]
194 sources = [os.path.relpath(p, root) for p in sources]
184 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
195 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
185 depends = [os.path.relpath(p, root) for p in depends]
196 depends = [os.path.relpath(p, root) for p in depends]
186
197
187 # TODO compile with optimizations.
198 # TODO compile with optimizations.
188 return Extension(name, sources,
199 return Extension(
189 include_dirs=include_dirs,
200 name,
190 depends=depends,
201 sources,
191 extra_compile_args=extra_args,
202 include_dirs=include_dirs,
192 libraries=libraries)
203 depends=depends,
204 extra_compile_args=extra_args,
205 libraries=libraries,
206 )
@@ -1,185 +1,197 b''
1 import imp
1 import imp
2 import inspect
2 import inspect
3 import io
3 import io
4 import os
4 import os
5 import types
5 import types
6 import unittest
6
7
7 try:
8 try:
8 import hypothesis
9 import hypothesis
9 except ImportError:
10 except ImportError:
10 hypothesis = None
11 hypothesis = None
11
12
12
13
14 class TestCase(unittest.TestCase):
15 if not getattr(unittest.TestCase, "assertRaisesRegex", False):
16 assertRaisesRegex = unittest.TestCase.assertRaisesRegexp
17
18
13 def make_cffi(cls):
19 def make_cffi(cls):
14 """Decorator to add CFFI versions of each test method."""
20 """Decorator to add CFFI versions of each test method."""
15
21
16 # The module containing this class definition should
22 # The module containing this class definition should
17 # `import zstandard as zstd`. Otherwise things may blow up.
23 # `import zstandard as zstd`. Otherwise things may blow up.
18 mod = inspect.getmodule(cls)
24 mod = inspect.getmodule(cls)
19 if not hasattr(mod, 'zstd'):
25 if not hasattr(mod, "zstd"):
20 raise Exception('test module does not contain "zstd" symbol')
26 raise Exception('test module does not contain "zstd" symbol')
21
27
22 if not hasattr(mod.zstd, 'backend'):
28 if not hasattr(mod.zstd, "backend"):
23 raise Exception('zstd symbol does not have "backend" attribute; did '
29 raise Exception(
24 'you `import zstandard as zstd`?')
30 'zstd symbol does not have "backend" attribute; did '
31 "you `import zstandard as zstd`?"
32 )
25
33
26 # If `import zstandard` already chose the cffi backend, there is nothing
34 # If `import zstandard` already chose the cffi backend, there is nothing
27 # for us to do: we only add the cffi variation if the default backend
35 # for us to do: we only add the cffi variation if the default backend
28 # is the C extension.
36 # is the C extension.
29 if mod.zstd.backend == 'cffi':
37 if mod.zstd.backend == "cffi":
30 return cls
38 return cls
31
39
32 old_env = dict(os.environ)
40 old_env = dict(os.environ)
33 os.environ['PYTHON_ZSTANDARD_IMPORT_POLICY'] = 'cffi'
41 os.environ["PYTHON_ZSTANDARD_IMPORT_POLICY"] = "cffi"
34 try:
42 try:
35 try:
43 try:
36 mod_info = imp.find_module('zstandard')
44 mod_info = imp.find_module("zstandard")
37 mod = imp.load_module('zstandard_cffi', *mod_info)
45 mod = imp.load_module("zstandard_cffi", *mod_info)
38 except ImportError:
46 except ImportError:
39 return cls
47 return cls
40 finally:
48 finally:
41 os.environ.clear()
49 os.environ.clear()
42 os.environ.update(old_env)
50 os.environ.update(old_env)
43
51
44 if mod.backend != 'cffi':
52 if mod.backend != "cffi":
45 raise Exception('got the zstandard %s backend instead of cffi' % mod.backend)
53 raise Exception("got the zstandard %s backend instead of cffi" % mod.backend)
46
54
47 # If CFFI version is available, dynamically construct test methods
55 # If CFFI version is available, dynamically construct test methods
48 # that use it.
56 # that use it.
49
57
50 for attr in dir(cls):
58 for attr in dir(cls):
51 fn = getattr(cls, attr)
59 fn = getattr(cls, attr)
52 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
60 if not inspect.ismethod(fn) and not inspect.isfunction(fn):
53 continue
61 continue
54
62
55 if not fn.__name__.startswith('test_'):
63 if not fn.__name__.startswith("test_"):
56 continue
64 continue
57
65
58 name = '%s_cffi' % fn.__name__
66 name = "%s_cffi" % fn.__name__
59
67
60 # Replace the "zstd" symbol with the CFFI module instance. Then copy
68 # Replace the "zstd" symbol with the CFFI module instance. Then copy
61 # the function object and install it in a new attribute.
69 # the function object and install it in a new attribute.
62 if isinstance(fn, types.FunctionType):
70 if isinstance(fn, types.FunctionType):
63 globs = dict(fn.__globals__)
71 globs = dict(fn.__globals__)
64 globs['zstd'] = mod
72 globs["zstd"] = mod
65 new_fn = types.FunctionType(fn.__code__, globs, name,
73 new_fn = types.FunctionType(
66 fn.__defaults__, fn.__closure__)
74 fn.__code__, globs, name, fn.__defaults__, fn.__closure__
75 )
67 new_method = new_fn
76 new_method = new_fn
68 else:
77 else:
69 globs = dict(fn.__func__.func_globals)
78 globs = dict(fn.__func__.func_globals)
70 globs['zstd'] = mod
79 globs["zstd"] = mod
71 new_fn = types.FunctionType(fn.__func__.func_code, globs, name,
80 new_fn = types.FunctionType(
72 fn.__func__.func_defaults,
81 fn.__func__.func_code,
73 fn.__func__.func_closure)
82 globs,
74 new_method = types.UnboundMethodType(new_fn, fn.im_self,
83 name,
75 fn.im_class)
84 fn.__func__.func_defaults,
85 fn.__func__.func_closure,
86 )
87 new_method = types.UnboundMethodType(new_fn, fn.im_self, fn.im_class)
76
88
77 setattr(cls, name, new_method)
89 setattr(cls, name, new_method)
78
90
79 return cls
91 return cls
80
92
81
93
82 class NonClosingBytesIO(io.BytesIO):
94 class NonClosingBytesIO(io.BytesIO):
83 """BytesIO that saves the underlying buffer on close().
95 """BytesIO that saves the underlying buffer on close().
84
96
85 This allows us to access written data after close().
97 This allows us to access written data after close().
86 """
98 """
99
87 def __init__(self, *args, **kwargs):
100 def __init__(self, *args, **kwargs):
88 super(NonClosingBytesIO, self).__init__(*args, **kwargs)
101 super(NonClosingBytesIO, self).__init__(*args, **kwargs)
89 self._saved_buffer = None
102 self._saved_buffer = None
90
103
91 def close(self):
104 def close(self):
92 self._saved_buffer = self.getvalue()
105 self._saved_buffer = self.getvalue()
93 return super(NonClosingBytesIO, self).close()
106 return super(NonClosingBytesIO, self).close()
94
107
95 def getvalue(self):
108 def getvalue(self):
96 if self.closed:
109 if self.closed:
97 return self._saved_buffer
110 return self._saved_buffer
98 else:
111 else:
99 return super(NonClosingBytesIO, self).getvalue()
112 return super(NonClosingBytesIO, self).getvalue()
100
113
101
114
102 class OpCountingBytesIO(NonClosingBytesIO):
115 class OpCountingBytesIO(NonClosingBytesIO):
103 def __init__(self, *args, **kwargs):
116 def __init__(self, *args, **kwargs):
104 self._flush_count = 0
117 self._flush_count = 0
105 self._read_count = 0
118 self._read_count = 0
106 self._write_count = 0
119 self._write_count = 0
107 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
120 return super(OpCountingBytesIO, self).__init__(*args, **kwargs)
108
121
109 def flush(self):
122 def flush(self):
110 self._flush_count += 1
123 self._flush_count += 1
111 return super(OpCountingBytesIO, self).flush()
124 return super(OpCountingBytesIO, self).flush()
112
125
113 def read(self, *args):
126 def read(self, *args):
114 self._read_count += 1
127 self._read_count += 1
115 return super(OpCountingBytesIO, self).read(*args)
128 return super(OpCountingBytesIO, self).read(*args)
116
129
117 def write(self, data):
130 def write(self, data):
118 self._write_count += 1
131 self._write_count += 1
119 return super(OpCountingBytesIO, self).write(data)
132 return super(OpCountingBytesIO, self).write(data)
120
133
121
134
122 _source_files = []
135 _source_files = []
123
136
124
137
125 def random_input_data():
138 def random_input_data():
126 """Obtain the raw content of source files.
139 """Obtain the raw content of source files.
127
140
128 This is used for generating "random" data to feed into fuzzing, since it is
141 This is used for generating "random" data to feed into fuzzing, since it is
129 faster than random content generation.
142 faster than random content generation.
130 """
143 """
131 if _source_files:
144 if _source_files:
132 return _source_files
145 return _source_files
133
146
134 for root, dirs, files in os.walk(os.path.dirname(__file__)):
147 for root, dirs, files in os.walk(os.path.dirname(__file__)):
135 dirs[:] = list(sorted(dirs))
148 dirs[:] = list(sorted(dirs))
136 for f in sorted(files):
149 for f in sorted(files):
137 try:
150 try:
138 with open(os.path.join(root, f), 'rb') as fh:
151 with open(os.path.join(root, f), "rb") as fh:
139 data = fh.read()
152 data = fh.read()
140 if data:
153 if data:
141 _source_files.append(data)
154 _source_files.append(data)
142 except OSError:
155 except OSError:
143 pass
156 pass
144
157
145 # Also add some actual random data.
158 # Also add some actual random data.
146 _source_files.append(os.urandom(100))
159 _source_files.append(os.urandom(100))
147 _source_files.append(os.urandom(1000))
160 _source_files.append(os.urandom(1000))
148 _source_files.append(os.urandom(10000))
161 _source_files.append(os.urandom(10000))
149 _source_files.append(os.urandom(100000))
162 _source_files.append(os.urandom(100000))
150 _source_files.append(os.urandom(1000000))
163 _source_files.append(os.urandom(1000000))
151
164
152 return _source_files
165 return _source_files
153
166
154
167
155 def generate_samples():
168 def generate_samples():
156 inputs = [
169 inputs = [
157 b'foo',
170 b"foo",
158 b'bar',
171 b"bar",
159 b'abcdef',
172 b"abcdef",
160 b'sometext',
173 b"sometext",
161 b'baz',
174 b"baz",
162 ]
175 ]
163
176
164 samples = []
177 samples = []
165
178
166 for i in range(128):
179 for i in range(128):
167 samples.append(inputs[i % 5])
180 samples.append(inputs[i % 5])
168 samples.append(inputs[i % 5] * (i + 3))
181 samples.append(inputs[i % 5] * (i + 3))
169 samples.append(inputs[-(i % 5)] * (i + 2))
182 samples.append(inputs[-(i % 5)] * (i + 2))
170
183
171 return samples
184 return samples
172
185
173
186
174 if hypothesis:
187 if hypothesis:
175 default_settings = hypothesis.settings(deadline=10000)
188 default_settings = hypothesis.settings(deadline=10000)
176 hypothesis.settings.register_profile('default', default_settings)
189 hypothesis.settings.register_profile("default", default_settings)
177
190
178 ci_settings = hypothesis.settings(deadline=20000, max_examples=1000)
191 ci_settings = hypothesis.settings(deadline=20000, max_examples=1000)
179 hypothesis.settings.register_profile('ci', ci_settings)
192 hypothesis.settings.register_profile("ci", ci_settings)
180
193
181 expensive_settings = hypothesis.settings(deadline=None, max_examples=10000)
194 expensive_settings = hypothesis.settings(deadline=None, max_examples=10000)
182 hypothesis.settings.register_profile('expensive', expensive_settings)
195 hypothesis.settings.register_profile("expensive", expensive_settings)
183
196
184 hypothesis.settings.load_profile(
197 hypothesis.settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default"))
185 os.environ.get('HYPOTHESIS_PROFILE', 'default'))
@@ -1,135 +1,146 b''
1 import struct
1 import struct
2 import unittest
2 import unittest
3
3
4 import zstandard as zstd
4 import zstandard as zstd
5
5
6 ss = struct.Struct('=QQ')
6 from .common import TestCase
7
8 ss = struct.Struct("=QQ")
7
9
8
10
9 class TestBufferWithSegments(unittest.TestCase):
11 class TestBufferWithSegments(TestCase):
10 def test_arguments(self):
12 def test_arguments(self):
11 if not hasattr(zstd, 'BufferWithSegments'):
13 if not hasattr(zstd, "BufferWithSegments"):
12 self.skipTest('BufferWithSegments not available')
14 self.skipTest("BufferWithSegments not available")
13
15
14 with self.assertRaises(TypeError):
16 with self.assertRaises(TypeError):
15 zstd.BufferWithSegments()
17 zstd.BufferWithSegments()
16
18
17 with self.assertRaises(TypeError):
19 with self.assertRaises(TypeError):
18 zstd.BufferWithSegments(b'foo')
20 zstd.BufferWithSegments(b"foo")
19
21
20 # Segments data should be a multiple of 16.
22 # Segments data should be a multiple of 16.
21 with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'):
23 with self.assertRaisesRegex(
22 zstd.BufferWithSegments(b'foo', b'\x00\x00')
24 ValueError, "segments array size is not a multiple of 16"
25 ):
26 zstd.BufferWithSegments(b"foo", b"\x00\x00")
23
27
24 def test_invalid_offset(self):
28 def test_invalid_offset(self):
25 if not hasattr(zstd, 'BufferWithSegments'):
29 if not hasattr(zstd, "BufferWithSegments"):
26 self.skipTest('BufferWithSegments not available')
30 self.skipTest("BufferWithSegments not available")
27
31
28 with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'):
32 with self.assertRaisesRegex(
29 zstd.BufferWithSegments(b'foo', ss.pack(0, 4))
33 ValueError, "offset within segments array references memory"
34 ):
35 zstd.BufferWithSegments(b"foo", ss.pack(0, 4))
30
36
31 def test_invalid_getitem(self):
37 def test_invalid_getitem(self):
32 if not hasattr(zstd, 'BufferWithSegments'):
38 if not hasattr(zstd, "BufferWithSegments"):
33 self.skipTest('BufferWithSegments not available')
39 self.skipTest("BufferWithSegments not available")
34
40
35 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
41 b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
36
42
37 with self.assertRaisesRegexp(IndexError, 'offset must be non-negative'):
43 with self.assertRaisesRegex(IndexError, "offset must be non-negative"):
38 test = b[-10]
44 test = b[-10]
39
45
40 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
46 with self.assertRaisesRegex(IndexError, "offset must be less than 1"):
41 test = b[1]
47 test = b[1]
42
48
43 with self.assertRaisesRegexp(IndexError, 'offset must be less than 1'):
49 with self.assertRaisesRegex(IndexError, "offset must be less than 1"):
44 test = b[2]
50 test = b[2]
45
51
46 def test_single(self):
52 def test_single(self):
47 if not hasattr(zstd, 'BufferWithSegments'):
53 if not hasattr(zstd, "BufferWithSegments"):
48 self.skipTest('BufferWithSegments not available')
54 self.skipTest("BufferWithSegments not available")
49
55
50 b = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
56 b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
51 self.assertEqual(len(b), 1)
57 self.assertEqual(len(b), 1)
52 self.assertEqual(b.size, 3)
58 self.assertEqual(b.size, 3)
53 self.assertEqual(b.tobytes(), b'foo')
59 self.assertEqual(b.tobytes(), b"foo")
54
60
55 self.assertEqual(len(b[0]), 3)
61 self.assertEqual(len(b[0]), 3)
56 self.assertEqual(b[0].offset, 0)
62 self.assertEqual(b[0].offset, 0)
57 self.assertEqual(b[0].tobytes(), b'foo')
63 self.assertEqual(b[0].tobytes(), b"foo")
58
64
59 def test_multiple(self):
65 def test_multiple(self):
60 if not hasattr(zstd, 'BufferWithSegments'):
66 if not hasattr(zstd, "BufferWithSegments"):
61 self.skipTest('BufferWithSegments not available')
67 self.skipTest("BufferWithSegments not available")
62
68
63 b = zstd.BufferWithSegments(b'foofooxfooxy', b''.join([ss.pack(0, 3),
69 b = zstd.BufferWithSegments(
64 ss.pack(3, 4),
70 b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)])
65 ss.pack(7, 5)]))
71 )
66 self.assertEqual(len(b), 3)
72 self.assertEqual(len(b), 3)
67 self.assertEqual(b.size, 12)
73 self.assertEqual(b.size, 12)
68 self.assertEqual(b.tobytes(), b'foofooxfooxy')
74 self.assertEqual(b.tobytes(), b"foofooxfooxy")
69
75
70 self.assertEqual(b[0].tobytes(), b'foo')
76 self.assertEqual(b[0].tobytes(), b"foo")
71 self.assertEqual(b[1].tobytes(), b'foox')
77 self.assertEqual(b[1].tobytes(), b"foox")
72 self.assertEqual(b[2].tobytes(), b'fooxy')
78 self.assertEqual(b[2].tobytes(), b"fooxy")
73
79
74
80
75 class TestBufferWithSegmentsCollection(unittest.TestCase):
81 class TestBufferWithSegmentsCollection(TestCase):
76 def test_empty_constructor(self):
82 def test_empty_constructor(self):
77 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
83 if not hasattr(zstd, "BufferWithSegmentsCollection"):
78 self.skipTest('BufferWithSegmentsCollection not available')
84 self.skipTest("BufferWithSegmentsCollection not available")
79
85
80 with self.assertRaisesRegexp(ValueError, 'must pass at least 1 argument'):
86 with self.assertRaisesRegex(ValueError, "must pass at least 1 argument"):
81 zstd.BufferWithSegmentsCollection()
87 zstd.BufferWithSegmentsCollection()
82
88
83 def test_argument_validation(self):
89 def test_argument_validation(self):
84 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
90 if not hasattr(zstd, "BufferWithSegmentsCollection"):
85 self.skipTest('BufferWithSegmentsCollection not available')
91 self.skipTest("BufferWithSegmentsCollection not available")
86
92
87 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
93 with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"):
88 zstd.BufferWithSegmentsCollection(None)
94 zstd.BufferWithSegmentsCollection(None)
89
95
90 with self.assertRaisesRegexp(TypeError, 'arguments must be BufferWithSegments'):
96 with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"):
91 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'foo', ss.pack(0, 3)),
97 zstd.BufferWithSegmentsCollection(
92 None)
98 zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None
99 )
93
100
94 with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'):
101 with self.assertRaisesRegex(
95 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b''))
102 ValueError, "ZstdBufferWithSegments cannot be empty"
103 ):
104 zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b"", b""))
96
105
97 def test_length(self):
106 def test_length(self):
98 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
107 if not hasattr(zstd, "BufferWithSegmentsCollection"):
99 self.skipTest('BufferWithSegmentsCollection not available')
108 self.skipTest("BufferWithSegmentsCollection not available")
100
109
101 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
110 b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
102 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
111 b2 = zstd.BufferWithSegments(
103 ss.pack(3, 3)]))
112 b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)])
113 )
104
114
105 c = zstd.BufferWithSegmentsCollection(b1)
115 c = zstd.BufferWithSegmentsCollection(b1)
106 self.assertEqual(len(c), 1)
116 self.assertEqual(len(c), 1)
107 self.assertEqual(c.size(), 3)
117 self.assertEqual(c.size(), 3)
108
118
109 c = zstd.BufferWithSegmentsCollection(b2)
119 c = zstd.BufferWithSegmentsCollection(b2)
110 self.assertEqual(len(c), 2)
120 self.assertEqual(len(c), 2)
111 self.assertEqual(c.size(), 6)
121 self.assertEqual(c.size(), 6)
112
122
113 c = zstd.BufferWithSegmentsCollection(b1, b2)
123 c = zstd.BufferWithSegmentsCollection(b1, b2)
114 self.assertEqual(len(c), 3)
124 self.assertEqual(len(c), 3)
115 self.assertEqual(c.size(), 9)
125 self.assertEqual(c.size(), 9)
116
126
117 def test_getitem(self):
127 def test_getitem(self):
118 if not hasattr(zstd, 'BufferWithSegmentsCollection'):
128 if not hasattr(zstd, "BufferWithSegmentsCollection"):
119 self.skipTest('BufferWithSegmentsCollection not available')
129 self.skipTest("BufferWithSegmentsCollection not available")
120
130
121 b1 = zstd.BufferWithSegments(b'foo', ss.pack(0, 3))
131 b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3))
122 b2 = zstd.BufferWithSegments(b'barbaz', b''.join([ss.pack(0, 3),
132 b2 = zstd.BufferWithSegments(
123 ss.pack(3, 3)]))
133 b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)])
134 )
124
135
125 c = zstd.BufferWithSegmentsCollection(b1, b2)
136 c = zstd.BufferWithSegmentsCollection(b1, b2)
126
137
127 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
138 with self.assertRaisesRegex(IndexError, "offset must be less than 3"):
128 c[3]
139 c[3]
129
140
130 with self.assertRaisesRegexp(IndexError, 'offset must be less than 3'):
141 with self.assertRaisesRegex(IndexError, "offset must be less than 3"):
131 c[4]
142 c[4]
132
143
133 self.assertEqual(c[0].tobytes(), b'foo')
144 self.assertEqual(c[0].tobytes(), b"foo")
134 self.assertEqual(c[1].tobytes(), b'bar')
145 self.assertEqual(c[1].tobytes(), b"bar")
135 self.assertEqual(c[2].tobytes(), b'baz')
146 self.assertEqual(c[2].tobytes(), b"baz")
This diff has been collapsed as it changes many lines, (811 lines changed) Show them Hide them
@@ -1,1735 +1,1770 b''
1 import hashlib
1 import hashlib
2 import io
2 import io
3 import os
3 import os
4 import struct
4 import struct
5 import sys
5 import sys
6 import tarfile
6 import tarfile
7 import tempfile
7 import tempfile
8 import unittest
8 import unittest
9
9
10 import zstandard as zstd
10 import zstandard as zstd
11
11
12 from .common import (
12 from .common import (
13 make_cffi,
13 make_cffi,
14 NonClosingBytesIO,
14 NonClosingBytesIO,
15 OpCountingBytesIO,
15 OpCountingBytesIO,
16 TestCase,
16 )
17 )
17
18
18
19
19 if sys.version_info[0] >= 3:
20 if sys.version_info[0] >= 3:
20 next = lambda it: it.__next__()
21 next = lambda it: it.__next__()
21 else:
22 else:
22 next = lambda it: it.next()
23 next = lambda it: it.next()
23
24
24
25
25 def multithreaded_chunk_size(level, source_size=0):
26 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.ZstdCompressionParameters.from_level(level,
27 params = zstd.ZstdCompressionParameters.from_level(level, source_size=source_size)
27 source_size=source_size)
28
28
29 return 1 << (params.window_log + 2)
29 return 1 << (params.window_log + 2)
30
30
31
31
32 @make_cffi
32 @make_cffi
33 class TestCompressor(unittest.TestCase):
33 class TestCompressor(TestCase):
34 def test_level_bounds(self):
34 def test_level_bounds(self):
35 with self.assertRaises(ValueError):
35 with self.assertRaises(ValueError):
36 zstd.ZstdCompressor(level=23)
36 zstd.ZstdCompressor(level=23)
37
37
38 def test_memory_size(self):
38 def test_memory_size(self):
39 cctx = zstd.ZstdCompressor(level=1)
39 cctx = zstd.ZstdCompressor(level=1)
40 self.assertGreater(cctx.memory_size(), 100)
40 self.assertGreater(cctx.memory_size(), 100)
41
41
42
42
43 @make_cffi
43 @make_cffi
44 class TestCompressor_compress(unittest.TestCase):
44 class TestCompressor_compress(TestCase):
45 def test_compress_empty(self):
45 def test_compress_empty(self):
46 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
46 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
47 result = cctx.compress(b'')
47 result = cctx.compress(b"")
48 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
48 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
49 params = zstd.get_frame_parameters(result)
49 params = zstd.get_frame_parameters(result)
50 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
50 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
51 self.assertEqual(params.window_size, 524288)
51 self.assertEqual(params.window_size, 524288)
52 self.assertEqual(params.dict_id, 0)
52 self.assertEqual(params.dict_id, 0)
53 self.assertFalse(params.has_checksum, 0)
53 self.assertFalse(params.has_checksum, 0)
54
54
55 cctx = zstd.ZstdCompressor()
55 cctx = zstd.ZstdCompressor()
56 result = cctx.compress(b'')
56 result = cctx.compress(b"")
57 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00')
57 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00")
58 params = zstd.get_frame_parameters(result)
58 params = zstd.get_frame_parameters(result)
59 self.assertEqual(params.content_size, 0)
59 self.assertEqual(params.content_size, 0)
60
60
61 def test_input_types(self):
61 def test_input_types(self):
62 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
62 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
63 expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f'
63 expected = b"\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f"
64
64
65 mutable_array = bytearray(3)
65 mutable_array = bytearray(3)
66 mutable_array[:] = b'foo'
66 mutable_array[:] = b"foo"
67
67
68 sources = [
68 sources = [
69 memoryview(b'foo'),
69 memoryview(b"foo"),
70 bytearray(b'foo'),
70 bytearray(b"foo"),
71 mutable_array,
71 mutable_array,
72 ]
72 ]
73
73
74 for source in sources:
74 for source in sources:
75 self.assertEqual(cctx.compress(source), expected)
75 self.assertEqual(cctx.compress(source), expected)
76
76
77 def test_compress_large(self):
77 def test_compress_large(self):
78 chunks = []
78 chunks = []
79 for i in range(255):
79 for i in range(255):
80 chunks.append(struct.Struct('>B').pack(i) * 16384)
80 chunks.append(struct.Struct(">B").pack(i) * 16384)
81
81
82 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
82 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
83 result = cctx.compress(b''.join(chunks))
83 result = cctx.compress(b"".join(chunks))
84 self.assertEqual(len(result), 999)
84 self.assertEqual(len(result), 999)
85 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
85 self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd")
86
86
87 # This matches the test for read_to_iter() below.
87 # This matches the test for read_to_iter() below.
88 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
88 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
89 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
89 result = cctx.compress(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b"o")
90 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
90 self.assertEqual(
91 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
91 result,
92 b'\x02\x09\x00\x00\x6f')
92 b"\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00"
93 b"\x10\x66\x66\x01\x00\xfb\xff\x39\xc0"
94 b"\x02\x09\x00\x00\x6f",
95 )
93
96
94 def test_negative_level(self):
97 def test_negative_level(self):
95 cctx = zstd.ZstdCompressor(level=-4)
98 cctx = zstd.ZstdCompressor(level=-4)
96 result = cctx.compress(b'foo' * 256)
99 result = cctx.compress(b"foo" * 256)
97
100
98 def test_no_magic(self):
101 def test_no_magic(self):
99 params = zstd.ZstdCompressionParameters.from_level(
102 params = zstd.ZstdCompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1)
100 1, format=zstd.FORMAT_ZSTD1)
101 cctx = zstd.ZstdCompressor(compression_params=params)
103 cctx = zstd.ZstdCompressor(compression_params=params)
102 magic = cctx.compress(b'foobar')
104 magic = cctx.compress(b"foobar")
103
105
104 params = zstd.ZstdCompressionParameters.from_level(
106 params = zstd.ZstdCompressionParameters.from_level(
105 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
107 1, format=zstd.FORMAT_ZSTD1_MAGICLESS
108 )
106 cctx = zstd.ZstdCompressor(compression_params=params)
109 cctx = zstd.ZstdCompressor(compression_params=params)
107 no_magic = cctx.compress(b'foobar')
110 no_magic = cctx.compress(b"foobar")
108
111
109 self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd')
112 self.assertEqual(magic[0:4], b"\x28\xb5\x2f\xfd")
110 self.assertEqual(magic[4:], no_magic)
113 self.assertEqual(magic[4:], no_magic)
111
114
112 def test_write_checksum(self):
115 def test_write_checksum(self):
113 cctx = zstd.ZstdCompressor(level=1)
116 cctx = zstd.ZstdCompressor(level=1)
114 no_checksum = cctx.compress(b'foobar')
117 no_checksum = cctx.compress(b"foobar")
115 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
118 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
116 with_checksum = cctx.compress(b'foobar')
119 with_checksum = cctx.compress(b"foobar")
117
120
118 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
121 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
119
122
120 no_params = zstd.get_frame_parameters(no_checksum)
123 no_params = zstd.get_frame_parameters(no_checksum)
121 with_params = zstd.get_frame_parameters(with_checksum)
124 with_params = zstd.get_frame_parameters(with_checksum)
122
125
123 self.assertFalse(no_params.has_checksum)
126 self.assertFalse(no_params.has_checksum)
124 self.assertTrue(with_params.has_checksum)
127 self.assertTrue(with_params.has_checksum)
125
128
126 def test_write_content_size(self):
129 def test_write_content_size(self):
127 cctx = zstd.ZstdCompressor(level=1)
130 cctx = zstd.ZstdCompressor(level=1)
128 with_size = cctx.compress(b'foobar' * 256)
131 with_size = cctx.compress(b"foobar" * 256)
129 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
132 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
130 no_size = cctx.compress(b'foobar' * 256)
133 no_size = cctx.compress(b"foobar" * 256)
131
134
132 self.assertEqual(len(with_size), len(no_size) + 1)
135 self.assertEqual(len(with_size), len(no_size) + 1)
133
136
134 no_params = zstd.get_frame_parameters(no_size)
137 no_params = zstd.get_frame_parameters(no_size)
135 with_params = zstd.get_frame_parameters(with_size)
138 with_params = zstd.get_frame_parameters(with_size)
136 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
139 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
137 self.assertEqual(with_params.content_size, 1536)
140 self.assertEqual(with_params.content_size, 1536)
138
141
139 def test_no_dict_id(self):
142 def test_no_dict_id(self):
140 samples = []
143 samples = []
141 for i in range(128):
144 for i in range(128):
142 samples.append(b'foo' * 64)
145 samples.append(b"foo" * 64)
143 samples.append(b'bar' * 64)
146 samples.append(b"bar" * 64)
144 samples.append(b'foobar' * 64)
147 samples.append(b"foobar" * 64)
145
148
146 d = zstd.train_dictionary(1024, samples)
149 d = zstd.train_dictionary(1024, samples)
147
150
148 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
151 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
149 with_dict_id = cctx.compress(b'foobarfoobar')
152 with_dict_id = cctx.compress(b"foobarfoobar")
150
153
151 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
154 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
152 no_dict_id = cctx.compress(b'foobarfoobar')
155 no_dict_id = cctx.compress(b"foobarfoobar")
153
156
154 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
157 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
155
158
156 no_params = zstd.get_frame_parameters(no_dict_id)
159 no_params = zstd.get_frame_parameters(no_dict_id)
157 with_params = zstd.get_frame_parameters(with_dict_id)
160 with_params = zstd.get_frame_parameters(with_dict_id)
158 self.assertEqual(no_params.dict_id, 0)
161 self.assertEqual(no_params.dict_id, 0)
159 self.assertEqual(with_params.dict_id, 1880053135)
162 self.assertEqual(with_params.dict_id, 1880053135)
160
163
161 def test_compress_dict_multiple(self):
164 def test_compress_dict_multiple(self):
162 samples = []
165 samples = []
163 for i in range(128):
166 for i in range(128):
164 samples.append(b'foo' * 64)
167 samples.append(b"foo" * 64)
165 samples.append(b'bar' * 64)
168 samples.append(b"bar" * 64)
166 samples.append(b'foobar' * 64)
169 samples.append(b"foobar" * 64)
167
170
168 d = zstd.train_dictionary(8192, samples)
171 d = zstd.train_dictionary(8192, samples)
169
172
170 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
173 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
171
174
172 for i in range(32):
175 for i in range(32):
173 cctx.compress(b'foo bar foobar foo bar foobar')
176 cctx.compress(b"foo bar foobar foo bar foobar")
174
177
175 def test_dict_precompute(self):
178 def test_dict_precompute(self):
176 samples = []
179 samples = []
177 for i in range(128):
180 for i in range(128):
178 samples.append(b'foo' * 64)
181 samples.append(b"foo" * 64)
179 samples.append(b'bar' * 64)
182 samples.append(b"bar" * 64)
180 samples.append(b'foobar' * 64)
183 samples.append(b"foobar" * 64)
181
184
182 d = zstd.train_dictionary(8192, samples)
185 d = zstd.train_dictionary(8192, samples)
183 d.precompute_compress(level=1)
186 d.precompute_compress(level=1)
184
187
185 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
188 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
186
189
187 for i in range(32):
190 for i in range(32):
188 cctx.compress(b'foo bar foobar foo bar foobar')
191 cctx.compress(b"foo bar foobar foo bar foobar")
189
192
190 def test_multithreaded(self):
193 def test_multithreaded(self):
191 chunk_size = multithreaded_chunk_size(1)
194 chunk_size = multithreaded_chunk_size(1)
192 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
195 source = b"".join([b"x" * chunk_size, b"y" * chunk_size])
193
196
194 cctx = zstd.ZstdCompressor(level=1, threads=2)
197 cctx = zstd.ZstdCompressor(level=1, threads=2)
195 compressed = cctx.compress(source)
198 compressed = cctx.compress(source)
196
199
197 params = zstd.get_frame_parameters(compressed)
200 params = zstd.get_frame_parameters(compressed)
198 self.assertEqual(params.content_size, chunk_size * 2)
201 self.assertEqual(params.content_size, chunk_size * 2)
199 self.assertEqual(params.dict_id, 0)
202 self.assertEqual(params.dict_id, 0)
200 self.assertFalse(params.has_checksum)
203 self.assertFalse(params.has_checksum)
201
204
202 dctx = zstd.ZstdDecompressor()
205 dctx = zstd.ZstdDecompressor()
203 self.assertEqual(dctx.decompress(compressed), source)
206 self.assertEqual(dctx.decompress(compressed), source)
204
207
205 def test_multithreaded_dict(self):
208 def test_multithreaded_dict(self):
206 samples = []
209 samples = []
207 for i in range(128):
210 for i in range(128):
208 samples.append(b'foo' * 64)
211 samples.append(b"foo" * 64)
209 samples.append(b'bar' * 64)
212 samples.append(b"bar" * 64)
210 samples.append(b'foobar' * 64)
213 samples.append(b"foobar" * 64)
211
214
212 d = zstd.train_dictionary(1024, samples)
215 d = zstd.train_dictionary(1024, samples)
213
216
214 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
217 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
215
218
216 result = cctx.compress(b'foo')
219 result = cctx.compress(b"foo")
217 params = zstd.get_frame_parameters(result);
220 params = zstd.get_frame_parameters(result)
218 self.assertEqual(params.content_size, 3);
221 self.assertEqual(params.content_size, 3)
219 self.assertEqual(params.dict_id, d.dict_id())
222 self.assertEqual(params.dict_id, d.dict_id())
220
223
221 self.assertEqual(result,
224 self.assertEqual(
222 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
225 result,
223 b'\x66\x6f\x6f')
226 b"\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00" b"\x66\x6f\x6f",
227 )
224
228
225 def test_multithreaded_compression_params(self):
229 def test_multithreaded_compression_params(self):
226 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
230 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
227 cctx = zstd.ZstdCompressor(compression_params=params)
231 cctx = zstd.ZstdCompressor(compression_params=params)
228
232
229 result = cctx.compress(b'foo')
233 result = cctx.compress(b"foo")
230 params = zstd.get_frame_parameters(result);
234 params = zstd.get_frame_parameters(result)
231 self.assertEqual(params.content_size, 3);
235 self.assertEqual(params.content_size, 3)
232
236
233 self.assertEqual(result,
237 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f")
234 b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f')
235
238
236
239
237 @make_cffi
240 @make_cffi
238 class TestCompressor_compressobj(unittest.TestCase):
241 class TestCompressor_compressobj(TestCase):
239 def test_compressobj_empty(self):
242 def test_compressobj_empty(self):
240 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
243 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
241 cobj = cctx.compressobj()
244 cobj = cctx.compressobj()
242 self.assertEqual(cobj.compress(b''), b'')
245 self.assertEqual(cobj.compress(b""), b"")
243 self.assertEqual(cobj.flush(),
246 self.assertEqual(cobj.flush(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
244 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
245
247
246 def test_input_types(self):
248 def test_input_types(self):
247 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
249 expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f"
248 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
250 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
249
251
250 mutable_array = bytearray(3)
252 mutable_array = bytearray(3)
251 mutable_array[:] = b'foo'
253 mutable_array[:] = b"foo"
252
254
253 sources = [
255 sources = [
254 memoryview(b'foo'),
256 memoryview(b"foo"),
255 bytearray(b'foo'),
257 bytearray(b"foo"),
256 mutable_array,
258 mutable_array,
257 ]
259 ]
258
260
259 for source in sources:
261 for source in sources:
260 cobj = cctx.compressobj()
262 cobj = cctx.compressobj()
261 self.assertEqual(cobj.compress(source), b'')
263 self.assertEqual(cobj.compress(source), b"")
262 self.assertEqual(cobj.flush(), expected)
264 self.assertEqual(cobj.flush(), expected)
263
265
264 def test_compressobj_large(self):
266 def test_compressobj_large(self):
265 chunks = []
267 chunks = []
266 for i in range(255):
268 for i in range(255):
267 chunks.append(struct.Struct('>B').pack(i) * 16384)
269 chunks.append(struct.Struct(">B").pack(i) * 16384)
268
270
269 cctx = zstd.ZstdCompressor(level=3)
271 cctx = zstd.ZstdCompressor(level=3)
270 cobj = cctx.compressobj()
272 cobj = cctx.compressobj()
271
273
272 result = cobj.compress(b''.join(chunks)) + cobj.flush()
274 result = cobj.compress(b"".join(chunks)) + cobj.flush()
273 self.assertEqual(len(result), 999)
275 self.assertEqual(len(result), 999)
274 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
276 self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd")
275
277
276 params = zstd.get_frame_parameters(result)
278 params = zstd.get_frame_parameters(result)
277 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
279 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
278 self.assertEqual(params.window_size, 2097152)
280 self.assertEqual(params.window_size, 2097152)
279 self.assertEqual(params.dict_id, 0)
281 self.assertEqual(params.dict_id, 0)
280 self.assertFalse(params.has_checksum)
282 self.assertFalse(params.has_checksum)
281
283
282 def test_write_checksum(self):
284 def test_write_checksum(self):
283 cctx = zstd.ZstdCompressor(level=1)
285 cctx = zstd.ZstdCompressor(level=1)
284 cobj = cctx.compressobj()
286 cobj = cctx.compressobj()
285 no_checksum = cobj.compress(b'foobar') + cobj.flush()
287 no_checksum = cobj.compress(b"foobar") + cobj.flush()
286 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
288 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
287 cobj = cctx.compressobj()
289 cobj = cctx.compressobj()
288 with_checksum = cobj.compress(b'foobar') + cobj.flush()
290 with_checksum = cobj.compress(b"foobar") + cobj.flush()
289
291
290 no_params = zstd.get_frame_parameters(no_checksum)
292 no_params = zstd.get_frame_parameters(no_checksum)
291 with_params = zstd.get_frame_parameters(with_checksum)
293 with_params = zstd.get_frame_parameters(with_checksum)
292 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
294 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
293 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
295 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
294 self.assertEqual(no_params.dict_id, 0)
296 self.assertEqual(no_params.dict_id, 0)
295 self.assertEqual(with_params.dict_id, 0)
297 self.assertEqual(with_params.dict_id, 0)
296 self.assertFalse(no_params.has_checksum)
298 self.assertFalse(no_params.has_checksum)
297 self.assertTrue(with_params.has_checksum)
299 self.assertTrue(with_params.has_checksum)
298
300
299 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
301 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
300
302
301 def test_write_content_size(self):
303 def test_write_content_size(self):
302 cctx = zstd.ZstdCompressor(level=1)
304 cctx = zstd.ZstdCompressor(level=1)
303 cobj = cctx.compressobj(size=len(b'foobar' * 256))
305 cobj = cctx.compressobj(size=len(b"foobar" * 256))
304 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
306 with_size = cobj.compress(b"foobar" * 256) + cobj.flush()
305 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
307 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
306 cobj = cctx.compressobj(size=len(b'foobar' * 256))
308 cobj = cctx.compressobj(size=len(b"foobar" * 256))
307 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
309 no_size = cobj.compress(b"foobar" * 256) + cobj.flush()
308
310
309 no_params = zstd.get_frame_parameters(no_size)
311 no_params = zstd.get_frame_parameters(no_size)
310 with_params = zstd.get_frame_parameters(with_size)
312 with_params = zstd.get_frame_parameters(with_size)
311 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
313 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
312 self.assertEqual(with_params.content_size, 1536)
314 self.assertEqual(with_params.content_size, 1536)
313 self.assertEqual(no_params.dict_id, 0)
315 self.assertEqual(no_params.dict_id, 0)
314 self.assertEqual(with_params.dict_id, 0)
316 self.assertEqual(with_params.dict_id, 0)
315 self.assertFalse(no_params.has_checksum)
317 self.assertFalse(no_params.has_checksum)
316 self.assertFalse(with_params.has_checksum)
318 self.assertFalse(with_params.has_checksum)
317
319
318 self.assertEqual(len(with_size), len(no_size) + 1)
320 self.assertEqual(len(with_size), len(no_size) + 1)
319
321
320 def test_compress_after_finished(self):
322 def test_compress_after_finished(self):
321 cctx = zstd.ZstdCompressor()
323 cctx = zstd.ZstdCompressor()
322 cobj = cctx.compressobj()
324 cobj = cctx.compressobj()
323
325
324 cobj.compress(b'foo')
326 cobj.compress(b"foo")
325 cobj.flush()
327 cobj.flush()
326
328
327 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
329 with self.assertRaisesRegex(
328 cobj.compress(b'foo')
330 zstd.ZstdError, r"cannot call compress\(\) after compressor"
331 ):
332 cobj.compress(b"foo")
329
333
330 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
334 with self.assertRaisesRegex(
335 zstd.ZstdError, "compressor object already finished"
336 ):
331 cobj.flush()
337 cobj.flush()
332
338
333 def test_flush_block_repeated(self):
339 def test_flush_block_repeated(self):
334 cctx = zstd.ZstdCompressor(level=1)
340 cctx = zstd.ZstdCompressor(level=1)
335 cobj = cctx.compressobj()
341 cobj = cctx.compressobj()
336
342
337 self.assertEqual(cobj.compress(b'foo'), b'')
343 self.assertEqual(cobj.compress(b"foo"), b"")
338 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
344 self.assertEqual(
339 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
345 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
340 self.assertEqual(cobj.compress(b'bar'), b'')
346 b"\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo",
347 )
348 self.assertEqual(cobj.compress(b"bar"), b"")
341 # 3 byte header plus content.
349 # 3 byte header plus content.
342 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
350 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"\x18\x00\x00bar")
343 b'\x18\x00\x00bar')
351 self.assertEqual(cobj.flush(), b"\x01\x00\x00")
344 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
345
352
346 def test_flush_empty_block(self):
353 def test_flush_empty_block(self):
347 cctx = zstd.ZstdCompressor(write_checksum=True)
354 cctx = zstd.ZstdCompressor(write_checksum=True)
348 cobj = cctx.compressobj()
355 cobj = cctx.compressobj()
349
356
350 cobj.compress(b'foobar')
357 cobj.compress(b"foobar")
351 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
358 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
352 # No-op if no block is active (this is internal to zstd).
359 # No-op if no block is active (this is internal to zstd).
353 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
360 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"")
354
361
355 trailing = cobj.flush()
362 trailing = cobj.flush()
356 # 3 bytes block header + 4 bytes frame checksum
363 # 3 bytes block header + 4 bytes frame checksum
357 self.assertEqual(len(trailing), 7)
364 self.assertEqual(len(trailing), 7)
358 header = trailing[0:3]
365 header = trailing[0:3]
359 self.assertEqual(header, b'\x01\x00\x00')
366 self.assertEqual(header, b"\x01\x00\x00")
360
367
361 def test_multithreaded(self):
368 def test_multithreaded(self):
362 source = io.BytesIO()
369 source = io.BytesIO()
363 source.write(b'a' * 1048576)
370 source.write(b"a" * 1048576)
364 source.write(b'b' * 1048576)
371 source.write(b"b" * 1048576)
365 source.write(b'c' * 1048576)
372 source.write(b"c" * 1048576)
366 source.seek(0)
373 source.seek(0)
367
374
368 cctx = zstd.ZstdCompressor(level=1, threads=2)
375 cctx = zstd.ZstdCompressor(level=1, threads=2)
369 cobj = cctx.compressobj()
376 cobj = cctx.compressobj()
370
377
371 chunks = []
378 chunks = []
372 while True:
379 while True:
373 d = source.read(8192)
380 d = source.read(8192)
374 if not d:
381 if not d:
375 break
382 break
376
383
377 chunks.append(cobj.compress(d))
384 chunks.append(cobj.compress(d))
378
385
379 chunks.append(cobj.flush())
386 chunks.append(cobj.flush())
380
387
381 compressed = b''.join(chunks)
388 compressed = b"".join(chunks)
382
389
383 self.assertEqual(len(compressed), 295)
390 self.assertEqual(len(compressed), 119)
384
391
385 def test_frame_progression(self):
392 def test_frame_progression(self):
386 cctx = zstd.ZstdCompressor()
393 cctx = zstd.ZstdCompressor()
387
394
388 self.assertEqual(cctx.frame_progression(), (0, 0, 0))
395 self.assertEqual(cctx.frame_progression(), (0, 0, 0))
389
396
390 cobj = cctx.compressobj()
397 cobj = cctx.compressobj()
391
398
392 cobj.compress(b'foobar')
399 cobj.compress(b"foobar")
393 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
400 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
394
401
395 cobj.flush()
402 cobj.flush()
396 self.assertEqual(cctx.frame_progression(), (6, 6, 15))
403 self.assertEqual(cctx.frame_progression(), (6, 6, 15))
397
404
398 def test_bad_size(self):
405 def test_bad_size(self):
399 cctx = zstd.ZstdCompressor()
406 cctx = zstd.ZstdCompressor()
400
407
401 cobj = cctx.compressobj(size=2)
408 cobj = cctx.compressobj(size=2)
402 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
409 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
403 cobj.compress(b'foo')
410 cobj.compress(b"foo")
404
411
405 # Try another operation on this instance.
412 # Try another operation on this instance.
406 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
413 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
407 cobj.compress(b'aa')
414 cobj.compress(b"aa")
408
415
409 # Try another operation on the compressor.
416 # Try another operation on the compressor.
410 cctx.compressobj(size=4)
417 cctx.compressobj(size=4)
411 cctx.compress(b'foobar')
418 cctx.compress(b"foobar")
412
419
413
420
414 @make_cffi
421 @make_cffi
415 class TestCompressor_copy_stream(unittest.TestCase):
422 class TestCompressor_copy_stream(TestCase):
416 def test_no_read(self):
423 def test_no_read(self):
417 source = object()
424 source = object()
418 dest = io.BytesIO()
425 dest = io.BytesIO()
419
426
420 cctx = zstd.ZstdCompressor()
427 cctx = zstd.ZstdCompressor()
421 with self.assertRaises(ValueError):
428 with self.assertRaises(ValueError):
422 cctx.copy_stream(source, dest)
429 cctx.copy_stream(source, dest)
423
430
424 def test_no_write(self):
431 def test_no_write(self):
425 source = io.BytesIO()
432 source = io.BytesIO()
426 dest = object()
433 dest = object()
427
434
428 cctx = zstd.ZstdCompressor()
435 cctx = zstd.ZstdCompressor()
429 with self.assertRaises(ValueError):
436 with self.assertRaises(ValueError):
430 cctx.copy_stream(source, dest)
437 cctx.copy_stream(source, dest)
431
438
432 def test_empty(self):
439 def test_empty(self):
433 source = io.BytesIO()
440 source = io.BytesIO()
434 dest = io.BytesIO()
441 dest = io.BytesIO()
435
442
436 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
443 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
437 r, w = cctx.copy_stream(source, dest)
444 r, w = cctx.copy_stream(source, dest)
438 self.assertEqual(int(r), 0)
445 self.assertEqual(int(r), 0)
439 self.assertEqual(w, 9)
446 self.assertEqual(w, 9)
440
447
441 self.assertEqual(dest.getvalue(),
448 self.assertEqual(dest.getvalue(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
442 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
443
449
444 def test_large_data(self):
450 def test_large_data(self):
445 source = io.BytesIO()
451 source = io.BytesIO()
446 for i in range(255):
452 for i in range(255):
447 source.write(struct.Struct('>B').pack(i) * 16384)
453 source.write(struct.Struct(">B").pack(i) * 16384)
448 source.seek(0)
454 source.seek(0)
449
455
450 dest = io.BytesIO()
456 dest = io.BytesIO()
451 cctx = zstd.ZstdCompressor()
457 cctx = zstd.ZstdCompressor()
452 r, w = cctx.copy_stream(source, dest)
458 r, w = cctx.copy_stream(source, dest)
453
459
454 self.assertEqual(r, 255 * 16384)
460 self.assertEqual(r, 255 * 16384)
455 self.assertEqual(w, 999)
461 self.assertEqual(w, 999)
456
462
457 params = zstd.get_frame_parameters(dest.getvalue())
463 params = zstd.get_frame_parameters(dest.getvalue())
458 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
464 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
459 self.assertEqual(params.window_size, 2097152)
465 self.assertEqual(params.window_size, 2097152)
460 self.assertEqual(params.dict_id, 0)
466 self.assertEqual(params.dict_id, 0)
461 self.assertFalse(params.has_checksum)
467 self.assertFalse(params.has_checksum)
462
468
463 def test_write_checksum(self):
469 def test_write_checksum(self):
464 source = io.BytesIO(b'foobar')
470 source = io.BytesIO(b"foobar")
465 no_checksum = io.BytesIO()
471 no_checksum = io.BytesIO()
466
472
467 cctx = zstd.ZstdCompressor(level=1)
473 cctx = zstd.ZstdCompressor(level=1)
468 cctx.copy_stream(source, no_checksum)
474 cctx.copy_stream(source, no_checksum)
469
475
470 source.seek(0)
476 source.seek(0)
471 with_checksum = io.BytesIO()
477 with_checksum = io.BytesIO()
472 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
478 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
473 cctx.copy_stream(source, with_checksum)
479 cctx.copy_stream(source, with_checksum)
474
480
475 self.assertEqual(len(with_checksum.getvalue()),
481 self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4)
476 len(no_checksum.getvalue()) + 4)
477
482
478 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
483 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
479 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
484 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
480 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
485 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
481 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
486 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
482 self.assertEqual(no_params.dict_id, 0)
487 self.assertEqual(no_params.dict_id, 0)
483 self.assertEqual(with_params.dict_id, 0)
488 self.assertEqual(with_params.dict_id, 0)
484 self.assertFalse(no_params.has_checksum)
489 self.assertFalse(no_params.has_checksum)
485 self.assertTrue(with_params.has_checksum)
490 self.assertTrue(with_params.has_checksum)
486
491
487 def test_write_content_size(self):
492 def test_write_content_size(self):
488 source = io.BytesIO(b'foobar' * 256)
493 source = io.BytesIO(b"foobar" * 256)
489 no_size = io.BytesIO()
494 no_size = io.BytesIO()
490
495
491 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
496 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
492 cctx.copy_stream(source, no_size)
497 cctx.copy_stream(source, no_size)
493
498
494 source.seek(0)
499 source.seek(0)
495 with_size = io.BytesIO()
500 with_size = io.BytesIO()
496 cctx = zstd.ZstdCompressor(level=1)
501 cctx = zstd.ZstdCompressor(level=1)
497 cctx.copy_stream(source, with_size)
502 cctx.copy_stream(source, with_size)
498
503
499 # Source content size is unknown, so no content size written.
504 # Source content size is unknown, so no content size written.
500 self.assertEqual(len(with_size.getvalue()),
505 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()))
501 len(no_size.getvalue()))
502
506
503 source.seek(0)
507 source.seek(0)
504 with_size = io.BytesIO()
508 with_size = io.BytesIO()
505 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
509 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
506
510
507 # We specified source size, so content size header is present.
511 # We specified source size, so content size header is present.
508 self.assertEqual(len(with_size.getvalue()),
512 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1)
509 len(no_size.getvalue()) + 1)
510
513
511 no_params = zstd.get_frame_parameters(no_size.getvalue())
514 no_params = zstd.get_frame_parameters(no_size.getvalue())
512 with_params = zstd.get_frame_parameters(with_size.getvalue())
515 with_params = zstd.get_frame_parameters(with_size.getvalue())
513 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
516 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
514 self.assertEqual(with_params.content_size, 1536)
517 self.assertEqual(with_params.content_size, 1536)
515 self.assertEqual(no_params.dict_id, 0)
518 self.assertEqual(no_params.dict_id, 0)
516 self.assertEqual(with_params.dict_id, 0)
519 self.assertEqual(with_params.dict_id, 0)
517 self.assertFalse(no_params.has_checksum)
520 self.assertFalse(no_params.has_checksum)
518 self.assertFalse(with_params.has_checksum)
521 self.assertFalse(with_params.has_checksum)
519
522
520 def test_read_write_size(self):
523 def test_read_write_size(self):
521 source = OpCountingBytesIO(b'foobarfoobar')
524 source = OpCountingBytesIO(b"foobarfoobar")
522 dest = OpCountingBytesIO()
525 dest = OpCountingBytesIO()
523 cctx = zstd.ZstdCompressor()
526 cctx = zstd.ZstdCompressor()
524 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
527 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
525
528
526 self.assertEqual(r, len(source.getvalue()))
529 self.assertEqual(r, len(source.getvalue()))
527 self.assertEqual(w, 21)
530 self.assertEqual(w, 21)
528 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
531 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
529 self.assertEqual(dest._write_count, len(dest.getvalue()))
532 self.assertEqual(dest._write_count, len(dest.getvalue()))
530
533
531 def test_multithreaded(self):
534 def test_multithreaded(self):
532 source = io.BytesIO()
535 source = io.BytesIO()
533 source.write(b'a' * 1048576)
536 source.write(b"a" * 1048576)
534 source.write(b'b' * 1048576)
537 source.write(b"b" * 1048576)
535 source.write(b'c' * 1048576)
538 source.write(b"c" * 1048576)
536 source.seek(0)
539 source.seek(0)
537
540
538 dest = io.BytesIO()
541 dest = io.BytesIO()
539 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
542 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
540 r, w = cctx.copy_stream(source, dest)
543 r, w = cctx.copy_stream(source, dest)
541 self.assertEqual(r, 3145728)
544 self.assertEqual(r, 3145728)
542 self.assertEqual(w, 295)
545 self.assertEqual(w, 111)
543
546
544 params = zstd.get_frame_parameters(dest.getvalue())
547 params = zstd.get_frame_parameters(dest.getvalue())
545 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
548 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
546 self.assertEqual(params.dict_id, 0)
549 self.assertEqual(params.dict_id, 0)
547 self.assertFalse(params.has_checksum)
550 self.assertFalse(params.has_checksum)
548
551
549 # Writing content size and checksum works.
552 # Writing content size and checksum works.
550 cctx = zstd.ZstdCompressor(threads=2, write_checksum=True)
553 cctx = zstd.ZstdCompressor(threads=2, write_checksum=True)
551 dest = io.BytesIO()
554 dest = io.BytesIO()
552 source.seek(0)
555 source.seek(0)
553 cctx.copy_stream(source, dest, size=len(source.getvalue()))
556 cctx.copy_stream(source, dest, size=len(source.getvalue()))
554
557
555 params = zstd.get_frame_parameters(dest.getvalue())
558 params = zstd.get_frame_parameters(dest.getvalue())
556 self.assertEqual(params.content_size, 3145728)
559 self.assertEqual(params.content_size, 3145728)
557 self.assertEqual(params.dict_id, 0)
560 self.assertEqual(params.dict_id, 0)
558 self.assertTrue(params.has_checksum)
561 self.assertTrue(params.has_checksum)
559
562
560 def test_bad_size(self):
563 def test_bad_size(self):
561 source = io.BytesIO()
564 source = io.BytesIO()
562 source.write(b'a' * 32768)
565 source.write(b"a" * 32768)
563 source.write(b'b' * 32768)
566 source.write(b"b" * 32768)
564 source.seek(0)
567 source.seek(0)
565
568
566 dest = io.BytesIO()
569 dest = io.BytesIO()
567
570
568 cctx = zstd.ZstdCompressor()
571 cctx = zstd.ZstdCompressor()
569
572
570 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
573 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
571 cctx.copy_stream(source, dest, size=42)
574 cctx.copy_stream(source, dest, size=42)
572
575
573 # Try another operation on this compressor.
576 # Try another operation on this compressor.
574 source.seek(0)
577 source.seek(0)
575 dest = io.BytesIO()
578 dest = io.BytesIO()
576 cctx.copy_stream(source, dest)
579 cctx.copy_stream(source, dest)
577
580
578
581
579 @make_cffi
582 @make_cffi
580 class TestCompressor_stream_reader(unittest.TestCase):
583 class TestCompressor_stream_reader(TestCase):
581 def test_context_manager(self):
584 def test_context_manager(self):
582 cctx = zstd.ZstdCompressor()
585 cctx = zstd.ZstdCompressor()
583
586
584 with cctx.stream_reader(b'foo') as reader:
587 with cctx.stream_reader(b"foo") as reader:
585 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
588 with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"):
586 with reader as reader2:
589 with reader as reader2:
587 pass
590 pass
588
591
589 def test_no_context_manager(self):
592 def test_no_context_manager(self):
590 cctx = zstd.ZstdCompressor()
593 cctx = zstd.ZstdCompressor()
591
594
592 reader = cctx.stream_reader(b'foo')
595 reader = cctx.stream_reader(b"foo")
593 reader.read(4)
596 reader.read(4)
594 self.assertFalse(reader.closed)
597 self.assertFalse(reader.closed)
595
598
596 reader.close()
599 reader.close()
597 self.assertTrue(reader.closed)
600 self.assertTrue(reader.closed)
598 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
601 with self.assertRaisesRegex(ValueError, "stream is closed"):
599 reader.read(1)
602 reader.read(1)
600
603
601 def test_not_implemented(self):
604 def test_not_implemented(self):
602 cctx = zstd.ZstdCompressor()
605 cctx = zstd.ZstdCompressor()
603
606
604 with cctx.stream_reader(b'foo' * 60) as reader:
607 with cctx.stream_reader(b"foo" * 60) as reader:
605 with self.assertRaises(io.UnsupportedOperation):
608 with self.assertRaises(io.UnsupportedOperation):
606 reader.readline()
609 reader.readline()
607
610
608 with self.assertRaises(io.UnsupportedOperation):
611 with self.assertRaises(io.UnsupportedOperation):
609 reader.readlines()
612 reader.readlines()
610
613
611 with self.assertRaises(io.UnsupportedOperation):
614 with self.assertRaises(io.UnsupportedOperation):
612 iter(reader)
615 iter(reader)
613
616
614 with self.assertRaises(io.UnsupportedOperation):
617 with self.assertRaises(io.UnsupportedOperation):
615 next(reader)
618 next(reader)
616
619
617 with self.assertRaises(OSError):
620 with self.assertRaises(OSError):
618 reader.writelines([])
621 reader.writelines([])
619
622
620 with self.assertRaises(OSError):
623 with self.assertRaises(OSError):
621 reader.write(b'foo')
624 reader.write(b"foo")
622
625
623 def test_constant_methods(self):
626 def test_constant_methods(self):
624 cctx = zstd.ZstdCompressor()
627 cctx = zstd.ZstdCompressor()
625
628
626 with cctx.stream_reader(b'boo') as reader:
629 with cctx.stream_reader(b"boo") as reader:
627 self.assertTrue(reader.readable())
630 self.assertTrue(reader.readable())
628 self.assertFalse(reader.writable())
631 self.assertFalse(reader.writable())
629 self.assertFalse(reader.seekable())
632 self.assertFalse(reader.seekable())
630 self.assertFalse(reader.isatty())
633 self.assertFalse(reader.isatty())
631 self.assertFalse(reader.closed)
634 self.assertFalse(reader.closed)
632 self.assertIsNone(reader.flush())
635 self.assertIsNone(reader.flush())
633 self.assertFalse(reader.closed)
636 self.assertFalse(reader.closed)
634
637
635 self.assertTrue(reader.closed)
638 self.assertTrue(reader.closed)
636
639
637 def test_read_closed(self):
640 def test_read_closed(self):
638 cctx = zstd.ZstdCompressor()
641 cctx = zstd.ZstdCompressor()
639
642
640 with cctx.stream_reader(b'foo' * 60) as reader:
643 with cctx.stream_reader(b"foo" * 60) as reader:
641 reader.close()
644 reader.close()
642 self.assertTrue(reader.closed)
645 self.assertTrue(reader.closed)
643 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
646 with self.assertRaisesRegex(ValueError, "stream is closed"):
644 reader.read(10)
647 reader.read(10)
645
648
646 def test_read_sizes(self):
649 def test_read_sizes(self):
647 cctx = zstd.ZstdCompressor()
650 cctx = zstd.ZstdCompressor()
648 foo = cctx.compress(b'foo')
651 foo = cctx.compress(b"foo")
649
652
650 with cctx.stream_reader(b'foo') as reader:
653 with cctx.stream_reader(b"foo") as reader:
651 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
654 with self.assertRaisesRegex(
655 ValueError, "cannot read negative amounts less than -1"
656 ):
652 reader.read(-2)
657 reader.read(-2)
653
658
654 self.assertEqual(reader.read(0), b'')
659 self.assertEqual(reader.read(0), b"")
655 self.assertEqual(reader.read(), foo)
660 self.assertEqual(reader.read(), foo)
656
661
657 def test_read_buffer(self):
662 def test_read_buffer(self):
658 cctx = zstd.ZstdCompressor()
663 cctx = zstd.ZstdCompressor()
659
664
660 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
665 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
661 frame = cctx.compress(source)
666 frame = cctx.compress(source)
662
667
663 with cctx.stream_reader(source) as reader:
668 with cctx.stream_reader(source) as reader:
664 self.assertEqual(reader.tell(), 0)
669 self.assertEqual(reader.tell(), 0)
665
670
666 # We should get entire frame in one read.
671 # We should get entire frame in one read.
667 result = reader.read(8192)
672 result = reader.read(8192)
668 self.assertEqual(result, frame)
673 self.assertEqual(result, frame)
669 self.assertEqual(reader.tell(), len(result))
674 self.assertEqual(reader.tell(), len(result))
670 self.assertEqual(reader.read(), b'')
675 self.assertEqual(reader.read(), b"")
671 self.assertEqual(reader.tell(), len(result))
676 self.assertEqual(reader.tell(), len(result))
672
677
673 def test_read_buffer_small_chunks(self):
678 def test_read_buffer_small_chunks(self):
674 cctx = zstd.ZstdCompressor()
679 cctx = zstd.ZstdCompressor()
675
680
676 source = b'foo' * 60
681 source = b"foo" * 60
677 chunks = []
682 chunks = []
678
683
679 with cctx.stream_reader(source) as reader:
684 with cctx.stream_reader(source) as reader:
680 self.assertEqual(reader.tell(), 0)
685 self.assertEqual(reader.tell(), 0)
681
686
682 while True:
687 while True:
683 chunk = reader.read(1)
688 chunk = reader.read(1)
684 if not chunk:
689 if not chunk:
685 break
690 break
686
691
687 chunks.append(chunk)
692 chunks.append(chunk)
688 self.assertEqual(reader.tell(), sum(map(len, chunks)))
693 self.assertEqual(reader.tell(), sum(map(len, chunks)))
689
694
690 self.assertEqual(b''.join(chunks), cctx.compress(source))
695 self.assertEqual(b"".join(chunks), cctx.compress(source))
691
696
692 def test_read_stream(self):
697 def test_read_stream(self):
693 cctx = zstd.ZstdCompressor()
698 cctx = zstd.ZstdCompressor()
694
699
695 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
700 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
696 frame = cctx.compress(source)
701 frame = cctx.compress(source)
697
702
698 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
703 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
699 self.assertEqual(reader.tell(), 0)
704 self.assertEqual(reader.tell(), 0)
700
705
701 chunk = reader.read(8192)
706 chunk = reader.read(8192)
702 self.assertEqual(chunk, frame)
707 self.assertEqual(chunk, frame)
703 self.assertEqual(reader.tell(), len(chunk))
708 self.assertEqual(reader.tell(), len(chunk))
704 self.assertEqual(reader.read(), b'')
709 self.assertEqual(reader.read(), b"")
705 self.assertEqual(reader.tell(), len(chunk))
710 self.assertEqual(reader.tell(), len(chunk))
706
711
707 def test_read_stream_small_chunks(self):
712 def test_read_stream_small_chunks(self):
708 cctx = zstd.ZstdCompressor()
713 cctx = zstd.ZstdCompressor()
709
714
710 source = b'foo' * 60
715 source = b"foo" * 60
711 chunks = []
716 chunks = []
712
717
713 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
718 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
714 self.assertEqual(reader.tell(), 0)
719 self.assertEqual(reader.tell(), 0)
715
720
716 while True:
721 while True:
717 chunk = reader.read(1)
722 chunk = reader.read(1)
718 if not chunk:
723 if not chunk:
719 break
724 break
720
725
721 chunks.append(chunk)
726 chunks.append(chunk)
722 self.assertEqual(reader.tell(), sum(map(len, chunks)))
727 self.assertEqual(reader.tell(), sum(map(len, chunks)))
723
728
724 self.assertEqual(b''.join(chunks), cctx.compress(source))
729 self.assertEqual(b"".join(chunks), cctx.compress(source))
725
730
726 def test_read_after_exit(self):
731 def test_read_after_exit(self):
727 cctx = zstd.ZstdCompressor()
732 cctx = zstd.ZstdCompressor()
728
733
729 with cctx.stream_reader(b'foo' * 60) as reader:
734 with cctx.stream_reader(b"foo" * 60) as reader:
730 while reader.read(8192):
735 while reader.read(8192):
731 pass
736 pass
732
737
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
738 with self.assertRaisesRegex(ValueError, "stream is closed"):
734 reader.read(10)
739 reader.read(10)
735
740
736 def test_bad_size(self):
741 def test_bad_size(self):
737 cctx = zstd.ZstdCompressor()
742 cctx = zstd.ZstdCompressor()
738
743
739 source = io.BytesIO(b'foobar')
744 source = io.BytesIO(b"foobar")
740
745
741 with cctx.stream_reader(source, size=2) as reader:
746 with cctx.stream_reader(source, size=2) as reader:
742 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
747 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
743 reader.read(10)
748 reader.read(10)
744
749
745 # Try another compression operation.
750 # Try another compression operation.
746 with cctx.stream_reader(source, size=42):
751 with cctx.stream_reader(source, size=42):
747 pass
752 pass
748
753
749 def test_readall(self):
754 def test_readall(self):
750 cctx = zstd.ZstdCompressor()
755 cctx = zstd.ZstdCompressor()
751 frame = cctx.compress(b'foo' * 1024)
756 frame = cctx.compress(b"foo" * 1024)
752
757
753 reader = cctx.stream_reader(b'foo' * 1024)
758 reader = cctx.stream_reader(b"foo" * 1024)
754 self.assertEqual(reader.readall(), frame)
759 self.assertEqual(reader.readall(), frame)
755
760
756 def test_readinto(self):
761 def test_readinto(self):
757 cctx = zstd.ZstdCompressor()
762 cctx = zstd.ZstdCompressor()
758 foo = cctx.compress(b'foo')
763 foo = cctx.compress(b"foo")
759
764
760 reader = cctx.stream_reader(b'foo')
765 reader = cctx.stream_reader(b"foo")
761 with self.assertRaises(Exception):
766 with self.assertRaises(Exception):
762 reader.readinto(b'foobar')
767 reader.readinto(b"foobar")
763
768
764 # readinto() with sufficiently large destination.
769 # readinto() with sufficiently large destination.
765 b = bytearray(1024)
770 b = bytearray(1024)
766 reader = cctx.stream_reader(b'foo')
771 reader = cctx.stream_reader(b"foo")
767 self.assertEqual(reader.readinto(b), len(foo))
772 self.assertEqual(reader.readinto(b), len(foo))
768 self.assertEqual(b[0:len(foo)], foo)
773 self.assertEqual(b[0 : len(foo)], foo)
769 self.assertEqual(reader.readinto(b), 0)
774 self.assertEqual(reader.readinto(b), 0)
770 self.assertEqual(b[0:len(foo)], foo)
775 self.assertEqual(b[0 : len(foo)], foo)
771
776
772 # readinto() with small reads.
777 # readinto() with small reads.
773 b = bytearray(1024)
778 b = bytearray(1024)
774 reader = cctx.stream_reader(b'foo', read_size=1)
779 reader = cctx.stream_reader(b"foo", read_size=1)
775 self.assertEqual(reader.readinto(b), len(foo))
780 self.assertEqual(reader.readinto(b), len(foo))
776 self.assertEqual(b[0:len(foo)], foo)
781 self.assertEqual(b[0 : len(foo)], foo)
777
782
778 # Too small destination buffer.
783 # Too small destination buffer.
779 b = bytearray(2)
784 b = bytearray(2)
780 reader = cctx.stream_reader(b'foo')
785 reader = cctx.stream_reader(b"foo")
781 self.assertEqual(reader.readinto(b), 2)
786 self.assertEqual(reader.readinto(b), 2)
782 self.assertEqual(b[:], foo[0:2])
787 self.assertEqual(b[:], foo[0:2])
783 self.assertEqual(reader.readinto(b), 2)
788 self.assertEqual(reader.readinto(b), 2)
784 self.assertEqual(b[:], foo[2:4])
789 self.assertEqual(b[:], foo[2:4])
785 self.assertEqual(reader.readinto(b), 2)
790 self.assertEqual(reader.readinto(b), 2)
786 self.assertEqual(b[:], foo[4:6])
791 self.assertEqual(b[:], foo[4:6])
787
792
788 def test_readinto1(self):
793 def test_readinto1(self):
789 cctx = zstd.ZstdCompressor()
794 cctx = zstd.ZstdCompressor()
790 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
795 foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo")))
791
796
792 reader = cctx.stream_reader(b'foo')
797 reader = cctx.stream_reader(b"foo")
793 with self.assertRaises(Exception):
798 with self.assertRaises(Exception):
794 reader.readinto1(b'foobar')
799 reader.readinto1(b"foobar")
795
800
796 b = bytearray(1024)
801 b = bytearray(1024)
797 source = OpCountingBytesIO(b'foo')
802 source = OpCountingBytesIO(b"foo")
798 reader = cctx.stream_reader(source)
803 reader = cctx.stream_reader(source)
799 self.assertEqual(reader.readinto1(b), len(foo))
804 self.assertEqual(reader.readinto1(b), len(foo))
800 self.assertEqual(b[0:len(foo)], foo)
805 self.assertEqual(b[0 : len(foo)], foo)
801 self.assertEqual(source._read_count, 2)
806 self.assertEqual(source._read_count, 2)
802
807
803 # readinto1() with small reads.
808 # readinto1() with small reads.
804 b = bytearray(1024)
809 b = bytearray(1024)
805 source = OpCountingBytesIO(b'foo')
810 source = OpCountingBytesIO(b"foo")
806 reader = cctx.stream_reader(source, read_size=1)
811 reader = cctx.stream_reader(source, read_size=1)
807 self.assertEqual(reader.readinto1(b), len(foo))
812 self.assertEqual(reader.readinto1(b), len(foo))
808 self.assertEqual(b[0:len(foo)], foo)
813 self.assertEqual(b[0 : len(foo)], foo)
809 self.assertEqual(source._read_count, 4)
814 self.assertEqual(source._read_count, 4)
810
815
811 def test_read1(self):
816 def test_read1(self):
812 cctx = zstd.ZstdCompressor()
817 cctx = zstd.ZstdCompressor()
813 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
818 foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo")))
814
819
815 b = OpCountingBytesIO(b'foo')
820 b = OpCountingBytesIO(b"foo")
816 reader = cctx.stream_reader(b)
821 reader = cctx.stream_reader(b)
817
822
818 self.assertEqual(reader.read1(), foo)
823 self.assertEqual(reader.read1(), foo)
819 self.assertEqual(b._read_count, 2)
824 self.assertEqual(b._read_count, 2)
820
825
821 b = OpCountingBytesIO(b'foo')
826 b = OpCountingBytesIO(b"foo")
822 reader = cctx.stream_reader(b)
827 reader = cctx.stream_reader(b)
823
828
824 self.assertEqual(reader.read1(0), b'')
829 self.assertEqual(reader.read1(0), b"")
825 self.assertEqual(reader.read1(2), foo[0:2])
830 self.assertEqual(reader.read1(2), foo[0:2])
826 self.assertEqual(b._read_count, 2)
831 self.assertEqual(b._read_count, 2)
827 self.assertEqual(reader.read1(2), foo[2:4])
832 self.assertEqual(reader.read1(2), foo[2:4])
828 self.assertEqual(reader.read1(1024), foo[4:])
833 self.assertEqual(reader.read1(1024), foo[4:])
829
834
830
835
831 @make_cffi
836 @make_cffi
832 class TestCompressor_stream_writer(unittest.TestCase):
837 class TestCompressor_stream_writer(TestCase):
833 def test_io_api(self):
838 def test_io_api(self):
834 buffer = io.BytesIO()
839 buffer = io.BytesIO()
835 cctx = zstd.ZstdCompressor()
840 cctx = zstd.ZstdCompressor()
836 writer = cctx.stream_writer(buffer)
841 writer = cctx.stream_writer(buffer)
837
842
838 self.assertFalse(writer.isatty())
843 self.assertFalse(writer.isatty())
839 self.assertFalse(writer.readable())
844 self.assertFalse(writer.readable())
840
845
841 with self.assertRaises(io.UnsupportedOperation):
846 with self.assertRaises(io.UnsupportedOperation):
842 writer.readline()
847 writer.readline()
843
848
844 with self.assertRaises(io.UnsupportedOperation):
849 with self.assertRaises(io.UnsupportedOperation):
845 writer.readline(42)
850 writer.readline(42)
846
851
847 with self.assertRaises(io.UnsupportedOperation):
852 with self.assertRaises(io.UnsupportedOperation):
848 writer.readline(size=42)
853 writer.readline(size=42)
849
854
850 with self.assertRaises(io.UnsupportedOperation):
855 with self.assertRaises(io.UnsupportedOperation):
851 writer.readlines()
856 writer.readlines()
852
857
853 with self.assertRaises(io.UnsupportedOperation):
858 with self.assertRaises(io.UnsupportedOperation):
854 writer.readlines(42)
859 writer.readlines(42)
855
860
856 with self.assertRaises(io.UnsupportedOperation):
861 with self.assertRaises(io.UnsupportedOperation):
857 writer.readlines(hint=42)
862 writer.readlines(hint=42)
858
863
859 with self.assertRaises(io.UnsupportedOperation):
864 with self.assertRaises(io.UnsupportedOperation):
860 writer.seek(0)
865 writer.seek(0)
861
866
862 with self.assertRaises(io.UnsupportedOperation):
867 with self.assertRaises(io.UnsupportedOperation):
863 writer.seek(10, os.SEEK_SET)
868 writer.seek(10, os.SEEK_SET)
864
869
865 self.assertFalse(writer.seekable())
870 self.assertFalse(writer.seekable())
866
871
867 with self.assertRaises(io.UnsupportedOperation):
872 with self.assertRaises(io.UnsupportedOperation):
868 writer.truncate()
873 writer.truncate()
869
874
870 with self.assertRaises(io.UnsupportedOperation):
875 with self.assertRaises(io.UnsupportedOperation):
871 writer.truncate(42)
876 writer.truncate(42)
872
877
873 with self.assertRaises(io.UnsupportedOperation):
878 with self.assertRaises(io.UnsupportedOperation):
874 writer.truncate(size=42)
879 writer.truncate(size=42)
875
880
876 self.assertTrue(writer.writable())
881 self.assertTrue(writer.writable())
877
882
878 with self.assertRaises(NotImplementedError):
883 with self.assertRaises(NotImplementedError):
879 writer.writelines([])
884 writer.writelines([])
880
885
881 with self.assertRaises(io.UnsupportedOperation):
886 with self.assertRaises(io.UnsupportedOperation):
882 writer.read()
887 writer.read()
883
888
884 with self.assertRaises(io.UnsupportedOperation):
889 with self.assertRaises(io.UnsupportedOperation):
885 writer.read(42)
890 writer.read(42)
886
891
887 with self.assertRaises(io.UnsupportedOperation):
892 with self.assertRaises(io.UnsupportedOperation):
888 writer.read(size=42)
893 writer.read(size=42)
889
894
890 with self.assertRaises(io.UnsupportedOperation):
895 with self.assertRaises(io.UnsupportedOperation):
891 writer.readall()
896 writer.readall()
892
897
893 with self.assertRaises(io.UnsupportedOperation):
898 with self.assertRaises(io.UnsupportedOperation):
894 writer.readinto(None)
899 writer.readinto(None)
895
900
896 with self.assertRaises(io.UnsupportedOperation):
901 with self.assertRaises(io.UnsupportedOperation):
897 writer.fileno()
902 writer.fileno()
898
903
899 self.assertFalse(writer.closed)
904 self.assertFalse(writer.closed)
900
905
901 def test_fileno_file(self):
906 def test_fileno_file(self):
902 with tempfile.TemporaryFile('wb') as tf:
907 with tempfile.TemporaryFile("wb") as tf:
903 cctx = zstd.ZstdCompressor()
908 cctx = zstd.ZstdCompressor()
904 writer = cctx.stream_writer(tf)
909 writer = cctx.stream_writer(tf)
905
910
906 self.assertEqual(writer.fileno(), tf.fileno())
911 self.assertEqual(writer.fileno(), tf.fileno())
907
912
908 def test_close(self):
913 def test_close(self):
909 buffer = NonClosingBytesIO()
914 buffer = NonClosingBytesIO()
910 cctx = zstd.ZstdCompressor(level=1)
915 cctx = zstd.ZstdCompressor(level=1)
911 writer = cctx.stream_writer(buffer)
916 writer = cctx.stream_writer(buffer)
912
917
913 writer.write(b'foo' * 1024)
918 writer.write(b"foo" * 1024)
914 self.assertFalse(writer.closed)
919 self.assertFalse(writer.closed)
915 self.assertFalse(buffer.closed)
920 self.assertFalse(buffer.closed)
916 writer.close()
921 writer.close()
917 self.assertTrue(writer.closed)
922 self.assertTrue(writer.closed)
918 self.assertTrue(buffer.closed)
923 self.assertTrue(buffer.closed)
919
924
920 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
925 with self.assertRaisesRegex(ValueError, "stream is closed"):
921 writer.write(b'foo')
926 writer.write(b"foo")
922
927
923 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
928 with self.assertRaisesRegex(ValueError, "stream is closed"):
924 writer.flush()
929 writer.flush()
925
930
926 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
931 with self.assertRaisesRegex(ValueError, "stream is closed"):
927 with writer:
932 with writer:
928 pass
933 pass
929
934
930 self.assertEqual(buffer.getvalue(),
935 self.assertEqual(
931 b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f'
936 buffer.getvalue(),
932 b'\x6f\x01\x00\xfa\xd3\x77\x43')
937 b"\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f"
938 b"\x6f\x01\x00\xfa\xd3\x77\x43",
939 )
933
940
934 # Context manager exit should close stream.
941 # Context manager exit should close stream.
935 buffer = io.BytesIO()
942 buffer = io.BytesIO()
936 writer = cctx.stream_writer(buffer)
943 writer = cctx.stream_writer(buffer)
937
944
938 with writer:
945 with writer:
939 writer.write(b'foo')
946 writer.write(b"foo")
940
947
941 self.assertTrue(writer.closed)
948 self.assertTrue(writer.closed)
942
949
943 def test_empty(self):
950 def test_empty(self):
944 buffer = NonClosingBytesIO()
951 buffer = NonClosingBytesIO()
945 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
952 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
946 with cctx.stream_writer(buffer) as compressor:
953 with cctx.stream_writer(buffer) as compressor:
947 compressor.write(b'')
954 compressor.write(b"")
948
955
949 result = buffer.getvalue()
956 result = buffer.getvalue()
950 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
957 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
951
958
952 params = zstd.get_frame_parameters(result)
959 params = zstd.get_frame_parameters(result)
953 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
960 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
954 self.assertEqual(params.window_size, 524288)
961 self.assertEqual(params.window_size, 524288)
955 self.assertEqual(params.dict_id, 0)
962 self.assertEqual(params.dict_id, 0)
956 self.assertFalse(params.has_checksum)
963 self.assertFalse(params.has_checksum)
957
964
958 # Test without context manager.
965 # Test without context manager.
959 buffer = io.BytesIO()
966 buffer = io.BytesIO()
960 compressor = cctx.stream_writer(buffer)
967 compressor = cctx.stream_writer(buffer)
961 self.assertEqual(compressor.write(b''), 0)
968 self.assertEqual(compressor.write(b""), 0)
962 self.assertEqual(buffer.getvalue(), b'')
969 self.assertEqual(buffer.getvalue(), b"")
963 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9)
970 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9)
964 result = buffer.getvalue()
971 result = buffer.getvalue()
965 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
972 self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
966
973
967 params = zstd.get_frame_parameters(result)
974 params = zstd.get_frame_parameters(result)
968 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
975 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
969 self.assertEqual(params.window_size, 524288)
976 self.assertEqual(params.window_size, 524288)
970 self.assertEqual(params.dict_id, 0)
977 self.assertEqual(params.dict_id, 0)
971 self.assertFalse(params.has_checksum)
978 self.assertFalse(params.has_checksum)
972
979
973 # Test write_return_read=True
980 # Test write_return_read=True
974 compressor = cctx.stream_writer(buffer, write_return_read=True)
981 compressor = cctx.stream_writer(buffer, write_return_read=True)
975 self.assertEqual(compressor.write(b''), 0)
982 self.assertEqual(compressor.write(b""), 0)
976
983
977 def test_input_types(self):
984 def test_input_types(self):
978 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
985 expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f"
979 cctx = zstd.ZstdCompressor(level=1)
986 cctx = zstd.ZstdCompressor(level=1)
980
987
981 mutable_array = bytearray(3)
988 mutable_array = bytearray(3)
982 mutable_array[:] = b'foo'
989 mutable_array[:] = b"foo"
983
990
984 sources = [
991 sources = [
985 memoryview(b'foo'),
992 memoryview(b"foo"),
986 bytearray(b'foo'),
993 bytearray(b"foo"),
987 mutable_array,
994 mutable_array,
988 ]
995 ]
989
996
990 for source in sources:
997 for source in sources:
991 buffer = NonClosingBytesIO()
998 buffer = NonClosingBytesIO()
992 with cctx.stream_writer(buffer) as compressor:
999 with cctx.stream_writer(buffer) as compressor:
993 compressor.write(source)
1000 compressor.write(source)
994
1001
995 self.assertEqual(buffer.getvalue(), expected)
1002 self.assertEqual(buffer.getvalue(), expected)
996
1003
997 compressor = cctx.stream_writer(buffer, write_return_read=True)
1004 compressor = cctx.stream_writer(buffer, write_return_read=True)
998 self.assertEqual(compressor.write(source), len(source))
1005 self.assertEqual(compressor.write(source), len(source))
999
1006
1000 def test_multiple_compress(self):
1007 def test_multiple_compress(self):
1001 buffer = NonClosingBytesIO()
1008 buffer = NonClosingBytesIO()
1002 cctx = zstd.ZstdCompressor(level=5)
1009 cctx = zstd.ZstdCompressor(level=5)
1003 with cctx.stream_writer(buffer) as compressor:
1010 with cctx.stream_writer(buffer) as compressor:
1004 self.assertEqual(compressor.write(b'foo'), 0)
1011 self.assertEqual(compressor.write(b"foo"), 0)
1005 self.assertEqual(compressor.write(b'bar'), 0)
1012 self.assertEqual(compressor.write(b"bar"), 0)
1006 self.assertEqual(compressor.write(b'x' * 8192), 0)
1013 self.assertEqual(compressor.write(b"x" * 8192), 0)
1007
1014
1008 result = buffer.getvalue()
1015 result = buffer.getvalue()
1009 self.assertEqual(result,
1016 self.assertEqual(
1010 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1017 result,
1011 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1018 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f"
1019 b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23",
1020 )
1012
1021
1013 # Test without context manager.
1022 # Test without context manager.
1014 buffer = io.BytesIO()
1023 buffer = io.BytesIO()
1015 compressor = cctx.stream_writer(buffer)
1024 compressor = cctx.stream_writer(buffer)
1016 self.assertEqual(compressor.write(b'foo'), 0)
1025 self.assertEqual(compressor.write(b"foo"), 0)
1017 self.assertEqual(compressor.write(b'bar'), 0)
1026 self.assertEqual(compressor.write(b"bar"), 0)
1018 self.assertEqual(compressor.write(b'x' * 8192), 0)
1027 self.assertEqual(compressor.write(b"x" * 8192), 0)
1019 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1028 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1020 result = buffer.getvalue()
1029 result = buffer.getvalue()
1021 self.assertEqual(result,
1030 self.assertEqual(
1022 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1031 result,
1023 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1032 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f"
1033 b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23",
1034 )
1024
1035
1025 # Test with write_return_read=True.
1036 # Test with write_return_read=True.
1026 compressor = cctx.stream_writer(buffer, write_return_read=True)
1037 compressor = cctx.stream_writer(buffer, write_return_read=True)
1027 self.assertEqual(compressor.write(b'foo'), 3)
1038 self.assertEqual(compressor.write(b"foo"), 3)
1028 self.assertEqual(compressor.write(b'barbiz'), 6)
1039 self.assertEqual(compressor.write(b"barbiz"), 6)
1029 self.assertEqual(compressor.write(b'x' * 8192), 8192)
1040 self.assertEqual(compressor.write(b"x" * 8192), 8192)
1030
1041
1031 def test_dictionary(self):
1042 def test_dictionary(self):
1032 samples = []
1043 samples = []
1033 for i in range(128):
1044 for i in range(128):
1034 samples.append(b'foo' * 64)
1045 samples.append(b"foo" * 64)
1035 samples.append(b'bar' * 64)
1046 samples.append(b"bar" * 64)
1036 samples.append(b'foobar' * 64)
1047 samples.append(b"foobar" * 64)
1037
1048
1038 d = zstd.train_dictionary(8192, samples)
1049 d = zstd.train_dictionary(8192, samples)
1039
1050
1040 h = hashlib.sha1(d.as_bytes()).hexdigest()
1051 h = hashlib.sha1(d.as_bytes()).hexdigest()
1041 self.assertEqual(h, '7a2e59a876db958f74257141045af8f912e00d4e')
1052 self.assertEqual(h, "7a2e59a876db958f74257141045af8f912e00d4e")
1042
1053
1043 buffer = NonClosingBytesIO()
1054 buffer = NonClosingBytesIO()
1044 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
1055 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
1045 with cctx.stream_writer(buffer) as compressor:
1056 with cctx.stream_writer(buffer) as compressor:
1046 self.assertEqual(compressor.write(b'foo'), 0)
1057 self.assertEqual(compressor.write(b"foo"), 0)
1047 self.assertEqual(compressor.write(b'bar'), 0)
1058 self.assertEqual(compressor.write(b"bar"), 0)
1048 self.assertEqual(compressor.write(b'foo' * 16384), 0)
1059 self.assertEqual(compressor.write(b"foo" * 16384), 0)
1049
1060
1050 compressed = buffer.getvalue()
1061 compressed = buffer.getvalue()
1051
1062
1052 params = zstd.get_frame_parameters(compressed)
1063 params = zstd.get_frame_parameters(compressed)
1053 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1064 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1054 self.assertEqual(params.window_size, 2097152)
1065 self.assertEqual(params.window_size, 2097152)
1055 self.assertEqual(params.dict_id, d.dict_id())
1066 self.assertEqual(params.dict_id, d.dict_id())
1056 self.assertFalse(params.has_checksum)
1067 self.assertFalse(params.has_checksum)
1057
1068
1058 h = hashlib.sha1(compressed).hexdigest()
1069 h = hashlib.sha1(compressed).hexdigest()
1059 self.assertEqual(h, '0a7c05635061f58039727cdbe76388c6f4cfef06')
1070 self.assertEqual(h, "0a7c05635061f58039727cdbe76388c6f4cfef06")
1060
1071
1061 source = b'foo' + b'bar' + (b'foo' * 16384)
1072 source = b"foo" + b"bar" + (b"foo" * 16384)
1062
1073
1063 dctx = zstd.ZstdDecompressor(dict_data=d)
1074 dctx = zstd.ZstdDecompressor(dict_data=d)
1064
1075
1065 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
1076 self.assertEqual(
1066 source)
1077 dctx.decompress(compressed, max_output_size=len(source)), source
1078 )
1067
1079
1068 def test_compression_params(self):
1080 def test_compression_params(self):
1069 params = zstd.ZstdCompressionParameters(
1081 params = zstd.ZstdCompressionParameters(
1070 window_log=20,
1082 window_log=20,
1071 chain_log=6,
1083 chain_log=6,
1072 hash_log=12,
1084 hash_log=12,
1073 min_match=5,
1085 min_match=5,
1074 search_log=4,
1086 search_log=4,
1075 target_length=10,
1087 target_length=10,
1076 strategy=zstd.STRATEGY_FAST)
1088 strategy=zstd.STRATEGY_FAST,
1089 )
1077
1090
1078 buffer = NonClosingBytesIO()
1091 buffer = NonClosingBytesIO()
1079 cctx = zstd.ZstdCompressor(compression_params=params)
1092 cctx = zstd.ZstdCompressor(compression_params=params)
1080 with cctx.stream_writer(buffer) as compressor:
1093 with cctx.stream_writer(buffer) as compressor:
1081 self.assertEqual(compressor.write(b'foo'), 0)
1094 self.assertEqual(compressor.write(b"foo"), 0)
1082 self.assertEqual(compressor.write(b'bar'), 0)
1095 self.assertEqual(compressor.write(b"bar"), 0)
1083 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
1096 self.assertEqual(compressor.write(b"foobar" * 16384), 0)
1084
1097
1085 compressed = buffer.getvalue()
1098 compressed = buffer.getvalue()
1086
1099
1087 params = zstd.get_frame_parameters(compressed)
1100 params = zstd.get_frame_parameters(compressed)
1088 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1101 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1089 self.assertEqual(params.window_size, 1048576)
1102 self.assertEqual(params.window_size, 1048576)
1090 self.assertEqual(params.dict_id, 0)
1103 self.assertEqual(params.dict_id, 0)
1091 self.assertFalse(params.has_checksum)
1104 self.assertFalse(params.has_checksum)
1092
1105
1093 h = hashlib.sha1(compressed).hexdigest()
1106 h = hashlib.sha1(compressed).hexdigest()
1094 self.assertEqual(h, 'dd4bb7d37c1a0235b38a2f6b462814376843ef0b')
1107 self.assertEqual(h, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b")
1095
1108
1096 def test_write_checksum(self):
1109 def test_write_checksum(self):
1097 no_checksum = NonClosingBytesIO()
1110 no_checksum = NonClosingBytesIO()
1098 cctx = zstd.ZstdCompressor(level=1)
1111 cctx = zstd.ZstdCompressor(level=1)
1099 with cctx.stream_writer(no_checksum) as compressor:
1112 with cctx.stream_writer(no_checksum) as compressor:
1100 self.assertEqual(compressor.write(b'foobar'), 0)
1113 self.assertEqual(compressor.write(b"foobar"), 0)
1101
1114
1102 with_checksum = NonClosingBytesIO()
1115 with_checksum = NonClosingBytesIO()
1103 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
1116 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
1104 with cctx.stream_writer(with_checksum) as compressor:
1117 with cctx.stream_writer(with_checksum) as compressor:
1105 self.assertEqual(compressor.write(b'foobar'), 0)
1118 self.assertEqual(compressor.write(b"foobar"), 0)
1106
1119
1107 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
1120 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
1108 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
1121 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
1109 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1122 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1110 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1123 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1111 self.assertEqual(no_params.dict_id, 0)
1124 self.assertEqual(no_params.dict_id, 0)
1112 self.assertEqual(with_params.dict_id, 0)
1125 self.assertEqual(with_params.dict_id, 0)
1113 self.assertFalse(no_params.has_checksum)
1126 self.assertFalse(no_params.has_checksum)
1114 self.assertTrue(with_params.has_checksum)
1127 self.assertTrue(with_params.has_checksum)
1115
1128
1116 self.assertEqual(len(with_checksum.getvalue()),
1129 self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4)
1117 len(no_checksum.getvalue()) + 4)
1118
1130
1119 def test_write_content_size(self):
1131 def test_write_content_size(self):
1120 no_size = NonClosingBytesIO()
1132 no_size = NonClosingBytesIO()
1121 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1133 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1122 with cctx.stream_writer(no_size) as compressor:
1134 with cctx.stream_writer(no_size) as compressor:
1123 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1135 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1124
1136
1125 with_size = NonClosingBytesIO()
1137 with_size = NonClosingBytesIO()
1126 cctx = zstd.ZstdCompressor(level=1)
1138 cctx = zstd.ZstdCompressor(level=1)
1127 with cctx.stream_writer(with_size) as compressor:
1139 with cctx.stream_writer(with_size) as compressor:
1128 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1140 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1129
1141
1130 # Source size is not known in streaming mode, so header not
1142 # Source size is not known in streaming mode, so header not
1131 # written.
1143 # written.
1132 self.assertEqual(len(with_size.getvalue()),
1144 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()))
1133 len(no_size.getvalue()))
1134
1145
1135 # Declaring size will write the header.
1146 # Declaring size will write the header.
1136 with_size = NonClosingBytesIO()
1147 with_size = NonClosingBytesIO()
1137 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
1148 with cctx.stream_writer(with_size, size=len(b"foobar" * 256)) as compressor:
1138 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1149 self.assertEqual(compressor.write(b"foobar" * 256), 0)
1139
1150
1140 no_params = zstd.get_frame_parameters(no_size.getvalue())
1151 no_params = zstd.get_frame_parameters(no_size.getvalue())
1141 with_params = zstd.get_frame_parameters(with_size.getvalue())
1152 with_params = zstd.get_frame_parameters(with_size.getvalue())
1142 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1153 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1143 self.assertEqual(with_params.content_size, 1536)
1154 self.assertEqual(with_params.content_size, 1536)
1144 self.assertEqual(no_params.dict_id, 0)
1155 self.assertEqual(no_params.dict_id, 0)
1145 self.assertEqual(with_params.dict_id, 0)
1156 self.assertEqual(with_params.dict_id, 0)
1146 self.assertFalse(no_params.has_checksum)
1157 self.assertFalse(no_params.has_checksum)
1147 self.assertFalse(with_params.has_checksum)
1158 self.assertFalse(with_params.has_checksum)
1148
1159
1149 self.assertEqual(len(with_size.getvalue()),
1160 self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1)
1150 len(no_size.getvalue()) + 1)
1151
1161
1152 def test_no_dict_id(self):
1162 def test_no_dict_id(self):
1153 samples = []
1163 samples = []
1154 for i in range(128):
1164 for i in range(128):
1155 samples.append(b'foo' * 64)
1165 samples.append(b"foo" * 64)
1156 samples.append(b'bar' * 64)
1166 samples.append(b"bar" * 64)
1157 samples.append(b'foobar' * 64)
1167 samples.append(b"foobar" * 64)
1158
1168
1159 d = zstd.train_dictionary(1024, samples)
1169 d = zstd.train_dictionary(1024, samples)
1160
1170
1161 with_dict_id = NonClosingBytesIO()
1171 with_dict_id = NonClosingBytesIO()
1162 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
1172 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
1163 with cctx.stream_writer(with_dict_id) as compressor:
1173 with cctx.stream_writer(with_dict_id) as compressor:
1164 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1174 self.assertEqual(compressor.write(b"foobarfoobar"), 0)
1165
1175
1166 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
1176 self.assertEqual(with_dict_id.getvalue()[4:5], b"\x03")
1167
1177
1168 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
1178 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
1169 no_dict_id = NonClosingBytesIO()
1179 no_dict_id = NonClosingBytesIO()
1170 with cctx.stream_writer(no_dict_id) as compressor:
1180 with cctx.stream_writer(no_dict_id) as compressor:
1171 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1181 self.assertEqual(compressor.write(b"foobarfoobar"), 0)
1172
1182
1173 self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00')
1183 self.assertEqual(no_dict_id.getvalue()[4:5], b"\x00")
1174
1184
1175 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
1185 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
1176 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
1186 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
1177 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1187 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1178 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1188 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1179 self.assertEqual(no_params.dict_id, 0)
1189 self.assertEqual(no_params.dict_id, 0)
1180 self.assertEqual(with_params.dict_id, d.dict_id())
1190 self.assertEqual(with_params.dict_id, d.dict_id())
1181 self.assertFalse(no_params.has_checksum)
1191 self.assertFalse(no_params.has_checksum)
1182 self.assertFalse(with_params.has_checksum)
1192 self.assertFalse(with_params.has_checksum)
1183
1193
1184 self.assertEqual(len(with_dict_id.getvalue()),
1194 self.assertEqual(len(with_dict_id.getvalue()), len(no_dict_id.getvalue()) + 4)
1185 len(no_dict_id.getvalue()) + 4)
1186
1195
1187 def test_memory_size(self):
1196 def test_memory_size(self):
1188 cctx = zstd.ZstdCompressor(level=3)
1197 cctx = zstd.ZstdCompressor(level=3)
1189 buffer = io.BytesIO()
1198 buffer = io.BytesIO()
1190 with cctx.stream_writer(buffer) as compressor:
1199 with cctx.stream_writer(buffer) as compressor:
1191 compressor.write(b'foo')
1200 compressor.write(b"foo")
1192 size = compressor.memory_size()
1201 size = compressor.memory_size()
1193
1202
1194 self.assertGreater(size, 100000)
1203 self.assertGreater(size, 100000)
1195
1204
1196 def test_write_size(self):
1205 def test_write_size(self):
1197 cctx = zstd.ZstdCompressor(level=3)
1206 cctx = zstd.ZstdCompressor(level=3)
1198 dest = OpCountingBytesIO()
1207 dest = OpCountingBytesIO()
1199 with cctx.stream_writer(dest, write_size=1) as compressor:
1208 with cctx.stream_writer(dest, write_size=1) as compressor:
1200 self.assertEqual(compressor.write(b'foo'), 0)
1209 self.assertEqual(compressor.write(b"foo"), 0)
1201 self.assertEqual(compressor.write(b'bar'), 0)
1210 self.assertEqual(compressor.write(b"bar"), 0)
1202 self.assertEqual(compressor.write(b'foobar'), 0)
1211 self.assertEqual(compressor.write(b"foobar"), 0)
1203
1212
1204 self.assertEqual(len(dest.getvalue()), dest._write_count)
1213 self.assertEqual(len(dest.getvalue()), dest._write_count)
1205
1214
1206 def test_flush_repeated(self):
1215 def test_flush_repeated(self):
1207 cctx = zstd.ZstdCompressor(level=3)
1216 cctx = zstd.ZstdCompressor(level=3)
1208 dest = OpCountingBytesIO()
1217 dest = OpCountingBytesIO()
1209 with cctx.stream_writer(dest) as compressor:
1218 with cctx.stream_writer(dest) as compressor:
1210 self.assertEqual(compressor.write(b'foo'), 0)
1219 self.assertEqual(compressor.write(b"foo"), 0)
1211 self.assertEqual(dest._write_count, 0)
1220 self.assertEqual(dest._write_count, 0)
1212 self.assertEqual(compressor.flush(), 12)
1221 self.assertEqual(compressor.flush(), 12)
1213 self.assertEqual(dest._write_count, 1)
1222 self.assertEqual(dest._write_count, 1)
1214 self.assertEqual(compressor.write(b'bar'), 0)
1223 self.assertEqual(compressor.write(b"bar"), 0)
1215 self.assertEqual(dest._write_count, 1)
1224 self.assertEqual(dest._write_count, 1)
1216 self.assertEqual(compressor.flush(), 6)
1225 self.assertEqual(compressor.flush(), 6)
1217 self.assertEqual(dest._write_count, 2)
1226 self.assertEqual(dest._write_count, 2)
1218 self.assertEqual(compressor.write(b'baz'), 0)
1227 self.assertEqual(compressor.write(b"baz"), 0)
1219
1228
1220 self.assertEqual(dest._write_count, 3)
1229 self.assertEqual(dest._write_count, 3)
1221
1230
1222 def test_flush_empty_block(self):
1231 def test_flush_empty_block(self):
1223 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
1232 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
1224 dest = OpCountingBytesIO()
1233 dest = OpCountingBytesIO()
1225 with cctx.stream_writer(dest) as compressor:
1234 with cctx.stream_writer(dest) as compressor:
1226 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1235 self.assertEqual(compressor.write(b"foobar" * 8192), 0)
1227 count = dest._write_count
1236 count = dest._write_count
1228 offset = dest.tell()
1237 offset = dest.tell()
1229 self.assertEqual(compressor.flush(), 23)
1238 self.assertEqual(compressor.flush(), 23)
1230 self.assertGreater(dest._write_count, count)
1239 self.assertGreater(dest._write_count, count)
1231 self.assertGreater(dest.tell(), offset)
1240 self.assertGreater(dest.tell(), offset)
1232 offset = dest.tell()
1241 offset = dest.tell()
1233 # Ending the write here should cause an empty block to be written
1242 # Ending the write here should cause an empty block to be written
1234 # to denote end of frame.
1243 # to denote end of frame.
1235
1244
1236 trailing = dest.getvalue()[offset:]
1245 trailing = dest.getvalue()[offset:]
1237 # 3 bytes block header + 4 bytes frame checksum
1246 # 3 bytes block header + 4 bytes frame checksum
1238 self.assertEqual(len(trailing), 7)
1247 self.assertEqual(len(trailing), 7)
1239
1248
1240 header = trailing[0:3]
1249 header = trailing[0:3]
1241 self.assertEqual(header, b'\x01\x00\x00')
1250 self.assertEqual(header, b"\x01\x00\x00")
1242
1251
1243 def test_flush_frame(self):
1252 def test_flush_frame(self):
1244 cctx = zstd.ZstdCompressor(level=3)
1253 cctx = zstd.ZstdCompressor(level=3)
1245 dest = OpCountingBytesIO()
1254 dest = OpCountingBytesIO()
1246
1255
1247 with cctx.stream_writer(dest) as compressor:
1256 with cctx.stream_writer(dest) as compressor:
1248 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1257 self.assertEqual(compressor.write(b"foobar" * 8192), 0)
1249 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1258 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1250 compressor.write(b'biz' * 16384)
1259 compressor.write(b"biz" * 16384)
1251
1260
1252 self.assertEqual(dest.getvalue(),
1261 self.assertEqual(
1253 # Frame 1.
1262 dest.getvalue(),
1254 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f'
1263 # Frame 1.
1255 b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08'
1264 b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f"
1256 # Frame 2.
1265 b"\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08"
1257 b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a'
1266 # Frame 2.
1258 b'\x01\x00\xfa\x3f\x75\x37\x04')
1267 b"\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a"
1268 b"\x01\x00\xfa\x3f\x75\x37\x04",
1269 )
1259
1270
1260 def test_bad_flush_mode(self):
1271 def test_bad_flush_mode(self):
1261 cctx = zstd.ZstdCompressor()
1272 cctx = zstd.ZstdCompressor()
1262 dest = io.BytesIO()
1273 dest = io.BytesIO()
1263 with cctx.stream_writer(dest) as compressor:
1274 with cctx.stream_writer(dest) as compressor:
1264 with self.assertRaisesRegexp(ValueError, 'unknown flush_mode: 42'):
1275 with self.assertRaisesRegex(ValueError, "unknown flush_mode: 42"):
1265 compressor.flush(flush_mode=42)
1276 compressor.flush(flush_mode=42)
1266
1277
1267 def test_multithreaded(self):
1278 def test_multithreaded(self):
1268 dest = NonClosingBytesIO()
1279 dest = NonClosingBytesIO()
1269 cctx = zstd.ZstdCompressor(threads=2)
1280 cctx = zstd.ZstdCompressor(threads=2)
1270 with cctx.stream_writer(dest) as compressor:
1281 with cctx.stream_writer(dest) as compressor:
1271 compressor.write(b'a' * 1048576)
1282 compressor.write(b"a" * 1048576)
1272 compressor.write(b'b' * 1048576)
1283 compressor.write(b"b" * 1048576)
1273 compressor.write(b'c' * 1048576)
1284 compressor.write(b"c" * 1048576)
1274
1285
1275 self.assertEqual(len(dest.getvalue()), 295)
1286 self.assertEqual(len(dest.getvalue()), 111)
1276
1287
1277 def test_tell(self):
1288 def test_tell(self):
1278 dest = io.BytesIO()
1289 dest = io.BytesIO()
1279 cctx = zstd.ZstdCompressor()
1290 cctx = zstd.ZstdCompressor()
1280 with cctx.stream_writer(dest) as compressor:
1291 with cctx.stream_writer(dest) as compressor:
1281 self.assertEqual(compressor.tell(), 0)
1292 self.assertEqual(compressor.tell(), 0)
1282
1293
1283 for i in range(256):
1294 for i in range(256):
1284 compressor.write(b'foo' * (i + 1))
1295 compressor.write(b"foo" * (i + 1))
1285 self.assertEqual(compressor.tell(), dest.tell())
1296 self.assertEqual(compressor.tell(), dest.tell())
1286
1297
1287 def test_bad_size(self):
1298 def test_bad_size(self):
1288 cctx = zstd.ZstdCompressor()
1299 cctx = zstd.ZstdCompressor()
1289
1300
1290 dest = io.BytesIO()
1301 dest = io.BytesIO()
1291
1302
1292 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1303 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
1293 with cctx.stream_writer(dest, size=2) as compressor:
1304 with cctx.stream_writer(dest, size=2) as compressor:
1294 compressor.write(b'foo')
1305 compressor.write(b"foo")
1295
1306
1296 # Test another operation.
1307 # Test another operation.
1297 with cctx.stream_writer(dest, size=42):
1308 with cctx.stream_writer(dest, size=42):
1298 pass
1309 pass
1299
1310
1300 def test_tarfile_compat(self):
1311 def test_tarfile_compat(self):
1301 dest = NonClosingBytesIO()
1312 dest = NonClosingBytesIO()
1302 cctx = zstd.ZstdCompressor()
1313 cctx = zstd.ZstdCompressor()
1303 with cctx.stream_writer(dest) as compressor:
1314 with cctx.stream_writer(dest) as compressor:
1304 with tarfile.open('tf', mode='w|', fileobj=compressor) as tf:
1315 with tarfile.open("tf", mode="w|", fileobj=compressor) as tf:
1305 tf.add(__file__, 'test_compressor.py')
1316 tf.add(__file__, "test_compressor.py")
1306
1317
1307 dest = io.BytesIO(dest.getvalue())
1318 dest = io.BytesIO(dest.getvalue())
1308
1319
1309 dctx = zstd.ZstdDecompressor()
1320 dctx = zstd.ZstdDecompressor()
1310 with dctx.stream_reader(dest) as reader:
1321 with dctx.stream_reader(dest) as reader:
1311 with tarfile.open(mode='r|', fileobj=reader) as tf:
1322 with tarfile.open(mode="r|", fileobj=reader) as tf:
1312 for member in tf:
1323 for member in tf:
1313 self.assertEqual(member.name, 'test_compressor.py')
1324 self.assertEqual(member.name, "test_compressor.py")
1314
1325
1315
1326
1316 @make_cffi
1327 @make_cffi
1317 class TestCompressor_read_to_iter(unittest.TestCase):
1328 class TestCompressor_read_to_iter(TestCase):
1318 def test_type_validation(self):
1329 def test_type_validation(self):
1319 cctx = zstd.ZstdCompressor()
1330 cctx = zstd.ZstdCompressor()
1320
1331
1321 # Object with read() works.
1332 # Object with read() works.
1322 for chunk in cctx.read_to_iter(io.BytesIO()):
1333 for chunk in cctx.read_to_iter(io.BytesIO()):
1323 pass
1334 pass
1324
1335
1325 # Buffer protocol works.
1336 # Buffer protocol works.
1326 for chunk in cctx.read_to_iter(b'foobar'):
1337 for chunk in cctx.read_to_iter(b"foobar"):
1327 pass
1338 pass
1328
1339
1329 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1340 with self.assertRaisesRegex(ValueError, "must pass an object with a read"):
1330 for chunk in cctx.read_to_iter(True):
1341 for chunk in cctx.read_to_iter(True):
1331 pass
1342 pass
1332
1343
1333 def test_read_empty(self):
1344 def test_read_empty(self):
1334 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1345 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1335
1346
1336 source = io.BytesIO()
1347 source = io.BytesIO()
1337 it = cctx.read_to_iter(source)
1348 it = cctx.read_to_iter(source)
1338 chunks = list(it)
1349 chunks = list(it)
1339 self.assertEqual(len(chunks), 1)
1350 self.assertEqual(len(chunks), 1)
1340 compressed = b''.join(chunks)
1351 compressed = b"".join(chunks)
1341 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
1352 self.assertEqual(compressed, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00")
1342
1353
1343 # And again with the buffer protocol.
1354 # And again with the buffer protocol.
1344 it = cctx.read_to_iter(b'')
1355 it = cctx.read_to_iter(b"")
1345 chunks = list(it)
1356 chunks = list(it)
1346 self.assertEqual(len(chunks), 1)
1357 self.assertEqual(len(chunks), 1)
1347 compressed2 = b''.join(chunks)
1358 compressed2 = b"".join(chunks)
1348 self.assertEqual(compressed2, compressed)
1359 self.assertEqual(compressed2, compressed)
1349
1360
1350 def test_read_large(self):
1361 def test_read_large(self):
1351 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1362 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1352
1363
1353 source = io.BytesIO()
1364 source = io.BytesIO()
1354 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1365 source.write(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1355 source.write(b'o')
1366 source.write(b"o")
1356 source.seek(0)
1367 source.seek(0)
1357
1368
1358 # Creating an iterator should not perform any compression until
1369 # Creating an iterator should not perform any compression until
1359 # first read.
1370 # first read.
1360 it = cctx.read_to_iter(source, size=len(source.getvalue()))
1371 it = cctx.read_to_iter(source, size=len(source.getvalue()))
1361 self.assertEqual(source.tell(), 0)
1372 self.assertEqual(source.tell(), 0)
1362
1373
1363 # We should have exactly 2 output chunks.
1374 # We should have exactly 2 output chunks.
1364 chunks = []
1375 chunks = []
1365 chunk = next(it)
1376 chunk = next(it)
1366 self.assertIsNotNone(chunk)
1377 self.assertIsNotNone(chunk)
1367 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1378 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1368 chunks.append(chunk)
1379 chunks.append(chunk)
1369 chunk = next(it)
1380 chunk = next(it)
1370 self.assertIsNotNone(chunk)
1381 self.assertIsNotNone(chunk)
1371 chunks.append(chunk)
1382 chunks.append(chunk)
1372
1383
1373 self.assertEqual(source.tell(), len(source.getvalue()))
1384 self.assertEqual(source.tell(), len(source.getvalue()))
1374
1385
1375 with self.assertRaises(StopIteration):
1386 with self.assertRaises(StopIteration):
1376 next(it)
1387 next(it)
1377
1388
1378 # And again for good measure.
1389 # And again for good measure.
1379 with self.assertRaises(StopIteration):
1390 with self.assertRaises(StopIteration):
1380 next(it)
1391 next(it)
1381
1392
1382 # We should get the same output as the one-shot compression mechanism.
1393 # We should get the same output as the one-shot compression mechanism.
1383 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1394 self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue()))
1384
1395
1385 params = zstd.get_frame_parameters(b''.join(chunks))
1396 params = zstd.get_frame_parameters(b"".join(chunks))
1386 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1397 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1387 self.assertEqual(params.window_size, 262144)
1398 self.assertEqual(params.window_size, 262144)
1388 self.assertEqual(params.dict_id, 0)
1399 self.assertEqual(params.dict_id, 0)
1389 self.assertFalse(params.has_checksum)
1400 self.assertFalse(params.has_checksum)
1390
1401
1391 # Now check the buffer protocol.
1402 # Now check the buffer protocol.
1392 it = cctx.read_to_iter(source.getvalue())
1403 it = cctx.read_to_iter(source.getvalue())
1393 chunks = list(it)
1404 chunks = list(it)
1394 self.assertEqual(len(chunks), 2)
1405 self.assertEqual(len(chunks), 2)
1395
1406
1396 params = zstd.get_frame_parameters(b''.join(chunks))
1407 params = zstd.get_frame_parameters(b"".join(chunks))
1397 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1408 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1398 #self.assertEqual(params.window_size, 262144)
1409 # self.assertEqual(params.window_size, 262144)
1399 self.assertEqual(params.dict_id, 0)
1410 self.assertEqual(params.dict_id, 0)
1400 self.assertFalse(params.has_checksum)
1411 self.assertFalse(params.has_checksum)
1401
1412
1402 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1413 self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue()))
1403
1414
1404 def test_read_write_size(self):
1415 def test_read_write_size(self):
1405 source = OpCountingBytesIO(b'foobarfoobar')
1416 source = OpCountingBytesIO(b"foobarfoobar")
1406 cctx = zstd.ZstdCompressor(level=3)
1417 cctx = zstd.ZstdCompressor(level=3)
1407 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
1418 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
1408 self.assertEqual(len(chunk), 1)
1419 self.assertEqual(len(chunk), 1)
1409
1420
1410 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
1421 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
1411
1422
1412 def test_multithreaded(self):
1423 def test_multithreaded(self):
1413 source = io.BytesIO()
1424 source = io.BytesIO()
1414 source.write(b'a' * 1048576)
1425 source.write(b"a" * 1048576)
1415 source.write(b'b' * 1048576)
1426 source.write(b"b" * 1048576)
1416 source.write(b'c' * 1048576)
1427 source.write(b"c" * 1048576)
1417 source.seek(0)
1428 source.seek(0)
1418
1429
1419 cctx = zstd.ZstdCompressor(threads=2)
1430 cctx = zstd.ZstdCompressor(threads=2)
1420
1431
1421 compressed = b''.join(cctx.read_to_iter(source))
1432 compressed = b"".join(cctx.read_to_iter(source))
1422 self.assertEqual(len(compressed), 295)
1433 self.assertEqual(len(compressed), 111)
1423
1434
1424 def test_bad_size(self):
1435 def test_bad_size(self):
1425 cctx = zstd.ZstdCompressor()
1436 cctx = zstd.ZstdCompressor()
1426
1437
1427 source = io.BytesIO(b'a' * 42)
1438 source = io.BytesIO(b"a" * 42)
1428
1439
1429 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1440 with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"):
1430 b''.join(cctx.read_to_iter(source, size=2))
1441 b"".join(cctx.read_to_iter(source, size=2))
1431
1442
1432 # Test another operation on errored compressor.
1443 # Test another operation on errored compressor.
1433 b''.join(cctx.read_to_iter(source))
1444 b"".join(cctx.read_to_iter(source))
1434
1445
1435
1446
1436 @make_cffi
1447 @make_cffi
1437 class TestCompressor_chunker(unittest.TestCase):
1448 class TestCompressor_chunker(TestCase):
1438 def test_empty(self):
1449 def test_empty(self):
1439 cctx = zstd.ZstdCompressor(write_content_size=False)
1450 cctx = zstd.ZstdCompressor(write_content_size=False)
1440 chunker = cctx.chunker()
1451 chunker = cctx.chunker()
1441
1452
1442 it = chunker.compress(b'')
1453 it = chunker.compress(b"")
1443
1454
1444 with self.assertRaises(StopIteration):
1455 with self.assertRaises(StopIteration):
1445 next(it)
1456 next(it)
1446
1457
1447 it = chunker.finish()
1458 it = chunker.finish()
1448
1459
1449 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00')
1460 self.assertEqual(next(it), b"\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00")
1450
1461
1451 with self.assertRaises(StopIteration):
1462 with self.assertRaises(StopIteration):
1452 next(it)
1463 next(it)
1453
1464
1454 def test_simple_input(self):
1465 def test_simple_input(self):
1455 cctx = zstd.ZstdCompressor()
1466 cctx = zstd.ZstdCompressor()
1456 chunker = cctx.chunker()
1467 chunker = cctx.chunker()
1457
1468
1458 it = chunker.compress(b'foobar')
1469 it = chunker.compress(b"foobar")
1459
1470
1460 with self.assertRaises(StopIteration):
1471 with self.assertRaises(StopIteration):
1461 next(it)
1472 next(it)
1462
1473
1463 it = chunker.compress(b'baz' * 30)
1474 it = chunker.compress(b"baz" * 30)
1464
1475
1465 with self.assertRaises(StopIteration):
1476 with self.assertRaises(StopIteration):
1466 next(it)
1477 next(it)
1467
1478
1468 it = chunker.finish()
1479 it = chunker.finish()
1469
1480
1470 self.assertEqual(next(it),
1481 self.assertEqual(
1471 b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f'
1482 next(it),
1472 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1483 b"\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f"
1484 b"\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e",
1485 )
1473
1486
1474 with self.assertRaises(StopIteration):
1487 with self.assertRaises(StopIteration):
1475 next(it)
1488 next(it)
1476
1489
1477 def test_input_size(self):
1490 def test_input_size(self):
1478 cctx = zstd.ZstdCompressor()
1491 cctx = zstd.ZstdCompressor()
1479 chunker = cctx.chunker(size=1024)
1492 chunker = cctx.chunker(size=1024)
1480
1493
1481 it = chunker.compress(b'x' * 1000)
1494 it = chunker.compress(b"x" * 1000)
1482
1495
1483 with self.assertRaises(StopIteration):
1496 with self.assertRaises(StopIteration):
1484 next(it)
1497 next(it)
1485
1498
1486 it = chunker.compress(b'y' * 24)
1499 it = chunker.compress(b"y" * 24)
1487
1500
1488 with self.assertRaises(StopIteration):
1501 with self.assertRaises(StopIteration):
1489 next(it)
1502 next(it)
1490
1503
1491 chunks = list(chunker.finish())
1504 chunks = list(chunker.finish())
1492
1505
1493 self.assertEqual(chunks, [
1506 self.assertEqual(
1494 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1507 chunks,
1495 b'\xa0\x16\xe3\x2b\x80\x05'
1508 [
1496 ])
1509 b"\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00"
1510 b"\xa0\x16\xe3\x2b\x80\x05"
1511 ],
1512 )
1497
1513
1498 dctx = zstd.ZstdDecompressor()
1514 dctx = zstd.ZstdDecompressor()
1499
1515
1500 self.assertEqual(dctx.decompress(b''.join(chunks)),
1516 self.assertEqual(dctx.decompress(b"".join(chunks)), (b"x" * 1000) + (b"y" * 24))
1501 (b'x' * 1000) + (b'y' * 24))
1502
1517
1503 def test_small_chunk_size(self):
1518 def test_small_chunk_size(self):
1504 cctx = zstd.ZstdCompressor()
1519 cctx = zstd.ZstdCompressor()
1505 chunker = cctx.chunker(chunk_size=1)
1520 chunker = cctx.chunker(chunk_size=1)
1506
1521
1507 chunks = list(chunker.compress(b'foo' * 1024))
1522 chunks = list(chunker.compress(b"foo" * 1024))
1508 self.assertEqual(chunks, [])
1523 self.assertEqual(chunks, [])
1509
1524
1510 chunks = list(chunker.finish())
1525 chunks = list(chunker.finish())
1511 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1526 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1512
1527
1513 self.assertEqual(
1528 self.assertEqual(
1514 b''.join(chunks),
1529 b"".join(chunks),
1515 b'\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1530 b"\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00"
1516 b'\xfa\xd3\x77\x43')
1531 b"\xfa\xd3\x77\x43",
1532 )
1517
1533
1518 dctx = zstd.ZstdDecompressor()
1534 dctx = zstd.ZstdDecompressor()
1519 self.assertEqual(dctx.decompress(b''.join(chunks),
1535 self.assertEqual(
1520 max_output_size=10000),
1536 dctx.decompress(b"".join(chunks), max_output_size=10000), b"foo" * 1024
1521 b'foo' * 1024)
1537 )
1522
1538
1523 def test_input_types(self):
1539 def test_input_types(self):
1524 cctx = zstd.ZstdCompressor()
1540 cctx = zstd.ZstdCompressor()
1525
1541
1526 mutable_array = bytearray(3)
1542 mutable_array = bytearray(3)
1527 mutable_array[:] = b'foo'
1543 mutable_array[:] = b"foo"
1528
1544
1529 sources = [
1545 sources = [
1530 memoryview(b'foo'),
1546 memoryview(b"foo"),
1531 bytearray(b'foo'),
1547 bytearray(b"foo"),
1532 mutable_array,
1548 mutable_array,
1533 ]
1549 ]
1534
1550
1535 for source in sources:
1551 for source in sources:
1536 chunker = cctx.chunker()
1552 chunker = cctx.chunker()
1537
1553
1538 self.assertEqual(list(chunker.compress(source)), [])
1554 self.assertEqual(list(chunker.compress(source)), [])
1539 self.assertEqual(list(chunker.finish()), [
1555 self.assertEqual(
1540 b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f'
1556 list(chunker.finish()),
1541 ])
1557 [b"\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f"],
1558 )
1542
1559
1543 def test_flush(self):
1560 def test_flush(self):
1544 cctx = zstd.ZstdCompressor()
1561 cctx = zstd.ZstdCompressor()
1545 chunker = cctx.chunker()
1562 chunker = cctx.chunker()
1546
1563
1547 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1564 self.assertEqual(list(chunker.compress(b"foo" * 1024)), [])
1548 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1565 self.assertEqual(list(chunker.compress(b"bar" * 1024)), [])
1549
1566
1550 chunks1 = list(chunker.flush())
1567 chunks1 = list(chunker.flush())
1551
1568
1552 self.assertEqual(chunks1, [
1569 self.assertEqual(
1553 b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1570 chunks1,
1554 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1571 [
1555 ])
1572 b"\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72"
1573 b"\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02"
1574 ],
1575 )
1556
1576
1557 self.assertEqual(list(chunker.flush()), [])
1577 self.assertEqual(list(chunker.flush()), [])
1558 self.assertEqual(list(chunker.flush()), [])
1578 self.assertEqual(list(chunker.flush()), [])
1559
1579
1560 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1580 self.assertEqual(list(chunker.compress(b"baz" * 1024)), [])
1561
1581
1562 chunks2 = list(chunker.flush())
1582 chunks2 = list(chunker.flush())
1563 self.assertEqual(len(chunks2), 1)
1583 self.assertEqual(len(chunks2), 1)
1564
1584
1565 chunks3 = list(chunker.finish())
1585 chunks3 = list(chunker.finish())
1566 self.assertEqual(len(chunks2), 1)
1586 self.assertEqual(len(chunks2), 1)
1567
1587
1568 dctx = zstd.ZstdDecompressor()
1588 dctx = zstd.ZstdDecompressor()
1569
1589
1570 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1590 self.assertEqual(
1571 max_output_size=10000),
1591 dctx.decompress(
1572 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1592 b"".join(chunks1 + chunks2 + chunks3), max_output_size=10000
1593 ),
1594 (b"foo" * 1024) + (b"bar" * 1024) + (b"baz" * 1024),
1595 )
1573
1596
1574 def test_compress_after_finish(self):
1597 def test_compress_after_finish(self):
1575 cctx = zstd.ZstdCompressor()
1598 cctx = zstd.ZstdCompressor()
1576 chunker = cctx.chunker()
1599 chunker = cctx.chunker()
1577
1600
1578 list(chunker.compress(b'foo'))
1601 list(chunker.compress(b"foo"))
1579 list(chunker.finish())
1602 list(chunker.finish())
1580
1603
1581 with self.assertRaisesRegexp(
1604 with self.assertRaisesRegex(
1582 zstd.ZstdError,
1605 zstd.ZstdError, r"cannot call compress\(\) after compression finished"
1583 r'cannot call compress\(\) after compression finished'):
1606 ):
1584 list(chunker.compress(b'foo'))
1607 list(chunker.compress(b"foo"))
1585
1608
1586 def test_flush_after_finish(self):
1609 def test_flush_after_finish(self):
1587 cctx = zstd.ZstdCompressor()
1610 cctx = zstd.ZstdCompressor()
1588 chunker = cctx.chunker()
1611 chunker = cctx.chunker()
1589
1612
1590 list(chunker.compress(b'foo'))
1613 list(chunker.compress(b"foo"))
1591 list(chunker.finish())
1614 list(chunker.finish())
1592
1615
1593 with self.assertRaisesRegexp(
1616 with self.assertRaisesRegex(
1594 zstd.ZstdError,
1617 zstd.ZstdError, r"cannot call flush\(\) after compression finished"
1595 r'cannot call flush\(\) after compression finished'):
1618 ):
1596 list(chunker.flush())
1619 list(chunker.flush())
1597
1620
1598 def test_finish_after_finish(self):
1621 def test_finish_after_finish(self):
1599 cctx = zstd.ZstdCompressor()
1622 cctx = zstd.ZstdCompressor()
1600 chunker = cctx.chunker()
1623 chunker = cctx.chunker()
1601
1624
1602 list(chunker.compress(b'foo'))
1625 list(chunker.compress(b"foo"))
1603 list(chunker.finish())
1626 list(chunker.finish())
1604
1627
1605 with self.assertRaisesRegexp(
1628 with self.assertRaisesRegex(
1606 zstd.ZstdError,
1629 zstd.ZstdError, r"cannot call finish\(\) after compression finished"
1607 r'cannot call finish\(\) after compression finished'):
1630 ):
1608 list(chunker.finish())
1631 list(chunker.finish())
1609
1632
1610
1633
1611 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1634 class TestCompressor_multi_compress_to_buffer(TestCase):
1612 def test_invalid_inputs(self):
1635 def test_invalid_inputs(self):
1613 cctx = zstd.ZstdCompressor()
1636 cctx = zstd.ZstdCompressor()
1614
1637
1615 if not hasattr(cctx, 'multi_compress_to_buffer'):
1638 if not hasattr(cctx, "multi_compress_to_buffer"):
1616 self.skipTest('multi_compress_to_buffer not available')
1639 self.skipTest("multi_compress_to_buffer not available")
1617
1640
1618 with self.assertRaises(TypeError):
1641 with self.assertRaises(TypeError):
1619 cctx.multi_compress_to_buffer(True)
1642 cctx.multi_compress_to_buffer(True)
1620
1643
1621 with self.assertRaises(TypeError):
1644 with self.assertRaises(TypeError):
1622 cctx.multi_compress_to_buffer((1, 2))
1645 cctx.multi_compress_to_buffer((1, 2))
1623
1646
1624 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1647 with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"):
1625 cctx.multi_compress_to_buffer([u'foo'])
1648 cctx.multi_compress_to_buffer([u"foo"])
1626
1649
1627 def test_empty_input(self):
1650 def test_empty_input(self):
1628 cctx = zstd.ZstdCompressor()
1651 cctx = zstd.ZstdCompressor()
1629
1652
1630 if not hasattr(cctx, 'multi_compress_to_buffer'):
1653 if not hasattr(cctx, "multi_compress_to_buffer"):
1631 self.skipTest('multi_compress_to_buffer not available')
1654 self.skipTest("multi_compress_to_buffer not available")
1632
1655
1633 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1656 with self.assertRaisesRegex(ValueError, "no source elements found"):
1634 cctx.multi_compress_to_buffer([])
1657 cctx.multi_compress_to_buffer([])
1635
1658
1636 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
1659 with self.assertRaisesRegex(ValueError, "source elements are empty"):
1637 cctx.multi_compress_to_buffer([b'', b'', b''])
1660 cctx.multi_compress_to_buffer([b"", b"", b""])
1638
1661
1639 def test_list_input(self):
1662 def test_list_input(self):
1640 cctx = zstd.ZstdCompressor(write_checksum=True)
1663 cctx = zstd.ZstdCompressor(write_checksum=True)
1641
1664
1642 if not hasattr(cctx, 'multi_compress_to_buffer'):
1665 if not hasattr(cctx, "multi_compress_to_buffer"):
1643 self.skipTest('multi_compress_to_buffer not available')
1666 self.skipTest("multi_compress_to_buffer not available")
1644
1667
1645 original = [b'foo' * 12, b'bar' * 6]
1668 original = [b"foo" * 12, b"bar" * 6]
1646 frames = [cctx.compress(c) for c in original]
1669 frames = [cctx.compress(c) for c in original]
1647 b = cctx.multi_compress_to_buffer(original)
1670 b = cctx.multi_compress_to_buffer(original)
1648
1671
1649 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
1672 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
1650
1673
1651 self.assertEqual(len(b), 2)
1674 self.assertEqual(len(b), 2)
1652 self.assertEqual(b.size(), 44)
1675 self.assertEqual(b.size(), 44)
1653
1676
1654 self.assertEqual(b[0].tobytes(), frames[0])
1677 self.assertEqual(b[0].tobytes(), frames[0])
1655 self.assertEqual(b[1].tobytes(), frames[1])
1678 self.assertEqual(b[1].tobytes(), frames[1])
1656
1679
1657 def test_buffer_with_segments_input(self):
1680 def test_buffer_with_segments_input(self):
1658 cctx = zstd.ZstdCompressor(write_checksum=True)
1681 cctx = zstd.ZstdCompressor(write_checksum=True)
1659
1682
1660 if not hasattr(cctx, 'multi_compress_to_buffer'):
1683 if not hasattr(cctx, "multi_compress_to_buffer"):
1661 self.skipTest('multi_compress_to_buffer not available')
1684 self.skipTest("multi_compress_to_buffer not available")
1662
1685
1663 original = [b'foo' * 4, b'bar' * 6]
1686 original = [b"foo" * 4, b"bar" * 6]
1664 frames = [cctx.compress(c) for c in original]
1687 frames = [cctx.compress(c) for c in original]
1665
1688
1666 offsets = struct.pack('=QQQQ', 0, len(original[0]),
1689 offsets = struct.pack(
1667 len(original[0]), len(original[1]))
1690 "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1])
1668 segments = zstd.BufferWithSegments(b''.join(original), offsets)
1691 )
1692 segments = zstd.BufferWithSegments(b"".join(original), offsets)
1669
1693
1670 result = cctx.multi_compress_to_buffer(segments)
1694 result = cctx.multi_compress_to_buffer(segments)
1671
1695
1672 self.assertEqual(len(result), 2)
1696 self.assertEqual(len(result), 2)
1673 self.assertEqual(result.size(), 47)
1697 self.assertEqual(result.size(), 47)
1674
1698
1675 self.assertEqual(result[0].tobytes(), frames[0])
1699 self.assertEqual(result[0].tobytes(), frames[0])
1676 self.assertEqual(result[1].tobytes(), frames[1])
1700 self.assertEqual(result[1].tobytes(), frames[1])
1677
1701
1678 def test_buffer_with_segments_collection_input(self):
1702 def test_buffer_with_segments_collection_input(self):
1679 cctx = zstd.ZstdCompressor(write_checksum=True)
1703 cctx = zstd.ZstdCompressor(write_checksum=True)
1680
1704
1681 if not hasattr(cctx, 'multi_compress_to_buffer'):
1705 if not hasattr(cctx, "multi_compress_to_buffer"):
1682 self.skipTest('multi_compress_to_buffer not available')
1706 self.skipTest("multi_compress_to_buffer not available")
1683
1707
1684 original = [
1708 original = [
1685 b'foo1',
1709 b"foo1",
1686 b'foo2' * 2,
1710 b"foo2" * 2,
1687 b'foo3' * 3,
1711 b"foo3" * 3,
1688 b'foo4' * 4,
1712 b"foo4" * 4,
1689 b'foo5' * 5,
1713 b"foo5" * 5,
1690 ]
1714 ]
1691
1715
1692 frames = [cctx.compress(c) for c in original]
1716 frames = [cctx.compress(c) for c in original]
1693
1717
1694 b = b''.join([original[0], original[1]])
1718 b = b"".join([original[0], original[1]])
1695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1719 b1 = zstd.BufferWithSegments(
1696 0, len(original[0]),
1720 b,
1697 len(original[0]), len(original[1])))
1721 struct.pack(
1698 b = b''.join([original[2], original[3], original[4]])
1722 "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1])
1699 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1723 ),
1700 0, len(original[2]),
1724 )
1701 len(original[2]), len(original[3]),
1725 b = b"".join([original[2], original[3], original[4]])
1702 len(original[2]) + len(original[3]), len(original[4])))
1726 b2 = zstd.BufferWithSegments(
1727 b,
1728 struct.pack(
1729 "=QQQQQQ",
1730 0,
1731 len(original[2]),
1732 len(original[2]),
1733 len(original[3]),
1734 len(original[2]) + len(original[3]),
1735 len(original[4]),
1736 ),
1737 )
1703
1738
1704 c = zstd.BufferWithSegmentsCollection(b1, b2)
1739 c = zstd.BufferWithSegmentsCollection(b1, b2)
1705
1740
1706 result = cctx.multi_compress_to_buffer(c)
1741 result = cctx.multi_compress_to_buffer(c)
1707
1742
1708 self.assertEqual(len(result), len(frames))
1743 self.assertEqual(len(result), len(frames))
1709
1744
1710 for i, frame in enumerate(frames):
1745 for i, frame in enumerate(frames):
1711 self.assertEqual(result[i].tobytes(), frame)
1746 self.assertEqual(result[i].tobytes(), frame)
1712
1747
1713 def test_multiple_threads(self):
1748 def test_multiple_threads(self):
1714 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
1749 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
1715 # make output different.
1750 # make output different.
1716 refcctx = zstd.ZstdCompressor(write_checksum=True)
1751 refcctx = zstd.ZstdCompressor(write_checksum=True)
1717 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
1752 reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)]
1718
1753
1719 cctx = zstd.ZstdCompressor(write_checksum=True)
1754 cctx = zstd.ZstdCompressor(write_checksum=True)
1720
1755
1721 if not hasattr(cctx, 'multi_compress_to_buffer'):
1756 if not hasattr(cctx, "multi_compress_to_buffer"):
1722 self.skipTest('multi_compress_to_buffer not available')
1757 self.skipTest("multi_compress_to_buffer not available")
1723
1758
1724 frames = []
1759 frames = []
1725 frames.extend(b'x' * 64 for i in range(256))
1760 frames.extend(b"x" * 64 for i in range(256))
1726 frames.extend(b'y' * 64 for i in range(256))
1761 frames.extend(b"y" * 64 for i in range(256))
1727
1762
1728 result = cctx.multi_compress_to_buffer(frames, threads=-1)
1763 result = cctx.multi_compress_to_buffer(frames, threads=-1)
1729
1764
1730 self.assertEqual(len(result), 512)
1765 self.assertEqual(len(result), 512)
1731 for i in range(512):
1766 for i in range(512):
1732 if i < 256:
1767 if i < 256:
1733 self.assertEqual(result[i].tobytes(), reference[0])
1768 self.assertEqual(result[i].tobytes(), reference[0])
1734 else:
1769 else:
1735 self.assertEqual(result[i].tobytes(), reference[1])
1770 self.assertEqual(result[i].tobytes(), reference[1])
This diff has been collapsed as it changes many lines, (631 lines changed) Show them Hide them
@@ -1,711 +1,836 b''
1 import io
1 import io
2 import os
2 import os
3 import unittest
3 import unittest
4
4
5 try:
5 try:
6 import hypothesis
6 import hypothesis
7 import hypothesis.strategies as strategies
7 import hypothesis.strategies as strategies
8 except ImportError:
8 except ImportError:
9 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest("hypothesis not available")
10
10
11 import zstandard as zstd
11 import zstandard as zstd
12
12
13 from . common import (
13 from .common import (
14 make_cffi,
14 make_cffi,
15 NonClosingBytesIO,
15 NonClosingBytesIO,
16 random_input_data,
16 random_input_data,
17 TestCase,
17 )
18 )
18
19
19
20
20 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
21 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
21 @make_cffi
22 @make_cffi
22 class TestCompressor_stream_reader_fuzzing(unittest.TestCase):
23 class TestCompressor_stream_reader_fuzzing(TestCase):
23 @hypothesis.settings(
24 @hypothesis.settings(
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
25 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
26 )
26 level=strategies.integers(min_value=1, max_value=5),
27 @hypothesis.given(
27 source_read_size=strategies.integers(1, 16384),
28 original=strategies.sampled_from(random_input_data()),
28 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
29 level=strategies.integers(min_value=1, max_value=5),
29 def test_stream_source_read(self, original, level, source_read_size,
30 source_read_size=strategies.integers(1, 16384),
30 read_size):
31 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
32 )
33 def test_stream_source_read(self, original, level, source_read_size, read_size):
31 if read_size == 0:
34 if read_size == 0:
32 read_size = -1
35 read_size = -1
33
36
34 refctx = zstd.ZstdCompressor(level=level)
37 refctx = zstd.ZstdCompressor(level=level)
35 ref_frame = refctx.compress(original)
38 ref_frame = refctx.compress(original)
36
39
37 cctx = zstd.ZstdCompressor(level=level)
40 cctx = zstd.ZstdCompressor(level=level)
38 with cctx.stream_reader(io.BytesIO(original), size=len(original),
41 with cctx.stream_reader(
39 read_size=source_read_size) as reader:
42 io.BytesIO(original), size=len(original), read_size=source_read_size
43 ) as reader:
40 chunks = []
44 chunks = []
41 while True:
45 while True:
42 chunk = reader.read(read_size)
46 chunk = reader.read(read_size)
43 if not chunk:
47 if not chunk:
44 break
48 break
45
49
46 chunks.append(chunk)
50 chunks.append(chunk)
47
51
48 self.assertEqual(b''.join(chunks), ref_frame)
52 self.assertEqual(b"".join(chunks), ref_frame)
49
53
50 @hypothesis.settings(
54 @hypothesis.settings(
51 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
55 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
52 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
56 )
53 level=strategies.integers(min_value=1, max_value=5),
57 @hypothesis.given(
54 source_read_size=strategies.integers(1, 16384),
58 original=strategies.sampled_from(random_input_data()),
55 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
59 level=strategies.integers(min_value=1, max_value=5),
56 def test_buffer_source_read(self, original, level, source_read_size,
60 source_read_size=strategies.integers(1, 16384),
57 read_size):
61 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
62 )
63 def test_buffer_source_read(self, original, level, source_read_size, read_size):
58 if read_size == 0:
64 if read_size == 0:
59 read_size = -1
65 read_size = -1
60
66
61 refctx = zstd.ZstdCompressor(level=level)
67 refctx = zstd.ZstdCompressor(level=level)
62 ref_frame = refctx.compress(original)
68 ref_frame = refctx.compress(original)
63
69
64 cctx = zstd.ZstdCompressor(level=level)
70 cctx = zstd.ZstdCompressor(level=level)
65 with cctx.stream_reader(original, size=len(original),
71 with cctx.stream_reader(
66 read_size=source_read_size) as reader:
72 original, size=len(original), read_size=source_read_size
73 ) as reader:
67 chunks = []
74 chunks = []
68 while True:
75 while True:
69 chunk = reader.read(read_size)
76 chunk = reader.read(read_size)
70 if not chunk:
77 if not chunk:
71 break
78 break
72
79
73 chunks.append(chunk)
80 chunks.append(chunk)
74
81
75 self.assertEqual(b''.join(chunks), ref_frame)
82 self.assertEqual(b"".join(chunks), ref_frame)
76
83
77 @hypothesis.settings(
84 @hypothesis.settings(
78 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
85 suppress_health_check=[
79 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
86 hypothesis.HealthCheck.large_base_example,
80 level=strategies.integers(min_value=1, max_value=5),
87 hypothesis.HealthCheck.too_slow,
81 source_read_size=strategies.integers(1, 16384),
88 ]
82 read_sizes=strategies.data())
89 )
83 def test_stream_source_read_variance(self, original, level, source_read_size,
90 @hypothesis.given(
84 read_sizes):
91 original=strategies.sampled_from(random_input_data()),
92 level=strategies.integers(min_value=1, max_value=5),
93 source_read_size=strategies.integers(1, 16384),
94 read_sizes=strategies.data(),
95 )
96 def test_stream_source_read_variance(
97 self, original, level, source_read_size, read_sizes
98 ):
85 refctx = zstd.ZstdCompressor(level=level)
99 refctx = zstd.ZstdCompressor(level=level)
86 ref_frame = refctx.compress(original)
100 ref_frame = refctx.compress(original)
87
101
88 cctx = zstd.ZstdCompressor(level=level)
102 cctx = zstd.ZstdCompressor(level=level)
89 with cctx.stream_reader(io.BytesIO(original), size=len(original),
103 with cctx.stream_reader(
90 read_size=source_read_size) as reader:
104 io.BytesIO(original), size=len(original), read_size=source_read_size
105 ) as reader:
91 chunks = []
106 chunks = []
92 while True:
107 while True:
93 read_size = read_sizes.draw(strategies.integers(-1, 16384))
108 read_size = read_sizes.draw(strategies.integers(-1, 16384))
94 chunk = reader.read(read_size)
109 chunk = reader.read(read_size)
95 if not chunk and read_size:
110 if not chunk and read_size:
96 break
111 break
97
112
98 chunks.append(chunk)
113 chunks.append(chunk)
99
114
100 self.assertEqual(b''.join(chunks), ref_frame)
115 self.assertEqual(b"".join(chunks), ref_frame)
101
116
102 @hypothesis.settings(
117 @hypothesis.settings(
103 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
118 suppress_health_check=[
104 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
119 hypothesis.HealthCheck.large_base_example,
105 level=strategies.integers(min_value=1, max_value=5),
120 hypothesis.HealthCheck.too_slow,
106 source_read_size=strategies.integers(1, 16384),
121 ]
107 read_sizes=strategies.data())
122 )
108 def test_buffer_source_read_variance(self, original, level, source_read_size,
123 @hypothesis.given(
109 read_sizes):
124 original=strategies.sampled_from(random_input_data()),
125 level=strategies.integers(min_value=1, max_value=5),
126 source_read_size=strategies.integers(1, 16384),
127 read_sizes=strategies.data(),
128 )
129 def test_buffer_source_read_variance(
130 self, original, level, source_read_size, read_sizes
131 ):
110
132
111 refctx = zstd.ZstdCompressor(level=level)
133 refctx = zstd.ZstdCompressor(level=level)
112 ref_frame = refctx.compress(original)
134 ref_frame = refctx.compress(original)
113
135
114 cctx = zstd.ZstdCompressor(level=level)
136 cctx = zstd.ZstdCompressor(level=level)
115 with cctx.stream_reader(original, size=len(original),
137 with cctx.stream_reader(
116 read_size=source_read_size) as reader:
138 original, size=len(original), read_size=source_read_size
139 ) as reader:
117 chunks = []
140 chunks = []
118 while True:
141 while True:
119 read_size = read_sizes.draw(strategies.integers(-1, 16384))
142 read_size = read_sizes.draw(strategies.integers(-1, 16384))
120 chunk = reader.read(read_size)
143 chunk = reader.read(read_size)
121 if not chunk and read_size:
144 if not chunk and read_size:
122 break
145 break
123
146
124 chunks.append(chunk)
147 chunks.append(chunk)
125
148
126 self.assertEqual(b''.join(chunks), ref_frame)
149 self.assertEqual(b"".join(chunks), ref_frame)
127
150
128 @hypothesis.settings(
151 @hypothesis.settings(
129 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
152 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
130 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
153 )
131 level=strategies.integers(min_value=1, max_value=5),
154 @hypothesis.given(
132 source_read_size=strategies.integers(1, 16384),
155 original=strategies.sampled_from(random_input_data()),
133 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
156 level=strategies.integers(min_value=1, max_value=5),
134 def test_stream_source_readinto(self, original, level,
157 source_read_size=strategies.integers(1, 16384),
135 source_read_size, read_size):
158 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
159 )
160 def test_stream_source_readinto(self, original, level, source_read_size, read_size):
136 refctx = zstd.ZstdCompressor(level=level)
161 refctx = zstd.ZstdCompressor(level=level)
137 ref_frame = refctx.compress(original)
162 ref_frame = refctx.compress(original)
138
163
139 cctx = zstd.ZstdCompressor(level=level)
164 cctx = zstd.ZstdCompressor(level=level)
140 with cctx.stream_reader(io.BytesIO(original), size=len(original),
165 with cctx.stream_reader(
141 read_size=source_read_size) as reader:
166 io.BytesIO(original), size=len(original), read_size=source_read_size
167 ) as reader:
142 chunks = []
168 chunks = []
143 while True:
169 while True:
144 b = bytearray(read_size)
170 b = bytearray(read_size)
145 count = reader.readinto(b)
171 count = reader.readinto(b)
146
172
147 if not count:
173 if not count:
148 break
174 break
149
175
150 chunks.append(bytes(b[0:count]))
176 chunks.append(bytes(b[0:count]))
151
177
152 self.assertEqual(b''.join(chunks), ref_frame)
178 self.assertEqual(b"".join(chunks), ref_frame)
153
179
154 @hypothesis.settings(
180 @hypothesis.settings(
155 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
181 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
156 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
182 )
157 level=strategies.integers(min_value=1, max_value=5),
183 @hypothesis.given(
158 source_read_size=strategies.integers(1, 16384),
184 original=strategies.sampled_from(random_input_data()),
159 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
185 level=strategies.integers(min_value=1, max_value=5),
160 def test_buffer_source_readinto(self, original, level,
186 source_read_size=strategies.integers(1, 16384),
161 source_read_size, read_size):
187 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
188 )
189 def test_buffer_source_readinto(self, original, level, source_read_size, read_size):
162
190
163 refctx = zstd.ZstdCompressor(level=level)
191 refctx = zstd.ZstdCompressor(level=level)
164 ref_frame = refctx.compress(original)
192 ref_frame = refctx.compress(original)
165
193
166 cctx = zstd.ZstdCompressor(level=level)
194 cctx = zstd.ZstdCompressor(level=level)
167 with cctx.stream_reader(original, size=len(original),
195 with cctx.stream_reader(
168 read_size=source_read_size) as reader:
196 original, size=len(original), read_size=source_read_size
197 ) as reader:
169 chunks = []
198 chunks = []
170 while True:
199 while True:
171 b = bytearray(read_size)
200 b = bytearray(read_size)
172 count = reader.readinto(b)
201 count = reader.readinto(b)
173
202
174 if not count:
203 if not count:
175 break
204 break
176
205
177 chunks.append(bytes(b[0:count]))
206 chunks.append(bytes(b[0:count]))
178
207
179 self.assertEqual(b''.join(chunks), ref_frame)
208 self.assertEqual(b"".join(chunks), ref_frame)
180
209
181 @hypothesis.settings(
210 @hypothesis.settings(
182 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
211 suppress_health_check=[
183 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
212 hypothesis.HealthCheck.large_base_example,
184 level=strategies.integers(min_value=1, max_value=5),
213 hypothesis.HealthCheck.too_slow,
185 source_read_size=strategies.integers(1, 16384),
214 ]
186 read_sizes=strategies.data())
215 )
187 def test_stream_source_readinto_variance(self, original, level,
216 @hypothesis.given(
188 source_read_size, read_sizes):
217 original=strategies.sampled_from(random_input_data()),
218 level=strategies.integers(min_value=1, max_value=5),
219 source_read_size=strategies.integers(1, 16384),
220 read_sizes=strategies.data(),
221 )
222 def test_stream_source_readinto_variance(
223 self, original, level, source_read_size, read_sizes
224 ):
189 refctx = zstd.ZstdCompressor(level=level)
225 refctx = zstd.ZstdCompressor(level=level)
190 ref_frame = refctx.compress(original)
226 ref_frame = refctx.compress(original)
191
227
192 cctx = zstd.ZstdCompressor(level=level)
228 cctx = zstd.ZstdCompressor(level=level)
193 with cctx.stream_reader(io.BytesIO(original), size=len(original),
229 with cctx.stream_reader(
194 read_size=source_read_size) as reader:
230 io.BytesIO(original), size=len(original), read_size=source_read_size
231 ) as reader:
195 chunks = []
232 chunks = []
196 while True:
233 while True:
197 read_size = read_sizes.draw(strategies.integers(1, 16384))
234 read_size = read_sizes.draw(strategies.integers(1, 16384))
198 b = bytearray(read_size)
235 b = bytearray(read_size)
199 count = reader.readinto(b)
236 count = reader.readinto(b)
200
237
201 if not count:
238 if not count:
202 break
239 break
203
240
204 chunks.append(bytes(b[0:count]))
241 chunks.append(bytes(b[0:count]))
205
242
206 self.assertEqual(b''.join(chunks), ref_frame)
243 self.assertEqual(b"".join(chunks), ref_frame)
207
244
208 @hypothesis.settings(
245 @hypothesis.settings(
209 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
246 suppress_health_check=[
210 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
247 hypothesis.HealthCheck.large_base_example,
211 level=strategies.integers(min_value=1, max_value=5),
248 hypothesis.HealthCheck.too_slow,
212 source_read_size=strategies.integers(1, 16384),
249 ]
213 read_sizes=strategies.data())
250 )
214 def test_buffer_source_readinto_variance(self, original, level,
251 @hypothesis.given(
215 source_read_size, read_sizes):
252 original=strategies.sampled_from(random_input_data()),
253 level=strategies.integers(min_value=1, max_value=5),
254 source_read_size=strategies.integers(1, 16384),
255 read_sizes=strategies.data(),
256 )
257 def test_buffer_source_readinto_variance(
258 self, original, level, source_read_size, read_sizes
259 ):
216
260
217 refctx = zstd.ZstdCompressor(level=level)
261 refctx = zstd.ZstdCompressor(level=level)
218 ref_frame = refctx.compress(original)
262 ref_frame = refctx.compress(original)
219
263
220 cctx = zstd.ZstdCompressor(level=level)
264 cctx = zstd.ZstdCompressor(level=level)
221 with cctx.stream_reader(original, size=len(original),
265 with cctx.stream_reader(
222 read_size=source_read_size) as reader:
266 original, size=len(original), read_size=source_read_size
267 ) as reader:
223 chunks = []
268 chunks = []
224 while True:
269 while True:
225 read_size = read_sizes.draw(strategies.integers(1, 16384))
270 read_size = read_sizes.draw(strategies.integers(1, 16384))
226 b = bytearray(read_size)
271 b = bytearray(read_size)
227 count = reader.readinto(b)
272 count = reader.readinto(b)
228
273
229 if not count:
274 if not count:
230 break
275 break
231
276
232 chunks.append(bytes(b[0:count]))
277 chunks.append(bytes(b[0:count]))
233
278
234 self.assertEqual(b''.join(chunks), ref_frame)
279 self.assertEqual(b"".join(chunks), ref_frame)
235
280
236 @hypothesis.settings(
281 @hypothesis.settings(
237 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
282 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
238 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
283 )
239 level=strategies.integers(min_value=1, max_value=5),
284 @hypothesis.given(
240 source_read_size=strategies.integers(1, 16384),
285 original=strategies.sampled_from(random_input_data()),
241 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
286 level=strategies.integers(min_value=1, max_value=5),
242 def test_stream_source_read1(self, original, level, source_read_size,
287 source_read_size=strategies.integers(1, 16384),
243 read_size):
288 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
289 )
290 def test_stream_source_read1(self, original, level, source_read_size, read_size):
244 if read_size == 0:
291 if read_size == 0:
245 read_size = -1
292 read_size = -1
246
293
247 refctx = zstd.ZstdCompressor(level=level)
294 refctx = zstd.ZstdCompressor(level=level)
248 ref_frame = refctx.compress(original)
295 ref_frame = refctx.compress(original)
249
296
250 cctx = zstd.ZstdCompressor(level=level)
297 cctx = zstd.ZstdCompressor(level=level)
251 with cctx.stream_reader(io.BytesIO(original), size=len(original),
298 with cctx.stream_reader(
252 read_size=source_read_size) as reader:
299 io.BytesIO(original), size=len(original), read_size=source_read_size
300 ) as reader:
253 chunks = []
301 chunks = []
254 while True:
302 while True:
255 chunk = reader.read1(read_size)
303 chunk = reader.read1(read_size)
256 if not chunk:
304 if not chunk:
257 break
305 break
258
306
259 chunks.append(chunk)
307 chunks.append(chunk)
260
308
261 self.assertEqual(b''.join(chunks), ref_frame)
309 self.assertEqual(b"".join(chunks), ref_frame)
262
310
263 @hypothesis.settings(
311 @hypothesis.settings(
264 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
312 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
265 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
313 )
266 level=strategies.integers(min_value=1, max_value=5),
314 @hypothesis.given(
267 source_read_size=strategies.integers(1, 16384),
315 original=strategies.sampled_from(random_input_data()),
268 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
316 level=strategies.integers(min_value=1, max_value=5),
269 def test_buffer_source_read1(self, original, level, source_read_size,
317 source_read_size=strategies.integers(1, 16384),
270 read_size):
318 read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
319 )
320 def test_buffer_source_read1(self, original, level, source_read_size, read_size):
271 if read_size == 0:
321 if read_size == 0:
272 read_size = -1
322 read_size = -1
273
323
274 refctx = zstd.ZstdCompressor(level=level)
324 refctx = zstd.ZstdCompressor(level=level)
275 ref_frame = refctx.compress(original)
325 ref_frame = refctx.compress(original)
276
326
277 cctx = zstd.ZstdCompressor(level=level)
327 cctx = zstd.ZstdCompressor(level=level)
278 with cctx.stream_reader(original, size=len(original),
328 with cctx.stream_reader(
279 read_size=source_read_size) as reader:
329 original, size=len(original), read_size=source_read_size
330 ) as reader:
280 chunks = []
331 chunks = []
281 while True:
332 while True:
282 chunk = reader.read1(read_size)
333 chunk = reader.read1(read_size)
283 if not chunk:
334 if not chunk:
284 break
335 break
285
336
286 chunks.append(chunk)
337 chunks.append(chunk)
287
338
288 self.assertEqual(b''.join(chunks), ref_frame)
339 self.assertEqual(b"".join(chunks), ref_frame)
289
340
290 @hypothesis.settings(
341 @hypothesis.settings(
291 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
342 suppress_health_check=[
292 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
343 hypothesis.HealthCheck.large_base_example,
293 level=strategies.integers(min_value=1, max_value=5),
344 hypothesis.HealthCheck.too_slow,
294 source_read_size=strategies.integers(1, 16384),
345 ]
295 read_sizes=strategies.data())
346 )
296 def test_stream_source_read1_variance(self, original, level, source_read_size,
347 @hypothesis.given(
297 read_sizes):
348 original=strategies.sampled_from(random_input_data()),
349 level=strategies.integers(min_value=1, max_value=5),
350 source_read_size=strategies.integers(1, 16384),
351 read_sizes=strategies.data(),
352 )
353 def test_stream_source_read1_variance(
354 self, original, level, source_read_size, read_sizes
355 ):
298 refctx = zstd.ZstdCompressor(level=level)
356 refctx = zstd.ZstdCompressor(level=level)
299 ref_frame = refctx.compress(original)
357 ref_frame = refctx.compress(original)
300
358
301 cctx = zstd.ZstdCompressor(level=level)
359 cctx = zstd.ZstdCompressor(level=level)
302 with cctx.stream_reader(io.BytesIO(original), size=len(original),
360 with cctx.stream_reader(
303 read_size=source_read_size) as reader:
361 io.BytesIO(original), size=len(original), read_size=source_read_size
362 ) as reader:
304 chunks = []
363 chunks = []
305 while True:
364 while True:
306 read_size = read_sizes.draw(strategies.integers(-1, 16384))
365 read_size = read_sizes.draw(strategies.integers(-1, 16384))
307 chunk = reader.read1(read_size)
366 chunk = reader.read1(read_size)
308 if not chunk and read_size:
367 if not chunk and read_size:
309 break
368 break
310
369
311 chunks.append(chunk)
370 chunks.append(chunk)
312
371
313 self.assertEqual(b''.join(chunks), ref_frame)
372 self.assertEqual(b"".join(chunks), ref_frame)
314
373
315 @hypothesis.settings(
374 @hypothesis.settings(
316 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
375 suppress_health_check=[
317 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
376 hypothesis.HealthCheck.large_base_example,
318 level=strategies.integers(min_value=1, max_value=5),
377 hypothesis.HealthCheck.too_slow,
319 source_read_size=strategies.integers(1, 16384),
378 ]
320 read_sizes=strategies.data())
379 )
321 def test_buffer_source_read1_variance(self, original, level, source_read_size,
380 @hypothesis.given(
322 read_sizes):
381 original=strategies.sampled_from(random_input_data()),
382 level=strategies.integers(min_value=1, max_value=5),
383 source_read_size=strategies.integers(1, 16384),
384 read_sizes=strategies.data(),
385 )
386 def test_buffer_source_read1_variance(
387 self, original, level, source_read_size, read_sizes
388 ):
323
389
324 refctx = zstd.ZstdCompressor(level=level)
390 refctx = zstd.ZstdCompressor(level=level)
325 ref_frame = refctx.compress(original)
391 ref_frame = refctx.compress(original)
326
392
327 cctx = zstd.ZstdCompressor(level=level)
393 cctx = zstd.ZstdCompressor(level=level)
328 with cctx.stream_reader(original, size=len(original),
394 with cctx.stream_reader(
329 read_size=source_read_size) as reader:
395 original, size=len(original), read_size=source_read_size
396 ) as reader:
330 chunks = []
397 chunks = []
331 while True:
398 while True:
332 read_size = read_sizes.draw(strategies.integers(-1, 16384))
399 read_size = read_sizes.draw(strategies.integers(-1, 16384))
333 chunk = reader.read1(read_size)
400 chunk = reader.read1(read_size)
334 if not chunk and read_size:
401 if not chunk and read_size:
335 break
402 break
336
403
337 chunks.append(chunk)
404 chunks.append(chunk)
338
405
339 self.assertEqual(b''.join(chunks), ref_frame)
406 self.assertEqual(b"".join(chunks), ref_frame)
340
341
407
342 @hypothesis.settings(
408 @hypothesis.settings(
343 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
409 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
344 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
410 )
345 level=strategies.integers(min_value=1, max_value=5),
411 @hypothesis.given(
346 source_read_size=strategies.integers(1, 16384),
412 original=strategies.sampled_from(random_input_data()),
347 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
413 level=strategies.integers(min_value=1, max_value=5),
348 def test_stream_source_readinto1(self, original, level, source_read_size,
414 source_read_size=strategies.integers(1, 16384),
349 read_size):
415 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
416 )
417 def test_stream_source_readinto1(
418 self, original, level, source_read_size, read_size
419 ):
350 if read_size == 0:
420 if read_size == 0:
351 read_size = -1
421 read_size = -1
352
422
353 refctx = zstd.ZstdCompressor(level=level)
423 refctx = zstd.ZstdCompressor(level=level)
354 ref_frame = refctx.compress(original)
424 ref_frame = refctx.compress(original)
355
425
356 cctx = zstd.ZstdCompressor(level=level)
426 cctx = zstd.ZstdCompressor(level=level)
357 with cctx.stream_reader(io.BytesIO(original), size=len(original),
427 with cctx.stream_reader(
358 read_size=source_read_size) as reader:
428 io.BytesIO(original), size=len(original), read_size=source_read_size
429 ) as reader:
359 chunks = []
430 chunks = []
360 while True:
431 while True:
361 b = bytearray(read_size)
432 b = bytearray(read_size)
362 count = reader.readinto1(b)
433 count = reader.readinto1(b)
363
434
364 if not count:
435 if not count:
365 break
436 break
366
437
367 chunks.append(bytes(b[0:count]))
438 chunks.append(bytes(b[0:count]))
368
439
369 self.assertEqual(b''.join(chunks), ref_frame)
440 self.assertEqual(b"".join(chunks), ref_frame)
370
441
371 @hypothesis.settings(
442 @hypothesis.settings(
372 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
443 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
373 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
444 )
374 level=strategies.integers(min_value=1, max_value=5),
445 @hypothesis.given(
375 source_read_size=strategies.integers(1, 16384),
446 original=strategies.sampled_from(random_input_data()),
376 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE))
447 level=strategies.integers(min_value=1, max_value=5),
377 def test_buffer_source_readinto1(self, original, level, source_read_size,
448 source_read_size=strategies.integers(1, 16384),
378 read_size):
449 read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE),
450 )
451 def test_buffer_source_readinto1(
452 self, original, level, source_read_size, read_size
453 ):
379 if read_size == 0:
454 if read_size == 0:
380 read_size = -1
455 read_size = -1
381
456
382 refctx = zstd.ZstdCompressor(level=level)
457 refctx = zstd.ZstdCompressor(level=level)
383 ref_frame = refctx.compress(original)
458 ref_frame = refctx.compress(original)
384
459
385 cctx = zstd.ZstdCompressor(level=level)
460 cctx = zstd.ZstdCompressor(level=level)
386 with cctx.stream_reader(original, size=len(original),
461 with cctx.stream_reader(
387 read_size=source_read_size) as reader:
462 original, size=len(original), read_size=source_read_size
463 ) as reader:
388 chunks = []
464 chunks = []
389 while True:
465 while True:
390 b = bytearray(read_size)
466 b = bytearray(read_size)
391 count = reader.readinto1(b)
467 count = reader.readinto1(b)
392
468
393 if not count:
469 if not count:
394 break
470 break
395
471
396 chunks.append(bytes(b[0:count]))
472 chunks.append(bytes(b[0:count]))
397
473
398 self.assertEqual(b''.join(chunks), ref_frame)
474 self.assertEqual(b"".join(chunks), ref_frame)
399
475
400 @hypothesis.settings(
476 @hypothesis.settings(
401 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
477 suppress_health_check=[
402 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
478 hypothesis.HealthCheck.large_base_example,
403 level=strategies.integers(min_value=1, max_value=5),
479 hypothesis.HealthCheck.too_slow,
404 source_read_size=strategies.integers(1, 16384),
480 ]
405 read_sizes=strategies.data())
481 )
406 def test_stream_source_readinto1_variance(self, original, level, source_read_size,
482 @hypothesis.given(
407 read_sizes):
483 original=strategies.sampled_from(random_input_data()),
484 level=strategies.integers(min_value=1, max_value=5),
485 source_read_size=strategies.integers(1, 16384),
486 read_sizes=strategies.data(),
487 )
488 def test_stream_source_readinto1_variance(
489 self, original, level, source_read_size, read_sizes
490 ):
408 refctx = zstd.ZstdCompressor(level=level)
491 refctx = zstd.ZstdCompressor(level=level)
409 ref_frame = refctx.compress(original)
492 ref_frame = refctx.compress(original)
410
493
411 cctx = zstd.ZstdCompressor(level=level)
494 cctx = zstd.ZstdCompressor(level=level)
412 with cctx.stream_reader(io.BytesIO(original), size=len(original),
495 with cctx.stream_reader(
413 read_size=source_read_size) as reader:
496 io.BytesIO(original), size=len(original), read_size=source_read_size
497 ) as reader:
414 chunks = []
498 chunks = []
415 while True:
499 while True:
416 read_size = read_sizes.draw(strategies.integers(1, 16384))
500 read_size = read_sizes.draw(strategies.integers(1, 16384))
417 b = bytearray(read_size)
501 b = bytearray(read_size)
418 count = reader.readinto1(b)
502 count = reader.readinto1(b)
419
503
420 if not count:
504 if not count:
421 break
505 break
422
506
423 chunks.append(bytes(b[0:count]))
507 chunks.append(bytes(b[0:count]))
424
508
425 self.assertEqual(b''.join(chunks), ref_frame)
509 self.assertEqual(b"".join(chunks), ref_frame)
426
510
427 @hypothesis.settings(
511 @hypothesis.settings(
428 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
512 suppress_health_check=[
429 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
513 hypothesis.HealthCheck.large_base_example,
430 level=strategies.integers(min_value=1, max_value=5),
514 hypothesis.HealthCheck.too_slow,
431 source_read_size=strategies.integers(1, 16384),
515 ]
432 read_sizes=strategies.data())
516 )
433 def test_buffer_source_readinto1_variance(self, original, level, source_read_size,
517 @hypothesis.given(
434 read_sizes):
518 original=strategies.sampled_from(random_input_data()),
519 level=strategies.integers(min_value=1, max_value=5),
520 source_read_size=strategies.integers(1, 16384),
521 read_sizes=strategies.data(),
522 )
523 def test_buffer_source_readinto1_variance(
524 self, original, level, source_read_size, read_sizes
525 ):
435
526
436 refctx = zstd.ZstdCompressor(level=level)
527 refctx = zstd.ZstdCompressor(level=level)
437 ref_frame = refctx.compress(original)
528 ref_frame = refctx.compress(original)
438
529
439 cctx = zstd.ZstdCompressor(level=level)
530 cctx = zstd.ZstdCompressor(level=level)
440 with cctx.stream_reader(original, size=len(original),
531 with cctx.stream_reader(
441 read_size=source_read_size) as reader:
532 original, size=len(original), read_size=source_read_size
533 ) as reader:
442 chunks = []
534 chunks = []
443 while True:
535 while True:
444 read_size = read_sizes.draw(strategies.integers(1, 16384))
536 read_size = read_sizes.draw(strategies.integers(1, 16384))
445 b = bytearray(read_size)
537 b = bytearray(read_size)
446 count = reader.readinto1(b)
538 count = reader.readinto1(b)
447
539
448 if not count:
540 if not count:
449 break
541 break
450
542
451 chunks.append(bytes(b[0:count]))
543 chunks.append(bytes(b[0:count]))
452
544
453 self.assertEqual(b''.join(chunks), ref_frame)
545 self.assertEqual(b"".join(chunks), ref_frame)
454
455
546
456
547
457 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
548 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
458 @make_cffi
549 @make_cffi
459 class TestCompressor_stream_writer_fuzzing(unittest.TestCase):
550 class TestCompressor_stream_writer_fuzzing(TestCase):
460 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
551 @hypothesis.given(
461 level=strategies.integers(min_value=1, max_value=5),
552 original=strategies.sampled_from(random_input_data()),
462 write_size=strategies.integers(min_value=1, max_value=1048576))
553 level=strategies.integers(min_value=1, max_value=5),
554 write_size=strategies.integers(min_value=1, max_value=1048576),
555 )
463 def test_write_size_variance(self, original, level, write_size):
556 def test_write_size_variance(self, original, level, write_size):
464 refctx = zstd.ZstdCompressor(level=level)
557 refctx = zstd.ZstdCompressor(level=level)
465 ref_frame = refctx.compress(original)
558 ref_frame = refctx.compress(original)
466
559
467 cctx = zstd.ZstdCompressor(level=level)
560 cctx = zstd.ZstdCompressor(level=level)
468 b = NonClosingBytesIO()
561 b = NonClosingBytesIO()
469 with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor:
562 with cctx.stream_writer(
563 b, size=len(original), write_size=write_size
564 ) as compressor:
470 compressor.write(original)
565 compressor.write(original)
471
566
472 self.assertEqual(b.getvalue(), ref_frame)
567 self.assertEqual(b.getvalue(), ref_frame)
473
568
474
569
475 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
570 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
476 @make_cffi
571 @make_cffi
477 class TestCompressor_copy_stream_fuzzing(unittest.TestCase):
572 class TestCompressor_copy_stream_fuzzing(TestCase):
478 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
573 @hypothesis.given(
479 level=strategies.integers(min_value=1, max_value=5),
574 original=strategies.sampled_from(random_input_data()),
480 read_size=strategies.integers(min_value=1, max_value=1048576),
575 level=strategies.integers(min_value=1, max_value=5),
481 write_size=strategies.integers(min_value=1, max_value=1048576))
576 read_size=strategies.integers(min_value=1, max_value=1048576),
577 write_size=strategies.integers(min_value=1, max_value=1048576),
578 )
482 def test_read_write_size_variance(self, original, level, read_size, write_size):
579 def test_read_write_size_variance(self, original, level, read_size, write_size):
483 refctx = zstd.ZstdCompressor(level=level)
580 refctx = zstd.ZstdCompressor(level=level)
484 ref_frame = refctx.compress(original)
581 ref_frame = refctx.compress(original)
485
582
486 cctx = zstd.ZstdCompressor(level=level)
583 cctx = zstd.ZstdCompressor(level=level)
487 source = io.BytesIO(original)
584 source = io.BytesIO(original)
488 dest = io.BytesIO()
585 dest = io.BytesIO()
489
586
490 cctx.copy_stream(source, dest, size=len(original), read_size=read_size,
587 cctx.copy_stream(
491 write_size=write_size)
588 source, dest, size=len(original), read_size=read_size, write_size=write_size
589 )
492
590
493 self.assertEqual(dest.getvalue(), ref_frame)
591 self.assertEqual(dest.getvalue(), ref_frame)
494
592
495
593
496 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
594 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
497 @make_cffi
595 @make_cffi
498 class TestCompressor_compressobj_fuzzing(unittest.TestCase):
596 class TestCompressor_compressobj_fuzzing(TestCase):
499 @hypothesis.settings(
597 @hypothesis.settings(
500 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
598 suppress_health_check=[
501 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
599 hypothesis.HealthCheck.large_base_example,
502 level=strategies.integers(min_value=1, max_value=5),
600 hypothesis.HealthCheck.too_slow,
503 chunk_sizes=strategies.data())
601 ]
602 )
603 @hypothesis.given(
604 original=strategies.sampled_from(random_input_data()),
605 level=strategies.integers(min_value=1, max_value=5),
606 chunk_sizes=strategies.data(),
607 )
504 def test_random_input_sizes(self, original, level, chunk_sizes):
608 def test_random_input_sizes(self, original, level, chunk_sizes):
505 refctx = zstd.ZstdCompressor(level=level)
609 refctx = zstd.ZstdCompressor(level=level)
506 ref_frame = refctx.compress(original)
610 ref_frame = refctx.compress(original)
507
611
508 cctx = zstd.ZstdCompressor(level=level)
612 cctx = zstd.ZstdCompressor(level=level)
509 cobj = cctx.compressobj(size=len(original))
613 cobj = cctx.compressobj(size=len(original))
510
614
511 chunks = []
615 chunks = []
512 i = 0
616 i = 0
513 while True:
617 while True:
514 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
618 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
515 source = original[i:i + chunk_size]
619 source = original[i : i + chunk_size]
516 if not source:
620 if not source:
517 break
621 break
518
622
519 chunks.append(cobj.compress(source))
623 chunks.append(cobj.compress(source))
520 i += chunk_size
624 i += chunk_size
521
625
522 chunks.append(cobj.flush())
626 chunks.append(cobj.flush())
523
627
524 self.assertEqual(b''.join(chunks), ref_frame)
628 self.assertEqual(b"".join(chunks), ref_frame)
525
629
526 @hypothesis.settings(
630 @hypothesis.settings(
527 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
631 suppress_health_check=[
528 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
632 hypothesis.HealthCheck.large_base_example,
529 level=strategies.integers(min_value=1, max_value=5),
633 hypothesis.HealthCheck.too_slow,
530 chunk_sizes=strategies.data(),
634 ]
531 flushes=strategies.data())
635 )
636 @hypothesis.given(
637 original=strategies.sampled_from(random_input_data()),
638 level=strategies.integers(min_value=1, max_value=5),
639 chunk_sizes=strategies.data(),
640 flushes=strategies.data(),
641 )
532 def test_flush_block(self, original, level, chunk_sizes, flushes):
642 def test_flush_block(self, original, level, chunk_sizes, flushes):
533 cctx = zstd.ZstdCompressor(level=level)
643 cctx = zstd.ZstdCompressor(level=level)
534 cobj = cctx.compressobj()
644 cobj = cctx.compressobj()
535
645
536 dctx = zstd.ZstdDecompressor()
646 dctx = zstd.ZstdDecompressor()
537 dobj = dctx.decompressobj()
647 dobj = dctx.decompressobj()
538
648
539 compressed_chunks = []
649 compressed_chunks = []
540 decompressed_chunks = []
650 decompressed_chunks = []
541 i = 0
651 i = 0
542 while True:
652 while True:
543 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
653 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
544 source = original[i:i + input_size]
654 source = original[i : i + input_size]
545 if not source:
655 if not source:
546 break
656 break
547
657
548 i += input_size
658 i += input_size
549
659
550 chunk = cobj.compress(source)
660 chunk = cobj.compress(source)
551 compressed_chunks.append(chunk)
661 compressed_chunks.append(chunk)
552 decompressed_chunks.append(dobj.decompress(chunk))
662 decompressed_chunks.append(dobj.decompress(chunk))
553
663
554 if not flushes.draw(strategies.booleans()):
664 if not flushes.draw(strategies.booleans()):
555 continue
665 continue
556
666
557 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
667 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
558 compressed_chunks.append(chunk)
668 compressed_chunks.append(chunk)
559 decompressed_chunks.append(dobj.decompress(chunk))
669 decompressed_chunks.append(dobj.decompress(chunk))
560
670
561 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
671 self.assertEqual(b"".join(decompressed_chunks), original[0:i])
562
672
563 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
673 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
564 compressed_chunks.append(chunk)
674 compressed_chunks.append(chunk)
565 decompressed_chunks.append(dobj.decompress(chunk))
675 decompressed_chunks.append(dobj.decompress(chunk))
566
676
567 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
677 self.assertEqual(
568 max_output_size=len(original)),
678 dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)),
569 original)
679 original,
570 self.assertEqual(b''.join(decompressed_chunks), original)
680 )
681 self.assertEqual(b"".join(decompressed_chunks), original)
682
571
683
572 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
684 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
573 @make_cffi
685 @make_cffi
574 class TestCompressor_read_to_iter_fuzzing(unittest.TestCase):
686 class TestCompressor_read_to_iter_fuzzing(TestCase):
575 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
687 @hypothesis.given(
576 level=strategies.integers(min_value=1, max_value=5),
688 original=strategies.sampled_from(random_input_data()),
577 read_size=strategies.integers(min_value=1, max_value=4096),
689 level=strategies.integers(min_value=1, max_value=5),
578 write_size=strategies.integers(min_value=1, max_value=4096))
690 read_size=strategies.integers(min_value=1, max_value=4096),
691 write_size=strategies.integers(min_value=1, max_value=4096),
692 )
579 def test_read_write_size_variance(self, original, level, read_size, write_size):
693 def test_read_write_size_variance(self, original, level, read_size, write_size):
580 refcctx = zstd.ZstdCompressor(level=level)
694 refcctx = zstd.ZstdCompressor(level=level)
581 ref_frame = refcctx.compress(original)
695 ref_frame = refcctx.compress(original)
582
696
583 source = io.BytesIO(original)
697 source = io.BytesIO(original)
584
698
585 cctx = zstd.ZstdCompressor(level=level)
699 cctx = zstd.ZstdCompressor(level=level)
586 chunks = list(cctx.read_to_iter(source, size=len(original),
700 chunks = list(
587 read_size=read_size,
701 cctx.read_to_iter(
588 write_size=write_size))
702 source, size=len(original), read_size=read_size, write_size=write_size
703 )
704 )
589
705
590 self.assertEqual(b''.join(chunks), ref_frame)
706 self.assertEqual(b"".join(chunks), ref_frame)
591
707
592
708
593 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
709 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
594 class TestCompressor_multi_compress_to_buffer_fuzzing(unittest.TestCase):
710 class TestCompressor_multi_compress_to_buffer_fuzzing(TestCase):
595 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
711 @hypothesis.given(
596 min_size=1, max_size=1024),
712 original=strategies.lists(
597 threads=strategies.integers(min_value=1, max_value=8),
713 strategies.sampled_from(random_input_data()), min_size=1, max_size=1024
598 use_dict=strategies.booleans())
714 ),
715 threads=strategies.integers(min_value=1, max_value=8),
716 use_dict=strategies.booleans(),
717 )
599 def test_data_equivalence(self, original, threads, use_dict):
718 def test_data_equivalence(self, original, threads, use_dict):
600 kwargs = {}
719 kwargs = {}
601
720
602 # Use a content dictionary because it is cheap to create.
721 # Use a content dictionary because it is cheap to create.
603 if use_dict:
722 if use_dict:
604 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
723 kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0])
605
724
606 cctx = zstd.ZstdCompressor(level=1,
725 cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs)
607 write_checksum=True,
608 **kwargs)
609
726
610 if not hasattr(cctx, 'multi_compress_to_buffer'):
727 if not hasattr(cctx, "multi_compress_to_buffer"):
611 self.skipTest('multi_compress_to_buffer not available')
728 self.skipTest("multi_compress_to_buffer not available")
612
729
613 result = cctx.multi_compress_to_buffer(original, threads=-1)
730 result = cctx.multi_compress_to_buffer(original, threads=-1)
614
731
615 self.assertEqual(len(result), len(original))
732 self.assertEqual(len(result), len(original))
616
733
617 # The frame produced via the batch APIs may not be bit identical to that
734 # The frame produced via the batch APIs may not be bit identical to that
618 # produced by compress() because compression parameters are adjusted
735 # produced by compress() because compression parameters are adjusted
619 # from the first input in batch mode. So the only thing we can do is
736 # from the first input in batch mode. So the only thing we can do is
620 # verify the decompressed data matches the input.
737 # verify the decompressed data matches the input.
621 dctx = zstd.ZstdDecompressor(**kwargs)
738 dctx = zstd.ZstdDecompressor(**kwargs)
622
739
623 for i, frame in enumerate(result):
740 for i, frame in enumerate(result):
624 self.assertEqual(dctx.decompress(frame), original[i])
741 self.assertEqual(dctx.decompress(frame), original[i])
625
742
626
743
627 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
744 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
628 @make_cffi
745 @make_cffi
629 class TestCompressor_chunker_fuzzing(unittest.TestCase):
746 class TestCompressor_chunker_fuzzing(TestCase):
630 @hypothesis.settings(
747 @hypothesis.settings(
631 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
748 suppress_health_check=[
632 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
749 hypothesis.HealthCheck.large_base_example,
633 level=strategies.integers(min_value=1, max_value=5),
750 hypothesis.HealthCheck.too_slow,
634 chunk_size=strategies.integers(
751 ]
635 min_value=1,
752 )
636 max_value=32 * 1048576),
753 @hypothesis.given(
637 input_sizes=strategies.data())
754 original=strategies.sampled_from(random_input_data()),
755 level=strategies.integers(min_value=1, max_value=5),
756 chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576),
757 input_sizes=strategies.data(),
758 )
638 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
759 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
639 cctx = zstd.ZstdCompressor(level=level)
760 cctx = zstd.ZstdCompressor(level=level)
640 chunker = cctx.chunker(chunk_size=chunk_size)
761 chunker = cctx.chunker(chunk_size=chunk_size)
641
762
642 chunks = []
763 chunks = []
643 i = 0
764 i = 0
644 while True:
765 while True:
645 input_size = input_sizes.draw(strategies.integers(1, 4096))
766 input_size = input_sizes.draw(strategies.integers(1, 4096))
646 source = original[i:i + input_size]
767 source = original[i : i + input_size]
647 if not source:
768 if not source:
648 break
769 break
649
770
650 chunks.extend(chunker.compress(source))
771 chunks.extend(chunker.compress(source))
651 i += input_size
772 i += input_size
652
773
653 chunks.extend(chunker.finish())
774 chunks.extend(chunker.finish())
654
775
655 dctx = zstd.ZstdDecompressor()
776 dctx = zstd.ZstdDecompressor()
656
777
657 self.assertEqual(dctx.decompress(b''.join(chunks),
778 self.assertEqual(
658 max_output_size=len(original)),
779 dctx.decompress(b"".join(chunks), max_output_size=len(original)), original
659 original)
780 )
660
781
661 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
782 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
662
783
663 @hypothesis.settings(
784 @hypothesis.settings(
664 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
785 suppress_health_check=[
665 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
786 hypothesis.HealthCheck.large_base_example,
666 level=strategies.integers(min_value=1, max_value=5),
787 hypothesis.HealthCheck.too_slow,
667 chunk_size=strategies.integers(
788 ]
668 min_value=1,
789 )
669 max_value=32 * 1048576),
790 @hypothesis.given(
670 input_sizes=strategies.data(),
791 original=strategies.sampled_from(random_input_data()),
671 flushes=strategies.data())
792 level=strategies.integers(min_value=1, max_value=5),
672 def test_flush_block(self, original, level, chunk_size, input_sizes,
793 chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576),
673 flushes):
794 input_sizes=strategies.data(),
795 flushes=strategies.data(),
796 )
797 def test_flush_block(self, original, level, chunk_size, input_sizes, flushes):
674 cctx = zstd.ZstdCompressor(level=level)
798 cctx = zstd.ZstdCompressor(level=level)
675 chunker = cctx.chunker(chunk_size=chunk_size)
799 chunker = cctx.chunker(chunk_size=chunk_size)
676
800
677 dctx = zstd.ZstdDecompressor()
801 dctx = zstd.ZstdDecompressor()
678 dobj = dctx.decompressobj()
802 dobj = dctx.decompressobj()
679
803
680 compressed_chunks = []
804 compressed_chunks = []
681 decompressed_chunks = []
805 decompressed_chunks = []
682 i = 0
806 i = 0
683 while True:
807 while True:
684 input_size = input_sizes.draw(strategies.integers(1, 4096))
808 input_size = input_sizes.draw(strategies.integers(1, 4096))
685 source = original[i:i + input_size]
809 source = original[i : i + input_size]
686 if not source:
810 if not source:
687 break
811 break
688
812
689 i += input_size
813 i += input_size
690
814
691 chunks = list(chunker.compress(source))
815 chunks = list(chunker.compress(source))
692 compressed_chunks.extend(chunks)
816 compressed_chunks.extend(chunks)
693 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
817 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
694
818
695 if not flushes.draw(strategies.booleans()):
819 if not flushes.draw(strategies.booleans()):
696 continue
820 continue
697
821
698 chunks = list(chunker.flush())
822 chunks = list(chunker.flush())
699 compressed_chunks.extend(chunks)
823 compressed_chunks.extend(chunks)
700 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
824 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
701
825
702 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
826 self.assertEqual(b"".join(decompressed_chunks), original[0:i])
703
827
704 chunks = list(chunker.finish())
828 chunks = list(chunker.finish())
705 compressed_chunks.extend(chunks)
829 compressed_chunks.extend(chunks)
706 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
830 decompressed_chunks.append(dobj.decompress(b"".join(chunks)))
707
831
708 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
832 self.assertEqual(
709 max_output_size=len(original)),
833 dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)),
710 original)
834 original,
711 self.assertEqual(b''.join(decompressed_chunks), original) No newline at end of file
835 )
836 self.assertEqual(b"".join(decompressed_chunks), original)
@@ -1,228 +1,241 b''
1 import sys
1 import sys
2 import unittest
2 import unittest
3
3
4 import zstandard as zstd
4 import zstandard as zstd
5
5
6 from . common import (
6 from .common import (
7 make_cffi,
7 make_cffi,
8 TestCase,
8 )
9 )
9
10
10
11
11 @make_cffi
12 @make_cffi
12 class TestCompressionParameters(unittest.TestCase):
13 class TestCompressionParameters(TestCase):
13 def test_bounds(self):
14 def test_bounds(self):
14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN,
15 zstd.ZstdCompressionParameters(
15 chain_log=zstd.CHAINLOG_MIN,
16 window_log=zstd.WINDOWLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN,
17 chain_log=zstd.CHAINLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN,
18 hash_log=zstd.HASHLOG_MIN,
18 min_match=zstd.MINMATCH_MIN + 1,
19 search_log=zstd.SEARCHLOG_MIN,
19 target_length=zstd.TARGETLENGTH_MIN,
20 min_match=zstd.MINMATCH_MIN + 1,
20 strategy=zstd.STRATEGY_FAST)
21 target_length=zstd.TARGETLENGTH_MIN,
22 strategy=zstd.STRATEGY_FAST,
23 )
21
24
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
25 zstd.ZstdCompressionParameters(
23 chain_log=zstd.CHAINLOG_MAX,
26 window_log=zstd.WINDOWLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
27 chain_log=zstd.CHAINLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
28 hash_log=zstd.HASHLOG_MAX,
26 min_match=zstd.MINMATCH_MAX - 1,
29 search_log=zstd.SEARCHLOG_MAX,
27 target_length=zstd.TARGETLENGTH_MAX,
30 min_match=zstd.MINMATCH_MAX - 1,
28 strategy=zstd.STRATEGY_BTULTRA2)
31 target_length=zstd.TARGETLENGTH_MAX,
32 strategy=zstd.STRATEGY_BTULTRA2,
33 )
29
34
30 def test_from_level(self):
35 def test_from_level(self):
31 p = zstd.ZstdCompressionParameters.from_level(1)
36 p = zstd.ZstdCompressionParameters.from_level(1)
32 self.assertIsInstance(p, zstd.CompressionParameters)
37 self.assertIsInstance(p, zstd.CompressionParameters)
33
38
34 self.assertEqual(p.window_log, 19)
39 self.assertEqual(p.window_log, 19)
35
40
36 p = zstd.ZstdCompressionParameters.from_level(-4)
41 p = zstd.ZstdCompressionParameters.from_level(-4)
37 self.assertEqual(p.window_log, 19)
42 self.assertEqual(p.window_log, 19)
38
43
39 def test_members(self):
44 def test_members(self):
40 p = zstd.ZstdCompressionParameters(window_log=10,
45 p = zstd.ZstdCompressionParameters(
41 chain_log=6,
46 window_log=10,
42 hash_log=7,
47 chain_log=6,
43 search_log=4,
48 hash_log=7,
44 min_match=5,
49 search_log=4,
45 target_length=8,
50 min_match=5,
46 strategy=1)
51 target_length=8,
52 strategy=1,
53 )
47 self.assertEqual(p.window_log, 10)
54 self.assertEqual(p.window_log, 10)
48 self.assertEqual(p.chain_log, 6)
55 self.assertEqual(p.chain_log, 6)
49 self.assertEqual(p.hash_log, 7)
56 self.assertEqual(p.hash_log, 7)
50 self.assertEqual(p.search_log, 4)
57 self.assertEqual(p.search_log, 4)
51 self.assertEqual(p.min_match, 5)
58 self.assertEqual(p.min_match, 5)
52 self.assertEqual(p.target_length, 8)
59 self.assertEqual(p.target_length, 8)
53 self.assertEqual(p.compression_strategy, 1)
60 self.assertEqual(p.compression_strategy, 1)
54
61
55 p = zstd.ZstdCompressionParameters(compression_level=2)
62 p = zstd.ZstdCompressionParameters(compression_level=2)
56 self.assertEqual(p.compression_level, 2)
63 self.assertEqual(p.compression_level, 2)
57
64
58 p = zstd.ZstdCompressionParameters(threads=4)
65 p = zstd.ZstdCompressionParameters(threads=4)
59 self.assertEqual(p.threads, 4)
66 self.assertEqual(p.threads, 4)
60
67
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
68 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6)
62 overlap_log=6)
63 self.assertEqual(p.threads, 2)
69 self.assertEqual(p.threads, 2)
64 self.assertEqual(p.job_size, 1048576)
70 self.assertEqual(p.job_size, 1048576)
65 self.assertEqual(p.overlap_log, 6)
71 self.assertEqual(p.overlap_log, 6)
66 self.assertEqual(p.overlap_size_log, 6)
72 self.assertEqual(p.overlap_size_log, 6)
67
73
68 p = zstd.ZstdCompressionParameters(compression_level=-1)
74 p = zstd.ZstdCompressionParameters(compression_level=-1)
69 self.assertEqual(p.compression_level, -1)
75 self.assertEqual(p.compression_level, -1)
70
76
71 p = zstd.ZstdCompressionParameters(compression_level=-2)
77 p = zstd.ZstdCompressionParameters(compression_level=-2)
72 self.assertEqual(p.compression_level, -2)
78 self.assertEqual(p.compression_level, -2)
73
79
74 p = zstd.ZstdCompressionParameters(force_max_window=True)
80 p = zstd.ZstdCompressionParameters(force_max_window=True)
75 self.assertEqual(p.force_max_window, 1)
81 self.assertEqual(p.force_max_window, 1)
76
82
77 p = zstd.ZstdCompressionParameters(enable_ldm=True)
83 p = zstd.ZstdCompressionParameters(enable_ldm=True)
78 self.assertEqual(p.enable_ldm, 1)
84 self.assertEqual(p.enable_ldm, 1)
79
85
80 p = zstd.ZstdCompressionParameters(ldm_hash_log=7)
86 p = zstd.ZstdCompressionParameters(ldm_hash_log=7)
81 self.assertEqual(p.ldm_hash_log, 7)
87 self.assertEqual(p.ldm_hash_log, 7)
82
88
83 p = zstd.ZstdCompressionParameters(ldm_min_match=6)
89 p = zstd.ZstdCompressionParameters(ldm_min_match=6)
84 self.assertEqual(p.ldm_min_match, 6)
90 self.assertEqual(p.ldm_min_match, 6)
85
91
86 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
92 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
87 self.assertEqual(p.ldm_bucket_size_log, 7)
93 self.assertEqual(p.ldm_bucket_size_log, 7)
88
94
89 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
95 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
90 self.assertEqual(p.ldm_hash_every_log, 8)
96 self.assertEqual(p.ldm_hash_every_log, 8)
91 self.assertEqual(p.ldm_hash_rate_log, 8)
97 self.assertEqual(p.ldm_hash_rate_log, 8)
92
98
93 def test_estimated_compression_context_size(self):
99 def test_estimated_compression_context_size(self):
94 p = zstd.ZstdCompressionParameters(window_log=20,
100 p = zstd.ZstdCompressionParameters(
95 chain_log=16,
101 window_log=20,
96 hash_log=17,
102 chain_log=16,
97 search_log=1,
103 hash_log=17,
98 min_match=5,
104 search_log=1,
99 target_length=16,
105 min_match=5,
100 strategy=zstd.STRATEGY_DFAST)
106 target_length=16,
107 strategy=zstd.STRATEGY_DFAST,
108 )
101
109
102 # 32-bit has slightly different values from 64-bit.
110 # 32-bit has slightly different values from 64-bit.
103 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144,
111 self.assertAlmostEqual(
104 delta=250)
112 p.estimated_compression_context_size(), 1294464, delta=400
113 )
105
114
106 def test_strategy(self):
115 def test_strategy(self):
107 with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'):
116 with self.assertRaisesRegex(
117 ValueError, "cannot specify both compression_strategy"
118 ):
108 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
119 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
109
120
110 p = zstd.ZstdCompressionParameters(strategy=2)
121 p = zstd.ZstdCompressionParameters(strategy=2)
111 self.assertEqual(p.compression_strategy, 2)
122 self.assertEqual(p.compression_strategy, 2)
112
123
113 p = zstd.ZstdCompressionParameters(strategy=3)
124 p = zstd.ZstdCompressionParameters(strategy=3)
114 self.assertEqual(p.compression_strategy, 3)
125 self.assertEqual(p.compression_strategy, 3)
115
126
116 def test_ldm_hash_rate_log(self):
127 def test_ldm_hash_rate_log(self):
117 with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'):
128 with self.assertRaisesRegex(
129 ValueError, "cannot specify both ldm_hash_rate_log"
130 ):
118 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
131 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
119
132
120 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
133 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
121 self.assertEqual(p.ldm_hash_every_log, 8)
134 self.assertEqual(p.ldm_hash_every_log, 8)
122
135
123 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
136 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
124 self.assertEqual(p.ldm_hash_every_log, 16)
137 self.assertEqual(p.ldm_hash_every_log, 16)
125
138
126 def test_overlap_log(self):
139 def test_overlap_log(self):
127 with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'):
140 with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"):
128 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
141 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
129
142
130 p = zstd.ZstdCompressionParameters(overlap_log=2)
143 p = zstd.ZstdCompressionParameters(overlap_log=2)
131 self.assertEqual(p.overlap_log, 2)
144 self.assertEqual(p.overlap_log, 2)
132 self.assertEqual(p.overlap_size_log, 2)
145 self.assertEqual(p.overlap_size_log, 2)
133
146
134 p = zstd.ZstdCompressionParameters(overlap_size_log=4)
147 p = zstd.ZstdCompressionParameters(overlap_size_log=4)
135 self.assertEqual(p.overlap_log, 4)
148 self.assertEqual(p.overlap_log, 4)
136 self.assertEqual(p.overlap_size_log, 4)
149 self.assertEqual(p.overlap_size_log, 4)
137
150
138
151
139 @make_cffi
152 @make_cffi
140 class TestFrameParameters(unittest.TestCase):
153 class TestFrameParameters(TestCase):
141 def test_invalid_type(self):
154 def test_invalid_type(self):
142 with self.assertRaises(TypeError):
155 with self.assertRaises(TypeError):
143 zstd.get_frame_parameters(None)
156 zstd.get_frame_parameters(None)
144
157
145 # Python 3 doesn't appear to convert unicode to Py_buffer.
158 # Python 3 doesn't appear to convert unicode to Py_buffer.
146 if sys.version_info[0] >= 3:
159 if sys.version_info[0] >= 3:
147 with self.assertRaises(TypeError):
160 with self.assertRaises(TypeError):
148 zstd.get_frame_parameters(u'foobarbaz')
161 zstd.get_frame_parameters(u"foobarbaz")
149 else:
162 else:
150 # CPython will convert unicode to Py_buffer. But CFFI won't.
163 # CPython will convert unicode to Py_buffer. But CFFI won't.
151 if zstd.backend == 'cffi':
164 if zstd.backend == "cffi":
152 with self.assertRaises(TypeError):
165 with self.assertRaises(TypeError):
153 zstd.get_frame_parameters(u'foobarbaz')
166 zstd.get_frame_parameters(u"foobarbaz")
154 else:
167 else:
155 with self.assertRaises(zstd.ZstdError):
168 with self.assertRaises(zstd.ZstdError):
156 zstd.get_frame_parameters(u'foobarbaz')
169 zstd.get_frame_parameters(u"foobarbaz")
157
170
158 def test_invalid_input_sizes(self):
171 def test_invalid_input_sizes(self):
159 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
172 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
160 zstd.get_frame_parameters(b'')
173 zstd.get_frame_parameters(b"")
161
174
162 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
175 with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"):
163 zstd.get_frame_parameters(zstd.FRAME_HEADER)
176 zstd.get_frame_parameters(zstd.FRAME_HEADER)
164
177
165 def test_invalid_frame(self):
178 def test_invalid_frame(self):
166 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
179 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
167 zstd.get_frame_parameters(b'foobarbaz')
180 zstd.get_frame_parameters(b"foobarbaz")
168
181
169 def test_attributes(self):
182 def test_attributes(self):
170 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
183 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00")
171 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
184 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
172 self.assertEqual(params.window_size, 1024)
185 self.assertEqual(params.window_size, 1024)
173 self.assertEqual(params.dict_id, 0)
186 self.assertEqual(params.dict_id, 0)
174 self.assertFalse(params.has_checksum)
187 self.assertFalse(params.has_checksum)
175
188
176 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
189 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
177 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
190 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff")
178 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
191 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
179 self.assertEqual(params.window_size, 1024)
192 self.assertEqual(params.window_size, 1024)
180 self.assertEqual(params.dict_id, 255)
193 self.assertEqual(params.dict_id, 255)
181 self.assertFalse(params.has_checksum)
194 self.assertFalse(params.has_checksum)
182
195
183 # Lowest 3rd bit indicates if checksum is present.
196 # Lowest 3rd bit indicates if checksum is present.
184 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
197 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00")
185 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
198 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
186 self.assertEqual(params.window_size, 1024)
199 self.assertEqual(params.window_size, 1024)
187 self.assertEqual(params.dict_id, 0)
200 self.assertEqual(params.dict_id, 0)
188 self.assertTrue(params.has_checksum)
201 self.assertTrue(params.has_checksum)
189
202
190 # Upper 2 bits indicate content size.
203 # Upper 2 bits indicate content size.
191 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
204 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00")
192 self.assertEqual(params.content_size, 511)
205 self.assertEqual(params.content_size, 511)
193 self.assertEqual(params.window_size, 1024)
206 self.assertEqual(params.window_size, 1024)
194 self.assertEqual(params.dict_id, 0)
207 self.assertEqual(params.dict_id, 0)
195 self.assertFalse(params.has_checksum)
208 self.assertFalse(params.has_checksum)
196
209
197 # Window descriptor is 2nd byte after frame header.
210 # Window descriptor is 2nd byte after frame header.
198 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
211 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40")
199 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
212 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
200 self.assertEqual(params.window_size, 262144)
213 self.assertEqual(params.window_size, 262144)
201 self.assertEqual(params.dict_id, 0)
214 self.assertEqual(params.dict_id, 0)
202 self.assertFalse(params.has_checksum)
215 self.assertFalse(params.has_checksum)
203
216
204 # Set multiple things.
217 # Set multiple things.
205 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
218 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00")
206 self.assertEqual(params.content_size, 272)
219 self.assertEqual(params.content_size, 272)
207 self.assertEqual(params.window_size, 262144)
220 self.assertEqual(params.window_size, 262144)
208 self.assertEqual(params.dict_id, 15)
221 self.assertEqual(params.dict_id, 15)
209 self.assertTrue(params.has_checksum)
222 self.assertTrue(params.has_checksum)
210
223
211 def test_input_types(self):
224 def test_input_types(self):
212 v = zstd.FRAME_HEADER + b'\x00\x00'
225 v = zstd.FRAME_HEADER + b"\x00\x00"
213
226
214 mutable_array = bytearray(len(v))
227 mutable_array = bytearray(len(v))
215 mutable_array[:] = v
228 mutable_array[:] = v
216
229
217 sources = [
230 sources = [
218 memoryview(v),
231 memoryview(v),
219 bytearray(v),
232 bytearray(v),
220 mutable_array,
233 mutable_array,
221 ]
234 ]
222
235
223 for source in sources:
236 for source in sources:
224 params = zstd.get_frame_parameters(source)
237 params = zstd.get_frame_parameters(source)
225 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
238 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
226 self.assertEqual(params.window_size, 1024)
239 self.assertEqual(params.window_size, 1024)
227 self.assertEqual(params.dict_id, 0)
240 self.assertEqual(params.dict_id, 0)
228 self.assertFalse(params.has_checksum)
241 self.assertFalse(params.has_checksum)
@@ -1,76 +1,105 b''
1 import io
1 import io
2 import os
2 import os
3 import sys
3 import sys
4 import unittest
4 import unittest
5
5
6 try:
6 try:
7 import hypothesis
7 import hypothesis
8 import hypothesis.strategies as strategies
8 import hypothesis.strategies as strategies
9 except ImportError:
9 except ImportError:
10 raise unittest.SkipTest('hypothesis not available')
10 raise unittest.SkipTest("hypothesis not available")
11
11
12 import zstandard as zstd
12 import zstandard as zstd
13
13
14 from .common import (
14 from .common import (
15 make_cffi,
15 make_cffi,
16 TestCase,
17 )
18
19
20 s_windowlog = strategies.integers(
21 min_value=zstd.WINDOWLOG_MIN, max_value=zstd.WINDOWLOG_MAX
22 )
23 s_chainlog = strategies.integers(
24 min_value=zstd.CHAINLOG_MIN, max_value=zstd.CHAINLOG_MAX
25 )
26 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, max_value=zstd.HASHLOG_MAX)
27 s_searchlog = strategies.integers(
28 min_value=zstd.SEARCHLOG_MIN, max_value=zstd.SEARCHLOG_MAX
29 )
30 s_minmatch = strategies.integers(
31 min_value=zstd.MINMATCH_MIN, max_value=zstd.MINMATCH_MAX
32 )
33 s_targetlength = strategies.integers(
34 min_value=zstd.TARGETLENGTH_MIN, max_value=zstd.TARGETLENGTH_MAX
35 )
36 s_strategy = strategies.sampled_from(
37 (
38 zstd.STRATEGY_FAST,
39 zstd.STRATEGY_DFAST,
40 zstd.STRATEGY_GREEDY,
41 zstd.STRATEGY_LAZY,
42 zstd.STRATEGY_LAZY2,
43 zstd.STRATEGY_BTLAZY2,
44 zstd.STRATEGY_BTOPT,
45 zstd.STRATEGY_BTULTRA,
46 zstd.STRATEGY_BTULTRA2,
47 )
16 )
48 )
17
49
18
50
19 s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN,
51 @make_cffi
20 max_value=zstd.WINDOWLOG_MAX)
52 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
21 s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN,
53 class TestCompressionParametersHypothesis(TestCase):
22 max_value=zstd.CHAINLOG_MAX)
54 @hypothesis.given(
23 s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN,
55 s_windowlog,
24 max_value=zstd.HASHLOG_MAX)
56 s_chainlog,
25 s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN,
57 s_hashlog,
26 max_value=zstd.SEARCHLOG_MAX)
58 s_searchlog,
27 s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN,
59 s_minmatch,
28 max_value=zstd.MINMATCH_MAX)
60 s_targetlength,
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
61 s_strategy,
30 max_value=zstd.TARGETLENGTH_MAX)
62 )
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
63 def test_valid_init(
32 zstd.STRATEGY_DFAST,
64 self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy
33 zstd.STRATEGY_GREEDY,
65 ):
34 zstd.STRATEGY_LAZY,
66 zstd.ZstdCompressionParameters(
35 zstd.STRATEGY_LAZY2,
67 window_log=windowlog,
36 zstd.STRATEGY_BTLAZY2,
68 chain_log=chainlog,
37 zstd.STRATEGY_BTOPT,
69 hash_log=hashlog,
38 zstd.STRATEGY_BTULTRA,
70 search_log=searchlog,
39 zstd.STRATEGY_BTULTRA2))
71 min_match=minmatch,
40
72 target_length=targetlength,
73 strategy=strategy,
74 )
41
75
42 @make_cffi
76 @hypothesis.given(
43 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
77 s_windowlog,
44 class TestCompressionParametersHypothesis(unittest.TestCase):
78 s_chainlog,
45 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
79 s_hashlog,
46 s_minmatch, s_targetlength, s_strategy)
80 s_searchlog,
47 def test_valid_init(self, windowlog, chainlog, hashlog, searchlog,
81 s_minmatch,
48 minmatch, targetlength, strategy):
82 s_targetlength,
49 zstd.ZstdCompressionParameters(window_log=windowlog,
83 s_strategy,
50 chain_log=chainlog,
84 )
51 hash_log=hashlog,
85 def test_estimated_compression_context_size(
52 search_log=searchlog,
86 self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy
53 min_match=minmatch,
87 ):
54 target_length=targetlength,
88 if minmatch == zstd.MINMATCH_MIN and strategy in (
55 strategy=strategy)
89 zstd.STRATEGY_FAST,
56
90 zstd.STRATEGY_GREEDY,
57 @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog,
91 ):
58 s_minmatch, s_targetlength, s_strategy)
59 def test_estimated_compression_context_size(self, windowlog, chainlog,
60 hashlog, searchlog,
61 minmatch, targetlength,
62 strategy):
63 if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY):
64 minmatch += 1
92 minmatch += 1
65 elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST:
93 elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST:
66 minmatch -= 1
94 minmatch -= 1
67
95
68 p = zstd.ZstdCompressionParameters(window_log=windowlog,
96 p = zstd.ZstdCompressionParameters(
69 chain_log=chainlog,
97 window_log=windowlog,
70 hash_log=hashlog,
98 chain_log=chainlog,
71 search_log=searchlog,
99 hash_log=hashlog,
72 min_match=minmatch,
100 search_log=searchlog,
73 target_length=targetlength,
101 min_match=minmatch,
74 strategy=strategy)
102 target_length=targetlength,
103 strategy=strategy,
104 )
75 size = p.estimated_compression_context_size()
105 size = p.estimated_compression_context_size()
76
This diff has been collapsed as it changes many lines, (729 lines changed) Show them Hide them
@@ -1,1611 +1,1670 b''
1 import io
1 import io
2 import os
2 import os
3 import random
3 import random
4 import struct
4 import struct
5 import sys
5 import sys
6 import tempfile
6 import tempfile
7 import unittest
7 import unittest
8
8
9 import zstandard as zstd
9 import zstandard as zstd
10
10
11 from .common import (
11 from .common import (
12 generate_samples,
12 generate_samples,
13 make_cffi,
13 make_cffi,
14 NonClosingBytesIO,
14 NonClosingBytesIO,
15 OpCountingBytesIO,
15 OpCountingBytesIO,
16 TestCase,
16 )
17 )
17
18
18
19
19 if sys.version_info[0] >= 3:
20 if sys.version_info[0] >= 3:
20 next = lambda it: it.__next__()
21 next = lambda it: it.__next__()
21 else:
22 else:
22 next = lambda it: it.next()
23 next = lambda it: it.next()
23
24
24
25
25 @make_cffi
26 @make_cffi
26 class TestFrameHeaderSize(unittest.TestCase):
27 class TestFrameHeaderSize(TestCase):
27 def test_empty(self):
28 def test_empty(self):
28 with self.assertRaisesRegexp(
29 with self.assertRaisesRegex(
29 zstd.ZstdError, 'could not determine frame header size: Src size '
30 zstd.ZstdError,
30 'is incorrect'):
31 "could not determine frame header size: Src size " "is incorrect",
31 zstd.frame_header_size(b'')
32 ):
33 zstd.frame_header_size(b"")
32
34
33 def test_too_small(self):
35 def test_too_small(self):
34 with self.assertRaisesRegexp(
36 with self.assertRaisesRegex(
35 zstd.ZstdError, 'could not determine frame header size: Src size '
37 zstd.ZstdError,
36 'is incorrect'):
38 "could not determine frame header size: Src size " "is incorrect",
37 zstd.frame_header_size(b'foob')
39 ):
40 zstd.frame_header_size(b"foob")
38
41
39 def test_basic(self):
42 def test_basic(self):
40 # It doesn't matter that it isn't a valid frame.
43 # It doesn't matter that it isn't a valid frame.
41 self.assertEqual(zstd.frame_header_size(b'long enough but no magic'), 6)
44 self.assertEqual(zstd.frame_header_size(b"long enough but no magic"), 6)
42
45
43
46
44 @make_cffi
47 @make_cffi
45 class TestFrameContentSize(unittest.TestCase):
48 class TestFrameContentSize(TestCase):
46 def test_empty(self):
49 def test_empty(self):
47 with self.assertRaisesRegexp(zstd.ZstdError,
50 with self.assertRaisesRegex(
48 'error when determining content size'):
51 zstd.ZstdError, "error when determining content size"
49 zstd.frame_content_size(b'')
52 ):
53 zstd.frame_content_size(b"")
50
54
51 def test_too_small(self):
55 def test_too_small(self):
52 with self.assertRaisesRegexp(zstd.ZstdError,
56 with self.assertRaisesRegex(
53 'error when determining content size'):
57 zstd.ZstdError, "error when determining content size"
54 zstd.frame_content_size(b'foob')
58 ):
59 zstd.frame_content_size(b"foob")
55
60
56 def test_bad_frame(self):
61 def test_bad_frame(self):
57 with self.assertRaisesRegexp(zstd.ZstdError,
62 with self.assertRaisesRegex(
58 'error when determining content size'):
63 zstd.ZstdError, "error when determining content size"
59 zstd.frame_content_size(b'invalid frame header')
64 ):
65 zstd.frame_content_size(b"invalid frame header")
60
66
61 def test_unknown(self):
67 def test_unknown(self):
62 cctx = zstd.ZstdCompressor(write_content_size=False)
68 cctx = zstd.ZstdCompressor(write_content_size=False)
63 frame = cctx.compress(b'foobar')
69 frame = cctx.compress(b"foobar")
64
70
65 self.assertEqual(zstd.frame_content_size(frame), -1)
71 self.assertEqual(zstd.frame_content_size(frame), -1)
66
72
67 def test_empty(self):
73 def test_empty(self):
68 cctx = zstd.ZstdCompressor()
74 cctx = zstd.ZstdCompressor()
69 frame = cctx.compress(b'')
75 frame = cctx.compress(b"")
70
76
71 self.assertEqual(zstd.frame_content_size(frame), 0)
77 self.assertEqual(zstd.frame_content_size(frame), 0)
72
78
73 def test_basic(self):
79 def test_basic(self):
74 cctx = zstd.ZstdCompressor()
80 cctx = zstd.ZstdCompressor()
75 frame = cctx.compress(b'foobar')
81 frame = cctx.compress(b"foobar")
76
82
77 self.assertEqual(zstd.frame_content_size(frame), 6)
83 self.assertEqual(zstd.frame_content_size(frame), 6)
78
84
79
85
80 @make_cffi
86 @make_cffi
81 class TestDecompressor(unittest.TestCase):
87 class TestDecompressor(TestCase):
82 def test_memory_size(self):
88 def test_memory_size(self):
83 dctx = zstd.ZstdDecompressor()
89 dctx = zstd.ZstdDecompressor()
84
90
85 self.assertGreater(dctx.memory_size(), 100)
91 self.assertGreater(dctx.memory_size(), 100)
86
92
87
93
88 @make_cffi
94 @make_cffi
89 class TestDecompressor_decompress(unittest.TestCase):
95 class TestDecompressor_decompress(TestCase):
90 def test_empty_input(self):
96 def test_empty_input(self):
91 dctx = zstd.ZstdDecompressor()
97 dctx = zstd.ZstdDecompressor()
92
98
93 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
99 with self.assertRaisesRegex(
94 dctx.decompress(b'')
100 zstd.ZstdError, "error determining content size from frame header"
101 ):
102 dctx.decompress(b"")
95
103
96 def test_invalid_input(self):
104 def test_invalid_input(self):
97 dctx = zstd.ZstdDecompressor()
105 dctx = zstd.ZstdDecompressor()
98
106
99 with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'):
107 with self.assertRaisesRegex(
100 dctx.decompress(b'foobar')
108 zstd.ZstdError, "error determining content size from frame header"
109 ):
110 dctx.decompress(b"foobar")
101
111
102 def test_input_types(self):
112 def test_input_types(self):
103 cctx = zstd.ZstdCompressor(level=1)
113 cctx = zstd.ZstdCompressor(level=1)
104 compressed = cctx.compress(b'foo')
114 compressed = cctx.compress(b"foo")
105
115
106 mutable_array = bytearray(len(compressed))
116 mutable_array = bytearray(len(compressed))
107 mutable_array[:] = compressed
117 mutable_array[:] = compressed
108
118
109 sources = [
119 sources = [
110 memoryview(compressed),
120 memoryview(compressed),
111 bytearray(compressed),
121 bytearray(compressed),
112 mutable_array,
122 mutable_array,
113 ]
123 ]
114
124
115 dctx = zstd.ZstdDecompressor()
125 dctx = zstd.ZstdDecompressor()
116 for source in sources:
126 for source in sources:
117 self.assertEqual(dctx.decompress(source), b'foo')
127 self.assertEqual(dctx.decompress(source), b"foo")
118
128
119 def test_no_content_size_in_frame(self):
129 def test_no_content_size_in_frame(self):
120 cctx = zstd.ZstdCompressor(write_content_size=False)
130 cctx = zstd.ZstdCompressor(write_content_size=False)
121 compressed = cctx.compress(b'foobar')
131 compressed = cctx.compress(b"foobar")
122
132
123 dctx = zstd.ZstdDecompressor()
133 dctx = zstd.ZstdDecompressor()
124 with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'):
134 with self.assertRaisesRegex(
135 zstd.ZstdError, "could not determine content size in frame header"
136 ):
125 dctx.decompress(compressed)
137 dctx.decompress(compressed)
126
138
127 def test_content_size_present(self):
139 def test_content_size_present(self):
128 cctx = zstd.ZstdCompressor()
140 cctx = zstd.ZstdCompressor()
129 compressed = cctx.compress(b'foobar')
141 compressed = cctx.compress(b"foobar")
130
142
131 dctx = zstd.ZstdDecompressor()
143 dctx = zstd.ZstdDecompressor()
132 decompressed = dctx.decompress(compressed)
144 decompressed = dctx.decompress(compressed)
133 self.assertEqual(decompressed, b'foobar')
145 self.assertEqual(decompressed, b"foobar")
134
146
135 def test_empty_roundtrip(self):
147 def test_empty_roundtrip(self):
136 cctx = zstd.ZstdCompressor()
148 cctx = zstd.ZstdCompressor()
137 compressed = cctx.compress(b'')
149 compressed = cctx.compress(b"")
138
150
139 dctx = zstd.ZstdDecompressor()
151 dctx = zstd.ZstdDecompressor()
140 decompressed = dctx.decompress(compressed)
152 decompressed = dctx.decompress(compressed)
141
153
142 self.assertEqual(decompressed, b'')
154 self.assertEqual(decompressed, b"")
143
155
144 def test_max_output_size(self):
156 def test_max_output_size(self):
145 cctx = zstd.ZstdCompressor(write_content_size=False)
157 cctx = zstd.ZstdCompressor(write_content_size=False)
146 source = b'foobar' * 256
158 source = b"foobar" * 256
147 compressed = cctx.compress(source)
159 compressed = cctx.compress(source)
148
160
149 dctx = zstd.ZstdDecompressor()
161 dctx = zstd.ZstdDecompressor()
150 # Will fit into buffer exactly the size of input.
162 # Will fit into buffer exactly the size of input.
151 decompressed = dctx.decompress(compressed, max_output_size=len(source))
163 decompressed = dctx.decompress(compressed, max_output_size=len(source))
152 self.assertEqual(decompressed, source)
164 self.assertEqual(decompressed, source)
153
165
154 # Input size - 1 fails
166 # Input size - 1 fails
155 with self.assertRaisesRegexp(zstd.ZstdError,
167 with self.assertRaisesRegex(
156 'decompression error: did not decompress full frame'):
168 zstd.ZstdError, "decompression error: did not decompress full frame"
169 ):
157 dctx.decompress(compressed, max_output_size=len(source) - 1)
170 dctx.decompress(compressed, max_output_size=len(source) - 1)
158
171
159 # Input size + 1 works
172 # Input size + 1 works
160 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
173 decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1)
161 self.assertEqual(decompressed, source)
174 self.assertEqual(decompressed, source)
162
175
163 # A much larger buffer works.
176 # A much larger buffer works.
164 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
177 decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64)
165 self.assertEqual(decompressed, source)
178 self.assertEqual(decompressed, source)
166
179
167 def test_stupidly_large_output_buffer(self):
180 def test_stupidly_large_output_buffer(self):
168 cctx = zstd.ZstdCompressor(write_content_size=False)
181 cctx = zstd.ZstdCompressor(write_content_size=False)
169 compressed = cctx.compress(b'foobar' * 256)
182 compressed = cctx.compress(b"foobar" * 256)
170 dctx = zstd.ZstdDecompressor()
183 dctx = zstd.ZstdDecompressor()
171
184
172 # Will get OverflowError on some Python distributions that can't
185 # Will get OverflowError on some Python distributions that can't
173 # handle really large integers.
186 # handle really large integers.
174 with self.assertRaises((MemoryError, OverflowError)):
187 with self.assertRaises((MemoryError, OverflowError)):
175 dctx.decompress(compressed, max_output_size=2**62)
188 dctx.decompress(compressed, max_output_size=2 ** 62)
176
189
177 def test_dictionary(self):
190 def test_dictionary(self):
178 samples = []
191 samples = []
179 for i in range(128):
192 for i in range(128):
180 samples.append(b'foo' * 64)
193 samples.append(b"foo" * 64)
181 samples.append(b'bar' * 64)
194 samples.append(b"bar" * 64)
182 samples.append(b'foobar' * 64)
195 samples.append(b"foobar" * 64)
183
196
184 d = zstd.train_dictionary(8192, samples)
197 d = zstd.train_dictionary(8192, samples)
185
198
186 orig = b'foobar' * 16384
199 orig = b"foobar" * 16384
187 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
200 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
188 compressed = cctx.compress(orig)
201 compressed = cctx.compress(orig)
189
202
190 dctx = zstd.ZstdDecompressor(dict_data=d)
203 dctx = zstd.ZstdDecompressor(dict_data=d)
191 decompressed = dctx.decompress(compressed)
204 decompressed = dctx.decompress(compressed)
192
205
193 self.assertEqual(decompressed, orig)
206 self.assertEqual(decompressed, orig)
194
207
195 def test_dictionary_multiple(self):
208 def test_dictionary_multiple(self):
196 samples = []
209 samples = []
197 for i in range(128):
210 for i in range(128):
198 samples.append(b'foo' * 64)
211 samples.append(b"foo" * 64)
199 samples.append(b'bar' * 64)
212 samples.append(b"bar" * 64)
200 samples.append(b'foobar' * 64)
213 samples.append(b"foobar" * 64)
201
214
202 d = zstd.train_dictionary(8192, samples)
215 d = zstd.train_dictionary(8192, samples)
203
216
204 sources = (b'foobar' * 8192, b'foo' * 8192, b'bar' * 8192)
217 sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192)
205 compressed = []
218 compressed = []
206 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
219 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
207 for source in sources:
220 for source in sources:
208 compressed.append(cctx.compress(source))
221 compressed.append(cctx.compress(source))
209
222
210 dctx = zstd.ZstdDecompressor(dict_data=d)
223 dctx = zstd.ZstdDecompressor(dict_data=d)
211 for i in range(len(sources)):
224 for i in range(len(sources)):
212 decompressed = dctx.decompress(compressed[i])
225 decompressed = dctx.decompress(compressed[i])
213 self.assertEqual(decompressed, sources[i])
226 self.assertEqual(decompressed, sources[i])
214
227
215 def test_max_window_size(self):
228 def test_max_window_size(self):
216 with open(__file__, 'rb') as fh:
229 with open(__file__, "rb") as fh:
217 source = fh.read()
230 source = fh.read()
218
231
219 # If we write a content size, the decompressor engages single pass
232 # If we write a content size, the decompressor engages single pass
220 # mode and the window size doesn't come into play.
233 # mode and the window size doesn't come into play.
221 cctx = zstd.ZstdCompressor(write_content_size=False)
234 cctx = zstd.ZstdCompressor(write_content_size=False)
222 frame = cctx.compress(source)
235 frame = cctx.compress(source)
223
236
224 dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN)
237 dctx = zstd.ZstdDecompressor(max_window_size=2 ** zstd.WINDOWLOG_MIN)
225
238
226 with self.assertRaisesRegexp(
239 with self.assertRaisesRegex(
227 zstd.ZstdError, 'decompression error: Frame requires too much memory'):
240 zstd.ZstdError, "decompression error: Frame requires too much memory"
241 ):
228 dctx.decompress(frame, max_output_size=len(source))
242 dctx.decompress(frame, max_output_size=len(source))
229
243
230
244
231 @make_cffi
245 @make_cffi
232 class TestDecompressor_copy_stream(unittest.TestCase):
246 class TestDecompressor_copy_stream(TestCase):
233 def test_no_read(self):
247 def test_no_read(self):
234 source = object()
248 source = object()
235 dest = io.BytesIO()
249 dest = io.BytesIO()
236
250
237 dctx = zstd.ZstdDecompressor()
251 dctx = zstd.ZstdDecompressor()
238 with self.assertRaises(ValueError):
252 with self.assertRaises(ValueError):
239 dctx.copy_stream(source, dest)
253 dctx.copy_stream(source, dest)
240
254
241 def test_no_write(self):
255 def test_no_write(self):
242 source = io.BytesIO()
256 source = io.BytesIO()
243 dest = object()
257 dest = object()
244
258
245 dctx = zstd.ZstdDecompressor()
259 dctx = zstd.ZstdDecompressor()
246 with self.assertRaises(ValueError):
260 with self.assertRaises(ValueError):
247 dctx.copy_stream(source, dest)
261 dctx.copy_stream(source, dest)
248
262
249 def test_empty(self):
263 def test_empty(self):
250 source = io.BytesIO()
264 source = io.BytesIO()
251 dest = io.BytesIO()
265 dest = io.BytesIO()
252
266
253 dctx = zstd.ZstdDecompressor()
267 dctx = zstd.ZstdDecompressor()
254 # TODO should this raise an error?
268 # TODO should this raise an error?
255 r, w = dctx.copy_stream(source, dest)
269 r, w = dctx.copy_stream(source, dest)
256
270
257 self.assertEqual(r, 0)
271 self.assertEqual(r, 0)
258 self.assertEqual(w, 0)
272 self.assertEqual(w, 0)
259 self.assertEqual(dest.getvalue(), b'')
273 self.assertEqual(dest.getvalue(), b"")
260
274
261 def test_large_data(self):
275 def test_large_data(self):
262 source = io.BytesIO()
276 source = io.BytesIO()
263 for i in range(255):
277 for i in range(255):
264 source.write(struct.Struct('>B').pack(i) * 16384)
278 source.write(struct.Struct(">B").pack(i) * 16384)
265 source.seek(0)
279 source.seek(0)
266
280
267 compressed = io.BytesIO()
281 compressed = io.BytesIO()
268 cctx = zstd.ZstdCompressor()
282 cctx = zstd.ZstdCompressor()
269 cctx.copy_stream(source, compressed)
283 cctx.copy_stream(source, compressed)
270
284
271 compressed.seek(0)
285 compressed.seek(0)
272 dest = io.BytesIO()
286 dest = io.BytesIO()
273 dctx = zstd.ZstdDecompressor()
287 dctx = zstd.ZstdDecompressor()
274 r, w = dctx.copy_stream(compressed, dest)
288 r, w = dctx.copy_stream(compressed, dest)
275
289
276 self.assertEqual(r, len(compressed.getvalue()))
290 self.assertEqual(r, len(compressed.getvalue()))
277 self.assertEqual(w, len(source.getvalue()))
291 self.assertEqual(w, len(source.getvalue()))
278
292
279 def test_read_write_size(self):
293 def test_read_write_size(self):
280 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(
294 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar"))
281 b'foobarfoobar'))
282
295
283 dest = OpCountingBytesIO()
296 dest = OpCountingBytesIO()
284 dctx = zstd.ZstdDecompressor()
297 dctx = zstd.ZstdDecompressor()
285 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
298 r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1)
286
299
287 self.assertEqual(r, len(source.getvalue()))
300 self.assertEqual(r, len(source.getvalue()))
288 self.assertEqual(w, len(b'foobarfoobar'))
301 self.assertEqual(w, len(b"foobarfoobar"))
289 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
302 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
290 self.assertEqual(dest._write_count, len(dest.getvalue()))
303 self.assertEqual(dest._write_count, len(dest.getvalue()))
291
304
292
305
293 @make_cffi
306 @make_cffi
294 class TestDecompressor_stream_reader(unittest.TestCase):
307 class TestDecompressor_stream_reader(TestCase):
295 def test_context_manager(self):
308 def test_context_manager(self):
296 dctx = zstd.ZstdDecompressor()
309 dctx = zstd.ZstdDecompressor()
297
310
298 with dctx.stream_reader(b'foo') as reader:
311 with dctx.stream_reader(b"foo") as reader:
299 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
312 with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"):
300 with reader as reader2:
313 with reader as reader2:
301 pass
314 pass
302
315
303 def test_not_implemented(self):
316 def test_not_implemented(self):
304 dctx = zstd.ZstdDecompressor()
317 dctx = zstd.ZstdDecompressor()
305
318
306 with dctx.stream_reader(b'foo') as reader:
319 with dctx.stream_reader(b"foo") as reader:
307 with self.assertRaises(io.UnsupportedOperation):
320 with self.assertRaises(io.UnsupportedOperation):
308 reader.readline()
321 reader.readline()
309
322
310 with self.assertRaises(io.UnsupportedOperation):
323 with self.assertRaises(io.UnsupportedOperation):
311 reader.readlines()
324 reader.readlines()
312
325
313 with self.assertRaises(io.UnsupportedOperation):
326 with self.assertRaises(io.UnsupportedOperation):
314 iter(reader)
327 iter(reader)
315
328
316 with self.assertRaises(io.UnsupportedOperation):
329 with self.assertRaises(io.UnsupportedOperation):
317 next(reader)
330 next(reader)
318
331
319 with self.assertRaises(io.UnsupportedOperation):
332 with self.assertRaises(io.UnsupportedOperation):
320 reader.write(b'foo')
333 reader.write(b"foo")
321
334
322 with self.assertRaises(io.UnsupportedOperation):
335 with self.assertRaises(io.UnsupportedOperation):
323 reader.writelines([])
336 reader.writelines([])
324
337
325 def test_constant_methods(self):
338 def test_constant_methods(self):
326 dctx = zstd.ZstdDecompressor()
339 dctx = zstd.ZstdDecompressor()
327
340
328 with dctx.stream_reader(b'foo') as reader:
341 with dctx.stream_reader(b"foo") as reader:
329 self.assertFalse(reader.closed)
342 self.assertFalse(reader.closed)
330 self.assertTrue(reader.readable())
343 self.assertTrue(reader.readable())
331 self.assertFalse(reader.writable())
344 self.assertFalse(reader.writable())
332 self.assertTrue(reader.seekable())
345 self.assertTrue(reader.seekable())
333 self.assertFalse(reader.isatty())
346 self.assertFalse(reader.isatty())
334 self.assertFalse(reader.closed)
347 self.assertFalse(reader.closed)
335 self.assertIsNone(reader.flush())
348 self.assertIsNone(reader.flush())
336 self.assertFalse(reader.closed)
349 self.assertFalse(reader.closed)
337
350
338 self.assertTrue(reader.closed)
351 self.assertTrue(reader.closed)
339
352
340 def test_read_closed(self):
353 def test_read_closed(self):
341 dctx = zstd.ZstdDecompressor()
354 dctx = zstd.ZstdDecompressor()
342
355
343 with dctx.stream_reader(b'foo') as reader:
356 with dctx.stream_reader(b"foo") as reader:
344 reader.close()
357 reader.close()
345 self.assertTrue(reader.closed)
358 self.assertTrue(reader.closed)
346 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
359 with self.assertRaisesRegex(ValueError, "stream is closed"):
347 reader.read(1)
360 reader.read(1)
348
361
349 def test_read_sizes(self):
362 def test_read_sizes(self):
350 cctx = zstd.ZstdCompressor()
363 cctx = zstd.ZstdCompressor()
351 foo = cctx.compress(b'foo')
364 foo = cctx.compress(b"foo")
352
365
353 dctx = zstd.ZstdDecompressor()
366 dctx = zstd.ZstdDecompressor()
354
367
355 with dctx.stream_reader(foo) as reader:
368 with dctx.stream_reader(foo) as reader:
356 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
369 with self.assertRaisesRegex(
370 ValueError, "cannot read negative amounts less than -1"
371 ):
357 reader.read(-2)
372 reader.read(-2)
358
373
359 self.assertEqual(reader.read(0), b'')
374 self.assertEqual(reader.read(0), b"")
360 self.assertEqual(reader.read(), b'foo')
375 self.assertEqual(reader.read(), b"foo")
361
376
362 def test_read_buffer(self):
377 def test_read_buffer(self):
363 cctx = zstd.ZstdCompressor()
378 cctx = zstd.ZstdCompressor()
364
379
365 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
380 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
366 frame = cctx.compress(source)
381 frame = cctx.compress(source)
367
382
368 dctx = zstd.ZstdDecompressor()
383 dctx = zstd.ZstdDecompressor()
369
384
370 with dctx.stream_reader(frame) as reader:
385 with dctx.stream_reader(frame) as reader:
371 self.assertEqual(reader.tell(), 0)
386 self.assertEqual(reader.tell(), 0)
372
387
373 # We should get entire frame in one read.
388 # We should get entire frame in one read.
374 result = reader.read(8192)
389 result = reader.read(8192)
375 self.assertEqual(result, source)
390 self.assertEqual(result, source)
376 self.assertEqual(reader.tell(), len(source))
391 self.assertEqual(reader.tell(), len(source))
377
392
378 # Read after EOF should return empty bytes.
393 # Read after EOF should return empty bytes.
379 self.assertEqual(reader.read(1), b'')
394 self.assertEqual(reader.read(1), b"")
380 self.assertEqual(reader.tell(), len(result))
395 self.assertEqual(reader.tell(), len(result))
381
396
382 self.assertTrue(reader.closed)
397 self.assertTrue(reader.closed)
383
398
384 def test_read_buffer_small_chunks(self):
399 def test_read_buffer_small_chunks(self):
385 cctx = zstd.ZstdCompressor()
400 cctx = zstd.ZstdCompressor()
386 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
401 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
387 frame = cctx.compress(source)
402 frame = cctx.compress(source)
388
403
389 dctx = zstd.ZstdDecompressor()
404 dctx = zstd.ZstdDecompressor()
390 chunks = []
405 chunks = []
391
406
392 with dctx.stream_reader(frame, read_size=1) as reader:
407 with dctx.stream_reader(frame, read_size=1) as reader:
393 while True:
408 while True:
394 chunk = reader.read(1)
409 chunk = reader.read(1)
395 if not chunk:
410 if not chunk:
396 break
411 break
397
412
398 chunks.append(chunk)
413 chunks.append(chunk)
399 self.assertEqual(reader.tell(), sum(map(len, chunks)))
414 self.assertEqual(reader.tell(), sum(map(len, chunks)))
400
415
401 self.assertEqual(b''.join(chunks), source)
416 self.assertEqual(b"".join(chunks), source)
402
417
403 def test_read_stream(self):
418 def test_read_stream(self):
404 cctx = zstd.ZstdCompressor()
419 cctx = zstd.ZstdCompressor()
405 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
420 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
406 frame = cctx.compress(source)
421 frame = cctx.compress(source)
407
422
408 dctx = zstd.ZstdDecompressor()
423 dctx = zstd.ZstdDecompressor()
409 with dctx.stream_reader(io.BytesIO(frame)) as reader:
424 with dctx.stream_reader(io.BytesIO(frame)) as reader:
410 self.assertEqual(reader.tell(), 0)
425 self.assertEqual(reader.tell(), 0)
411
426
412 chunk = reader.read(8192)
427 chunk = reader.read(8192)
413 self.assertEqual(chunk, source)
428 self.assertEqual(chunk, source)
414 self.assertEqual(reader.tell(), len(source))
429 self.assertEqual(reader.tell(), len(source))
415 self.assertEqual(reader.read(1), b'')
430 self.assertEqual(reader.read(1), b"")
416 self.assertEqual(reader.tell(), len(source))
431 self.assertEqual(reader.tell(), len(source))
417 self.assertFalse(reader.closed)
432 self.assertFalse(reader.closed)
418
433
419 self.assertTrue(reader.closed)
434 self.assertTrue(reader.closed)
420
435
421 def test_read_stream_small_chunks(self):
436 def test_read_stream_small_chunks(self):
422 cctx = zstd.ZstdCompressor()
437 cctx = zstd.ZstdCompressor()
423 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
438 source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60])
424 frame = cctx.compress(source)
439 frame = cctx.compress(source)
425
440
426 dctx = zstd.ZstdDecompressor()
441 dctx = zstd.ZstdDecompressor()
427 chunks = []
442 chunks = []
428
443
429 with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader:
444 with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader:
430 while True:
445 while True:
431 chunk = reader.read(1)
446 chunk = reader.read(1)
432 if not chunk:
447 if not chunk:
433 break
448 break
434
449
435 chunks.append(chunk)
450 chunks.append(chunk)
436 self.assertEqual(reader.tell(), sum(map(len, chunks)))
451 self.assertEqual(reader.tell(), sum(map(len, chunks)))
437
452
438 self.assertEqual(b''.join(chunks), source)
453 self.assertEqual(b"".join(chunks), source)
439
454
440 def test_read_after_exit(self):
455 def test_read_after_exit(self):
441 cctx = zstd.ZstdCompressor()
456 cctx = zstd.ZstdCompressor()
442 frame = cctx.compress(b'foo' * 60)
457 frame = cctx.compress(b"foo" * 60)
443
458
444 dctx = zstd.ZstdDecompressor()
459 dctx = zstd.ZstdDecompressor()
445
460
446 with dctx.stream_reader(frame) as reader:
461 with dctx.stream_reader(frame) as reader:
447 while reader.read(16):
462 while reader.read(16):
448 pass
463 pass
449
464
450 self.assertTrue(reader.closed)
465 self.assertTrue(reader.closed)
451
466
452 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
467 with self.assertRaisesRegex(ValueError, "stream is closed"):
453 reader.read(10)
468 reader.read(10)
454
469
455 def test_illegal_seeks(self):
470 def test_illegal_seeks(self):
456 cctx = zstd.ZstdCompressor()
471 cctx = zstd.ZstdCompressor()
457 frame = cctx.compress(b'foo' * 60)
472 frame = cctx.compress(b"foo" * 60)
458
473
459 dctx = zstd.ZstdDecompressor()
474 dctx = zstd.ZstdDecompressor()
460
475
461 with dctx.stream_reader(frame) as reader:
476 with dctx.stream_reader(frame) as reader:
462 with self.assertRaisesRegexp(ValueError,
477 with self.assertRaisesRegex(ValueError, "cannot seek to negative position"):
463 'cannot seek to negative position'):
464 reader.seek(-1, os.SEEK_SET)
478 reader.seek(-1, os.SEEK_SET)
465
479
466 reader.read(1)
480 reader.read(1)
467
481
468 with self.assertRaisesRegexp(
482 with self.assertRaisesRegex(
469 ValueError, 'cannot seek zstd decompression stream backwards'):
483 ValueError, "cannot seek zstd decompression stream backwards"
484 ):
470 reader.seek(0, os.SEEK_SET)
485 reader.seek(0, os.SEEK_SET)
471
486
472 with self.assertRaisesRegexp(
487 with self.assertRaisesRegex(
473 ValueError, 'cannot seek zstd decompression stream backwards'):
488 ValueError, "cannot seek zstd decompression stream backwards"
489 ):
474 reader.seek(-1, os.SEEK_CUR)
490 reader.seek(-1, os.SEEK_CUR)
475
491
476 with self.assertRaisesRegexp(
492 with self.assertRaisesRegex(
477 ValueError,
493 ValueError, "zstd decompression streams cannot be seeked with SEEK_END"
478 'zstd decompression streams cannot be seeked with SEEK_END'):
494 ):
479 reader.seek(0, os.SEEK_END)
495 reader.seek(0, os.SEEK_END)
480
496
481 reader.close()
497 reader.close()
482
498
483 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
499 with self.assertRaisesRegex(ValueError, "stream is closed"):
484 reader.seek(4, os.SEEK_SET)
500 reader.seek(4, os.SEEK_SET)
485
501
486 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
502 with self.assertRaisesRegex(ValueError, "stream is closed"):
487 reader.seek(0)
503 reader.seek(0)
488
504
489 def test_seek(self):
505 def test_seek(self):
490 source = b'foobar' * 60
506 source = b"foobar" * 60
491 cctx = zstd.ZstdCompressor()
507 cctx = zstd.ZstdCompressor()
492 frame = cctx.compress(source)
508 frame = cctx.compress(source)
493
509
494 dctx = zstd.ZstdDecompressor()
510 dctx = zstd.ZstdDecompressor()
495
511
496 with dctx.stream_reader(frame) as reader:
512 with dctx.stream_reader(frame) as reader:
497 reader.seek(3)
513 reader.seek(3)
498 self.assertEqual(reader.read(3), b'bar')
514 self.assertEqual(reader.read(3), b"bar")
499
515
500 reader.seek(4, os.SEEK_CUR)
516 reader.seek(4, os.SEEK_CUR)
501 self.assertEqual(reader.read(2), b'ar')
517 self.assertEqual(reader.read(2), b"ar")
502
518
503 def test_no_context_manager(self):
519 def test_no_context_manager(self):
504 source = b'foobar' * 60
520 source = b"foobar" * 60
505 cctx = zstd.ZstdCompressor()
521 cctx = zstd.ZstdCompressor()
506 frame = cctx.compress(source)
522 frame = cctx.compress(source)
507
523
508 dctx = zstd.ZstdDecompressor()
524 dctx = zstd.ZstdDecompressor()
509 reader = dctx.stream_reader(frame)
525 reader = dctx.stream_reader(frame)
510
526
511 self.assertEqual(reader.read(6), b'foobar')
527 self.assertEqual(reader.read(6), b"foobar")
512 self.assertEqual(reader.read(18), b'foobar' * 3)
528 self.assertEqual(reader.read(18), b"foobar" * 3)
513 self.assertFalse(reader.closed)
529 self.assertFalse(reader.closed)
514
530
515 # Calling close prevents subsequent use.
531 # Calling close prevents subsequent use.
516 reader.close()
532 reader.close()
517 self.assertTrue(reader.closed)
533 self.assertTrue(reader.closed)
518
534
519 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
535 with self.assertRaisesRegex(ValueError, "stream is closed"):
520 reader.read(6)
536 reader.read(6)
521
537
522 def test_read_after_error(self):
538 def test_read_after_error(self):
523 source = io.BytesIO(b'')
539 source = io.BytesIO(b"")
524 dctx = zstd.ZstdDecompressor()
540 dctx = zstd.ZstdDecompressor()
525
541
526 reader = dctx.stream_reader(source)
542 reader = dctx.stream_reader(source)
527
543
528 with reader:
544 with reader:
529 reader.read(0)
545 reader.read(0)
530
546
531 with reader:
547 with reader:
532 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
548 with self.assertRaisesRegex(ValueError, "stream is closed"):
533 reader.read(100)
549 reader.read(100)
534
550
535 def test_partial_read(self):
551 def test_partial_read(self):
536 # Inspired by https://github.com/indygreg/python-zstandard/issues/71.
552 # Inspired by https://github.com/indygreg/python-zstandard/issues/71.
537 buffer = io.BytesIO()
553 buffer = io.BytesIO()
538 cctx = zstd.ZstdCompressor()
554 cctx = zstd.ZstdCompressor()
539 writer = cctx.stream_writer(buffer)
555 writer = cctx.stream_writer(buffer)
540 writer.write(bytearray(os.urandom(1000000)))
556 writer.write(bytearray(os.urandom(1000000)))
541 writer.flush(zstd.FLUSH_FRAME)
557 writer.flush(zstd.FLUSH_FRAME)
542 buffer.seek(0)
558 buffer.seek(0)
543
559
544 dctx = zstd.ZstdDecompressor()
560 dctx = zstd.ZstdDecompressor()
545 reader = dctx.stream_reader(buffer)
561 reader = dctx.stream_reader(buffer)
546
562
547 while True:
563 while True:
548 chunk = reader.read(8192)
564 chunk = reader.read(8192)
549 if not chunk:
565 if not chunk:
550 break
566 break
551
567
552 def test_read_multiple_frames(self):
568 def test_read_multiple_frames(self):
553 cctx = zstd.ZstdCompressor()
569 cctx = zstd.ZstdCompressor()
554 source = io.BytesIO()
570 source = io.BytesIO()
555 writer = cctx.stream_writer(source)
571 writer = cctx.stream_writer(source)
556 writer.write(b'foo')
572 writer.write(b"foo")
557 writer.flush(zstd.FLUSH_FRAME)
573 writer.flush(zstd.FLUSH_FRAME)
558 writer.write(b'bar')
574 writer.write(b"bar")
559 writer.flush(zstd.FLUSH_FRAME)
575 writer.flush(zstd.FLUSH_FRAME)
560
576
561 dctx = zstd.ZstdDecompressor()
577 dctx = zstd.ZstdDecompressor()
562
578
563 reader = dctx.stream_reader(source.getvalue())
579 reader = dctx.stream_reader(source.getvalue())
564 self.assertEqual(reader.read(2), b'fo')
580 self.assertEqual(reader.read(2), b"fo")
565 self.assertEqual(reader.read(2), b'o')
581 self.assertEqual(reader.read(2), b"o")
566 self.assertEqual(reader.read(2), b'ba')
582 self.assertEqual(reader.read(2), b"ba")
567 self.assertEqual(reader.read(2), b'r')
583 self.assertEqual(reader.read(2), b"r")
568
584
569 source.seek(0)
585 source.seek(0)
570 reader = dctx.stream_reader(source)
586 reader = dctx.stream_reader(source)
571 self.assertEqual(reader.read(2), b'fo')
587 self.assertEqual(reader.read(2), b"fo")
572 self.assertEqual(reader.read(2), b'o')
588 self.assertEqual(reader.read(2), b"o")
573 self.assertEqual(reader.read(2), b'ba')
589 self.assertEqual(reader.read(2), b"ba")
574 self.assertEqual(reader.read(2), b'r')
590 self.assertEqual(reader.read(2), b"r")
575
591
576 reader = dctx.stream_reader(source.getvalue())
592 reader = dctx.stream_reader(source.getvalue())
577 self.assertEqual(reader.read(3), b'foo')
593 self.assertEqual(reader.read(3), b"foo")
578 self.assertEqual(reader.read(3), b'bar')
594 self.assertEqual(reader.read(3), b"bar")
579
595
580 source.seek(0)
596 source.seek(0)
581 reader = dctx.stream_reader(source)
597 reader = dctx.stream_reader(source)
582 self.assertEqual(reader.read(3), b'foo')
598 self.assertEqual(reader.read(3), b"foo")
583 self.assertEqual(reader.read(3), b'bar')
599 self.assertEqual(reader.read(3), b"bar")
584
600
585 reader = dctx.stream_reader(source.getvalue())
601 reader = dctx.stream_reader(source.getvalue())
586 self.assertEqual(reader.read(4), b'foo')
602 self.assertEqual(reader.read(4), b"foo")
587 self.assertEqual(reader.read(4), b'bar')
603 self.assertEqual(reader.read(4), b"bar")
588
604
589 source.seek(0)
605 source.seek(0)
590 reader = dctx.stream_reader(source)
606 reader = dctx.stream_reader(source)
591 self.assertEqual(reader.read(4), b'foo')
607 self.assertEqual(reader.read(4), b"foo")
592 self.assertEqual(reader.read(4), b'bar')
608 self.assertEqual(reader.read(4), b"bar")
593
609
594 reader = dctx.stream_reader(source.getvalue())
610 reader = dctx.stream_reader(source.getvalue())
595 self.assertEqual(reader.read(128), b'foo')
611 self.assertEqual(reader.read(128), b"foo")
596 self.assertEqual(reader.read(128), b'bar')
612 self.assertEqual(reader.read(128), b"bar")
597
613
598 source.seek(0)
614 source.seek(0)
599 reader = dctx.stream_reader(source)
615 reader = dctx.stream_reader(source)
600 self.assertEqual(reader.read(128), b'foo')
616 self.assertEqual(reader.read(128), b"foo")
601 self.assertEqual(reader.read(128), b'bar')
617 self.assertEqual(reader.read(128), b"bar")
602
618
603 # Now tests for reads spanning frames.
619 # Now tests for reads spanning frames.
604 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
620 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
605 self.assertEqual(reader.read(3), b'foo')
621 self.assertEqual(reader.read(3), b"foo")
606 self.assertEqual(reader.read(3), b'bar')
622 self.assertEqual(reader.read(3), b"bar")
607
623
608 source.seek(0)
624 source.seek(0)
609 reader = dctx.stream_reader(source, read_across_frames=True)
625 reader = dctx.stream_reader(source, read_across_frames=True)
610 self.assertEqual(reader.read(3), b'foo')
626 self.assertEqual(reader.read(3), b"foo")
611 self.assertEqual(reader.read(3), b'bar')
627 self.assertEqual(reader.read(3), b"bar")
612
628
613 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
629 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
614 self.assertEqual(reader.read(6), b'foobar')
630 self.assertEqual(reader.read(6), b"foobar")
615
631
616 source.seek(0)
632 source.seek(0)
617 reader = dctx.stream_reader(source, read_across_frames=True)
633 reader = dctx.stream_reader(source, read_across_frames=True)
618 self.assertEqual(reader.read(6), b'foobar')
634 self.assertEqual(reader.read(6), b"foobar")
619
635
620 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
636 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
621 self.assertEqual(reader.read(7), b'foobar')
637 self.assertEqual(reader.read(7), b"foobar")
622
638
623 source.seek(0)
639 source.seek(0)
624 reader = dctx.stream_reader(source, read_across_frames=True)
640 reader = dctx.stream_reader(source, read_across_frames=True)
625 self.assertEqual(reader.read(7), b'foobar')
641 self.assertEqual(reader.read(7), b"foobar")
626
642
627 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
643 reader = dctx.stream_reader(source.getvalue(), read_across_frames=True)
628 self.assertEqual(reader.read(128), b'foobar')
644 self.assertEqual(reader.read(128), b"foobar")
629
645
630 source.seek(0)
646 source.seek(0)
631 reader = dctx.stream_reader(source, read_across_frames=True)
647 reader = dctx.stream_reader(source, read_across_frames=True)
632 self.assertEqual(reader.read(128), b'foobar')
648 self.assertEqual(reader.read(128), b"foobar")
633
649
634 def test_readinto(self):
650 def test_readinto(self):
635 cctx = zstd.ZstdCompressor()
651 cctx = zstd.ZstdCompressor()
636 foo = cctx.compress(b'foo')
652 foo = cctx.compress(b"foo")
637
653
638 dctx = zstd.ZstdDecompressor()
654 dctx = zstd.ZstdDecompressor()
639
655
640 # Attempting to readinto() a non-writable buffer fails.
656 # Attempting to readinto() a non-writable buffer fails.
641 # The exact exception varies based on the backend.
657 # The exact exception varies based on the backend.
642 reader = dctx.stream_reader(foo)
658 reader = dctx.stream_reader(foo)
643 with self.assertRaises(Exception):
659 with self.assertRaises(Exception):
644 reader.readinto(b'foobar')
660 reader.readinto(b"foobar")
645
661
646 # readinto() with sufficiently large destination.
662 # readinto() with sufficiently large destination.
647 b = bytearray(1024)
663 b = bytearray(1024)
648 reader = dctx.stream_reader(foo)
664 reader = dctx.stream_reader(foo)
649 self.assertEqual(reader.readinto(b), 3)
665 self.assertEqual(reader.readinto(b), 3)
650 self.assertEqual(b[0:3], b'foo')
666 self.assertEqual(b[0:3], b"foo")
651 self.assertEqual(reader.readinto(b), 0)
667 self.assertEqual(reader.readinto(b), 0)
652 self.assertEqual(b[0:3], b'foo')
668 self.assertEqual(b[0:3], b"foo")
653
669
654 # readinto() with small reads.
670 # readinto() with small reads.
655 b = bytearray(1024)
671 b = bytearray(1024)
656 reader = dctx.stream_reader(foo, read_size=1)
672 reader = dctx.stream_reader(foo, read_size=1)
657 self.assertEqual(reader.readinto(b), 3)
673 self.assertEqual(reader.readinto(b), 3)
658 self.assertEqual(b[0:3], b'foo')
674 self.assertEqual(b[0:3], b"foo")
659
675
660 # Too small destination buffer.
676 # Too small destination buffer.
661 b = bytearray(2)
677 b = bytearray(2)
662 reader = dctx.stream_reader(foo)
678 reader = dctx.stream_reader(foo)
663 self.assertEqual(reader.readinto(b), 2)
679 self.assertEqual(reader.readinto(b), 2)
664 self.assertEqual(b[:], b'fo')
680 self.assertEqual(b[:], b"fo")
665
681
666 def test_readinto1(self):
682 def test_readinto1(self):
667 cctx = zstd.ZstdCompressor()
683 cctx = zstd.ZstdCompressor()
668 foo = cctx.compress(b'foo')
684 foo = cctx.compress(b"foo")
669
685
670 dctx = zstd.ZstdDecompressor()
686 dctx = zstd.ZstdDecompressor()
671
687
672 reader = dctx.stream_reader(foo)
688 reader = dctx.stream_reader(foo)
673 with self.assertRaises(Exception):
689 with self.assertRaises(Exception):
674 reader.readinto1(b'foobar')
690 reader.readinto1(b"foobar")
675
691
676 # Sufficiently large destination.
692 # Sufficiently large destination.
677 b = bytearray(1024)
693 b = bytearray(1024)
678 reader = dctx.stream_reader(foo)
694 reader = dctx.stream_reader(foo)
679 self.assertEqual(reader.readinto1(b), 3)
695 self.assertEqual(reader.readinto1(b), 3)
680 self.assertEqual(b[0:3], b'foo')
696 self.assertEqual(b[0:3], b"foo")
681 self.assertEqual(reader.readinto1(b), 0)
697 self.assertEqual(reader.readinto1(b), 0)
682 self.assertEqual(b[0:3], b'foo')
698 self.assertEqual(b[0:3], b"foo")
683
699
684 # readinto() with small reads.
700 # readinto() with small reads.
685 b = bytearray(1024)
701 b = bytearray(1024)
686 reader = dctx.stream_reader(foo, read_size=1)
702 reader = dctx.stream_reader(foo, read_size=1)
687 self.assertEqual(reader.readinto1(b), 3)
703 self.assertEqual(reader.readinto1(b), 3)
688 self.assertEqual(b[0:3], b'foo')
704 self.assertEqual(b[0:3], b"foo")
689
705
690 # Too small destination buffer.
706 # Too small destination buffer.
691 b = bytearray(2)
707 b = bytearray(2)
692 reader = dctx.stream_reader(foo)
708 reader = dctx.stream_reader(foo)
693 self.assertEqual(reader.readinto1(b), 2)
709 self.assertEqual(reader.readinto1(b), 2)
694 self.assertEqual(b[:], b'fo')
710 self.assertEqual(b[:], b"fo")
695
711
696 def test_readall(self):
712 def test_readall(self):
697 cctx = zstd.ZstdCompressor()
713 cctx = zstd.ZstdCompressor()
698 foo = cctx.compress(b'foo')
714 foo = cctx.compress(b"foo")
699
715
700 dctx = zstd.ZstdDecompressor()
716 dctx = zstd.ZstdDecompressor()
701 reader = dctx.stream_reader(foo)
717 reader = dctx.stream_reader(foo)
702
718
703 self.assertEqual(reader.readall(), b'foo')
719 self.assertEqual(reader.readall(), b"foo")
704
720
705 def test_read1(self):
721 def test_read1(self):
706 cctx = zstd.ZstdCompressor()
722 cctx = zstd.ZstdCompressor()
707 foo = cctx.compress(b'foo')
723 foo = cctx.compress(b"foo")
708
724
709 dctx = zstd.ZstdDecompressor()
725 dctx = zstd.ZstdDecompressor()
710
726
711 b = OpCountingBytesIO(foo)
727 b = OpCountingBytesIO(foo)
712 reader = dctx.stream_reader(b)
728 reader = dctx.stream_reader(b)
713
729
714 self.assertEqual(reader.read1(), b'foo')
730 self.assertEqual(reader.read1(), b"foo")
715 self.assertEqual(b._read_count, 1)
731 self.assertEqual(b._read_count, 1)
716
732
717 b = OpCountingBytesIO(foo)
733 b = OpCountingBytesIO(foo)
718 reader = dctx.stream_reader(b)
734 reader = dctx.stream_reader(b)
719
735
720 self.assertEqual(reader.read1(0), b'')
736 self.assertEqual(reader.read1(0), b"")
721 self.assertEqual(reader.read1(2), b'fo')
737 self.assertEqual(reader.read1(2), b"fo")
722 self.assertEqual(b._read_count, 1)
738 self.assertEqual(b._read_count, 1)
723 self.assertEqual(reader.read1(1), b'o')
739 self.assertEqual(reader.read1(1), b"o")
724 self.assertEqual(b._read_count, 1)
740 self.assertEqual(b._read_count, 1)
725 self.assertEqual(reader.read1(1), b'')
741 self.assertEqual(reader.read1(1), b"")
726 self.assertEqual(b._read_count, 2)
742 self.assertEqual(b._read_count, 2)
727
743
728 def test_read_lines(self):
744 def test_read_lines(self):
729 cctx = zstd.ZstdCompressor()
745 cctx = zstd.ZstdCompressor()
730 source = b'\n'.join(('line %d' % i).encode('ascii') for i in range(1024))
746 source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024))
731
747
732 frame = cctx.compress(source)
748 frame = cctx.compress(source)
733
749
734 dctx = zstd.ZstdDecompressor()
750 dctx = zstd.ZstdDecompressor()
735 reader = dctx.stream_reader(frame)
751 reader = dctx.stream_reader(frame)
736 tr = io.TextIOWrapper(reader, encoding='utf-8')
752 tr = io.TextIOWrapper(reader, encoding="utf-8")
737
753
738 lines = []
754 lines = []
739 for line in tr:
755 for line in tr:
740 lines.append(line.encode('utf-8'))
756 lines.append(line.encode("utf-8"))
741
757
742 self.assertEqual(len(lines), 1024)
758 self.assertEqual(len(lines), 1024)
743 self.assertEqual(b''.join(lines), source)
759 self.assertEqual(b"".join(lines), source)
744
760
745 reader = dctx.stream_reader(frame)
761 reader = dctx.stream_reader(frame)
746 tr = io.TextIOWrapper(reader, encoding='utf-8')
762 tr = io.TextIOWrapper(reader, encoding="utf-8")
747
763
748 lines = tr.readlines()
764 lines = tr.readlines()
749 self.assertEqual(len(lines), 1024)
765 self.assertEqual(len(lines), 1024)
750 self.assertEqual(''.join(lines).encode('utf-8'), source)
766 self.assertEqual("".join(lines).encode("utf-8"), source)
751
767
752 reader = dctx.stream_reader(frame)
768 reader = dctx.stream_reader(frame)
753 tr = io.TextIOWrapper(reader, encoding='utf-8')
769 tr = io.TextIOWrapper(reader, encoding="utf-8")
754
770
755 lines = []
771 lines = []
756 while True:
772 while True:
757 line = tr.readline()
773 line = tr.readline()
758 if not line:
774 if not line:
759 break
775 break
760
776
761 lines.append(line.encode('utf-8'))
777 lines.append(line.encode("utf-8"))
762
778
763 self.assertEqual(len(lines), 1024)
779 self.assertEqual(len(lines), 1024)
764 self.assertEqual(b''.join(lines), source)
780 self.assertEqual(b"".join(lines), source)
765
781
766
782
767 @make_cffi
783 @make_cffi
768 class TestDecompressor_decompressobj(unittest.TestCase):
784 class TestDecompressor_decompressobj(TestCase):
769 def test_simple(self):
785 def test_simple(self):
770 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
786 data = zstd.ZstdCompressor(level=1).compress(b"foobar")
771
787
772 dctx = zstd.ZstdDecompressor()
788 dctx = zstd.ZstdDecompressor()
773 dobj = dctx.decompressobj()
789 dobj = dctx.decompressobj()
774 self.assertEqual(dobj.decompress(data), b'foobar')
790 self.assertEqual(dobj.decompress(data), b"foobar")
775 self.assertIsNone(dobj.flush())
791 self.assertIsNone(dobj.flush())
776 self.assertIsNone(dobj.flush(10))
792 self.assertIsNone(dobj.flush(10))
777 self.assertIsNone(dobj.flush(length=100))
793 self.assertIsNone(dobj.flush(length=100))
778
794
779 def test_input_types(self):
795 def test_input_types(self):
780 compressed = zstd.ZstdCompressor(level=1).compress(b'foo')
796 compressed = zstd.ZstdCompressor(level=1).compress(b"foo")
781
797
782 dctx = zstd.ZstdDecompressor()
798 dctx = zstd.ZstdDecompressor()
783
799
784 mutable_array = bytearray(len(compressed))
800 mutable_array = bytearray(len(compressed))
785 mutable_array[:] = compressed
801 mutable_array[:] = compressed
786
802
787 sources = [
803 sources = [
788 memoryview(compressed),
804 memoryview(compressed),
789 bytearray(compressed),
805 bytearray(compressed),
790 mutable_array,
806 mutable_array,
791 ]
807 ]
792
808
793 for source in sources:
809 for source in sources:
794 dobj = dctx.decompressobj()
810 dobj = dctx.decompressobj()
795 self.assertIsNone(dobj.flush())
811 self.assertIsNone(dobj.flush())
796 self.assertIsNone(dobj.flush(10))
812 self.assertIsNone(dobj.flush(10))
797 self.assertIsNone(dobj.flush(length=100))
813 self.assertIsNone(dobj.flush(length=100))
798 self.assertEqual(dobj.decompress(source), b'foo')
814 self.assertEqual(dobj.decompress(source), b"foo")
799 self.assertIsNone(dobj.flush())
815 self.assertIsNone(dobj.flush())
800
816
801 def test_reuse(self):
817 def test_reuse(self):
802 data = zstd.ZstdCompressor(level=1).compress(b'foobar')
818 data = zstd.ZstdCompressor(level=1).compress(b"foobar")
803
819
804 dctx = zstd.ZstdDecompressor()
820 dctx = zstd.ZstdDecompressor()
805 dobj = dctx.decompressobj()
821 dobj = dctx.decompressobj()
806 dobj.decompress(data)
822 dobj.decompress(data)
807
823
808 with self.assertRaisesRegexp(zstd.ZstdError, 'cannot use a decompressobj'):
824 with self.assertRaisesRegex(zstd.ZstdError, "cannot use a decompressobj"):
809 dobj.decompress(data)
825 dobj.decompress(data)
810 self.assertIsNone(dobj.flush())
826 self.assertIsNone(dobj.flush())
811
827
812 def test_bad_write_size(self):
828 def test_bad_write_size(self):
813 dctx = zstd.ZstdDecompressor()
829 dctx = zstd.ZstdDecompressor()
814
830
815 with self.assertRaisesRegexp(ValueError, 'write_size must be positive'):
831 with self.assertRaisesRegex(ValueError, "write_size must be positive"):
816 dctx.decompressobj(write_size=0)
832 dctx.decompressobj(write_size=0)
817
833
818 def test_write_size(self):
834 def test_write_size(self):
819 source = b'foo' * 64 + b'bar' * 128
835 source = b"foo" * 64 + b"bar" * 128
820 data = zstd.ZstdCompressor(level=1).compress(source)
836 data = zstd.ZstdCompressor(level=1).compress(source)
821
837
822 dctx = zstd.ZstdDecompressor()
838 dctx = zstd.ZstdDecompressor()
823
839
824 for i in range(128):
840 for i in range(128):
825 dobj = dctx.decompressobj(write_size=i + 1)
841 dobj = dctx.decompressobj(write_size=i + 1)
826 self.assertEqual(dobj.decompress(data), source)
842 self.assertEqual(dobj.decompress(data), source)
827
843
828
844
829 def decompress_via_writer(data):
845 def decompress_via_writer(data):
830 buffer = io.BytesIO()
846 buffer = io.BytesIO()
831 dctx = zstd.ZstdDecompressor()
847 dctx = zstd.ZstdDecompressor()
832 decompressor = dctx.stream_writer(buffer)
848 decompressor = dctx.stream_writer(buffer)
833 decompressor.write(data)
849 decompressor.write(data)
834
850
835 return buffer.getvalue()
851 return buffer.getvalue()
836
852
837
853
838 @make_cffi
854 @make_cffi
839 class TestDecompressor_stream_writer(unittest.TestCase):
855 class TestDecompressor_stream_writer(TestCase):
840 def test_io_api(self):
856 def test_io_api(self):
841 buffer = io.BytesIO()
857 buffer = io.BytesIO()
842 dctx = zstd.ZstdDecompressor()
858 dctx = zstd.ZstdDecompressor()
843 writer = dctx.stream_writer(buffer)
859 writer = dctx.stream_writer(buffer)
844
860
845 self.assertFalse(writer.closed)
861 self.assertFalse(writer.closed)
846 self.assertFalse(writer.isatty())
862 self.assertFalse(writer.isatty())
847 self.assertFalse(writer.readable())
863 self.assertFalse(writer.readable())
848
864
849 with self.assertRaises(io.UnsupportedOperation):
865 with self.assertRaises(io.UnsupportedOperation):
850 writer.readline()
866 writer.readline()
851
867
852 with self.assertRaises(io.UnsupportedOperation):
868 with self.assertRaises(io.UnsupportedOperation):
853 writer.readline(42)
869 writer.readline(42)
854
870
855 with self.assertRaises(io.UnsupportedOperation):
871 with self.assertRaises(io.UnsupportedOperation):
856 writer.readline(size=42)
872 writer.readline(size=42)
857
873
858 with self.assertRaises(io.UnsupportedOperation):
874 with self.assertRaises(io.UnsupportedOperation):
859 writer.readlines()
875 writer.readlines()
860
876
861 with self.assertRaises(io.UnsupportedOperation):
877 with self.assertRaises(io.UnsupportedOperation):
862 writer.readlines(42)
878 writer.readlines(42)
863
879
864 with self.assertRaises(io.UnsupportedOperation):
880 with self.assertRaises(io.UnsupportedOperation):
865 writer.readlines(hint=42)
881 writer.readlines(hint=42)
866
882
867 with self.assertRaises(io.UnsupportedOperation):
883 with self.assertRaises(io.UnsupportedOperation):
868 writer.seek(0)
884 writer.seek(0)
869
885
870 with self.assertRaises(io.UnsupportedOperation):
886 with self.assertRaises(io.UnsupportedOperation):
871 writer.seek(10, os.SEEK_SET)
887 writer.seek(10, os.SEEK_SET)
872
888
873 self.assertFalse(writer.seekable())
889 self.assertFalse(writer.seekable())
874
890
875 with self.assertRaises(io.UnsupportedOperation):
891 with self.assertRaises(io.UnsupportedOperation):
876 writer.tell()
892 writer.tell()
877
893
878 with self.assertRaises(io.UnsupportedOperation):
894 with self.assertRaises(io.UnsupportedOperation):
879 writer.truncate()
895 writer.truncate()
880
896
881 with self.assertRaises(io.UnsupportedOperation):
897 with self.assertRaises(io.UnsupportedOperation):
882 writer.truncate(42)
898 writer.truncate(42)
883
899
884 with self.assertRaises(io.UnsupportedOperation):
900 with self.assertRaises(io.UnsupportedOperation):
885 writer.truncate(size=42)
901 writer.truncate(size=42)
886
902
887 self.assertTrue(writer.writable())
903 self.assertTrue(writer.writable())
888
904
889 with self.assertRaises(io.UnsupportedOperation):
905 with self.assertRaises(io.UnsupportedOperation):
890 writer.writelines([])
906 writer.writelines([])
891
907
892 with self.assertRaises(io.UnsupportedOperation):
908 with self.assertRaises(io.UnsupportedOperation):
893 writer.read()
909 writer.read()
894
910
895 with self.assertRaises(io.UnsupportedOperation):
911 with self.assertRaises(io.UnsupportedOperation):
896 writer.read(42)
912 writer.read(42)
897
913
898 with self.assertRaises(io.UnsupportedOperation):
914 with self.assertRaises(io.UnsupportedOperation):
899 writer.read(size=42)
915 writer.read(size=42)
900
916
901 with self.assertRaises(io.UnsupportedOperation):
917 with self.assertRaises(io.UnsupportedOperation):
902 writer.readall()
918 writer.readall()
903
919
904 with self.assertRaises(io.UnsupportedOperation):
920 with self.assertRaises(io.UnsupportedOperation):
905 writer.readinto(None)
921 writer.readinto(None)
906
922
907 with self.assertRaises(io.UnsupportedOperation):
923 with self.assertRaises(io.UnsupportedOperation):
908 writer.fileno()
924 writer.fileno()
909
925
910 def test_fileno_file(self):
926 def test_fileno_file(self):
911 with tempfile.TemporaryFile('wb') as tf:
927 with tempfile.TemporaryFile("wb") as tf:
912 dctx = zstd.ZstdDecompressor()
928 dctx = zstd.ZstdDecompressor()
913 writer = dctx.stream_writer(tf)
929 writer = dctx.stream_writer(tf)
914
930
915 self.assertEqual(writer.fileno(), tf.fileno())
931 self.assertEqual(writer.fileno(), tf.fileno())
916
932
917 def test_close(self):
933 def test_close(self):
918 foo = zstd.ZstdCompressor().compress(b'foo')
934 foo = zstd.ZstdCompressor().compress(b"foo")
919
935
920 buffer = NonClosingBytesIO()
936 buffer = NonClosingBytesIO()
921 dctx = zstd.ZstdDecompressor()
937 dctx = zstd.ZstdDecompressor()
922 writer = dctx.stream_writer(buffer)
938 writer = dctx.stream_writer(buffer)
923
939
924 writer.write(foo)
940 writer.write(foo)
925 self.assertFalse(writer.closed)
941 self.assertFalse(writer.closed)
926 self.assertFalse(buffer.closed)
942 self.assertFalse(buffer.closed)
927 writer.close()
943 writer.close()
928 self.assertTrue(writer.closed)
944 self.assertTrue(writer.closed)
929 self.assertTrue(buffer.closed)
945 self.assertTrue(buffer.closed)
930
946
931 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
947 with self.assertRaisesRegex(ValueError, "stream is closed"):
932 writer.write(b'')
948 writer.write(b"")
933
949
934 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
950 with self.assertRaisesRegex(ValueError, "stream is closed"):
935 writer.flush()
951 writer.flush()
936
952
937 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
953 with self.assertRaisesRegex(ValueError, "stream is closed"):
938 with writer:
954 with writer:
939 pass
955 pass
940
956
941 self.assertEqual(buffer.getvalue(), b'foo')
957 self.assertEqual(buffer.getvalue(), b"foo")
942
958
943 # Context manager exit should close stream.
959 # Context manager exit should close stream.
944 buffer = NonClosingBytesIO()
960 buffer = NonClosingBytesIO()
945 writer = dctx.stream_writer(buffer)
961 writer = dctx.stream_writer(buffer)
946
962
947 with writer:
963 with writer:
948 writer.write(foo)
964 writer.write(foo)
949
965
950 self.assertTrue(writer.closed)
966 self.assertTrue(writer.closed)
951 self.assertEqual(buffer.getvalue(), b'foo')
967 self.assertEqual(buffer.getvalue(), b"foo")
952
968
953 def test_flush(self):
969 def test_flush(self):
954 buffer = OpCountingBytesIO()
970 buffer = OpCountingBytesIO()
955 dctx = zstd.ZstdDecompressor()
971 dctx = zstd.ZstdDecompressor()
956 writer = dctx.stream_writer(buffer)
972 writer = dctx.stream_writer(buffer)
957
973
958 writer.flush()
974 writer.flush()
959 self.assertEqual(buffer._flush_count, 1)
975 self.assertEqual(buffer._flush_count, 1)
960 writer.flush()
976 writer.flush()
961 self.assertEqual(buffer._flush_count, 2)
977 self.assertEqual(buffer._flush_count, 2)
962
978
963 def test_empty_roundtrip(self):
979 def test_empty_roundtrip(self):
964 cctx = zstd.ZstdCompressor()
980 cctx = zstd.ZstdCompressor()
965 empty = cctx.compress(b'')
981 empty = cctx.compress(b"")
966 self.assertEqual(decompress_via_writer(empty), b'')
982 self.assertEqual(decompress_via_writer(empty), b"")
967
983
968 def test_input_types(self):
984 def test_input_types(self):
969 cctx = zstd.ZstdCompressor(level=1)
985 cctx = zstd.ZstdCompressor(level=1)
970 compressed = cctx.compress(b'foo')
986 compressed = cctx.compress(b"foo")
971
987
972 mutable_array = bytearray(len(compressed))
988 mutable_array = bytearray(len(compressed))
973 mutable_array[:] = compressed
989 mutable_array[:] = compressed
974
990
975 sources = [
991 sources = [
976 memoryview(compressed),
992 memoryview(compressed),
977 bytearray(compressed),
993 bytearray(compressed),
978 mutable_array,
994 mutable_array,
979 ]
995 ]
980
996
981 dctx = zstd.ZstdDecompressor()
997 dctx = zstd.ZstdDecompressor()
982 for source in sources:
998 for source in sources:
983 buffer = io.BytesIO()
999 buffer = io.BytesIO()
984
1000
985 decompressor = dctx.stream_writer(buffer)
1001 decompressor = dctx.stream_writer(buffer)
986 decompressor.write(source)
1002 decompressor.write(source)
987 self.assertEqual(buffer.getvalue(), b'foo')
1003 self.assertEqual(buffer.getvalue(), b"foo")
988
1004
989 buffer = NonClosingBytesIO()
1005 buffer = NonClosingBytesIO()
990
1006
991 with dctx.stream_writer(buffer) as decompressor:
1007 with dctx.stream_writer(buffer) as decompressor:
992 self.assertEqual(decompressor.write(source), 3)
1008 self.assertEqual(decompressor.write(source), 3)
993
1009
994 self.assertEqual(buffer.getvalue(), b'foo')
1010 self.assertEqual(buffer.getvalue(), b"foo")
995
1011
996 buffer = io.BytesIO()
1012 buffer = io.BytesIO()
997 writer = dctx.stream_writer(buffer, write_return_read=True)
1013 writer = dctx.stream_writer(buffer, write_return_read=True)
998 self.assertEqual(writer.write(source), len(source))
1014 self.assertEqual(writer.write(source), len(source))
999 self.assertEqual(buffer.getvalue(), b'foo')
1015 self.assertEqual(buffer.getvalue(), b"foo")
1000
1016
1001 def test_large_roundtrip(self):
1017 def test_large_roundtrip(self):
1002 chunks = []
1018 chunks = []
1003 for i in range(255):
1019 for i in range(255):
1004 chunks.append(struct.Struct('>B').pack(i) * 16384)
1020 chunks.append(struct.Struct(">B").pack(i) * 16384)
1005 orig = b''.join(chunks)
1021 orig = b"".join(chunks)
1006 cctx = zstd.ZstdCompressor()
1022 cctx = zstd.ZstdCompressor()
1007 compressed = cctx.compress(orig)
1023 compressed = cctx.compress(orig)
1008
1024
1009 self.assertEqual(decompress_via_writer(compressed), orig)
1025 self.assertEqual(decompress_via_writer(compressed), orig)
1010
1026
1011 def test_multiple_calls(self):
1027 def test_multiple_calls(self):
1012 chunks = []
1028 chunks = []
1013 for i in range(255):
1029 for i in range(255):
1014 for j in range(255):
1030 for j in range(255):
1015 chunks.append(struct.Struct('>B').pack(j) * i)
1031 chunks.append(struct.Struct(">B").pack(j) * i)
1016
1032
1017 orig = b''.join(chunks)
1033 orig = b"".join(chunks)
1018 cctx = zstd.ZstdCompressor()
1034 cctx = zstd.ZstdCompressor()
1019 compressed = cctx.compress(orig)
1035 compressed = cctx.compress(orig)
1020
1036
1021 buffer = NonClosingBytesIO()
1037 buffer = NonClosingBytesIO()
1022 dctx = zstd.ZstdDecompressor()
1038 dctx = zstd.ZstdDecompressor()
1023 with dctx.stream_writer(buffer) as decompressor:
1039 with dctx.stream_writer(buffer) as decompressor:
1024 pos = 0
1040 pos = 0
1025 while pos < len(compressed):
1041 while pos < len(compressed):
1026 pos2 = pos + 8192
1042 pos2 = pos + 8192
1027 decompressor.write(compressed[pos:pos2])
1043 decompressor.write(compressed[pos:pos2])
1028 pos += 8192
1044 pos += 8192
1029 self.assertEqual(buffer.getvalue(), orig)
1045 self.assertEqual(buffer.getvalue(), orig)
1030
1046
1031 # Again with write_return_read=True
1047 # Again with write_return_read=True
1032 buffer = io.BytesIO()
1048 buffer = io.BytesIO()
1033 writer = dctx.stream_writer(buffer, write_return_read=True)
1049 writer = dctx.stream_writer(buffer, write_return_read=True)
1034 pos = 0
1050 pos = 0
1035 while pos < len(compressed):
1051 while pos < len(compressed):
1036 pos2 = pos + 8192
1052 pos2 = pos + 8192
1037 chunk = compressed[pos:pos2]
1053 chunk = compressed[pos:pos2]
1038 self.assertEqual(writer.write(chunk), len(chunk))
1054 self.assertEqual(writer.write(chunk), len(chunk))
1039 pos += 8192
1055 pos += 8192
1040 self.assertEqual(buffer.getvalue(), orig)
1056 self.assertEqual(buffer.getvalue(), orig)
1041
1057
1042 def test_dictionary(self):
1058 def test_dictionary(self):
1043 samples = []
1059 samples = []
1044 for i in range(128):
1060 for i in range(128):
1045 samples.append(b'foo' * 64)
1061 samples.append(b"foo" * 64)
1046 samples.append(b'bar' * 64)
1062 samples.append(b"bar" * 64)
1047 samples.append(b'foobar' * 64)
1063 samples.append(b"foobar" * 64)
1048
1064
1049 d = zstd.train_dictionary(8192, samples)
1065 d = zstd.train_dictionary(8192, samples)
1050
1066
1051 orig = b'foobar' * 16384
1067 orig = b"foobar" * 16384
1052 buffer = NonClosingBytesIO()
1068 buffer = NonClosingBytesIO()
1053 cctx = zstd.ZstdCompressor(dict_data=d)
1069 cctx = zstd.ZstdCompressor(dict_data=d)
1054 with cctx.stream_writer(buffer) as compressor:
1070 with cctx.stream_writer(buffer) as compressor:
1055 self.assertEqual(compressor.write(orig), 0)
1071 self.assertEqual(compressor.write(orig), 0)
1056
1072
1057 compressed = buffer.getvalue()
1073 compressed = buffer.getvalue()
1058 buffer = io.BytesIO()
1074 buffer = io.BytesIO()
1059
1075
1060 dctx = zstd.ZstdDecompressor(dict_data=d)
1076 dctx = zstd.ZstdDecompressor(dict_data=d)
1061 decompressor = dctx.stream_writer(buffer)
1077 decompressor = dctx.stream_writer(buffer)
1062 self.assertEqual(decompressor.write(compressed), len(orig))
1078 self.assertEqual(decompressor.write(compressed), len(orig))
1063 self.assertEqual(buffer.getvalue(), orig)
1079 self.assertEqual(buffer.getvalue(), orig)
1064
1080
1065 buffer = NonClosingBytesIO()
1081 buffer = NonClosingBytesIO()
1066
1082
1067 with dctx.stream_writer(buffer) as decompressor:
1083 with dctx.stream_writer(buffer) as decompressor:
1068 self.assertEqual(decompressor.write(compressed), len(orig))
1084 self.assertEqual(decompressor.write(compressed), len(orig))
1069
1085
1070 self.assertEqual(buffer.getvalue(), orig)
1086 self.assertEqual(buffer.getvalue(), orig)
1071
1087
1072 def test_memory_size(self):
1088 def test_memory_size(self):
1073 dctx = zstd.ZstdDecompressor()
1089 dctx = zstd.ZstdDecompressor()
1074 buffer = io.BytesIO()
1090 buffer = io.BytesIO()
1075
1091
1076 decompressor = dctx.stream_writer(buffer)
1092 decompressor = dctx.stream_writer(buffer)
1077 size = decompressor.memory_size()
1093 size = decompressor.memory_size()
1078 self.assertGreater(size, 100000)
1094 self.assertGreater(size, 100000)
1079
1095
1080 with dctx.stream_writer(buffer) as decompressor:
1096 with dctx.stream_writer(buffer) as decompressor:
1081 size = decompressor.memory_size()
1097 size = decompressor.memory_size()
1082
1098
1083 self.assertGreater(size, 100000)
1099 self.assertGreater(size, 100000)
1084
1100
1085 def test_write_size(self):
1101 def test_write_size(self):
1086 source = zstd.ZstdCompressor().compress(b'foobarfoobar')
1102 source = zstd.ZstdCompressor().compress(b"foobarfoobar")
1087 dest = OpCountingBytesIO()
1103 dest = OpCountingBytesIO()
1088 dctx = zstd.ZstdDecompressor()
1104 dctx = zstd.ZstdDecompressor()
1089 with dctx.stream_writer(dest, write_size=1) as decompressor:
1105 with dctx.stream_writer(dest, write_size=1) as decompressor:
1090 s = struct.Struct('>B')
1106 s = struct.Struct(">B")
1091 for c in source:
1107 for c in source:
1092 if not isinstance(c, str):
1108 if not isinstance(c, str):
1093 c = s.pack(c)
1109 c = s.pack(c)
1094 decompressor.write(c)
1110 decompressor.write(c)
1095
1111
1096 self.assertEqual(dest.getvalue(), b'foobarfoobar')
1112 self.assertEqual(dest.getvalue(), b"foobarfoobar")
1097 self.assertEqual(dest._write_count, len(dest.getvalue()))
1113 self.assertEqual(dest._write_count, len(dest.getvalue()))
1098
1114
1099
1115
1100 @make_cffi
1116 @make_cffi
1101 class TestDecompressor_read_to_iter(unittest.TestCase):
1117 class TestDecompressor_read_to_iter(TestCase):
1102 def test_type_validation(self):
1118 def test_type_validation(self):
1103 dctx = zstd.ZstdDecompressor()
1119 dctx = zstd.ZstdDecompressor()
1104
1120
1105 # Object with read() works.
1121 # Object with read() works.
1106 dctx.read_to_iter(io.BytesIO())
1122 dctx.read_to_iter(io.BytesIO())
1107
1123
1108 # Buffer protocol works.
1124 # Buffer protocol works.
1109 dctx.read_to_iter(b'foobar')
1125 dctx.read_to_iter(b"foobar")
1110
1126
1111 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1127 with self.assertRaisesRegex(ValueError, "must pass an object with a read"):
1112 b''.join(dctx.read_to_iter(True))
1128 b"".join(dctx.read_to_iter(True))
1113
1129
1114 def test_empty_input(self):
1130 def test_empty_input(self):
1115 dctx = zstd.ZstdDecompressor()
1131 dctx = zstd.ZstdDecompressor()
1116
1132
1117 source = io.BytesIO()
1133 source = io.BytesIO()
1118 it = dctx.read_to_iter(source)
1134 it = dctx.read_to_iter(source)
1119 # TODO this is arguably wrong. Should get an error about missing frame foo.
1135 # TODO this is arguably wrong. Should get an error about missing frame foo.
1120 with self.assertRaises(StopIteration):
1136 with self.assertRaises(StopIteration):
1121 next(it)
1137 next(it)
1122
1138
1123 it = dctx.read_to_iter(b'')
1139 it = dctx.read_to_iter(b"")
1124 with self.assertRaises(StopIteration):
1140 with self.assertRaises(StopIteration):
1125 next(it)
1141 next(it)
1126
1142
1127 def test_invalid_input(self):
1143 def test_invalid_input(self):
1128 dctx = zstd.ZstdDecompressor()
1144 dctx = zstd.ZstdDecompressor()
1129
1145
1130 source = io.BytesIO(b'foobar')
1146 source = io.BytesIO(b"foobar")
1131 it = dctx.read_to_iter(source)
1147 it = dctx.read_to_iter(source)
1132 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
1148 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
1133 next(it)
1149 next(it)
1134
1150
1135 it = dctx.read_to_iter(b'foobar')
1151 it = dctx.read_to_iter(b"foobar")
1136 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
1152 with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"):
1137 next(it)
1153 next(it)
1138
1154
1139 def test_empty_roundtrip(self):
1155 def test_empty_roundtrip(self):
1140 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1156 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1141 empty = cctx.compress(b'')
1157 empty = cctx.compress(b"")
1142
1158
1143 source = io.BytesIO(empty)
1159 source = io.BytesIO(empty)
1144 source.seek(0)
1160 source.seek(0)
1145
1161
1146 dctx = zstd.ZstdDecompressor()
1162 dctx = zstd.ZstdDecompressor()
1147 it = dctx.read_to_iter(source)
1163 it = dctx.read_to_iter(source)
1148
1164
1149 # No chunks should be emitted since there is no data.
1165 # No chunks should be emitted since there is no data.
1150 with self.assertRaises(StopIteration):
1166 with self.assertRaises(StopIteration):
1151 next(it)
1167 next(it)
1152
1168
1153 # Again for good measure.
1169 # Again for good measure.
1154 with self.assertRaises(StopIteration):
1170 with self.assertRaises(StopIteration):
1155 next(it)
1171 next(it)
1156
1172
1157 def test_skip_bytes_too_large(self):
1173 def test_skip_bytes_too_large(self):
1158 dctx = zstd.ZstdDecompressor()
1174 dctx = zstd.ZstdDecompressor()
1159
1175
1160 with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'):
1176 with self.assertRaisesRegex(
1161 b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1))
1177 ValueError, "skip_bytes must be smaller than read_size"
1178 ):
1179 b"".join(dctx.read_to_iter(b"", skip_bytes=1, read_size=1))
1162
1180
1163 with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'):
1181 with self.assertRaisesRegex(
1164 b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10))
1182 ValueError, "skip_bytes larger than first input chunk"
1183 ):
1184 b"".join(dctx.read_to_iter(b"foobar", skip_bytes=10))
1165
1185
1166 def test_skip_bytes(self):
1186 def test_skip_bytes(self):
1167 cctx = zstd.ZstdCompressor(write_content_size=False)
1187 cctx = zstd.ZstdCompressor(write_content_size=False)
1168 compressed = cctx.compress(b'foobar')
1188 compressed = cctx.compress(b"foobar")
1169
1189
1170 dctx = zstd.ZstdDecompressor()
1190 dctx = zstd.ZstdDecompressor()
1171 output = b''.join(dctx.read_to_iter(b'hdr' + compressed, skip_bytes=3))
1191 output = b"".join(dctx.read_to_iter(b"hdr" + compressed, skip_bytes=3))
1172 self.assertEqual(output, b'foobar')
1192 self.assertEqual(output, b"foobar")
1173
1193
1174 def test_large_output(self):
1194 def test_large_output(self):
1175 source = io.BytesIO()
1195 source = io.BytesIO()
1176 source.write(b'f' * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1196 source.write(b"f" * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1177 source.write(b'o')
1197 source.write(b"o")
1178 source.seek(0)
1198 source.seek(0)
1179
1199
1180 cctx = zstd.ZstdCompressor(level=1)
1200 cctx = zstd.ZstdCompressor(level=1)
1181 compressed = io.BytesIO(cctx.compress(source.getvalue()))
1201 compressed = io.BytesIO(cctx.compress(source.getvalue()))
1182 compressed.seek(0)
1202 compressed.seek(0)
1183
1203
1184 dctx = zstd.ZstdDecompressor()
1204 dctx = zstd.ZstdDecompressor()
1185 it = dctx.read_to_iter(compressed)
1205 it = dctx.read_to_iter(compressed)
1186
1206
1187 chunks = []
1207 chunks = []
1188 chunks.append(next(it))
1208 chunks.append(next(it))
1189 chunks.append(next(it))
1209 chunks.append(next(it))
1190
1210
1191 with self.assertRaises(StopIteration):
1211 with self.assertRaises(StopIteration):
1192 next(it)
1212 next(it)
1193
1213
1194 decompressed = b''.join(chunks)
1214 decompressed = b"".join(chunks)
1195 self.assertEqual(decompressed, source.getvalue())
1215 self.assertEqual(decompressed, source.getvalue())
1196
1216
1197 # And again with buffer protocol.
1217 # And again with buffer protocol.
1198 it = dctx.read_to_iter(compressed.getvalue())
1218 it = dctx.read_to_iter(compressed.getvalue())
1199 chunks = []
1219 chunks = []
1200 chunks.append(next(it))
1220 chunks.append(next(it))
1201 chunks.append(next(it))
1221 chunks.append(next(it))
1202
1222
1203 with self.assertRaises(StopIteration):
1223 with self.assertRaises(StopIteration):
1204 next(it)
1224 next(it)
1205
1225
1206 decompressed = b''.join(chunks)
1226 decompressed = b"".join(chunks)
1207 self.assertEqual(decompressed, source.getvalue())
1227 self.assertEqual(decompressed, source.getvalue())
1208
1228
1209 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
1229 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
1210 def test_large_input(self):
1230 def test_large_input(self):
1211 bytes = list(struct.Struct('>B').pack(i) for i in range(256))
1231 bytes = list(struct.Struct(">B").pack(i) for i in range(256))
1212 compressed = NonClosingBytesIO()
1232 compressed = NonClosingBytesIO()
1213 input_size = 0
1233 input_size = 0
1214 cctx = zstd.ZstdCompressor(level=1)
1234 cctx = zstd.ZstdCompressor(level=1)
1215 with cctx.stream_writer(compressed) as compressor:
1235 with cctx.stream_writer(compressed) as compressor:
1216 while True:
1236 while True:
1217 compressor.write(random.choice(bytes))
1237 compressor.write(random.choice(bytes))
1218 input_size += 1
1238 input_size += 1
1219
1239
1220 have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1240 have_compressed = (
1241 len(compressed.getvalue())
1242 > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1243 )
1221 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
1244 have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2
1222 if have_compressed and have_raw:
1245 if have_compressed and have_raw:
1223 break
1246 break
1224
1247
1225 compressed = io.BytesIO(compressed.getvalue())
1248 compressed = io.BytesIO(compressed.getvalue())
1226 self.assertGreater(len(compressed.getvalue()),
1249 self.assertGreater(
1227 zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE)
1250 len(compressed.getvalue()), zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1251 )
1228
1252
1229 dctx = zstd.ZstdDecompressor()
1253 dctx = zstd.ZstdDecompressor()
1230 it = dctx.read_to_iter(compressed)
1254 it = dctx.read_to_iter(compressed)
1231
1255
1232 chunks = []
1256 chunks = []
1233 chunks.append(next(it))
1257 chunks.append(next(it))
1234 chunks.append(next(it))
1258 chunks.append(next(it))
1235 chunks.append(next(it))
1259 chunks.append(next(it))
1236
1260
1237 with self.assertRaises(StopIteration):
1261 with self.assertRaises(StopIteration):
1238 next(it)
1262 next(it)
1239
1263
1240 decompressed = b''.join(chunks)
1264 decompressed = b"".join(chunks)
1241 self.assertEqual(len(decompressed), input_size)
1265 self.assertEqual(len(decompressed), input_size)
1242
1266
1243 # And again with buffer protocol.
1267 # And again with buffer protocol.
1244 it = dctx.read_to_iter(compressed.getvalue())
1268 it = dctx.read_to_iter(compressed.getvalue())
1245
1269
1246 chunks = []
1270 chunks = []
1247 chunks.append(next(it))
1271 chunks.append(next(it))
1248 chunks.append(next(it))
1272 chunks.append(next(it))
1249 chunks.append(next(it))
1273 chunks.append(next(it))
1250
1274
1251 with self.assertRaises(StopIteration):
1275 with self.assertRaises(StopIteration):
1252 next(it)
1276 next(it)
1253
1277
1254 decompressed = b''.join(chunks)
1278 decompressed = b"".join(chunks)
1255 self.assertEqual(len(decompressed), input_size)
1279 self.assertEqual(len(decompressed), input_size)
1256
1280
1257 def test_interesting(self):
1281 def test_interesting(self):
1258 # Found this edge case via fuzzing.
1282 # Found this edge case via fuzzing.
1259 cctx = zstd.ZstdCompressor(level=1)
1283 cctx = zstd.ZstdCompressor(level=1)
1260
1284
1261 source = io.BytesIO()
1285 source = io.BytesIO()
1262
1286
1263 compressed = NonClosingBytesIO()
1287 compressed = NonClosingBytesIO()
1264 with cctx.stream_writer(compressed) as compressor:
1288 with cctx.stream_writer(compressed) as compressor:
1265 for i in range(256):
1289 for i in range(256):
1266 chunk = b'\0' * 1024
1290 chunk = b"\0" * 1024
1267 compressor.write(chunk)
1291 compressor.write(chunk)
1268 source.write(chunk)
1292 source.write(chunk)
1269
1293
1270 dctx = zstd.ZstdDecompressor()
1294 dctx = zstd.ZstdDecompressor()
1271
1295
1272 simple = dctx.decompress(compressed.getvalue(),
1296 simple = dctx.decompress(
1273 max_output_size=len(source.getvalue()))
1297 compressed.getvalue(), max_output_size=len(source.getvalue())
1298 )
1274 self.assertEqual(simple, source.getvalue())
1299 self.assertEqual(simple, source.getvalue())
1275
1300
1276 compressed = io.BytesIO(compressed.getvalue())
1301 compressed = io.BytesIO(compressed.getvalue())
1277 streamed = b''.join(dctx.read_to_iter(compressed))
1302 streamed = b"".join(dctx.read_to_iter(compressed))
1278 self.assertEqual(streamed, source.getvalue())
1303 self.assertEqual(streamed, source.getvalue())
1279
1304
1280 def test_read_write_size(self):
1305 def test_read_write_size(self):
1281 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b'foobarfoobar'))
1306 source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar"))
1282 dctx = zstd.ZstdDecompressor()
1307 dctx = zstd.ZstdDecompressor()
1283 for chunk in dctx.read_to_iter(source, read_size=1, write_size=1):
1308 for chunk in dctx.read_to_iter(source, read_size=1, write_size=1):
1284 self.assertEqual(len(chunk), 1)
1309 self.assertEqual(len(chunk), 1)
1285
1310
1286 self.assertEqual(source._read_count, len(source.getvalue()))
1311 self.assertEqual(source._read_count, len(source.getvalue()))
1287
1312
1288 def test_magic_less(self):
1313 def test_magic_less(self):
1289 params = zstd.CompressionParameters.from_level(
1314 params = zstd.CompressionParameters.from_level(
1290 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
1315 1, format=zstd.FORMAT_ZSTD1_MAGICLESS
1316 )
1291 cctx = zstd.ZstdCompressor(compression_params=params)
1317 cctx = zstd.ZstdCompressor(compression_params=params)
1292 frame = cctx.compress(b'foobar')
1318 frame = cctx.compress(b"foobar")
1293
1319
1294 self.assertNotEqual(frame[0:4], b'\x28\xb5\x2f\xfd')
1320 self.assertNotEqual(frame[0:4], b"\x28\xb5\x2f\xfd")
1295
1321
1296 dctx = zstd.ZstdDecompressor()
1322 dctx = zstd.ZstdDecompressor()
1297 with self.assertRaisesRegexp(
1323 with self.assertRaisesRegex(
1298 zstd.ZstdError, 'error determining content size from frame header'):
1324 zstd.ZstdError, "error determining content size from frame header"
1325 ):
1299 dctx.decompress(frame)
1326 dctx.decompress(frame)
1300
1327
1301 dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
1328 dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS)
1302 res = b''.join(dctx.read_to_iter(frame))
1329 res = b"".join(dctx.read_to_iter(frame))
1303 self.assertEqual(res, b'foobar')
1330 self.assertEqual(res, b"foobar")
1304
1331
1305
1332
1306 @make_cffi
1333 @make_cffi
1307 class TestDecompressor_content_dict_chain(unittest.TestCase):
1334 class TestDecompressor_content_dict_chain(TestCase):
1308 def test_bad_inputs_simple(self):
1335 def test_bad_inputs_simple(self):
1309 dctx = zstd.ZstdDecompressor()
1336 dctx = zstd.ZstdDecompressor()
1310
1337
1311 with self.assertRaises(TypeError):
1338 with self.assertRaises(TypeError):
1312 dctx.decompress_content_dict_chain(b'foo')
1339 dctx.decompress_content_dict_chain(b"foo")
1313
1340
1314 with self.assertRaises(TypeError):
1341 with self.assertRaises(TypeError):
1315 dctx.decompress_content_dict_chain((b'foo', b'bar'))
1342 dctx.decompress_content_dict_chain((b"foo", b"bar"))
1316
1343
1317 with self.assertRaisesRegexp(ValueError, 'empty input chain'):
1344 with self.assertRaisesRegex(ValueError, "empty input chain"):
1318 dctx.decompress_content_dict_chain([])
1345 dctx.decompress_content_dict_chain([])
1319
1346
1320 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
1347 with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"):
1321 dctx.decompress_content_dict_chain([u'foo'])
1348 dctx.decompress_content_dict_chain([u"foo"])
1322
1349
1323 with self.assertRaisesRegexp(ValueError, 'chunk 0 must be bytes'):
1350 with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"):
1324 dctx.decompress_content_dict_chain([True])
1351 dctx.decompress_content_dict_chain([True])
1325
1352
1326 with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'):
1353 with self.assertRaisesRegex(
1354 ValueError, "chunk 0 is too small to contain a zstd frame"
1355 ):
1327 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
1356 dctx.decompress_content_dict_chain([zstd.FRAME_HEADER])
1328
1357
1329 with self.assertRaisesRegexp(ValueError, 'chunk 0 is not a valid zstd frame'):
1358 with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd frame"):
1330 dctx.decompress_content_dict_chain([b'foo' * 8])
1359 dctx.decompress_content_dict_chain([b"foo" * 8])
1331
1360
1332 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
1361 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64)
1333
1362
1334 with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'):
1363 with self.assertRaisesRegex(
1364 ValueError, "chunk 0 missing content size in frame"
1365 ):
1335 dctx.decompress_content_dict_chain([no_size])
1366 dctx.decompress_content_dict_chain([no_size])
1336
1367
1337 # Corrupt first frame.
1368 # Corrupt first frame.
1338 frame = zstd.ZstdCompressor().compress(b'foo' * 64)
1369 frame = zstd.ZstdCompressor().compress(b"foo" * 64)
1339 frame = frame[0:12] + frame[15:]
1370 frame = frame[0:12] + frame[15:]
1340 with self.assertRaisesRegexp(zstd.ZstdError,
1371 with self.assertRaisesRegex(
1341 'chunk 0 did not decompress full frame'):
1372 zstd.ZstdError, "chunk 0 did not decompress full frame"
1373 ):
1342 dctx.decompress_content_dict_chain([frame])
1374 dctx.decompress_content_dict_chain([frame])
1343
1375
1344 def test_bad_subsequent_input(self):
1376 def test_bad_subsequent_input(self):
1345 initial = zstd.ZstdCompressor().compress(b'foo' * 64)
1377 initial = zstd.ZstdCompressor().compress(b"foo" * 64)
1346
1378
1347 dctx = zstd.ZstdDecompressor()
1379 dctx = zstd.ZstdDecompressor()
1348
1380
1349 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
1381 with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"):
1350 dctx.decompress_content_dict_chain([initial, u'foo'])
1382 dctx.decompress_content_dict_chain([initial, u"foo"])
1351
1383
1352 with self.assertRaisesRegexp(ValueError, 'chunk 1 must be bytes'):
1384 with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"):
1353 dctx.decompress_content_dict_chain([initial, None])
1385 dctx.decompress_content_dict_chain([initial, None])
1354
1386
1355 with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'):
1387 with self.assertRaisesRegex(
1388 ValueError, "chunk 1 is too small to contain a zstd frame"
1389 ):
1356 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
1390 dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER])
1357
1391
1358 with self.assertRaisesRegexp(ValueError, 'chunk 1 is not a valid zstd frame'):
1392 with self.assertRaisesRegex(ValueError, "chunk 1 is not a valid zstd frame"):
1359 dctx.decompress_content_dict_chain([initial, b'foo' * 8])
1393 dctx.decompress_content_dict_chain([initial, b"foo" * 8])
1360
1394
1361 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b'foo' * 64)
1395 no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64)
1362
1396
1363 with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'):
1397 with self.assertRaisesRegex(
1398 ValueError, "chunk 1 missing content size in frame"
1399 ):
1364 dctx.decompress_content_dict_chain([initial, no_size])
1400 dctx.decompress_content_dict_chain([initial, no_size])
1365
1401
1366 # Corrupt second frame.
1402 # Corrupt second frame.
1367 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b'foo' * 64))
1403 cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b"foo" * 64))
1368 frame = cctx.compress(b'bar' * 64)
1404 frame = cctx.compress(b"bar" * 64)
1369 frame = frame[0:12] + frame[15:]
1405 frame = frame[0:12] + frame[15:]
1370
1406
1371 with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'):
1407 with self.assertRaisesRegex(
1408 zstd.ZstdError, "chunk 1 did not decompress full frame"
1409 ):
1372 dctx.decompress_content_dict_chain([initial, frame])
1410 dctx.decompress_content_dict_chain([initial, frame])
1373
1411
1374 def test_simple(self):
1412 def test_simple(self):
1375 original = [
1413 original = [
1376 b'foo' * 64,
1414 b"foo" * 64,
1377 b'foobar' * 64,
1415 b"foobar" * 64,
1378 b'baz' * 64,
1416 b"baz" * 64,
1379 b'foobaz' * 64,
1417 b"foobaz" * 64,
1380 b'foobarbaz' * 64,
1418 b"foobarbaz" * 64,
1381 ]
1419 ]
1382
1420
1383 chunks = []
1421 chunks = []
1384 chunks.append(zstd.ZstdCompressor().compress(original[0]))
1422 chunks.append(zstd.ZstdCompressor().compress(original[0]))
1385 for i, chunk in enumerate(original[1:]):
1423 for i, chunk in enumerate(original[1:]):
1386 d = zstd.ZstdCompressionDict(original[i])
1424 d = zstd.ZstdCompressionDict(original[i])
1387 cctx = zstd.ZstdCompressor(dict_data=d)
1425 cctx = zstd.ZstdCompressor(dict_data=d)
1388 chunks.append(cctx.compress(chunk))
1426 chunks.append(cctx.compress(chunk))
1389
1427
1390 for i in range(1, len(original)):
1428 for i in range(1, len(original)):
1391 chain = chunks[0:i]
1429 chain = chunks[0:i]
1392 expected = original[i - 1]
1430 expected = original[i - 1]
1393 dctx = zstd.ZstdDecompressor()
1431 dctx = zstd.ZstdDecompressor()
1394 decompressed = dctx.decompress_content_dict_chain(chain)
1432 decompressed = dctx.decompress_content_dict_chain(chain)
1395 self.assertEqual(decompressed, expected)
1433 self.assertEqual(decompressed, expected)
1396
1434
1397
1435
1398 # TODO enable for CFFI
1436 # TODO enable for CFFI
1399 class TestDecompressor_multi_decompress_to_buffer(unittest.TestCase):
1437 class TestDecompressor_multi_decompress_to_buffer(TestCase):
1400 def test_invalid_inputs(self):
1438 def test_invalid_inputs(self):
1401 dctx = zstd.ZstdDecompressor()
1439 dctx = zstd.ZstdDecompressor()
1402
1440
1403 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1441 if not hasattr(dctx, "multi_decompress_to_buffer"):
1404 self.skipTest('multi_decompress_to_buffer not available')
1442 self.skipTest("multi_decompress_to_buffer not available")
1405
1443
1406 with self.assertRaises(TypeError):
1444 with self.assertRaises(TypeError):
1407 dctx.multi_decompress_to_buffer(True)
1445 dctx.multi_decompress_to_buffer(True)
1408
1446
1409 with self.assertRaises(TypeError):
1447 with self.assertRaises(TypeError):
1410 dctx.multi_decompress_to_buffer((1, 2))
1448 dctx.multi_decompress_to_buffer((1, 2))
1411
1449
1412 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1450 with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"):
1413 dctx.multi_decompress_to_buffer([u'foo'])
1451 dctx.multi_decompress_to_buffer([u"foo"])
1414
1452
1415 with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'):
1453 with self.assertRaisesRegex(
1416 dctx.multi_decompress_to_buffer([b'foobarbaz'])
1454 ValueError, "could not determine decompressed size of item 0"
1455 ):
1456 dctx.multi_decompress_to_buffer([b"foobarbaz"])
1417
1457
1418 def test_list_input(self):
1458 def test_list_input(self):
1419 cctx = zstd.ZstdCompressor()
1459 cctx = zstd.ZstdCompressor()
1420
1460
1421 original = [b'foo' * 4, b'bar' * 6]
1461 original = [b"foo" * 4, b"bar" * 6]
1422 frames = [cctx.compress(d) for d in original]
1462 frames = [cctx.compress(d) for d in original]
1423
1463
1424 dctx = zstd.ZstdDecompressor()
1464 dctx = zstd.ZstdDecompressor()
1425
1465
1426 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1466 if not hasattr(dctx, "multi_decompress_to_buffer"):
1427 self.skipTest('multi_decompress_to_buffer not available')
1467 self.skipTest("multi_decompress_to_buffer not available")
1428
1468
1429 result = dctx.multi_decompress_to_buffer(frames)
1469 result = dctx.multi_decompress_to_buffer(frames)
1430
1470
1431 self.assertEqual(len(result), len(frames))
1471 self.assertEqual(len(result), len(frames))
1432 self.assertEqual(result.size(), sum(map(len, original)))
1472 self.assertEqual(result.size(), sum(map(len, original)))
1433
1473
1434 for i, data in enumerate(original):
1474 for i, data in enumerate(original):
1435 self.assertEqual(result[i].tobytes(), data)
1475 self.assertEqual(result[i].tobytes(), data)
1436
1476
1437 self.assertEqual(result[0].offset, 0)
1477 self.assertEqual(result[0].offset, 0)
1438 self.assertEqual(len(result[0]), 12)
1478 self.assertEqual(len(result[0]), 12)
1439 self.assertEqual(result[1].offset, 12)
1479 self.assertEqual(result[1].offset, 12)
1440 self.assertEqual(len(result[1]), 18)
1480 self.assertEqual(len(result[1]), 18)
1441
1481
1442 def test_list_input_frame_sizes(self):
1482 def test_list_input_frame_sizes(self):
1443 cctx = zstd.ZstdCompressor()
1483 cctx = zstd.ZstdCompressor()
1444
1484
1445 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1485 original = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
1446 frames = [cctx.compress(d) for d in original]
1486 frames = [cctx.compress(d) for d in original]
1447 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1487 sizes = struct.pack("=" + "Q" * len(original), *map(len, original))
1448
1488
1449 dctx = zstd.ZstdDecompressor()
1489 dctx = zstd.ZstdDecompressor()
1450
1490
1451 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1491 if not hasattr(dctx, "multi_decompress_to_buffer"):
1452 self.skipTest('multi_decompress_to_buffer not available')
1492 self.skipTest("multi_decompress_to_buffer not available")
1453
1493
1454 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
1494 result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
1455
1495
1456 self.assertEqual(len(result), len(frames))
1496 self.assertEqual(len(result), len(frames))
1457 self.assertEqual(result.size(), sum(map(len, original)))
1497 self.assertEqual(result.size(), sum(map(len, original)))
1458
1498
1459 for i, data in enumerate(original):
1499 for i, data in enumerate(original):
1460 self.assertEqual(result[i].tobytes(), data)
1500 self.assertEqual(result[i].tobytes(), data)
1461
1501
1462 def test_buffer_with_segments_input(self):
1502 def test_buffer_with_segments_input(self):
1463 cctx = zstd.ZstdCompressor()
1503 cctx = zstd.ZstdCompressor()
1464
1504
1465 original = [b'foo' * 4, b'bar' * 6]
1505 original = [b"foo" * 4, b"bar" * 6]
1466 frames = [cctx.compress(d) for d in original]
1506 frames = [cctx.compress(d) for d in original]
1467
1507
1468 dctx = zstd.ZstdDecompressor()
1508 dctx = zstd.ZstdDecompressor()
1469
1509
1470 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1510 if not hasattr(dctx, "multi_decompress_to_buffer"):
1471 self.skipTest('multi_decompress_to_buffer not available')
1511 self.skipTest("multi_decompress_to_buffer not available")
1472
1512
1473 segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1]))
1513 segments = struct.pack(
1474 b = zstd.BufferWithSegments(b''.join(frames), segments)
1514 "=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1])
1515 )
1516 b = zstd.BufferWithSegments(b"".join(frames), segments)
1475
1517
1476 result = dctx.multi_decompress_to_buffer(b)
1518 result = dctx.multi_decompress_to_buffer(b)
1477
1519
1478 self.assertEqual(len(result), len(frames))
1520 self.assertEqual(len(result), len(frames))
1479 self.assertEqual(result[0].offset, 0)
1521 self.assertEqual(result[0].offset, 0)
1480 self.assertEqual(len(result[0]), 12)
1522 self.assertEqual(len(result[0]), 12)
1481 self.assertEqual(result[1].offset, 12)
1523 self.assertEqual(result[1].offset, 12)
1482 self.assertEqual(len(result[1]), 18)
1524 self.assertEqual(len(result[1]), 18)
1483
1525
1484 def test_buffer_with_segments_sizes(self):
1526 def test_buffer_with_segments_sizes(self):
1485 cctx = zstd.ZstdCompressor(write_content_size=False)
1527 cctx = zstd.ZstdCompressor(write_content_size=False)
1486 original = [b'foo' * 4, b'bar' * 6, b'baz' * 8]
1528 original = [b"foo" * 4, b"bar" * 6, b"baz" * 8]
1487 frames = [cctx.compress(d) for d in original]
1529 frames = [cctx.compress(d) for d in original]
1488 sizes = struct.pack('=' + 'Q' * len(original), *map(len, original))
1530 sizes = struct.pack("=" + "Q" * len(original), *map(len, original))
1489
1531
1490 dctx = zstd.ZstdDecompressor()
1532 dctx = zstd.ZstdDecompressor()
1491
1533
1492 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1534 if not hasattr(dctx, "multi_decompress_to_buffer"):
1493 self.skipTest('multi_decompress_to_buffer not available')
1535 self.skipTest("multi_decompress_to_buffer not available")
1494
1536
1495 segments = struct.pack('=QQQQQQ', 0, len(frames[0]),
1537 segments = struct.pack(
1496 len(frames[0]), len(frames[1]),
1538 "=QQQQQQ",
1497 len(frames[0]) + len(frames[1]), len(frames[2]))
1539 0,
1498 b = zstd.BufferWithSegments(b''.join(frames), segments)
1540 len(frames[0]),
1541 len(frames[0]),
1542 len(frames[1]),
1543 len(frames[0]) + len(frames[1]),
1544 len(frames[2]),
1545 )
1546 b = zstd.BufferWithSegments(b"".join(frames), segments)
1499
1547
1500 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
1548 result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes)
1501
1549
1502 self.assertEqual(len(result), len(frames))
1550 self.assertEqual(len(result), len(frames))
1503 self.assertEqual(result.size(), sum(map(len, original)))
1551 self.assertEqual(result.size(), sum(map(len, original)))
1504
1552
1505 for i, data in enumerate(original):
1553 for i, data in enumerate(original):
1506 self.assertEqual(result[i].tobytes(), data)
1554 self.assertEqual(result[i].tobytes(), data)
1507
1555
1508 def test_buffer_with_segments_collection_input(self):
1556 def test_buffer_with_segments_collection_input(self):
1509 cctx = zstd.ZstdCompressor()
1557 cctx = zstd.ZstdCompressor()
1510
1558
1511 original = [
1559 original = [
1512 b'foo0' * 2,
1560 b"foo0" * 2,
1513 b'foo1' * 3,
1561 b"foo1" * 3,
1514 b'foo2' * 4,
1562 b"foo2" * 4,
1515 b'foo3' * 5,
1563 b"foo3" * 5,
1516 b'foo4' * 6,
1564 b"foo4" * 6,
1517 ]
1565 ]
1518
1566
1519 if not hasattr(cctx, 'multi_compress_to_buffer'):
1567 if not hasattr(cctx, "multi_compress_to_buffer"):
1520 self.skipTest('multi_compress_to_buffer not available')
1568 self.skipTest("multi_compress_to_buffer not available")
1521
1569
1522 frames = cctx.multi_compress_to_buffer(original)
1570 frames = cctx.multi_compress_to_buffer(original)
1523
1571
1524 # Check round trip.
1572 # Check round trip.
1525 dctx = zstd.ZstdDecompressor()
1573 dctx = zstd.ZstdDecompressor()
1526
1574
1527 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
1575 decompressed = dctx.multi_decompress_to_buffer(frames, threads=3)
1528
1576
1529 self.assertEqual(len(decompressed), len(original))
1577 self.assertEqual(len(decompressed), len(original))
1530
1578
1531 for i, data in enumerate(original):
1579 for i, data in enumerate(original):
1532 self.assertEqual(data, decompressed[i].tobytes())
1580 self.assertEqual(data, decompressed[i].tobytes())
1533
1581
1534 # And a manual mode.
1582 # And a manual mode.
1535 b = b''.join([frames[0].tobytes(), frames[1].tobytes()])
1583 b = b"".join([frames[0].tobytes(), frames[1].tobytes()])
1536 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1584 b1 = zstd.BufferWithSegments(
1537 0, len(frames[0]),
1585 b, struct.pack("=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1]))
1538 len(frames[0]), len(frames[1])))
1586 )
1539
1587
1540 b = b''.join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
1588 b = b"".join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()])
1541 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1589 b2 = zstd.BufferWithSegments(
1542 0, len(frames[2]),
1590 b,
1543 len(frames[2]), len(frames[3]),
1591 struct.pack(
1544 len(frames[2]) + len(frames[3]), len(frames[4])))
1592 "=QQQQQQ",
1593 0,
1594 len(frames[2]),
1595 len(frames[2]),
1596 len(frames[3]),
1597 len(frames[2]) + len(frames[3]),
1598 len(frames[4]),
1599 ),
1600 )
1545
1601
1546 c = zstd.BufferWithSegmentsCollection(b1, b2)
1602 c = zstd.BufferWithSegmentsCollection(b1, b2)
1547
1603
1548 dctx = zstd.ZstdDecompressor()
1604 dctx = zstd.ZstdDecompressor()
1549 decompressed = dctx.multi_decompress_to_buffer(c)
1605 decompressed = dctx.multi_decompress_to_buffer(c)
1550
1606
1551 self.assertEqual(len(decompressed), 5)
1607 self.assertEqual(len(decompressed), 5)
1552 for i in range(5):
1608 for i in range(5):
1553 self.assertEqual(decompressed[i].tobytes(), original[i])
1609 self.assertEqual(decompressed[i].tobytes(), original[i])
1554
1610
1555 def test_dict(self):
1611 def test_dict(self):
1556 d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16)
1612 d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16)
1557
1613
1558 cctx = zstd.ZstdCompressor(dict_data=d, level=1)
1614 cctx = zstd.ZstdCompressor(dict_data=d, level=1)
1559 frames = [cctx.compress(s) for s in generate_samples()]
1615 frames = [cctx.compress(s) for s in generate_samples()]
1560
1616
1561 dctx = zstd.ZstdDecompressor(dict_data=d)
1617 dctx = zstd.ZstdDecompressor(dict_data=d)
1562
1618
1563 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1619 if not hasattr(dctx, "multi_decompress_to_buffer"):
1564 self.skipTest('multi_decompress_to_buffer not available')
1620 self.skipTest("multi_decompress_to_buffer not available")
1565
1621
1566 result = dctx.multi_decompress_to_buffer(frames)
1622 result = dctx.multi_decompress_to_buffer(frames)
1567
1623
1568 self.assertEqual([o.tobytes() for o in result], generate_samples())
1624 self.assertEqual([o.tobytes() for o in result], generate_samples())
1569
1625
1570 def test_multiple_threads(self):
1626 def test_multiple_threads(self):
1571 cctx = zstd.ZstdCompressor()
1627 cctx = zstd.ZstdCompressor()
1572
1628
1573 frames = []
1629 frames = []
1574 frames.extend(cctx.compress(b'x' * 64) for i in range(256))
1630 frames.extend(cctx.compress(b"x" * 64) for i in range(256))
1575 frames.extend(cctx.compress(b'y' * 64) for i in range(256))
1631 frames.extend(cctx.compress(b"y" * 64) for i in range(256))
1576
1632
1577 dctx = zstd.ZstdDecompressor()
1633 dctx = zstd.ZstdDecompressor()
1578
1634
1579 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1635 if not hasattr(dctx, "multi_decompress_to_buffer"):
1580 self.skipTest('multi_decompress_to_buffer not available')
1636 self.skipTest("multi_decompress_to_buffer not available")
1581
1637
1582 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
1638 result = dctx.multi_decompress_to_buffer(frames, threads=-1)
1583
1639
1584 self.assertEqual(len(result), len(frames))
1640 self.assertEqual(len(result), len(frames))
1585 self.assertEqual(result.size(), 2 * 64 * 256)
1641 self.assertEqual(result.size(), 2 * 64 * 256)
1586 self.assertEqual(result[0].tobytes(), b'x' * 64)
1642 self.assertEqual(result[0].tobytes(), b"x" * 64)
1587 self.assertEqual(result[256].tobytes(), b'y' * 64)
1643 self.assertEqual(result[256].tobytes(), b"y" * 64)
1588
1644
1589 def test_item_failure(self):
1645 def test_item_failure(self):
1590 cctx = zstd.ZstdCompressor()
1646 cctx = zstd.ZstdCompressor()
1591 frames = [cctx.compress(b'x' * 128), cctx.compress(b'y' * 128)]
1647 frames = [cctx.compress(b"x" * 128), cctx.compress(b"y" * 128)]
1592
1648
1593 frames[1] = frames[1][0:15] + b'extra' + frames[1][15:]
1649 frames[1] = frames[1][0:15] + b"extra" + frames[1][15:]
1594
1650
1595 dctx = zstd.ZstdDecompressor()
1651 dctx = zstd.ZstdDecompressor()
1596
1652
1597 if not hasattr(dctx, 'multi_decompress_to_buffer'):
1653 if not hasattr(dctx, "multi_decompress_to_buffer"):
1598 self.skipTest('multi_decompress_to_buffer not available')
1654 self.skipTest("multi_decompress_to_buffer not available")
1599
1655
1600 with self.assertRaisesRegexp(zstd.ZstdError,
1656 with self.assertRaisesRegex(
1601 'error decompressing item 1: ('
1657 zstd.ZstdError,
1602 'Corrupted block|'
1658 "error decompressing item 1: ("
1603 'Destination buffer is too small)'):
1659 "Corrupted block|"
1660 "Destination buffer is too small)",
1661 ):
1604 dctx.multi_decompress_to_buffer(frames)
1662 dctx.multi_decompress_to_buffer(frames)
1605
1663
1606 with self.assertRaisesRegexp(zstd.ZstdError,
1664 with self.assertRaisesRegex(
1607 'error decompressing item 1: ('
1665 zstd.ZstdError,
1608 'Corrupted block|'
1666 "error decompressing item 1: ("
1609 'Destination buffer is too small)'):
1667 "Corrupted block|"
1668 "Destination buffer is too small)",
1669 ):
1610 dctx.multi_decompress_to_buffer(frames, threads=2)
1670 dctx.multi_decompress_to_buffer(frames, threads=2)
1611
@@ -1,485 +1,576 b''
1 import io
1 import io
2 import os
2 import os
3 import unittest
3 import unittest
4
4
5 try:
5 try:
6 import hypothesis
6 import hypothesis
7 import hypothesis.strategies as strategies
7 import hypothesis.strategies as strategies
8 except ImportError:
8 except ImportError:
9 raise unittest.SkipTest('hypothesis not available')
9 raise unittest.SkipTest("hypothesis not available")
10
10
11 import zstandard as zstd
11 import zstandard as zstd
12
12
13 from . common import (
13 from .common import (
14 make_cffi,
14 make_cffi,
15 NonClosingBytesIO,
15 NonClosingBytesIO,
16 random_input_data,
16 random_input_data,
17 TestCase,
17 )
18 )
18
19
19
20
20 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
21 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
21 @make_cffi
22 @make_cffi
22 class TestDecompressor_stream_reader_fuzzing(unittest.TestCase):
23 class TestDecompressor_stream_reader_fuzzing(TestCase):
23 @hypothesis.settings(
24 @hypothesis.settings(
24 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
25 suppress_health_check=[
25 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
26 hypothesis.HealthCheck.large_base_example,
26 level=strategies.integers(min_value=1, max_value=5),
27 hypothesis.HealthCheck.too_slow,
27 streaming=strategies.booleans(),
28 ]
28 source_read_size=strategies.integers(1, 1048576),
29 )
29 read_sizes=strategies.data())
30 @hypothesis.given(
30 def test_stream_source_read_variance(self, original, level, streaming,
31 original=strategies.sampled_from(random_input_data()),
31 source_read_size, read_sizes):
32 level=strategies.integers(min_value=1, max_value=5),
33 streaming=strategies.booleans(),
34 source_read_size=strategies.integers(1, 1048576),
35 read_sizes=strategies.data(),
36 )
37 def test_stream_source_read_variance(
38 self, original, level, streaming, source_read_size, read_sizes
39 ):
32 cctx = zstd.ZstdCompressor(level=level)
40 cctx = zstd.ZstdCompressor(level=level)
33
41
34 if streaming:
42 if streaming:
35 source = io.BytesIO()
43 source = io.BytesIO()
36 writer = cctx.stream_writer(source)
44 writer = cctx.stream_writer(source)
37 writer.write(original)
45 writer.write(original)
38 writer.flush(zstd.FLUSH_FRAME)
46 writer.flush(zstd.FLUSH_FRAME)
39 source.seek(0)
47 source.seek(0)
40 else:
48 else:
41 frame = cctx.compress(original)
49 frame = cctx.compress(original)
42 source = io.BytesIO(frame)
50 source = io.BytesIO(frame)
43
51
44 dctx = zstd.ZstdDecompressor()
52 dctx = zstd.ZstdDecompressor()
45
53
46 chunks = []
54 chunks = []
47 with dctx.stream_reader(source, read_size=source_read_size) as reader:
55 with dctx.stream_reader(source, read_size=source_read_size) as reader:
48 while True:
56 while True:
49 read_size = read_sizes.draw(strategies.integers(-1, 131072))
57 read_size = read_sizes.draw(strategies.integers(-1, 131072))
50 chunk = reader.read(read_size)
58 chunk = reader.read(read_size)
51 if not chunk and read_size:
59 if not chunk and read_size:
52 break
60 break
53
61
54 chunks.append(chunk)
62 chunks.append(chunk)
55
63
56 self.assertEqual(b''.join(chunks), original)
64 self.assertEqual(b"".join(chunks), original)
57
65
58 # Similar to above except we have a constant read() size.
66 # Similar to above except we have a constant read() size.
59 @hypothesis.settings(
67 @hypothesis.settings(
60 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
68 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
61 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
69 )
62 level=strategies.integers(min_value=1, max_value=5),
70 @hypothesis.given(
63 streaming=strategies.booleans(),
71 original=strategies.sampled_from(random_input_data()),
64 source_read_size=strategies.integers(1, 1048576),
72 level=strategies.integers(min_value=1, max_value=5),
65 read_size=strategies.integers(-1, 131072))
73 streaming=strategies.booleans(),
66 def test_stream_source_read_size(self, original, level, streaming,
74 source_read_size=strategies.integers(1, 1048576),
67 source_read_size, read_size):
75 read_size=strategies.integers(-1, 131072),
76 )
77 def test_stream_source_read_size(
78 self, original, level, streaming, source_read_size, read_size
79 ):
68 if read_size == 0:
80 if read_size == 0:
69 read_size = 1
81 read_size = 1
70
82
71 cctx = zstd.ZstdCompressor(level=level)
83 cctx = zstd.ZstdCompressor(level=level)
72
84
73 if streaming:
85 if streaming:
74 source = io.BytesIO()
86 source = io.BytesIO()
75 writer = cctx.stream_writer(source)
87 writer = cctx.stream_writer(source)
76 writer.write(original)
88 writer.write(original)
77 writer.flush(zstd.FLUSH_FRAME)
89 writer.flush(zstd.FLUSH_FRAME)
78 source.seek(0)
90 source.seek(0)
79 else:
91 else:
80 frame = cctx.compress(original)
92 frame = cctx.compress(original)
81 source = io.BytesIO(frame)
93 source = io.BytesIO(frame)
82
94
83 dctx = zstd.ZstdDecompressor()
95 dctx = zstd.ZstdDecompressor()
84
96
85 chunks = []
97 chunks = []
86 reader = dctx.stream_reader(source, read_size=source_read_size)
98 reader = dctx.stream_reader(source, read_size=source_read_size)
87 while True:
99 while True:
88 chunk = reader.read(read_size)
100 chunk = reader.read(read_size)
89 if not chunk and read_size:
101 if not chunk and read_size:
90 break
102 break
91
103
92 chunks.append(chunk)
104 chunks.append(chunk)
93
105
94 self.assertEqual(b''.join(chunks), original)
106 self.assertEqual(b"".join(chunks), original)
95
107
96 @hypothesis.settings(
108 @hypothesis.settings(
97 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
109 suppress_health_check=[
98 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
110 hypothesis.HealthCheck.large_base_example,
99 level=strategies.integers(min_value=1, max_value=5),
111 hypothesis.HealthCheck.too_slow,
100 streaming=strategies.booleans(),
112 ]
101 source_read_size=strategies.integers(1, 1048576),
113 )
102 read_sizes=strategies.data())
114 @hypothesis.given(
103 def test_buffer_source_read_variance(self, original, level, streaming,
115 original=strategies.sampled_from(random_input_data()),
104 source_read_size, read_sizes):
116 level=strategies.integers(min_value=1, max_value=5),
117 streaming=strategies.booleans(),
118 source_read_size=strategies.integers(1, 1048576),
119 read_sizes=strategies.data(),
120 )
121 def test_buffer_source_read_variance(
122 self, original, level, streaming, source_read_size, read_sizes
123 ):
105 cctx = zstd.ZstdCompressor(level=level)
124 cctx = zstd.ZstdCompressor(level=level)
106
125
107 if streaming:
126 if streaming:
108 source = io.BytesIO()
127 source = io.BytesIO()
109 writer = cctx.stream_writer(source)
128 writer = cctx.stream_writer(source)
110 writer.write(original)
129 writer.write(original)
111 writer.flush(zstd.FLUSH_FRAME)
130 writer.flush(zstd.FLUSH_FRAME)
112 frame = source.getvalue()
131 frame = source.getvalue()
113 else:
132 else:
114 frame = cctx.compress(original)
133 frame = cctx.compress(original)
115
134
116 dctx = zstd.ZstdDecompressor()
135 dctx = zstd.ZstdDecompressor()
117 chunks = []
136 chunks = []
118
137
119 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
138 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
120 while True:
139 while True:
121 read_size = read_sizes.draw(strategies.integers(-1, 131072))
140 read_size = read_sizes.draw(strategies.integers(-1, 131072))
122 chunk = reader.read(read_size)
141 chunk = reader.read(read_size)
123 if not chunk and read_size:
142 if not chunk and read_size:
124 break
143 break
125
144
126 chunks.append(chunk)
145 chunks.append(chunk)
127
146
128 self.assertEqual(b''.join(chunks), original)
147 self.assertEqual(b"".join(chunks), original)
129
148
130 # Similar to above except we have a constant read() size.
149 # Similar to above except we have a constant read() size.
131 @hypothesis.settings(
150 @hypothesis.settings(
132 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
151 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
133 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
152 )
134 level=strategies.integers(min_value=1, max_value=5),
153 @hypothesis.given(
135 streaming=strategies.booleans(),
154 original=strategies.sampled_from(random_input_data()),
136 source_read_size=strategies.integers(1, 1048576),
155 level=strategies.integers(min_value=1, max_value=5),
137 read_size=strategies.integers(-1, 131072))
156 streaming=strategies.booleans(),
138 def test_buffer_source_constant_read_size(self, original, level, streaming,
157 source_read_size=strategies.integers(1, 1048576),
139 source_read_size, read_size):
158 read_size=strategies.integers(-1, 131072),
159 )
160 def test_buffer_source_constant_read_size(
161 self, original, level, streaming, source_read_size, read_size
162 ):
140 if read_size == 0:
163 if read_size == 0:
141 read_size = -1
164 read_size = -1
142
165
143 cctx = zstd.ZstdCompressor(level=level)
166 cctx = zstd.ZstdCompressor(level=level)
144
167
145 if streaming:
168 if streaming:
146 source = io.BytesIO()
169 source = io.BytesIO()
147 writer = cctx.stream_writer(source)
170 writer = cctx.stream_writer(source)
148 writer.write(original)
171 writer.write(original)
149 writer.flush(zstd.FLUSH_FRAME)
172 writer.flush(zstd.FLUSH_FRAME)
150 frame = source.getvalue()
173 frame = source.getvalue()
151 else:
174 else:
152 frame = cctx.compress(original)
175 frame = cctx.compress(original)
153
176
154 dctx = zstd.ZstdDecompressor()
177 dctx = zstd.ZstdDecompressor()
155 chunks = []
178 chunks = []
156
179
157 reader = dctx.stream_reader(frame, read_size=source_read_size)
180 reader = dctx.stream_reader(frame, read_size=source_read_size)
158 while True:
181 while True:
159 chunk = reader.read(read_size)
182 chunk = reader.read(read_size)
160 if not chunk and read_size:
183 if not chunk and read_size:
161 break
184 break
162
185
163 chunks.append(chunk)
186 chunks.append(chunk)
164
187
165 self.assertEqual(b''.join(chunks), original)
188 self.assertEqual(b"".join(chunks), original)
166
189
167 @hypothesis.settings(
190 @hypothesis.settings(
168 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
191 suppress_health_check=[hypothesis.HealthCheck.large_base_example]
169 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
192 )
170 level=strategies.integers(min_value=1, max_value=5),
193 @hypothesis.given(
171 streaming=strategies.booleans(),
194 original=strategies.sampled_from(random_input_data()),
172 source_read_size=strategies.integers(1, 1048576))
195 level=strategies.integers(min_value=1, max_value=5),
173 def test_stream_source_readall(self, original, level, streaming,
196 streaming=strategies.booleans(),
174 source_read_size):
197 source_read_size=strategies.integers(1, 1048576),
198 )
199 def test_stream_source_readall(self, original, level, streaming, source_read_size):
175 cctx = zstd.ZstdCompressor(level=level)
200 cctx = zstd.ZstdCompressor(level=level)
176
201
177 if streaming:
202 if streaming:
178 source = io.BytesIO()
203 source = io.BytesIO()
179 writer = cctx.stream_writer(source)
204 writer = cctx.stream_writer(source)
180 writer.write(original)
205 writer.write(original)
181 writer.flush(zstd.FLUSH_FRAME)
206 writer.flush(zstd.FLUSH_FRAME)
182 source.seek(0)
207 source.seek(0)
183 else:
208 else:
184 frame = cctx.compress(original)
209 frame = cctx.compress(original)
185 source = io.BytesIO(frame)
210 source = io.BytesIO(frame)
186
211
187 dctx = zstd.ZstdDecompressor()
212 dctx = zstd.ZstdDecompressor()
188
213
189 data = dctx.stream_reader(source, read_size=source_read_size).readall()
214 data = dctx.stream_reader(source, read_size=source_read_size).readall()
190 self.assertEqual(data, original)
215 self.assertEqual(data, original)
191
216
192 @hypothesis.settings(
217 @hypothesis.settings(
193 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
218 suppress_health_check=[
194 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
219 hypothesis.HealthCheck.large_base_example,
195 level=strategies.integers(min_value=1, max_value=5),
220 hypothesis.HealthCheck.too_slow,
196 streaming=strategies.booleans(),
221 ]
197 source_read_size=strategies.integers(1, 1048576),
222 )
198 read_sizes=strategies.data())
223 @hypothesis.given(
199 def test_stream_source_read1_variance(self, original, level, streaming,
224 original=strategies.sampled_from(random_input_data()),
200 source_read_size, read_sizes):
225 level=strategies.integers(min_value=1, max_value=5),
226 streaming=strategies.booleans(),
227 source_read_size=strategies.integers(1, 1048576),
228 read_sizes=strategies.data(),
229 )
230 def test_stream_source_read1_variance(
231 self, original, level, streaming, source_read_size, read_sizes
232 ):
201 cctx = zstd.ZstdCompressor(level=level)
233 cctx = zstd.ZstdCompressor(level=level)
202
234
203 if streaming:
235 if streaming:
204 source = io.BytesIO()
236 source = io.BytesIO()
205 writer = cctx.stream_writer(source)
237 writer = cctx.stream_writer(source)
206 writer.write(original)
238 writer.write(original)
207 writer.flush(zstd.FLUSH_FRAME)
239 writer.flush(zstd.FLUSH_FRAME)
208 source.seek(0)
240 source.seek(0)
209 else:
241 else:
210 frame = cctx.compress(original)
242 frame = cctx.compress(original)
211 source = io.BytesIO(frame)
243 source = io.BytesIO(frame)
212
244
213 dctx = zstd.ZstdDecompressor()
245 dctx = zstd.ZstdDecompressor()
214
246
215 chunks = []
247 chunks = []
216 with dctx.stream_reader(source, read_size=source_read_size) as reader:
248 with dctx.stream_reader(source, read_size=source_read_size) as reader:
217 while True:
249 while True:
218 read_size = read_sizes.draw(strategies.integers(-1, 131072))
250 read_size = read_sizes.draw(strategies.integers(-1, 131072))
219 chunk = reader.read1(read_size)
251 chunk = reader.read1(read_size)
220 if not chunk and read_size:
252 if not chunk and read_size:
221 break
253 break
222
254
223 chunks.append(chunk)
255 chunks.append(chunk)
224
256
225 self.assertEqual(b''.join(chunks), original)
257 self.assertEqual(b"".join(chunks), original)
226
258
227 @hypothesis.settings(
259 @hypothesis.settings(
228 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
260 suppress_health_check=[
229 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
261 hypothesis.HealthCheck.large_base_example,
230 level=strategies.integers(min_value=1, max_value=5),
262 hypothesis.HealthCheck.too_slow,
231 streaming=strategies.booleans(),
263 ]
232 source_read_size=strategies.integers(1, 1048576),
264 )
233 read_sizes=strategies.data())
265 @hypothesis.given(
234 def test_stream_source_readinto1_variance(self, original, level, streaming,
266 original=strategies.sampled_from(random_input_data()),
235 source_read_size, read_sizes):
267 level=strategies.integers(min_value=1, max_value=5),
268 streaming=strategies.booleans(),
269 source_read_size=strategies.integers(1, 1048576),
270 read_sizes=strategies.data(),
271 )
272 def test_stream_source_readinto1_variance(
273 self, original, level, streaming, source_read_size, read_sizes
274 ):
236 cctx = zstd.ZstdCompressor(level=level)
275 cctx = zstd.ZstdCompressor(level=level)
237
276
238 if streaming:
277 if streaming:
239 source = io.BytesIO()
278 source = io.BytesIO()
240 writer = cctx.stream_writer(source)
279 writer = cctx.stream_writer(source)
241 writer.write(original)
280 writer.write(original)
242 writer.flush(zstd.FLUSH_FRAME)
281 writer.flush(zstd.FLUSH_FRAME)
243 source.seek(0)
282 source.seek(0)
244 else:
283 else:
245 frame = cctx.compress(original)
284 frame = cctx.compress(original)
246 source = io.BytesIO(frame)
285 source = io.BytesIO(frame)
247
286
248 dctx = zstd.ZstdDecompressor()
287 dctx = zstd.ZstdDecompressor()
249
288
250 chunks = []
289 chunks = []
251 with dctx.stream_reader(source, read_size=source_read_size) as reader:
290 with dctx.stream_reader(source, read_size=source_read_size) as reader:
252 while True:
291 while True:
253 read_size = read_sizes.draw(strategies.integers(1, 131072))
292 read_size = read_sizes.draw(strategies.integers(1, 131072))
254 b = bytearray(read_size)
293 b = bytearray(read_size)
255 count = reader.readinto1(b)
294 count = reader.readinto1(b)
256
295
257 if not count:
296 if not count:
258 break
297 break
259
298
260 chunks.append(bytes(b[0:count]))
299 chunks.append(bytes(b[0:count]))
261
300
262 self.assertEqual(b''.join(chunks), original)
301 self.assertEqual(b"".join(chunks), original)
263
302
264 @hypothesis.settings(
303 @hypothesis.settings(
265 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
304 suppress_health_check=[
305 hypothesis.HealthCheck.large_base_example,
306 hypothesis.HealthCheck.too_slow,
307 ]
308 )
266 @hypothesis.given(
309 @hypothesis.given(
267 original=strategies.sampled_from(random_input_data()),
310 original=strategies.sampled_from(random_input_data()),
268 level=strategies.integers(min_value=1, max_value=5),
311 level=strategies.integers(min_value=1, max_value=5),
269 source_read_size=strategies.integers(1, 1048576),
312 source_read_size=strategies.integers(1, 1048576),
270 seek_amounts=strategies.data(),
313 seek_amounts=strategies.data(),
271 read_sizes=strategies.data())
314 read_sizes=strategies.data(),
272 def test_relative_seeks(self, original, level, source_read_size, seek_amounts,
315 )
273 read_sizes):
316 def test_relative_seeks(
317 self, original, level, source_read_size, seek_amounts, read_sizes
318 ):
274 cctx = zstd.ZstdCompressor(level=level)
319 cctx = zstd.ZstdCompressor(level=level)
275 frame = cctx.compress(original)
320 frame = cctx.compress(original)
276
321
277 dctx = zstd.ZstdDecompressor()
322 dctx = zstd.ZstdDecompressor()
278
323
279 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
324 with dctx.stream_reader(frame, read_size=source_read_size) as reader:
280 while True:
325 while True:
281 amount = seek_amounts.draw(strategies.integers(0, 16384))
326 amount = seek_amounts.draw(strategies.integers(0, 16384))
282 reader.seek(amount, os.SEEK_CUR)
327 reader.seek(amount, os.SEEK_CUR)
283
328
284 offset = reader.tell()
329 offset = reader.tell()
285 read_amount = read_sizes.draw(strategies.integers(1, 16384))
330 read_amount = read_sizes.draw(strategies.integers(1, 16384))
286 chunk = reader.read(read_amount)
331 chunk = reader.read(read_amount)
287
332
288 if not chunk:
333 if not chunk:
289 break
334 break
290
335
291 self.assertEqual(original[offset:offset + len(chunk)], chunk)
336 self.assertEqual(original[offset : offset + len(chunk)], chunk)
292
337
293 @hypothesis.settings(
338 @hypothesis.settings(
294 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
339 suppress_health_check=[
340 hypothesis.HealthCheck.large_base_example,
341 hypothesis.HealthCheck.too_slow,
342 ]
343 )
295 @hypothesis.given(
344 @hypothesis.given(
296 originals=strategies.data(),
345 originals=strategies.data(),
297 frame_count=strategies.integers(min_value=2, max_value=10),
346 frame_count=strategies.integers(min_value=2, max_value=10),
298 level=strategies.integers(min_value=1, max_value=5),
347 level=strategies.integers(min_value=1, max_value=5),
299 source_read_size=strategies.integers(1, 1048576),
348 source_read_size=strategies.integers(1, 1048576),
300 read_sizes=strategies.data())
349 read_sizes=strategies.data(),
301 def test_multiple_frames(self, originals, frame_count, level,
350 )
302 source_read_size, read_sizes):
351 def test_multiple_frames(
352 self, originals, frame_count, level, source_read_size, read_sizes
353 ):
303
354
304 cctx = zstd.ZstdCompressor(level=level)
355 cctx = zstd.ZstdCompressor(level=level)
305 source = io.BytesIO()
356 source = io.BytesIO()
306 buffer = io.BytesIO()
357 buffer = io.BytesIO()
307 writer = cctx.stream_writer(buffer)
358 writer = cctx.stream_writer(buffer)
308
359
309 for i in range(frame_count):
360 for i in range(frame_count):
310 data = originals.draw(strategies.sampled_from(random_input_data()))
361 data = originals.draw(strategies.sampled_from(random_input_data()))
311 source.write(data)
362 source.write(data)
312 writer.write(data)
363 writer.write(data)
313 writer.flush(zstd.FLUSH_FRAME)
364 writer.flush(zstd.FLUSH_FRAME)
314
365
315 dctx = zstd.ZstdDecompressor()
366 dctx = zstd.ZstdDecompressor()
316 buffer.seek(0)
367 buffer.seek(0)
317 reader = dctx.stream_reader(buffer, read_size=source_read_size,
368 reader = dctx.stream_reader(
318 read_across_frames=True)
369 buffer, read_size=source_read_size, read_across_frames=True
370 )
319
371
320 chunks = []
372 chunks = []
321
373
322 while True:
374 while True:
323 read_amount = read_sizes.draw(strategies.integers(-1, 16384))
375 read_amount = read_sizes.draw(strategies.integers(-1, 16384))
324 chunk = reader.read(read_amount)
376 chunk = reader.read(read_amount)
325
377
326 if not chunk and read_amount:
378 if not chunk and read_amount:
327 break
379 break
328
380
329 chunks.append(chunk)
381 chunks.append(chunk)
330
382
331 self.assertEqual(source.getvalue(), b''.join(chunks))
383 self.assertEqual(source.getvalue(), b"".join(chunks))
332
384
333
385
334 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
386 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
335 @make_cffi
387 @make_cffi
336 class TestDecompressor_stream_writer_fuzzing(unittest.TestCase):
388 class TestDecompressor_stream_writer_fuzzing(TestCase):
337 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
389 @hypothesis.settings(
338 level=strategies.integers(min_value=1, max_value=5),
390 suppress_health_check=[
339 write_size=strategies.integers(min_value=1, max_value=8192),
391 hypothesis.HealthCheck.large_base_example,
340 input_sizes=strategies.data())
392 hypothesis.HealthCheck.too_slow,
393 ]
394 )
395 @hypothesis.given(
396 original=strategies.sampled_from(random_input_data()),
397 level=strategies.integers(min_value=1, max_value=5),
398 write_size=strategies.integers(min_value=1, max_value=8192),
399 input_sizes=strategies.data(),
400 )
341 def test_write_size_variance(self, original, level, write_size, input_sizes):
401 def test_write_size_variance(self, original, level, write_size, input_sizes):
342 cctx = zstd.ZstdCompressor(level=level)
402 cctx = zstd.ZstdCompressor(level=level)
343 frame = cctx.compress(original)
403 frame = cctx.compress(original)
344
404
345 dctx = zstd.ZstdDecompressor()
405 dctx = zstd.ZstdDecompressor()
346 source = io.BytesIO(frame)
406 source = io.BytesIO(frame)
347 dest = NonClosingBytesIO()
407 dest = NonClosingBytesIO()
348
408
349 with dctx.stream_writer(dest, write_size=write_size) as decompressor:
409 with dctx.stream_writer(dest, write_size=write_size) as decompressor:
350 while True:
410 while True:
351 input_size = input_sizes.draw(strategies.integers(1, 4096))
411 input_size = input_sizes.draw(strategies.integers(1, 4096))
352 chunk = source.read(input_size)
412 chunk = source.read(input_size)
353 if not chunk:
413 if not chunk:
354 break
414 break
355
415
356 decompressor.write(chunk)
416 decompressor.write(chunk)
357
417
358 self.assertEqual(dest.getvalue(), original)
418 self.assertEqual(dest.getvalue(), original)
359
419
360
420
361 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
421 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
362 @make_cffi
422 @make_cffi
363 class TestDecompressor_copy_stream_fuzzing(unittest.TestCase):
423 class TestDecompressor_copy_stream_fuzzing(TestCase):
364 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
424 @hypothesis.settings(
365 level=strategies.integers(min_value=1, max_value=5),
425 suppress_health_check=[
366 read_size=strategies.integers(min_value=1, max_value=8192),
426 hypothesis.HealthCheck.large_base_example,
367 write_size=strategies.integers(min_value=1, max_value=8192))
427 hypothesis.HealthCheck.too_slow,
428 ]
429 )
430 @hypothesis.given(
431 original=strategies.sampled_from(random_input_data()),
432 level=strategies.integers(min_value=1, max_value=5),
433 read_size=strategies.integers(min_value=1, max_value=8192),
434 write_size=strategies.integers(min_value=1, max_value=8192),
435 )
368 def test_read_write_size_variance(self, original, level, read_size, write_size):
436 def test_read_write_size_variance(self, original, level, read_size, write_size):
369 cctx = zstd.ZstdCompressor(level=level)
437 cctx = zstd.ZstdCompressor(level=level)
370 frame = cctx.compress(original)
438 frame = cctx.compress(original)
371
439
372 source = io.BytesIO(frame)
440 source = io.BytesIO(frame)
373 dest = io.BytesIO()
441 dest = io.BytesIO()
374
442
375 dctx = zstd.ZstdDecompressor()
443 dctx = zstd.ZstdDecompressor()
376 dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size)
444 dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size)
377
445
378 self.assertEqual(dest.getvalue(), original)
446 self.assertEqual(dest.getvalue(), original)
379
447
380
448
381 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
449 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
382 @make_cffi
450 @make_cffi
383 class TestDecompressor_decompressobj_fuzzing(unittest.TestCase):
451 class TestDecompressor_decompressobj_fuzzing(TestCase):
384 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
452 @hypothesis.settings(
385 level=strategies.integers(min_value=1, max_value=5),
453 suppress_health_check=[
386 chunk_sizes=strategies.data())
454 hypothesis.HealthCheck.large_base_example,
455 hypothesis.HealthCheck.too_slow,
456 ]
457 )
458 @hypothesis.given(
459 original=strategies.sampled_from(random_input_data()),
460 level=strategies.integers(min_value=1, max_value=5),
461 chunk_sizes=strategies.data(),
462 )
387 def test_random_input_sizes(self, original, level, chunk_sizes):
463 def test_random_input_sizes(self, original, level, chunk_sizes):
388 cctx = zstd.ZstdCompressor(level=level)
464 cctx = zstd.ZstdCompressor(level=level)
389 frame = cctx.compress(original)
465 frame = cctx.compress(original)
390
466
391 source = io.BytesIO(frame)
467 source = io.BytesIO(frame)
392
468
393 dctx = zstd.ZstdDecompressor()
469 dctx = zstd.ZstdDecompressor()
394 dobj = dctx.decompressobj()
470 dobj = dctx.decompressobj()
395
471
396 chunks = []
472 chunks = []
397 while True:
473 while True:
398 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
474 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
399 chunk = source.read(chunk_size)
475 chunk = source.read(chunk_size)
400 if not chunk:
476 if not chunk:
401 break
477 break
402
478
403 chunks.append(dobj.decompress(chunk))
479 chunks.append(dobj.decompress(chunk))
404
480
405 self.assertEqual(b''.join(chunks), original)
481 self.assertEqual(b"".join(chunks), original)
406
482
407 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
483 @hypothesis.settings(
408 level=strategies.integers(min_value=1, max_value=5),
484 suppress_health_check=[
409 write_size=strategies.integers(min_value=1,
485 hypothesis.HealthCheck.large_base_example,
410 max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE),
486 hypothesis.HealthCheck.too_slow,
411 chunk_sizes=strategies.data())
487 ]
488 )
489 @hypothesis.given(
490 original=strategies.sampled_from(random_input_data()),
491 level=strategies.integers(min_value=1, max_value=5),
492 write_size=strategies.integers(
493 min_value=1, max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
494 ),
495 chunk_sizes=strategies.data(),
496 )
412 def test_random_output_sizes(self, original, level, write_size, chunk_sizes):
497 def test_random_output_sizes(self, original, level, write_size, chunk_sizes):
413 cctx = zstd.ZstdCompressor(level=level)
498 cctx = zstd.ZstdCompressor(level=level)
414 frame = cctx.compress(original)
499 frame = cctx.compress(original)
415
500
416 source = io.BytesIO(frame)
501 source = io.BytesIO(frame)
417
502
418 dctx = zstd.ZstdDecompressor()
503 dctx = zstd.ZstdDecompressor()
419 dobj = dctx.decompressobj(write_size=write_size)
504 dobj = dctx.decompressobj(write_size=write_size)
420
505
421 chunks = []
506 chunks = []
422 while True:
507 while True:
423 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
508 chunk_size = chunk_sizes.draw(strategies.integers(1, 4096))
424 chunk = source.read(chunk_size)
509 chunk = source.read(chunk_size)
425 if not chunk:
510 if not chunk:
426 break
511 break
427
512
428 chunks.append(dobj.decompress(chunk))
513 chunks.append(dobj.decompress(chunk))
429
514
430 self.assertEqual(b''.join(chunks), original)
515 self.assertEqual(b"".join(chunks), original)
431
516
432
517
433 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
518 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
434 @make_cffi
519 @make_cffi
435 class TestDecompressor_read_to_iter_fuzzing(unittest.TestCase):
520 class TestDecompressor_read_to_iter_fuzzing(TestCase):
436 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
521 @hypothesis.given(
437 level=strategies.integers(min_value=1, max_value=5),
522 original=strategies.sampled_from(random_input_data()),
438 read_size=strategies.integers(min_value=1, max_value=4096),
523 level=strategies.integers(min_value=1, max_value=5),
439 write_size=strategies.integers(min_value=1, max_value=4096))
524 read_size=strategies.integers(min_value=1, max_value=4096),
525 write_size=strategies.integers(min_value=1, max_value=4096),
526 )
440 def test_read_write_size_variance(self, original, level, read_size, write_size):
527 def test_read_write_size_variance(self, original, level, read_size, write_size):
441 cctx = zstd.ZstdCompressor(level=level)
528 cctx = zstd.ZstdCompressor(level=level)
442 frame = cctx.compress(original)
529 frame = cctx.compress(original)
443
530
444 source = io.BytesIO(frame)
531 source = io.BytesIO(frame)
445
532
446 dctx = zstd.ZstdDecompressor()
533 dctx = zstd.ZstdDecompressor()
447 chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size))
534 chunks = list(
535 dctx.read_to_iter(source, read_size=read_size, write_size=write_size)
536 )
448
537
449 self.assertEqual(b''.join(chunks), original)
538 self.assertEqual(b"".join(chunks), original)
450
539
451
540
452 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
541 @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set")
453 class TestDecompressor_multi_decompress_to_buffer_fuzzing(unittest.TestCase):
542 class TestDecompressor_multi_decompress_to_buffer_fuzzing(TestCase):
454 @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()),
543 @hypothesis.given(
455 min_size=1, max_size=1024),
544 original=strategies.lists(
456 threads=strategies.integers(min_value=1, max_value=8),
545 strategies.sampled_from(random_input_data()), min_size=1, max_size=1024
457 use_dict=strategies.booleans())
546 ),
547 threads=strategies.integers(min_value=1, max_value=8),
548 use_dict=strategies.booleans(),
549 )
458 def test_data_equivalence(self, original, threads, use_dict):
550 def test_data_equivalence(self, original, threads, use_dict):
459 kwargs = {}
551 kwargs = {}
460 if use_dict:
552 if use_dict:
461 kwargs['dict_data'] = zstd.ZstdCompressionDict(original[0])
553 kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0])
462
554
463 cctx = zstd.ZstdCompressor(level=1,
555 cctx = zstd.ZstdCompressor(
464 write_content_size=True,
556 level=1, write_content_size=True, write_checksum=True, **kwargs
465 write_checksum=True,
557 )
466 **kwargs)
467
558
468 if not hasattr(cctx, 'multi_compress_to_buffer'):
559 if not hasattr(cctx, "multi_compress_to_buffer"):
469 self.skipTest('multi_compress_to_buffer not available')
560 self.skipTest("multi_compress_to_buffer not available")
470
561
471 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
562 frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1)
472
563
473 dctx = zstd.ZstdDecompressor(**kwargs)
564 dctx = zstd.ZstdDecompressor(**kwargs)
474 result = dctx.multi_decompress_to_buffer(frames_buffer)
565 result = dctx.multi_decompress_to_buffer(frames_buffer)
475
566
476 self.assertEqual(len(result), len(original))
567 self.assertEqual(len(result), len(original))
477 for i, frame in enumerate(result):
568 for i, frame in enumerate(result):
478 self.assertEqual(frame.tobytes(), original[i])
569 self.assertEqual(frame.tobytes(), original[i])
479
570
480 frames_list = [f.tobytes() for f in frames_buffer]
571 frames_list = [f.tobytes() for f in frames_buffer]
481 result = dctx.multi_decompress_to_buffer(frames_list)
572 result = dctx.multi_decompress_to_buffer(frames_list)
482
573
483 self.assertEqual(len(result), len(original))
574 self.assertEqual(len(result), len(original))
484 for i, frame in enumerate(result):
575 for i, frame in enumerate(result):
485 self.assertEqual(frame.tobytes(), original[i])
576 self.assertEqual(frame.tobytes(), original[i])
@@ -1,15 +1,15 b''
1 import unittest
1 import unittest
2
2
3 import zstandard as zstd
3 import zstandard as zstd
4
4
5 from . common import (
5 from .common import (
6 make_cffi,
6 make_cffi,
7 TestCase,
7 )
8 )
8
9
9
10
10 @make_cffi
11 @make_cffi
11 class TestSizes(unittest.TestCase):
12 class TestSizes(TestCase):
12 def test_decompression_size(self):
13 def test_decompression_size(self):
13 size = zstd.estimate_decompression_context_size()
14 size = zstd.estimate_decompression_context_size()
14 self.assertGreater(size, 100000)
15 self.assertGreater(size, 100000)
15
@@ -1,69 +1,70 b''
1 from __future__ import unicode_literals
1 from __future__ import unicode_literals
2
2
3 import unittest
3 import unittest
4
4
5 import zstandard as zstd
5 import zstandard as zstd
6
6
7 from . common import (
7 from .common import (
8 make_cffi,
8 make_cffi,
9 TestCase,
9 )
10 )
10
11
11
12
12 @make_cffi
13 @make_cffi
13 class TestModuleAttributes(unittest.TestCase):
14 class TestModuleAttributes(TestCase):
14 def test_version(self):
15 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 3))
16 self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 4))
16
17
17 self.assertEqual(zstd.__version__, '0.12.0')
18 self.assertEqual(zstd.__version__, "0.13.0")
18
19
19 def test_constants(self):
20 def test_constants(self):
20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
21 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
21 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
22 self.assertEqual(zstd.FRAME_HEADER, b"\x28\xb5\x2f\xfd")
22
23
23 def test_hasattr(self):
24 def test_hasattr(self):
24 attrs = (
25 attrs = (
25 'CONTENTSIZE_UNKNOWN',
26 "CONTENTSIZE_UNKNOWN",
26 'CONTENTSIZE_ERROR',
27 "CONTENTSIZE_ERROR",
27 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
28 "COMPRESSION_RECOMMENDED_INPUT_SIZE",
28 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 "COMPRESSION_RECOMMENDED_OUTPUT_SIZE",
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
30 "DECOMPRESSION_RECOMMENDED_INPUT_SIZE",
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
31 "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE",
31 'MAGIC_NUMBER',
32 "MAGIC_NUMBER",
32 'FLUSH_BLOCK',
33 "FLUSH_BLOCK",
33 'FLUSH_FRAME',
34 "FLUSH_FRAME",
34 'BLOCKSIZELOG_MAX',
35 "BLOCKSIZELOG_MAX",
35 'BLOCKSIZE_MAX',
36 "BLOCKSIZE_MAX",
36 'WINDOWLOG_MIN',
37 "WINDOWLOG_MIN",
37 'WINDOWLOG_MAX',
38 "WINDOWLOG_MAX",
38 'CHAINLOG_MIN',
39 "CHAINLOG_MIN",
39 'CHAINLOG_MAX',
40 "CHAINLOG_MAX",
40 'HASHLOG_MIN',
41 "HASHLOG_MIN",
41 'HASHLOG_MAX',
42 "HASHLOG_MAX",
42 'HASHLOG3_MAX',
43 "HASHLOG3_MAX",
43 'MINMATCH_MIN',
44 "MINMATCH_MIN",
44 'MINMATCH_MAX',
45 "MINMATCH_MAX",
45 'SEARCHLOG_MIN',
46 "SEARCHLOG_MIN",
46 'SEARCHLOG_MAX',
47 "SEARCHLOG_MAX",
47 'SEARCHLENGTH_MIN',
48 "SEARCHLENGTH_MIN",
48 'SEARCHLENGTH_MAX',
49 "SEARCHLENGTH_MAX",
49 'TARGETLENGTH_MIN',
50 "TARGETLENGTH_MIN",
50 'TARGETLENGTH_MAX',
51 "TARGETLENGTH_MAX",
51 'LDM_MINMATCH_MIN',
52 "LDM_MINMATCH_MIN",
52 'LDM_MINMATCH_MAX',
53 "LDM_MINMATCH_MAX",
53 'LDM_BUCKETSIZELOG_MAX',
54 "LDM_BUCKETSIZELOG_MAX",
54 'STRATEGY_FAST',
55 "STRATEGY_FAST",
55 'STRATEGY_DFAST',
56 "STRATEGY_DFAST",
56 'STRATEGY_GREEDY',
57 "STRATEGY_GREEDY",
57 'STRATEGY_LAZY',
58 "STRATEGY_LAZY",
58 'STRATEGY_LAZY2',
59 "STRATEGY_LAZY2",
59 'STRATEGY_BTLAZY2',
60 "STRATEGY_BTLAZY2",
60 'STRATEGY_BTOPT',
61 "STRATEGY_BTOPT",
61 'STRATEGY_BTULTRA',
62 "STRATEGY_BTULTRA",
62 'STRATEGY_BTULTRA2',
63 "STRATEGY_BTULTRA2",
63 'DICT_TYPE_AUTO',
64 "DICT_TYPE_AUTO",
64 'DICT_TYPE_RAWCONTENT',
65 "DICT_TYPE_RAWCONTENT",
65 'DICT_TYPE_FULLDICT',
66 "DICT_TYPE_FULLDICT",
66 )
67 )
67
68
68 for a in attrs:
69 for a in attrs:
69 self.assertTrue(hasattr(zstd, a), a)
70 self.assertTrue(hasattr(zstd, a), a)
@@ -1,89 +1,92 b''
1 import struct
1 import struct
2 import sys
2 import sys
3 import unittest
3 import unittest
4
4
5 import zstandard as zstd
5 import zstandard as zstd
6
6
7 from . common import (
7 from .common import (
8 generate_samples,
8 generate_samples,
9 make_cffi,
9 make_cffi,
10 random_input_data,
10 random_input_data,
11 TestCase,
11 )
12 )
12
13
13 if sys.version_info[0] >= 3:
14 if sys.version_info[0] >= 3:
14 int_type = int
15 int_type = int
15 else:
16 else:
16 int_type = long
17 int_type = long
17
18
18
19
19 @make_cffi
20 @make_cffi
20 class TestTrainDictionary(unittest.TestCase):
21 class TestTrainDictionary(TestCase):
21 def test_no_args(self):
22 def test_no_args(self):
22 with self.assertRaises(TypeError):
23 with self.assertRaises(TypeError):
23 zstd.train_dictionary()
24 zstd.train_dictionary()
24
25
25 def test_bad_args(self):
26 def test_bad_args(self):
26 with self.assertRaises(TypeError):
27 with self.assertRaises(TypeError):
27 zstd.train_dictionary(8192, u'foo')
28 zstd.train_dictionary(8192, u"foo")
28
29
29 with self.assertRaises(ValueError):
30 with self.assertRaises(ValueError):
30 zstd.train_dictionary(8192, [u'foo'])
31 zstd.train_dictionary(8192, [u"foo"])
31
32
32 def test_no_params(self):
33 def test_no_params(self):
33 d = zstd.train_dictionary(8192, random_input_data())
34 d = zstd.train_dictionary(8192, random_input_data())
34 self.assertIsInstance(d.dict_id(), int_type)
35 self.assertIsInstance(d.dict_id(), int_type)
35
36
36 # The dictionary ID may be different across platforms.
37 # The dictionary ID may be different across platforms.
37 expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
38 expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id())
38
39
39 data = d.as_bytes()
40 data = d.as_bytes()
40 self.assertEqual(data[0:8], expected)
41 self.assertEqual(data[0:8], expected)
41
42
42 def test_basic(self):
43 def test_basic(self):
43 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
44 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
44 self.assertIsInstance(d.dict_id(), int_type)
45 self.assertIsInstance(d.dict_id(), int_type)
45
46
46 data = d.as_bytes()
47 data = d.as_bytes()
47 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
48 self.assertEqual(data[0:4], b"\x37\xa4\x30\xec")
48
49
49 self.assertEqual(d.k, 64)
50 self.assertEqual(d.k, 64)
50 self.assertEqual(d.d, 16)
51 self.assertEqual(d.d, 16)
51
52
52 def test_set_dict_id(self):
53 def test_set_dict_id(self):
53 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
54 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, dict_id=42)
54 dict_id=42)
55 self.assertEqual(d.dict_id(), 42)
55 self.assertEqual(d.dict_id(), 42)
56
56
57 def test_optimize(self):
57 def test_optimize(self):
58 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
58 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, d=16)
59 d=16)
60
59
61 # This varies by platform.
60 # This varies by platform.
62 self.assertIn(d.k, (50, 2000))
61 self.assertIn(d.k, (50, 2000))
63 self.assertEqual(d.d, 16)
62 self.assertEqual(d.d, 16)
64
63
64
65 @make_cffi
65 @make_cffi
66 class TestCompressionDict(unittest.TestCase):
66 class TestCompressionDict(TestCase):
67 def test_bad_mode(self):
67 def test_bad_mode(self):
68 with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
68 with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"):
69 zstd.ZstdCompressionDict(b'foo', dict_type=42)
69 zstd.ZstdCompressionDict(b"foo", dict_type=42)
70
70
71 def test_bad_precompute_compress(self):
71 def test_bad_precompute_compress(self):
72 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
72 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
73
73
74 with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
74 with self.assertRaisesRegex(ValueError, "must specify one of level or "):
75 d.precompute_compress()
75 d.precompute_compress()
76
76
77 with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
77 with self.assertRaisesRegex(ValueError, "must only specify one of level or "):
78 d.precompute_compress(level=3,
78 d.precompute_compress(
79 compression_params=zstd.CompressionParameters())
79 level=3, compression_params=zstd.CompressionParameters()
80 )
80
81
81 def test_precompute_compress_rawcontent(self):
82 def test_precompute_compress_rawcontent(self):
82 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
83 d = zstd.ZstdCompressionDict(
83 dict_type=zstd.DICT_TYPE_RAWCONTENT)
84 b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT
85 )
84 d.precompute_compress(level=1)
86 d.precompute_compress(level=1)
85
87
86 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
88 d = zstd.ZstdCompressionDict(
87 dict_type=zstd.DICT_TYPE_FULLDICT)
89 b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT
88 with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
90 )
91 with self.assertRaisesRegex(zstd.ZstdError, "unable to precompute dictionary"):
89 d.precompute_compress(level=1)
92 d.precompute_compress(level=1)
@@ -1,65 +1,75 b''
1 # Copyright (c) 2017-present, Gregory Szorc
1 # Copyright (c) 2017-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 """Python interface to the Zstandard (zstd) compression library."""
7 """Python interface to the Zstandard (zstd) compression library."""
8
8
9 from __future__ import absolute_import, unicode_literals
9 from __future__ import absolute_import, unicode_literals
10
10
11 # This module serves 2 roles:
11 # This module serves 2 roles:
12 #
12 #
13 # 1) Export the C or CFFI "backend" through a central module.
13 # 1) Export the C or CFFI "backend" through a central module.
14 # 2) Implement additional functionality built on top of C or CFFI backend.
14 # 2) Implement additional functionality built on top of C or CFFI backend.
15
15
16 import os
16 import os
17 import platform
17 import platform
18
18
19 # Some Python implementations don't support C extensions. That's why we have
19 # Some Python implementations don't support C extensions. That's why we have
20 # a CFFI implementation in the first place. The code here import one of our
20 # a CFFI implementation in the first place. The code here import one of our
21 # "backends" then re-exports the symbols from this module. For convenience,
21 # "backends" then re-exports the symbols from this module. For convenience,
22 # we support falling back to the CFFI backend if the C extension can't be
22 # we support falling back to the CFFI backend if the C extension can't be
23 # imported. But for performance reasons, we only do this on unknown Python
23 # imported. But for performance reasons, we only do this on unknown Python
24 # implementation. Notably, for CPython we require the C extension by default.
24 # implementation. Notably, for CPython we require the C extension by default.
25 # Because someone will inevitably want special behavior, the behavior is
25 # Because someone will inevitably want special behavior, the behavior is
26 # configurable via an environment variable. A potentially better way to handle
26 # configurable via an environment variable. A potentially better way to handle
27 # this is to import a special ``__importpolicy__`` module or something
27 # this is to import a special ``__importpolicy__`` module or something
28 # defining a variable and `setup.py` could write the file with whatever
28 # defining a variable and `setup.py` could write the file with whatever
29 # policy was specified at build time. Until someone needs it, we go with
29 # policy was specified at build time. Until someone needs it, we go with
30 # the hacky but simple environment variable approach.
30 # the hacky but simple environment variable approach.
31 _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')
31 _module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default")
32
32
33 if _module_policy == 'default':
33 if _module_policy == "default":
34 if platform.python_implementation() in ('CPython',):
34 if platform.python_implementation() in ("CPython",):
35 from zstd import *
35 from zstd import *
36 backend = 'cext'
36
37 elif platform.python_implementation() in ('PyPy',):
37 backend = "cext"
38 elif platform.python_implementation() in ("PyPy",):
38 from .cffi import *
39 from .cffi import *
39 backend = 'cffi'
40
41 backend = "cffi"
40 else:
42 else:
41 try:
43 try:
42 from zstd import *
44 from zstd import *
43 backend = 'cext'
45
46 backend = "cext"
44 except ImportError:
47 except ImportError:
45 from .cffi import *
48 from .cffi import *
46 backend = 'cffi'
49
47 elif _module_policy == 'cffi_fallback':
50 backend = "cffi"
51 elif _module_policy == "cffi_fallback":
48 try:
52 try:
49 from zstd import *
53 from zstd import *
50 backend = 'cext'
54
55 backend = "cext"
51 except ImportError:
56 except ImportError:
52 from .cffi import *
57 from .cffi import *
53 backend = 'cffi'
58
54 elif _module_policy == 'cext':
59 backend = "cffi"
60 elif _module_policy == "cext":
55 from zstd import *
61 from zstd import *
56 backend = 'cext'
62
57 elif _module_policy == 'cffi':
63 backend = "cext"
64 elif _module_policy == "cffi":
58 from .cffi import *
65 from .cffi import *
59 backend = 'cffi'
66
67 backend = "cffi"
60 else:
68 else:
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
69 raise ImportError(
62 'cext, or cffi' % _module_policy)
70 "unknown module import policy: %s; use default, cffi_fallback, "
71 "cext, or cffi" % _module_policy
72 )
63
73
64 # Keep this in sync with python-zstandard.h.
74 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.12.0'
75 __version__ = "0.13.0"
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now