Show More
The requested changes are too big and content was truncated. Show full diff
1 | NO CONTENT: new file 100644 |
|
NO CONTENT: new file 100644 | ||
The requested commit or file is too big and content was truncated. Show full diff |
@@ -1,102 +1,103 b'' | |||||
1 | # Files that just need to be migrated to the formatter. |
|
1 | # Files that just need to be migrated to the formatter. | |
2 | # Do not add new files here! |
|
2 | # Do not add new files here! | |
3 | mercurial/cext/manifest.c |
|
3 | mercurial/cext/manifest.c | |
4 | mercurial/cext/osutil.c |
|
4 | mercurial/cext/osutil.c | |
5 | # Vendored code that we should never format: |
|
5 | # Vendored code that we should never format: | |
6 | contrib/python-zstandard/c-ext/bufferutil.c |
|
6 | contrib/python-zstandard/c-ext/bufferutil.c | |
7 | contrib/python-zstandard/c-ext/compressionchunker.c |
|
7 | contrib/python-zstandard/c-ext/compressionchunker.c | |
8 | contrib/python-zstandard/c-ext/compressiondict.c |
|
8 | contrib/python-zstandard/c-ext/compressiondict.c | |
9 | contrib/python-zstandard/c-ext/compressionparams.c |
|
9 | contrib/python-zstandard/c-ext/compressionparams.c | |
10 | contrib/python-zstandard/c-ext/compressionreader.c |
|
10 | contrib/python-zstandard/c-ext/compressionreader.c | |
11 | contrib/python-zstandard/c-ext/compressionwriter.c |
|
11 | contrib/python-zstandard/c-ext/compressionwriter.c | |
12 | contrib/python-zstandard/c-ext/compressobj.c |
|
12 | contrib/python-zstandard/c-ext/compressobj.c | |
13 | contrib/python-zstandard/c-ext/compressor.c |
|
13 | contrib/python-zstandard/c-ext/compressor.c | |
14 | contrib/python-zstandard/c-ext/compressoriterator.c |
|
14 | contrib/python-zstandard/c-ext/compressoriterator.c | |
15 | contrib/python-zstandard/c-ext/constants.c |
|
15 | contrib/python-zstandard/c-ext/constants.c | |
16 | contrib/python-zstandard/c-ext/decompressionreader.c |
|
16 | contrib/python-zstandard/c-ext/decompressionreader.c | |
17 | contrib/python-zstandard/c-ext/decompressionwriter.c |
|
17 | contrib/python-zstandard/c-ext/decompressionwriter.c | |
18 | contrib/python-zstandard/c-ext/decompressobj.c |
|
18 | contrib/python-zstandard/c-ext/decompressobj.c | |
19 | contrib/python-zstandard/c-ext/decompressor.c |
|
19 | contrib/python-zstandard/c-ext/decompressor.c | |
20 | contrib/python-zstandard/c-ext/decompressoriterator.c |
|
20 | contrib/python-zstandard/c-ext/decompressoriterator.c | |
21 | contrib/python-zstandard/c-ext/frameparams.c |
|
21 | contrib/python-zstandard/c-ext/frameparams.c | |
22 | contrib/python-zstandard/c-ext/python-zstandard.h |
|
22 | contrib/python-zstandard/c-ext/python-zstandard.h | |
23 | contrib/python-zstandard/zstd.c |
|
23 | contrib/python-zstandard/zstd.c | |
24 | contrib/python-zstandard/zstd/common/bitstream.h |
|
24 | contrib/python-zstandard/zstd/common/bitstream.h | |
25 | contrib/python-zstandard/zstd/common/compiler.h |
|
25 | contrib/python-zstandard/zstd/common/compiler.h | |
26 | contrib/python-zstandard/zstd/common/cpu.h |
|
26 | contrib/python-zstandard/zstd/common/cpu.h | |
27 | contrib/python-zstandard/zstd/common/debug.c |
|
27 | contrib/python-zstandard/zstd/common/debug.c | |
28 | contrib/python-zstandard/zstd/common/debug.h |
|
28 | contrib/python-zstandard/zstd/common/debug.h | |
29 | contrib/python-zstandard/zstd/common/entropy_common.c |
|
29 | contrib/python-zstandard/zstd/common/entropy_common.c | |
30 | contrib/python-zstandard/zstd/common/error_private.c |
|
30 | contrib/python-zstandard/zstd/common/error_private.c | |
31 | contrib/python-zstandard/zstd/common/error_private.h |
|
31 | contrib/python-zstandard/zstd/common/error_private.h | |
32 | contrib/python-zstandard/zstd/common/fse_decompress.c |
|
32 | contrib/python-zstandard/zstd/common/fse_decompress.c | |
33 | contrib/python-zstandard/zstd/common/fse.h |
|
33 | contrib/python-zstandard/zstd/common/fse.h | |
34 | contrib/python-zstandard/zstd/common/huf.h |
|
34 | contrib/python-zstandard/zstd/common/huf.h | |
35 | contrib/python-zstandard/zstd/common/mem.h |
|
35 | contrib/python-zstandard/zstd/common/mem.h | |
36 | contrib/python-zstandard/zstd/common/pool.c |
|
36 | contrib/python-zstandard/zstd/common/pool.c | |
37 | contrib/python-zstandard/zstd/common/pool.h |
|
37 | contrib/python-zstandard/zstd/common/pool.h | |
38 | contrib/python-zstandard/zstd/common/threading.c |
|
38 | contrib/python-zstandard/zstd/common/threading.c | |
39 | contrib/python-zstandard/zstd/common/threading.h |
|
39 | contrib/python-zstandard/zstd/common/threading.h | |
40 | contrib/python-zstandard/zstd/common/xxhash.c |
|
40 | contrib/python-zstandard/zstd/common/xxhash.c | |
41 | contrib/python-zstandard/zstd/common/xxhash.h |
|
41 | contrib/python-zstandard/zstd/common/xxhash.h | |
42 | contrib/python-zstandard/zstd/common/zstd_common.c |
|
42 | contrib/python-zstandard/zstd/common/zstd_common.c | |
43 | contrib/python-zstandard/zstd/common/zstd_errors.h |
|
43 | contrib/python-zstandard/zstd/common/zstd_errors.h | |
44 | contrib/python-zstandard/zstd/common/zstd_internal.h |
|
44 | contrib/python-zstandard/zstd/common/zstd_internal.h | |
45 | contrib/python-zstandard/zstd/compress/fse_compress.c |
|
45 | contrib/python-zstandard/zstd/compress/fse_compress.c | |
46 | contrib/python-zstandard/zstd/compress/hist.c |
|
46 | contrib/python-zstandard/zstd/compress/hist.c | |
47 | contrib/python-zstandard/zstd/compress/hist.h |
|
47 | contrib/python-zstandard/zstd/compress/hist.h | |
48 | contrib/python-zstandard/zstd/compress/huf_compress.c |
|
48 | contrib/python-zstandard/zstd/compress/huf_compress.c | |
49 | contrib/python-zstandard/zstd/compress/zstd_compress.c |
|
49 | contrib/python-zstandard/zstd/compress/zstd_compress.c | |
50 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h |
|
50 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h | |
51 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.c |
|
51 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.c | |
52 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.h |
|
52 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.h | |
53 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c |
|
53 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c | |
54 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h |
|
54 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h | |
|
55 | contrib/python-zstandard/zstd/compress/zstd_cwksp.h | |||
55 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c |
|
56 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c | |
56 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h |
|
57 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h | |
57 | contrib/python-zstandard/zstd/compress/zstd_fast.c |
|
58 | contrib/python-zstandard/zstd/compress/zstd_fast.c | |
58 | contrib/python-zstandard/zstd/compress/zstd_fast.h |
|
59 | contrib/python-zstandard/zstd/compress/zstd_fast.h | |
59 | contrib/python-zstandard/zstd/compress/zstd_lazy.c |
|
60 | contrib/python-zstandard/zstd/compress/zstd_lazy.c | |
60 | contrib/python-zstandard/zstd/compress/zstd_lazy.h |
|
61 | contrib/python-zstandard/zstd/compress/zstd_lazy.h | |
61 | contrib/python-zstandard/zstd/compress/zstd_ldm.c |
|
62 | contrib/python-zstandard/zstd/compress/zstd_ldm.c | |
62 | contrib/python-zstandard/zstd/compress/zstd_ldm.h |
|
63 | contrib/python-zstandard/zstd/compress/zstd_ldm.h | |
63 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c |
|
64 | contrib/python-zstandard/zstd/compress/zstdmt_compress.c | |
64 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h |
|
65 | contrib/python-zstandard/zstd/compress/zstdmt_compress.h | |
65 | contrib/python-zstandard/zstd/compress/zstd_opt.c |
|
66 | contrib/python-zstandard/zstd/compress/zstd_opt.c | |
66 | contrib/python-zstandard/zstd/compress/zstd_opt.h |
|
67 | contrib/python-zstandard/zstd/compress/zstd_opt.h | |
67 | contrib/python-zstandard/zstd/decompress/huf_decompress.c |
|
68 | contrib/python-zstandard/zstd/decompress/huf_decompress.c | |
68 | contrib/python-zstandard/zstd/decompress/zstd_ddict.c |
|
69 | contrib/python-zstandard/zstd/decompress/zstd_ddict.c | |
69 | contrib/python-zstandard/zstd/decompress/zstd_ddict.h |
|
70 | contrib/python-zstandard/zstd/decompress/zstd_ddict.h | |
70 | contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c |
|
71 | contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c | |
71 | contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h |
|
72 | contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h | |
72 | contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h |
|
73 | contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h | |
73 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c |
|
74 | contrib/python-zstandard/zstd/decompress/zstd_decompress.c | |
74 | contrib/python-zstandard/zstd/deprecated/zbuff_common.c |
|
75 | contrib/python-zstandard/zstd/deprecated/zbuff_common.c | |
75 | contrib/python-zstandard/zstd/deprecated/zbuff_compress.c |
|
76 | contrib/python-zstandard/zstd/deprecated/zbuff_compress.c | |
76 | contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c |
|
77 | contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c | |
77 | contrib/python-zstandard/zstd/deprecated/zbuff.h |
|
78 | contrib/python-zstandard/zstd/deprecated/zbuff.h | |
78 | contrib/python-zstandard/zstd/dictBuilder/cover.c |
|
79 | contrib/python-zstandard/zstd/dictBuilder/cover.c | |
79 | contrib/python-zstandard/zstd/dictBuilder/cover.h |
|
80 | contrib/python-zstandard/zstd/dictBuilder/cover.h | |
80 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c |
|
81 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.c | |
81 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h |
|
82 | contrib/python-zstandard/zstd/dictBuilder/divsufsort.h | |
82 | contrib/python-zstandard/zstd/dictBuilder/fastcover.c |
|
83 | contrib/python-zstandard/zstd/dictBuilder/fastcover.c | |
83 | contrib/python-zstandard/zstd/dictBuilder/zdict.c |
|
84 | contrib/python-zstandard/zstd/dictBuilder/zdict.c | |
84 | contrib/python-zstandard/zstd/dictBuilder/zdict.h |
|
85 | contrib/python-zstandard/zstd/dictBuilder/zdict.h | |
85 | contrib/python-zstandard/zstd/zstd.h |
|
86 | contrib/python-zstandard/zstd/zstd.h | |
86 | hgext/fsmonitor/pywatchman/bser.c |
|
87 | hgext/fsmonitor/pywatchman/bser.c | |
87 | mercurial/thirdparty/xdiff/xdiff.h |
|
88 | mercurial/thirdparty/xdiff/xdiff.h | |
88 | mercurial/thirdparty/xdiff/xdiffi.c |
|
89 | mercurial/thirdparty/xdiff/xdiffi.c | |
89 | mercurial/thirdparty/xdiff/xdiffi.h |
|
90 | mercurial/thirdparty/xdiff/xdiffi.h | |
90 | mercurial/thirdparty/xdiff/xemit.c |
|
91 | mercurial/thirdparty/xdiff/xemit.c | |
91 | mercurial/thirdparty/xdiff/xemit.h |
|
92 | mercurial/thirdparty/xdiff/xemit.h | |
92 | mercurial/thirdparty/xdiff/xhistogram.c |
|
93 | mercurial/thirdparty/xdiff/xhistogram.c | |
93 | mercurial/thirdparty/xdiff/xinclude.h |
|
94 | mercurial/thirdparty/xdiff/xinclude.h | |
94 | mercurial/thirdparty/xdiff/xmacros.h |
|
95 | mercurial/thirdparty/xdiff/xmacros.h | |
95 | mercurial/thirdparty/xdiff/xmerge.c |
|
96 | mercurial/thirdparty/xdiff/xmerge.c | |
96 | mercurial/thirdparty/xdiff/xpatience.c |
|
97 | mercurial/thirdparty/xdiff/xpatience.c | |
97 | mercurial/thirdparty/xdiff/xprepare.c |
|
98 | mercurial/thirdparty/xdiff/xprepare.c | |
98 | mercurial/thirdparty/xdiff/xprepare.h |
|
99 | mercurial/thirdparty/xdiff/xprepare.h | |
99 | mercurial/thirdparty/xdiff/xtypes.h |
|
100 | mercurial/thirdparty/xdiff/xtypes.h | |
100 | mercurial/thirdparty/xdiff/xutils.c |
|
101 | mercurial/thirdparty/xdiff/xutils.c | |
101 | mercurial/thirdparty/xdiff/xutils.h |
|
102 | mercurial/thirdparty/xdiff/xutils.h | |
102 | mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c |
|
103 | mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c |
@@ -1,702 +1,721 b'' | |||||
1 | =============== |
|
1 | =============== | |
2 | Version History |
|
2 | Version History | |
3 | =============== |
|
3 | =============== | |
4 |
|
4 | |||
5 | 1.0.0 (not yet released) |
|
5 | 1.0.0 (not yet released) | |
6 | ======================== |
|
6 | ======================== | |
7 |
|
7 | |||
8 | Actions Blocking Release |
|
8 | Actions Blocking Release | |
9 | ------------------------ |
|
9 | ------------------------ | |
10 |
|
10 | |||
11 | * compression and decompression APIs that support ``io.RawIOBase`` interface |
|
11 | * compression and decompression APIs that support ``io.RawIOBase`` interface | |
12 | (#13). |
|
12 | (#13). | |
13 | * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface. |
|
13 | * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface. | |
14 | * Properly handle non-blocking I/O and partial writes for objects implementing |
|
14 | * Properly handle non-blocking I/O and partial writes for objects implementing | |
15 | ``io.RawIOBase``. |
|
15 | ``io.RawIOBase``. | |
16 | * Make ``write_return_read=True`` the default for objects implementing |
|
16 | * Make ``write_return_read=True`` the default for objects implementing | |
17 | ``io.RawIOBase``. |
|
17 | ``io.RawIOBase``. | |
18 | * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for |
|
18 | * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for | |
19 | all objects implementing ``io.RawIOBase``. Is calling ``close()`` on |
|
19 | all objects implementing ``io.RawIOBase``. Is calling ``close()`` on | |
20 | wrapped stream acceptable, should ``__exit__`` always call ``close()``, |
|
20 | wrapped stream acceptable, should ``__exit__`` always call ``close()``, | |
21 | should ``close()`` imply ``flush()``, etc. |
|
21 | should ``close()`` imply ``flush()``, etc. | |
22 | * Consider making reads across frames configurable behavior. |
|
22 | * Consider making reads across frames configurable behavior. | |
23 | * Refactor module names so C and CFFI extensions live under ``zstandard`` |
|
23 | * Refactor module names so C and CFFI extensions live under ``zstandard`` | |
24 | package. |
|
24 | package. | |
25 | * Overall API design review. |
|
25 | * Overall API design review. | |
26 | * Use Python allocator where possible. |
|
26 | * Use Python allocator where possible. | |
27 | * Figure out what to do about experimental APIs not implemented by CFFI. |
|
27 | * Figure out what to do about experimental APIs not implemented by CFFI. | |
28 | * APIs for auto adjusting compression parameters based on input size. e.g. |
|
28 | * APIs for auto adjusting compression parameters based on input size. e.g. | |
29 | clamping the window log so it isn't too large for input. |
|
29 | clamping the window log so it isn't too large for input. | |
30 | * Consider allowing compressor and decompressor instances to be thread safe, |
|
30 | * Consider allowing compressor and decompressor instances to be thread safe, | |
31 | support concurrent operations. Or track when an operation is in progress and |
|
31 | support concurrent operations. Or track when an operation is in progress and | |
32 | refuse to let concurrent operations use the same instance. |
|
32 | refuse to let concurrent operations use the same instance. | |
33 | * Support for magic-less frames for all decompression operations (``decompress()`` |
|
33 | * Support for magic-less frames for all decompression operations (``decompress()`` | |
34 | doesn't work due to sniffing the content size and the lack of a ZSTD API to |
|
34 | doesn't work due to sniffing the content size and the lack of a ZSTD API to | |
35 | sniff magic-less frames - this should be fixed in 1.3.5.). |
|
35 | sniff magic-less frames - this should be fixed in 1.3.5.). | |
36 | * Audit for complete flushing when ending compression streams. |
|
36 | * Audit for complete flushing when ending compression streams. | |
37 | * Deprecate legacy APIs. |
|
37 | * Deprecate legacy APIs. | |
38 | * Audit for ability to control read/write sizes on all APIs. |
|
38 | * Audit for ability to control read/write sizes on all APIs. | |
39 | * Detect memory leaks via bench.py. |
|
39 | * Detect memory leaks via bench.py. | |
40 | * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and |
|
40 | * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and | |
41 | require use of ``CompressionParameters``. |
|
41 | require use of ``CompressionParameters``. | |
42 | * Expose ``ZSTD_getFrameProgression()`` from more compressor types. |
|
42 | * Expose ``ZSTD_getFrameProgression()`` from more compressor types. | |
43 | * Support modifying compression parameters mid operation when supported by |
|
43 | * Support modifying compression parameters mid operation when supported by | |
44 | zstd API. |
|
44 | zstd API. | |
45 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. |
|
45 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. | |
|
46 | * Expose ``ZSTD_SRCSIZEHINT_{MIN,MAX}`` constants. | |||
46 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. |
|
47 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. | |
47 |
* Support ``ZSTD_ |
|
48 | * Support ``ZSTD_dictForceLoad`` dictionary compression parameter. | |
|
49 | * Support ``ZSTD_c_targetCBlockSize`` compression parameter. | |||
|
50 | * Support ``ZSTD_c_literalCompressionMode`` compression parameter. | |||
|
51 | * Support ``ZSTD_c_srcSizeHint`` compression parameter. | |||
48 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving |
|
52 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving | |
49 | compression parameters. |
|
53 | compression parameters. | |
50 | * Consider exposing ``ZSTDMT_toFlushNow()``. |
|
54 | * Consider exposing ``ZSTDMT_toFlushNow()``. | |
51 | * Expose ``ZDICT_trainFromBuffer_fastCover()``, |
|
55 | * Expose ``ZDICT_trainFromBuffer_fastCover()``, | |
52 | ``ZDICT_optimizeTrainFromBuffer_fastCover``. |
|
56 | ``ZDICT_optimizeTrainFromBuffer_fastCover``. | |
|
57 | * Expose ``ZSTD_Sequence`` struct and related ``ZSTD_getSequences()`` API. | |||
53 | * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level. |
|
58 | * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level. | |
54 | * Consider a ``chunker()`` API for decompression. |
|
59 | * Consider a ``chunker()`` API for decompression. | |
55 | * Consider stats for ``chunker()`` API, including finding the last consumed |
|
60 | * Consider stats for ``chunker()`` API, including finding the last consumed | |
56 | offset of input data. |
|
61 | offset of input data. | |
57 | * Consider exposing ``ZSTD_cParam_getBounds()`` and |
|
62 | * Consider exposing ``ZSTD_cParam_getBounds()`` and | |
58 | ``ZSTD_dParam_getBounds()`` APIs. |
|
63 | ``ZSTD_dParam_getBounds()`` APIs. | |
59 | * Consider controls over resetting compression contexts (session only, parameters, |
|
64 | * Consider controls over resetting compression contexts (session only, parameters, | |
60 | or session and parameters). |
|
65 | or session and parameters). | |
61 | * Actually use the CFFI backend in fuzzing tests. |
|
66 | * Actually use the CFFI backend in fuzzing tests. | |
62 |
|
67 | |||
63 | Other Actions Not Blocking Release |
|
68 | Other Actions Not Blocking Release | |
64 | --------------------------------------- |
|
69 | --------------------------------------- | |
65 |
|
70 | |||
66 | * Support for block compression APIs. |
|
71 | * Support for block compression APIs. | |
67 | * API for ensuring max memory ceiling isn't exceeded. |
|
72 | * API for ensuring max memory ceiling isn't exceeded. | |
68 | * Move off nose for testing. |
|
73 | * Move off nose for testing. | |
69 |
|
74 | |||
|
75 | 0.13.0 (released 2019-12-28) | |||
|
76 | ============================ | |||
|
77 | ||||
|
78 | Changes | |||
|
79 | ------- | |||
|
80 | ||||
|
81 | * ``pytest-xdist`` ``pytest`` extension is now installed so tests can be | |||
|
82 | run in parallel. | |||
|
83 | * CI now builds ``manylinux2010`` and ``manylinux2014`` binary wheels | |||
|
84 | instead of a mix of ``manylinux2010`` and ``manylinux1``. | |||
|
85 | * Official support for Python 3.8 has been added. | |||
|
86 | * Bundled zstandard library upgraded from 1.4.3 to 1.4.4. | |||
|
87 | * Python code has been reformatted with black. | |||
|
88 | ||||
70 | 0.12.0 (released 2019-09-15) |
|
89 | 0.12.0 (released 2019-09-15) | |
71 | ============================ |
|
90 | ============================ | |
72 |
|
91 | |||
73 | Backwards Compatibility Notes |
|
92 | Backwards Compatibility Notes | |
74 | ----------------------------- |
|
93 | ----------------------------- | |
75 |
|
94 | |||
76 | * Support for Python 3.4 has been dropped since Python 3.4 is no longer |
|
95 | * Support for Python 3.4 has been dropped since Python 3.4 is no longer | |
77 | a supported Python version upstream. (But it will likely continue to |
|
96 | a supported Python version upstream. (But it will likely continue to | |
78 | work until Python 2.7 support is dropped and we port to Python 3.5+ |
|
97 | work until Python 2.7 support is dropped and we port to Python 3.5+ | |
79 | APIs.) |
|
98 | APIs.) | |
80 |
|
99 | |||
81 | Bug Fixes |
|
100 | Bug Fixes | |
82 | --------- |
|
101 | --------- | |
83 |
|
102 | |||
84 | * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91). |
|
103 | * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91). | |
85 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). |
|
104 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). | |
86 |
|
105 | |||
87 | Changes |
|
106 | Changes | |
88 | ------- |
|
107 | ------- | |
89 |
|
108 | |||
90 | * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI). |
|
109 | * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI). | |
91 | * Switched to ``pytest`` for running tests (from ``nose``). |
|
110 | * Switched to ``pytest`` for running tests (from ``nose``). | |
92 | * Bundled zstandard library upgraded from 1.3.8 to 1.4.3. |
|
111 | * Bundled zstandard library upgraded from 1.3.8 to 1.4.3. | |
93 |
|
112 | |||
94 | 0.11.1 (released 2019-05-14) |
|
113 | 0.11.1 (released 2019-05-14) | |
95 | ============================ |
|
114 | ============================ | |
96 |
|
115 | |||
97 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). |
|
116 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). | |
98 |
|
117 | |||
99 | 0.11.0 (released 2019-02-24) |
|
118 | 0.11.0 (released 2019-02-24) | |
100 | ============================ |
|
119 | ============================ | |
101 |
|
120 | |||
102 | Backwards Compatibility Notes |
|
121 | Backwards Compatibility Notes | |
103 | ----------------------------- |
|
122 | ----------------------------- | |
104 |
|
123 | |||
105 | * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0`` |
|
124 | * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0`` | |
106 | and defaults to ``-1``, per the documented behavior of |
|
125 | and defaults to ``-1``, per the documented behavior of | |
107 | ``io.RawIOBase.read()``. Previously, we required an argument that was |
|
126 | ``io.RawIOBase.read()``. Previously, we required an argument that was | |
108 | a positive value. |
|
127 | a positive value. | |
109 | * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods |
|
128 | * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods | |
110 | of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation`` |
|
129 | of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation`` | |
111 | instead of ``NotImplementedError``. |
|
130 | instead of ``NotImplementedError``. | |
112 | * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames`` |
|
131 | * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames`` | |
113 | argument. The default value will likely be changed in a future release |
|
132 | argument. The default value will likely be changed in a future release | |
114 | and consumers are advised to pass the argument to avoid unwanted change |
|
133 | and consumers are advised to pass the argument to avoid unwanted change | |
115 | of behavior in the future. |
|
134 | of behavior in the future. | |
116 | * ``setup.py`` now always disables the CFFI backend if the installed |
|
135 | * ``setup.py`` now always disables the CFFI backend if the installed | |
117 | CFFI package does not meet the minimum version requirements. Before, it was |
|
136 | CFFI package does not meet the minimum version requirements. Before, it was | |
118 | possible for the CFFI backend to be generated and a run-time error to |
|
137 | possible for the CFFI backend to be generated and a run-time error to | |
119 | occur. |
|
138 | occur. | |
120 | * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader`` |
|
139 | * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader`` | |
121 | were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``, |
|
140 | were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``, | |
122 | respectively so naming is identical to the C extension. This should have |
|
141 | respectively so naming is identical to the C extension. This should have | |
123 | no meaningful end-user impact, as instances aren't meant to be |
|
142 | no meaningful end-user impact, as instances aren't meant to be | |
124 | constructed directly. |
|
143 | constructed directly. | |
125 | * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
144 | * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read`` | |
126 | argument to control whether ``write()`` returns the number of bytes |
|
145 | argument to control whether ``write()`` returns the number of bytes | |
127 | read from the source / written to the decompressor. It defaults to off, |
|
146 | read from the source / written to the decompressor. It defaults to off, | |
128 | which preserves the existing behavior of returning the number of bytes |
|
147 | which preserves the existing behavior of returning the number of bytes | |
129 | emitted from the decompressor. The default will change in a future release |
|
148 | emitted from the decompressor. The default will change in a future release | |
130 | so behavior aligns with the specified behavior of ``io.RawIOBase``. |
|
149 | so behavior aligns with the specified behavior of ``io.RawIOBase``. | |
131 | * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This |
|
150 | * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This | |
132 | will result in that stream plus the underlying stream being closed as |
|
151 | will result in that stream plus the underlying stream being closed as | |
133 | well. If this behavior is not desirable, do not use instances as |
|
152 | well. If this behavior is not desirable, do not use instances as | |
134 | context managers. |
|
153 | context managers. | |
135 | * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
154 | * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read`` | |
136 | argument to control whether ``write()`` returns the number of bytes read |
|
155 | argument to control whether ``write()`` returns the number of bytes read | |
137 | from the source / written to the compressor. It defaults to off, which |
|
156 | from the source / written to the compressor. It defaults to off, which | |
138 | preserves the existing behavior of returning the number of bytes emitted |
|
157 | preserves the existing behavior of returning the number of bytes emitted | |
139 | from the compressor. The default will change in a future release so |
|
158 | from the compressor. The default will change in a future release so | |
140 | behavior aligns with the specified behavior of ``io.RawIOBase``. |
|
159 | behavior aligns with the specified behavior of ``io.RawIOBase``. | |
141 | * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will |
|
160 | * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will | |
142 | result in that stream plus any underlying stream being closed as well. If |
|
161 | result in that stream plus any underlying stream being closed as well. If | |
143 | this behavior is not desirable, do not use instances as context managers. |
|
162 | this behavior is not desirable, do not use instances as context managers. | |
144 | * ``ZstdDecompressionWriter`` no longer requires being used as a context |
|
163 | * ``ZstdDecompressionWriter`` no longer requires being used as a context | |
145 | manager (#57). |
|
164 | manager (#57). | |
146 | * ``ZstdCompressionWriter`` no longer requires being used as a context |
|
165 | * ``ZstdCompressionWriter`` no longer requires being used as a context | |
147 | manager (#57). |
|
166 | manager (#57). | |
148 | * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances |
|
167 | * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances | |
149 | has been deprecated and will be removed in a future release. The |
|
168 | has been deprecated and will be removed in a future release. The | |
150 | ``overlap_log`` attribute should be used instead. |
|
169 | ``overlap_log`` attribute should be used instead. | |
151 | * The ``overlap_size_log`` argument to ``CompressionParameters`` has been |
|
170 | * The ``overlap_size_log`` argument to ``CompressionParameters`` has been | |
152 | deprecated and will be removed in a future release. The ``overlap_log`` |
|
171 | deprecated and will be removed in a future release. The ``overlap_log`` | |
153 | argument should be used instead. |
|
172 | argument should be used instead. | |
154 | * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances |
|
173 | * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances | |
155 | has been deprecated and will be removed in a future release. The |
|
174 | has been deprecated and will be removed in a future release. The | |
156 | ``ldm_hash_rate_log`` attribute should be used instead. |
|
175 | ``ldm_hash_rate_log`` attribute should be used instead. | |
157 | * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been |
|
176 | * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been | |
158 | deprecated and will be removed in a future release. The ``ldm_hash_rate_log`` |
|
177 | deprecated and will be removed in a future release. The ``ldm_hash_rate_log`` | |
159 | argument should be used instead. |
|
178 | argument should be used instead. | |
160 | * The ``compression_strategy`` argument to ``CompressionParameters`` has been |
|
179 | * The ``compression_strategy`` argument to ``CompressionParameters`` has been | |
161 | deprecated and will be removed in a future release. The ``strategy`` |
|
180 | deprecated and will be removed in a future release. The ``strategy`` | |
162 | argument should be used instead. |
|
181 | argument should be used instead. | |
163 | * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated |
|
182 | * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated | |
164 | and will be removed in a future release. Use ``MINMATCH_MIN`` and |
|
183 | and will be removed in a future release. Use ``MINMATCH_MIN`` and | |
165 | ``MINMATCH_MAX`` instead. |
|
184 | ``MINMATCH_MAX`` instead. | |
166 | * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had |
|
185 | * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had | |
167 | been documented in the ``README`` file since the ``0.9.0`` release, the |
|
186 | been documented in the ``README`` file since the ``0.9.0`` release, the | |
168 | module should not be imported directly at its new location. Instead, |
|
187 | module should not be imported directly at its new location. Instead, | |
169 | ``import zstandard`` to cause an appropriate backend module to be loaded |
|
188 | ``import zstandard`` to cause an appropriate backend module to be loaded | |
170 | automatically. |
|
189 | automatically. | |
171 |
|
190 | |||
172 | Bug Fixes |
|
191 | Bug Fixes | |
173 | --------- |
|
192 | --------- | |
174 |
|
193 | |||
175 | * CFFI backend could encounter a failure when sending an empty chunk into |
|
194 | * CFFI backend could encounter a failure when sending an empty chunk into | |
176 | ``ZstdDecompressionObj.decompress()``. The issue has been fixed. |
|
195 | ``ZstdDecompressionObj.decompress()``. The issue has been fixed. | |
177 | * CFFI backend could encounter an error when calling |
|
196 | * CFFI backend could encounter an error when calling | |
178 | ``ZstdDecompressionReader.read()`` if there was data remaining in an |
|
197 | ``ZstdDecompressionReader.read()`` if there was data remaining in an | |
179 | internal buffer. The issue has been fixed. (#71) |
|
198 | internal buffer. The issue has been fixed. (#71) | |
180 |
|
199 | |||
181 | Changes |
|
200 | Changes | |
182 | ------- |
|
201 | ------- | |
183 |
|
202 | |||
184 | * ``ZstdDecompressionObj.decompress()`` now properly handles empty inputs in
|
203 | * ``ZstdDecompressionObj.decompress()`` now properly handles empty inputs in | 
185 | the CFFI backend. |
|
204 | the CFFI backend. | |
186 | * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``. |
|
205 | * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``. | |
187 | These are part of the ``io.BufferedIOBase`` interface. |
|
206 | These are part of the ``io.BufferedIOBase`` interface. | |
188 | * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading |
|
207 | * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading | |
189 | compressed output into an existing buffer. |
|
208 | compressed output into an existing buffer. | |
190 | * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts |
|
209 | * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts | |
191 | read sizes of ``-1`` and ``0``. The new behavior aligns with the documented |
|
210 | read sizes of ``-1`` and ``0``. The new behavior aligns with the documented | |
192 | behavior of ``io.RawIOBase``. |
|
211 | behavior of ``io.RawIOBase``. | |
193 | * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this |
|
212 | * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this | |
194 | method raised ``NotImplementedError``. |
|
213 | method raised ``NotImplementedError``. | |
195 | * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``. |
|
214 | * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``. | |
196 | These are part of the ``io.BufferedIOBase`` interface. |
|
215 | These are part of the ``io.BufferedIOBase`` interface. | |
197 | * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts |
|
216 | * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts | |
198 | read sizes of ``-1`` and ``0``. The new behavior aligns with the documented |
|
217 | read sizes of ``-1`` and ``0``. The new behavior aligns with the documented | |
199 | behavior of ``io.RawIOBase``. |
|
218 | behavior of ``io.RawIOBase``. | |
200 | * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this |
|
219 | * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this | |
201 | method raised ``NotImplementedError``. |
|
220 | method raised ``NotImplementedError``. | |
202 | * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods |
|
221 | * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods | |
203 | of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation`` |
|
222 | of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation`` | |
204 | instead of ``NotImplementedError``. This reflects a decision to never |
|
223 | instead of ``NotImplementedError``. This reflects a decision to never | |
205 | implement text-based I/O on (de)compressors and keep the low-level API |
|
224 | implement text-based I/O on (de)compressors and keep the low-level API | |
206 | operating in the binary domain. (#13) |
|
225 | operating in the binary domain. (#13) | |
207 | * ``README.rst`` now documents how to achieve linewise iteration using
|
226 | * ``README.rst`` now documents how to achieve linewise iteration using | 
208 | an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``. |
|
227 | an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``. | |
209 | * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for |
|
228 | * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for | |
210 | reading decompressed output into an existing buffer. This allows chaining |
|
229 | reading decompressed output into an existing buffer. This allows chaining | |
211 | to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``. |
|
230 | to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``. | |
212 | * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames`` |
|
231 | * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames`` | |
213 | argument to control behavior when the input data has multiple zstd |
|
232 | argument to control behavior when the input data has multiple zstd | |
214 | *frames*. When ``False`` (the default for backwards compatibility), a |
|
233 | *frames*. When ``False`` (the default for backwards compatibility), a | |
215 | ``read()`` will stop when the end of a zstd *frame* is encountered. When |
|
234 | ``read()`` will stop when the end of a zstd *frame* is encountered. When | |
216 | ``True``, ``read()`` can potentially return data spanning multiple zstd |
|
235 | ``True``, ``read()`` can potentially return data spanning multiple zstd | |
217 | *frames*. The default will likely be changed to ``True`` in a future |
|
236 | *frames*. The default will likely be changed to ``True`` in a future | |
218 | release. |
|
237 | release. | |
219 | * ``setup.py`` now performs CFFI version sniffing and disables the CFFI |
|
238 | * ``setup.py`` now performs CFFI version sniffing and disables the CFFI | |
220 | backend if CFFI is too old. Previously, we only used ``install_requires`` |
|
239 | backend if CFFI is too old. Previously, we only used ``install_requires`` | |
221 | to enforce the CFFI version and not all build modes would properly enforce |
|
240 | to enforce the CFFI version and not all build modes would properly enforce | |
222 | the minimum CFFI version. (#69) |
|
241 | the minimum CFFI version. (#69) | |
223 | * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data |
|
242 | * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data | |
224 | remaining in any internal buffer. Before, repeated ``read()`` could |
|
243 | remaining in any internal buffer. Before, repeated ``read()`` could | |
225 | result in *random* errors. (#71) |
|
244 | result in *random* errors. (#71) | |
226 | * Upgraded various Python packages in CI environment. |
|
245 | * Upgraded various Python packages in CI environment. | |
227 | * Upgrade to hypothesis 4.5.11. |
|
246 | * Upgrade to hypothesis 4.5.11. | |
228 | * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader`` |
|
247 | * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader`` | |
229 | were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``, |
|
248 | were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``, | |
230 | respectively. |
|
249 | respectively. | |
231 | * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
250 | * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read`` | |
232 | argument to control whether ``write()`` returns the number of bytes read |
|
251 | argument to control whether ``write()`` returns the number of bytes read | |
233 | from the source. It defaults to ``False`` to preserve backwards |
|
252 | from the source. It defaults to ``False`` to preserve backwards | |
234 | compatibility. |
|
253 | compatibility. | |
235 | * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase`` |
|
254 | * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase`` | |
236 | interface and behaves as a proper stream object. |
|
255 | interface and behaves as a proper stream object. | |
237 | * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read`` |
|
256 | * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read`` | |
238 | argument to control whether ``write()`` returns the number of bytes read |
|
257 | argument to control whether ``write()`` returns the number of bytes read | |
239 | from the source. It defaults to ``False`` to preserve backwards |
|
258 | from the source. It defaults to ``False`` to preserve backwards | |
240 | compatibility. |
|
259 | compatibility. | |
241 | * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and |
|
260 | * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and | |
242 | behaves as a proper stream object. ``close()`` will now close the stream |
|
261 | behaves as a proper stream object. ``close()`` will now close the stream | |
243 | and the underlying stream (if possible). ``__exit__`` will now call |
|
262 | and the underlying stream (if possible). ``__exit__`` will now call | |
244 | ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented. |
|
263 | ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented. | |
245 | * ``ZstdDecompressionWriter`` no longer must be used as a context manager. |
|
264 | * ``ZstdDecompressionWriter`` no longer must be used as a context manager. | |
246 | * ``ZstdCompressionWriter`` no longer must be used as a context manager. |
|
265 | * ``ZstdCompressionWriter`` no longer must be used as a context manager. | |
247 | When not using as a context manager, it is important to call |
|
266 | When not using as a context manager, it is important to call | |
248 | ``flush(FLUSH_FRAME)`` or the compression stream won't be properly
|
267 | ``flush(FLUSH_FRAME)`` or the compression stream won't be properly | 
249 | terminated and decoders may complain about malformed input. |
|
268 | terminated and decoders may complain about malformed input. | |
250 | * ``ZstdCompressionWriter.flush()`` (what is returned from |
|
269 | * ``ZstdCompressionWriter.flush()`` (what is returned from | |
251 | ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the |
|
270 | ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the | |
252 | flush behavior. Its value can be one of the new constants |
|
271 | flush behavior. Its value can be one of the new constants | |
253 | ``FLUSH_BLOCK`` or ``FLUSH_FRAME``. |
|
272 | ``FLUSH_BLOCK`` or ``FLUSH_FRAME``. | |
254 | * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method. |
|
273 | * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method. | |
255 | This provides parity with standard library equivalent types. (#65) |
|
274 | This provides parity with standard library equivalent types. (#65) | |
256 | * ``CompressionParameters`` no longer redundantly store individual compression |
|
275 | * ``CompressionParameters`` no longer redundantly store individual compression | |
257 | parameters on each instance. Instead, compression parameters are stored inside |
|
276 | parameters on each instance. Instead, compression parameters are stored inside | |
258 | the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining |
|
277 | the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining | |
259 | parameters are now properties rather than instance variables. |
|
278 | parameters are now properties rather than instance variables. | |
260 | * Exposed the ``STRATEGY_BTULTRA2`` constant. |
|
279 | * Exposed the ``STRATEGY_BTULTRA2`` constant. | |
261 | * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute. |
|
280 | * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute. | |
262 | This behaves identically to the ``overlap_size_log`` attribute. |
|
281 | This behaves identically to the ``overlap_size_log`` attribute. | |
263 | * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that |
|
282 | * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that | |
264 | behaves identically to the ``overlap_size_log`` argument. An error will be |
|
283 | behaves identically to the ``overlap_size_log`` argument. An error will be | |
265 | raised if both arguments are specified. |
|
284 | raised if both arguments are specified. | |
266 | * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log`` |
|
285 | * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log`` | |
267 | attribute. This behaves identically to the ``ldm_hash_every_log`` attribute. |
|
286 | attribute. This behaves identically to the ``ldm_hash_every_log`` attribute. | |
268 | * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that |
|
287 | * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that | |
269 | behaves identically to the ``ldm_hash_every_log`` argument. An error will be |
|
288 | behaves identically to the ``ldm_hash_every_log`` argument. An error will be | |
270 | raised if both arguments are specified. |
|
289 | raised if both arguments are specified. | |
271 | * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves |
|
290 | * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves | |
272 | identically to the ``compression_strategy`` argument. An error will be raised |
|
291 | identically to the ``compression_strategy`` argument. An error will be raised | |
273 | if both arguments are specified. |
|
292 | if both arguments are specified. | |
274 | * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are |
|
293 | * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are | |
275 | semantically equivalent to the old ``SEARCHLENGTH_MIN`` and |
|
294 | semantically equivalent to the old ``SEARCHLENGTH_MIN`` and | |
276 | ``SEARCHLENGTH_MAX`` constants. |
|
295 | ``SEARCHLENGTH_MAX`` constants. | |
277 | * Bundled zstandard library upgraded from 1.3.7 to 1.3.8. |
|
296 | * Bundled zstandard library upgraded from 1.3.7 to 1.3.8. | |
278 | * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and |
|
297 | * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and | |
279 | tested in the 0.10 release). |
|
298 | tested in the 0.10 release). | |
280 | * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. |
|
299 | * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. | |
281 | * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid |
|
300 | * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid | |
282 | allocating a new buffer for every operation. This should result in faster |
|
301 | allocating a new buffer for every operation. This should result in faster | |
283 | performance in cases where ``write()`` or ``flush()`` are being called |
|
302 | performance in cases where ``write()`` or ``flush()`` are being called | |
284 | frequently. (#62) |
|
303 | frequently. (#62) | |
285 | * Bundled zstandard library upgraded from 1.3.6 to 1.3.7. |
|
304 | * Bundled zstandard library upgraded from 1.3.6 to 1.3.7. | |
286 |
|
305 | |||
287 | 0.10.2 (released 2018-11-03) |
|
306 | 0.10.2 (released 2018-11-03) | |
288 | ============================ |
|
307 | ============================ | |
289 |
|
308 | |||
290 | Bug Fixes |
|
309 | Bug Fixes | |
291 | --------- |
|
310 | --------- | |
292 |
|
311 | |||
293 | * ``zstd_cffi.py`` added to ``setup.py`` (#60). |
|
312 | * ``zstd_cffi.py`` added to ``setup.py`` (#60). | |
294 |
|
313 | |||
295 | Changes |
|
314 | Changes | |
296 | ------- |
|
315 | ------- | |
297 |
|
316 | |||
298 | * Change some integer casts to avoid ``ssize_t`` (#61). |
|
317 | * Change some integer casts to avoid ``ssize_t`` (#61). | |
299 |
|
318 | |||
300 | 0.10.1 (released 2018-10-08) |
|
319 | 0.10.1 (released 2018-10-08) | |
301 | ============================ |
|
320 | ============================ | |
302 |
|
321 | |||
303 | Backwards Compatibility Notes |
|
322 | Backwards Compatibility Notes | |
304 | ----------------------------- |
|
323 | ----------------------------- | |
305 |
|
324 | |||
306 | * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a |
|
325 | * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a | |
307 | method (#58). |
|
326 | method (#58). | |
308 | * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a |
|
327 | * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a | |
309 | method (#58). |
|
328 | method (#58). | |
310 |
|
329 | |||
311 | Changes |
|
330 | Changes | |
312 | ------- |
|
331 | ------- | |
313 |
|
332 | |||
314 | * Stop attempting to package Python 3.6 for Miniconda. The latest version of |
|
333 | * Stop attempting to package Python 3.6 for Miniconda. The latest version of | |
315 | Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie |
|
334 | Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie | |
316 | since they were built against Python 3.7.
|
335 | since they were built against Python 3.7. | 
317 | * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s |
|
336 | * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s | |
318 | ``closed`` attribute is now a read-only property instead of a method. This now |
|
337 | ``closed`` attribute is now a read-only property instead of a method. This now | |
319 | properly matches the ``IOBase`` API and allows instances to be used in more |
|
338 | properly matches the ``IOBase`` API and allows instances to be used in more | |
320 | places that accept ``IOBase`` instances. |
|
339 | places that accept ``IOBase`` instances. | |
321 |
|
340 | |||
322 | 0.10.0 (released 2018-10-08) |
|
341 | 0.10.0 (released 2018-10-08) | |
323 | ============================ |
|
342 | ============================ | |
324 |
|
343 | |||
325 | Backwards Compatibility Notes |
|
344 | Backwards Compatibility Notes | |
326 | ----------------------------- |
|
345 | ----------------------------- | |
327 |
|
346 | |||
328 | * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an |
|
347 | * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an | |
329 | argument in both the C and CFFI backends. Before, the CFFI implementation |
|
348 | argument in both the C and CFFI backends. Before, the CFFI implementation | |
330 | would assume a default value of ``-1``, which was later rejected. |
|
349 | would assume a default value of ``-1``, which was later rejected. | |
331 | * The ``compress_literals`` argument and attribute has been removed from |
|
350 | * The ``compress_literals`` argument and attribute has been removed from | |
332 | ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5 |
|
351 | ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5 | |
333 | API. |
|
352 | API. | |
334 | * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every |
|
353 | * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every | |
335 | operation performed against ``ZstdCompressor`` instances. The reason for this |
|
354 | operation performed against ``ZstdCompressor`` instances. The reason for this | |
336 | change is that the zstd 1.3.5 API no longer allows this without calling |
|
355 | change is that the zstd 1.3.5 API no longer allows this without calling | |
337 | ``ZSTD_CCtx_resetParameters()`` first. But if we called |
|
356 | ``ZSTD_CCtx_resetParameters()`` first. But if we called | |
338 | ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo |
|
357 | ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo | |
339 | potentially expensive setup when using dictionaries. We now call |
|
358 | potentially expensive setup when using dictionaries. We now call | |
340 | ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change |
|
359 | ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change | |
341 | compression parameters. |
|
360 | compression parameters. | |
342 | * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be |
|
361 | * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be | |
343 | used as a context manager. The context manager interface still exists and its |
|
362 | used as a context manager. The context manager interface still exists and its | |
344 | behavior is unchanged. |
|
363 | behavior is unchanged. | |
345 | * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be |
|
364 | * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be | |
346 | used as a context manager. The context manager interface still exists and its |
|
365 | used as a context manager. The context manager interface still exists and its | |
347 | behavior is unchanged. |
|
366 | behavior is unchanged. | |
348 |
|
367 | |||
349 | Bug Fixes |
|
368 | Bug Fixes | |
350 | --------- |
|
369 | --------- | |
351 |
|
370 | |||
352 | * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data |
|
371 | * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data | |
353 | from internal buffers in more scenarios. Before, it was possible for data to |
|
372 | from internal buffers in more scenarios. Before, it was possible for data to | |
354 | remain in internal buffers. This data would be emitted on a subsequent call |
|
373 | remain in internal buffers. This data would be emitted on a subsequent call | |
355 | to ``decompress()``. The overall output stream would still be valid. But if |
|
374 | to ``decompress()``. The overall output stream would still be valid. But if | |
356 | callers were expecting input data to exactly map to output data (say the |
|
375 | callers were expecting input data to exactly map to output data (say the | |
357 | producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to |
|
376 | producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to | |
358 | map input chunks to output chunks), then the previous behavior would be |
|
377 | map input chunks to output chunks), then the previous behavior would be | |
359 | wrong. The new behavior is such that output from |
|
378 | wrong. The new behavior is such that output from | |
360 | ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()`` |
|
379 | ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()`` | |
361 | should produce all available compressed input. |
|
380 | should produce all available compressed input. | |
362 | * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after |
|
381 | * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after | |
363 | a previous context manager resulted in error (#56). |
|
382 | a previous context manager resulted in error (#56). | |
364 | * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns |
|
383 | * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns | |
365 | all data necessary to flush a block. Before, it was possible for the |
|
384 | all data necessary to flush a block. Before, it was possible for the | |
366 | ``flush()`` to not emit all data necessary to fully represent a block. This |
|
385 | ``flush()`` to not emit all data necessary to fully represent a block. This | |
367 | would mean decompressors wouldn't be able to decompress all data that had been |
|
386 | would mean decompressors wouldn't be able to decompress all data that had been | |
368 | fed into the compressor and ``flush()``ed. (#55). |
|
387 | fed into the compressor and ``flush()``ed. (#55). | |
369 |
|
388 | |||
370 | New Features |
|
389 | New Features | |
371 | ------------ |
|
390 | ------------ | |
372 |
|
391 | |||
373 | * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``, |
|
392 | * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``, | |
374 | ``TARGETLENGTH_MAX`` that expose constants from libzstd. |
|
393 | ``TARGETLENGTH_MAX`` that expose constants from libzstd. | |
375 | * New ``ZstdCompressor.chunker()`` API for manually feeding data into a |
|
394 | * New ``ZstdCompressor.chunker()`` API for manually feeding data into a | |
376 | compressor and emitting chunks of a fixed size. Like ``compressobj()``, the |
|
395 | compressor and emitting chunks of a fixed size. Like ``compressobj()``, the | |
377 | API doesn't impose restrictions on the input or output types for the |
|
396 | API doesn't impose restrictions on the input or output types for the | |
378 | data streams. Unlike ``compressobj()``, it ensures output chunks are of a |
|
397 | data streams. Unlike ``compressobj()``, it ensures output chunks are of a | |
379 | fixed size. This makes this API useful when the compressed output is being |
|
398 | fixed size. This makes this API useful when the compressed output is being | |
380 | fed into an I/O layer, where uniform write sizes are useful. |
|
399 | fed into an I/O layer, where uniform write sizes are useful. | |
381 | * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context |
|
400 | * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context | |
382 | manager (#34). |
|
401 | manager (#34). | |
383 | * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context |
|
402 | * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context | |
384 | manager (#34). |
|
403 | manager (#34). | |
385 | * Bundled zstandard library upgraded from 1.3.4 to 1.3.6. |
|
404 | * Bundled zstandard library upgraded from 1.3.4 to 1.3.6. | |
386 |
|
405 | |||
387 | Changes |
|
406 | Changes | |
388 | ------- |
|
407 | ------- | |
389 |
|
408 | |||
390 | * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``. |
|
409 | * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``. | |
391 | * ``zstandard.__version__`` is now defined (#50). |
|
410 | * ``zstandard.__version__`` is now defined (#50). | |
392 | * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions. |
|
411 | * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions. | |
393 | * Upgrade various packages used in CI to latest versions. Notably tox (in |
|
412 | * Upgrade various packages used in CI to latest versions. Notably tox (in | |
394 | order to support Python 3.7). |
|
413 | order to support Python 3.7). | |
395 | * Use relative paths in setup.py to appease Python 3.7 (#51). |
|
414 | * Use relative paths in setup.py to appease Python 3.7 (#51). | |
396 | * Added CI for Python 3.7. |
|
415 | * Added CI for Python 3.7. | |
397 |
|
416 | |||
398 | 0.9.1 (released 2018-06-04) |
|
417 | 0.9.1 (released 2018-06-04) | |
399 | =========================== |
|
418 | =========================== | |
400 |
|
419 | |||
401 | * Debian packaging support. |
|
420 | * Debian packaging support. | |
402 | * Fix typo in setup.py (#44). |
|
421 | * Fix typo in setup.py (#44). | |
403 | * Support building with mingw compiler (#46). |
|
422 | * Support building with mingw compiler (#46). | |
404 |
|
423 | |||
405 | 0.9.0 (released 2018-04-08) |
|
424 | 0.9.0 (released 2018-04-08) | |
406 | =========================== |
|
425 | =========================== | |
407 |
|
426 | |||
408 | Backwards Compatibility Notes |
|
427 | Backwards Compatibility Notes | |
409 | ----------------------------- |
|
428 | ----------------------------- | |
410 |
|
429 | |||
411 | * CFFI 1.11 or newer is now required (previous requirement was 1.8). |
|
430 | * CFFI 1.11 or newer is now required (previous requirement was 1.8). | |
412 | * The primary module is now ``zstandard``. Please change imports of ``zstd`` |
|
431 | * The primary module is now ``zstandard``. Please change imports of ``zstd`` | |
413 | and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support |
|
432 | and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support | |
414 | for importing the old names will be dropped in the next release. |
|
433 | for importing the old names will be dropped in the next release. | |
415 | * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have |
|
434 | * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have | |
416 | been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new |
|
435 | been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new | |
417 | name and will be deleted in a future release. |
|
436 | name and will be deleted in a future release. | |
418 | * Support for Python 2.6 has been removed. |
|
437 | * Support for Python 2.6 has been removed. | |
419 | * Support for Python 3.3 has been removed. |
|
438 | * Support for Python 3.3 has been removed. | |
420 | * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as |
|
439 | * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as | |
421 | the feature disappeared from zstd 1.3. |
|
440 | the feature disappeared from zstd 1.3. | |
422 | * Support for legacy dictionaries has been removed. Cover dictionaries are now |
|
441 | * Support for legacy dictionaries has been removed. Cover dictionaries are now | |
423 | the default. ``train_cover_dictionary()`` has effectively been renamed to |
|
442 | the default. ``train_cover_dictionary()`` has effectively been renamed to | |
424 | ``train_dictionary()``. |
|
443 | ``train_dictionary()``. | |
425 | * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been |
|
444 | * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been | |
426 | deleted and the method now allows empty inputs to be compressed by default. |
|
445 | deleted and the method now allows empty inputs to be compressed by default. | |
427 | * ``estimate_compression_context_size()`` has been removed. Use |
|
446 | * ``estimate_compression_context_size()`` has been removed. Use | |
428 | ``CompressionParameters.estimated_compression_context_size()`` instead. |
|
447 | ``CompressionParameters.estimated_compression_context_size()`` instead. | |
429 | * ``get_compression_parameters()`` has been removed. Use |
|
448 | * ``get_compression_parameters()`` has been removed. Use | |
430 | ``CompressionParameters.from_level()`` instead. |
|
449 | ``CompressionParameters.from_level()`` instead. | |
431 | * The arguments to ``CompressionParameters.__init__()`` have changed. If you |
|
450 | * The arguments to ``CompressionParameters.__init__()`` have changed. If you | |
432 | were using positional arguments before, the positions now map to different |
|
451 | were using positional arguments before, the positions now map to different | |
433 | arguments. It is recommended to use keyword arguments to construct |
|
452 | arguments. It is recommended to use keyword arguments to construct | |
434 | ``CompressionParameters`` instances. |
|
453 | ``CompressionParameters`` instances. | |
435 | * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard |
|
454 | * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard | |
436 | 1.3.4). |
|
455 | 1.3.4). | |
437 | * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been |
|
456 | * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been | |
438 | renamed to ``ZstdCompressor.stream_writer()`` and |
|
457 | renamed to ``ZstdCompressor.stream_writer()`` and | |
439 | ``ZstdDecompressor.stream_writer()``, respectively. The old names are still |
|
458 | ``ZstdDecompressor.stream_writer()``, respectively. The old names are still | |
440 | aliased, but will be removed in the next major release. |
|
459 | aliased, but will be removed in the next major release. | |
441 | * Content sizes are written into frame headers by default |
|
460 | * Content sizes are written into frame headers by default | |
442 | (``ZstdCompressor(write_content_size=True)`` is now the default). |
|
461 | (``ZstdCompressor(write_content_size=True)`` is now the default). | |
443 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters`` |
|
462 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters`` | |
444 | for consistency with other types. The old name is an alias and will be removed |
|
463 | for consistency with other types. The old name is an alias and will be removed | |
445 | in the next major release. |
|
464 | in the next major release. | |
446 |
|
465 | |||
447 | Bug Fixes |
|
466 | Bug Fixes | |
448 | --------- |
|
467 | --------- | |
449 |
|
468 | |||
450 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2). |
|
469 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2). | |
451 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2). |
|
470 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2). | |
452 | * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``. |
|
471 | * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``. | |
453 |
|
472 | |||
454 | New Features |
|
473 | New Features | |
455 | ------------ |
|
474 | ------------ | |
456 |
|
475 | |||
457 | * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various |
|
476 | * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various | |
458 | bug fixes and performance improvements. It also gives us access to newer |
|
477 | bug fixes and performance improvements. It also gives us access to newer | |
459 | features. |
|
478 | features. | |
460 | * Support for negative compression levels. |
|
479 | * Support for negative compression levels. | |
461 | * Support for *long distance matching* (facilitates compression ratios that approach |
|
480 | * Support for *long distance matching* (facilitates compression ratios that approach | |
462 | LZMA). |
|
481 | LZMA). | |
463 | * Support for reading empty zstandard frames (with an embedded content size |
|
482 | * Support for reading empty zstandard frames (with an embedded content size | |
464 | of 0). |
|
483 | of 0). | |
465 | * Support for writing and partial support for reading zstandard frames without a |
|
484 | * Support for writing and partial support for reading zstandard frames without a | |
466 | magic header. |
|
485 | magic header. | |
467 | * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows |
|
486 | * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows | |
468 | you to ``.read()`` from a file-like object). |
|
487 | you to ``.read()`` from a file-like object). | |
469 | * Several minor features, bug fixes, and performance enhancements. |
|
488 | * Several minor features, bug fixes, and performance enhancements. | |
470 | * Wheels for Linux and macOS are now provided with releases. |
|
489 | * Wheels for Linux and macOS are now provided with releases. | |
471 |
|
490 | |||
472 | Changes |
|
491 | Changes | |
473 | ------- |
|
492 | ------- | |
474 |
|
493 | |||
475 | * Functions accepting bytes data now use the buffer protocol and can accept |
|
494 | * Functions accepting bytes data now use the buffer protocol and can accept | |
476 | more types (like ``memoryview`` and ``bytearray``) (#26). |
|
495 | more types (like ``memoryview`` and ``bytearray``) (#26). | |
477 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
496 | * Add #includes so compilation on OS X and BSDs works (#20). | |
478 | * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream |
|
497 | * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream | |
479 | of decompressed data for a source. |
|
498 | of decompressed data for a source. | |
480 | * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of |
|
499 | * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of | |
481 | compressed data for a source. |
|
500 | compressed data for a source. | |
482 | * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``. |
|
501 | * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``. | |
483 | The old name is still available. |
|
502 | The old name is still available. | |
484 | * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``. |
|
503 | * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``. | |
485 | ``read_from()`` is still available at its old location. |
|
504 | ``read_from()`` is still available at its old location. | |
486 | * Introduce the ``zstandard`` module to import and re-export the C or CFFI |
|
505 | * Introduce the ``zstandard`` module to import and re-export the C or CFFI | |
487 | *backend* as appropriate. Behavior can be controlled via the |
|
506 | *backend* as appropriate. Behavior can be controlled via the | |
488 | ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for |
|
507 | ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for | |
489 | usage info. |
|
508 | usage info. | |
490 | * Vendored version of zstd upgraded to 1.3.4. |
|
509 | * Vendored version of zstd upgraded to 1.3.4. | |
491 | * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``. |
|
510 | * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``. | |
492 | * Add ``STRATEGY_BTULTRA`` compression strategy constant. |
|
511 | * Add ``STRATEGY_BTULTRA`` compression strategy constant. | |
493 | * Switch from deprecated ``ZSTD_getDecompressedSize()`` to |
|
512 | * Switch from deprecated ``ZSTD_getDecompressedSize()`` to | |
494 | ``ZSTD_getFrameContentSize()`` replacement. |
|
513 | ``ZSTD_getFrameContentSize()`` replacement. | |
495 | * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring |
|
514 | * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring | |
496 | special handling. |
|
515 | special handling. | |
497 | * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()`` |
|
516 | * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()`` | |
498 | method for determining the current memory utilization of the underlying zstd |
|
517 | method for determining the current memory utilization of the underlying zstd | |
499 | primitive. |
|
518 | primitive. | |
500 | * ``train_dictionary()`` has new arguments and functionality for trying multiple |
|
519 | * ``train_dictionary()`` has new arguments and functionality for trying multiple | |
501 | variations of COVER parameters and selecting the best one. |
|
520 | variations of COVER parameters and selecting the best one. | |
502 | * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and |
|
521 | * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and | |
503 | ``LDM_BUCKETSIZELOG_MAX``. |
|
522 | ``LDM_BUCKETSIZELOG_MAX``. | |
504 | * Converted all consumers to the zstandard *new advanced API*, which uses |
|
523 | * Converted all consumers to the zstandard *new advanced API*, which uses | |
505 | ``ZSTD_compress_generic()`` |
|
524 | ``ZSTD_compress_generic()`` | |
506 | * ``CompressionParameters.__init__`` now accepts several more arguments, |
|
525 | * ``CompressionParameters.__init__`` now accepts several more arguments, | |
507 | including support for *long distance matching*. |
|
526 | including support for *long distance matching*. | |
508 | * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that |
|
527 | * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that | |
509 | controls how the dictionary should be interpreted. This can be used to |
|
528 | controls how the dictionary should be interpreted. This can be used to | |
510 | force the use of *content-only* dictionaries or to require the presence |
|
529 | force the use of *content-only* dictionaries or to require the presence | |
511 | of the dictionary magic header. |
|
530 | of the dictionary magic header. | |
512 | * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the |
|
531 | * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the | |
513 | compression dictionary so it can efficiently be used with multiple |
|
532 | compression dictionary so it can efficiently be used with multiple | |
514 | ``ZstdCompressor`` instances. |
|
533 | ``ZstdCompressor`` instances. | |
515 | * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances, |
|
534 | * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances, | |
516 | created automatically on first use, and automatically reused by all |
|
535 | created automatically on first use, and automatically reused by all | |
517 | ``ZstdDecompressor`` instances bound to that dictionary. |
|
536 | ``ZstdDecompressor`` instances bound to that dictionary. | |
518 | * All meaningful functions now accept keyword arguments. |
|
537 | * All meaningful functions now accept keyword arguments. | |
519 | * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument |
|
538 | * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument | |
520 | to control how much work to perform on every decompressor invocation. |
|
539 | to control how much work to perform on every decompressor invocation. | |
521 | * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the |
|
540 | * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the | |
522 | total number of bytes written so far. |
|
541 | total number of bytes written so far. | |
523 | * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving |
|
542 | * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving | |
524 | forward in the stream. |
|
543 | forward in the stream. | |
525 | * Removed ``TARGETLENGTH_MAX`` constant. |
|
544 | * Removed ``TARGETLENGTH_MAX`` constant. | |
526 | * Added ``frame_header_size(data)`` function. |
|
545 | * Added ``frame_header_size(data)`` function. | |
527 | * Added ``frame_content_size(data)`` function. |
|
546 | * Added ``frame_content_size(data)`` function. | |
528 | * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced |
|
547 | * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced | |
529 | decompression* API. |
|
548 | decompression* API. | |
530 | * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with |
|
549 | * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with | |
531 | negative compression levels. |
|
550 | negative compression levels. | |
532 | * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the |
|
551 | * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the | |
533 | amount of memory required for decompression operations. |
|
552 | amount of memory required for decompression operations. | |
534 | * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with |
|
553 | * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with | |
535 | the ``format`` compression parameter to control whether the frame magic |
|
554 | the ``format`` compression parameter to control whether the frame magic | |
536 | header is written. |
|
555 | header is written. | |
537 | * ``ZstdDecompressor`` now accepts a ``format`` argument to control the |
|
556 | * ``ZstdDecompressor`` now accepts a ``format`` argument to control the | |
538 | expected frame format. |
|
557 | expected frame format. | |
539 | * ``ZstdCompressor`` now has a ``frame_progression()`` method to return |
|
558 | * ``ZstdCompressor`` now has a ``frame_progression()`` method to return | |
540 | information about the current compression operation. |
|
559 | information about the current compression operation. | |
541 | * Error messages in CFFI no longer have ``b''`` literals. |
|
560 | * Error messages in CFFI no longer have ``b''`` literals. | |
542 | * Compiler warnings and underlying overflow issues on 32-bit platforms have been |
|
561 | * Compiler warnings and underlying overflow issues on 32-bit platforms have been | |
543 | fixed. |
|
562 | fixed. | |
544 | * Builds in CI now build with compiler warnings as errors. This should hopefully |
|
563 | * Builds in CI now build with compiler warnings as errors. This should hopefully | |
545 | fix new compiler warnings from being introduced. |
|
564 | fix new compiler warnings from being introduced. | |
546 | * Make ``ZstdCompressor(write_content_size=True)`` and |
|
565 | * Make ``ZstdCompressor(write_content_size=True)`` and | |
547 | ``CompressionParameters(write_content_size=True)`` the default. |
|
566 | ``CompressionParameters(write_content_size=True)`` the default. | |
548 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``. |
|
567 | * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``. | |
549 |
|
568 | |||
550 | 0.8.2 (released 2018-02-22) |
|
569 | 0.8.2 (released 2018-02-22) | |
551 | --------------------------- |
|
570 | --------------------------- | |
552 |
|
571 | |||
553 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40). |
|
572 | * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40). | |
554 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35). |
|
573 | * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35). | |
555 |
|
574 | |||
556 | 0.8.1 (released 2017-04-08) |
|
575 | 0.8.1 (released 2017-04-08) | |
557 | --------------------------- |
|
576 | --------------------------- | |
558 |
|
577 | |||
559 | * Add #includes so compilation on OS X and BSDs works (#20). |
|
578 | * Add #includes so compilation on OS X and BSDs works (#20). | |
560 |
|
579 | |||
561 | 0.8.0 (released 2017-03-08) |
|
580 | 0.8.0 (released 2017-03-08) | |
562 | =========================== |
|
581 | =========================== | |
563 |
|
582 | |||
564 | * CompressionParameters now has an estimated_compression_context_size() method. |
|
583 | * CompressionParameters now has an estimated_compression_context_size() method. | |
565 | zstd.estimate_compression_context_size() is now deprecated and slated for |
|
584 | zstd.estimate_compression_context_size() is now deprecated and slated for | |
566 | removal. |
|
585 | removal. | |
567 | * Implemented a lot of fuzzing tests. |
|
586 | * Implemented a lot of fuzzing tests. | |
568 | * CompressionParameters instances now perform extra validation by calling |
|
587 | * CompressionParameters instances now perform extra validation by calling | |
569 | ZSTD_checkCParams() at construction time. |
|
588 | ZSTD_checkCParams() at construction time. | |
570 | * multi_compress_to_buffer() API for compressing multiple inputs as a |
|
589 | * multi_compress_to_buffer() API for compressing multiple inputs as a | |
571 | single operation, as efficiently as possible. |
|
590 | single operation, as efficiently as possible. | |
572 | * ZSTD_CStream instances are now used across multiple operations on |
|
591 | * ZSTD_CStream instances are now used across multiple operations on | |
573 | ZstdCompressor instances, resulting in much better performance for |
|
592 | ZstdCompressor instances, resulting in much better performance for | |
574 | APIs that do streaming. |
|
593 | APIs that do streaming. | |
575 | * ZSTD_DStream instances are now used across multiple operations on |
|
594 | * ZSTD_DStream instances are now used across multiple operations on | |
576 | ZstdDecompressor instances, resulting in much better performance for |
|
595 | ZstdDecompressor instances, resulting in much better performance for | |
577 | APIs that do streaming. |
|
596 | APIs that do streaming. | |
578 | * train_dictionary() now releases the GIL. |
|
597 | * train_dictionary() now releases the GIL. | |
579 | * Support for training dictionaries using the COVER algorithm. |
|
598 | * Support for training dictionaries using the COVER algorithm. | |
580 | * multi_decompress_to_buffer() API for decompressing multiple frames as a |
|
599 | * multi_decompress_to_buffer() API for decompressing multiple frames as a | |
581 | single operation, as efficiently as possible. |
|
600 | single operation, as efficiently as possible. | |
582 | * Support for multi-threaded compression. |
|
601 | * Support for multi-threaded compression. | |
583 | * Disable deprecation warnings when compiling CFFI module. |
|
602 | * Disable deprecation warnings when compiling CFFI module. | |
584 | * Fixed memory leak in train_dictionary(). |
|
603 | * Fixed memory leak in train_dictionary(). | |
585 | * Removed DictParameters type. |
|
604 | * Removed DictParameters type. | |
586 | * train_dictionary() now accepts keyword arguments instead of a |
|
605 | * train_dictionary() now accepts keyword arguments instead of a | |
587 | DictParameters instance to control dictionary generation. |
|
606 | DictParameters instance to control dictionary generation. | |
588 |
|
607 | |||
589 | 0.7.0 (released 2017-02-07) |
|
608 | 0.7.0 (released 2017-02-07) | |
590 | =========================== |
|
609 | =========================== | |
591 |
|
610 | |||
592 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. |
|
611 | * Added zstd.get_frame_parameters() to obtain info about a zstd frame. | |
593 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient |
|
612 | * Added ZstdDecompressor.decompress_content_dict_chain() for efficient | |
594 | decompression of *content-only dictionary chains*. |
|
613 | decompression of *content-only dictionary chains*. | |
595 | * CFFI module fully implemented; all tests run against both C extension and |
|
614 | * CFFI module fully implemented; all tests run against both C extension and | |
596 | CFFI implementation. |
|
615 | CFFI implementation. | |
597 | * Vendored version of zstd updated to 1.1.3. |
|
616 | * Vendored version of zstd updated to 1.1.3. | |
598 | * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference() |
|
617 | * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference() | |
599 | to avoid extra memory allocation of dict data. |
|
618 | to avoid extra memory allocation of dict data. | |
600 | * Add function names to error messages (by using ":name" in PyArg_Parse* |
|
619 | * Add function names to error messages (by using ":name" in PyArg_Parse* | |
601 | functions). |
|
620 | functions). | |
602 | * Reuse decompression context across operations. Previously, we created a |
|
621 | * Reuse decompression context across operations. Previously, we created a | |
603 | new ZSTD_DCtx for each decompress(). This was measured to slow down |
|
622 | new ZSTD_DCtx for each decompress(). This was measured to slow down | |
604 | decompression by 40-200MB/s. The API guarantees say ZstdDecompressor |
|
623 | decompression by 40-200MB/s. The API guarantees say ZstdDecompressor | |
605 | is not thread safe. So we reuse the ZSTD_DCtx across operations and make |
|
624 | is not thread safe. So we reuse the ZSTD_DCtx across operations and make | |
606 | things faster in the process. |
|
625 | things faster in the process. | |
607 | * ZstdCompressor.write_to()'s compress() and flush() methods now return number |
|
626 | * ZstdCompressor.write_to()'s compress() and flush() methods now return number | |
608 | of bytes written. |
|
627 | of bytes written. | |
609 | * ZstdDecompressor.write_to()'s write() method now returns the number of bytes |
|
628 | * ZstdDecompressor.write_to()'s write() method now returns the number of bytes | |
610 | written to the underlying output object. |
|
629 | written to the underlying output object. | |
611 | * CompressionParameters instances now expose their values as attributes. |
|
630 | * CompressionParameters instances now expose their values as attributes. | |
612 | * CompressionParameters instances no longer are subscriptable nor behave |
|
631 | * CompressionParameters instances no longer are subscriptable nor behave | |
613 | as tuples (backwards incompatible). Use attributes to obtain values. |
|
632 | as tuples (backwards incompatible). Use attributes to obtain values. | |
614 | * DictParameters instances now expose their values as attributes. |
|
633 | * DictParameters instances now expose their values as attributes. | |
615 |
|
634 | |||
616 | 0.6.0 (released 2017-01-14) |
|
635 | 0.6.0 (released 2017-01-14) | |
617 | =========================== |
|
636 | =========================== | |
618 |
|
637 | |||
619 | * Support for legacy zstd protocols (build time opt in feature). |
|
638 | * Support for legacy zstd protocols (build time opt in feature). | |
620 | * Automation improvements to test against Python 3.6, latest versions |
|
639 | * Automation improvements to test against Python 3.6, latest versions | |
621 | of Tox, more deterministic AppVeyor behavior. |
|
640 | of Tox, more deterministic AppVeyor behavior. | |
622 | * CFFI "parser" improved to use a compiler preprocessor instead of rewriting |
|
641 | * CFFI "parser" improved to use a compiler preprocessor instead of rewriting | |
623 | source code manually. |
|
642 | source code manually. | |
624 | * Vendored version of zstd updated to 1.1.2. |
|
643 | * Vendored version of zstd updated to 1.1.2. | |
625 | * Documentation improvements. |
|
644 | * Documentation improvements. | |
626 | * Introduce a bench.py script for performing (crude) benchmarks. |
|
645 | * Introduce a bench.py script for performing (crude) benchmarks. | |
627 | * ZSTD_CCtx instances are now reused across multiple compress() operations. |
|
646 | * ZSTD_CCtx instances are now reused across multiple compress() operations. | |
628 | * ZstdCompressor.write_to() now has a flush() method. |
|
647 | * ZstdCompressor.write_to() now has a flush() method. | |
629 | * ZstdCompressor.compressobj()'s flush() method now accepts an argument to |
|
648 | * ZstdCompressor.compressobj()'s flush() method now accepts an argument to | |
630 | flush a block (as opposed to ending the stream). |
|
649 | flush a block (as opposed to ending the stream). | |
631 | * Disallow compress(b'') when writing content sizes by default (issue #11). |
|
650 | * Disallow compress(b'') when writing content sizes by default (issue #11). | |
632 |
|
651 | |||
633 | 0.5.2 (released 2016-11-12) |
|
652 | 0.5.2 (released 2016-11-12) | |
634 | =========================== |
|
653 | =========================== | |
635 |
|
654 | |||
636 | * more packaging fixes for source distribution |
|
655 | * more packaging fixes for source distribution | |
637 |
|
656 | |||
638 | 0.5.1 (released 2016-11-12) |
|
657 | 0.5.1 (released 2016-11-12) | |
639 | =========================== |
|
658 | =========================== | |
640 |
|
659 | |||
641 | * setup_zstd.py is included in the source distribution |
|
660 | * setup_zstd.py is included in the source distribution | |
642 |
|
661 | |||
643 | 0.5.0 (released 2016-11-10) |
|
662 | 0.5.0 (released 2016-11-10) | |
644 | =========================== |
|
663 | =========================== | |
645 |
|
664 | |||
646 | * Vendored version of zstd updated to 1.1.1. |
|
665 | * Vendored version of zstd updated to 1.1.1. | |
647 | * Continuous integration for Python 3.6 and 3.7 |
|
666 | * Continuous integration for Python 3.6 and 3.7 | |
648 | * Continuous integration for Conda |
|
667 | * Continuous integration for Conda | |
649 | * Added compression and decompression APIs providing similar interfaces |
|
668 | * Added compression and decompression APIs providing similar interfaces | |
650 | to the standard library ``zlib`` and ``bz2`` modules. This allows |
|
669 | to the standard library ``zlib`` and ``bz2`` modules. This allows | |
651 | coding to a common interface. |
|
670 | coding to a common interface. | |
652 | * ``zstd.__version__`` is now defined. |
|
671 | * ``zstd.__version__`` is now defined. | |
653 | * ``read_from()`` on various APIs now accepts objects implementing the buffer |
|
672 | * ``read_from()`` on various APIs now accepts objects implementing the buffer | |
654 | protocol. |
|
673 | protocol. | |
655 | * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers |
|
674 | * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers | |
656 | to pass in an existing buffer with a header without having to create a |
|
675 | to pass in an existing buffer with a header without having to create a | |
657 | slice or a new object. |
|
676 | slice or a new object. | |
658 | * Implemented ``ZstdCompressionDict.as_bytes()``. |
|
677 | * Implemented ``ZstdCompressionDict.as_bytes()``. | |
659 | * Python's memory allocator is now used instead of ``malloc()``. |
|
678 | * Python's memory allocator is now used instead of ``malloc()``. | |
660 | * Low-level zstd data structures are reused in more instances, cutting down |
|
679 | * Low-level zstd data structures are reused in more instances, cutting down | |
661 | on overhead for certain operations. |
|
680 | on overhead for certain operations. | |
662 | * ``distutils`` boilerplate for obtaining an ``Extension`` instance |
|
681 | * ``distutils`` boilerplate for obtaining an ``Extension`` instance | |
663 | has now been refactored into a standalone ``setup_zstd.py`` file. This |
|
682 | has now been refactored into a standalone ``setup_zstd.py`` file. This | |
664 | allows other projects with ``setup.py`` files to reuse the |
|
683 | allows other projects with ``setup.py`` files to reuse the | |
665 | ``distutils`` code for this project without copying code. |
|
684 | ``distutils`` code for this project without copying code. | |
666 | * The monolithic ``zstd.c`` file has been split into a header file defining |
|
685 | * The monolithic ``zstd.c`` file has been split into a header file defining | |
667 | types and separate ``.c`` source files for the implementation. |
|
686 | types and separate ``.c`` source files for the implementation. | |
668 |
|
687 | |||
669 | Older History |
|
688 | Older History | |
670 | ============= |
|
689 | ============= | |
671 |
|
690 | |||
672 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a |
|
691 | 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a | |
673 | Python extension for use by the Mercurial project. A very hacky prototype |
|
692 | Python extension for use by the Mercurial project. A very hacky prototype | |
674 | is sent to the mercurial-devel list for RFC. |
|
693 | is sent to the mercurial-devel list for RFC. | |
675 |
|
694 | |||
676 | 2016-09-03 - Most functionality from Zstandard C API implemented. Source |
|
695 | 2016-09-03 - Most functionality from Zstandard C API implemented. Source | |
677 | code published on https://github.com/indygreg/python-zstandard. Travis-CI |
|
696 | code published on https://github.com/indygreg/python-zstandard. Travis-CI | |
678 | automation configured. 0.0.1 release on PyPI. |
|
697 | automation configured. 0.0.1 release on PyPI. | |
679 |
|
698 | |||
680 | 2016-09-05 - After the API was rounded out a bit and support for Python |
|
699 | 2016-09-05 - After the API was rounded out a bit and support for Python | |
681 | 2.6 and 2.7 was added, version 0.1 was released to PyPI. |
|
700 | 2.6 and 2.7 was added, version 0.1 was released to PyPI. | |
682 |
|
701 | |||
683 | 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2 |
|
702 | 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2 | |
684 | was released to PyPI. |
|
703 | was released to PyPI. | |
685 |
|
704 | |||
686 | 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor |
|
705 | 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor | |
687 | now accepts arguments controlling frame parameters. The source size can now |
|
706 | now accepts arguments controlling frame parameters. The source size can now | |
688 | be declared when performing streaming compression. ZstdDecompressor.decompress() |
|
707 | be declared when performing streaming compression. ZstdDecompressor.decompress() | |
689 | is implemented. Compression dictionaries are now cached when using the simple |
|
708 | is implemented. Compression dictionaries are now cached when using the simple | |
690 | compression and decompression APIs. Memory size APIs added. |
|
709 | compression and decompression APIs. Memory size APIs added. | |
691 | ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been |
|
710 | ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been | |
692 | implemented. This rounds out the major compression/decompression APIs planned |
|
711 | implemented. This rounds out the major compression/decompression APIs planned | |
693 | by the author. |
|
712 | by the author. | |
694 |
|
713 | |||
695 | 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully |
|
714 | 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully | |
696 | decoding a zstd frame (issue #2). |
|
715 | decoding a zstd frame (issue #2). | |
697 |
|
716 | |||
698 | 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and |
|
717 | 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and | |
699 | write buffer sizes, and a few bug fixes involving failure to read/write |
|
718 | write buffer sizes, and a few bug fixes involving failure to read/write | |
700 | all data when buffer sizes were too small to hold remaining data. |
|
719 | all data when buffer sizes were too small to hold remaining data. | |
701 |
|
720 | |||
702 | 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements. |
|
721 | 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements. |
@@ -1,1602 +1,1602 b'' | |||||
1 | ================ |
|
1 | ================ | |
2 | python-zstandard |
|
2 | python-zstandard | |
3 | ================ |
|
3 | ================ | |
4 |
|
4 | |||
5 | This project provides Python bindings for interfacing with the |
|
5 | This project provides Python bindings for interfacing with the | |
6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension |
|
6 | `Zstandard <http://www.zstd.net>`_ compression library. A C extension | |
7 | and CFFI interface are provided. |
|
7 | and CFFI interface are provided. | |
8 |
|
8 | |||
9 | The primary goal of the project is to provide a rich interface to the |
|
9 | The primary goal of the project is to provide a rich interface to the | |
10 | underlying C API through a Pythonic interface while not sacrificing |
|
10 | underlying C API through a Pythonic interface while not sacrificing | |
11 | performance. This means exposing most of the features and flexibility |
|
11 | performance. This means exposing most of the features and flexibility | |
12 | of the C API while not sacrificing usability or safety that Python provides. |
|
12 | of the C API while not sacrificing usability or safety that Python provides. | |
13 |
|
13 | |||
14 | The canonical home for this project lives in a Mercurial repository run by |
|
14 | The canonical home for this project lives in a Mercurial repository run by | |
15 | the author. For convenience, that repository is frequently synchronized to |
|
15 | the author. For convenience, that repository is frequently synchronized to | |
16 | https://github.com/indygreg/python-zstandard. |
|
16 | https://github.com/indygreg/python-zstandard. | |
17 |
|
17 | |||
18 | | |ci-status| |
|
18 | | |ci-status| | |
19 |
|
19 | |||
20 | Requirements |
|
20 | Requirements | |
21 | ============ |
|
21 | ============ | |
22 |
|
22 | |||
23 |
This extension is designed to run with Python 2.7, 3. |
|
23 | This extension is designed to run with Python 2.7, 3.5, 3.6, 3.7, and 3.8 | |
24 | on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above. |
|
24 | on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above. | |
25 | x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS. |
|
25 | x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS. | |
26 |
|
26 | |||
27 | Installing |
|
27 | Installing | |
28 | ========== |
|
28 | ========== | |
29 |
|
29 | |||
30 | This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard. |
|
30 | This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard. | |
31 | So, to install this package:: |
|
31 | So, to install this package:: | |
32 |
|
32 | |||
33 | $ pip install zstandard |
|
33 | $ pip install zstandard | |
34 |
|
34 | |||
35 | Binary wheels are made available for some platforms. If you need to |
|
35 | Binary wheels are made available for some platforms. If you need to | |
36 | install from a source distribution, all you should need is a working C |
|
36 | install from a source distribution, all you should need is a working C | |
37 | compiler and the Python development headers/libraries. On many Linux |
|
37 | compiler and the Python development headers/libraries. On many Linux | |
38 | distributions, you can install a ``python-dev`` or ``python-devel`` |
|
38 | distributions, you can install a ``python-dev`` or ``python-devel`` | |
39 | package to provide these dependencies. |
|
39 | package to provide these dependencies. | |
40 |
|
40 | |||
41 | Packages are also uploaded to Anaconda Cloud at |
|
41 | Packages are also uploaded to Anaconda Cloud at | |
42 | https://anaconda.org/indygreg/zstandard. See that URL for how to install |
|
42 | https://anaconda.org/indygreg/zstandard. See that URL for how to install | |
43 | this package with ``conda``. |
|
43 | this package with ``conda``. | |
44 |
|
44 | |||
45 | Performance |
|
45 | Performance | |
46 | =========== |
|
46 | =========== | |
47 |
|
47 | |||
48 | zstandard is a highly tunable compression algorithm. In its default settings |
|
48 | zstandard is a highly tunable compression algorithm. In its default settings | |
49 | (compression level 3), it will be faster at compression and decompression and |
|
49 | (compression level 3), it will be faster at compression and decompression and | |
50 | will have better compression ratios than zlib on most data sets. When tuned |
|
50 | will have better compression ratios than zlib on most data sets. When tuned | |
51 | for speed, it approaches lz4's speed and ratios. When tuned for compression |
|
51 | for speed, it approaches lz4's speed and ratios. When tuned for compression | |
52 | ratio, it approaches lzma ratios and compression speed, but decompression |
|
52 | ratio, it approaches lzma ratios and compression speed, but decompression | |
53 | speed is much faster. See the official zstandard documentation for more. |
|
53 | speed is much faster. See the official zstandard documentation for more. | |
54 |
|
54 | |||
55 | zstandard and this library support multi-threaded compression. There is a |
|
55 | zstandard and this library support multi-threaded compression. There is a | |
56 | mechanism to compress large inputs using multiple threads. |
|
56 | mechanism to compress large inputs using multiple threads. | |
57 |
|
57 | |||
58 | The performance of this library is usually very similar to what the zstandard |
|
58 | The performance of this library is usually very similar to what the zstandard | |
59 | C API can deliver. Overhead in this library is due to general Python overhead |
|
59 | C API can deliver. Overhead in this library is due to general Python overhead | |
60 | and can't easily be avoided by *any* zstandard Python binding. This library |
|
60 | and can't easily be avoided by *any* zstandard Python binding. This library | |
61 | exposes multiple APIs for performing compression and decompression so callers |
|
61 | exposes multiple APIs for performing compression and decompression so callers | |
62 | can pick an API suitable for their need. Contrast with the compression |
|
62 | can pick an API suitable for their need. Contrast with the compression | |
63 | modules in Python's standard library (like ``zlib``), which only offer limited |
|
63 | modules in Python's standard library (like ``zlib``), which only offer limited | |
64 | mechanisms for performing operations. The API flexibility means consumers can |
|
64 | mechanisms for performing operations. The API flexibility means consumers can | |
65 | choose to use APIs that facilitate zero copying or minimize Python object |
|
65 | choose to use APIs that facilitate zero copying or minimize Python object | |
66 | creation and garbage collection overhead. |
|
66 | creation and garbage collection overhead. | |
67 |
|
67 | |||
68 | This library is capable of single-threaded throughputs well over 1 GB/s. For |
|
68 | This library is capable of single-threaded throughputs well over 1 GB/s. For | |
69 | exact numbers, measure yourself. The source code repository has a ``bench.py`` |
|
69 | exact numbers, measure yourself. The source code repository has a ``bench.py`` | |
70 | script that can be used to measure things. |
|
70 | script that can be used to measure things. | |
71 |
|
71 | |||
72 | API |
|
72 | API | |
73 | === |
|
73 | === | |
74 |
|
74 | |||
75 | To interface with Zstandard, simply import the ``zstandard`` module:: |
|
75 | To interface with Zstandard, simply import the ``zstandard`` module:: | |
76 |
|
76 | |||
77 | import zstandard |
|
77 | import zstandard | |
78 |
|
78 | |||
79 | It is a popular convention to alias the module as a different name for |
|
79 | It is a popular convention to alias the module as a different name for | |
80 | brevity:: |
|
80 | brevity:: | |
81 |
|
81 | |||
82 | import zstandard as zstd |
|
82 | import zstandard as zstd | |
83 |
|
83 | |||
84 | This module attempts to import and use either the C extension or CFFI |
|
84 | This module attempts to import and use either the C extension or CFFI | |
85 | implementation. On Python platforms known to support C extensions (like |
|
85 | implementation. On Python platforms known to support C extensions (like | |
86 | CPython), it raises an ImportError if the C extension cannot be imported. |
|
86 | CPython), it raises an ImportError if the C extension cannot be imported. | |
87 | On Python platforms known to not support C extensions (like PyPy), it only |
|
87 | On Python platforms known to not support C extensions (like PyPy), it only | |
88 | attempts to import the CFFI implementation and raises ImportError if that |
|
88 | attempts to import the CFFI implementation and raises ImportError if that | |
89 | can't be done. On other platforms, it first tries to import the C extension |
|
89 | can't be done. On other platforms, it first tries to import the C extension | |
90 | then falls back to CFFI if that fails and raises ImportError if CFFI fails. |
|
90 | then falls back to CFFI if that fails and raises ImportError if CFFI fails. | |
91 |
|
91 | |||
92 | To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY`` |
|
92 | To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY`` | |
93 | environment variable can be set. The following values are accepted: |
|
93 | environment variable can be set. The following values are accepted: | |
94 |
|
94 | |||
95 | default |
|
95 | default | |
96 | The behavior described above. |
|
96 | The behavior described above. | |
97 | cffi_fallback |
|
97 | cffi_fallback | |
98 | Always try to import the C extension then fall back to CFFI if that |
|
98 | Always try to import the C extension then fall back to CFFI if that | |
99 | fails. |
|
99 | fails. | |
100 | cext |
|
100 | cext | |
101 | Only attempt to import the C extension. |
|
101 | Only attempt to import the C extension. | |
102 | cffi |
|
102 | cffi | |
103 | Only attempt to import the CFFI implementation. |
|
103 | Only attempt to import the CFFI implementation. | |
104 |
|
104 | |||
105 | In addition, the ``zstandard`` module exports a ``backend`` attribute |
|
105 | In addition, the ``zstandard`` module exports a ``backend`` attribute | |
106 | containing the string name of the backend being used. It will be one |
|
106 | containing the string name of the backend being used. It will be one | |
107 | of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively). |
|
107 | of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively). | |
108 |
|
108 | |||
109 | The types, functions, and attributes exposed by the ``zstandard`` module |
|
109 | The types, functions, and attributes exposed by the ``zstandard`` module | |
110 | are documented in the sections below. |
|
110 | are documented in the sections below. | |
111 |
|
111 | |||
112 | .. note:: |
|
112 | .. note:: | |
113 |
|
113 | |||
114 | The documentation in this section makes references to various zstd |
|
114 | The documentation in this section makes references to various zstd | |
115 | concepts and functionality. The source repository contains a |
|
115 | concepts and functionality. The source repository contains a | |
116 | ``docs/concepts.rst`` file explaining these in more detail. |
|
116 | ``docs/concepts.rst`` file explaining these in more detail. | |
117 |
|
117 | |||
118 | ZstdCompressor |
|
118 | ZstdCompressor | |
119 | -------------- |
|
119 | -------------- | |
120 |
|
120 | |||
121 | The ``ZstdCompressor`` class provides an interface for performing |
|
121 | The ``ZstdCompressor`` class provides an interface for performing | |
122 | compression operations. Each instance is essentially a wrapper around a |
|
122 | compression operations. Each instance is essentially a wrapper around a | |
123 | ``ZSTD_CCtx`` from the C API. |
|
123 | ``ZSTD_CCtx`` from the C API. | |
124 |
|
124 | |||
125 | Each instance is associated with parameters that control compression |
|
125 | Each instance is associated with parameters that control compression | |
126 | behavior. These come from the following named arguments (all optional): |
|
126 | behavior. These come from the following named arguments (all optional): | |
127 |
|
127 | |||
128 | level |
|
128 | level | |
129 | Integer compression level. Valid values are between 1 and 22. |
|
129 | Integer compression level. Valid values are between 1 and 22. | |
130 | dict_data |
|
130 | dict_data | |
131 | Compression dictionary to use. |
|
131 | Compression dictionary to use. | |
132 |
|
132 | |||
133 | Note: When using dictionary data and ``compress()`` is called multiple |
|
133 | Note: When using dictionary data and ``compress()`` is called multiple | |
134 | times, the ``ZstdCompressionParameters`` derived from an integer |
|
134 | times, the ``ZstdCompressionParameters`` derived from an integer | |
135 | compression ``level`` and the first compressed data's size will be reused |
|
135 | compression ``level`` and the first compressed data's size will be reused | |
136 | for all subsequent operations. This may not be desirable if source data |
|
136 | for all subsequent operations. This may not be desirable if source data | |
137 | size varies significantly. |
|
137 | size varies significantly. | |
138 | compression_params |
|
138 | compression_params | |
139 | A ``ZstdCompressionParameters`` instance defining compression settings. |
|
139 | A ``ZstdCompressionParameters`` instance defining compression settings. | |
140 | write_checksum |
|
140 | write_checksum | |
141 | Whether a 4 byte checksum should be written with the compressed data. |
|
141 | Whether a 4 byte checksum should be written with the compressed data. | |
142 | Defaults to False. If True, the decompressor can verify that decompressed |
|
142 | Defaults to False. If True, the decompressor can verify that decompressed | |
143 | data matches the original input data. |
|
143 | data matches the original input data. | |
144 | write_content_size |
|
144 | write_content_size | |
145 | Whether the size of the uncompressed data will be written into the |
|
145 | Whether the size of the uncompressed data will be written into the | |
146 | header of compressed data. Defaults to True. The data will only be |
|
146 | header of compressed data. Defaults to True. The data will only be | |
147 | written if the compressor knows the size of the input data. This is |
|
147 | written if the compressor knows the size of the input data. This is | |
148 | often not true for streaming compression. |
|
148 | often not true for streaming compression. | |
149 | write_dict_id |
|
149 | write_dict_id | |
150 | Whether to write the dictionary ID into the compressed data. |
|
150 | Whether to write the dictionary ID into the compressed data. | |
151 | Defaults to True. The dictionary ID is only written if a dictionary |
|
151 | Defaults to True. The dictionary ID is only written if a dictionary | |
152 | is being used. |
|
152 | is being used. | |
153 | threads |
|
153 | threads | |
154 | Enables and sets the number of threads to use for multi-threaded compression |
|
154 | Enables and sets the number of threads to use for multi-threaded compression | |
155 | operations. Defaults to 0, which means to use single-threaded compression. |
|
155 | operations. Defaults to 0, which means to use single-threaded compression. | |
156 | Negative values will resolve to the number of logical CPUs in the system. |
|
156 | Negative values will resolve to the number of logical CPUs in the system. | |
157 | Read below for more info on multi-threaded compression. This argument only |
|
157 | Read below for more info on multi-threaded compression. This argument only | |
158 | controls thread count for operations that operate on individual pieces of |
|
158 | controls thread count for operations that operate on individual pieces of | |
159 | data. APIs that spawn multiple threads for working on multiple pieces of |
|
159 | data. APIs that spawn multiple threads for working on multiple pieces of | |
160 | data have their own ``threads`` argument. |
|
160 | data have their own ``threads`` argument. | |
161 |
|
161 | |||
162 | ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``, |
|
162 | ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``, | |
163 | ``write_content_size``, ``write_dict_id``, and ``threads``. |
|
163 | ``write_content_size``, ``write_dict_id``, and ``threads``. | |
164 |
|
164 | |||
165 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` |
|
165 | Unless specified otherwise, assume that no two methods of ``ZstdCompressor`` | |
166 | instances can be called from multiple Python threads simultaneously. In other |
|
166 | instances can be called from multiple Python threads simultaneously. In other | |
167 | words, assume instances are not thread safe unless stated otherwise. |
|
167 | words, assume instances are not thread safe unless stated otherwise. | |
168 |
|
168 | |||
169 | Utility Methods |
|
169 | Utility Methods | |
170 | ^^^^^^^^^^^^^^^ |
|
170 | ^^^^^^^^^^^^^^^ | |
171 |
|
171 | |||
172 | ``frame_progression()`` returns a 3-tuple containing the number of bytes |
|
172 | ``frame_progression()`` returns a 3-tuple containing the number of bytes | |
173 | ingested, consumed, and produced by the current compression operation. |
|
173 | ingested, consumed, and produced by the current compression operation. | |
174 |
|
174 | |||
175 | ``memory_size()`` obtains the memory utilization of the underlying zstd |
|
175 | ``memory_size()`` obtains the memory utilization of the underlying zstd | |
176 | compression context, in bytes.:: |
|
176 | compression context, in bytes.:: | |
177 |
|
177 | |||
178 | cctx = zstd.ZstdCompressor() |
|
178 | cctx = zstd.ZstdCompressor() | |
179 | memory = cctx.memory_size() |
|
179 | memory = cctx.memory_size() | |
180 |
|
180 | |||
181 | Simple API |
|
181 | Simple API | |
182 | ^^^^^^^^^^ |
|
182 | ^^^^^^^^^^ | |
183 |
|
183 | |||
184 | ``compress(data)`` compresses and returns data as a one-shot operation.:: |
|
184 | ``compress(data)`` compresses and returns data as a one-shot operation.:: | |
185 |
|
185 | |||
186 | cctx = zstd.ZstdCompressor() |
|
186 | cctx = zstd.ZstdCompressor() | |
187 | compressed = cctx.compress(b'data to compress') |
|
187 | compressed = cctx.compress(b'data to compress') | |
188 |
|
188 | |||
189 | The ``data`` argument can be any object that implements the *buffer protocol*. |
|
189 | The ``data`` argument can be any object that implements the *buffer protocol*. | |
190 |
|
190 | |||
191 | Stream Reader API |
|
191 | Stream Reader API | |
192 | ^^^^^^^^^^^^^^^^^ |
|
192 | ^^^^^^^^^^^^^^^^^ | |
193 |
|
193 | |||
194 | ``stream_reader(source)`` can be used to obtain an object conforming to the |
|
194 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |
195 | ``io.RawIOBase`` interface for reading compressed output as a stream:: |
|
195 | ``io.RawIOBase`` interface for reading compressed output as a stream:: | |
196 |
|
196 | |||
197 | with open(path, 'rb') as fh: |
|
197 | with open(path, 'rb') as fh: | |
198 | cctx = zstd.ZstdCompressor() |
|
198 | cctx = zstd.ZstdCompressor() | |
199 | reader = cctx.stream_reader(fh) |
|
199 | reader = cctx.stream_reader(fh) | |
200 | while True: |
|
200 | while True: | |
201 | chunk = reader.read(16384) |
|
201 | chunk = reader.read(16384) | |
202 | if not chunk: |
|
202 | if not chunk: | |
203 | break |
|
203 | break | |
204 |
|
204 | |||
205 | # Do something with compressed chunk. |
|
205 | # Do something with compressed chunk. | |
206 |
|
206 | |||
207 | Instances can also be used as context managers:: |
|
207 | Instances can also be used as context managers:: | |
208 |
|
208 | |||
209 | with open(path, 'rb') as fh: |
|
209 | with open(path, 'rb') as fh: | |
210 | with cctx.stream_reader(fh) as reader: |
|
210 | with cctx.stream_reader(fh) as reader: | |
211 | while True: |
|
211 | while True: | |
212 | chunk = reader.read(16384) |
|
212 | chunk = reader.read(16384) | |
213 | if not chunk: |
|
213 | if not chunk: | |
214 | break |
|
214 | break | |
215 |
|
215 | |||
216 | # Do something with compressed chunk. |
|
216 | # Do something with compressed chunk. | |
217 |
|
217 | |||
218 | When the context manager exits or ``close()`` is called, the stream is closed, |
|
218 | When the context manager exits or ``close()`` is called, the stream is closed, | |
219 | underlying resources are released, and future operations against the compression |
|
219 | underlying resources are released, and future operations against the compression | |
220 | stream will fail. |
|
220 | stream will fail. | |
221 |
|
221 | |||
222 | The ``source`` argument to ``stream_reader()`` can be any object with a |
|
222 | The ``source`` argument to ``stream_reader()`` can be any object with a | |
223 | ``read(size)`` method or any object implementing the *buffer protocol*. |
|
223 | ``read(size)`` method or any object implementing the *buffer protocol*. | |
224 |
|
224 | |||
225 | ``stream_reader()`` accepts a ``size`` argument specifying how large the input |
|
225 | ``stream_reader()`` accepts a ``size`` argument specifying how large the input | |
226 | stream is. This is used to adjust compression parameters so they are |
|
226 | stream is. This is used to adjust compression parameters so they are | |
227 | tailored to the source size.:: |
|
227 | tailored to the source size.:: | |
228 |
|
228 | |||
229 | with open(path, 'rb') as fh: |
|
229 | with open(path, 'rb') as fh: | |
230 | cctx = zstd.ZstdCompressor() |
|
230 | cctx = zstd.ZstdCompressor() | |
231 | with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader: |
|
231 | with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader: | |
232 | ... |
|
232 | ... | |
233 |
|
233 | |||
234 | If the ``source`` is a stream, you can specify how large ``read()`` requests |
|
234 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |
235 | to that stream should be via the ``read_size`` argument. It defaults to |
|
235 | to that stream should be via the ``read_size`` argument. It defaults to | |
236 | ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.:: |
|
236 | ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |
237 |
|
237 | |||
238 | with open(path, 'rb') as fh: |
|
238 | with open(path, 'rb') as fh: | |
239 | cctx = zstd.ZstdCompressor() |
|
239 | cctx = zstd.ZstdCompressor() | |
240 | # Will perform fh.read(8192) when obtaining data to feed into the |
|
240 | # Will perform fh.read(8192) when obtaining data to feed into the | |
241 | # compressor. |
|
241 | # compressor. | |
242 | with cctx.stream_reader(fh, read_size=8192) as reader: |
|
242 | with cctx.stream_reader(fh, read_size=8192) as reader: | |
243 | ... |
|
243 | ... | |
244 |
|
244 | |||
245 | The stream returned by ``stream_reader()`` is neither writable nor seekable |
|
245 | The stream returned by ``stream_reader()`` is neither writable nor seekable | |
246 | (even if the underlying source is seekable). ``readline()`` and |
|
246 | (even if the underlying source is seekable). ``readline()`` and | |
247 | ``readlines()`` are not implemented because they don't make sense for |
|
247 | ``readlines()`` are not implemented because they don't make sense for | |
248 | compressed data. ``tell()`` returns the number of compressed bytes |
|
248 | compressed data. ``tell()`` returns the number of compressed bytes | |
249 | emitted so far. |
|
249 | emitted so far. | |
250 |
|
250 | |||
251 | Streaming Input API |
|
251 | Streaming Input API | |
252 | ^^^^^^^^^^^^^^^^^^^ |
|
252 | ^^^^^^^^^^^^^^^^^^^ | |
253 |
|
253 | |||
254 | ``stream_writer(fh)`` allows you to *stream* data into a compressor. |
|
254 | ``stream_writer(fh)`` allows you to *stream* data into a compressor. | |
255 |
|
255 | |||
256 | Returned instances implement the ``io.RawIOBase`` interface. Only methods |
|
256 | Returned instances implement the ``io.RawIOBase`` interface. Only methods | |
257 | that involve writing will do useful things. |
|
257 | that involve writing will do useful things. | |
258 |
|
258 | |||
259 | The argument to ``stream_writer()`` must have a ``write(data)`` method. As |
|
259 | The argument to ``stream_writer()`` must have a ``write(data)`` method. As | |
260 | compressed data is available, ``write()`` will be called with the compressed |
|
260 | compressed data is available, ``write()`` will be called with the compressed | |
261 | data as its argument. Many common Python types implement ``write()``, including |
|
261 | data as its argument. Many common Python types implement ``write()``, including | |
262 | open file handles and ``io.BytesIO``. |
|
262 | open file handles and ``io.BytesIO``. | |
263 |
|
263 | |||
264 | The ``write(data)`` method is used to feed data into the compressor. |
|
264 | The ``write(data)`` method is used to feed data into the compressor. | |
265 |
|
265 | |||
266 | The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever |
|
266 | The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever | |
267 | data remains within the compressor's internal state into the output object. This |
|
267 | data remains within the compressor's internal state into the output object. This | |
268 | may result in 0 or more ``write()`` calls to the output object. This method |
|
268 | may result in 0 or more ``write()`` calls to the output object. This method | |
269 | accepts an optional ``flush_mode`` argument to control the flushing behavior. |
|
269 | accepts an optional ``flush_mode`` argument to control the flushing behavior. | |
270 | Its value can be any of the ``FLUSH_*`` constants. |
|
270 | Its value can be any of the ``FLUSH_*`` constants. | |
271 |
|
271 | |||
272 | Both ``write()`` and ``flush()`` return the number of bytes written to the |
|
272 | Both ``write()`` and ``flush()`` return the number of bytes written to the | |
273 | object's ``write()``. In many cases, small inputs do not accumulate enough |
|
273 | object's ``write()``. In many cases, small inputs do not accumulate enough | |
274 | data to cause a write and ``write()`` will return ``0``. |
|
274 | data to cause a write and ``write()`` will return ``0``. | |
275 |
|
275 | |||
276 | Calling ``close()`` will mark the stream as closed and subsequent I/O |
|
276 | Calling ``close()`` will mark the stream as closed and subsequent I/O | |
277 | operations will raise ``ValueError`` (per the documented behavior of |
|
277 | operations will raise ``ValueError`` (per the documented behavior of | |
278 | ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying |
|
278 | ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying | |
279 | stream if such a method exists. |
|
279 | stream if such a method exists. | |
280 |
|
280 | |||
281 | Typical usage is as follows::

281 | Typical usage is as follows:: | 
282 |
|
282 | |||
283 | cctx = zstd.ZstdCompressor(level=10) |
|
283 | cctx = zstd.ZstdCompressor(level=10) | |
284 | compressor = cctx.stream_writer(fh) |
|
284 | compressor = cctx.stream_writer(fh) | |
285 |
|
285 | |||
286 | compressor.write(b'chunk 0\n') |
|
286 | compressor.write(b'chunk 0\n') | |
287 | compressor.write(b'chunk 1\n') |
|
287 | compressor.write(b'chunk 1\n') | |
288 | compressor.flush() |
|
288 | compressor.flush() | |
289 | # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point. |
|
289 | # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point. | |
290 | # Receiver is also expecting more data in the zstd *frame*. |
|
290 | # Receiver is also expecting more data in the zstd *frame*. | |
291 |
|
291 | |||
292 | compressor.write(b'chunk 2\n') |
|
292 | compressor.write(b'chunk 2\n') | |
293 | compressor.flush(zstd.FLUSH_FRAME) |
|
293 | compressor.flush(zstd.FLUSH_FRAME) | |
294 | # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``. |
|
294 | # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``. | |
295 | # Receiver is expecting no more data, as the zstd frame is closed. |
|
295 | # Receiver is expecting no more data, as the zstd frame is closed. | |
296 | # Any future calls to ``write()`` at this point will construct a new |
|
296 | # Any future calls to ``write()`` at this point will construct a new | |
297 | # zstd frame. |
|
297 | # zstd frame. | |
298 |
|
298 | |||
299 | Instances can be used as context managers. Exiting the context manager is |
|
299 | Instances can be used as context managers. Exiting the context manager is | |
300 | the equivalent of calling ``close()``, which is equivalent to calling |
|
300 | the equivalent of calling ``close()``, which is equivalent to calling | |
301 | ``flush(zstd.FLUSH_FRAME)``:: |
|
301 | ``flush(zstd.FLUSH_FRAME)``:: | |
302 |
|
302 | |||
303 | cctx = zstd.ZstdCompressor(level=10) |
|
303 | cctx = zstd.ZstdCompressor(level=10) | |
304 | with cctx.stream_writer(fh) as compressor: |
|
304 | with cctx.stream_writer(fh) as compressor: | |
305 | compressor.write(b'chunk 0') |
|
305 | compressor.write(b'chunk 0') | |
306 | compressor.write(b'chunk 1') |
|
306 | compressor.write(b'chunk 1') | |
307 | ... |
|
307 | ... | |
308 |
|
308 | |||
309 | .. important:: |
|
309 | .. important:: | |
310 |
|
310 | |||
311 | If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute |
|
311 | If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute | |
312 | a full zstd *frame* and consumers of this data may complain about malformed |
|
312 | a full zstd *frame* and consumers of this data may complain about malformed | |
313 | input. It is recommended to use instances as a context manager to ensure |
|
313 | input. It is recommended to use instances as a context manager to ensure | |
314 | *frames* are properly finished. |
|
314 | *frames* are properly finished. | |
315 |
|
315 | |||
316 | If the size of the data being fed to this streaming compressor is known, |
|
316 | If the size of the data being fed to this streaming compressor is known, | |
317 | you can declare it before compression begins:: |
|
317 | you can declare it before compression begins:: | |
318 |
|
318 | |||
319 | cctx = zstd.ZstdCompressor() |
|
319 | cctx = zstd.ZstdCompressor() | |
320 | with cctx.stream_writer(fh, size=data_len) as compressor: |
|
320 | with cctx.stream_writer(fh, size=data_len) as compressor: | |
321 | compressor.write(chunk0) |
|
321 | compressor.write(chunk0) | |
322 | compressor.write(chunk1) |
|
322 | compressor.write(chunk1) | |
323 | ... |
|
323 | ... | |
324 |
|
324 | |||
325 | Declaring the size of the source data allows compression parameters to |
|
325 | Declaring the size of the source data allows compression parameters to | |
326 | be tuned. And if ``write_content_size`` is used, it also results in the |
|
326 | be tuned. And if ``write_content_size`` is used, it also results in the | |
327 | content size being written into the frame header of the output data. |
|
327 | content size being written into the frame header of the output data. | |
328 |
|
328 | |||
329 | The size of chunks being ``write()`` to the destination can be specified:: |
|
329 | The size of chunks being ``write()`` to the destination can be specified:: | |
330 |
|
330 | |||
331 | cctx = zstd.ZstdCompressor() |
|
331 | cctx = zstd.ZstdCompressor() | |
332 | with cctx.stream_writer(fh, write_size=32768) as compressor: |
|
332 | with cctx.stream_writer(fh, write_size=32768) as compressor: | |
333 | ... |
|
333 | ... | |
334 |
|
334 | |||
335 | To see how much memory is being used by the streaming compressor:: |
|
335 | To see how much memory is being used by the streaming compressor:: | |
336 |
|
336 | |||
337 | cctx = zstd.ZstdCompressor() |
|
337 | cctx = zstd.ZstdCompressor() | |
338 | with cctx.stream_writer(fh) as compressor: |
|
338 | with cctx.stream_writer(fh) as compressor: | |
339 | ... |
|
339 | ... | |
340 | byte_size = compressor.memory_size() |
|
340 | byte_size = compressor.memory_size() | |
341 |
|
341 | |||
 342 | The total number of bytes written so far is exposed via ``tell()``::
|
 342 | The total number of bytes written so far is exposed via ``tell()``:: | 
343 |
|
343 | |||
344 | cctx = zstd.ZstdCompressor() |
|
344 | cctx = zstd.ZstdCompressor() | |
345 | with cctx.stream_writer(fh) as compressor: |
|
345 | with cctx.stream_writer(fh) as compressor: | |
346 | ... |
|
346 | ... | |
347 | total_written = compressor.tell() |
|
347 | total_written = compressor.tell() | |
348 |
|
348 | |||
349 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control |
|
349 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control | |
350 | the return value of ``write()``. When ``False`` (the default), ``write()`` returns |
|
350 | the return value of ``write()``. When ``False`` (the default), ``write()`` returns | |
351 | the number of bytes that were ``write()``en to the underlying object. When |
|
351 | the number of bytes that were ``write()``en to the underlying object. When | |
352 | ``True``, ``write()`` returns the number of bytes read from the input that |
|
352 | ``True``, ``write()`` returns the number of bytes read from the input that | |
353 | were subsequently written to the compressor. ``True`` is the *proper* behavior |
|
353 | were subsequently written to the compressor. ``True`` is the *proper* behavior | |
354 | for ``write()`` as specified by the ``io.RawIOBase`` interface and will become |
|
354 | for ``write()`` as specified by the ``io.RawIOBase`` interface and will become | |
355 | the default value in a future release. |
|
355 | the default value in a future release. | |
356 |
|
356 | |||
357 | Streaming Output API |
|
357 | Streaming Output API | |
358 | ^^^^^^^^^^^^^^^^^^^^ |
|
358 | ^^^^^^^^^^^^^^^^^^^^ | |
359 |
|
359 | |||
360 | ``read_to_iter(reader)`` provides a mechanism to stream data out of a |
|
360 | ``read_to_iter(reader)`` provides a mechanism to stream data out of a | |
361 | compressor as an iterator of data chunks.:: |
|
361 | compressor as an iterator of data chunks.:: | |
362 |
|
362 | |||
363 | cctx = zstd.ZstdCompressor() |
|
363 | cctx = zstd.ZstdCompressor() | |
364 | for chunk in cctx.read_to_iter(fh): |
|
364 | for chunk in cctx.read_to_iter(fh): | |
365 | # Do something with emitted data. |
|
365 | # Do something with emitted data. | |
366 |
|
366 | |||
367 | ``read_to_iter()`` accepts an object that has a ``read(size)`` method or |
|
367 | ``read_to_iter()`` accepts an object that has a ``read(size)`` method or | |
368 | conforms to the buffer protocol. |
|
368 | conforms to the buffer protocol. | |
369 |
|
369 | |||
370 | Uncompressed data is fetched from the source either by calling ``read(size)`` |
|
370 | Uncompressed data is fetched from the source either by calling ``read(size)`` | |
371 | or by fetching a slice of data from the object directly (in the case where |
|
371 | or by fetching a slice of data from the object directly (in the case where | |
372 | the buffer protocol is being used). The returned iterator consists of chunks |
|
372 | the buffer protocol is being used). The returned iterator consists of chunks | |
373 | of compressed data. |
|
373 | of compressed data. | |
374 |
|
374 | |||
375 | If reading from the source via ``read()``, ``read()`` will be called until |
|
375 | If reading from the source via ``read()``, ``read()`` will be called until | |
376 | it raises or returns an empty bytes (``b''``). It is perfectly valid for |
|
376 | it raises or returns an empty bytes (``b''``). It is perfectly valid for | |
 377 | the source to deliver fewer bytes than were requested by ``read(size)``.
|
 377 | the source to deliver fewer bytes than were requested by ``read(size)``. | 
378 |
|
378 | |||
379 | Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument |
|
379 | Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument | |
380 | declaring the size of the input stream:: |
|
380 | declaring the size of the input stream:: | |
381 |
|
381 | |||
382 | cctx = zstd.ZstdCompressor() |
|
382 | cctx = zstd.ZstdCompressor() | |
383 | for chunk in cctx.read_to_iter(fh, size=some_int): |
|
383 | for chunk in cctx.read_to_iter(fh, size=some_int): | |
384 | pass |
|
384 | pass | |
385 |
|
385 | |||
386 | You can also control the size that data is ``read()`` from the source and |
|
386 | You can also control the size that data is ``read()`` from the source and | |
387 | the ideal size of output chunks:: |
|
387 | the ideal size of output chunks:: | |
388 |
|
388 | |||
389 | cctx = zstd.ZstdCompressor() |
|
389 | cctx = zstd.ZstdCompressor() | |
390 | for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192): |
|
390 | for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192): | |
391 | pass |
|
391 | pass | |
392 |
|
392 | |||
393 | Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control |
|
393 | Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control | |
394 | over the sizes of chunks fed into the compressor. Instead, chunk sizes will |
|
394 | over the sizes of chunks fed into the compressor. Instead, chunk sizes will | |
395 | be whatever the object being read from delivers. These will often be of a |
|
395 | be whatever the object being read from delivers. These will often be of a | |
396 | uniform size. |
|
396 | uniform size. | |
397 |
|
397 | |||
398 | Stream Copying API |
|
398 | Stream Copying API | |
399 | ^^^^^^^^^^^^^^^^^^ |
|
399 | ^^^^^^^^^^^^^^^^^^ | |
400 |
|
400 | |||
401 | ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while |
|
401 | ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while | |
402 | compressing it.:: |
|
402 | compressing it.:: | |
403 |
|
403 | |||
404 | cctx = zstd.ZstdCompressor() |
|
404 | cctx = zstd.ZstdCompressor() | |
405 | cctx.copy_stream(ifh, ofh) |
|
405 | cctx.copy_stream(ifh, ofh) | |
406 |
|
406 | |||
407 | For example, say you wish to compress a file:: |
|
407 | For example, say you wish to compress a file:: | |
408 |
|
408 | |||
409 | cctx = zstd.ZstdCompressor() |
|
409 | cctx = zstd.ZstdCompressor() | |
410 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
410 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: | |
411 | cctx.copy_stream(ifh, ofh) |
|
411 | cctx.copy_stream(ifh, ofh) | |
412 |
|
412 | |||
413 | It is also possible to declare the size of the source stream:: |
|
413 | It is also possible to declare the size of the source stream:: | |
414 |
|
414 | |||
415 | cctx = zstd.ZstdCompressor() |
|
415 | cctx = zstd.ZstdCompressor() | |
416 | cctx.copy_stream(ifh, ofh, size=len_of_input) |
|
416 | cctx.copy_stream(ifh, ofh, size=len_of_input) | |
417 |
|
417 | |||
418 | You can also specify how large the chunks that are ``read()`` and ``write()`` |
|
418 | You can also specify how large the chunks that are ``read()`` and ``write()`` | |
419 | from and to the streams:: |
|
419 | from and to the streams:: | |
420 |
|
420 | |||
421 | cctx = zstd.ZstdCompressor() |
|
421 | cctx = zstd.ZstdCompressor() | |
422 | cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384) |
|
422 | cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384) | |
423 |
|
423 | |||
424 | The stream copier returns a 2-tuple of bytes read and written:: |
|
424 | The stream copier returns a 2-tuple of bytes read and written:: | |
425 |
|
425 | |||
426 | cctx = zstd.ZstdCompressor() |
|
426 | cctx = zstd.ZstdCompressor() | |
427 | read_count, write_count = cctx.copy_stream(ifh, ofh) |
|
427 | read_count, write_count = cctx.copy_stream(ifh, ofh) | |
428 |
|
428 | |||
429 | Compressor API |
|
429 | Compressor API | |
430 | ^^^^^^^^^^^^^^ |
|
430 | ^^^^^^^^^^^^^^ | |
431 |
|
431 | |||
432 | ``compressobj()`` returns an object that exposes ``compress(data)`` and |
|
432 | ``compressobj()`` returns an object that exposes ``compress(data)`` and | |
433 | ``flush()`` methods. Each returns compressed data or an empty bytes. |
|
433 | ``flush()`` methods. Each returns compressed data or an empty bytes. | |
434 |
|
434 | |||
435 | The purpose of ``compressobj()`` is to provide an API-compatible interface |
|
435 | The purpose of ``compressobj()`` is to provide an API-compatible interface | |
436 | with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to |
|
436 | with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to | |
437 | swap in different compressor objects while using the same API. |
|
437 | swap in different compressor objects while using the same API. | |
438 |
|
438 | |||
439 | ``flush()`` accepts an optional argument indicating how to end the stream. |
|
439 | ``flush()`` accepts an optional argument indicating how to end the stream. | |
440 | ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream. |
|
440 | ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream. | |
441 | Once this type of flush is performed, ``compress()`` and ``flush()`` can |
|
441 | Once this type of flush is performed, ``compress()`` and ``flush()`` can | |
442 | no longer be called. This type of flush **must** be called to end the |
|
442 | no longer be called. This type of flush **must** be called to end the | |
443 | compression context. If not called, returned data may be incomplete. |
|
443 | compression context. If not called, returned data may be incomplete. | |
444 |
|
444 | |||
445 | A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a |
|
445 | A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a | |
446 | zstd block. Flushes of this type can be performed multiple times. The next |
|
446 | zstd block. Flushes of this type can be performed multiple times. The next | |
447 | call to ``compress()`` will begin a new zstd block. |
|
447 | call to ``compress()`` will begin a new zstd block. | |
448 |
|
448 | |||
449 | Here is how this API should be used:: |
|
449 | Here is how this API should be used:: | |
450 |
|
450 | |||
451 | cctx = zstd.ZstdCompressor() |
|
451 | cctx = zstd.ZstdCompressor() | |
452 | cobj = cctx.compressobj() |
|
452 | cobj = cctx.compressobj() | |
453 | data = cobj.compress(b'raw input 0') |
|
453 | data = cobj.compress(b'raw input 0') | |
454 | data = cobj.compress(b'raw input 1') |
|
454 | data = cobj.compress(b'raw input 1') | |
455 | data = cobj.flush() |
|
455 | data = cobj.flush() | |
456 |
|
456 | |||
457 | Or to flush blocks:: |
|
457 | Or to flush blocks:: | |
458 |
|
458 | |||
 459 | cctx = zstd.ZstdCompressor()
|
 459 | cctx = zstd.ZstdCompressor() | 
460 | cobj = cctx.compressobj() |
|
460 | cobj = cctx.compressobj() | |
461 | data = cobj.compress(b'chunk in first block') |
|
461 | data = cobj.compress(b'chunk in first block') | |
462 | data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
462 | data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) | |
463 | data = cobj.compress(b'chunk in second block') |
|
463 | data = cobj.compress(b'chunk in second block') | |
464 | data = cobj.flush() |
|
464 | data = cobj.flush() | |
465 |
|
465 | |||
466 | For best performance results, keep input chunks under 256KB. This avoids |
|
466 | For best performance results, keep input chunks under 256KB. This avoids | |
467 | extra allocations for a large output object. |
|
467 | extra allocations for a large output object. | |
468 |
|
468 | |||
469 | It is possible to declare the input size of the data that will be fed into |
|
469 | It is possible to declare the input size of the data that will be fed into | |
470 | the compressor:: |
|
470 | the compressor:: | |
471 |
|
471 | |||
472 | cctx = zstd.ZstdCompressor() |
|
472 | cctx = zstd.ZstdCompressor() | |
473 | cobj = cctx.compressobj(size=6) |
|
473 | cobj = cctx.compressobj(size=6) | |
474 | data = cobj.compress(b'foobar') |
|
474 | data = cobj.compress(b'foobar') | |
475 | data = cobj.flush() |
|
475 | data = cobj.flush() | |
476 |
|
476 | |||
477 | Chunker API |
|
477 | Chunker API | |
478 | ^^^^^^^^^^^ |
|
478 | ^^^^^^^^^^^ | |
479 |
|
479 | |||
480 | ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns |
|
480 | ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns | |
481 | an object that can be used to iteratively feed chunks of data into a compressor |
|
481 | an object that can be used to iteratively feed chunks of data into a compressor | |
482 | and produce output chunks of a uniform size. |
|
482 | and produce output chunks of a uniform size. | |
483 |
|
483 | |||
484 | The object returned by ``chunker()`` exposes the following methods: |
|
484 | The object returned by ``chunker()`` exposes the following methods: | |
485 |
|
485 | |||
486 | ``compress(data)`` |
|
486 | ``compress(data)`` | |
487 | Feeds new input data into the compressor. |
|
487 | Feeds new input data into the compressor. | |
488 |
|
488 | |||
489 | ``flush()`` |
|
489 | ``flush()`` | |
490 | Flushes all data currently in the compressor. |
|
490 | Flushes all data currently in the compressor. | |
491 |
|
491 | |||
492 | ``finish()`` |
|
492 | ``finish()`` | |
493 | Signals the end of input data. No new data can be compressed after this |
|
493 | Signals the end of input data. No new data can be compressed after this | |
494 | method is called. |
|
494 | method is called. | |
495 |
|
495 | |||
496 | ``compress()``, ``flush()``, and ``finish()`` all return an iterator of |
|
496 | ``compress()``, ``flush()``, and ``finish()`` all return an iterator of | |
497 | ``bytes`` instances holding compressed data. The iterator may be empty. Callers |
|
497 | ``bytes`` instances holding compressed data. The iterator may be empty. Callers | |
498 | MUST iterate through all elements of the returned iterator before performing |
|
498 | MUST iterate through all elements of the returned iterator before performing | |
499 | another operation on the object. |
|
499 | another operation on the object. | |
500 |
|
500 | |||
501 | All chunks emitted by ``compress()`` will have a length of ``chunk_size``. |
|
501 | All chunks emitted by ``compress()`` will have a length of ``chunk_size``. | |
502 |
|
502 | |||
503 | ``flush()`` and ``finish()`` may return a final chunk smaller than |
|
503 | ``flush()`` and ``finish()`` may return a final chunk smaller than | |
504 | ``chunk_size``. |
|
504 | ``chunk_size``. | |
505 |
|
505 | |||
506 | Here is how the API should be used:: |
|
506 | Here is how the API should be used:: | |
507 |
|
507 | |||
508 | cctx = zstd.ZstdCompressor() |
|
508 | cctx = zstd.ZstdCompressor() | |
509 | chunker = cctx.chunker(chunk_size=32768) |
|
509 | chunker = cctx.chunker(chunk_size=32768) | |
510 |
|
510 | |||
511 | with open(path, 'rb') as fh: |
|
511 | with open(path, 'rb') as fh: | |
512 | while True: |
|
512 | while True: | |
513 | in_chunk = fh.read(32768) |
|
513 | in_chunk = fh.read(32768) | |
514 | if not in_chunk: |
|
514 | if not in_chunk: | |
515 | break |
|
515 | break | |
516 |
|
516 | |||
517 | for out_chunk in chunker.compress(in_chunk): |
|
517 | for out_chunk in chunker.compress(in_chunk): | |
518 | # Do something with output chunk of size 32768. |
|
518 | # Do something with output chunk of size 32768. | |
519 |
|
519 | |||
520 | for out_chunk in chunker.finish(): |
|
520 | for out_chunk in chunker.finish(): | |
521 | # Do something with output chunks that finalize the zstd frame. |
|
521 | # Do something with output chunks that finalize the zstd frame. | |
522 |
|
522 | |||
523 | The ``chunker()`` API is often a better alternative to ``compressobj()``. |
|
523 | The ``chunker()`` API is often a better alternative to ``compressobj()``. | |
524 |
|
524 | |||
525 | ``compressobj()`` will emit output data as it is available. This results in a |
|
525 | ``compressobj()`` will emit output data as it is available. This results in a | |
526 | *stream* of output chunks of varying sizes. The consistency of the output chunk |
|
526 | *stream* of output chunks of varying sizes. The consistency of the output chunk | |
527 | size with ``chunker()`` is more appropriate for many usages, such as sending |
|
527 | size with ``chunker()`` is more appropriate for many usages, such as sending | |
528 | compressed data to a socket. |
|
528 | compressed data to a socket. | |
529 |
|
529 | |||
530 | ``compressobj()`` may also perform extra memory reallocations in order to |
|
530 | ``compressobj()`` may also perform extra memory reallocations in order to | |
531 | dynamically adjust the sizes of the output chunks. Since ``chunker()`` output |
|
531 | dynamically adjust the sizes of the output chunks. Since ``chunker()`` output | |
532 | chunks are all the same size (except for flushed or final chunks), there is |
|
532 | chunks are all the same size (except for flushed or final chunks), there is | |
533 | less memory allocation overhead. |
|
533 | less memory allocation overhead. | |
534 |
|
534 | |||
535 | Batch Compression API |
|
535 | Batch Compression API | |
536 | ^^^^^^^^^^^^^^^^^^^^^ |
|
536 | ^^^^^^^^^^^^^^^^^^^^^ | |
537 |
|
537 | |||
538 | (Experimental. Not yet supported in CFFI bindings.) |
|
538 | (Experimental. Not yet supported in CFFI bindings.) | |
539 |
|
539 | |||
540 | ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple |
|
540 | ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple | |
541 | inputs as a single operation. |
|
541 | inputs as a single operation. | |
542 |
|
542 | |||
543 | Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a |
|
543 | Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a | |
544 | ``BufferWithSegments``, or a list containing byte like objects. Each element of |
|
544 | ``BufferWithSegments``, or a list containing byte like objects. Each element of | |
545 | the container will be compressed individually using the configured parameters |
|
545 | the container will be compressed individually using the configured parameters | |
546 | on the ``ZstdCompressor`` instance. |
|
546 | on the ``ZstdCompressor`` instance. | |
547 |
|
547 | |||
548 | The ``threads`` argument controls how many threads to use for compression. The |
|
548 | The ``threads`` argument controls how many threads to use for compression. The | |
549 | default is ``0`` which means to use a single thread. Negative values use the |
|
549 | default is ``0`` which means to use a single thread. Negative values use the | |
550 | number of logical CPUs in the machine. |
|
550 | number of logical CPUs in the machine. | |
551 |
|
551 | |||
552 | The function returns a ``BufferWithSegmentsCollection``. This type represents |
|
552 | The function returns a ``BufferWithSegmentsCollection``. This type represents | |
 553 | N discrete memory allocations, each holding 1 or more compressed frames.
|
 553 | N discrete memory allocations, each holding 1 or more compressed frames. | 
554 |
|
554 | |||
555 | Output data is written to shared memory buffers. This means that unlike |
|
555 | Output data is written to shared memory buffers. This means that unlike | |
556 | regular Python objects, a reference to *any* object within the collection |
|
556 | regular Python objects, a reference to *any* object within the collection | |
557 | keeps the shared buffer and therefore memory backing it alive. This can have |
|
557 | keeps the shared buffer and therefore memory backing it alive. This can have | |
558 | undesirable effects on process memory usage. |
|
558 | undesirable effects on process memory usage. | |
559 |
|
559 | |||
560 | The API and behavior of this function is experimental and will likely change. |
|
560 | The API and behavior of this function is experimental and will likely change. | |
561 | Known deficiencies include: |
|
561 | Known deficiencies include: | |
562 |
|
562 | |||
563 | * If asked to use multiple threads, it will always spawn that many threads, |
|
563 | * If asked to use multiple threads, it will always spawn that many threads, | |
564 | even if the input is too small to use them. It should automatically lower |
|
564 | even if the input is too small to use them. It should automatically lower | |
565 | the thread count when the extra threads would just add overhead. |
|
565 | the thread count when the extra threads would just add overhead. | |
566 | * The buffer allocation strategy is fixed. There is room to make it dynamic, |
|
566 | * The buffer allocation strategy is fixed. There is room to make it dynamic, | |
567 | perhaps even to allow one output buffer per input, facilitating a variation |
|
567 | perhaps even to allow one output buffer per input, facilitating a variation | |
568 | of the API to return a list without the adverse effects of shared memory |
|
568 | of the API to return a list without the adverse effects of shared memory | |
569 | buffers. |
|
569 | buffers. | |
570 |
|
570 | |||
571 | ZstdDecompressor |
|
571 | ZstdDecompressor | |
572 | ---------------- |
|
572 | ---------------- | |
573 |
|
573 | |||
574 | The ``ZstdDecompressor`` class provides an interface for performing |
|
574 | The ``ZstdDecompressor`` class provides an interface for performing | |
575 | decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from |
|
575 | decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from | |
576 | the C API. |
|
576 | the C API. | |
577 |
|
577 | |||
578 | Each instance is associated with parameters that control decompression. These |
|
578 | Each instance is associated with parameters that control decompression. These | |
579 | come from the following named arguments (all optional): |
|
579 | come from the following named arguments (all optional): | |
580 |
|
580 | |||
581 | dict_data |
|
581 | dict_data | |
582 | Compression dictionary to use. |
|
582 | Compression dictionary to use. | |
583 | max_window_size |
|
583 | max_window_size | |
 584 | Sets an upper limit on the window size for decompression operations in
|
 584 | Sets an upper limit on the window size for decompression operations in | 
585 | kibibytes. This setting can be used to prevent large memory allocations |
|
585 | kibibytes. This setting can be used to prevent large memory allocations | |
586 | for inputs using large compression windows. |
|
586 | for inputs using large compression windows. | |
587 | format |
|
587 | format | |
588 | Set the format of data for the decoder. By default, this is |
|
588 | Set the format of data for the decoder. By default, this is | |
589 | ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to |
|
589 | ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to | |
590 | allow decoding frames without the 4 byte magic header. Not all decompression |
|
590 | allow decoding frames without the 4 byte magic header. Not all decompression | |
591 | APIs support this mode. |
|
591 | APIs support this mode. | |
592 |
|
592 | |||
593 | The interface of this class is very similar to ``ZstdCompressor`` (by design). |
|
593 | The interface of this class is very similar to ``ZstdCompressor`` (by design). | |
594 |
|
594 | |||
595 | Unless specified otherwise, assume that no two methods of ``ZstdDecompressor`` |
|
595 | Unless specified otherwise, assume that no two methods of ``ZstdDecompressor`` | |
596 | instances can be called from multiple Python threads simultaneously. In other |
|
596 | instances can be called from multiple Python threads simultaneously. In other | |
597 | words, assume instances are not thread safe unless stated otherwise. |
|
597 | words, assume instances are not thread safe unless stated otherwise. | |
598 |
|
598 | |||
599 | Utility Methods |
|
599 | Utility Methods | |
600 | ^^^^^^^^^^^^^^^ |
|
600 | ^^^^^^^^^^^^^^^ | |
601 |
|
601 | |||
602 | ``memory_size()`` obtains the size of the underlying zstd decompression context, |
|
602 | ``memory_size()`` obtains the size of the underlying zstd decompression context, | |
603 | in bytes.:: |
|
603 | in bytes.:: | |
604 |
|
604 | |||
605 | dctx = zstd.ZstdDecompressor() |
|
605 | dctx = zstd.ZstdDecompressor() | |
606 | size = dctx.memory_size() |
|
606 | size = dctx.memory_size() | |
607 |
|
607 | |||
608 | Simple API |
|
608 | Simple API | |
609 | ^^^^^^^^^^ |
|
609 | ^^^^^^^^^^ | |
610 |
|
610 | |||
611 | ``decompress(data)`` can be used to decompress an entire compressed zstd |
|
611 | ``decompress(data)`` can be used to decompress an entire compressed zstd | |
612 | frame in a single operation.:: |
|
612 | frame in a single operation.:: | |
613 |
|
613 | |||
614 | dctx = zstd.ZstdDecompressor() |
|
614 | dctx = zstd.ZstdDecompressor() | |
615 | decompressed = dctx.decompress(data) |
|
615 | decompressed = dctx.decompress(data) | |
616 |
|
616 | |||
617 | By default, ``decompress(data)`` will only work on data written with the content |
|
617 | By default, ``decompress(data)`` will only work on data written with the content | |
618 | size encoded in its header (this is the default behavior of |
|
618 | size encoded in its header (this is the default behavior of | |
619 | ``ZstdCompressor().compress()`` but may not be true for streaming compression). If |
|
619 | ``ZstdCompressor().compress()`` but may not be true for streaming compression). If | |
620 | compressed data without an embedded content size is seen, ``zstd.ZstdError`` will |
|
620 | compressed data without an embedded content size is seen, ``zstd.ZstdError`` will | |
621 | be raised. |
|
621 | be raised. | |
622 |
|
622 | |||
623 | If the compressed data doesn't have its content size embedded within it, |
|
623 | If the compressed data doesn't have its content size embedded within it, | |
624 | decompression can be attempted by specifying the ``max_output_size`` |
|
624 | decompression can be attempted by specifying the ``max_output_size`` | |
625 | argument.:: |
|
625 | argument.:: | |
626 |
|
626 | |||
627 | dctx = zstd.ZstdDecompressor() |
|
627 | dctx = zstd.ZstdDecompressor() | |
628 | uncompressed = dctx.decompress(data, max_output_size=1048576) |
|
628 | uncompressed = dctx.decompress(data, max_output_size=1048576) | |
629 |
|
629 | |||
630 | Ideally, ``max_output_size`` will be identical to the decompressed output |
|
630 | Ideally, ``max_output_size`` will be identical to the decompressed output | |
631 | size. |
|
631 | size. | |
632 |
|
632 | |||
633 | If ``max_output_size`` is too small to hold the decompressed data, |
|
633 | If ``max_output_size`` is too small to hold the decompressed data, | |
634 | ``zstd.ZstdError`` will be raised. |
|
634 | ``zstd.ZstdError`` will be raised. | |
635 |
|
635 | |||
636 | If ``max_output_size`` is larger than the decompressed data, the allocated |
|
636 | If ``max_output_size`` is larger than the decompressed data, the allocated | |
637 | output buffer will be resized to only use the space required. |
|
637 | output buffer will be resized to only use the space required. | |
638 |
|
638 | |||
639 | Please note that an allocation of the requested ``max_output_size`` will be |
|
639 | Please note that an allocation of the requested ``max_output_size`` will be | |
640 | performed every time the method is called. Setting to a very large value could |
|
640 | performed every time the method is called. Setting to a very large value could | |
641 | result in a lot of work for the memory allocator and may result in |
|
641 | result in a lot of work for the memory allocator and may result in | |
642 | ``MemoryError`` being raised if the allocation fails. |
|
642 | ``MemoryError`` being raised if the allocation fails. | |
643 |
|
643 | |||
644 | .. important:: |
|
644 | .. important:: | |
645 |
|
645 | |||
646 | If the exact size of decompressed data is unknown (not passed in explicitly |
|
646 | If the exact size of decompressed data is unknown (not passed in explicitly | |
647 | and not stored in the zstandard frame), for performance reasons it is |
|
647 | and not stored in the zstandard frame), for performance reasons it is | |
648 | encouraged to use a streaming API. |
|
648 | encouraged to use a streaming API. | |
649 |
|
649 | |||
650 | Stream Reader API |
|
650 | Stream Reader API | |
651 | ^^^^^^^^^^^^^^^^^ |
|
651 | ^^^^^^^^^^^^^^^^^ | |
652 |
|
652 | |||
653 | ``stream_reader(source)`` can be used to obtain an object conforming to the |
|
653 | ``stream_reader(source)`` can be used to obtain an object conforming to the | |
654 | ``io.RawIOBase`` interface for reading decompressed output as a stream:: |
|
654 | ``io.RawIOBase`` interface for reading decompressed output as a stream:: | |
655 |
|
655 | |||
656 | with open(path, 'rb') as fh: |
|
656 | with open(path, 'rb') as fh: | |
657 | dctx = zstd.ZstdDecompressor() |
|
657 | dctx = zstd.ZstdDecompressor() | |
658 | reader = dctx.stream_reader(fh) |
|
658 | reader = dctx.stream_reader(fh) | |
659 | while True: |
|
659 | while True: | |
660 | chunk = reader.read(16384) |
|
660 | chunk = reader.read(16384) | |
661 | if not chunk: |
|
661 | if not chunk: | |
662 | break |
|
662 | break | |
663 |
|
663 | |||
664 | # Do something with decompressed chunk. |
|
664 | # Do something with decompressed chunk. | |
665 |
|
665 | |||
666 | The stream can also be used as a context manager:: |
|
666 | The stream can also be used as a context manager:: | |
667 |
|
667 | |||
668 | with open(path, 'rb') as fh: |
|
668 | with open(path, 'rb') as fh: | |
669 | dctx = zstd.ZstdDecompressor() |
|
669 | dctx = zstd.ZstdDecompressor() | |
670 | with dctx.stream_reader(fh) as reader: |
|
670 | with dctx.stream_reader(fh) as reader: | |
671 | ... |
|
671 | ... | |
672 |
|
672 | |||
673 | When used as a context manager, the stream is closed and the underlying |
|
673 | When used as a context manager, the stream is closed and the underlying | |
674 | resources are released when the context manager exits. Future operations against |
|
674 | resources are released when the context manager exits. Future operations against | |
675 | the stream will fail. |
|
675 | the stream will fail. | |
676 |
|
676 | |||
677 | The ``source`` argument to ``stream_reader()`` can be any object with a |
|
677 | The ``source`` argument to ``stream_reader()`` can be any object with a | |
678 | ``read(size)`` method or any object implementing the *buffer protocol*. |
|
678 | ``read(size)`` method or any object implementing the *buffer protocol*. | |
679 |
|
679 | |||
680 | If the ``source`` is a stream, you can specify how large ``read()`` requests |
|
680 | If the ``source`` is a stream, you can specify how large ``read()`` requests | |
681 | to that stream should be via the ``read_size`` argument. It defaults to |
|
681 | to that stream should be via the ``read_size`` argument. It defaults to | |
682 | ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:: |
|
682 | ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.:: | |
683 |
|
683 | |||
684 | with open(path, 'rb') as fh: |
|
684 | with open(path, 'rb') as fh: | |
685 | dctx = zstd.ZstdDecompressor() |
|
685 | dctx = zstd.ZstdDecompressor() | |
686 | # Will perform fh.read(8192) when obtaining data for the decompressor. |
|
686 | # Will perform fh.read(8192) when obtaining data for the decompressor. | |
687 | with dctx.stream_reader(fh, read_size=8192) as reader: |
|
687 | with dctx.stream_reader(fh, read_size=8192) as reader: | |
688 | ... |
|
688 | ... | |
689 |
|
689 | |||
690 | The stream returned by ``stream_reader()`` is not writable. |
|
690 | The stream returned by ``stream_reader()`` is not writable. | |
691 |
|
691 | |||
692 | The stream returned by ``stream_reader()`` is *partially* seekable. |
|
692 | The stream returned by ``stream_reader()`` is *partially* seekable. | |
693 | Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward |
|
693 | Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward | |
694 | of the current position are allowed. Offsets behind the current read |
|
694 | of the current position are allowed. Offsets behind the current read | |
695 | position and offsets relative to the end of stream are not allowed and |
|
695 | position and offsets relative to the end of stream are not allowed and | |
696 | will raise ``ValueError`` if attempted. |
|
696 | will raise ``ValueError`` if attempted. | |
697 |
|
697 | |||
698 | ``tell()`` returns the number of decompressed bytes read so far. |
|
698 | ``tell()`` returns the number of decompressed bytes read so far. | |
699 |
|
699 | |||
700 | Not all I/O methods are implemented. Notably missing is support for |
|
700 | Not all I/O methods are implemented. Notably missing is support for | |
701 | ``readline()``, ``readlines()``, and linewise iteration support. This is |
|
701 | ``readline()``, ``readlines()``, and linewise iteration support. This is | |
702 | because streams operate on binary data - not text data. If you want to |
|
702 | because streams operate on binary data - not text data. If you want to | |
703 | convert decompressed output to text, you can chain an ``io.TextIOWrapper`` |
|
703 | convert decompressed output to text, you can chain an ``io.TextIOWrapper`` | |
704 | to the stream:: |
|
704 | to the stream:: | |
705 |
|
705 | |||
706 | with open(path, 'rb') as fh: |
|
706 | with open(path, 'rb') as fh: | |
707 | dctx = zstd.ZstdDecompressor() |
|
707 | dctx = zstd.ZstdDecompressor() | |
708 | stream_reader = dctx.stream_reader(fh) |
|
708 | stream_reader = dctx.stream_reader(fh) | |
709 | text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8') |
|
709 | text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8') | |
710 |
|
710 | |||
711 | for line in text_stream: |
|
711 | for line in text_stream: | |
712 | ... |
|
712 | ... | |
713 |
|
713 | |||
714 | The ``read_across_frames`` argument to ``stream_reader()`` controls the |
|
714 | The ``read_across_frames`` argument to ``stream_reader()`` controls the | |
715 | behavior of read operations when the end of a zstd *frame* is encountered. |
|
715 | behavior of read operations when the end of a zstd *frame* is encountered. | |
716 | When ``False`` (the default), a read will complete when the end of a |
|
716 | When ``False`` (the default), a read will complete when the end of a | |
717 | zstd *frame* is encountered. When ``True``, a read can potentially |
|
717 | zstd *frame* is encountered. When ``True``, a read can potentially | |
718 | return data spanning multiple zstd *frames*. |
|
718 | return data spanning multiple zstd *frames*. | |
719 |
|
719 | |||
720 | Streaming Input API |
|
720 | Streaming Input API | |
721 | ^^^^^^^^^^^^^^^^^^^ |
|
721 | ^^^^^^^^^^^^^^^^^^^ | |
722 |
|
722 | |||
723 | ``stream_writer(fh)`` allows you to *stream* data into a decompressor. |
|
723 | ``stream_writer(fh)`` allows you to *stream* data into a decompressor. | |
724 |
|
724 | |||
725 | Returned instances implement the ``io.RawIOBase`` interface. Only methods |
|
725 | Returned instances implement the ``io.RawIOBase`` interface. Only methods | |
726 | that involve writing will do useful things. |
|
726 | that involve writing will do useful things. | |
727 |
|
727 | |||
728 | The argument to ``stream_writer()`` is typically an object that also implements |
|
728 | The argument to ``stream_writer()`` is typically an object that also implements | |
729 | ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many |
|
729 | ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many | |
730 | common Python types conform to this interface, including open file handles |
|
730 | common Python types conform to this interface, including open file handles | |
731 | and ``io.BytesIO``. |
|
731 | and ``io.BytesIO``. | |
732 |
|
732 | |||
733 | Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data |
|
733 | Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data | |
734 | is sent to the decompressor by calling ``write(data)`` and decompressed |
|
734 | is sent to the decompressor by calling ``write(data)`` and decompressed | |
735 | output is written to the underlying stream by calling its ``write(data)`` |
|
735 | output is written to the underlying stream by calling its ``write(data)`` | |
736 | method.:: |
|
736 | method.:: | |
737 |
|
737 | |||
738 | dctx = zstd.ZstdDecompressor() |
|
738 | dctx = zstd.ZstdDecompressor() | |
739 | decompressor = dctx.stream_writer(fh) |
|
739 | decompressor = dctx.stream_writer(fh) | |
740 |
|
740 | |||
741 | decompressor.write(compressed_data) |
|
741 | decompressor.write(compressed_data) | |
742 | ... |
|
742 | ... | |
743 |
|
743 | |||
744 |
|
744 | |||
745 | Calls to ``write()`` will return the number of bytes written to the output |
|
745 | Calls to ``write()`` will return the number of bytes written to the output | |
746 | object. Not all inputs will result in bytes being written, so return values |
|
746 | object. Not all inputs will result in bytes being written, so return values | |
747 | of ``0`` are possible. |
|
747 | of ``0`` are possible. | |
748 |
|
748 | |||
749 | Like the ``stream_writer()`` compressor, instances can be used as context |
|
749 | Like the ``stream_writer()`` compressor, instances can be used as context | |
750 | managers. However, context managers add no extra special behavior and offer |
|
750 | managers. However, context managers add no extra special behavior and offer | |
751 | little to no benefit to being used. |
|
751 | little to no benefit to being used. | |
752 |
|
752 | |||
753 | Calling ``close()`` will mark the stream as closed and subsequent I/O operations |
|
753 | Calling ``close()`` will mark the stream as closed and subsequent I/O operations | |
754 | will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``). |
|
754 | will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``). | |
755 | ``close()`` will also call ``close()`` on the underlying stream if such a |
|
755 | ``close()`` will also call ``close()`` on the underlying stream if such a | |
756 | method exists. |
|
756 | method exists. | |
757 |
|
757 | |||
758 | The size of chunks written (via ``write()``) to the destination can be specified::
 |
758 | The size of chunks written (via ``write()``) to the destination can be specified:: | 
759 |
|
759 | |||
760 | dctx = zstd.ZstdDecompressor() |
|
760 | dctx = zstd.ZstdDecompressor() | |
761 | with dctx.stream_writer(fh, write_size=16384) as decompressor: |
|
761 | with dctx.stream_writer(fh, write_size=16384) as decompressor: | |
762 | pass |
|
762 | pass | |
763 |
|
763 | |||
764 | You can see how much memory is being used by the decompressor:: |
|
764 | You can see how much memory is being used by the decompressor:: | |
765 |
|
765 | |||
766 | dctx = zstd.ZstdDecompressor() |
|
766 | dctx = zstd.ZstdDecompressor() | |
767 | with dctx.stream_writer(fh) as decompressor: |
|
767 | with dctx.stream_writer(fh) as decompressor: | |
768 | byte_size = decompressor.memory_size() |
|
768 | byte_size = decompressor.memory_size() | |
769 |
|
769 | |||
770 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control |
|
770 | ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control | |
771 | the return value of ``write()``. When ``False`` (the default), ``write()``
 |
771 | the return value of ``write()``. When ``False`` (the default), ``write()`` | 
772 | returns the number of bytes that were written to the underlying stream.
 |
772 | returns the number of bytes that were written to the underlying stream. | 
773 | When ``True``, ``write()`` returns the number of bytes read from the input. |
|
773 | When ``True``, ``write()`` returns the number of bytes read from the input. | |
774 | ``True`` is the *proper* behavior for ``write()`` as specified by the |
|
774 | ``True`` is the *proper* behavior for ``write()`` as specified by the | |
775 | ``io.RawIOBase`` interface and will become the default in a future release. |
|
775 | ``io.RawIOBase`` interface and will become the default in a future release. | |
776 |
|
776 | |||
777 | Streaming Output API |
|
777 | Streaming Output API | |
778 | ^^^^^^^^^^^^^^^^^^^^ |
|
778 | ^^^^^^^^^^^^^^^^^^^^ | |
779 |
|
779 | |||
780 | ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a |
|
780 | ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a | |
781 | compressed source as an iterator of data chunks.:: |
|
781 | compressed source as an iterator of data chunks.:: | |
782 |
|
782 | |||
783 | dctx = zstd.ZstdDecompressor() |
|
783 | dctx = zstd.ZstdDecompressor() | |
784 | for chunk in dctx.read_to_iter(fh): |
|
784 | for chunk in dctx.read_to_iter(fh): | |
785 | # Do something with original data. |
|
785 | # Do something with original data. | |
786 |
|
786 | |||
787 | ``read_to_iter()`` accepts an object with a ``read(size)`` method that will |
|
787 | ``read_to_iter()`` accepts an object with a ``read(size)`` method that will | |
788 | return compressed bytes or an object conforming to the buffer protocol that |
|
788 | return compressed bytes or an object conforming to the buffer protocol that | |
789 | can expose its data as a contiguous range of bytes. |
|
789 | can expose its data as a contiguous range of bytes. | |
790 |
|
790 | |||
791 | ``read_to_iter()`` returns an iterator whose elements are chunks of the |
|
791 | ``read_to_iter()`` returns an iterator whose elements are chunks of the | |
792 | decompressed data. |
|
792 | decompressed data. | |
793 |
|
793 | |||
794 | The size of requested ``read()`` from the source can be specified:: |
|
794 | The size of requested ``read()`` from the source can be specified:: | |
795 |
|
795 | |||
796 | dctx = zstd.ZstdDecompressor() |
|
796 | dctx = zstd.ZstdDecompressor() | |
797 | for chunk in dctx.read_to_iter(fh, read_size=16384): |
|
797 | for chunk in dctx.read_to_iter(fh, read_size=16384): | |
798 | pass |
|
798 | pass | |
799 |
|
799 | |||
800 | It is also possible to skip leading bytes in the input data:: |
|
800 | It is also possible to skip leading bytes in the input data:: | |
801 |
|
801 | |||
802 | dctx = zstd.ZstdDecompressor() |
|
802 | dctx = zstd.ZstdDecompressor() | |
803 | for chunk in dctx.read_to_iter(fh, skip_bytes=1): |
|
803 | for chunk in dctx.read_to_iter(fh, skip_bytes=1): | |
804 | pass |
|
804 | pass | |
805 |
|
805 | |||
806 | .. tip:: |
|
806 | .. tip:: | |
807 |
|
807 | |||
808 | Skipping leading bytes is useful if the source data contains extra |
|
808 | Skipping leading bytes is useful if the source data contains extra | |
809 | *header* data. Traditionally, you would need to create a slice or |
|
809 | *header* data. Traditionally, you would need to create a slice or | |
810 | ``memoryview`` of the data you want to decompress. This would create |
|
810 | ``memoryview`` of the data you want to decompress. This would create | |
811 | overhead. It is more efficient to pass the offset into this API. |
|
811 | overhead. It is more efficient to pass the offset into this API. | |
812 |
|
812 | |||
813 | Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator |
|
813 | Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator | |
814 | controls when data is decompressed. If the iterator isn't consumed, |
|
814 | controls when data is decompressed. If the iterator isn't consumed, | |
815 | decompression is put on hold. |
|
815 | decompression is put on hold. | |
816 |
|
816 | |||
817 | When ``read_to_iter()`` is passed an object conforming to the buffer protocol, |
|
817 | When ``read_to_iter()`` is passed an object conforming to the buffer protocol, | |
818 | the behavior may seem similar to what occurs when the simple decompression |
|
818 | the behavior may seem similar to what occurs when the simple decompression | |
819 | API is used. However, this API works when the decompressed size is unknown. |
|
819 | API is used. However, this API works when the decompressed size is unknown. | |
820 | Furthermore, if feeding large inputs, the decompressor will work in chunks |
|
820 | Furthermore, if feeding large inputs, the decompressor will work in chunks | |
821 | instead of performing a single operation. |
|
821 | instead of performing a single operation. | |
822 |
|
822 | |||
823 | Stream Copying API |
|
823 | Stream Copying API | |
824 | ^^^^^^^^^^^^^^^^^^ |
|
824 | ^^^^^^^^^^^^^^^^^^ | |
825 |
|
825 | |||
826 | ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while |
|
826 | ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while | |
827 | performing decompression.:: |
|
827 | performing decompression.:: | |
828 |
|
828 | |||
829 | dctx = zstd.ZstdDecompressor() |
|
829 | dctx = zstd.ZstdDecompressor() | |
830 | dctx.copy_stream(ifh, ofh) |
|
830 | dctx.copy_stream(ifh, ofh) | |
831 |
|
831 | |||
832 | e.g. to decompress a file to another file:: |
|
832 | e.g. to decompress a file to another file:: | |
833 |
|
833 | |||
834 | dctx = zstd.ZstdDecompressor() |
|
834 | dctx = zstd.ZstdDecompressor() | |
835 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: |
|
835 | with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh: | |
836 | dctx.copy_stream(ifh, ofh) |
|
836 | dctx.copy_stream(ifh, ofh) | |
837 |
|
837 | |||
838 | The size of chunks being ``read()`` and ``write()`` from and to the streams |
|
838 | The size of chunks being ``read()`` and ``write()`` from and to the streams | |
839 | can be specified:: |
|
839 | can be specified:: | |
840 |
|
840 | |||
841 | dctx = zstd.ZstdDecompressor() |
|
841 | dctx = zstd.ZstdDecompressor() | |
842 | dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384) |
|
842 | dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384) | |
843 |
|
843 | |||
844 | Decompressor API |
|
844 | Decompressor API | |
845 | ^^^^^^^^^^^^^^^^ |
|
845 | ^^^^^^^^^^^^^^^^ | |
846 |
|
846 | |||
847 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` |
|
847 | ``decompressobj()`` returns an object that exposes a ``decompress(data)`` | |
848 | method. Compressed data chunks are fed into ``decompress(data)`` and |
|
848 | method. Compressed data chunks are fed into ``decompress(data)`` and | |
849 | uncompressed output (or an empty bytes object) is returned. Output from subsequent
 |
849 | uncompressed output (or an empty bytes object) is returned. Output from subsequent | 
850 | calls needs to be concatenated to reassemble the full decompressed byte |
|
850 | calls needs to be concatenated to reassemble the full decompressed byte | |
851 | sequence. |
|
851 | sequence. | |
852 |
|
852 | |||
853 | The purpose of ``decompressobj()`` is to provide an API-compatible interface |
|
853 | The purpose of ``decompressobj()`` is to provide an API-compatible interface | |
854 | with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers |
|
854 | with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers | |
855 | to swap in different decompressor objects while using the same API. |
|
855 | to swap in different decompressor objects while using the same API. | |
856 |
|
856 | |||
857 | Each object is single use: once an input frame is decoded, ``decompress()`` |
|
857 | Each object is single use: once an input frame is decoded, ``decompress()`` | |
858 | can no longer be called. |
|
858 | can no longer be called. | |
859 |
|
859 | |||
860 | Here is how this API should be used:: |
|
860 | Here is how this API should be used:: | |
861 |
|
861 | |||
862 | dctx = zstd.ZstdDecompressor() |
|
862 | dctx = zstd.ZstdDecompressor() | |
863 | dobj = dctx.decompressobj() |
|
863 | dobj = dctx.decompressobj() | |
864 | data = dobj.decompress(compressed_chunk_0) |
|
864 | data = dobj.decompress(compressed_chunk_0) | |
865 | data = dobj.decompress(compressed_chunk_1) |
|
865 | data = dobj.decompress(compressed_chunk_1) | |
866 |
|
866 | |||
867 | By default, calls to ``decompress()`` write output data in chunks of size |
|
867 | By default, calls to ``decompress()`` write output data in chunks of size | |
868 | ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated |
|
868 | ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated | |
869 | before being returned to the caller. It is possible to define the size of |
|
869 | before being returned to the caller. It is possible to define the size of | |
870 | these temporary chunks by passing ``write_size`` to ``decompressobj()``:: |
|
870 | these temporary chunks by passing ``write_size`` to ``decompressobj()``:: | |
871 |
|
871 | |||
872 | dctx = zstd.ZstdDecompressor() |
|
872 | dctx = zstd.ZstdDecompressor() | |
873 | dobj = dctx.decompressobj(write_size=1048576) |
|
873 | dobj = dctx.decompressobj(write_size=1048576) | |
874 |
|
874 | |||
875 | .. note:: |
|
875 | .. note:: | |
876 |
|
876 | |||
877 | Because calls to ``decompress()`` may need to perform multiple |
|
877 | Because calls to ``decompress()`` may need to perform multiple | |
878 | memory (re)allocations, this streaming decompression API isn't as |
|
878 | memory (re)allocations, this streaming decompression API isn't as | |
879 | efficient as other APIs. |
|
879 | efficient as other APIs. | |
880 |
|
880 | |||
881 | For compatibility with the standard library APIs, instances expose a |
|
881 | For compatibility with the standard library APIs, instances expose a | |
882 | ``flush([length=None])`` method. This method no-ops and has no meaningful |
|
882 | ``flush([length=None])`` method. This method no-ops and has no meaningful | |
883 | side-effects, making it safe to call at any time.
 |
883 | side-effects, making it safe to call at any time. | 
884 |
|
884 | |||
885 | Batch Decompression API |
|
885 | Batch Decompression API | |
886 | ^^^^^^^^^^^^^^^^^^^^^^^ |
|
886 | ^^^^^^^^^^^^^^^^^^^^^^^ | |
887 |
|
887 | |||
888 | (Experimental. Not yet supported in CFFI bindings.) |
|
888 | (Experimental. Not yet supported in CFFI bindings.) | |
889 |
|
889 | |||
890 | ``multi_decompress_to_buffer()`` performs decompression of multiple |
|
890 | ``multi_decompress_to_buffer()`` performs decompression of multiple | |
891 | frames as a single operation and returns a ``BufferWithSegmentsCollection`` |
|
891 | frames as a single operation and returns a ``BufferWithSegmentsCollection`` | |
892 | containing decompressed data for all inputs. |
|
892 | containing decompressed data for all inputs. | |
893 |
|
893 | |||
894 | Compressed frames can be passed to the function as a ``BufferWithSegments``, |
|
894 | Compressed frames can be passed to the function as a ``BufferWithSegments``, | |
895 | a ``BufferWithSegmentsCollection``, or as a list containing objects that |
|
895 | a ``BufferWithSegmentsCollection``, or as a list containing objects that | |
896 | conform to the buffer protocol. For best performance, pass a |
|
896 | conform to the buffer protocol. For best performance, pass a | |
897 | ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as |
|
897 | ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as | |
898 | minimal input validation will be done for that type. If calling from |
|
898 | minimal input validation will be done for that type. If calling from | |
899 | Python (as opposed to C), constructing one of these instances may add |
|
899 | Python (as opposed to C), constructing one of these instances may add | |
900 | overhead cancelling out the performance overhead of validation for list |
|
900 | overhead cancelling out the performance overhead of validation for list | |
901 | inputs.:: |
|
901 | inputs.:: | |
902 |
|
902 | |||
903 | dctx = zstd.ZstdDecompressor() |
|
903 | dctx = zstd.ZstdDecompressor() | |
904 | results = dctx.multi_decompress_to_buffer([b'...', b'...']) |
|
904 | results = dctx.multi_decompress_to_buffer([b'...', b'...']) | |
905 |
|
905 | |||
906 | The decompressed size of each frame MUST be discoverable. It can either be |
|
906 | The decompressed size of each frame MUST be discoverable. It can either be | |
907 | embedded within the zstd frame (``write_content_size=True`` argument to |
|
907 | embedded within the zstd frame (``write_content_size=True`` argument to | |
908 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. |
|
908 | ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument. | |
909 |
|
909 | |||
910 | The ``decompressed_sizes`` argument is an object conforming to the buffer |
|
910 | The ``decompressed_sizes`` argument is an object conforming to the buffer | |
911 | protocol which holds an array of 64-bit unsigned integers in the machine's |
|
911 | protocol which holds an array of 64-bit unsigned integers in the machine's | |
912 | native format defining the decompressed sizes of each frame. If this argument |
|
912 | native format defining the decompressed sizes of each frame. If this argument | |
913 | is passed, it avoids having to scan each frame for its decompressed size. |
|
913 | is passed, it avoids having to scan each frame for its decompressed size. | |
914 | This frame scanning can add noticeable overhead in some scenarios.:: |
|
914 | This frame scanning can add noticeable overhead in some scenarios.:: | |
915 |
|
915 | |||
916 | frames = [...] |
|
916 | frames = [...] | |
917 | sizes = struct.pack('=QQQQ', len0, len1, len2, len3) |
|
917 | sizes = struct.pack('=QQQQ', len0, len1, len2, len3) | |
918 |
|
918 | |||
919 | dctx = zstd.ZstdDecompressor() |
|
919 | dctx = zstd.ZstdDecompressor() | |
920 | results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) |
|
920 | results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) | |
921 |
|
921 | |||
922 | The ``threads`` argument controls the number of threads to use to perform |
|
922 | The ``threads`` argument controls the number of threads to use to perform | |
923 | decompression operations. The default (``0``) or the value ``1`` means to |
|
923 | decompression operations. The default (``0``) or the value ``1`` means to | |
924 | use a single thread. Negative values use the number of logical CPUs in the |
|
924 | use a single thread. Negative values use the number of logical CPUs in the | |
925 | machine. |
|
925 | machine. | |
926 |
|
926 | |||
927 | .. note:: |
|
927 | .. note:: | |
928 |
|
928 | |||
929 | It is possible to pass a ``mmap.mmap()`` instance into this function by |
|
929 | It is possible to pass a ``mmap.mmap()`` instance into this function by | |
930 | wrapping it with a ``BufferWithSegments`` instance (which will define the |
|
930 | wrapping it with a ``BufferWithSegments`` instance (which will define the | |
931 | offsets of frames within the memory mapped region). |
|
931 | offsets of frames within the memory mapped region). | |
932 |
|
932 | |||
933 | This function is logically equivalent to performing ``dctx.decompress()`` |
|
933 | This function is logically equivalent to performing ``dctx.decompress()`` | |
934 | on each input frame and returning the result. |
|
934 | on each input frame and returning the result. | |
935 |
|
935 | |||
936 | This function exists to perform decompression on multiple frames as fast |
|
936 | This function exists to perform decompression on multiple frames as fast | |
937 | as possible by having as little overhead as possible. Since decompression is |
|
937 | as possible by having as little overhead as possible. Since decompression is | |
938 | performed as a single operation and since the decompressed output is stored in |
|
938 | performed as a single operation and since the decompressed output is stored in | |
939 | a single buffer, extra memory allocations, Python objects, and Python function |
|
939 | a single buffer, extra memory allocations, Python objects, and Python function | |
940 | calls are avoided. This is ideal for scenarios where callers know up front that |
|
940 | calls are avoided. This is ideal for scenarios where callers know up front that | |
941 | they need to access data for multiple frames, such as when *delta chains* are |
|
941 | they need to access data for multiple frames, such as when *delta chains* are | |
942 | being used. |
|
942 | being used. | |
943 |
|
943 | |||
944 | Currently, the implementation always spawns multiple threads when requested, |
|
944 | Currently, the implementation always spawns multiple threads when requested, | |
945 | even if the amount of work to do is small. In the future, it will be smarter |
|
945 | even if the amount of work to do is small. In the future, it will be smarter | |
946 | about avoiding threads and their associated overhead when the amount of |
|
946 | about avoiding threads and their associated overhead when the amount of | |
947 | work to do is small. |
|
947 | work to do is small. | |
948 |
|
948 | |||
949 | Prefix Dictionary Chain Decompression |
|
949 | Prefix Dictionary Chain Decompression | |
950 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
950 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
951 |
|
951 | |||
952 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of |
|
952 | ``decompress_content_dict_chain(frames)`` performs decompression of a list of | |
953 | zstd frames produced using chained *prefix* dictionary compression. Such |
|
953 | zstd frames produced using chained *prefix* dictionary compression. Such | |
954 | a list of frames is produced by compressing discrete inputs where each |
|
954 | a list of frames is produced by compressing discrete inputs where each | |
955 | non-initial input is compressed with a *prefix* dictionary consisting of the |
|
955 | non-initial input is compressed with a *prefix* dictionary consisting of the | |
956 | content of the previous input. |
|
956 | content of the previous input. | |
957 |
|
957 | |||
958 | For example, say you have the following inputs:: |
|
958 | For example, say you have the following inputs:: | |
959 |
|
959 | |||
960 | inputs = [b'input 1', b'input 2', b'input 3'] |
|
960 | inputs = [b'input 1', b'input 2', b'input 3'] | |
961 |
|
961 | |||
962 | The zstd frame chain consists of: |
|
962 | The zstd frame chain consists of: | |
963 |
|
963 | |||
964 | 1. ``b'input 1'`` compressed in standalone/discrete mode |
|
964 | 1. ``b'input 1'`` compressed in standalone/discrete mode | |
965 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary |
|
965 | 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary | |
966 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary |
|
966 | 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary | |
967 |
|
967 | |||
968 | Each zstd frame **must** have the content size written. |
|
968 | Each zstd frame **must** have the content size written. | |
969 |
|
969 | |||
970 | The following Python code can be used to produce a *prefix dictionary chain*:: |
|
970 | The following Python code can be used to produce a *prefix dictionary chain*:: | |
971 |
|
971 | |||
972 | def make_chain(inputs): |
|
972 | def make_chain(inputs): | |
973 | frames = [] |
|
973 | frames = [] | |
974 |
|
974 | |||
975 | # First frame is compressed in standalone/discrete mode. |
|
975 | # First frame is compressed in standalone/discrete mode. | |
976 | zctx = zstd.ZstdCompressor() |
|
976 | zctx = zstd.ZstdCompressor() | |
977 | frames.append(zctx.compress(inputs[0])) |
|
977 | frames.append(zctx.compress(inputs[0])) | |
978 |
|
978 | |||
979 | # Subsequent frames use the previous fulltext as a prefix dictionary |
|
979 | # Subsequent frames use the previous fulltext as a prefix dictionary | |
980 | for i, raw in enumerate(inputs[1:]): |
|
980 | for i, raw in enumerate(inputs[1:]): | |
981 | dict_data = zstd.ZstdCompressionDict( |
|
981 | dict_data = zstd.ZstdCompressionDict( | |
982 | inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT) |
|
982 | inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT) | |
983 | zctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
983 | zctx = zstd.ZstdCompressor(dict_data=dict_data) | |
984 | frames.append(zctx.compress(raw)) |
|
984 | frames.append(zctx.compress(raw)) | |
985 |
|
985 | |||
986 | return frames |
|
986 | return frames | |
987 |
|
987 | |||
988 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last |
|
988 | ``decompress_content_dict_chain()`` returns the uncompressed data of the last | |
989 | element in the input chain. |
|
989 | element in the input chain. | |
990 |
|
990 | |||
991 |
|
991 | |||
992 | .. note:: |
|
992 | .. note:: | |
993 |
|
993 | |||
994 | It is possible to implement *prefix dictionary chain* decompression |
|
994 | It is possible to implement *prefix dictionary chain* decompression | |
995 | on top of other APIs. However, this function will likely be faster - |
|
995 | on top of other APIs. However, this function will likely be faster - | |
996 | especially for long input chains - as it avoids the overhead of instantiating |
|
996 | especially for long input chains - as it avoids the overhead of instantiating | |
997 | and passing around intermediate objects between C and Python. |
|
997 | and passing around intermediate objects between C and Python. | |
998 |
|
998 | |||
999 | Multi-Threaded Compression |
|
999 | Multi-Threaded Compression | |
1000 | -------------------------- |
|
1000 | -------------------------- | |
1001 |
|
1001 | |||
1002 | ``ZstdCompressor`` accepts a ``threads`` argument that controls the number |
|
1002 | ``ZstdCompressor`` accepts a ``threads`` argument that controls the number | |
1003 | of threads to use for compression. The way this works is that input is split |
|
1003 | of threads to use for compression. The way this works is that input is split | |
1004 | into segments and each segment is fed into a worker pool for compression. Once |
|
1004 | into segments and each segment is fed into a worker pool for compression. Once | |
1005 | a segment is compressed, it is flushed/appended to the output. |
|
1005 | a segment is compressed, it is flushed/appended to the output. | |
1006 |
|
1006 | |||
1007 | .. note:: |
|
1007 | .. note:: | |
1008 |
|
1008 | |||
1009 | These threads are created at the C layer and are not Python threads. So they |
|
1009 | These threads are created at the C layer and are not Python threads. So they | |
1010 | work outside the GIL. It is therefore possible to CPU saturate multiple cores |
|
1010 | work outside the GIL. It is therefore possible to CPU saturate multiple cores | |
1011 | from Python. |
|
1011 | from Python. | |
1012 |
|
1012 | |||
1013 | The segment size for multi-threaded compression is chosen from the window size |
|
1013 | The segment size for multi-threaded compression is chosen from the window size | |
1014 | of the compressor. This is derived from the ``window_log`` attribute of a |
|
1014 | of the compressor. This is derived from the ``window_log`` attribute of a | |
1015 | ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB |
|
1015 | ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB | |
1016 | range. |
|
1016 | range. | |
1017 |
|
1017 | |||
1018 | If multi-threaded compression is requested and the input is smaller than the |
|
1018 | If multi-threaded compression is requested and the input is smaller than the | |
1019 | configured segment size, only a single compression thread will be used. If the |
|
1019 | configured segment size, only a single compression thread will be used. If the | |
1020 | input is smaller than the segment size multiplied by the thread pool size or |
|
1020 | input is smaller than the segment size multiplied by the thread pool size or | |
1021 | if data cannot be delivered to the compressor fast enough, not all requested |
|
1021 | if data cannot be delivered to the compressor fast enough, not all requested | |
1022 | compressor threads may be active simultaneously. |
|
1022 | compressor threads may be active simultaneously. | |
1023 |
|
1023 | |||
1024 | Compared to non-multi-threaded compression, multi-threaded compression has |
|
1024 | Compared to non-multi-threaded compression, multi-threaded compression has | |
1025 | higher per-operation overhead. This includes extra memory operations, |
|
1025 | higher per-operation overhead. This includes extra memory operations, | |
1026 | thread creation, lock acquisition, etc. |
|
1026 | thread creation, lock acquisition, etc. | |
1027 |
|
1027 | |||
1028 | Due to the nature of multi-threaded compression using *N* compression |
|
1028 | Due to the nature of multi-threaded compression using *N* compression | |
1029 | *states*, the output from multi-threaded compression will likely be larger |
|
1029 | *states*, the output from multi-threaded compression will likely be larger | |
1030 | than non-multi-threaded compression. The difference is usually small. But |
|
1030 | than non-multi-threaded compression. The difference is usually small. But | |
1031 | there is a CPU/wall time versus size trade off that may warrant investigation. |
|
1031 | there is a CPU/wall time versus size trade off that may warrant investigation. | |
1032 |
|
1032 | |||
1033 | Output from multi-threaded compression does not require any special handling |
|
1033 | Output from multi-threaded compression does not require any special handling | |
1034 | on the decompression side. To the decompressor, data generated with single |
|
1034 | on the decompression side. To the decompressor, data generated with single | |
1035 | threaded compressor looks the same as data generated by a multi-threaded |
|
1035 | threaded compressor looks the same as data generated by a multi-threaded | |
1036 | compressor and does not require any special handling or additional resource |
|
1036 | compressor and does not require any special handling or additional resource | |
1037 | requirements. |
|
1037 | requirements. | |
1038 |
|
1038 | |||
1039 | Dictionary Creation and Management |
|
1039 | Dictionary Creation and Management | |
1040 | ---------------------------------- |
|
1040 | ---------------------------------- | |
1041 |
|
1041 | |||
1042 | Compression dictionaries are represented with the ``ZstdCompressionDict`` type. |
|
1042 | Compression dictionaries are represented with the ``ZstdCompressionDict`` type. | |
1043 |
|
1043 | |||
1044 | Instances can be constructed from bytes:: |
|
1044 | Instances can be constructed from bytes:: | |
1045 |
|
1045 | |||
1046 | dict_data = zstd.ZstdCompressionDict(data) |
|
1046 | dict_data = zstd.ZstdCompressionDict(data) | |
1047 |
|
1047 | |||
1048 | It is possible to construct a dictionary from *any* data. If the data doesn't |
|
1048 | It is possible to construct a dictionary from *any* data. If the data doesn't | |
1049 | begin with a magic header, it will be treated as a *prefix* dictionary. |
|
1049 | begin with a magic header, it will be treated as a *prefix* dictionary. | |
1050 | *Prefix* dictionaries allow compression operations to reference raw data |
|
1050 | *Prefix* dictionaries allow compression operations to reference raw data | |
1051 | within the dictionary. |
|
1051 | within the dictionary. | |
1052 |
|
1052 | |||
1053 | It is possible to force the use of *prefix* dictionaries or to require a |
|
1053 | It is possible to force the use of *prefix* dictionaries or to require a | |
1054 | dictionary header: |
|
1054 | dictionary header: | |
1055 |
|
1055 | |||
1056 | dict_data = zstd.ZstdCompressionDict(data, |
|
1056 | dict_data = zstd.ZstdCompressionDict(data, | |
1057 | dict_type=zstd.DICT_TYPE_RAWCONTENT) |
|
1057 | dict_type=zstd.DICT_TYPE_RAWCONTENT) | |
1058 |
|
1058 | |||
1059 | dict_data = zstd.ZstdCompressionDict(data, |
|
1059 | dict_data = zstd.ZstdCompressionDict(data, | |
1060 | dict_type=zstd.DICT_TYPE_FULLDICT) |
|
1060 | dict_type=zstd.DICT_TYPE_FULLDICT) | |
1061 |
|
1061 | |||
1062 | You can see how many bytes are in the dictionary by calling ``len()``:: |
|
1062 | You can see how many bytes are in the dictionary by calling ``len()``:: | |
1063 |
|
1063 | |||
1064 | dict_data = zstd.train_dictionary(size, samples) |
|
1064 | dict_data = zstd.train_dictionary(size, samples) | |
1065 | dict_size = len(dict_data) # will not be larger than ``size`` |
|
1065 | dict_size = len(dict_data) # will not be larger than ``size`` | |
1066 |
|
1066 | |||
1067 | Once you have a dictionary, you can pass it to the objects performing |
|
1067 | Once you have a dictionary, you can pass it to the objects performing | |
1068 | compression and decompression:: |
|
1068 | compression and decompression:: | |
1069 |
|
1069 | |||
1070 | dict_data = zstd.train_dictionary(131072, samples) |
|
1070 | dict_data = zstd.train_dictionary(131072, samples) | |
1071 |
|
1071 | |||
1072 | cctx = zstd.ZstdCompressor(dict_data=dict_data) |
|
1072 | cctx = zstd.ZstdCompressor(dict_data=dict_data) | |
1073 | for source_data in input_data: |
|
1073 | for source_data in input_data: | |
1074 | compressed = cctx.compress(source_data) |
|
1074 | compressed = cctx.compress(source_data) | |
1075 | # Do something with compressed data. |
|
1075 | # Do something with compressed data. | |
1076 |
|
1076 | |||
1077 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) |
|
1077 | dctx = zstd.ZstdDecompressor(dict_data=dict_data) | |
1078 | for compressed_data in input_data: |
|
1078 | for compressed_data in input_data: | |
1079 | buffer = io.BytesIO() |
|
1079 | buffer = io.BytesIO() | |
1080 | with dctx.stream_writer(buffer) as decompressor: |
|
1080 | with dctx.stream_writer(buffer) as decompressor: | |
1081 | decompressor.write(compressed_data) |
|
1081 | decompressor.write(compressed_data) | |
1082 | # Do something with raw data in ``buffer``. |
|
1082 | # Do something with raw data in ``buffer``. | |
1083 |
|
1083 | |||
1084 | Dictionaries have unique integer IDs. You can retrieve this ID via:: |
|
1084 | Dictionaries have unique integer IDs. You can retrieve this ID via:: | |
1085 |
|
1085 | |||
1086 | dict_id = zstd.dictionary_id(dict_data) |
|
1086 | dict_id = zstd.dictionary_id(dict_data) | |
1087 |
|
1087 | |||
1088 | You can obtain the raw data in the dict (useful for persisting and constructing |
|
1088 | You can obtain the raw data in the dict (useful for persisting and constructing | |
1089 | a ``ZstdCompressionDict`` later) via ``as_bytes()``:: |
|
1089 | a ``ZstdCompressionDict`` later) via ``as_bytes()``:: | |
1090 |
|
1090 | |||
1091 | dict_data = zstd.train_dictionary(size, samples) |
|
1091 | dict_data = zstd.train_dictionary(size, samples) | |
1092 | raw_data = dict_data.as_bytes() |
|
1092 | raw_data = dict_data.as_bytes() | |
1093 |
|
1093 | |||
1094 | By default, when a ``ZstdCompressionDict`` is *attached* to a |
|
1094 | By default, when a ``ZstdCompressionDict`` is *attached* to a | |
1095 | ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the |
|
1095 | ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the | |
1096 | dictionary for use. This is fine if only 1 compression operation is being |
|
1096 | dictionary for use. This is fine if only 1 compression operation is being | |
1097 | performed or if the ``ZstdCompressor`` is being reused for multiple operations. |
|
1097 | performed or if the ``ZstdCompressor`` is being reused for multiple operations. | |
1098 | But if multiple ``ZstdCompressor`` instances are being used with the dictionary, |
|
1098 | But if multiple ``ZstdCompressor`` instances are being used with the dictionary, | |
1099 | this can add overhead. |
|
1099 | this can add overhead. | |
1100 |
|
1100 | |||
1101 | It is possible to *precompute* the dictionary so it can readily be consumed |
|
1101 | It is possible to *precompute* the dictionary so it can readily be consumed | |
1102 | by multiple ``ZstdCompressor`` instances:: |
|
1102 | by multiple ``ZstdCompressor`` instances:: | |
1103 |
|
1103 | |||
1104 | d = zstd.ZstdCompressionDict(data) |
|
1104 | d = zstd.ZstdCompressionDict(data) | |
1105 |
|
1105 | |||
1106 | # Precompute for compression level 3. |
|
1106 | # Precompute for compression level 3. | |
1107 | d.precompute_compress(level=3) |
|
1107 | d.precompute_compress(level=3) | |
1108 |
|
1108 | |||
1109 | # Precompute with specific compression parameters. |
|
1109 | # Precompute with specific compression parameters. | |
1110 | params = zstd.ZstdCompressionParameters(...) |
|
1110 | params = zstd.ZstdCompressionParameters(...) | |
1111 | d.precompute_compress(compression_params=params) |
|
1111 | d.precompute_compress(compression_params=params) | |
1112 |
|
1112 | |||
1113 | .. note:: |
|
1113 | .. note:: | |
1114 |
|
1114 | |||
1115 | When a dictionary is precomputed, the compression parameters used to |
|
1115 | When a dictionary is precomputed, the compression parameters used to | |
1116 | precompute the dictionary overwrite some of the compression parameters |
|
1116 | precompute the dictionary overwrite some of the compression parameters | |
1117 | specified to ``ZstdCompressor.__init__``. |
|
1117 | specified to ``ZstdCompressor.__init__``. | |
1118 |
|
1118 | |||
1119 | Training Dictionaries |
|
1119 | Training Dictionaries | |
1120 | ^^^^^^^^^^^^^^^^^^^^^ |
|
1120 | ^^^^^^^^^^^^^^^^^^^^^ | |
1121 |
|
1121 | |||
1122 | Unless using *prefix* dictionaries, dictionary data is produced by *training* |
|
1122 | Unless using *prefix* dictionaries, dictionary data is produced by *training* | |
1123 | on existing data:: |
|
1123 | on existing data:: | |
1124 |
|
1124 | |||
1125 | dict_data = zstd.train_dictionary(size, samples) |
|
1125 | dict_data = zstd.train_dictionary(size, samples) | |
1126 |
|
1126 | |||
1127 | This takes a target dictionary size and list of bytes instances and creates and |
|
1127 | This takes a target dictionary size and list of bytes instances and creates and | |
1128 | returns a ``ZstdCompressionDict``. |
|
1128 | returns a ``ZstdCompressionDict``. | |
1129 |
|
1129 | |||
1130 | The dictionary training mechanism is known as *cover*. More details about it are |
|
1130 | The dictionary training mechanism is known as *cover*. More details about it are | |
1131 | available in the paper *Effective Construction of Relative Lempel-Ziv |
|
1131 | available in the paper *Effective Construction of Relative Lempel-Ziv | |
1132 | Dictionaries* (authors: Liao, Petri, Moffat, Wirth). |
|
1132 | Dictionaries* (authors: Liao, Petri, Moffat, Wirth). | |
1133 |
|
1133 | |||
1134 | The cover algorithm takes parameters ``k`` and ``d``. These are the
|
1134 | The cover algorithm takes parameters ``k`` and ``d``. These are the |
1135 | *segment size* and *dmer size*, respectively. The returned dictionary |
|
1135 | *segment size* and *dmer size*, respectively. The returned dictionary | |
1136 | instance created by this function has ``k`` and ``d`` attributes |
|
1136 | instance created by this function has ``k`` and ``d`` attributes | |
1137 | containing the values for these parameters. If a ``ZstdCompressionDict`` |
|
1137 | containing the values for these parameters. If a ``ZstdCompressionDict`` | |
1138 | is constructed from raw bytes data (a content-only dictionary), the |
|
1138 | is constructed from raw bytes data (a content-only dictionary), the | |
1139 | ``k`` and ``d`` attributes will be ``0``. |
|
1139 | ``k`` and ``d`` attributes will be ``0``. | |
1140 |
|
1140 | |||
1141 | The segment and dmer size parameters to the cover algorithm can either be |
|
1141 | The segment and dmer size parameters to the cover algorithm can either be | |
1142 | specified manually or ``train_dictionary()`` can try multiple values |
|
1142 | specified manually or ``train_dictionary()`` can try multiple values | |
1143 | and pick the best one, where *best* means the smallest compressed data size. |
|
1143 | and pick the best one, where *best* means the smallest compressed data size. | |
1144 | This latter mode is called *optimization* mode.
|
1144 | This latter mode is called *optimization* mode. |
1145 |
|
1145 | |||
1146 | If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``, |
|
1146 | If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``, | |
1147 | or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t`` |
|
1147 | or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t`` | |
1148 | struct) are defined, *optimization* mode is used with default parameter |
|
1148 | struct) are defined, *optimization* mode is used with default parameter | |
1149 | values. |
|
1149 | values. | |
1150 |
|
1150 | |||
1151 | If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged |
|
1151 | If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged | |
1152 | with explicit control over those parameters. Specifying ``threads=0`` or |
|
1152 | with explicit control over those parameters. Specifying ``threads=0`` or | |
1153 | ``threads=1`` can be used to engage *optimization* mode if other parameters |
|
1153 | ``threads=1`` can be used to engage *optimization* mode if other parameters | |
1154 | are not defined. |
|
1154 | are not defined. | |
1155 |
|
1155 | |||
1156 | Otherwise, non-*optimization* mode is used with the parameters specified. |
|
1156 | Otherwise, non-*optimization* mode is used with the parameters specified. | |
1157 |
|
1157 | |||
1158 | This function takes the following arguments: |
|
1158 | This function takes the following arguments: | |
1159 |
|
1159 | |||
1160 | dict_size |
|
1160 | dict_size | |
1161 | Target size in bytes of the dictionary to generate. |
|
1161 | Target size in bytes of the dictionary to generate. | |
1162 | samples |
|
1162 | samples | |
1163 | A list of bytes holding samples the dictionary will be trained from. |
|
1163 | A list of bytes holding samples the dictionary will be trained from. | |
1164 | k |
|
1164 | k | |
1165 | Parameter to cover algorithm defining the segment size. A reasonable range |
|
1165 | Parameter to cover algorithm defining the segment size. A reasonable range | |
1166 | is [16, 2048+]. |
|
1166 | is [16, 2048+]. | |
1167 | d |
|
1167 | d | |
1168 | Parameter to cover algorithm defining the dmer size. A reasonable range is |
|
1168 | Parameter to cover algorithm defining the dmer size. A reasonable range is | |
1169 | [6, 16]. ``d`` must be less than or equal to ``k``. |
|
1169 | [6, 16]. ``d`` must be less than or equal to ``k``. | |
1170 | dict_id |
|
1170 | dict_id | |
1171 | Integer dictionary ID for the produced dictionary. Default is 0, which uses |
|
1171 | Integer dictionary ID for the produced dictionary. Default is 0, which uses | |
1172 | a random value. |
|
1172 | a random value. | |
1173 | steps |
|
1173 | steps | |
1174 | Number of steps through ``k`` values to perform when trying parameter |
|
1174 | Number of steps through ``k`` values to perform when trying parameter | |
1175 | variations. |
|
1175 | variations. | |
1176 | threads |
|
1176 | threads | |
1177 | Number of threads to use when trying parameter variations. Default is 0, |
|
1177 | Number of threads to use when trying parameter variations. Default is 0, | |
1178 | which means to use a single thread. A negative value can be specified to |
|
1178 | which means to use a single thread. A negative value can be specified to | |
1179 | use as many threads as there are detected logical CPUs. |
|
1179 | use as many threads as there are detected logical CPUs. | |
1180 | level |
|
1180 | level | |
1181 | Integer target compression level when trying parameter variations. |
|
1181 | Integer target compression level when trying parameter variations. | |
1182 | notifications |
|
1182 | notifications | |
1183 | Controls writing of informational messages to ``stderr``. ``0`` (the |
|
1183 | Controls writing of informational messages to ``stderr``. ``0`` (the | |
1184 | default) means to write nothing. ``1`` writes errors. ``2`` writes |
|
1184 | default) means to write nothing. ``1`` writes errors. ``2`` writes | |
1185 | progression info. ``3`` writes more details. And ``4`` writes all info. |
|
1185 | progression info. ``3`` writes more details. And ``4`` writes all info. | |
1186 |
|
1186 | |||
1187 | Explicit Compression Parameters |
|
1187 | Explicit Compression Parameters | |
1188 | ------------------------------- |
|
1188 | ------------------------------- | |
1189 |
|
1189 | |||
1190 | Zstandard offers a high-level *compression level* that maps to lower-level |
|
1190 | Zstandard offers a high-level *compression level* that maps to lower-level | |
1191 | compression parameters. For many consumers, this numeric level is the only |
|
1191 | compression parameters. For many consumers, this numeric level is the only | |
1192 | compression setting you'll need to touch. |
|
1192 | compression setting you'll need to touch. | |
1193 |
|
1193 | |||
1194 | But for advanced use cases, it might be desirable to tweak these lower-level |
|
1194 | But for advanced use cases, it might be desirable to tweak these lower-level | |
1195 | settings. |
|
1195 | settings. | |
1196 |
|
1196 | |||
1197 | The ``ZstdCompressionParameters`` type represents these low-level compression |
|
1197 | The ``ZstdCompressionParameters`` type represents these low-level compression | |
1198 | settings. |
|
1198 | settings. | |
1199 |
|
1199 | |||
1200 | Instances of this type can be constructed from a myriad of keyword arguments |
|
1200 | Instances of this type can be constructed from a myriad of keyword arguments | |
1201 | (defined below) for complete low-level control over each adjustable |
|
1201 | (defined below) for complete low-level control over each adjustable | |
1202 | compression setting. |
|
1202 | compression setting. | |
1203 |
|
1203 | |||
1204 | From a higher level, one can construct a ``ZstdCompressionParameters`` instance |
|
1204 | From a higher level, one can construct a ``ZstdCompressionParameters`` instance | |
1205 | given a desired compression level and target input and dictionary size |
|
1205 | given a desired compression level and target input and dictionary size | |
1206 | using ``ZstdCompressionParameters.from_level()``. e.g.:: |
|
1206 | using ``ZstdCompressionParameters.from_level()``. e.g.:: | |
1207 |
|
1207 | |||
1208 | # Derive compression settings for compression level 7. |
|
1208 | # Derive compression settings for compression level 7. | |
1209 | params = zstd.ZstdCompressionParameters.from_level(7) |
|
1209 | params = zstd.ZstdCompressionParameters.from_level(7) | |
1210 |
|
1210 | |||
1211 | # With an input size of 1MB |
|
1211 | # With an input size of 1MB | |
1212 | params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576) |
|
1212 | params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576) | |
1213 |
|
1213 | |||
1214 | Using ``from_level()``, it is also possible to override individual compression |
|
1214 | Using ``from_level()``, it is also possible to override individual compression | |
1215 | parameters or to define additional settings that aren't automatically derived. |
|
1215 | parameters or to define additional settings that aren't automatically derived. | |
1216 | e.g.:: |
|
1216 | e.g.:: | |
1217 |
|
1217 | |||
1218 | params = zstd.ZstdCompressionParameters.from_level(4, window_log=10) |
|
1218 | params = zstd.ZstdCompressionParameters.from_level(4, window_log=10) | |
1219 | params = zstd.ZstdCompressionParameters.from_level(5, threads=4) |
|
1219 | params = zstd.ZstdCompressionParameters.from_level(5, threads=4) | |
1220 |
|
1220 | |||
1221 | Or you can define low-level compression settings directly:: |
|
1221 | Or you can define low-level compression settings directly:: | |
1222 |
|
1222 | |||
1223 | params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True) |
|
1223 | params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True) | |
1224 |
|
1224 | |||
1225 | Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to |
|
1225 | Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to | |
1226 | configure a compressor:: |
|
1226 | configure a compressor:: | |
1227 |
|
1227 | |||
1228 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1228 | cctx = zstd.ZstdCompressor(compression_params=params) | |
1229 |
|
1229 | |||
1230 | The named arguments and attributes of ``ZstdCompressionParameters`` are as |
|
1230 | The named arguments and attributes of ``ZstdCompressionParameters`` are as | |
1231 | follows: |
|
1231 | follows: | |
1232 |
|
1232 | |||
1233 | * format |
|
1233 | * format | |
1234 | * compression_level |
|
1234 | * compression_level | |
1235 | * window_log |
|
1235 | * window_log | |
1236 | * hash_log |
|
1236 | * hash_log | |
1237 | * chain_log |
|
1237 | * chain_log | |
1238 | * search_log |
|
1238 | * search_log | |
1239 | * min_match |
|
1239 | * min_match | |
1240 | * target_length |
|
1240 | * target_length | |
1241 | * strategy |
|
1241 | * strategy | |
1242 | * compression_strategy (deprecated: same as ``strategy``) |
|
1242 | * compression_strategy (deprecated: same as ``strategy``) | |
1243 | * write_content_size |
|
1243 | * write_content_size | |
1244 | * write_checksum |
|
1244 | * write_checksum | |
1245 | * write_dict_id |
|
1245 | * write_dict_id | |
1246 | * job_size |
|
1246 | * job_size | |
1247 | * overlap_log |
|
1247 | * overlap_log | |
1248 | * overlap_size_log (deprecated: same as ``overlap_log``) |
|
1248 | * overlap_size_log (deprecated: same as ``overlap_log``) | |
1249 | * force_max_window |
|
1249 | * force_max_window | |
1250 | * enable_ldm |
|
1250 | * enable_ldm | |
1251 | * ldm_hash_log |
|
1251 | * ldm_hash_log | |
1252 | * ldm_min_match |
|
1252 | * ldm_min_match | |
1253 | * ldm_bucket_size_log |
|
1253 | * ldm_bucket_size_log | |
1254 | * ldm_hash_rate_log |
|
1254 | * ldm_hash_rate_log | |
1255 | * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``) |
|
1255 | * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``) | |
1256 | * threads |
|
1256 | * threads | |
1257 |
|
1257 | |||
1258 | Some of these are very low-level settings. It may help to consult the official |
|
1258 | Some of these are very low-level settings. It may help to consult the official | |
1259 | zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants |
|
1259 | zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants | |
1260 | in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h). |
|
1260 | in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h). | |
1261 |
|
1261 | |||
1262 | Frame Inspection |
|
1262 | Frame Inspection | |
1263 | ---------------- |
|
1263 | ---------------- | |
1264 |
|
1264 | |||
1265 | Data emitted from zstd compression is encapsulated in a *frame*. This frame |
|
1265 | Data emitted from zstd compression is encapsulated in a *frame*. This frame | |
1266 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing |
|
1266 | begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing | |
1267 | the frame in more detail. For more info, see |
|
1267 | the frame in more detail. For more info, see | |
1268 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. |
|
1268 | https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md. | |
1269 |
|
1269 | |||
1270 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes |
|
1270 | ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes | |
1271 | instance and returns a ``FrameParameters`` object describing the frame.
|
1271 | instance and returns a ``FrameParameters`` object describing the frame. |
1272 |
|
1272 | |||
1273 | Depending on which fields are present in the frame and their values, the |
|
1273 | Depending on which fields are present in the frame and their values, the | |
1274 | length of the frame parameters varies. If insufficient bytes are passed |
|
1274 | length of the frame parameters varies. If insufficient bytes are passed | |
1275 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure |
|
1275 | in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure | |
1276 | frame parameters can be parsed, pass in at least 18 bytes. |
|
1276 | frame parameters can be parsed, pass in at least 18 bytes. | |
1277 |
|
1277 | |||
1278 | ``FrameParameters`` instances have the following attributes: |
|
1278 | ``FrameParameters`` instances have the following attributes: | |
1279 |
|
1279 | |||
1280 | content_size |
|
1280 | content_size | |
1281 | Integer size of original, uncompressed content. This will be ``0`` if the |
|
1281 | Integer size of original, uncompressed content. This will be ``0`` if the | |
1282 | original content size isn't written to the frame (controlled with the |
|
1282 | original content size isn't written to the frame (controlled with the | |
1283 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input |
|
1283 | ``write_content_size`` argument to ``ZstdCompressor``) or if the input | |
1284 | content size was ``0``. |
|
1284 | content size was ``0``. | |
1285 |
|
1285 | |||
1286 | window_size |
|
1286 | window_size | |
1287 | Integer size of maximum back-reference distance in compressed data. |
|
1287 | Integer size of maximum back-reference distance in compressed data. | |
1288 |
|
1288 | |||
1289 | dict_id |
|
1289 | dict_id | |
1290 | Integer of dictionary ID used for compression. ``0`` if no dictionary |
|
1290 | Integer of dictionary ID used for compression. ``0`` if no dictionary | |
1291 | ID was used or if the dictionary ID was ``0``. |
|
1291 | ID was used or if the dictionary ID was ``0``. | |
1292 |
|
1292 | |||
1293 | has_checksum |
|
1293 | has_checksum | |
1294 | Bool indicating whether a 4 byte content checksum is stored at the end |
|
1294 | Bool indicating whether a 4 byte content checksum is stored at the end | |
1295 | of the frame. |
|
1295 | of the frame. | |
1296 |
|
1296 | |||
1297 | ``zstd.frame_header_size(data)`` returns the size of the zstandard frame |
|
1297 | ``zstd.frame_header_size(data)`` returns the size of the zstandard frame | |
1298 | header. |
|
1298 | header. | |
1299 |
|
1299 | |||
1300 | ``zstd.frame_content_size(data)`` returns the content size as parsed from |
|
1300 | ``zstd.frame_content_size(data)`` returns the content size as parsed from | |
1301 | the frame header. ``-1`` means the content size is unknown. ``0`` means |
|
1301 | the frame header. ``-1`` means the content size is unknown. ``0`` means | |
1302 | an empty frame. The content size is usually correct. However, it may not |
|
1302 | an empty frame. The content size is usually correct. However, it may not | |
1303 | be accurate. |
|
1303 | be accurate. | |
1304 |
|
1304 | |||
1305 | Misc Functionality |
|
1305 | Misc Functionality | |
1306 | ------------------ |
|
1306 | ------------------ | |
1307 |
|
1307 | |||
1308 | estimate_decompression_context_size() |
|
1308 | estimate_decompression_context_size() | |
1309 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1309 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
1310 |
|
1310 | |||
1311 | Estimate the memory size requirements for a decompressor instance. |
|
1311 | Estimate the memory size requirements for a decompressor instance. | |
1312 |
|
1312 | |||
1313 | Constants |
|
1313 | Constants | |
1314 | --------- |
|
1314 | --------- | |
1315 |
|
1315 | |||
1316 | The following module constants/attributes are exposed: |
|
1316 | The following module constants/attributes are exposed: | |
1317 |
|
1317 | |||
1318 | ZSTD_VERSION |
|
1318 | ZSTD_VERSION | |
1319 | This module attribute exposes a 3-tuple of the Zstandard version. e.g. |
|
1319 | This module attribute exposes a 3-tuple of the Zstandard version. e.g. | |
1320 | ``(1, 0, 0)`` |
|
1320 | ``(1, 0, 0)`` | |
1321 | MAX_COMPRESSION_LEVEL |
|
1321 | MAX_COMPRESSION_LEVEL | |
1322 | Integer max compression level accepted by compression functions |
|
1322 | Integer max compression level accepted by compression functions | |
1323 | COMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1323 | COMPRESSION_RECOMMENDED_INPUT_SIZE | |
1324 | Recommended chunk size to feed to compressor functions |
|
1324 | Recommended chunk size to feed to compressor functions | |
1325 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1325 | COMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
1326 | Recommended chunk size for compression output |
|
1326 | Recommended chunk size for compression output | |
1327 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1327 | DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
1328 | Recommended chunk size to feed into decompressor functions
|
1328 | Recommended chunk size to feed into decompressor functions |
1329 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE |
|
1329 | DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE | |
1330 | Recommended chunk size for decompression output |
|
1330 | Recommended chunk size for decompression output | |
1331 |
|
1331 | |||
1332 | FRAME_HEADER |
|
1332 | FRAME_HEADER | |
1333 | bytes containing header of the Zstandard frame |
|
1333 | bytes containing header of the Zstandard frame | |
1334 | MAGIC_NUMBER |
|
1334 | MAGIC_NUMBER | |
1335 | Frame header as an integer |
|
1335 | Frame header as an integer | |
1336 |
|
1336 | |||
1337 | FLUSH_BLOCK |
|
1337 | FLUSH_BLOCK | |
1338 | Flushing behavior that denotes to flush a zstd block. A decompressor will |
|
1338 | Flushing behavior that denotes to flush a zstd block. A decompressor will | |
1339 | be able to decode all data fed into the compressor so far. |
|
1339 | be able to decode all data fed into the compressor so far. | |
1340 | FLUSH_FRAME |
|
1340 | FLUSH_FRAME | |
1341 | Flushing behavior that denotes to end a zstd frame. Any new data fed |
|
1341 | Flushing behavior that denotes to end a zstd frame. Any new data fed | |
1342 | to the compressor will start a new frame. |
|
1342 | to the compressor will start a new frame. | |
1343 |
|
1343 | |||
1344 | CONTENTSIZE_UNKNOWN |
|
1344 | CONTENTSIZE_UNKNOWN | |
1345 | Value for content size when the content size is unknown. |
|
1345 | Value for content size when the content size is unknown. | |
1346 | CONTENTSIZE_ERROR |
|
1346 | CONTENTSIZE_ERROR | |
1347 | Value for content size when content size couldn't be determined. |
|
1347 | Value for content size when content size couldn't be determined. | |
1348 |
|
1348 | |||
1349 | WINDOWLOG_MIN |
|
1349 | WINDOWLOG_MIN | |
1350 | Minimum value for compression parameter |
|
1350 | Minimum value for compression parameter | |
1351 | WINDOWLOG_MAX |
|
1351 | WINDOWLOG_MAX | |
1352 | Maximum value for compression parameter |
|
1352 | Maximum value for compression parameter | |
1353 | CHAINLOG_MIN |
|
1353 | CHAINLOG_MIN | |
1354 | Minimum value for compression parameter |
|
1354 | Minimum value for compression parameter | |
1355 | CHAINLOG_MAX |
|
1355 | CHAINLOG_MAX | |
1356 | Maximum value for compression parameter |
|
1356 | Maximum value for compression parameter | |
1357 | HASHLOG_MIN |
|
1357 | HASHLOG_MIN | |
1358 | Minimum value for compression parameter |
|
1358 | Minimum value for compression parameter | |
1359 | HASHLOG_MAX |
|
1359 | HASHLOG_MAX | |
1360 | Maximum value for compression parameter |
|
1360 | Maximum value for compression parameter | |
1361 | SEARCHLOG_MIN |
|
1361 | SEARCHLOG_MIN | |
1362 | Minimum value for compression parameter |
|
1362 | Minimum value for compression parameter | |
1363 | SEARCHLOG_MAX |
|
1363 | SEARCHLOG_MAX | |
1364 | Maximum value for compression parameter |
|
1364 | Maximum value for compression parameter | |
1365 | MINMATCH_MIN |
|
1365 | MINMATCH_MIN | |
1366 | Minimum value for compression parameter |
|
1366 | Minimum value for compression parameter | |
1367 | MINMATCH_MAX |
|
1367 | MINMATCH_MAX | |
1368 | Maximum value for compression parameter |
|
1368 | Maximum value for compression parameter | |
1369 | SEARCHLENGTH_MIN |
|
1369 | SEARCHLENGTH_MIN | |
1370 | Minimum value for compression parameter |
|
1370 | Minimum value for compression parameter | |
1371 |
|
1371 | |||
1372 | Deprecated: use ``MINMATCH_MIN`` |
|
1372 | Deprecated: use ``MINMATCH_MIN`` | |
1373 | SEARCHLENGTH_MAX |
|
1373 | SEARCHLENGTH_MAX | |
1374 | Maximum value for compression parameter |
|
1374 | Maximum value for compression parameter | |
1375 |
|
1375 | |||
1376 | Deprecated: use ``MINMATCH_MAX`` |
|
1376 | Deprecated: use ``MINMATCH_MAX`` | |
1377 | TARGETLENGTH_MIN |
|
1377 | TARGETLENGTH_MIN | |
1378 | Minimum value for compression parameter |
|
1378 | Minimum value for compression parameter | |
1379 | STRATEGY_FAST |
|
1379 | STRATEGY_FAST | |
1380 | Compression strategy |
|
1380 | Compression strategy | |
1381 | STRATEGY_DFAST |
|
1381 | STRATEGY_DFAST | |
1382 | Compression strategy |
|
1382 | Compression strategy | |
1383 | STRATEGY_GREEDY |
|
1383 | STRATEGY_GREEDY | |
1384 | Compression strategy |
|
1384 | Compression strategy | |
1385 | STRATEGY_LAZY |
|
1385 | STRATEGY_LAZY | |
1386 | Compression strategy |
|
1386 | Compression strategy | |
1387 | STRATEGY_LAZY2 |
|
1387 | STRATEGY_LAZY2 | |
1388 | Compression strategy |
|
1388 | Compression strategy | |
1389 | STRATEGY_BTLAZY2 |
|
1389 | STRATEGY_BTLAZY2 | |
1390 | Compression strategy |
|
1390 | Compression strategy | |
1391 | STRATEGY_BTOPT |
|
1391 | STRATEGY_BTOPT | |
1392 | Compression strategy |
|
1392 | Compression strategy | |
1393 | STRATEGY_BTULTRA |
|
1393 | STRATEGY_BTULTRA | |
1394 | Compression strategy |
|
1394 | Compression strategy | |
1395 | STRATEGY_BTULTRA2 |
|
1395 | STRATEGY_BTULTRA2 | |
1396 | Compression strategy |
|
1396 | Compression strategy | |
1397 |
|
1397 | |||
1398 | FORMAT_ZSTD1 |
|
1398 | FORMAT_ZSTD1 | |
1399 | Zstandard frame format |
|
1399 | Zstandard frame format | |
1400 | FORMAT_ZSTD1_MAGICLESS |
|
1400 | FORMAT_ZSTD1_MAGICLESS | |
1401 | Zstandard frame format without magic header |
|
1401 | Zstandard frame format without magic header | |
1402 |
|
1402 | |||
1403 | Performance Considerations |
|
1403 | Performance Considerations | |
1404 | -------------------------- |
|
1404 | -------------------------- | |
1405 |
|
1405 | |||
1406 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a |
|
1406 | The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a | |
1407 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` |
|
1407 | persistent compression or decompression *context*. Reusing a ``ZstdCompressor`` | |
1408 | or ``ZstdDecompressor`` instance for multiple operations is faster than |
|
1408 | or ``ZstdDecompressor`` instance for multiple operations is faster than | |
1409 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each |
|
1409 | instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each | |
1410 | operation. The differences are magnified as the size of data decreases. For |
|
1410 | operation. The differences are magnified as the size of data decreases. For | |
1411 | example, the difference between *context* reuse and non-reuse for 100,000 |
|
1411 | example, the difference between *context* reuse and non-reuse for 100,000 | |
1412 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts)
|
1412 | 100 byte inputs will be significant (possibly over 10x faster to reuse contexts) |
1413 | whereas 10 100,000,000 byte inputs will be more similar in speed (because the |
|
1413 | whereas 10 100,000,000 byte inputs will be more similar in speed (because the | |
1414 | time spent doing compression dwarfs time spent creating new *contexts*). |
|
1414 | time spent doing compression dwarfs time spent creating new *contexts*). | |
1415 |
|
1415 | |||
1416 | Buffer Types |
|
1416 | Buffer Types | |
1417 | ------------ |
|
1417 | ------------ | |
1418 |
|
1418 | |||
1419 | The API exposes a handful of custom types for interfacing with memory buffers. |
|
1419 | The API exposes a handful of custom types for interfacing with memory buffers. | |
1420 | The primary goal of these types is to facilitate efficient multi-object |
|
1420 | The primary goal of these types is to facilitate efficient multi-object | |
1421 | operations. |
|
1421 | operations. | |
1422 |
|
1422 | |||
1423 | The essential idea is to have a single memory allocation provide backing |
|
1423 | The essential idea is to have a single memory allocation provide backing | |
1424 | storage for multiple logical objects. This has 2 main advantages: fewer |
|
1424 | storage for multiple logical objects. This has 2 main advantages: fewer | |
1425 | allocations and optimal memory access patterns. This avoids having to allocate |
|
1425 | allocations and optimal memory access patterns. This avoids having to allocate | |
1426 | a Python object for each logical object and furthermore ensures that access of |
|
1426 | a Python object for each logical object and furthermore ensures that access of | |
1427 | data for objects can be sequential (read: fast) in memory. |
|
1427 | data for objects can be sequential (read: fast) in memory. | |
1428 |
|
1428 | |||
1429 | BufferWithSegments |
|
1429 | BufferWithSegments | |
1430 | ^^^^^^^^^^^^^^^^^^ |
|
1430 | ^^^^^^^^^^^^^^^^^^ | |
1431 |
|
1431 | |||
1432 | The ``BufferWithSegments`` type represents a memory buffer containing N |
|
1432 | The ``BufferWithSegments`` type represents a memory buffer containing N | |
1433 | discrete items of known lengths (segments). It is essentially a fixed size |
|
1433 | discrete items of known lengths (segments). It is essentially a fixed size | |
1434 | memory address and an array of 2-tuples of ``(offset, length)`` 64-bit |
|
1434 | memory address and an array of 2-tuples of ``(offset, length)`` 64-bit | |
1435 | unsigned native endian integers defining the byte offset and length of each |
|
1435 | unsigned native endian integers defining the byte offset and length of each | |
1436 | segment within the buffer. |
|
1436 | segment within the buffer. | |
1437 |
|
1437 | |||
1438 | Instances behave like containers. |
|
1438 | Instances behave like containers. | |
1439 |
|
1439 | |||
1440 | ``len()`` returns the number of segments within the instance. |
|
1440 | ``len()`` returns the number of segments within the instance. | |
1441 |
|
1441 | |||
1442 | ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an |
|
1442 | ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an | |
1443 | individual segment within the backing buffer. That returned object references |
|
1443 | individual segment within the backing buffer. That returned object references | |
1444 | (not copies) memory. This means that iterating all objects doesn't copy |
|
1444 | (not copies) memory. This means that iterating all objects doesn't copy | |
1445 | data within the buffer. |
|
1445 | data within the buffer. | |
1446 |
|
1446 | |||
1447 | The ``.size`` attribute contains the total size in bytes of the backing |
|
1447 | The ``.size`` attribute contains the total size in bytes of the backing | |
1448 | buffer. |
|
1448 | buffer. | |
1449 |
|
1449 | |||
1450 | Instances conform to the buffer protocol. So a reference to the backing bytes |
|
1450 | Instances conform to the buffer protocol. So a reference to the backing bytes | |
1451 | can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also |
|
1451 | can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also | |
1452 | be obtained via ``.tobytes()``. |
|
1452 | be obtained via ``.tobytes()``. | |
1453 |
|
1453 | |||
1454 | The ``.segments`` attribute exposes the array of ``(offset, length)`` for |
|
1454 | The ``.segments`` attribute exposes the array of ``(offset, length)`` for | |
1455 | segments within the buffer. It is a ``BufferSegments`` type. |
|
1455 | segments within the buffer. It is a ``BufferSegments`` type. | |
1456 |
|
1456 | |||
1457 | BufferSegment |
|
1457 | BufferSegment | |
1458 | ^^^^^^^^^^^^^ |
|
1458 | ^^^^^^^^^^^^^ | |
1459 |
|
1459 | |||
1460 | The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``. |
|
1460 | The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``. | |
1461 | It is essentially a reference to N bytes within a ``BufferWithSegments``. |
|
1461 | It is essentially a reference to N bytes within a ``BufferWithSegments``. | |
1462 |
|
1462 | |||
1463 | ``len()`` returns the length of the segment in bytes. |
|
1463 | ``len()`` returns the length of the segment in bytes. | |
1464 |
|
1464 | |||
1465 | ``.offset`` contains the byte offset of this segment within its parent |
|
1465 | ``.offset`` contains the byte offset of this segment within its parent | |
1466 | ``BufferWithSegments`` instance. |
|
1466 | ``BufferWithSegments`` instance. | |
1467 |
|
1467 | |||
1468 | The object conforms to the buffer protocol. ``.tobytes()`` can be called to |
|
1468 | The object conforms to the buffer protocol. ``.tobytes()`` can be called to | |
1469 | obtain a ``bytes`` instance with a copy of the backing bytes. |
|
1469 | obtain a ``bytes`` instance with a copy of the backing bytes. | |
1470 |
|
1470 | |||
1471 | BufferSegments |
|
1471 | BufferSegments | |
1472 | ^^^^^^^^^^^^^^ |
|
1472 | ^^^^^^^^^^^^^^ | |
1473 |
|
1473 | |||
1474 | This type represents an array of ``(offset, length)`` integers defining segments |
|
1474 | This type represents an array of ``(offset, length)`` integers defining segments | |
1475 | within a ``BufferWithSegments``. |
|
1475 | within a ``BufferWithSegments``. | |
1476 |
|
1476 | |||
1477 | The array members are 64-bit unsigned integers using host/native bit order. |
|
1477 | The array members are 64-bit unsigned integers using host/native bit order. | |
1478 |
|
1478 | |||
1479 | Instances conform to the buffer protocol. |
|
1479 | Instances conform to the buffer protocol. | |
1480 |
|
1480 | |||
1481 | BufferWithSegmentsCollection |
|
1481 | BufferWithSegmentsCollection | |
1482 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ |
|
1482 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | |
1483 |
|
1483 | |||
1484 | The ``BufferWithSegmentsCollection`` type represents a virtual spanning view |
|
1484 | The ``BufferWithSegmentsCollection`` type represents a virtual spanning view | |
1485 | of multiple ``BufferWithSegments`` instances. |
|
1485 | of multiple ``BufferWithSegments`` instances. | |
1486 |
|
1486 | |||
1487 | Instances are constructed from 1 or more ``BufferWithSegments`` instances. The |
|
1487 | Instances are constructed from 1 or more ``BufferWithSegments`` instances. The | |
1488 | resulting object behaves like an ordered sequence whose members are the |
|
1488 | resulting object behaves like an ordered sequence whose members are the | |
1489 | segments within each ``BufferWithSegments``. |
|
1489 | segments within each ``BufferWithSegments``. | |
1490 |
|
1490 | |||
1491 | ``len()`` returns the number of segments within all ``BufferWithSegments`` |
|
1491 | ``len()`` returns the number of segments within all ``BufferWithSegments`` | |
1492 | instances. |
|
1492 | instances. | |
1493 |
|
1493 | |||
1494 | ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at |
|
1494 | ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at | |
1495 | that offset as if all ``BufferWithSegments`` instances were a single |
|
1495 | that offset as if all ``BufferWithSegments`` instances were a single | |
1496 | entity. |
|
1496 | entity. | |
1497 |
|
1497 | |||
1498 | If the object is composed of 2 ``BufferWithSegments`` instances with the |
|
1498 | If the object is composed of 2 ``BufferWithSegments`` instances with the | |
1499 | first having 2 segments and the second have 3 segments, then ``b[0]`` |
|
1499 | first having 2 segments and the second have 3 segments, then ``b[0]`` | |
1500 | and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``, |
|
1500 | and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``, | |
1501 | and ``b[4]`` access segments from the second. |
|
1501 | and ``b[4]`` access segments from the second. | |
1502 |
|
1502 | |||
1503 | Choosing an API |
|
1503 | Choosing an API | |
1504 | =============== |
|
1504 | =============== | |
1505 |
|
1505 | |||
1506 | There are multiple APIs for performing compression and decompression. This is |
|
1506 | There are multiple APIs for performing compression and decompression. This is | |
1507 | because different applications have different needs and the library wants to |
|
1507 | because different applications have different needs and the library wants to | |
1508 | facilitate optimal use in as many use cases as possible. |
|
1508 | facilitate optimal use in as many use cases as possible. | |
1509 |
|
1509 | |||
1510 | From a high-level, APIs are divided into *one-shot* and *streaming*: either you |
|
1510 | From a high-level, APIs are divided into *one-shot* and *streaming*: either you | |
1511 | are operating on all data at once or you operate on it piecemeal. |
|
1511 | are operating on all data at once or you operate on it piecemeal. | |
1512 |
|
1512 | |||
1513 | The *one-shot* APIs are useful for small data, where the input or output |
|
1513 | The *one-shot* APIs are useful for small data, where the input or output | |
1514 | size is known. (The size can come from a buffer length, file size, or |
|
1514 | size is known. (The size can come from a buffer length, file size, or | |
1515 | stored in the zstd frame header.) A limitation of the *one-shot* APIs is that |
|
1515 | stored in the zstd frame header.) A limitation of the *one-shot* APIs is that | |
1516 | input and output must fit in memory simultaneously. For say a 4 GB input, |
|
1516 | input and output must fit in memory simultaneously. For say a 4 GB input, | |
1517 | this is often not feasible. |
|
1517 | this is often not feasible. | |
1518 |
|
1518 | |||
1519 | The *one-shot* APIs also perform all work as a single operation. So, if you |
|
1519 | The *one-shot* APIs also perform all work as a single operation. So, if you | |
1520 | feed it large input, it could take a long time for the function to return. |
|
1520 | feed it large input, it could take a long time for the function to return. | |
1521 |
|
1521 | |||
1522 | The streaming APIs do not have the limitations of the simple API. But the |
|
1522 | The streaming APIs do not have the limitations of the simple API. But the | |
1523 | price you pay for this flexibility is that they are more complex than a |
|
1523 | price you pay for this flexibility is that they are more complex than a | |
1524 | single function call. |
|
1524 | single function call. | |
1525 |
|
1525 | |||
1526 | The streaming APIs put the caller in control of compression and decompression |
|
1526 | The streaming APIs put the caller in control of compression and decompression | |
1527 | behavior by allowing them to directly control either the input or output side |
|
1527 | behavior by allowing them to directly control either the input or output side | |
1528 | of the operation. |
|
1528 | of the operation. | |
1529 |
|
1529 | |||
1530 | With the *streaming input*, *compressor*, and *decompressor* APIs, the caller |
|
1530 | With the *streaming input*, *compressor*, and *decompressor* APIs, the caller | |
1531 | has full control over the input to the compression or decompression stream. |
|
1531 | has full control over the input to the compression or decompression stream. | |
1532 | They can directly choose when new data is operated on. |
|
1532 | They can directly choose when new data is operated on. | |
1533 |
|
1533 | |||
1534 | With the *streaming output* APIs, the caller has full control over the output
|
1534 | With the *streaming output* APIs, the caller has full control over the output |
1535 | of the compression or decompression stream. It can choose when to receive |
|
1535 | of the compression or decompression stream. It can choose when to receive | |
1536 | new data. |
|
1536 | new data. | |
1537 |
|
1537 | |||
1538 | When using the *streaming* APIs that operate on file-like or stream objects, |
|
1538 | When using the *streaming* APIs that operate on file-like or stream objects, | |
1539 | it is important to consider what happens in that object when I/O is requested. |
|
1539 | it is important to consider what happens in that object when I/O is requested. | |
1540 | There is potential for long pauses as data is read or written from the |
|
1540 | There is potential for long pauses as data is read or written from the | |
1541 | underlying stream (say from interacting with a filesystem or network). This |
|
1541 | underlying stream (say from interacting with a filesystem or network). This | |
1542 | could add considerable overhead. |
|
1542 | could add considerable overhead. | |
1543 |
|
1543 | |||
1544 | Thread Safety |
|
1544 | Thread Safety | |
1545 | ============= |
|
1545 | ============= | |
1546 |
|
1546 | |||
1547 | ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees |
|
1547 | ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees | |
1548 | about thread safety. Do not operate on the same ``ZstdCompressor`` and |
|
1548 | about thread safety. Do not operate on the same ``ZstdCompressor`` and | |
1549 | ``ZstdDecompressor`` instance simultaneously from different threads. It is |
|
1549 | ``ZstdDecompressor`` instance simultaneously from different threads. It is | |
1550 | fine to have different threads call into a single instance, just not at the |
|
1550 | fine to have different threads call into a single instance, just not at the | |
1551 | same time. |
|
1551 | same time. | |
1552 |
|
1552 | |||
1553 | Some operations require multiple function calls to complete. e.g. streaming |
|
1553 | Some operations require multiple function calls to complete. e.g. streaming | |
1554 | operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used |
|
1554 | operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used | |
1555 | for simultaneously active operations. e.g. you must not start a streaming |
|
1555 | for simultaneously active operations. e.g. you must not start a streaming | |
1556 | operation when another streaming operation is already active. |
|
1556 | operation when another streaming operation is already active. | |
1557 |
|
1557 | |||
1558 | The C extension releases the GIL during non-trivial calls into the zstd C |
|
1558 | The C extension releases the GIL during non-trivial calls into the zstd C | |
1559 | API. Non-trivial calls are notably compression and decompression. Trivial |
|
1559 | API. Non-trivial calls are notably compression and decompression. Trivial | |
1560 | calls are things like parsing frame parameters. Where the GIL is released |
|
1560 | calls are things like parsing frame parameters. Where the GIL is released | |
1561 | is considered an implementation detail and can change in any release. |
|
1561 | is considered an implementation detail and can change in any release. | |
1562 |
|
1562 | |||
1563 | APIs that accept bytes-like objects don't enforce that the underlying object |
|
1563 | APIs that accept bytes-like objects don't enforce that the underlying object | |
1564 | is read-only. However, it is assumed that the passed object is read-only for |
|
1564 | is read-only. However, it is assumed that the passed object is read-only for | |
1565 | the duration of the function call. It is possible to pass a mutable object |
|
1565 | the duration of the function call. It is possible to pass a mutable object | |
1566 | (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL |
|
1566 | (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL | |
1567 | released, and mutate the object from another thread. Such a race condition |
|
1567 | released, and mutate the object from another thread. Such a race condition | |
1568 | is a bug in the consumer of python-zstandard. Most Python data types are |
|
1568 | is a bug in the consumer of python-zstandard. Most Python data types are | |
1569 | immutable, so unless you are doing something fancy, you don't need to |
|
1569 | immutable, so unless you are doing something fancy, you don't need to | |
1570 | worry about this. |
|
1570 | worry about this. | |
1571 |
|
1571 | |||
1572 | Note on Zstandard's *Experimental* API |
|
1572 | Note on Zstandard's *Experimental* API | |
1573 | ====================================== |
|
1573 | ====================================== | |
1574 |
|
1574 | |||
1575 | Many of the Zstandard APIs used by this module are marked as *experimental* |
|
1575 | Many of the Zstandard APIs used by this module are marked as *experimental* | |
1576 | within the Zstandard project. |
|
1576 | within the Zstandard project. | |
1577 |
|
1577 | |||
1578 | It is unclear how Zstandard's C API will evolve over time, especially with |
|
1578 | It is unclear how Zstandard's C API will evolve over time, especially with | |
1579 | regards to this *experimental* functionality. We will try to maintain |
|
1579 | regards to this *experimental* functionality. We will try to maintain | |
1580 | backwards compatibility at the Python API level. However, we cannot |
|
1580 | backwards compatibility at the Python API level. However, we cannot | |
1581 | guarantee this for things not under our control. |
|
1581 | guarantee this for things not under our control. | |
1582 |
|
1582 | |||
1583 | Since a copy of the Zstandard source code is distributed with this |
|
1583 | Since a copy of the Zstandard source code is distributed with this | |
1584 | module and since we compile against it, the behavior of a specific |
|
1584 | module and since we compile against it, the behavior of a specific | |
1585 | version of this module should be constant for all of time. So if you |
|
1585 | version of this module should be constant for all of time. So if you | |
1586 | pin the version of this module used in your projects (which is a Python |
|
1586 | pin the version of this module used in your projects (which is a Python | |
1587 | best practice), you should be shielded from unwanted future changes. |
|
1587 | best practice), you should be shielded from unwanted future changes. | |
1588 |
|
1588 | |||
1589 | Donate |
|
1589 | Donate | |
1590 | ====== |
|
1590 | ====== | |
1591 |
|
1591 | |||
1592 | A lot of time has been invested into this project by the author. |
|
1592 | A lot of time has been invested into this project by the author. | |
1593 |
|
1593 | |||
1594 | If you find this project useful and would like to thank the author for |
|
1594 | If you find this project useful and would like to thank the author for | |
1595 | their work, consider donating some money. Any amount is appreciated. |
|
1595 | their work, consider donating some money. Any amount is appreciated. | |
1596 |
|
1596 | |||
1597 | .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif |
|
1597 | .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif | |
1598 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard¤cy_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted |
|
1598 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard¤cy_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted | |
1599 | :alt: Donate via PayPal |
|
1599 | :alt: Donate via PayPal | |
1600 |
|
1600 | |||
1601 | .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master |
|
1601 | .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master | |
1602 | :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master |
|
1602 | :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master |
@@ -1,359 +1,359 b'' | |||||
1 | /** |
|
1 | /** | |
2 | * Copyright (c) 2016-present, Gregory Szorc |
|
2 | * Copyright (c) 2016-present, Gregory Szorc | |
3 | * All rights reserved. |
|
3 | * All rights reserved. | |
4 | * |
|
4 | * | |
5 | * This software may be modified and distributed under the terms |
|
5 | * This software may be modified and distributed under the terms | |
6 | * of the BSD license. See the LICENSE file for details. |
|
6 | * of the BSD license. See the LICENSE file for details. | |
7 | */ |
|
7 | */ | |
8 |
|
8 | |||
9 | #define PY_SSIZE_T_CLEAN |
|
9 | #define PY_SSIZE_T_CLEAN | |
10 | #include <Python.h> |
|
10 | #include <Python.h> | |
11 | #include "structmember.h" |
|
11 | #include "structmember.h" | |
12 |
|
12 | |||
13 | #define ZSTD_STATIC_LINKING_ONLY |
|
13 | #define ZSTD_STATIC_LINKING_ONLY | |
14 | #define ZDICT_STATIC_LINKING_ONLY |
|
14 | #define ZDICT_STATIC_LINKING_ONLY | |
15 | #include <zstd.h> |
|
15 | #include <zstd.h> | |
16 | #include <zdict.h> |
|
16 | #include <zdict.h> | |
17 |
|
17 | |||
18 | /* Remember to change the string in zstandard/__init__ as well */ |
|
18 | /* Remember to change the string in zstandard/__init__ as well */ | |
19 |
#define PYTHON_ZSTANDARD_VERSION "0.12.0"
|
19 | #define PYTHON_ZSTANDARD_VERSION "0.13.0" | |
20 |
|
20 | |||
21 | typedef enum { |
|
21 | typedef enum { | |
22 | compressorobj_flush_finish, |
|
22 | compressorobj_flush_finish, | |
23 | compressorobj_flush_block, |
|
23 | compressorobj_flush_block, | |
24 | } CompressorObj_Flush; |
|
24 | } CompressorObj_Flush; | |
25 |
|
25 | |||
26 | /* |
|
26 | /* | |
27 | Represents a ZstdCompressionParameters type. |
|
27 | Represents a ZstdCompressionParameters type. | |
28 |
|
28 | |||
29 | This type holds all the low-level compression parameters that can be set. |
|
29 | This type holds all the low-level compression parameters that can be set. | |
30 | */ |
|
30 | */ | |
31 | typedef struct { |
|
31 | typedef struct { | |
32 | PyObject_HEAD |
|
32 | PyObject_HEAD | |
33 | ZSTD_CCtx_params* params; |
|
33 | ZSTD_CCtx_params* params; | |
34 | } ZstdCompressionParametersObject; |
|
34 | } ZstdCompressionParametersObject; | |
35 |
|
35 | |||
36 | extern PyTypeObject ZstdCompressionParametersType; |
|
36 | extern PyTypeObject ZstdCompressionParametersType; | |
37 |
|
37 | |||
38 | /* |
|
38 | /* | |
39 | Represents a FrameParameters type. |
|
39 | Represents a FrameParameters type. | |
40 |
|
40 | |||
41 | This type is basically a wrapper around ZSTD_frameParams. |
|
41 | This type is basically a wrapper around ZSTD_frameParams. | |
42 | */ |
|
42 | */ | |
43 | typedef struct { |
|
43 | typedef struct { | |
44 | PyObject_HEAD |
|
44 | PyObject_HEAD | |
45 | unsigned long long frameContentSize; |
|
45 | unsigned long long frameContentSize; | |
46 | unsigned long long windowSize; |
|
46 | unsigned long long windowSize; | |
47 | unsigned dictID; |
|
47 | unsigned dictID; | |
48 | char checksumFlag; |
|
48 | char checksumFlag; | |
49 | } FrameParametersObject; |
|
49 | } FrameParametersObject; | |
50 |
|
50 | |||
51 | extern PyTypeObject FrameParametersType; |
|
51 | extern PyTypeObject FrameParametersType; | |
52 |
|
52 | |||
53 | /* |
|
53 | /* | |
54 | Represents a ZstdCompressionDict type. |
|
54 | Represents a ZstdCompressionDict type. | |
55 |
|
55 | |||
56 | Instances hold data used for a zstd compression dictionary. |
|
56 | Instances hold data used for a zstd compression dictionary. | |
57 | */ |
|
57 | */ | |
58 | typedef struct { |
|
58 | typedef struct { | |
59 | PyObject_HEAD |
|
59 | PyObject_HEAD | |
60 |
|
60 | |||
61 | /* Pointer to dictionary data. Owned by self. */ |
|
61 | /* Pointer to dictionary data. Owned by self. */ | |
62 | void* dictData; |
|
62 | void* dictData; | |
63 | /* Size of dictionary data. */ |
|
63 | /* Size of dictionary data. */ | |
64 | size_t dictSize; |
|
64 | size_t dictSize; | |
65 | ZSTD_dictContentType_e dictType; |
|
65 | ZSTD_dictContentType_e dictType; | |
66 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
66 | /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |
67 | unsigned k; |
|
67 | unsigned k; | |
68 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ |
|
68 | /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */ | |
69 | unsigned d; |
|
69 | unsigned d; | |
70 | /* Digested dictionary, suitable for reuse. */ |
|
70 | /* Digested dictionary, suitable for reuse. */ | |
71 | ZSTD_CDict* cdict; |
|
71 | ZSTD_CDict* cdict; | |
72 | ZSTD_DDict* ddict; |
|
72 | ZSTD_DDict* ddict; | |
73 | } ZstdCompressionDict; |
|
73 | } ZstdCompressionDict; | |
74 |
|
74 | |||
75 | extern PyTypeObject ZstdCompressionDictType; |
|
75 | extern PyTypeObject ZstdCompressionDictType; | |
76 |
|
76 | |||
77 | /* |
|
77 | /* | |
78 | Represents a ZstdCompressor type. |
|
78 | Represents a ZstdCompressor type. | |
79 | */ |
|
79 | */ | |
80 | typedef struct { |
|
80 | typedef struct { | |
81 | PyObject_HEAD |
|
81 | PyObject_HEAD | |
82 |
|
82 | |||
83 | /* Number of threads to use for operations. */ |
|
83 | /* Number of threads to use for operations. */ | |
84 | unsigned int threads; |
|
84 | unsigned int threads; | |
85 | /* Pointer to compression dictionary to use. NULL if not using dictionary |
|
85 | /* Pointer to compression dictionary to use. NULL if not using dictionary | |
86 | compression. */ |
|
86 | compression. */ | |
87 | ZstdCompressionDict* dict; |
|
87 | ZstdCompressionDict* dict; | |
88 | /* Compression context to use. Populated during object construction. */ |
|
88 | /* Compression context to use. Populated during object construction. */ | |
89 | ZSTD_CCtx* cctx; |
|
89 | ZSTD_CCtx* cctx; | |
90 | /* Compression parameters in use. */ |
|
90 | /* Compression parameters in use. */ | |
91 | ZSTD_CCtx_params* params; |
|
91 | ZSTD_CCtx_params* params; | |
92 | } ZstdCompressor; |
|
92 | } ZstdCompressor; | |
93 |
|
93 | |||
94 | extern PyTypeObject ZstdCompressorType; |
|
94 | extern PyTypeObject ZstdCompressorType; | |
95 |
|
95 | |||
96 | typedef struct { |
|
96 | typedef struct { | |
97 | PyObject_HEAD |
|
97 | PyObject_HEAD | |
98 |
|
98 | |||
99 | ZstdCompressor* compressor; |
|
99 | ZstdCompressor* compressor; | |
100 | ZSTD_outBuffer output; |
|
100 | ZSTD_outBuffer output; | |
101 | int finished; |
|
101 | int finished; | |
102 | } ZstdCompressionObj; |
|
102 | } ZstdCompressionObj; | |
103 |
|
103 | |||
104 | extern PyTypeObject ZstdCompressionObjType; |
|
104 | extern PyTypeObject ZstdCompressionObjType; | |
105 |
|
105 | |||
106 | typedef struct { |
|
106 | typedef struct { | |
107 | PyObject_HEAD |
|
107 | PyObject_HEAD | |
108 |
|
108 | |||
109 | ZstdCompressor* compressor; |
|
109 | ZstdCompressor* compressor; | |
110 | PyObject* writer; |
|
110 | PyObject* writer; | |
111 | ZSTD_outBuffer output; |
|
111 | ZSTD_outBuffer output; | |
112 | size_t outSize; |
|
112 | size_t outSize; | |
113 | int entered; |
|
113 | int entered; | |
114 | int closed; |
|
114 | int closed; | |
115 | int writeReturnRead; |
|
115 | int writeReturnRead; | |
116 | unsigned long long bytesCompressed; |
|
116 | unsigned long long bytesCompressed; | |
117 | } ZstdCompressionWriter; |
|
117 | } ZstdCompressionWriter; | |
118 |
|
118 | |||
119 | extern PyTypeObject ZstdCompressionWriterType; |
|
119 | extern PyTypeObject ZstdCompressionWriterType; | |
120 |
|
120 | |||
121 | typedef struct { |
|
121 | typedef struct { | |
122 | PyObject_HEAD |
|
122 | PyObject_HEAD | |
123 |
|
123 | |||
124 | ZstdCompressor* compressor; |
|
124 | ZstdCompressor* compressor; | |
125 | PyObject* reader; |
|
125 | PyObject* reader; | |
126 | Py_buffer buffer; |
|
126 | Py_buffer buffer; | |
127 | Py_ssize_t bufferOffset; |
|
127 | Py_ssize_t bufferOffset; | |
128 | size_t inSize; |
|
128 | size_t inSize; | |
129 | size_t outSize; |
|
129 | size_t outSize; | |
130 |
|
130 | |||
131 | ZSTD_inBuffer input; |
|
131 | ZSTD_inBuffer input; | |
132 | ZSTD_outBuffer output; |
|
132 | ZSTD_outBuffer output; | |
133 | int finishedOutput; |
|
133 | int finishedOutput; | |
134 | int finishedInput; |
|
134 | int finishedInput; | |
135 | PyObject* readResult; |
|
135 | PyObject* readResult; | |
136 | } ZstdCompressorIterator; |
|
136 | } ZstdCompressorIterator; | |
137 |
|
137 | |||
138 | extern PyTypeObject ZstdCompressorIteratorType; |
|
138 | extern PyTypeObject ZstdCompressorIteratorType; | |
139 |
|
139 | |||
140 | typedef struct { |
|
140 | typedef struct { | |
141 | PyObject_HEAD |
|
141 | PyObject_HEAD | |
142 |
|
142 | |||
143 | ZstdCompressor* compressor; |
|
143 | ZstdCompressor* compressor; | |
144 | PyObject* reader; |
|
144 | PyObject* reader; | |
145 | Py_buffer buffer; |
|
145 | Py_buffer buffer; | |
146 | size_t readSize; |
|
146 | size_t readSize; | |
147 |
|
147 | |||
148 | int entered; |
|
148 | int entered; | |
149 | int closed; |
|
149 | int closed; | |
150 | unsigned long long bytesCompressed; |
|
150 | unsigned long long bytesCompressed; | |
151 |
|
151 | |||
152 | ZSTD_inBuffer input; |
|
152 | ZSTD_inBuffer input; | |
153 | ZSTD_outBuffer output; |
|
153 | ZSTD_outBuffer output; | |
154 | int finishedInput; |
|
154 | int finishedInput; | |
155 | int finishedOutput; |
|
155 | int finishedOutput; | |
156 | PyObject* readResult; |
|
156 | PyObject* readResult; | |
157 | } ZstdCompressionReader; |
|
157 | } ZstdCompressionReader; | |
158 |
|
158 | |||
159 | extern PyTypeObject ZstdCompressionReaderType; |
|
159 | extern PyTypeObject ZstdCompressionReaderType; | |
160 |
|
160 | |||
161 | typedef struct { |
|
161 | typedef struct { | |
162 | PyObject_HEAD |
|
162 | PyObject_HEAD | |
163 |
|
163 | |||
164 | ZstdCompressor* compressor; |
|
164 | ZstdCompressor* compressor; | |
165 | ZSTD_inBuffer input; |
|
165 | ZSTD_inBuffer input; | |
166 | ZSTD_outBuffer output; |
|
166 | ZSTD_outBuffer output; | |
167 | Py_buffer inBuffer; |
|
167 | Py_buffer inBuffer; | |
168 | int finished; |
|
168 | int finished; | |
169 | size_t chunkSize; |
|
169 | size_t chunkSize; | |
170 | } ZstdCompressionChunker; |
|
170 | } ZstdCompressionChunker; | |
171 |
|
171 | |||
172 | extern PyTypeObject ZstdCompressionChunkerType; |
|
172 | extern PyTypeObject ZstdCompressionChunkerType; | |
173 |
|
173 | |||
174 | typedef enum { |
|
174 | typedef enum { | |
175 | compressionchunker_mode_normal, |
|
175 | compressionchunker_mode_normal, | |
176 | compressionchunker_mode_flush, |
|
176 | compressionchunker_mode_flush, | |
177 | compressionchunker_mode_finish, |
|
177 | compressionchunker_mode_finish, | |
178 | } CompressionChunkerMode; |
|
178 | } CompressionChunkerMode; | |
179 |
|
179 | |||
180 | typedef struct { |
|
180 | typedef struct { | |
181 | PyObject_HEAD |
|
181 | PyObject_HEAD | |
182 |
|
182 | |||
183 | ZstdCompressionChunker* chunker; |
|
183 | ZstdCompressionChunker* chunker; | |
184 | CompressionChunkerMode mode; |
|
184 | CompressionChunkerMode mode; | |
185 | } ZstdCompressionChunkerIterator; |
|
185 | } ZstdCompressionChunkerIterator; | |
186 |
|
186 | |||
187 | extern PyTypeObject ZstdCompressionChunkerIteratorType; |
|
187 | extern PyTypeObject ZstdCompressionChunkerIteratorType; | |
188 |
|
188 | |||
189 | typedef struct { |
|
189 | typedef struct { | |
190 | PyObject_HEAD |
|
190 | PyObject_HEAD | |
191 |
|
191 | |||
192 | ZSTD_DCtx* dctx; |
|
192 | ZSTD_DCtx* dctx; | |
193 | ZstdCompressionDict* dict; |
|
193 | ZstdCompressionDict* dict; | |
194 | size_t maxWindowSize; |
|
194 | size_t maxWindowSize; | |
195 | ZSTD_format_e format; |
|
195 | ZSTD_format_e format; | |
196 | } ZstdDecompressor; |
|
196 | } ZstdDecompressor; | |
197 |
|
197 | |||
198 | extern PyTypeObject ZstdDecompressorType; |
|
198 | extern PyTypeObject ZstdDecompressorType; | |
199 |
|
199 | |||
200 | typedef struct { |
|
200 | typedef struct { | |
201 | PyObject_HEAD |
|
201 | PyObject_HEAD | |
202 |
|
202 | |||
203 | ZstdDecompressor* decompressor; |
|
203 | ZstdDecompressor* decompressor; | |
204 | size_t outSize; |
|
204 | size_t outSize; | |
205 | int finished; |
|
205 | int finished; | |
206 | } ZstdDecompressionObj; |
|
206 | } ZstdDecompressionObj; | |
207 |
|
207 | |||
208 | extern PyTypeObject ZstdDecompressionObjType; |
|
208 | extern PyTypeObject ZstdDecompressionObjType; | |
209 |
|
209 | |||
210 | typedef struct { |
|
210 | typedef struct { | |
211 | PyObject_HEAD |
|
211 | PyObject_HEAD | |
212 |
|
212 | |||
213 | /* Parent decompressor to which this object is associated. */ |
|
213 | /* Parent decompressor to which this object is associated. */ | |
214 | ZstdDecompressor* decompressor; |
|
214 | ZstdDecompressor* decompressor; | |
215 | /* Object to read() from (if reading from a stream). */ |
|
215 | /* Object to read() from (if reading from a stream). */ | |
216 | PyObject* reader; |
|
216 | PyObject* reader; | |
217 | /* Size for read() operations on reader. */ |
|
217 | /* Size for read() operations on reader. */ | |
218 | size_t readSize; |
|
218 | size_t readSize; | |
219 | /* Whether a read() can return data spanning multiple zstd frames. */ |
|
219 | /* Whether a read() can return data spanning multiple zstd frames. */ | |
220 | int readAcrossFrames; |
|
220 | int readAcrossFrames; | |
221 | /* Buffer to read from (if reading from a buffer). */ |
|
221 | /* Buffer to read from (if reading from a buffer). */ | |
222 | Py_buffer buffer; |
|
222 | Py_buffer buffer; | |
223 |
|
223 | |||
224 | /* Whether the context manager is active. */ |
|
224 | /* Whether the context manager is active. */ | |
225 | int entered; |
|
225 | int entered; | |
226 | /* Whether we've closed the stream. */ |
|
226 | /* Whether we've closed the stream. */ | |
227 | int closed; |
|
227 | int closed; | |
228 |
|
228 | |||
229 | /* Number of bytes decompressed and returned to user. */ |
|
229 | /* Number of bytes decompressed and returned to user. */ | |
230 | unsigned long long bytesDecompressed; |
|
230 | unsigned long long bytesDecompressed; | |
231 |
|
231 | |||
232 | /* Tracks data going into decompressor. */ |
|
232 | /* Tracks data going into decompressor. */ | |
233 | ZSTD_inBuffer input; |
|
233 | ZSTD_inBuffer input; | |
234 |
|
234 | |||
235 | /* Holds output from read() operation on reader. */ |
|
235 | /* Holds output from read() operation on reader. */ | |
236 | PyObject* readResult; |
|
236 | PyObject* readResult; | |
237 |
|
237 | |||
238 | /* Whether all input has been sent to the decompressor. */ |
|
238 | /* Whether all input has been sent to the decompressor. */ | |
239 | int finishedInput; |
|
239 | int finishedInput; | |
240 | /* Whether all output has been flushed from the decompressor. */ |
|
240 | /* Whether all output has been flushed from the decompressor. */ | |
241 | int finishedOutput; |
|
241 | int finishedOutput; | |
242 | } ZstdDecompressionReader; |
|
242 | } ZstdDecompressionReader; | |
243 |
|
243 | |||
244 | extern PyTypeObject ZstdDecompressionReaderType; |
|
244 | extern PyTypeObject ZstdDecompressionReaderType; | |
245 |
|
245 | |||
246 | typedef struct { |
|
246 | typedef struct { | |
247 | PyObject_HEAD |
|
247 | PyObject_HEAD | |
248 |
|
248 | |||
249 | ZstdDecompressor* decompressor; |
|
249 | ZstdDecompressor* decompressor; | |
250 | PyObject* writer; |
|
250 | PyObject* writer; | |
251 | size_t outSize; |
|
251 | size_t outSize; | |
252 | int entered; |
|
252 | int entered; | |
253 | int closed; |
|
253 | int closed; | |
254 | int writeReturnRead; |
|
254 | int writeReturnRead; | |
255 | } ZstdDecompressionWriter; |
|
255 | } ZstdDecompressionWriter; | |
256 |
|
256 | |||
257 | extern PyTypeObject ZstdDecompressionWriterType; |
|
257 | extern PyTypeObject ZstdDecompressionWriterType; | |
258 |
|
258 | |||
259 | typedef struct { |
|
259 | typedef struct { | |
260 | PyObject_HEAD |
|
260 | PyObject_HEAD | |
261 |
|
261 | |||
262 | ZstdDecompressor* decompressor; |
|
262 | ZstdDecompressor* decompressor; | |
263 | PyObject* reader; |
|
263 | PyObject* reader; | |
264 | Py_buffer buffer; |
|
264 | Py_buffer buffer; | |
265 | Py_ssize_t bufferOffset; |
|
265 | Py_ssize_t bufferOffset; | |
266 | size_t inSize; |
|
266 | size_t inSize; | |
267 | size_t outSize; |
|
267 | size_t outSize; | |
268 | size_t skipBytes; |
|
268 | size_t skipBytes; | |
269 | ZSTD_inBuffer input; |
|
269 | ZSTD_inBuffer input; | |
270 | ZSTD_outBuffer output; |
|
270 | ZSTD_outBuffer output; | |
271 | Py_ssize_t readCount; |
|
271 | Py_ssize_t readCount; | |
272 | int finishedInput; |
|
272 | int finishedInput; | |
273 | int finishedOutput; |
|
273 | int finishedOutput; | |
274 | } ZstdDecompressorIterator; |
|
274 | } ZstdDecompressorIterator; | |
275 |
|
275 | |||
276 | extern PyTypeObject ZstdDecompressorIteratorType; |
|
276 | extern PyTypeObject ZstdDecompressorIteratorType; | |
277 |
|
277 | |||
278 | typedef struct { |
|
278 | typedef struct { | |
279 | int errored; |
|
279 | int errored; | |
280 | PyObject* chunk; |
|
280 | PyObject* chunk; | |
281 | } DecompressorIteratorResult; |
|
281 | } DecompressorIteratorResult; | |
282 |
|
282 | |||
283 | typedef struct { |
|
283 | typedef struct { | |
284 | /* The public API is that these are 64-bit unsigned integers. So these can't |
|
284 | /* The public API is that these are 64-bit unsigned integers. So these can't | |
285 | * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may |
|
285 | * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may | |
286 | * be nonsensical for this platform. */ |
|
286 | * be nonsensical for this platform. */ | |
287 | unsigned long long offset; |
|
287 | unsigned long long offset; | |
288 | unsigned long long length; |
|
288 | unsigned long long length; | |
289 | } BufferSegment; |
|
289 | } BufferSegment; | |
290 |
|
290 | |||
291 | typedef struct { |
|
291 | typedef struct { | |
292 | PyObject_HEAD |
|
292 | PyObject_HEAD | |
293 |
|
293 | |||
294 | PyObject* parent; |
|
294 | PyObject* parent; | |
295 | BufferSegment* segments; |
|
295 | BufferSegment* segments; | |
296 | Py_ssize_t segmentCount; |
|
296 | Py_ssize_t segmentCount; | |
297 | } ZstdBufferSegments; |
|
297 | } ZstdBufferSegments; | |
298 |
|
298 | |||
299 | extern PyTypeObject ZstdBufferSegmentsType; |
|
299 | extern PyTypeObject ZstdBufferSegmentsType; | |
300 |
|
300 | |||
301 | typedef struct { |
|
301 | typedef struct { | |
302 | PyObject_HEAD |
|
302 | PyObject_HEAD | |
303 |
|
303 | |||
304 | PyObject* parent; |
|
304 | PyObject* parent; | |
305 | void* data; |
|
305 | void* data; | |
306 | Py_ssize_t dataSize; |
|
306 | Py_ssize_t dataSize; | |
307 | unsigned long long offset; |
|
307 | unsigned long long offset; | |
308 | } ZstdBufferSegment; |
|
308 | } ZstdBufferSegment; | |
309 |
|
309 | |||
310 | extern PyTypeObject ZstdBufferSegmentType; |
|
310 | extern PyTypeObject ZstdBufferSegmentType; | |
311 |
|
311 | |||
312 | typedef struct { |
|
312 | typedef struct { | |
313 | PyObject_HEAD |
|
313 | PyObject_HEAD | |
314 |
|
314 | |||
315 | Py_buffer parent; |
|
315 | Py_buffer parent; | |
316 | void* data; |
|
316 | void* data; | |
317 | unsigned long long dataSize; |
|
317 | unsigned long long dataSize; | |
318 | BufferSegment* segments; |
|
318 | BufferSegment* segments; | |
319 | Py_ssize_t segmentCount; |
|
319 | Py_ssize_t segmentCount; | |
320 | int useFree; |
|
320 | int useFree; | |
321 | } ZstdBufferWithSegments; |
|
321 | } ZstdBufferWithSegments; | |
322 |
|
322 | |||
323 | extern PyTypeObject ZstdBufferWithSegmentsType; |
|
323 | extern PyTypeObject ZstdBufferWithSegmentsType; | |
324 |
|
324 | |||
325 | /** |
|
325 | /** | |
326 | * An ordered collection of BufferWithSegments exposed as a squashed collection. |
|
326 | * An ordered collection of BufferWithSegments exposed as a squashed collection. | |
327 | * |
|
327 | * | |
328 | * This type provides a virtual view spanning multiple BufferWithSegments |
|
328 | * This type provides a virtual view spanning multiple BufferWithSegments | |
329 | * instances. It allows multiple instances to be "chained" together and |
|
329 | * instances. It allows multiple instances to be "chained" together and | |
330 | * exposed as a single collection. e.g. if there are 2 buffers holding |
|
330 | * exposed as a single collection. e.g. if there are 2 buffers holding | |
331 | * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer. |
|
331 | * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer. | |
332 | */ |
|
332 | */ | |
333 | typedef struct { |
|
333 | typedef struct { | |
334 | PyObject_HEAD |
|
334 | PyObject_HEAD | |
335 |
|
335 | |||
336 | /* An array of buffers that should be exposed through this instance. */ |
|
336 | /* An array of buffers that should be exposed through this instance. */ | |
337 | ZstdBufferWithSegments** buffers; |
|
337 | ZstdBufferWithSegments** buffers; | |
338 | /* Number of elements in buffers array. */ |
|
338 | /* Number of elements in buffers array. */ | |
339 | Py_ssize_t bufferCount; |
|
339 | Py_ssize_t bufferCount; | |
340 | /* Array of first offset in each buffer instance. 0th entry corresponds |
|
340 | /* Array of first offset in each buffer instance. 0th entry corresponds | |
341 | to number of elements in the 0th buffer. 1st entry corresponds to the |
|
341 | to number of elements in the 0th buffer. 1st entry corresponds to the | |
342 | sum of elements in 0th and 1st buffers. */ |
|
342 | sum of elements in 0th and 1st buffers. */ | |
343 | Py_ssize_t* firstElements; |
|
343 | Py_ssize_t* firstElements; | |
344 | } ZstdBufferWithSegmentsCollection; |
|
344 | } ZstdBufferWithSegmentsCollection; | |
345 |
|
345 | |||
346 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; |
|
346 | extern PyTypeObject ZstdBufferWithSegmentsCollectionType; | |
347 |
|
347 | |||
348 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); |
|
348 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); | |
349 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj); |
|
349 | int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj); | |
350 | int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams); |
|
350 | int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams); | |
351 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs); |
|
351 | FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs); | |
352 | int ensure_ddict(ZstdCompressionDict* dict); |
|
352 | int ensure_ddict(ZstdCompressionDict* dict); | |
353 | int ensure_dctx(ZstdDecompressor* decompressor, int loadDict); |
|
353 | int ensure_dctx(ZstdDecompressor* decompressor, int loadDict); | |
354 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); |
|
354 | ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs); | |
355 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); |
|
355 | ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize); | |
356 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); |
|
356 | Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*); | |
357 | int cpu_count(void); |
|
357 | int cpu_count(void); | |
358 | size_t roundpow2(size_t); |
|
358 | size_t roundpow2(size_t); | |
359 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size); |
|
359 | int safe_pybytes_resize(PyObject** obj, Py_ssize_t size); |
@@ -1,207 +1,225 b'' | |||||
1 | # Copyright (c) 2016-present, Gregory Szorc |
|
1 | # Copyright (c) 2016-present, Gregory Szorc | |
2 | # All rights reserved. |
|
2 | # All rights reserved. | |
3 | # |
|
3 | # | |
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
7 | from __future__ import absolute_import |
|
7 | from __future__ import absolute_import | |
8 |
|
8 | |||
9 | import cffi |
|
9 | import cffi | |
10 | import distutils.ccompiler |
|
10 | import distutils.ccompiler | |
11 | import os |
|
11 | import os | |
12 | import re |
|
12 | import re | |
13 | import subprocess |
|
13 | import subprocess | |
14 | import tempfile |
|
14 | import tempfile | |
15 |
|
15 | |||
16 |
|
16 | |||
17 | HERE = os.path.abspath(os.path.dirname(__file__)) |
|
17 | HERE = os.path.abspath(os.path.dirname(__file__)) | |
18 |
|
18 | |||
19 | SOURCES = ['zstd/%s' % p for p in ( |
|
19 | SOURCES = [ | |
20 | 'common/debug.c', |
|
20 | "zstd/%s" % p | |
21 | 'common/entropy_common.c', |
|
21 | for p in ( | |
22 | 'common/error_private.c', |
|
22 | "common/debug.c", | |
23 | 'common/fse_decompress.c', |
|
23 | "common/entropy_common.c", | |
24 | 'common/pool.c', |
|
24 | "common/error_private.c", | |
25 | 'common/threading.c', |
|
25 | "common/fse_decompress.c", | |
26 |
|
|
26 | "common/pool.c", | |
27 |
|
|
27 | "common/threading.c", | |
28 | 'compress/fse_compress.c', |
|
28 | "common/xxhash.c", | |
29 | 'compress/hist.c', |
|
29 | "common/zstd_common.c", | |
30 |
|
|
30 | "compress/fse_compress.c", | |
31 |
|
|
31 | "compress/hist.c", | |
32 |
|
|
32 | "compress/huf_compress.c", | |
33 |
|
|
33 | "compress/zstd_compress.c", | |
34 |
|
|
34 | "compress/zstd_compress_literals.c", | |
35 |
|
|
35 | "compress/zstd_compress_sequences.c", | |
36 |
|
|
36 | "compress/zstd_double_fast.c", | |
37 |
|
|
37 | "compress/zstd_fast.c", | |
38 |
|
|
38 | "compress/zstd_lazy.c", | |
39 |
|
|
39 | "compress/zstd_ldm.c", | |
40 | 'decompress/huf_decompress.c', |
|
40 | "compress/zstd_opt.c", | |
41 |
|
|
41 | "compress/zstdmt_compress.c", | |
42 |
|
|
42 | "decompress/huf_decompress.c", | |
43 |
|
|
43 | "decompress/zstd_ddict.c", | |
44 | 'dictBuilder/cover.c', |
|
44 | "decompress/zstd_decompress.c", | |
45 | 'dictBuilder/fastcover.c', |
|
45 | "decompress/zstd_decompress_block.c", | |
46 |
|
|
46 | "dictBuilder/cover.c", | |
47 |
|
|
47 | "dictBuilder/fastcover.c", | |
48 | )] |
|
48 | "dictBuilder/divsufsort.c", | |
|
49 | "dictBuilder/zdict.c", | |||
|
50 | ) | |||
|
51 | ] | |||
49 |
|
52 | |||
50 | # Headers whose preprocessed output will be fed into cdef(). |
|
53 | # Headers whose preprocessed output will be fed into cdef(). | |
51 | HEADERS = [os.path.join(HERE, 'zstd', *p) for p in ( |
|
54 | HEADERS = [ | |
52 | ('zstd.h',), |
|
55 | os.path.join(HERE, "zstd", *p) for p in (("zstd.h",), ("dictBuilder", "zdict.h"),) | |
53 | ('dictBuilder', 'zdict.h'), |
|
56 | ] | |
54 | )] |
|
|||
55 |
|
57 | |||
56 | INCLUDE_DIRS = [os.path.join(HERE, d) for d in ( |
|
58 | INCLUDE_DIRS = [ | |
57 | 'zstd', |
|
59 | os.path.join(HERE, d) | |
58 | 'zstd/common', |
|
60 | for d in ( | |
59 | 'zstd/compress', |
|
61 | "zstd", | |
60 | 'zstd/decompress', |
|
62 | "zstd/common", | |
61 | 'zstd/dictBuilder', |
|
63 | "zstd/compress", | |
62 | )] |
|
64 | "zstd/decompress", | |
|
65 | "zstd/dictBuilder", | |||
|
66 | ) | |||
|
67 | ] | |||
63 |
|
68 | |||
64 | # cffi can't parse some of the primitives in zstd.h. So we invoke the |
|
69 | # cffi can't parse some of the primitives in zstd.h. So we invoke the | |
65 | # preprocessor and feed its output into cffi. |
|
70 | # preprocessor and feed its output into cffi. | |
66 | compiler = distutils.ccompiler.new_compiler() |
|
71 | compiler = distutils.ccompiler.new_compiler() | |
67 |
|
72 | |||
68 | # Needed for MSVC. |
|
73 | # Needed for MSVC. | |
69 |
if hasattr(compiler, |
|
74 | if hasattr(compiler, "initialize"): | |
70 | compiler.initialize() |
|
75 | compiler.initialize() | |
71 |
|
76 | |||
72 | # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor |
|
77 | # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor | |
73 | # manually. |
|
78 | # manually. | |
74 |
if compiler.compiler_type == |
|
79 | if compiler.compiler_type == "unix": | |
75 |
args = list(compiler.executables[ |
|
80 | args = list(compiler.executables["compiler"]) | |
76 |
args.extend( |
|
81 | args.extend( | |
77 | '-E', |
|
82 | ["-E", "-DZSTD_STATIC_LINKING_ONLY", "-DZDICT_STATIC_LINKING_ONLY",] | |
78 | '-DZSTD_STATIC_LINKING_ONLY', |
|
83 | ) | |
79 | '-DZDICT_STATIC_LINKING_ONLY', |
|
84 | elif compiler.compiler_type == "msvc": | |
80 | ]) |
|
|||
81 | elif compiler.compiler_type == 'msvc': |
|
|||
82 | args = [compiler.cc] |
|
85 | args = [compiler.cc] | |
83 |
args.extend( |
|
86 | args.extend( | |
84 | '/EP', |
|
87 | ["/EP", "/DZSTD_STATIC_LINKING_ONLY", "/DZDICT_STATIC_LINKING_ONLY",] | |
85 | '/DZSTD_STATIC_LINKING_ONLY', |
|
88 | ) | |
86 | '/DZDICT_STATIC_LINKING_ONLY', |
|
|||
87 | ]) |
|
|||
88 | else: |
|
89 | else: | |
89 |
raise Exception( |
|
90 | raise Exception("unsupported compiler type: %s" % compiler.compiler_type) | |
|
91 | ||||
90 |
|
92 | |||
91 | def preprocess(path): |
|
93 | def preprocess(path): | |
92 |
with open(path, |
|
94 | with open(path, "rb") as fh: | |
93 | lines = [] |
|
95 | lines = [] | |
94 | it = iter(fh) |
|
96 | it = iter(fh) | |
95 |
|
97 | |||
96 | for l in it: |
|
98 | for l in it: | |
97 | # zstd.h includes <stddef.h>, which is also included by cffi's |
|
99 | # zstd.h includes <stddef.h>, which is also included by cffi's | |
98 | # boilerplate. This can lead to duplicate declarations. So we strip |
|
100 | # boilerplate. This can lead to duplicate declarations. So we strip | |
99 | # this include from the preprocessor invocation. |
|
101 | # this include from the preprocessor invocation. | |
100 | # |
|
102 | # | |
101 | # The same things happens for including zstd.h, so give it the same |
|
103 | # The same things happens for including zstd.h, so give it the same | |
102 | # treatment. |
|
104 | # treatment. | |
103 | # |
|
105 | # | |
104 | # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline |
|
106 | # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline | |
105 | # #define in zstdmt_compress.h and results in a compiler warning. So drop |
|
107 | # #define in zstdmt_compress.h and results in a compiler warning. So drop | |
106 | # the inline #define. |
|
108 | # the inline #define. | |
107 |
if l.startswith( |
|
109 | if l.startswith( | |
108 | b'#include "zstd.h"', |
|
110 | ( | |
109 | b'#define ZSTD_STATIC_LINKING_ONLY')): |
|
111 | b"#include <stddef.h>", | |
|
112 | b'#include "zstd.h"', | |||
|
113 | b"#define ZSTD_STATIC_LINKING_ONLY", | |||
|
114 | ) | |||
|
115 | ): | |||
110 | continue |
|
116 | continue | |
111 |
|
117 | |||
|
118 | # The preprocessor environment on Windows doesn't define include | |||
|
119 | # paths, so the #include of limits.h fails. We work around this | |||
|
120 | # by removing that import and defining INT_MAX ourselves. This is | |||
|
121 | # a bit hacky. But it gets the job done. | |||
|
122 | # TODO make limits.h work on Windows so we ensure INT_MAX is | |||
|
123 | # correct. | |||
|
124 | if l.startswith(b"#include <limits.h>"): | |||
|
125 | l = b"#define INT_MAX 2147483647\n" | |||
|
126 | ||||
112 | # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't |
|
127 | # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't | |
113 | # important so just filter it out. |
|
128 | # important so just filter it out. | |
114 |
if l.startswith(b |
|
129 | if l.startswith(b"ZSTDLIB_API"): | |
115 |
l = l[len(b |
|
130 | l = l[len(b"ZSTDLIB_API ") :] | |
116 |
|
131 | |||
117 | lines.append(l) |
|
132 | lines.append(l) | |
118 |
|
133 | |||
119 |
fd, input_file = tempfile.mkstemp(suffix= |
|
134 | fd, input_file = tempfile.mkstemp(suffix=".h") | |
120 |
os.write(fd, b |
|
135 | os.write(fd, b"".join(lines)) | |
121 | os.close(fd) |
|
136 | os.close(fd) | |
122 |
|
137 | |||
123 | try: |
|
138 | try: | |
124 | env = dict(os.environ) |
|
139 | env = dict(os.environ) | |
125 |
if getattr(compiler, |
|
140 | if getattr(compiler, "_paths", None): | |
126 |
env[ |
|
141 | env["PATH"] = compiler._paths | |
127 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, |
|
142 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, env=env) | |
128 | env=env) |
|
|||
129 | output = process.communicate()[0] |
|
143 | output = process.communicate()[0] | |
130 | ret = process.poll() |
|
144 | ret = process.poll() | |
131 | if ret: |
|
145 | if ret: | |
132 |
raise Exception( |
|
146 | raise Exception("preprocessor exited with error") | |
133 |
|
147 | |||
134 | return output |
|
148 | return output | |
135 | finally: |
|
149 | finally: | |
136 | os.unlink(input_file) |
|
150 | os.unlink(input_file) | |
137 |
|
151 | |||
138 |
|
152 | |||
139 | def normalize_output(output): |
|
153 | def normalize_output(output): | |
140 | lines = [] |
|
154 | lines = [] | |
141 | for line in output.splitlines(): |
|
155 | for line in output.splitlines(): | |
142 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. |
|
156 | # CFFI's parser doesn't like __attribute__ on UNIX compilers. | |
143 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): |
|
157 | if line.startswith(b'__attribute__ ((visibility ("default"))) '): | |
144 | line = line[len(b'__attribute__ ((visibility ("default"))) '):] |
|
158 | line = line[len(b'__attribute__ ((visibility ("default"))) ') :] | |
145 |
|
159 | |||
146 |
if line.startswith(b |
|
160 | if line.startswith(b"__attribute__((deprecated("): | |
147 | continue |
|
161 | continue | |
148 |
elif b |
|
162 | elif b"__declspec(deprecated(" in line: | |
149 | continue |
|
163 | continue | |
150 |
|
164 | |||
151 | lines.append(line) |
|
165 | lines.append(line) | |
152 |
|
166 | |||
153 |
return b |
|
167 | return b"\n".join(lines) | |
154 |
|
168 | |||
155 |
|
169 | |||
156 | ffi = cffi.FFI() |
|
170 | ffi = cffi.FFI() | |
157 | # zstd.h uses a possible undefined MIN(). Define it until |
|
171 | # zstd.h uses a possible undefined MIN(). Define it until | |
158 | # https://github.com/facebook/zstd/issues/976 is fixed. |
|
172 | # https://github.com/facebook/zstd/issues/976 is fixed. | |
159 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning |
|
173 | # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning | |
160 | # when cffi uses the function. Since we statically link against zstd, even |
|
174 | # when cffi uses the function. Since we statically link against zstd, even | |
161 | # if we use the deprecated functions it shouldn't be a huge problem. |
|
175 | # if we use the deprecated functions it shouldn't be a huge problem. | |
162 |
ffi.set_source( |
|
176 | ffi.set_source( | |
|
177 | "_zstd_cffi", | |||
|
178 | """ | |||
163 |
|
|
179 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) | |
164 |
|
|
180 | #define ZSTD_STATIC_LINKING_ONLY | |
165 |
|
|
181 | #include <zstd.h> | |
166 |
|
|
182 | #define ZDICT_STATIC_LINKING_ONLY | |
167 |
|
|
183 | #define ZDICT_DISABLE_DEPRECATE_WARNINGS | |
168 |
|
|
184 | #include <zdict.h> | |
169 | ''', sources=SOURCES, |
|
185 | """, | |
170 | include_dirs=INCLUDE_DIRS, |
|
186 | sources=SOURCES, | |
171 | extra_compile_args=['-DZSTD_MULTITHREAD']) |
|
187 | include_dirs=INCLUDE_DIRS, | |
|
188 | extra_compile_args=["-DZSTD_MULTITHREAD"], | |||
|
189 | ) | |||
172 |
|
190 | |||
173 |
DEFINE = re.compile(b |
|
191 | DEFINE = re.compile(b"^\\#define ([a-zA-Z0-9_]+) ") | |
174 |
|
192 | |||
175 | sources = [] |
|
193 | sources = [] | |
176 |
|
194 | |||
177 | # Feed normalized preprocessor output for headers into the cdef parser. |
|
195 | # Feed normalized preprocessor output for headers into the cdef parser. | |
178 | for header in HEADERS: |
|
196 | for header in HEADERS: | |
179 | preprocessed = preprocess(header) |
|
197 | preprocessed = preprocess(header) | |
180 | sources.append(normalize_output(preprocessed)) |
|
198 | sources.append(normalize_output(preprocessed)) | |
181 |
|
199 | |||
182 | # #define's are effectively erased as part of going through preprocessor. |
|
200 | # #define's are effectively erased as part of going through preprocessor. | |
183 | # So perform a manual pass to re-add those to the cdef source. |
|
201 | # So perform a manual pass to re-add those to the cdef source. | |
184 |
with open(header, |
|
202 | with open(header, "rb") as fh: | |
185 | for line in fh: |
|
203 | for line in fh: | |
186 | line = line.strip() |
|
204 | line = line.strip() | |
187 | m = DEFINE.match(line) |
|
205 | m = DEFINE.match(line) | |
188 | if not m: |
|
206 | if not m: | |
189 | continue |
|
207 | continue | |
190 |
|
208 | |||
191 |
if m.group(1) == b |
|
209 | if m.group(1) == b"ZSTD_STATIC_LINKING_ONLY": | |
192 | continue |
|
210 | continue | |
193 |
|
211 | |||
194 | # The parser doesn't like some constants with complex values. |
|
212 | # The parser doesn't like some constants with complex values. | |
195 |
if m.group(1) in (b |
|
213 | if m.group(1) in (b"ZSTD_LIB_VERSION", b"ZSTD_VERSION_STRING"): | |
196 | continue |
|
214 | continue | |
197 |
|
215 | |||
198 | # The ... is magic syntax by the cdef parser to resolve the |
|
216 | # The ... is magic syntax by the cdef parser to resolve the | |
199 | # value at compile time. |
|
217 | # value at compile time. | |
200 |
sources.append(m.group(0) + b |
|
218 | sources.append(m.group(0) + b" ...") | |
201 |
|
219 | |||
202 |
cdeflines = b |
|
220 | cdeflines = b"\n".join(sources).splitlines() | |
203 | cdeflines = [l for l in cdeflines if l.strip()] |
|
221 | cdeflines = [l for l in cdeflines if l.strip()] | |
204 |
ffi.cdef(b |
|
222 | ffi.cdef(b"\n".join(cdeflines).decode("latin1")) | |
205 |
|
223 | |||
206 |
if __name__ == |
|
224 | if __name__ == "__main__": | |
207 | ffi.compile() |
|
225 | ffi.compile() |
@@ -1,112 +1,118 b'' | |||||
1 | #!/usr/bin/env python |
|
1 | #!/usr/bin/env python | |
2 | # Copyright (c) 2016-present, Gregory Szorc |
|
2 | # Copyright (c) 2016-present, Gregory Szorc | |
3 | # All rights reserved. |
|
3 | # All rights reserved. | |
4 | # |
|
4 | # | |
5 | # This software may be modified and distributed under the terms |
|
5 | # This software may be modified and distributed under the terms | |
6 | # of the BSD license. See the LICENSE file for details. |
|
6 | # of the BSD license. See the LICENSE file for details. | |
7 |
|
7 | |||
8 | from __future__ import print_function |
|
8 | from __future__ import print_function | |
9 |
|
9 | |||
10 | from distutils.version import LooseVersion |
|
10 | from distutils.version import LooseVersion | |
11 | import os |
|
11 | import os | |
12 | import sys |
|
12 | import sys | |
13 | from setuptools import setup |
|
13 | from setuptools import setup | |
14 |
|
14 | |||
15 | # Need change in 1.10 for ffi.from_buffer() to handle all buffer types |
|
15 | # Need change in 1.10 for ffi.from_buffer() to handle all buffer types | |
16 | # (like memoryview). |
|
16 | # (like memoryview). | |
17 | # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid |
|
17 | # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid | |
18 | # garbage collection pitfalls. |
|
18 | # garbage collection pitfalls. | |
19 |
MINIMUM_CFFI_VERSION = |
|
19 | MINIMUM_CFFI_VERSION = "1.11" | |
20 |
|
20 | |||
21 | try: |
|
21 | try: | |
22 | import cffi |
|
22 | import cffi | |
23 |
|
23 | |||
24 | # PyPy (and possibly other distros) have CFFI distributed as part of |
|
24 | # PyPy (and possibly other distros) have CFFI distributed as part of | |
25 | # them. The install_requires for CFFI below won't work. We need to sniff |
|
25 | # them. The install_requires for CFFI below won't work. We need to sniff | |
26 | # out the CFFI version here and reject CFFI if it is too old. |
|
26 | # out the CFFI version here and reject CFFI if it is too old. | |
27 | cffi_version = LooseVersion(cffi.__version__) |
|
27 | cffi_version = LooseVersion(cffi.__version__) | |
28 | if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION): |
|
28 | if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION): | |
29 | print('CFFI 1.11 or newer required (%s found); ' |
|
29 | print( | |
30 | 'not building CFFI backend' % cffi_version, |
|
30 | "CFFI 1.11 or newer required (%s found); " | |
31 | file=sys.stderr) |
|
31 | "not building CFFI backend" % cffi_version, | |
|
32 | file=sys.stderr, | |||
|
33 | ) | |||
32 | cffi = None |
|
34 | cffi = None | |
33 |
|
35 | |||
34 | except ImportError: |
|
36 | except ImportError: | |
35 | cffi = None |
|
37 | cffi = None | |
36 |
|
38 | |||
37 | import setup_zstd |
|
39 | import setup_zstd | |
38 |
|
40 | |||
39 | SUPPORT_LEGACY = False |
|
41 | SUPPORT_LEGACY = False | |
40 | SYSTEM_ZSTD = False |
|
42 | SYSTEM_ZSTD = False | |
41 | WARNINGS_AS_ERRORS = False |
|
43 | WARNINGS_AS_ERRORS = False | |
42 |
|
44 | |||
43 |
if os.environ.get( |
|
45 | if os.environ.get("ZSTD_WARNINGS_AS_ERRORS", ""): | |
44 | WARNINGS_AS_ERRORS = True |
|
46 | WARNINGS_AS_ERRORS = True | |
45 |
|
47 | |||
46 |
if |
|
48 | if "--legacy" in sys.argv: | |
47 | SUPPORT_LEGACY = True |
|
49 | SUPPORT_LEGACY = True | |
48 |
sys.argv.remove( |
|
50 | sys.argv.remove("--legacy") | |
49 |
|
51 | |||
50 |
if |
|
52 | if "--system-zstd" in sys.argv: | |
51 | SYSTEM_ZSTD = True |
|
53 | SYSTEM_ZSTD = True | |
52 |
sys.argv.remove( |
|
54 | sys.argv.remove("--system-zstd") | |
53 |
|
55 | |||
54 |
if |
|
56 | if "--warnings-as-errors" in sys.argv: | |
55 | WARNINGS_AS_ERRORS = True |
|
57 | WARNINGS_AS_ERRORS = True | |
56 |
sys.argv.remove( |
|
58 | sys.argv.remove("--warning-as-errors") | |
57 |
|
59 | |||
58 | # Code for obtaining the Extension instance is in its own module to |
|
60 | # Code for obtaining the Extension instance is in its own module to | |
59 | # facilitate reuse in other projects. |
|
61 | # facilitate reuse in other projects. | |
60 | extensions = [ |
|
62 | extensions = [ | |
61 |
setup_zstd.get_c_extension( |
|
63 | setup_zstd.get_c_extension( | |
62 | support_legacy=SUPPORT_LEGACY, |
|
64 | name="zstd", | |
63 | system_zstd=SYSTEM_ZSTD, |
|
65 | support_legacy=SUPPORT_LEGACY, | |
64 | warnings_as_errors=WARNINGS_AS_ERRORS), |
|
66 | system_zstd=SYSTEM_ZSTD, | |
|
67 | warnings_as_errors=WARNINGS_AS_ERRORS, | |||
|
68 | ), | |||
65 | ] |
|
69 | ] | |
66 |
|
70 | |||
67 | install_requires = [] |
|
71 | install_requires = [] | |
68 |
|
72 | |||
69 | if cffi: |
|
73 | if cffi: | |
70 | import make_cffi |
|
74 | import make_cffi | |
|
75 | ||||
71 | extensions.append(make_cffi.ffi.distutils_extension()) |
|
76 | extensions.append(make_cffi.ffi.distutils_extension()) | |
72 |
install_requires.append( |
|
77 | install_requires.append("cffi>=%s" % MINIMUM_CFFI_VERSION) | |
73 |
|
78 | |||
74 | version = None |
|
79 | version = None | |
75 |
|
80 | |||
76 |
with open( |
|
81 | with open("c-ext/python-zstandard.h", "r") as fh: | |
77 | for line in fh: |
|
82 | for line in fh: | |
78 |
if not line.startswith( |
|
83 | if not line.startswith("#define PYTHON_ZSTANDARD_VERSION"): | |
79 | continue |
|
84 | continue | |
80 |
|
85 | |||
81 | version = line.split()[2][1:-1] |
|
86 | version = line.split()[2][1:-1] | |
82 | break |
|
87 | break | |
83 |
|
88 | |||
84 | if not version: |
|
89 | if not version: | |
85 |
raise Exception( |
|
90 | raise Exception("could not resolve package version; " "this should never happen") | |
86 | 'this should never happen') |
|
|||
87 |
|
91 | |||
88 | setup( |
|
92 | setup( | |
89 |
name= |
|
93 | name="zstandard", | |
90 | version=version, |
|
94 | version=version, | |
91 |
description= |
|
95 | description="Zstandard bindings for Python", | |
92 |
long_description=open( |
|
96 | long_description=open("README.rst", "r").read(), | |
93 |
url= |
|
97 | url="https://github.com/indygreg/python-zstandard", | |
94 |
author= |
|
98 | author="Gregory Szorc", | |
95 |
author_email= |
|
99 | author_email="gregory.szorc@gmail.com", | |
96 |
license= |
|
100 | license="BSD", | |
97 | classifiers=[ |
|
101 | classifiers=[ | |
98 |
|
|
102 | "Development Status :: 4 - Beta", | |
99 |
|
|
103 | "Intended Audience :: Developers", | |
100 |
|
|
104 | "License :: OSI Approved :: BSD License", | |
101 |
|
|
105 | "Programming Language :: C", | |
102 |
|
|
106 | "Programming Language :: Python :: 2.7", | |
103 |
|
|
107 | "Programming Language :: Python :: 3.5", | |
104 |
|
|
108 | "Programming Language :: Python :: 3.6", | |
105 |
|
|
109 | "Programming Language :: Python :: 3.7", | |
|
110 | "Programming Language :: Python :: 3.8", | |||
106 | ], |
|
111 | ], | |
107 |
keywords= |
|
112 | keywords="zstandard zstd compression", | |
108 |
packages=[ |
|
113 | packages=["zstandard"], | |
109 | ext_modules=extensions, |
|
114 | ext_modules=extensions, | |
110 |
test_suite= |
|
115 | test_suite="tests", | |
111 | install_requires=install_requires, |
|
116 | install_requires=install_requires, | |
|
117 | tests_require=["hypothesis"], | |||
112 | ) |
|
118 | ) |
@@ -1,192 +1,206 b'' | |||||
1 | # Copyright (c) 2016-present, Gregory Szorc |
|
1 | # Copyright (c) 2016-present, Gregory Szorc | |
2 | # All rights reserved. |
|
2 | # All rights reserved. | |
3 | # |
|
3 | # | |
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
7 | import distutils.ccompiler |
|
7 | import distutils.ccompiler | |
8 | import os |
|
8 | import os | |
9 |
|
9 | |||
10 | from distutils.extension import Extension |
|
10 | from distutils.extension import Extension | |
11 |
|
11 | |||
12 |
|
12 | |||
13 | zstd_sources = ['zstd/%s' % p for p in ( |
|
13 | zstd_sources = [ | |
14 | 'common/debug.c', |
|
14 | "zstd/%s" % p | |
15 | 'common/entropy_common.c', |
|
15 | for p in ( | |
16 | 'common/error_private.c', |
|
16 | "common/debug.c", | |
17 | 'common/fse_decompress.c', |
|
17 | "common/entropy_common.c", | |
18 | 'common/pool.c', |
|
18 | "common/error_private.c", | |
19 | 'common/threading.c', |
|
19 | "common/fse_decompress.c", | |
20 |
|
|
20 | "common/pool.c", | |
21 |
|
|
21 | "common/threading.c", | |
22 | 'compress/fse_compress.c', |
|
22 | "common/xxhash.c", | |
23 | 'compress/hist.c', |
|
23 | "common/zstd_common.c", | |
24 |
|
|
24 | "compress/fse_compress.c", | |
25 | 'compress/zstd_compress_literals.c', |
|
25 | "compress/hist.c", | |
26 |
|
|
26 | "compress/huf_compress.c", | |
27 |
|
|
27 | "compress/zstd_compress_literals.c", | |
28 |
|
|
28 | "compress/zstd_compress_sequences.c", | |
29 |
|
|
29 | "compress/zstd_compress.c", | |
30 |
|
|
30 | "compress/zstd_double_fast.c", | |
31 |
|
|
31 | "compress/zstd_fast.c", | |
32 |
|
|
32 | "compress/zstd_lazy.c", | |
33 |
|
|
33 | "compress/zstd_ldm.c", | |
34 | 'decompress/huf_decompress.c', |
|
34 | "compress/zstd_opt.c", | |
35 |
|
|
35 | "compress/zstdmt_compress.c", | |
36 |
|
|
36 | "decompress/huf_decompress.c", | |
37 |
|
|
37 | "decompress/zstd_ddict.c", | |
38 | 'dictBuilder/cover.c', |
|
38 | "decompress/zstd_decompress.c", | |
39 | 'dictBuilder/divsufsort.c', |
|
39 | "decompress/zstd_decompress_block.c", | |
40 |
|
|
40 | "dictBuilder/cover.c", | |
41 |
|
|
41 | "dictBuilder/divsufsort.c", | |
42 | )] |
|
42 | "dictBuilder/fastcover.c", | |
|
43 | "dictBuilder/zdict.c", | |||
|
44 | ) | |||
|
45 | ] | |||
43 |
|
46 | |||
44 |
zstd_sources_legacy = [ |
|
47 | zstd_sources_legacy = [ | |
45 | 'deprecated/zbuff_common.c', |
|
48 | "zstd/%s" % p | |
46 | 'deprecated/zbuff_compress.c', |
|
49 | for p in ( | |
47 |
|
|
50 | "deprecated/zbuff_common.c", | |
48 | 'legacy/zstd_v01.c', |
|
51 | "deprecated/zbuff_compress.c", | |
49 | 'legacy/zstd_v02.c', |
|
52 | "deprecated/zbuff_decompress.c", | |
50 |
|
|
53 | "legacy/zstd_v01.c", | |
51 |
|
|
54 | "legacy/zstd_v02.c", | |
52 |
|
|
55 | "legacy/zstd_v03.c", | |
53 |
|
|
56 | "legacy/zstd_v04.c", | |
54 |
|
|
57 | "legacy/zstd_v05.c", | |
55 | )] |
|
58 | "legacy/zstd_v06.c", | |
|
59 | "legacy/zstd_v07.c", | |||
|
60 | ) | |||
|
61 | ] | |||
56 |
|
62 | |||
57 | zstd_includes = [ |
|
63 | zstd_includes = [ | |
58 |
|
|
64 | "zstd", | |
59 |
|
|
65 | "zstd/common", | |
60 |
|
|
66 | "zstd/compress", | |
61 |
|
|
67 | "zstd/decompress", | |
62 |
|
|
68 | "zstd/dictBuilder", | |
63 | ] |
|
69 | ] | |
64 |
|
70 | |||
65 | zstd_includes_legacy = [ |
|
71 | zstd_includes_legacy = [ | |
66 |
|
|
72 | "zstd/deprecated", | |
67 |
|
|
73 | "zstd/legacy", | |
68 | ] |
|
74 | ] | |
69 |
|
75 | |||
70 | ext_includes = [ |
|
76 | ext_includes = [ | |
71 |
|
|
77 | "c-ext", | |
72 |
|
|
78 | "zstd/common", | |
73 | ] |
|
79 | ] | |
74 |
|
80 | |||
75 | ext_sources = [ |
|
81 | ext_sources = [ | |
76 |
|
|
82 | "zstd/common/error_private.c", | |
77 |
|
|
83 | "zstd/common/pool.c", | |
78 | 'zstd.c', |
|
84 | "zstd/common/threading.c", | |
79 | 'c-ext/bufferutil.c', |
|
85 | "zstd/common/zstd_common.c", | |
80 | 'c-ext/compressiondict.c', |
|
86 | "zstd.c", | |
81 | 'c-ext/compressobj.c', |
|
87 | "c-ext/bufferutil.c", | |
82 |
|
|
88 | "c-ext/compressiondict.c", | |
83 |
|
|
89 | "c-ext/compressobj.c", | |
84 |
|
|
90 | "c-ext/compressor.c", | |
85 |
|
|
91 | "c-ext/compressoriterator.c", | |
86 |
|
|
92 | "c-ext/compressionchunker.c", | |
87 |
|
|
93 | "c-ext/compressionparams.c", | |
88 |
|
|
94 | "c-ext/compressionreader.c", | |
89 |
|
|
95 | "c-ext/compressionwriter.c", | |
90 |
|
|
96 | "c-ext/constants.c", | |
91 |
|
|
97 | "c-ext/decompressobj.c", | |
92 |
|
|
98 | "c-ext/decompressor.c", | |
93 |
|
|
99 | "c-ext/decompressoriterator.c", | |
94 | 'c-ext/frameparams.c', |
|
100 | "c-ext/decompressionreader.c", | |
|
101 | "c-ext/decompressionwriter.c", | |||
|
102 | "c-ext/frameparams.c", | |||
95 | ] |
|
103 | ] | |
96 |
|
104 | |||
97 | zstd_depends = [ |
|
105 | zstd_depends = [ | |
98 |
|
|
106 | "c-ext/python-zstandard.h", | |
99 | ] |
|
107 | ] | |
100 |
|
108 | |||
101 |
|
109 | |||
102 | def get_c_extension(support_legacy=False, system_zstd=False, name='zstd', |
|
110 | def get_c_extension( | |
103 | warnings_as_errors=False, root=None): |
|
111 | support_legacy=False, | |
|
112 | system_zstd=False, | |||
|
113 | name="zstd", | |||
|
114 | warnings_as_errors=False, | |||
|
115 | root=None, | |||
|
116 | ): | |||
104 | """Obtain a distutils.extension.Extension for the C extension. |
|
117 | """Obtain a distutils.extension.Extension for the C extension. | |
105 |
|
118 | |||
106 | ``support_legacy`` controls whether to compile in legacy zstd format support. |
|
119 | ``support_legacy`` controls whether to compile in legacy zstd format support. | |
107 |
|
120 | |||
108 | ``system_zstd`` controls whether to compile against the system zstd library. |
|
121 | ``system_zstd`` controls whether to compile against the system zstd library. | |
109 | For this to work, the system zstd library and headers must match what |
|
122 | For this to work, the system zstd library and headers must match what | |
110 | python-zstandard is coded against exactly. |
|
123 | python-zstandard is coded against exactly. | |
111 |
|
124 | |||
112 | ``name`` is the module name of the C extension to produce. |
|
125 | ``name`` is the module name of the C extension to produce. | |
113 |
|
126 | |||
114 | ``warnings_as_errors`` controls whether compiler warnings are turned into |
|
127 | ``warnings_as_errors`` controls whether compiler warnings are turned into | |
115 | compiler errors. |
|
128 | compiler errors. | |
116 |
|
129 | |||
117 | ``root`` defines a root path that source should be computed as relative |
|
130 | ``root`` defines a root path that source should be computed as relative | |
118 | to. This should be the directory with the main ``setup.py`` that is |
|
131 | to. This should be the directory with the main ``setup.py`` that is | |
119 | being invoked. If not defined, paths will be relative to this file. |
|
132 | being invoked. If not defined, paths will be relative to this file. | |
120 | """ |
|
133 | """ | |
121 | actual_root = os.path.abspath(os.path.dirname(__file__)) |
|
134 | actual_root = os.path.abspath(os.path.dirname(__file__)) | |
122 | root = root or actual_root |
|
135 | root = root or actual_root | |
123 |
|
136 | |||
124 | sources = set([os.path.join(actual_root, p) for p in ext_sources]) |
|
137 | sources = set([os.path.join(actual_root, p) for p in ext_sources]) | |
125 | if not system_zstd: |
|
138 | if not system_zstd: | |
126 | sources.update([os.path.join(actual_root, p) for p in zstd_sources]) |
|
139 | sources.update([os.path.join(actual_root, p) for p in zstd_sources]) | |
127 | if support_legacy: |
|
140 | if support_legacy: | |
128 | sources.update([os.path.join(actual_root, p) |
|
141 | sources.update([os.path.join(actual_root, p) for p in zstd_sources_legacy]) | |
129 | for p in zstd_sources_legacy]) |
|
|||
130 | sources = list(sources) |
|
142 | sources = list(sources) | |
131 |
|
143 | |||
132 | include_dirs = set([os.path.join(actual_root, d) for d in ext_includes]) |
|
144 | include_dirs = set([os.path.join(actual_root, d) for d in ext_includes]) | |
133 | if not system_zstd: |
|
145 | if not system_zstd: | |
134 | include_dirs.update([os.path.join(actual_root, d) |
|
146 | include_dirs.update([os.path.join(actual_root, d) for d in zstd_includes]) | |
135 | for d in zstd_includes]) |
|
|||
136 | if support_legacy: |
|
147 | if support_legacy: | |
137 |
include_dirs.update( |
|
148 | include_dirs.update( | |
138 |
|
|
149 | [os.path.join(actual_root, d) for d in zstd_includes_legacy] | |
|
150 | ) | |||
139 | include_dirs = list(include_dirs) |
|
151 | include_dirs = list(include_dirs) | |
140 |
|
152 | |||
141 | depends = [os.path.join(actual_root, p) for p in zstd_depends] |
|
153 | depends = [os.path.join(actual_root, p) for p in zstd_depends] | |
142 |
|
154 | |||
143 | compiler = distutils.ccompiler.new_compiler() |
|
155 | compiler = distutils.ccompiler.new_compiler() | |
144 |
|
156 | |||
145 | # Needed for MSVC. |
|
157 | # Needed for MSVC. | |
146 |
if hasattr(compiler, |
|
158 | if hasattr(compiler, "initialize"): | |
147 | compiler.initialize() |
|
159 | compiler.initialize() | |
148 |
|
160 | |||
149 |
if compiler.compiler_type == |
|
161 | if compiler.compiler_type == "unix": | |
150 |
compiler_type = |
|
162 | compiler_type = "unix" | |
151 |
elif compiler.compiler_type == |
|
163 | elif compiler.compiler_type == "msvc": | |
152 |
compiler_type = |
|
164 | compiler_type = "msvc" | |
153 |
elif compiler.compiler_type == |
|
165 | elif compiler.compiler_type == "mingw32": | |
154 |
compiler_type = |
|
166 | compiler_type = "mingw32" | |
155 | else: |
|
167 | else: | |
156 |
raise Exception( |
|
168 | raise Exception("unhandled compiler type: %s" % compiler.compiler_type) | |
157 | compiler.compiler_type) |
|
|||
158 |
|
169 | |||
159 |
extra_args = [ |
|
170 | extra_args = ["-DZSTD_MULTITHREAD"] | |
160 |
|
171 | |||
161 | if not system_zstd: |
|
172 | if not system_zstd: | |
162 |
extra_args.append( |
|
173 | extra_args.append("-DZSTDLIB_VISIBILITY=") | |
163 |
extra_args.append( |
|
174 | extra_args.append("-DZDICTLIB_VISIBILITY=") | |
164 |
extra_args.append( |
|
175 | extra_args.append("-DZSTDERRORLIB_VISIBILITY=") | |
165 |
|
176 | |||
166 |
if compiler_type == |
|
177 | if compiler_type == "unix": | |
167 |
extra_args.append( |
|
178 | extra_args.append("-fvisibility=hidden") | |
168 |
|
179 | |||
169 | if not system_zstd and support_legacy: |
|
180 | if not system_zstd and support_legacy: | |
170 |
extra_args.append( |
|
181 | extra_args.append("-DZSTD_LEGACY_SUPPORT=1") | |
171 |
|
182 | |||
172 | if warnings_as_errors: |
|
183 | if warnings_as_errors: | |
173 |
if compiler_type in ( |
|
184 | if compiler_type in ("unix", "mingw32"): | |
174 |
extra_args.append( |
|
185 | extra_args.append("-Werror") | |
175 |
elif compiler_type == |
|
186 | elif compiler_type == "msvc": | |
176 |
extra_args.append( |
|
187 | extra_args.append("/WX") | |
177 | else: |
|
188 | else: | |
178 | assert False |
|
189 | assert False | |
179 |
|
190 | |||
180 |
libraries = [ |
|
191 | libraries = ["zstd"] if system_zstd else [] | |
181 |
|
192 | |||
182 | # Python 3.7 doesn't like absolute paths. So normalize to relative. |
|
193 | # Python 3.7 doesn't like absolute paths. So normalize to relative. | |
183 | sources = [os.path.relpath(p, root) for p in sources] |
|
194 | sources = [os.path.relpath(p, root) for p in sources] | |
184 | include_dirs = [os.path.relpath(p, root) for p in include_dirs] |
|
195 | include_dirs = [os.path.relpath(p, root) for p in include_dirs] | |
185 | depends = [os.path.relpath(p, root) for p in depends] |
|
196 | depends = [os.path.relpath(p, root) for p in depends] | |
186 |
|
197 | |||
187 | # TODO compile with optimizations. |
|
198 | # TODO compile with optimizations. | |
188 |
return Extension( |
|
199 | return Extension( | |
189 | include_dirs=include_dirs, |
|
200 | name, | |
190 | depends=depends, |
|
201 | sources, | |
191 | extra_compile_args=extra_args, |
|
202 | include_dirs=include_dirs, | |
192 | libraries=libraries) |
|
203 | depends=depends, | |
|
204 | extra_compile_args=extra_args, | |||
|
205 | libraries=libraries, | |||
|
206 | ) |
@@ -1,185 +1,197 b'' | |||||
1 | import imp |
|
1 | import imp | |
2 | import inspect |
|
2 | import inspect | |
3 | import io |
|
3 | import io | |
4 | import os |
|
4 | import os | |
5 | import types |
|
5 | import types | |
|
6 | import unittest | |||
6 |
|
7 | |||
7 | try: |
|
8 | try: | |
8 | import hypothesis |
|
9 | import hypothesis | |
9 | except ImportError: |
|
10 | except ImportError: | |
10 | hypothesis = None |
|
11 | hypothesis = None | |
11 |
|
12 | |||
12 |
|
13 | |||
|
14 | class TestCase(unittest.TestCase): | |||
|
15 | if not getattr(unittest.TestCase, "assertRaisesRegex", False): | |||
|
16 | assertRaisesRegex = unittest.TestCase.assertRaisesRegexp | |||
|
17 | ||||
|
18 | ||||
13 | def make_cffi(cls): |
|
19 | def make_cffi(cls): | |
14 | """Decorator to add CFFI versions of each test method.""" |
|
20 | """Decorator to add CFFI versions of each test method.""" | |
15 |
|
21 | |||
16 | # The module containing this class definition should |
|
22 | # The module containing this class definition should | |
17 | # `import zstandard as zstd`. Otherwise things may blow up. |
|
23 | # `import zstandard as zstd`. Otherwise things may blow up. | |
18 | mod = inspect.getmodule(cls) |
|
24 | mod = inspect.getmodule(cls) | |
19 |
if not hasattr(mod, |
|
25 | if not hasattr(mod, "zstd"): | |
20 | raise Exception('test module does not contain "zstd" symbol') |
|
26 | raise Exception('test module does not contain "zstd" symbol') | |
21 |
|
27 | |||
22 |
if not hasattr(mod.zstd, |
|
28 | if not hasattr(mod.zstd, "backend"): | |
23 | raise Exception('zstd symbol does not have "backend" attribute; did ' |
|
29 | raise Exception( | |
24 | 'you `import zstandard as zstd`?') |
|
30 | 'zstd symbol does not have "backend" attribute; did ' | |
|
31 | "you `import zstandard as zstd`?" | |||
|
32 | ) | |||
25 |
|
33 | |||
26 | # If `import zstandard` already chose the cffi backend, there is nothing |
|
34 | # If `import zstandard` already chose the cffi backend, there is nothing | |
27 | # for us to do: we only add the cffi variation if the default backend |
|
35 | # for us to do: we only add the cffi variation if the default backend | |
28 | # is the C extension. |
|
36 | # is the C extension. | |
29 |
if mod.zstd.backend == |
|
37 | if mod.zstd.backend == "cffi": | |
30 | return cls |
|
38 | return cls | |
31 |
|
39 | |||
32 | old_env = dict(os.environ) |
|
40 | old_env = dict(os.environ) | |
33 |
os.environ[ |
|
41 | os.environ["PYTHON_ZSTANDARD_IMPORT_POLICY"] = "cffi" | |
34 | try: |
|
42 | try: | |
35 | try: |
|
43 | try: | |
36 |
mod_info = imp.find_module( |
|
44 | mod_info = imp.find_module("zstandard") | |
37 |
mod = imp.load_module( |
|
45 | mod = imp.load_module("zstandard_cffi", *mod_info) | |
38 | except ImportError: |
|
46 | except ImportError: | |
39 | return cls |
|
47 | return cls | |
40 | finally: |
|
48 | finally: | |
41 | os.environ.clear() |
|
49 | os.environ.clear() | |
42 | os.environ.update(old_env) |
|
50 | os.environ.update(old_env) | |
43 |
|
51 | |||
44 |
if mod.backend != |
|
52 | if mod.backend != "cffi": | |
45 |
raise Exception( |
|
53 | raise Exception("got the zstandard %s backend instead of cffi" % mod.backend) | |
46 |
|
54 | |||
47 | # If CFFI version is available, dynamically construct test methods |
|
55 | # If CFFI version is available, dynamically construct test methods | |
48 | # that use it. |
|
56 | # that use it. | |
49 |
|
57 | |||
50 | for attr in dir(cls): |
|
58 | for attr in dir(cls): | |
51 | fn = getattr(cls, attr) |
|
59 | fn = getattr(cls, attr) | |
52 | if not inspect.ismethod(fn) and not inspect.isfunction(fn): |
|
60 | if not inspect.ismethod(fn) and not inspect.isfunction(fn): | |
53 | continue |
|
61 | continue | |
54 |
|
62 | |||
55 |
if not fn.__name__.startswith( |
|
63 | if not fn.__name__.startswith("test_"): | |
56 | continue |
|
64 | continue | |
57 |
|
65 | |||
58 |
name = |
|
66 | name = "%s_cffi" % fn.__name__ | |
59 |
|
67 | |||
60 | # Replace the "zstd" symbol with the CFFI module instance. Then copy |
|
68 | # Replace the "zstd" symbol with the CFFI module instance. Then copy | |
61 | # the function object and install it in a new attribute. |
|
69 | # the function object and install it in a new attribute. | |
62 | if isinstance(fn, types.FunctionType): |
|
70 | if isinstance(fn, types.FunctionType): | |
63 | globs = dict(fn.__globals__) |
|
71 | globs = dict(fn.__globals__) | |
64 |
globs[ |
|
72 | globs["zstd"] = mod | |
65 |
new_fn = types.FunctionType( |
|
73 | new_fn = types.FunctionType( | |
66 |
|
|
74 | fn.__code__, globs, name, fn.__defaults__, fn.__closure__ | |
|
75 | ) | |||
67 | new_method = new_fn |
|
76 | new_method = new_fn | |
68 | else: |
|
77 | else: | |
69 | globs = dict(fn.__func__.func_globals) |
|
78 | globs = dict(fn.__func__.func_globals) | |
70 |
globs[ |
|
79 | globs["zstd"] = mod | |
71 |
new_fn = types.FunctionType( |
|
80 | new_fn = types.FunctionType( | |
72 |
|
|
81 | fn.__func__.func_code, | |
73 | fn.__func__.func_closure) |
|
82 | globs, | |
74 | new_method = types.UnboundMethodType(new_fn, fn.im_self, |
|
83 | name, | |
75 | fn.im_class) |
|
84 | fn.__func__.func_defaults, | |
|
85 | fn.__func__.func_closure, | |||
|
86 | ) | |||
|
87 | new_method = types.UnboundMethodType(new_fn, fn.im_self, fn.im_class) | |||
76 |
|
88 | |||
77 | setattr(cls, name, new_method) |
|
89 | setattr(cls, name, new_method) | |
78 |
|
90 | |||
79 | return cls |
|
91 | return cls | |
80 |
|
92 | |||
81 |
|
93 | |||
82 | class NonClosingBytesIO(io.BytesIO): |
|
94 | class NonClosingBytesIO(io.BytesIO): | |
83 | """BytesIO that saves the underlying buffer on close(). |
|
95 | """BytesIO that saves the underlying buffer on close(). | |
84 |
|
96 | |||
85 | This allows us to access written data after close(). |
|
97 | This allows us to access written data after close(). | |
86 | """ |
|
98 | """ | |
|
99 | ||||
87 | def __init__(self, *args, **kwargs): |
|
100 | def __init__(self, *args, **kwargs): | |
88 | super(NonClosingBytesIO, self).__init__(*args, **kwargs) |
|
101 | super(NonClosingBytesIO, self).__init__(*args, **kwargs) | |
89 | self._saved_buffer = None |
|
102 | self._saved_buffer = None | |
90 |
|
103 | |||
91 | def close(self): |
|
104 | def close(self): | |
92 | self._saved_buffer = self.getvalue() |
|
105 | self._saved_buffer = self.getvalue() | |
93 | return super(NonClosingBytesIO, self).close() |
|
106 | return super(NonClosingBytesIO, self).close() | |
94 |
|
107 | |||
95 | def getvalue(self): |
|
108 | def getvalue(self): | |
96 | if self.closed: |
|
109 | if self.closed: | |
97 | return self._saved_buffer |
|
110 | return self._saved_buffer | |
98 | else: |
|
111 | else: | |
99 | return super(NonClosingBytesIO, self).getvalue() |
|
112 | return super(NonClosingBytesIO, self).getvalue() | |
100 |
|
113 | |||
101 |
|
114 | |||
102 | class OpCountingBytesIO(NonClosingBytesIO): |
|
115 | class OpCountingBytesIO(NonClosingBytesIO): | |
103 | def __init__(self, *args, **kwargs): |
|
116 | def __init__(self, *args, **kwargs): | |
104 | self._flush_count = 0 |
|
117 | self._flush_count = 0 | |
105 | self._read_count = 0 |
|
118 | self._read_count = 0 | |
106 | self._write_count = 0 |
|
119 | self._write_count = 0 | |
107 | return super(OpCountingBytesIO, self).__init__(*args, **kwargs) |
|
120 | return super(OpCountingBytesIO, self).__init__(*args, **kwargs) | |
108 |
|
121 | |||
109 | def flush(self): |
|
122 | def flush(self): | |
110 | self._flush_count += 1 |
|
123 | self._flush_count += 1 | |
111 | return super(OpCountingBytesIO, self).flush() |
|
124 | return super(OpCountingBytesIO, self).flush() | |
112 |
|
125 | |||
113 | def read(self, *args): |
|
126 | def read(self, *args): | |
114 | self._read_count += 1 |
|
127 | self._read_count += 1 | |
115 | return super(OpCountingBytesIO, self).read(*args) |
|
128 | return super(OpCountingBytesIO, self).read(*args) | |
116 |
|
129 | |||
117 | def write(self, data): |
|
130 | def write(self, data): | |
118 | self._write_count += 1 |
|
131 | self._write_count += 1 | |
119 | return super(OpCountingBytesIO, self).write(data) |
|
132 | return super(OpCountingBytesIO, self).write(data) | |
120 |
|
133 | |||
121 |
|
134 | |||
122 | _source_files = [] |
|
135 | _source_files = [] | |
123 |
|
136 | |||
124 |
|
137 | |||
125 | def random_input_data(): |
|
138 | def random_input_data(): | |
126 | """Obtain the raw content of source files. |
|
139 | """Obtain the raw content of source files. | |
127 |
|
140 | |||
128 | This is used for generating "random" data to feed into fuzzing, since it is |
|
141 | This is used for generating "random" data to feed into fuzzing, since it is | |
129 | faster than random content generation. |
|
142 | faster than random content generation. | |
130 | """ |
|
143 | """ | |
131 | if _source_files: |
|
144 | if _source_files: | |
132 | return _source_files |
|
145 | return _source_files | |
133 |
|
146 | |||
134 | for root, dirs, files in os.walk(os.path.dirname(__file__)): |
|
147 | for root, dirs, files in os.walk(os.path.dirname(__file__)): | |
135 | dirs[:] = list(sorted(dirs)) |
|
148 | dirs[:] = list(sorted(dirs)) | |
136 | for f in sorted(files): |
|
149 | for f in sorted(files): | |
137 | try: |
|
150 | try: | |
138 |
with open(os.path.join(root, f), |
|
151 | with open(os.path.join(root, f), "rb") as fh: | |
139 | data = fh.read() |
|
152 | data = fh.read() | |
140 | if data: |
|
153 | if data: | |
141 | _source_files.append(data) |
|
154 | _source_files.append(data) | |
142 | except OSError: |
|
155 | except OSError: | |
143 | pass |
|
156 | pass | |
144 |
|
157 | |||
145 | # Also add some actual random data. |
|
158 | # Also add some actual random data. | |
146 | _source_files.append(os.urandom(100)) |
|
159 | _source_files.append(os.urandom(100)) | |
147 | _source_files.append(os.urandom(1000)) |
|
160 | _source_files.append(os.urandom(1000)) | |
148 | _source_files.append(os.urandom(10000)) |
|
161 | _source_files.append(os.urandom(10000)) | |
149 | _source_files.append(os.urandom(100000)) |
|
162 | _source_files.append(os.urandom(100000)) | |
150 | _source_files.append(os.urandom(1000000)) |
|
163 | _source_files.append(os.urandom(1000000)) | |
151 |
|
164 | |||
152 | return _source_files |
|
165 | return _source_files | |
153 |
|
166 | |||
154 |
|
167 | |||
155 | def generate_samples(): |
|
168 | def generate_samples(): | |
156 | inputs = [ |
|
169 | inputs = [ | |
157 |
b |
|
170 | b"foo", | |
158 |
b |
|
171 | b"bar", | |
159 |
b |
|
172 | b"abcdef", | |
160 |
b |
|
173 | b"sometext", | |
161 |
b |
|
174 | b"baz", | |
162 | ] |
|
175 | ] | |
163 |
|
176 | |||
164 | samples = [] |
|
177 | samples = [] | |
165 |
|
178 | |||
166 | for i in range(128): |
|
179 | for i in range(128): | |
167 | samples.append(inputs[i % 5]) |
|
180 | samples.append(inputs[i % 5]) | |
168 | samples.append(inputs[i % 5] * (i + 3)) |
|
181 | samples.append(inputs[i % 5] * (i + 3)) | |
169 | samples.append(inputs[-(i % 5)] * (i + 2)) |
|
182 | samples.append(inputs[-(i % 5)] * (i + 2)) | |
170 |
|
183 | |||
171 | return samples |
|
184 | return samples | |
172 |
|
185 | |||
173 |
|
186 | |||
174 | if hypothesis: |
|
187 | if hypothesis: | |
175 | default_settings = hypothesis.settings(deadline=10000) |
|
188 | default_settings = hypothesis.settings(deadline=10000) | |
176 |
hypothesis.settings.register_profile( |
|
189 | hypothesis.settings.register_profile("default", default_settings) | |
177 |
|
190 | |||
178 | ci_settings = hypothesis.settings(deadline=20000, max_examples=1000) |
|
191 | ci_settings = hypothesis.settings(deadline=20000, max_examples=1000) | |
179 |
hypothesis.settings.register_profile( |
|
192 | hypothesis.settings.register_profile("ci", ci_settings) | |
180 |
|
193 | |||
181 | expensive_settings = hypothesis.settings(deadline=None, max_examples=10000) |
|
194 | expensive_settings = hypothesis.settings(deadline=None, max_examples=10000) | |
182 |
hypothesis.settings.register_profile( |
|
195 | hypothesis.settings.register_profile("expensive", expensive_settings) | |
183 |
|
196 | |||
184 | hypothesis.settings.load_profile( |
|
197 | hypothesis.settings.load_profile(os.environ.get("HYPOTHESIS_PROFILE", "default")) | |
185 | os.environ.get('HYPOTHESIS_PROFILE', 'default')) |
|
@@ -1,135 +1,146 b'' | |||||
1 | import struct |
|
1 | import struct | |
2 | import unittest |
|
2 | import unittest | |
3 |
|
3 | |||
4 | import zstandard as zstd |
|
4 | import zstandard as zstd | |
5 |
|
5 | |||
6 | ss = struct.Struct('=QQ') |
|
6 | from .common import TestCase | |
|
7 | ||||
|
8 | ss = struct.Struct("=QQ") | |||
7 |
|
9 | |||
8 |
|
10 | |||
9 |
class TestBufferWithSegments( |
|
11 | class TestBufferWithSegments(TestCase): | |
10 | def test_arguments(self): |
|
12 | def test_arguments(self): | |
11 |
if not hasattr(zstd, |
|
13 | if not hasattr(zstd, "BufferWithSegments"): | |
12 |
self.skipTest( |
|
14 | self.skipTest("BufferWithSegments not available") | |
13 |
|
15 | |||
14 | with self.assertRaises(TypeError): |
|
16 | with self.assertRaises(TypeError): | |
15 | zstd.BufferWithSegments() |
|
17 | zstd.BufferWithSegments() | |
16 |
|
18 | |||
17 | with self.assertRaises(TypeError): |
|
19 | with self.assertRaises(TypeError): | |
18 |
zstd.BufferWithSegments(b |
|
20 | zstd.BufferWithSegments(b"foo") | |
19 |
|
21 | |||
20 | # Segments data should be a multiple of 16. |
|
22 | # Segments data should be a multiple of 16. | |
21 | with self.assertRaisesRegexp(ValueError, 'segments array size is not a multiple of 16'): |
|
23 | with self.assertRaisesRegex( | |
22 | zstd.BufferWithSegments(b'foo', b'\x00\x00') |
|
24 | ValueError, "segments array size is not a multiple of 16" | |
|
25 | ): | |||
|
26 | zstd.BufferWithSegments(b"foo", b"\x00\x00") | |||
23 |
|
27 | |||
24 | def test_invalid_offset(self): |
|
28 | def test_invalid_offset(self): | |
25 |
if not hasattr(zstd, |
|
29 | if not hasattr(zstd, "BufferWithSegments"): | |
26 |
self.skipTest( |
|
30 | self.skipTest("BufferWithSegments not available") | |
27 |
|
31 | |||
28 | with self.assertRaisesRegexp(ValueError, 'offset within segments array references memory'): |
|
32 | with self.assertRaisesRegex( | |
29 | zstd.BufferWithSegments(b'foo', ss.pack(0, 4)) |
|
33 | ValueError, "offset within segments array references memory" | |
|
34 | ): | |||
|
35 | zstd.BufferWithSegments(b"foo", ss.pack(0, 4)) | |||
30 |
|
36 | |||
31 | def test_invalid_getitem(self): |
|
37 | def test_invalid_getitem(self): | |
32 |
if not hasattr(zstd, |
|
38 | if not hasattr(zstd, "BufferWithSegments"): | |
33 |
self.skipTest( |
|
39 | self.skipTest("BufferWithSegments not available") | |
34 |
|
40 | |||
35 |
b = zstd.BufferWithSegments(b |
|
41 | b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
36 |
|
42 | |||
37 |
with self.assertRaisesRegex |
|
43 | with self.assertRaisesRegex(IndexError, "offset must be non-negative"): | |
38 | test = b[-10] |
|
44 | test = b[-10] | |
39 |
|
45 | |||
40 |
with self.assertRaisesRegex |
|
46 | with self.assertRaisesRegex(IndexError, "offset must be less than 1"): | |
41 | test = b[1] |
|
47 | test = b[1] | |
42 |
|
48 | |||
43 |
with self.assertRaisesRegex |
|
49 | with self.assertRaisesRegex(IndexError, "offset must be less than 1"): | |
44 | test = b[2] |
|
50 | test = b[2] | |
45 |
|
51 | |||
46 | def test_single(self): |
|
52 | def test_single(self): | |
47 |
if not hasattr(zstd, |
|
53 | if not hasattr(zstd, "BufferWithSegments"): | |
48 |
self.skipTest( |
|
54 | self.skipTest("BufferWithSegments not available") | |
49 |
|
55 | |||
50 |
b = zstd.BufferWithSegments(b |
|
56 | b = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
51 | self.assertEqual(len(b), 1) |
|
57 | self.assertEqual(len(b), 1) | |
52 | self.assertEqual(b.size, 3) |
|
58 | self.assertEqual(b.size, 3) | |
53 |
self.assertEqual(b.tobytes(), b |
|
59 | self.assertEqual(b.tobytes(), b"foo") | |
54 |
|
60 | |||
55 | self.assertEqual(len(b[0]), 3) |
|
61 | self.assertEqual(len(b[0]), 3) | |
56 | self.assertEqual(b[0].offset, 0) |
|
62 | self.assertEqual(b[0].offset, 0) | |
57 |
self.assertEqual(b[0].tobytes(), b |
|
63 | self.assertEqual(b[0].tobytes(), b"foo") | |
58 |
|
64 | |||
59 | def test_multiple(self): |
|
65 | def test_multiple(self): | |
60 |
if not hasattr(zstd, |
|
66 | if not hasattr(zstd, "BufferWithSegments"): | |
61 |
self.skipTest( |
|
67 | self.skipTest("BufferWithSegments not available") | |
62 |
|
68 | |||
63 |
b = zstd.BufferWithSegments( |
|
69 | b = zstd.BufferWithSegments( | |
64 | ss.pack(3, 4), |
|
70 | b"foofooxfooxy", b"".join([ss.pack(0, 3), ss.pack(3, 4), ss.pack(7, 5)]) | |
65 | ss.pack(7, 5)])) |
|
71 | ) | |
66 | self.assertEqual(len(b), 3) |
|
72 | self.assertEqual(len(b), 3) | |
67 | self.assertEqual(b.size, 12) |
|
73 | self.assertEqual(b.size, 12) | |
68 |
self.assertEqual(b.tobytes(), b |
|
74 | self.assertEqual(b.tobytes(), b"foofooxfooxy") | |
69 |
|
75 | |||
70 |
self.assertEqual(b[0].tobytes(), b |
|
76 | self.assertEqual(b[0].tobytes(), b"foo") | |
71 |
self.assertEqual(b[1].tobytes(), b |
|
77 | self.assertEqual(b[1].tobytes(), b"foox") | |
72 |
self.assertEqual(b[2].tobytes(), b |
|
78 | self.assertEqual(b[2].tobytes(), b"fooxy") | |
73 |
|
79 | |||
74 |
|
80 | |||
75 |
class TestBufferWithSegmentsCollection( |
|
81 | class TestBufferWithSegmentsCollection(TestCase): | |
76 | def test_empty_constructor(self): |
|
82 | def test_empty_constructor(self): | |
77 |
if not hasattr(zstd, |
|
83 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
78 |
self.skipTest( |
|
84 | self.skipTest("BufferWithSegmentsCollection not available") | |
79 |
|
85 | |||
80 |
with self.assertRaisesRegex |
|
86 | with self.assertRaisesRegex(ValueError, "must pass at least 1 argument"): | |
81 | zstd.BufferWithSegmentsCollection() |
|
87 | zstd.BufferWithSegmentsCollection() | |
82 |
|
88 | |||
83 | def test_argument_validation(self): |
|
89 | def test_argument_validation(self): | |
84 |
if not hasattr(zstd, |
|
90 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
85 |
self.skipTest( |
|
91 | self.skipTest("BufferWithSegmentsCollection not available") | |
86 |
|
92 | |||
87 |
with self.assertRaisesRegex |
|
93 | with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): | |
88 | zstd.BufferWithSegmentsCollection(None) |
|
94 | zstd.BufferWithSegmentsCollection(None) | |
89 |
|
95 | |||
90 |
with self.assertRaisesRegex |
|
96 | with self.assertRaisesRegex(TypeError, "arguments must be BufferWithSegments"): | |
91 |
zstd.BufferWithSegmentsCollection( |
|
97 | zstd.BufferWithSegmentsCollection( | |
92 | None) |
|
98 | zstd.BufferWithSegments(b"foo", ss.pack(0, 3)), None | |
|
99 | ) | |||
93 |
|
100 | |||
94 | with self.assertRaisesRegexp(ValueError, 'ZstdBufferWithSegments cannot be empty'): |
|
101 | with self.assertRaisesRegex( | |
95 | zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b'', b'')) |
|
102 | ValueError, "ZstdBufferWithSegments cannot be empty" | |
|
103 | ): | |||
|
104 | zstd.BufferWithSegmentsCollection(zstd.BufferWithSegments(b"", b"")) | |||
96 |
|
105 | |||
97 | def test_length(self): |
|
106 | def test_length(self): | |
98 |
if not hasattr(zstd, |
|
107 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
99 |
self.skipTest( |
|
108 | self.skipTest("BufferWithSegmentsCollection not available") | |
100 |
|
109 | |||
101 |
b1 = zstd.BufferWithSegments(b |
|
110 | b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
102 |
b2 = zstd.BufferWithSegments( |
|
111 | b2 = zstd.BufferWithSegments( | |
103 | ss.pack(3, 3)])) |
|
112 | b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]) | |
|
113 | ) | |||
104 |
|
114 | |||
105 | c = zstd.BufferWithSegmentsCollection(b1) |
|
115 | c = zstd.BufferWithSegmentsCollection(b1) | |
106 | self.assertEqual(len(c), 1) |
|
116 | self.assertEqual(len(c), 1) | |
107 | self.assertEqual(c.size(), 3) |
|
117 | self.assertEqual(c.size(), 3) | |
108 |
|
118 | |||
109 | c = zstd.BufferWithSegmentsCollection(b2) |
|
119 | c = zstd.BufferWithSegmentsCollection(b2) | |
110 | self.assertEqual(len(c), 2) |
|
120 | self.assertEqual(len(c), 2) | |
111 | self.assertEqual(c.size(), 6) |
|
121 | self.assertEqual(c.size(), 6) | |
112 |
|
122 | |||
113 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
123 | c = zstd.BufferWithSegmentsCollection(b1, b2) | |
114 | self.assertEqual(len(c), 3) |
|
124 | self.assertEqual(len(c), 3) | |
115 | self.assertEqual(c.size(), 9) |
|
125 | self.assertEqual(c.size(), 9) | |
116 |
|
126 | |||
117 | def test_getitem(self): |
|
127 | def test_getitem(self): | |
118 |
if not hasattr(zstd, |
|
128 | if not hasattr(zstd, "BufferWithSegmentsCollection"): | |
119 |
self.skipTest( |
|
129 | self.skipTest("BufferWithSegmentsCollection not available") | |
120 |
|
130 | |||
121 |
b1 = zstd.BufferWithSegments(b |
|
131 | b1 = zstd.BufferWithSegments(b"foo", ss.pack(0, 3)) | |
122 |
b2 = zstd.BufferWithSegments( |
|
132 | b2 = zstd.BufferWithSegments( | |
123 | ss.pack(3, 3)])) |
|
133 | b"barbaz", b"".join([ss.pack(0, 3), ss.pack(3, 3)]) | |
|
134 | ) | |||
124 |
|
135 | |||
125 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
136 | c = zstd.BufferWithSegmentsCollection(b1, b2) | |
126 |
|
137 | |||
127 |
with self.assertRaisesRegex |
|
138 | with self.assertRaisesRegex(IndexError, "offset must be less than 3"): | |
128 | c[3] |
|
139 | c[3] | |
129 |
|
140 | |||
130 |
with self.assertRaisesRegex |
|
141 | with self.assertRaisesRegex(IndexError, "offset must be less than 3"): | |
131 | c[4] |
|
142 | c[4] | |
132 |
|
143 | |||
133 |
self.assertEqual(c[0].tobytes(), b |
|
144 | self.assertEqual(c[0].tobytes(), b"foo") | |
134 |
self.assertEqual(c[1].tobytes(), b |
|
145 | self.assertEqual(c[1].tobytes(), b"bar") | |
135 |
self.assertEqual(c[2].tobytes(), b |
|
146 | self.assertEqual(c[2].tobytes(), b"baz") |
This diff has been collapsed as it changes many lines, (811 lines changed) Show them Hide them | |||||
@@ -1,1735 +1,1770 b'' | |||||
1 | import hashlib |
|
1 | import hashlib | |
2 | import io |
|
2 | import io | |
3 | import os |
|
3 | import os | |
4 | import struct |
|
4 | import struct | |
5 | import sys |
|
5 | import sys | |
6 | import tarfile |
|
6 | import tarfile | |
7 | import tempfile |
|
7 | import tempfile | |
8 | import unittest |
|
8 | import unittest | |
9 |
|
9 | |||
10 | import zstandard as zstd |
|
10 | import zstandard as zstd | |
11 |
|
11 | |||
12 | from .common import ( |
|
12 | from .common import ( | |
13 | make_cffi, |
|
13 | make_cffi, | |
14 | NonClosingBytesIO, |
|
14 | NonClosingBytesIO, | |
15 | OpCountingBytesIO, |
|
15 | OpCountingBytesIO, | |
|
16 | TestCase, | |||
16 | ) |
|
17 | ) | |
17 |
|
18 | |||
18 |
|
19 | |||
19 | if sys.version_info[0] >= 3: |
|
20 | if sys.version_info[0] >= 3: | |
20 | next = lambda it: it.__next__() |
|
21 | next = lambda it: it.__next__() | |
21 | else: |
|
22 | else: | |
22 | next = lambda it: it.next() |
|
23 | next = lambda it: it.next() | |
23 |
|
24 | |||
24 |
|
25 | |||
25 | def multithreaded_chunk_size(level, source_size=0): |
|
26 | def multithreaded_chunk_size(level, source_size=0): | |
26 | params = zstd.ZstdCompressionParameters.from_level(level, |
|
27 | params = zstd.ZstdCompressionParameters.from_level(level, source_size=source_size) | |
27 | source_size=source_size) |
|
|||
28 |
|
28 | |||
29 | return 1 << (params.window_log + 2) |
|
29 | return 1 << (params.window_log + 2) | |
30 |
|
30 | |||
31 |
|
31 | |||
32 | @make_cffi |
|
32 | @make_cffi | |
33 |
class TestCompressor( |
|
33 | class TestCompressor(TestCase): | |
34 | def test_level_bounds(self): |
|
34 | def test_level_bounds(self): | |
35 | with self.assertRaises(ValueError): |
|
35 | with self.assertRaises(ValueError): | |
36 | zstd.ZstdCompressor(level=23) |
|
36 | zstd.ZstdCompressor(level=23) | |
37 |
|
37 | |||
38 | def test_memory_size(self): |
|
38 | def test_memory_size(self): | |
39 | cctx = zstd.ZstdCompressor(level=1) |
|
39 | cctx = zstd.ZstdCompressor(level=1) | |
40 | self.assertGreater(cctx.memory_size(), 100) |
|
40 | self.assertGreater(cctx.memory_size(), 100) | |
41 |
|
41 | |||
42 |
|
42 | |||
43 | @make_cffi |
|
43 | @make_cffi | |
44 |
class TestCompressor_compress( |
|
44 | class TestCompressor_compress(TestCase): | |
45 | def test_compress_empty(self): |
|
45 | def test_compress_empty(self): | |
46 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
46 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
47 |
result = cctx.compress(b |
|
47 | result = cctx.compress(b"") | |
48 |
self.assertEqual(result, b |
|
48 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
49 | params = zstd.get_frame_parameters(result) |
|
49 | params = zstd.get_frame_parameters(result) | |
50 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
50 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
51 | self.assertEqual(params.window_size, 524288) |
|
51 | self.assertEqual(params.window_size, 524288) | |
52 | self.assertEqual(params.dict_id, 0) |
|
52 | self.assertEqual(params.dict_id, 0) | |
53 | self.assertFalse(params.has_checksum, 0) |
|
53 | self.assertFalse(params.has_checksum, 0) | |
54 |
|
54 | |||
55 | cctx = zstd.ZstdCompressor() |
|
55 | cctx = zstd.ZstdCompressor() | |
56 |
result = cctx.compress(b |
|
56 | result = cctx.compress(b"") | |
57 |
self.assertEqual(result, b |
|
57 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00") | |
58 | params = zstd.get_frame_parameters(result) |
|
58 | params = zstd.get_frame_parameters(result) | |
59 | self.assertEqual(params.content_size, 0) |
|
59 | self.assertEqual(params.content_size, 0) | |
60 |
|
60 | |||
61 | def test_input_types(self): |
|
61 | def test_input_types(self): | |
62 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
62 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
63 |
expected = b |
|
63 | expected = b"\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f" | |
64 |
|
64 | |||
65 | mutable_array = bytearray(3) |
|
65 | mutable_array = bytearray(3) | |
66 |
mutable_array[:] = b |
|
66 | mutable_array[:] = b"foo" | |
67 |
|
67 | |||
68 | sources = [ |
|
68 | sources = [ | |
69 |
memoryview(b |
|
69 | memoryview(b"foo"), | |
70 |
bytearray(b |
|
70 | bytearray(b"foo"), | |
71 | mutable_array, |
|
71 | mutable_array, | |
72 | ] |
|
72 | ] | |
73 |
|
73 | |||
74 | for source in sources: |
|
74 | for source in sources: | |
75 | self.assertEqual(cctx.compress(source), expected) |
|
75 | self.assertEqual(cctx.compress(source), expected) | |
76 |
|
76 | |||
77 | def test_compress_large(self): |
|
77 | def test_compress_large(self): | |
78 | chunks = [] |
|
78 | chunks = [] | |
79 | for i in range(255): |
|
79 | for i in range(255): | |
80 |
chunks.append(struct.Struct( |
|
80 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
81 |
|
81 | |||
82 | cctx = zstd.ZstdCompressor(level=3, write_content_size=False) |
|
82 | cctx = zstd.ZstdCompressor(level=3, write_content_size=False) | |
83 |
result = cctx.compress(b |
|
83 | result = cctx.compress(b"".join(chunks)) | |
84 | self.assertEqual(len(result), 999) |
|
84 | self.assertEqual(len(result), 999) | |
85 |
self.assertEqual(result[0:4], b |
|
85 | self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") | |
86 |
|
86 | |||
87 | # This matches the test for read_to_iter() below. |
|
87 | # This matches the test for read_to_iter() below. | |
88 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
88 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
89 |
result = cctx.compress(b |
|
89 | result = cctx.compress(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b"o") | |
90 | self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00' |
|
90 | self.assertEqual( | |
91 | b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0' |
|
91 | result, | |
92 | b'\x02\x09\x00\x00\x6f') |
|
92 | b"\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00" | |
|
93 | b"\x10\x66\x66\x01\x00\xfb\xff\x39\xc0" | |||
|
94 | b"\x02\x09\x00\x00\x6f", | |||
|
95 | ) | |||
93 |
|
96 | |||
94 | def test_negative_level(self): |
|
97 | def test_negative_level(self): | |
95 | cctx = zstd.ZstdCompressor(level=-4) |
|
98 | cctx = zstd.ZstdCompressor(level=-4) | |
96 |
result = cctx.compress(b |
|
99 | result = cctx.compress(b"foo" * 256) | |
97 |
|
100 | |||
98 | def test_no_magic(self): |
|
101 | def test_no_magic(self): | |
99 | params = zstd.ZstdCompressionParameters.from_level( |
|
102 | params = zstd.ZstdCompressionParameters.from_level(1, format=zstd.FORMAT_ZSTD1) | |
100 | 1, format=zstd.FORMAT_ZSTD1) |
|
|||
101 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
103 | cctx = zstd.ZstdCompressor(compression_params=params) | |
102 |
magic = cctx.compress(b |
|
104 | magic = cctx.compress(b"foobar") | |
103 |
|
105 | |||
104 | params = zstd.ZstdCompressionParameters.from_level( |
|
106 | params = zstd.ZstdCompressionParameters.from_level( | |
105 |
1, format=zstd.FORMAT_ZSTD1_MAGICLESS |
|
107 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS | |
|
108 | ) | |||
106 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
109 | cctx = zstd.ZstdCompressor(compression_params=params) | |
107 |
no_magic = cctx.compress(b |
|
110 | no_magic = cctx.compress(b"foobar") | |
108 |
|
111 | |||
109 |
self.assertEqual(magic[0:4], b |
|
112 | self.assertEqual(magic[0:4], b"\x28\xb5\x2f\xfd") | |
110 | self.assertEqual(magic[4:], no_magic) |
|
113 | self.assertEqual(magic[4:], no_magic) | |
111 |
|
114 | |||
112 | def test_write_checksum(self): |
|
115 | def test_write_checksum(self): | |
113 | cctx = zstd.ZstdCompressor(level=1) |
|
116 | cctx = zstd.ZstdCompressor(level=1) | |
114 |
no_checksum = cctx.compress(b |
|
117 | no_checksum = cctx.compress(b"foobar") | |
115 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
118 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
116 |
with_checksum = cctx.compress(b |
|
119 | with_checksum = cctx.compress(b"foobar") | |
117 |
|
120 | |||
118 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
121 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) | |
119 |
|
122 | |||
120 | no_params = zstd.get_frame_parameters(no_checksum) |
|
123 | no_params = zstd.get_frame_parameters(no_checksum) | |
121 | with_params = zstd.get_frame_parameters(with_checksum) |
|
124 | with_params = zstd.get_frame_parameters(with_checksum) | |
122 |
|
125 | |||
123 | self.assertFalse(no_params.has_checksum) |
|
126 | self.assertFalse(no_params.has_checksum) | |
124 | self.assertTrue(with_params.has_checksum) |
|
127 | self.assertTrue(with_params.has_checksum) | |
125 |
|
128 | |||
126 | def test_write_content_size(self): |
|
129 | def test_write_content_size(self): | |
127 | cctx = zstd.ZstdCompressor(level=1) |
|
130 | cctx = zstd.ZstdCompressor(level=1) | |
128 |
with_size = cctx.compress(b |
|
131 | with_size = cctx.compress(b"foobar" * 256) | |
129 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
132 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
130 |
no_size = cctx.compress(b |
|
133 | no_size = cctx.compress(b"foobar" * 256) | |
131 |
|
134 | |||
132 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
135 | self.assertEqual(len(with_size), len(no_size) + 1) | |
133 |
|
136 | |||
134 | no_params = zstd.get_frame_parameters(no_size) |
|
137 | no_params = zstd.get_frame_parameters(no_size) | |
135 | with_params = zstd.get_frame_parameters(with_size) |
|
138 | with_params = zstd.get_frame_parameters(with_size) | |
136 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
139 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
137 | self.assertEqual(with_params.content_size, 1536) |
|
140 | self.assertEqual(with_params.content_size, 1536) | |
138 |
|
141 | |||
139 | def test_no_dict_id(self): |
|
142 | def test_no_dict_id(self): | |
140 | samples = [] |
|
143 | samples = [] | |
141 | for i in range(128): |
|
144 | for i in range(128): | |
142 |
samples.append(b |
|
145 | samples.append(b"foo" * 64) | |
143 |
samples.append(b |
|
146 | samples.append(b"bar" * 64) | |
144 |
samples.append(b |
|
147 | samples.append(b"foobar" * 64) | |
145 |
|
148 | |||
146 | d = zstd.train_dictionary(1024, samples) |
|
149 | d = zstd.train_dictionary(1024, samples) | |
147 |
|
150 | |||
148 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
151 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
149 |
with_dict_id = cctx.compress(b |
|
152 | with_dict_id = cctx.compress(b"foobarfoobar") | |
150 |
|
153 | |||
151 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
154 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) | |
152 |
no_dict_id = cctx.compress(b |
|
155 | no_dict_id = cctx.compress(b"foobarfoobar") | |
153 |
|
156 | |||
154 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) |
|
157 | self.assertEqual(len(with_dict_id), len(no_dict_id) + 4) | |
155 |
|
158 | |||
156 | no_params = zstd.get_frame_parameters(no_dict_id) |
|
159 | no_params = zstd.get_frame_parameters(no_dict_id) | |
157 | with_params = zstd.get_frame_parameters(with_dict_id) |
|
160 | with_params = zstd.get_frame_parameters(with_dict_id) | |
158 | self.assertEqual(no_params.dict_id, 0) |
|
161 | self.assertEqual(no_params.dict_id, 0) | |
159 | self.assertEqual(with_params.dict_id, 1880053135) |
|
162 | self.assertEqual(with_params.dict_id, 1880053135) | |
160 |
|
163 | |||
161 | def test_compress_dict_multiple(self): |
|
164 | def test_compress_dict_multiple(self): | |
162 | samples = [] |
|
165 | samples = [] | |
163 | for i in range(128): |
|
166 | for i in range(128): | |
164 |
samples.append(b |
|
167 | samples.append(b"foo" * 64) | |
165 |
samples.append(b |
|
168 | samples.append(b"bar" * 64) | |
166 |
samples.append(b |
|
169 | samples.append(b"foobar" * 64) | |
167 |
|
170 | |||
168 | d = zstd.train_dictionary(8192, samples) |
|
171 | d = zstd.train_dictionary(8192, samples) | |
169 |
|
172 | |||
170 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
173 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
171 |
|
174 | |||
172 | for i in range(32): |
|
175 | for i in range(32): | |
173 |
cctx.compress(b |
|
176 | cctx.compress(b"foo bar foobar foo bar foobar") | |
174 |
|
177 | |||
175 | def test_dict_precompute(self): |
|
178 | def test_dict_precompute(self): | |
176 | samples = [] |
|
179 | samples = [] | |
177 | for i in range(128): |
|
180 | for i in range(128): | |
178 |
samples.append(b |
|
181 | samples.append(b"foo" * 64) | |
179 |
samples.append(b |
|
182 | samples.append(b"bar" * 64) | |
180 |
samples.append(b |
|
183 | samples.append(b"foobar" * 64) | |
181 |
|
184 | |||
182 | d = zstd.train_dictionary(8192, samples) |
|
185 | d = zstd.train_dictionary(8192, samples) | |
183 | d.precompute_compress(level=1) |
|
186 | d.precompute_compress(level=1) | |
184 |
|
187 | |||
185 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
188 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
186 |
|
189 | |||
187 | for i in range(32): |
|
190 | for i in range(32): | |
188 |
cctx.compress(b |
|
191 | cctx.compress(b"foo bar foobar foo bar foobar") | |
189 |
|
192 | |||
190 | def test_multithreaded(self): |
|
193 | def test_multithreaded(self): | |
191 | chunk_size = multithreaded_chunk_size(1) |
|
194 | chunk_size = multithreaded_chunk_size(1) | |
192 |
source = b |
|
195 | source = b"".join([b"x" * chunk_size, b"y" * chunk_size]) | |
193 |
|
196 | |||
194 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
|
197 | cctx = zstd.ZstdCompressor(level=1, threads=2) | |
195 | compressed = cctx.compress(source) |
|
198 | compressed = cctx.compress(source) | |
196 |
|
199 | |||
197 | params = zstd.get_frame_parameters(compressed) |
|
200 | params = zstd.get_frame_parameters(compressed) | |
198 | self.assertEqual(params.content_size, chunk_size * 2) |
|
201 | self.assertEqual(params.content_size, chunk_size * 2) | |
199 | self.assertEqual(params.dict_id, 0) |
|
202 | self.assertEqual(params.dict_id, 0) | |
200 | self.assertFalse(params.has_checksum) |
|
203 | self.assertFalse(params.has_checksum) | |
201 |
|
204 | |||
202 | dctx = zstd.ZstdDecompressor() |
|
205 | dctx = zstd.ZstdDecompressor() | |
203 | self.assertEqual(dctx.decompress(compressed), source) |
|
206 | self.assertEqual(dctx.decompress(compressed), source) | |
204 |
|
207 | |||
205 | def test_multithreaded_dict(self): |
|
208 | def test_multithreaded_dict(self): | |
206 | samples = [] |
|
209 | samples = [] | |
207 | for i in range(128): |
|
210 | for i in range(128): | |
208 |
samples.append(b |
|
211 | samples.append(b"foo" * 64) | |
209 |
samples.append(b |
|
212 | samples.append(b"bar" * 64) | |
210 |
samples.append(b |
|
213 | samples.append(b"foobar" * 64) | |
211 |
|
214 | |||
212 | d = zstd.train_dictionary(1024, samples) |
|
215 | d = zstd.train_dictionary(1024, samples) | |
213 |
|
216 | |||
214 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) |
|
217 | cctx = zstd.ZstdCompressor(dict_data=d, threads=2) | |
215 |
|
218 | |||
216 |
result = cctx.compress(b |
|
219 | result = cctx.compress(b"foo") | |
217 |
params = zstd.get_frame_parameters(result) |
|
220 | params = zstd.get_frame_parameters(result) | |
218 |
self.assertEqual(params.content_size, 3) |
|
221 | self.assertEqual(params.content_size, 3) | |
219 | self.assertEqual(params.dict_id, d.dict_id()) |
|
222 | self.assertEqual(params.dict_id, d.dict_id()) | |
220 |
|
223 | |||
221 |
self.assertEqual( |
|
224 | self.assertEqual( | |
222 | b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00' |
|
225 | result, | |
223 | b'\x66\x6f\x6f') |
|
226 | b"\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00" b"\x66\x6f\x6f", | |
|
227 | ) | |||
224 |
|
228 | |||
225 | def test_multithreaded_compression_params(self): |
|
229 | def test_multithreaded_compression_params(self): | |
226 | params = zstd.ZstdCompressionParameters.from_level(0, threads=2) |
|
230 | params = zstd.ZstdCompressionParameters.from_level(0, threads=2) | |
227 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
231 | cctx = zstd.ZstdCompressor(compression_params=params) | |
228 |
|
232 | |||
229 |
result = cctx.compress(b |
|
233 | result = cctx.compress(b"foo") | |
230 |
params = zstd.get_frame_parameters(result) |
|
234 | params = zstd.get_frame_parameters(result) | |
231 |
self.assertEqual(params.content_size, 3) |
|
235 | self.assertEqual(params.content_size, 3) | |
232 |
|
236 | |||
233 | self.assertEqual(result, |
|
237 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f") | |
234 | b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f') |
|
|||
235 |
|
238 | |||
236 |
|
239 | |||
237 | @make_cffi |
|
240 | @make_cffi | |
238 |
class TestCompressor_compressobj( |
|
241 | class TestCompressor_compressobj(TestCase): | |
239 | def test_compressobj_empty(self): |
|
242 | def test_compressobj_empty(self): | |
240 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
243 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
241 | cobj = cctx.compressobj() |
|
244 | cobj = cctx.compressobj() | |
242 |
self.assertEqual(cobj.compress(b |
|
245 | self.assertEqual(cobj.compress(b""), b"") | |
243 | self.assertEqual(cobj.flush(), |
|
246 | self.assertEqual(cobj.flush(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
244 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
|||
245 |
|
247 | |||
246 | def test_input_types(self): |
|
248 | def test_input_types(self): | |
247 |
expected = b |
|
249 | expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" | |
248 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
250 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
249 |
|
251 | |||
250 | mutable_array = bytearray(3) |
|
252 | mutable_array = bytearray(3) | |
251 |
mutable_array[:] = b |
|
253 | mutable_array[:] = b"foo" | |
252 |
|
254 | |||
253 | sources = [ |
|
255 | sources = [ | |
254 |
memoryview(b |
|
256 | memoryview(b"foo"), | |
255 |
bytearray(b |
|
257 | bytearray(b"foo"), | |
256 | mutable_array, |
|
258 | mutable_array, | |
257 | ] |
|
259 | ] | |
258 |
|
260 | |||
259 | for source in sources: |
|
261 | for source in sources: | |
260 | cobj = cctx.compressobj() |
|
262 | cobj = cctx.compressobj() | |
261 |
self.assertEqual(cobj.compress(source), b |
|
263 | self.assertEqual(cobj.compress(source), b"") | |
262 | self.assertEqual(cobj.flush(), expected) |
|
264 | self.assertEqual(cobj.flush(), expected) | |
263 |
|
265 | |||
264 | def test_compressobj_large(self): |
|
266 | def test_compressobj_large(self): | |
265 | chunks = [] |
|
267 | chunks = [] | |
266 | for i in range(255): |
|
268 | for i in range(255): | |
267 |
chunks.append(struct.Struct( |
|
269 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
268 |
|
270 | |||
269 | cctx = zstd.ZstdCompressor(level=3) |
|
271 | cctx = zstd.ZstdCompressor(level=3) | |
270 | cobj = cctx.compressobj() |
|
272 | cobj = cctx.compressobj() | |
271 |
|
273 | |||
272 |
result = cobj.compress(b |
|
274 | result = cobj.compress(b"".join(chunks)) + cobj.flush() | |
273 | self.assertEqual(len(result), 999) |
|
275 | self.assertEqual(len(result), 999) | |
274 |
self.assertEqual(result[0:4], b |
|
276 | self.assertEqual(result[0:4], b"\x28\xb5\x2f\xfd") | |
275 |
|
277 | |||
276 | params = zstd.get_frame_parameters(result) |
|
278 | params = zstd.get_frame_parameters(result) | |
277 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
279 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
278 | self.assertEqual(params.window_size, 2097152) |
|
280 | self.assertEqual(params.window_size, 2097152) | |
279 | self.assertEqual(params.dict_id, 0) |
|
281 | self.assertEqual(params.dict_id, 0) | |
280 | self.assertFalse(params.has_checksum) |
|
282 | self.assertFalse(params.has_checksum) | |
281 |
|
283 | |||
282 | def test_write_checksum(self): |
|
284 | def test_write_checksum(self): | |
283 | cctx = zstd.ZstdCompressor(level=1) |
|
285 | cctx = zstd.ZstdCompressor(level=1) | |
284 | cobj = cctx.compressobj() |
|
286 | cobj = cctx.compressobj() | |
285 |
no_checksum = cobj.compress(b |
|
287 | no_checksum = cobj.compress(b"foobar") + cobj.flush() | |
286 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
288 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
287 | cobj = cctx.compressobj() |
|
289 | cobj = cctx.compressobj() | |
288 |
with_checksum = cobj.compress(b |
|
290 | with_checksum = cobj.compress(b"foobar") + cobj.flush() | |
289 |
|
291 | |||
290 | no_params = zstd.get_frame_parameters(no_checksum) |
|
292 | no_params = zstd.get_frame_parameters(no_checksum) | |
291 | with_params = zstd.get_frame_parameters(with_checksum) |
|
293 | with_params = zstd.get_frame_parameters(with_checksum) | |
292 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
294 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
293 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
295 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
294 | self.assertEqual(no_params.dict_id, 0) |
|
296 | self.assertEqual(no_params.dict_id, 0) | |
295 | self.assertEqual(with_params.dict_id, 0) |
|
297 | self.assertEqual(with_params.dict_id, 0) | |
296 | self.assertFalse(no_params.has_checksum) |
|
298 | self.assertFalse(no_params.has_checksum) | |
297 | self.assertTrue(with_params.has_checksum) |
|
299 | self.assertTrue(with_params.has_checksum) | |
298 |
|
300 | |||
299 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) |
|
301 | self.assertEqual(len(with_checksum), len(no_checksum) + 4) | |
300 |
|
302 | |||
301 | def test_write_content_size(self): |
|
303 | def test_write_content_size(self): | |
302 | cctx = zstd.ZstdCompressor(level=1) |
|
304 | cctx = zstd.ZstdCompressor(level=1) | |
303 |
cobj = cctx.compressobj(size=len(b |
|
305 | cobj = cctx.compressobj(size=len(b"foobar" * 256)) | |
304 |
with_size = cobj.compress(b |
|
306 | with_size = cobj.compress(b"foobar" * 256) + cobj.flush() | |
305 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
307 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
306 |
cobj = cctx.compressobj(size=len(b |
|
308 | cobj = cctx.compressobj(size=len(b"foobar" * 256)) | |
307 |
no_size = cobj.compress(b |
|
309 | no_size = cobj.compress(b"foobar" * 256) + cobj.flush() | |
308 |
|
310 | |||
309 | no_params = zstd.get_frame_parameters(no_size) |
|
311 | no_params = zstd.get_frame_parameters(no_size) | |
310 | with_params = zstd.get_frame_parameters(with_size) |
|
312 | with_params = zstd.get_frame_parameters(with_size) | |
311 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
313 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
312 | self.assertEqual(with_params.content_size, 1536) |
|
314 | self.assertEqual(with_params.content_size, 1536) | |
313 | self.assertEqual(no_params.dict_id, 0) |
|
315 | self.assertEqual(no_params.dict_id, 0) | |
314 | self.assertEqual(with_params.dict_id, 0) |
|
316 | self.assertEqual(with_params.dict_id, 0) | |
315 | self.assertFalse(no_params.has_checksum) |
|
317 | self.assertFalse(no_params.has_checksum) | |
316 | self.assertFalse(with_params.has_checksum) |
|
318 | self.assertFalse(with_params.has_checksum) | |
317 |
|
319 | |||
318 | self.assertEqual(len(with_size), len(no_size) + 1) |
|
320 | self.assertEqual(len(with_size), len(no_size) + 1) | |
319 |
|
321 | |||
320 | def test_compress_after_finished(self): |
|
322 | def test_compress_after_finished(self): | |
321 | cctx = zstd.ZstdCompressor() |
|
323 | cctx = zstd.ZstdCompressor() | |
322 | cobj = cctx.compressobj() |
|
324 | cobj = cctx.compressobj() | |
323 |
|
325 | |||
324 |
cobj.compress(b |
|
326 | cobj.compress(b"foo") | |
325 | cobj.flush() |
|
327 | cobj.flush() | |
326 |
|
328 | |||
327 | with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'): |
|
329 | with self.assertRaisesRegex( | |
328 | cobj.compress(b'foo') |
|
330 | zstd.ZstdError, r"cannot call compress\(\) after compressor" | |
|
331 | ): | |||
|
332 | cobj.compress(b"foo") | |||
329 |
|
333 | |||
330 | with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'): |
|
334 | with self.assertRaisesRegex( | |
|
335 | zstd.ZstdError, "compressor object already finished" | |||
|
336 | ): | |||
331 | cobj.flush() |
|
337 | cobj.flush() | |
332 |
|
338 | |||
333 | def test_flush_block_repeated(self): |
|
339 | def test_flush_block_repeated(self): | |
334 | cctx = zstd.ZstdCompressor(level=1) |
|
340 | cctx = zstd.ZstdCompressor(level=1) | |
335 | cobj = cctx.compressobj() |
|
341 | cobj = cctx.compressobj() | |
336 |
|
342 | |||
337 |
self.assertEqual(cobj.compress(b |
|
343 | self.assertEqual(cobj.compress(b"foo"), b"") | |
338 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), |
|
344 | self.assertEqual( | |
339 | b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo') |
|
345 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), | |
340 | self.assertEqual(cobj.compress(b'bar'), b'') |
|
346 | b"\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo", | |
|
347 | ) | |||
|
348 | self.assertEqual(cobj.compress(b"bar"), b"") | |||
341 | # 3 byte header plus content. |
|
349 | # 3 byte header plus content. | |
342 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), |
|
350 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"\x18\x00\x00bar") | |
343 | b'\x18\x00\x00bar') |
|
351 | self.assertEqual(cobj.flush(), b"\x01\x00\x00") | |
344 | self.assertEqual(cobj.flush(), b'\x01\x00\x00') |
|
|||
345 |
|
352 | |||
346 | def test_flush_empty_block(self): |
|
353 | def test_flush_empty_block(self): | |
347 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
354 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
348 | cobj = cctx.compressobj() |
|
355 | cobj = cctx.compressobj() | |
349 |
|
356 | |||
350 |
cobj.compress(b |
|
357 | cobj.compress(b"foobar") | |
351 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
358 | cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) | |
352 | # No-op if no block is active (this is internal to zstd). |
|
359 | # No-op if no block is active (this is internal to zstd). | |
353 |
self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b |
|
360 | self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b"") | |
354 |
|
361 | |||
355 | trailing = cobj.flush() |
|
362 | trailing = cobj.flush() | |
356 | # 3 bytes block header + 4 bytes frame checksum |
|
363 | # 3 bytes block header + 4 bytes frame checksum | |
357 | self.assertEqual(len(trailing), 7) |
|
364 | self.assertEqual(len(trailing), 7) | |
358 | header = trailing[0:3] |
|
365 | header = trailing[0:3] | |
359 |
self.assertEqual(header, b |
|
366 | self.assertEqual(header, b"\x01\x00\x00") | |
360 |
|
367 | |||
361 | def test_multithreaded(self): |
|
368 | def test_multithreaded(self): | |
362 | source = io.BytesIO() |
|
369 | source = io.BytesIO() | |
363 |
source.write(b |
|
370 | source.write(b"a" * 1048576) | |
364 |
source.write(b |
|
371 | source.write(b"b" * 1048576) | |
365 |
source.write(b |
|
372 | source.write(b"c" * 1048576) | |
366 | source.seek(0) |
|
373 | source.seek(0) | |
367 |
|
374 | |||
368 | cctx = zstd.ZstdCompressor(level=1, threads=2) |
|
375 | cctx = zstd.ZstdCompressor(level=1, threads=2) | |
369 | cobj = cctx.compressobj() |
|
376 | cobj = cctx.compressobj() | |
370 |
|
377 | |||
371 | chunks = [] |
|
378 | chunks = [] | |
372 | while True: |
|
379 | while True: | |
373 | d = source.read(8192) |
|
380 | d = source.read(8192) | |
374 | if not d: |
|
381 | if not d: | |
375 | break |
|
382 | break | |
376 |
|
383 | |||
377 | chunks.append(cobj.compress(d)) |
|
384 | chunks.append(cobj.compress(d)) | |
378 |
|
385 | |||
379 | chunks.append(cobj.flush()) |
|
386 | chunks.append(cobj.flush()) | |
380 |
|
387 | |||
381 |
compressed = b |
|
388 | compressed = b"".join(chunks) | |
382 |
|
389 | |||
383 |
self.assertEqual(len(compressed), |
|
390 | self.assertEqual(len(compressed), 119) | |
384 |
|
391 | |||
385 | def test_frame_progression(self): |
|
392 | def test_frame_progression(self): | |
386 | cctx = zstd.ZstdCompressor() |
|
393 | cctx = zstd.ZstdCompressor() | |
387 |
|
394 | |||
388 | self.assertEqual(cctx.frame_progression(), (0, 0, 0)) |
|
395 | self.assertEqual(cctx.frame_progression(), (0, 0, 0)) | |
389 |
|
396 | |||
390 | cobj = cctx.compressobj() |
|
397 | cobj = cctx.compressobj() | |
391 |
|
398 | |||
392 |
cobj.compress(b |
|
399 | cobj.compress(b"foobar") | |
393 | self.assertEqual(cctx.frame_progression(), (6, 0, 0)) |
|
400 | self.assertEqual(cctx.frame_progression(), (6, 0, 0)) | |
394 |
|
401 | |||
395 | cobj.flush() |
|
402 | cobj.flush() | |
396 | self.assertEqual(cctx.frame_progression(), (6, 6, 15)) |
|
403 | self.assertEqual(cctx.frame_progression(), (6, 6, 15)) | |
397 |
|
404 | |||
398 | def test_bad_size(self): |
|
405 | def test_bad_size(self): | |
399 | cctx = zstd.ZstdCompressor() |
|
406 | cctx = zstd.ZstdCompressor() | |
400 |
|
407 | |||
401 | cobj = cctx.compressobj(size=2) |
|
408 | cobj = cctx.compressobj(size=2) | |
402 |
with self.assertRaisesRegex |
|
409 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
403 |
cobj.compress(b |
|
410 | cobj.compress(b"foo") | |
404 |
|
411 | |||
405 | # Try another operation on this instance. |
|
412 | # Try another operation on this instance. | |
406 |
with self.assertRaisesRegex |
|
413 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
407 |
cobj.compress(b |
|
414 | cobj.compress(b"aa") | |
408 |
|
415 | |||
409 | # Try another operation on the compressor. |
|
416 | # Try another operation on the compressor. | |
410 | cctx.compressobj(size=4) |
|
417 | cctx.compressobj(size=4) | |
411 |
cctx.compress(b |
|
418 | cctx.compress(b"foobar") | |
412 |
|
419 | |||
413 |
|
420 | |||
414 | @make_cffi |
|
421 | @make_cffi | |
415 |
class TestCompressor_copy_stream( |
|
422 | class TestCompressor_copy_stream(TestCase): | |
416 | def test_no_read(self): |
|
423 | def test_no_read(self): | |
417 | source = object() |
|
424 | source = object() | |
418 | dest = io.BytesIO() |
|
425 | dest = io.BytesIO() | |
419 |
|
426 | |||
420 | cctx = zstd.ZstdCompressor() |
|
427 | cctx = zstd.ZstdCompressor() | |
421 | with self.assertRaises(ValueError): |
|
428 | with self.assertRaises(ValueError): | |
422 | cctx.copy_stream(source, dest) |
|
429 | cctx.copy_stream(source, dest) | |
423 |
|
430 | |||
424 | def test_no_write(self): |
|
431 | def test_no_write(self): | |
425 | source = io.BytesIO() |
|
432 | source = io.BytesIO() | |
426 | dest = object() |
|
433 | dest = object() | |
427 |
|
434 | |||
428 | cctx = zstd.ZstdCompressor() |
|
435 | cctx = zstd.ZstdCompressor() | |
429 | with self.assertRaises(ValueError): |
|
436 | with self.assertRaises(ValueError): | |
430 | cctx.copy_stream(source, dest) |
|
437 | cctx.copy_stream(source, dest) | |
431 |
|
438 | |||
432 | def test_empty(self): |
|
439 | def test_empty(self): | |
433 | source = io.BytesIO() |
|
440 | source = io.BytesIO() | |
434 | dest = io.BytesIO() |
|
441 | dest = io.BytesIO() | |
435 |
|
442 | |||
436 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
443 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
437 | r, w = cctx.copy_stream(source, dest) |
|
444 | r, w = cctx.copy_stream(source, dest) | |
438 | self.assertEqual(int(r), 0) |
|
445 | self.assertEqual(int(r), 0) | |
439 | self.assertEqual(w, 9) |
|
446 | self.assertEqual(w, 9) | |
440 |
|
447 | |||
441 | self.assertEqual(dest.getvalue(), |
|
448 | self.assertEqual(dest.getvalue(), b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
442 | b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00') |
|
|||
443 |
|
449 | |||
444 | def test_large_data(self): |
|
450 | def test_large_data(self): | |
445 | source = io.BytesIO() |
|
451 | source = io.BytesIO() | |
446 | for i in range(255): |
|
452 | for i in range(255): | |
447 |
source.write(struct.Struct( |
|
453 | source.write(struct.Struct(">B").pack(i) * 16384) | |
448 | source.seek(0) |
|
454 | source.seek(0) | |
449 |
|
455 | |||
450 | dest = io.BytesIO() |
|
456 | dest = io.BytesIO() | |
451 | cctx = zstd.ZstdCompressor() |
|
457 | cctx = zstd.ZstdCompressor() | |
452 | r, w = cctx.copy_stream(source, dest) |
|
458 | r, w = cctx.copy_stream(source, dest) | |
453 |
|
459 | |||
454 | self.assertEqual(r, 255 * 16384) |
|
460 | self.assertEqual(r, 255 * 16384) | |
455 | self.assertEqual(w, 999) |
|
461 | self.assertEqual(w, 999) | |
456 |
|
462 | |||
457 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
463 | params = zstd.get_frame_parameters(dest.getvalue()) | |
458 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
464 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
459 | self.assertEqual(params.window_size, 2097152) |
|
465 | self.assertEqual(params.window_size, 2097152) | |
460 | self.assertEqual(params.dict_id, 0) |
|
466 | self.assertEqual(params.dict_id, 0) | |
461 | self.assertFalse(params.has_checksum) |
|
467 | self.assertFalse(params.has_checksum) | |
462 |
|
468 | |||
463 | def test_write_checksum(self): |
|
469 | def test_write_checksum(self): | |
464 |
source = io.BytesIO(b |
|
470 | source = io.BytesIO(b"foobar") | |
465 | no_checksum = io.BytesIO() |
|
471 | no_checksum = io.BytesIO() | |
466 |
|
472 | |||
467 | cctx = zstd.ZstdCompressor(level=1) |
|
473 | cctx = zstd.ZstdCompressor(level=1) | |
468 | cctx.copy_stream(source, no_checksum) |
|
474 | cctx.copy_stream(source, no_checksum) | |
469 |
|
475 | |||
470 | source.seek(0) |
|
476 | source.seek(0) | |
471 | with_checksum = io.BytesIO() |
|
477 | with_checksum = io.BytesIO() | |
472 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
478 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
473 | cctx.copy_stream(source, with_checksum) |
|
479 | cctx.copy_stream(source, with_checksum) | |
474 |
|
480 | |||
475 | self.assertEqual(len(with_checksum.getvalue()), |
|
481 | self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) | |
476 | len(no_checksum.getvalue()) + 4) |
|
|||
477 |
|
482 | |||
478 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
483 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |
479 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
484 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |
480 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
485 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
481 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
486 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
482 | self.assertEqual(no_params.dict_id, 0) |
|
487 | self.assertEqual(no_params.dict_id, 0) | |
483 | self.assertEqual(with_params.dict_id, 0) |
|
488 | self.assertEqual(with_params.dict_id, 0) | |
484 | self.assertFalse(no_params.has_checksum) |
|
489 | self.assertFalse(no_params.has_checksum) | |
485 | self.assertTrue(with_params.has_checksum) |
|
490 | self.assertTrue(with_params.has_checksum) | |
486 |
|
491 | |||
487 | def test_write_content_size(self): |
|
492 | def test_write_content_size(self): | |
488 |
source = io.BytesIO(b |
|
493 | source = io.BytesIO(b"foobar" * 256) | |
489 | no_size = io.BytesIO() |
|
494 | no_size = io.BytesIO() | |
490 |
|
495 | |||
491 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
496 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
492 | cctx.copy_stream(source, no_size) |
|
497 | cctx.copy_stream(source, no_size) | |
493 |
|
498 | |||
494 | source.seek(0) |
|
499 | source.seek(0) | |
495 | with_size = io.BytesIO() |
|
500 | with_size = io.BytesIO() | |
496 | cctx = zstd.ZstdCompressor(level=1) |
|
501 | cctx = zstd.ZstdCompressor(level=1) | |
497 | cctx.copy_stream(source, with_size) |
|
502 | cctx.copy_stream(source, with_size) | |
498 |
|
503 | |||
499 | # Source content size is unknown, so no content size written. |
|
504 | # Source content size is unknown, so no content size written. | |
500 | self.assertEqual(len(with_size.getvalue()), |
|
505 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) | |
501 | len(no_size.getvalue())) |
|
|||
502 |
|
506 | |||
503 | source.seek(0) |
|
507 | source.seek(0) | |
504 | with_size = io.BytesIO() |
|
508 | with_size = io.BytesIO() | |
505 | cctx.copy_stream(source, with_size, size=len(source.getvalue())) |
|
509 | cctx.copy_stream(source, with_size, size=len(source.getvalue())) | |
506 |
|
510 | |||
507 | # We specified source size, so content size header is present. |
|
511 | # We specified source size, so content size header is present. | |
508 | self.assertEqual(len(with_size.getvalue()), |
|
512 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) | |
509 | len(no_size.getvalue()) + 1) |
|
|||
510 |
|
513 | |||
511 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
514 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |
512 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
515 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |
513 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
516 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
514 | self.assertEqual(with_params.content_size, 1536) |
|
517 | self.assertEqual(with_params.content_size, 1536) | |
515 | self.assertEqual(no_params.dict_id, 0) |
|
518 | self.assertEqual(no_params.dict_id, 0) | |
516 | self.assertEqual(with_params.dict_id, 0) |
|
519 | self.assertEqual(with_params.dict_id, 0) | |
517 | self.assertFalse(no_params.has_checksum) |
|
520 | self.assertFalse(no_params.has_checksum) | |
518 | self.assertFalse(with_params.has_checksum) |
|
521 | self.assertFalse(with_params.has_checksum) | |
519 |
|
522 | |||
520 | def test_read_write_size(self): |
|
523 | def test_read_write_size(self): | |
521 |
source = OpCountingBytesIO(b |
|
524 | source = OpCountingBytesIO(b"foobarfoobar") | |
522 | dest = OpCountingBytesIO() |
|
525 | dest = OpCountingBytesIO() | |
523 | cctx = zstd.ZstdCompressor() |
|
526 | cctx = zstd.ZstdCompressor() | |
524 | r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
527 | r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1) | |
525 |
|
528 | |||
526 | self.assertEqual(r, len(source.getvalue())) |
|
529 | self.assertEqual(r, len(source.getvalue())) | |
527 | self.assertEqual(w, 21) |
|
530 | self.assertEqual(w, 21) | |
528 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
531 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) | |
529 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
532 | self.assertEqual(dest._write_count, len(dest.getvalue())) | |
530 |
|
533 | |||
531 | def test_multithreaded(self): |
|
534 | def test_multithreaded(self): | |
532 | source = io.BytesIO() |
|
535 | source = io.BytesIO() | |
533 |
source.write(b |
|
536 | source.write(b"a" * 1048576) | |
534 |
source.write(b |
|
537 | source.write(b"b" * 1048576) | |
535 |
source.write(b |
|
538 | source.write(b"c" * 1048576) | |
536 | source.seek(0) |
|
539 | source.seek(0) | |
537 |
|
540 | |||
538 | dest = io.BytesIO() |
|
541 | dest = io.BytesIO() | |
539 | cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) |
|
542 | cctx = zstd.ZstdCompressor(threads=2, write_content_size=False) | |
540 | r, w = cctx.copy_stream(source, dest) |
|
543 | r, w = cctx.copy_stream(source, dest) | |
541 | self.assertEqual(r, 3145728) |
|
544 | self.assertEqual(r, 3145728) | |
542 |
self.assertEqual(w, |
|
545 | self.assertEqual(w, 111) | |
543 |
|
546 | |||
544 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
547 | params = zstd.get_frame_parameters(dest.getvalue()) | |
545 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
548 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
546 | self.assertEqual(params.dict_id, 0) |
|
549 | self.assertEqual(params.dict_id, 0) | |
547 | self.assertFalse(params.has_checksum) |
|
550 | self.assertFalse(params.has_checksum) | |
548 |
|
551 | |||
549 | # Writing content size and checksum works. |
|
552 | # Writing content size and checksum works. | |
550 | cctx = zstd.ZstdCompressor(threads=2, write_checksum=True) |
|
553 | cctx = zstd.ZstdCompressor(threads=2, write_checksum=True) | |
551 | dest = io.BytesIO() |
|
554 | dest = io.BytesIO() | |
552 | source.seek(0) |
|
555 | source.seek(0) | |
553 | cctx.copy_stream(source, dest, size=len(source.getvalue())) |
|
556 | cctx.copy_stream(source, dest, size=len(source.getvalue())) | |
554 |
|
557 | |||
555 | params = zstd.get_frame_parameters(dest.getvalue()) |
|
558 | params = zstd.get_frame_parameters(dest.getvalue()) | |
556 | self.assertEqual(params.content_size, 3145728) |
|
559 | self.assertEqual(params.content_size, 3145728) | |
557 | self.assertEqual(params.dict_id, 0) |
|
560 | self.assertEqual(params.dict_id, 0) | |
558 | self.assertTrue(params.has_checksum) |
|
561 | self.assertTrue(params.has_checksum) | |
559 |
|
562 | |||
560 | def test_bad_size(self): |
|
563 | def test_bad_size(self): | |
561 | source = io.BytesIO() |
|
564 | source = io.BytesIO() | |
562 |
source.write(b |
|
565 | source.write(b"a" * 32768) | |
563 |
source.write(b |
|
566 | source.write(b"b" * 32768) | |
564 | source.seek(0) |
|
567 | source.seek(0) | |
565 |
|
568 | |||
566 | dest = io.BytesIO() |
|
569 | dest = io.BytesIO() | |
567 |
|
570 | |||
568 | cctx = zstd.ZstdCompressor() |
|
571 | cctx = zstd.ZstdCompressor() | |
569 |
|
572 | |||
570 |
with self.assertRaisesRegex |
|
573 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
571 | cctx.copy_stream(source, dest, size=42) |
|
574 | cctx.copy_stream(source, dest, size=42) | |
572 |
|
575 | |||
573 | # Try another operation on this compressor. |
|
576 | # Try another operation on this compressor. | |
574 | source.seek(0) |
|
577 | source.seek(0) | |
575 | dest = io.BytesIO() |
|
578 | dest = io.BytesIO() | |
576 | cctx.copy_stream(source, dest) |
|
579 | cctx.copy_stream(source, dest) | |
577 |
|
580 | |||
578 |
|
581 | |||
579 | @make_cffi |
|
582 | @make_cffi | |
580 |
class TestCompressor_stream_reader( |
|
583 | class TestCompressor_stream_reader(TestCase): | |
581 | def test_context_manager(self): |
|
584 | def test_context_manager(self): | |
582 | cctx = zstd.ZstdCompressor() |
|
585 | cctx = zstd.ZstdCompressor() | |
583 |
|
586 | |||
584 |
with cctx.stream_reader(b |
|
587 | with cctx.stream_reader(b"foo") as reader: | |
585 |
with self.assertRaisesRegex |
|
588 | with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): | |
586 | with reader as reader2: |
|
589 | with reader as reader2: | |
587 | pass |
|
590 | pass | |
588 |
|
591 | |||
589 | def test_no_context_manager(self): |
|
592 | def test_no_context_manager(self): | |
590 | cctx = zstd.ZstdCompressor() |
|
593 | cctx = zstd.ZstdCompressor() | |
591 |
|
594 | |||
592 |
reader = cctx.stream_reader(b |
|
595 | reader = cctx.stream_reader(b"foo") | |
593 | reader.read(4) |
|
596 | reader.read(4) | |
594 | self.assertFalse(reader.closed) |
|
597 | self.assertFalse(reader.closed) | |
595 |
|
598 | |||
596 | reader.close() |
|
599 | reader.close() | |
597 | self.assertTrue(reader.closed) |
|
600 | self.assertTrue(reader.closed) | |
598 |
with self.assertRaisesRegex |
|
601 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
599 | reader.read(1) |
|
602 | reader.read(1) | |
600 |
|
603 | |||
601 | def test_not_implemented(self): |
|
604 | def test_not_implemented(self): | |
602 | cctx = zstd.ZstdCompressor() |
|
605 | cctx = zstd.ZstdCompressor() | |
603 |
|
606 | |||
604 |
with cctx.stream_reader(b |
|
607 | with cctx.stream_reader(b"foo" * 60) as reader: | |
605 | with self.assertRaises(io.UnsupportedOperation): |
|
608 | with self.assertRaises(io.UnsupportedOperation): | |
606 | reader.readline() |
|
609 | reader.readline() | |
607 |
|
610 | |||
608 | with self.assertRaises(io.UnsupportedOperation): |
|
611 | with self.assertRaises(io.UnsupportedOperation): | |
609 | reader.readlines() |
|
612 | reader.readlines() | |
610 |
|
613 | |||
611 | with self.assertRaises(io.UnsupportedOperation): |
|
614 | with self.assertRaises(io.UnsupportedOperation): | |
612 | iter(reader) |
|
615 | iter(reader) | |
613 |
|
616 | |||
614 | with self.assertRaises(io.UnsupportedOperation): |
|
617 | with self.assertRaises(io.UnsupportedOperation): | |
615 | next(reader) |
|
618 | next(reader) | |
616 |
|
619 | |||
617 | with self.assertRaises(OSError): |
|
620 | with self.assertRaises(OSError): | |
618 | reader.writelines([]) |
|
621 | reader.writelines([]) | |
619 |
|
622 | |||
620 | with self.assertRaises(OSError): |
|
623 | with self.assertRaises(OSError): | |
621 |
reader.write(b |
|
624 | reader.write(b"foo") | |
622 |
|
625 | |||
623 | def test_constant_methods(self): |
|
626 | def test_constant_methods(self): | |
624 | cctx = zstd.ZstdCompressor() |
|
627 | cctx = zstd.ZstdCompressor() | |
625 |
|
628 | |||
626 |
with cctx.stream_reader(b |
|
629 | with cctx.stream_reader(b"boo") as reader: | |
627 | self.assertTrue(reader.readable()) |
|
630 | self.assertTrue(reader.readable()) | |
628 | self.assertFalse(reader.writable()) |
|
631 | self.assertFalse(reader.writable()) | |
629 | self.assertFalse(reader.seekable()) |
|
632 | self.assertFalse(reader.seekable()) | |
630 | self.assertFalse(reader.isatty()) |
|
633 | self.assertFalse(reader.isatty()) | |
631 | self.assertFalse(reader.closed) |
|
634 | self.assertFalse(reader.closed) | |
632 | self.assertIsNone(reader.flush()) |
|
635 | self.assertIsNone(reader.flush()) | |
633 | self.assertFalse(reader.closed) |
|
636 | self.assertFalse(reader.closed) | |
634 |
|
637 | |||
635 | self.assertTrue(reader.closed) |
|
638 | self.assertTrue(reader.closed) | |
636 |
|
639 | |||
637 | def test_read_closed(self): |
|
640 | def test_read_closed(self): | |
638 | cctx = zstd.ZstdCompressor() |
|
641 | cctx = zstd.ZstdCompressor() | |
639 |
|
642 | |||
640 |
with cctx.stream_reader(b |
|
643 | with cctx.stream_reader(b"foo" * 60) as reader: | |
641 | reader.close() |
|
644 | reader.close() | |
642 | self.assertTrue(reader.closed) |
|
645 | self.assertTrue(reader.closed) | |
643 |
with self.assertRaisesRegex |
|
646 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
644 | reader.read(10) |
|
647 | reader.read(10) | |
645 |
|
648 | |||
646 | def test_read_sizes(self): |
|
649 | def test_read_sizes(self): | |
647 | cctx = zstd.ZstdCompressor() |
|
650 | cctx = zstd.ZstdCompressor() | |
648 |
foo = cctx.compress(b |
|
651 | foo = cctx.compress(b"foo") | |
649 |
|
652 | |||
650 |
with cctx.stream_reader(b |
|
653 | with cctx.stream_reader(b"foo") as reader: | |
651 | with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): |
|
654 | with self.assertRaisesRegex( | |
|
655 | ValueError, "cannot read negative amounts less than -1" | |||
|
656 | ): | |||
652 | reader.read(-2) |
|
657 | reader.read(-2) | |
653 |
|
658 | |||
654 |
self.assertEqual(reader.read(0), b |
|
659 | self.assertEqual(reader.read(0), b"") | |
655 | self.assertEqual(reader.read(), foo) |
|
660 | self.assertEqual(reader.read(), foo) | |
656 |
|
661 | |||
657 | def test_read_buffer(self): |
|
662 | def test_read_buffer(self): | |
658 | cctx = zstd.ZstdCompressor() |
|
663 | cctx = zstd.ZstdCompressor() | |
659 |
|
664 | |||
660 |
source = b |
|
665 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
661 | frame = cctx.compress(source) |
|
666 | frame = cctx.compress(source) | |
662 |
|
667 | |||
663 | with cctx.stream_reader(source) as reader: |
|
668 | with cctx.stream_reader(source) as reader: | |
664 | self.assertEqual(reader.tell(), 0) |
|
669 | self.assertEqual(reader.tell(), 0) | |
665 |
|
670 | |||
666 | # We should get entire frame in one read. |
|
671 | # We should get entire frame in one read. | |
667 | result = reader.read(8192) |
|
672 | result = reader.read(8192) | |
668 | self.assertEqual(result, frame) |
|
673 | self.assertEqual(result, frame) | |
669 | self.assertEqual(reader.tell(), len(result)) |
|
674 | self.assertEqual(reader.tell(), len(result)) | |
670 |
self.assertEqual(reader.read(), b |
|
675 | self.assertEqual(reader.read(), b"") | |
671 | self.assertEqual(reader.tell(), len(result)) |
|
676 | self.assertEqual(reader.tell(), len(result)) | |
672 |
|
677 | |||
673 | def test_read_buffer_small_chunks(self): |
|
678 | def test_read_buffer_small_chunks(self): | |
674 | cctx = zstd.ZstdCompressor() |
|
679 | cctx = zstd.ZstdCompressor() | |
675 |
|
680 | |||
676 |
source = b |
|
681 | source = b"foo" * 60 | |
677 | chunks = [] |
|
682 | chunks = [] | |
678 |
|
683 | |||
679 | with cctx.stream_reader(source) as reader: |
|
684 | with cctx.stream_reader(source) as reader: | |
680 | self.assertEqual(reader.tell(), 0) |
|
685 | self.assertEqual(reader.tell(), 0) | |
681 |
|
686 | |||
682 | while True: |
|
687 | while True: | |
683 | chunk = reader.read(1) |
|
688 | chunk = reader.read(1) | |
684 | if not chunk: |
|
689 | if not chunk: | |
685 | break |
|
690 | break | |
686 |
|
691 | |||
687 | chunks.append(chunk) |
|
692 | chunks.append(chunk) | |
688 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
693 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |
689 |
|
694 | |||
690 |
self.assertEqual(b |
|
695 | self.assertEqual(b"".join(chunks), cctx.compress(source)) | |
691 |
|
696 | |||
692 | def test_read_stream(self): |
|
697 | def test_read_stream(self): | |
693 | cctx = zstd.ZstdCompressor() |
|
698 | cctx = zstd.ZstdCompressor() | |
694 |
|
699 | |||
695 |
source = b |
|
700 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
696 | frame = cctx.compress(source) |
|
701 | frame = cctx.compress(source) | |
697 |
|
702 | |||
698 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
|
703 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: | |
699 | self.assertEqual(reader.tell(), 0) |
|
704 | self.assertEqual(reader.tell(), 0) | |
700 |
|
705 | |||
701 | chunk = reader.read(8192) |
|
706 | chunk = reader.read(8192) | |
702 | self.assertEqual(chunk, frame) |
|
707 | self.assertEqual(chunk, frame) | |
703 | self.assertEqual(reader.tell(), len(chunk)) |
|
708 | self.assertEqual(reader.tell(), len(chunk)) | |
704 |
self.assertEqual(reader.read(), b |
|
709 | self.assertEqual(reader.read(), b"") | |
705 | self.assertEqual(reader.tell(), len(chunk)) |
|
710 | self.assertEqual(reader.tell(), len(chunk)) | |
706 |
|
711 | |||
707 | def test_read_stream_small_chunks(self): |
|
712 | def test_read_stream_small_chunks(self): | |
708 | cctx = zstd.ZstdCompressor() |
|
713 | cctx = zstd.ZstdCompressor() | |
709 |
|
714 | |||
710 |
source = b |
|
715 | source = b"foo" * 60 | |
711 | chunks = [] |
|
716 | chunks = [] | |
712 |
|
717 | |||
713 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: |
|
718 | with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader: | |
714 | self.assertEqual(reader.tell(), 0) |
|
719 | self.assertEqual(reader.tell(), 0) | |
715 |
|
720 | |||
716 | while True: |
|
721 | while True: | |
717 | chunk = reader.read(1) |
|
722 | chunk = reader.read(1) | |
718 | if not chunk: |
|
723 | if not chunk: | |
719 | break |
|
724 | break | |
720 |
|
725 | |||
721 | chunks.append(chunk) |
|
726 | chunks.append(chunk) | |
722 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
727 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |
723 |
|
728 | |||
724 |
self.assertEqual(b |
|
729 | self.assertEqual(b"".join(chunks), cctx.compress(source)) | |
725 |
|
730 | |||
726 | def test_read_after_exit(self): |
|
731 | def test_read_after_exit(self): | |
727 | cctx = zstd.ZstdCompressor() |
|
732 | cctx = zstd.ZstdCompressor() | |
728 |
|
733 | |||
729 |
with cctx.stream_reader(b |
|
734 | with cctx.stream_reader(b"foo" * 60) as reader: | |
730 | while reader.read(8192): |
|
735 | while reader.read(8192): | |
731 | pass |
|
736 | pass | |
732 |
|
737 | |||
733 |
with self.assertRaisesRegex |
|
738 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
734 | reader.read(10) |
|
739 | reader.read(10) | |
735 |
|
740 | |||
736 | def test_bad_size(self): |
|
741 | def test_bad_size(self): | |
737 | cctx = zstd.ZstdCompressor() |
|
742 | cctx = zstd.ZstdCompressor() | |
738 |
|
743 | |||
739 |
source = io.BytesIO(b |
|
744 | source = io.BytesIO(b"foobar") | |
740 |
|
745 | |||
741 | with cctx.stream_reader(source, size=2) as reader: |
|
746 | with cctx.stream_reader(source, size=2) as reader: | |
742 |
with self.assertRaisesRegex |
|
747 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
743 | reader.read(10) |
|
748 | reader.read(10) | |
744 |
|
749 | |||
745 | # Try another compression operation. |
|
750 | # Try another compression operation. | |
746 | with cctx.stream_reader(source, size=42): |
|
751 | with cctx.stream_reader(source, size=42): | |
747 | pass |
|
752 | pass | |
748 |
|
753 | |||
749 | def test_readall(self): |
|
754 | def test_readall(self): | |
750 | cctx = zstd.ZstdCompressor() |
|
755 | cctx = zstd.ZstdCompressor() | |
751 |
frame = cctx.compress(b |
|
756 | frame = cctx.compress(b"foo" * 1024) | |
752 |
|
757 | |||
753 |
reader = cctx.stream_reader(b |
|
758 | reader = cctx.stream_reader(b"foo" * 1024) | |
754 | self.assertEqual(reader.readall(), frame) |
|
759 | self.assertEqual(reader.readall(), frame) | |
755 |
|
760 | |||
756 | def test_readinto(self): |
|
761 | def test_readinto(self): | |
757 | cctx = zstd.ZstdCompressor() |
|
762 | cctx = zstd.ZstdCompressor() | |
758 |
foo = cctx.compress(b |
|
763 | foo = cctx.compress(b"foo") | |
759 |
|
764 | |||
760 |
reader = cctx.stream_reader(b |
|
765 | reader = cctx.stream_reader(b"foo") | |
761 | with self.assertRaises(Exception): |
|
766 | with self.assertRaises(Exception): | |
762 |
reader.readinto(b |
|
767 | reader.readinto(b"foobar") | |
763 |
|
768 | |||
764 | # readinto() with sufficiently large destination. |
|
769 | # readinto() with sufficiently large destination. | |
765 | b = bytearray(1024) |
|
770 | b = bytearray(1024) | |
766 |
reader = cctx.stream_reader(b |
|
771 | reader = cctx.stream_reader(b"foo") | |
767 | self.assertEqual(reader.readinto(b), len(foo)) |
|
772 | self.assertEqual(reader.readinto(b), len(foo)) | |
768 | self.assertEqual(b[0:len(foo)], foo) |
|
773 | self.assertEqual(b[0 : len(foo)], foo) | |
769 | self.assertEqual(reader.readinto(b), 0) |
|
774 | self.assertEqual(reader.readinto(b), 0) | |
770 | self.assertEqual(b[0:len(foo)], foo) |
|
775 | self.assertEqual(b[0 : len(foo)], foo) | |
771 |
|
776 | |||
772 | # readinto() with small reads. |
|
777 | # readinto() with small reads. | |
773 | b = bytearray(1024) |
|
778 | b = bytearray(1024) | |
774 |
reader = cctx.stream_reader(b |
|
779 | reader = cctx.stream_reader(b"foo", read_size=1) | |
775 | self.assertEqual(reader.readinto(b), len(foo)) |
|
780 | self.assertEqual(reader.readinto(b), len(foo)) | |
776 | self.assertEqual(b[0:len(foo)], foo) |
|
781 | self.assertEqual(b[0 : len(foo)], foo) | |
777 |
|
782 | |||
778 | # Too small destination buffer. |
|
783 | # Too small destination buffer. | |
779 | b = bytearray(2) |
|
784 | b = bytearray(2) | |
780 |
reader = cctx.stream_reader(b |
|
785 | reader = cctx.stream_reader(b"foo") | |
781 | self.assertEqual(reader.readinto(b), 2) |
|
786 | self.assertEqual(reader.readinto(b), 2) | |
782 | self.assertEqual(b[:], foo[0:2]) |
|
787 | self.assertEqual(b[:], foo[0:2]) | |
783 | self.assertEqual(reader.readinto(b), 2) |
|
788 | self.assertEqual(reader.readinto(b), 2) | |
784 | self.assertEqual(b[:], foo[2:4]) |
|
789 | self.assertEqual(b[:], foo[2:4]) | |
785 | self.assertEqual(reader.readinto(b), 2) |
|
790 | self.assertEqual(reader.readinto(b), 2) | |
786 | self.assertEqual(b[:], foo[4:6]) |
|
791 | self.assertEqual(b[:], foo[4:6]) | |
787 |
|
792 | |||
788 | def test_readinto1(self): |
|
793 | def test_readinto1(self): | |
789 | cctx = zstd.ZstdCompressor() |
|
794 | cctx = zstd.ZstdCompressor() | |
790 |
foo = b |
|
795 | foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) | |
791 |
|
796 | |||
792 |
reader = cctx.stream_reader(b |
|
797 | reader = cctx.stream_reader(b"foo") | |
793 | with self.assertRaises(Exception): |
|
798 | with self.assertRaises(Exception): | |
794 |
reader.readinto1(b |
|
799 | reader.readinto1(b"foobar") | |
795 |
|
800 | |||
796 | b = bytearray(1024) |
|
801 | b = bytearray(1024) | |
797 |
source = OpCountingBytesIO(b |
|
802 | source = OpCountingBytesIO(b"foo") | |
798 | reader = cctx.stream_reader(source) |
|
803 | reader = cctx.stream_reader(source) | |
799 | self.assertEqual(reader.readinto1(b), len(foo)) |
|
804 | self.assertEqual(reader.readinto1(b), len(foo)) | |
800 | self.assertEqual(b[0:len(foo)], foo) |
|
805 | self.assertEqual(b[0 : len(foo)], foo) | |
801 | self.assertEqual(source._read_count, 2) |
|
806 | self.assertEqual(source._read_count, 2) | |
802 |
|
807 | |||
803 | # readinto1() with small reads. |
|
808 | # readinto1() with small reads. | |
804 | b = bytearray(1024) |
|
809 | b = bytearray(1024) | |
805 |
source = OpCountingBytesIO(b |
|
810 | source = OpCountingBytesIO(b"foo") | |
806 | reader = cctx.stream_reader(source, read_size=1) |
|
811 | reader = cctx.stream_reader(source, read_size=1) | |
807 | self.assertEqual(reader.readinto1(b), len(foo)) |
|
812 | self.assertEqual(reader.readinto1(b), len(foo)) | |
808 | self.assertEqual(b[0:len(foo)], foo) |
|
813 | self.assertEqual(b[0 : len(foo)], foo) | |
809 | self.assertEqual(source._read_count, 4) |
|
814 | self.assertEqual(source._read_count, 4) | |
810 |
|
815 | |||
811 | def test_read1(self): |
|
816 | def test_read1(self): | |
812 | cctx = zstd.ZstdCompressor() |
|
817 | cctx = zstd.ZstdCompressor() | |
813 |
foo = b |
|
818 | foo = b"".join(cctx.read_to_iter(io.BytesIO(b"foo"))) | |
814 |
|
819 | |||
815 |
b = OpCountingBytesIO(b |
|
820 | b = OpCountingBytesIO(b"foo") | |
816 | reader = cctx.stream_reader(b) |
|
821 | reader = cctx.stream_reader(b) | |
817 |
|
822 | |||
818 | self.assertEqual(reader.read1(), foo) |
|
823 | self.assertEqual(reader.read1(), foo) | |
819 | self.assertEqual(b._read_count, 2) |
|
824 | self.assertEqual(b._read_count, 2) | |
820 |
|
825 | |||
821 |
b = OpCountingBytesIO(b |
|
826 | b = OpCountingBytesIO(b"foo") | |
822 | reader = cctx.stream_reader(b) |
|
827 | reader = cctx.stream_reader(b) | |
823 |
|
828 | |||
824 |
self.assertEqual(reader.read1(0), b |
|
829 | self.assertEqual(reader.read1(0), b"") | |
825 | self.assertEqual(reader.read1(2), foo[0:2]) |
|
830 | self.assertEqual(reader.read1(2), foo[0:2]) | |
826 | self.assertEqual(b._read_count, 2) |
|
831 | self.assertEqual(b._read_count, 2) | |
827 | self.assertEqual(reader.read1(2), foo[2:4]) |
|
832 | self.assertEqual(reader.read1(2), foo[2:4]) | |
828 | self.assertEqual(reader.read1(1024), foo[4:]) |
|
833 | self.assertEqual(reader.read1(1024), foo[4:]) | |
829 |
|
834 | |||
830 |
|
835 | |||
831 | @make_cffi |
|
836 | @make_cffi | |
832 |
class TestCompressor_stream_writer( |
|
837 | class TestCompressor_stream_writer(TestCase): | |
833 | def test_io_api(self): |
|
838 | def test_io_api(self): | |
834 | buffer = io.BytesIO() |
|
839 | buffer = io.BytesIO() | |
835 | cctx = zstd.ZstdCompressor() |
|
840 | cctx = zstd.ZstdCompressor() | |
836 | writer = cctx.stream_writer(buffer) |
|
841 | writer = cctx.stream_writer(buffer) | |
837 |
|
842 | |||
838 | self.assertFalse(writer.isatty()) |
|
843 | self.assertFalse(writer.isatty()) | |
839 | self.assertFalse(writer.readable()) |
|
844 | self.assertFalse(writer.readable()) | |
840 |
|
845 | |||
841 | with self.assertRaises(io.UnsupportedOperation): |
|
846 | with self.assertRaises(io.UnsupportedOperation): | |
842 | writer.readline() |
|
847 | writer.readline() | |
843 |
|
848 | |||
844 | with self.assertRaises(io.UnsupportedOperation): |
|
849 | with self.assertRaises(io.UnsupportedOperation): | |
845 | writer.readline(42) |
|
850 | writer.readline(42) | |
846 |
|
851 | |||
847 | with self.assertRaises(io.UnsupportedOperation): |
|
852 | with self.assertRaises(io.UnsupportedOperation): | |
848 | writer.readline(size=42) |
|
853 | writer.readline(size=42) | |
849 |
|
854 | |||
850 | with self.assertRaises(io.UnsupportedOperation): |
|
855 | with self.assertRaises(io.UnsupportedOperation): | |
851 | writer.readlines() |
|
856 | writer.readlines() | |
852 |
|
857 | |||
853 | with self.assertRaises(io.UnsupportedOperation): |
|
858 | with self.assertRaises(io.UnsupportedOperation): | |
854 | writer.readlines(42) |
|
859 | writer.readlines(42) | |
855 |
|
860 | |||
856 | with self.assertRaises(io.UnsupportedOperation): |
|
861 | with self.assertRaises(io.UnsupportedOperation): | |
857 | writer.readlines(hint=42) |
|
862 | writer.readlines(hint=42) | |
858 |
|
863 | |||
859 | with self.assertRaises(io.UnsupportedOperation): |
|
864 | with self.assertRaises(io.UnsupportedOperation): | |
860 | writer.seek(0) |
|
865 | writer.seek(0) | |
861 |
|
866 | |||
862 | with self.assertRaises(io.UnsupportedOperation): |
|
867 | with self.assertRaises(io.UnsupportedOperation): | |
863 | writer.seek(10, os.SEEK_SET) |
|
868 | writer.seek(10, os.SEEK_SET) | |
864 |
|
869 | |||
865 | self.assertFalse(writer.seekable()) |
|
870 | self.assertFalse(writer.seekable()) | |
866 |
|
871 | |||
867 | with self.assertRaises(io.UnsupportedOperation): |
|
872 | with self.assertRaises(io.UnsupportedOperation): | |
868 | writer.truncate() |
|
873 | writer.truncate() | |
869 |
|
874 | |||
870 | with self.assertRaises(io.UnsupportedOperation): |
|
875 | with self.assertRaises(io.UnsupportedOperation): | |
871 | writer.truncate(42) |
|
876 | writer.truncate(42) | |
872 |
|
877 | |||
873 | with self.assertRaises(io.UnsupportedOperation): |
|
878 | with self.assertRaises(io.UnsupportedOperation): | |
874 | writer.truncate(size=42) |
|
879 | writer.truncate(size=42) | |
875 |
|
880 | |||
876 | self.assertTrue(writer.writable()) |
|
881 | self.assertTrue(writer.writable()) | |
877 |
|
882 | |||
878 | with self.assertRaises(NotImplementedError): |
|
883 | with self.assertRaises(NotImplementedError): | |
879 | writer.writelines([]) |
|
884 | writer.writelines([]) | |
880 |
|
885 | |||
881 | with self.assertRaises(io.UnsupportedOperation): |
|
886 | with self.assertRaises(io.UnsupportedOperation): | |
882 | writer.read() |
|
887 | writer.read() | |
883 |
|
888 | |||
884 | with self.assertRaises(io.UnsupportedOperation): |
|
889 | with self.assertRaises(io.UnsupportedOperation): | |
885 | writer.read(42) |
|
890 | writer.read(42) | |
886 |
|
891 | |||
887 | with self.assertRaises(io.UnsupportedOperation): |
|
892 | with self.assertRaises(io.UnsupportedOperation): | |
888 | writer.read(size=42) |
|
893 | writer.read(size=42) | |
889 |
|
894 | |||
890 | with self.assertRaises(io.UnsupportedOperation): |
|
895 | with self.assertRaises(io.UnsupportedOperation): | |
891 | writer.readall() |
|
896 | writer.readall() | |
892 |
|
897 | |||
893 | with self.assertRaises(io.UnsupportedOperation): |
|
898 | with self.assertRaises(io.UnsupportedOperation): | |
894 | writer.readinto(None) |
|
899 | writer.readinto(None) | |
895 |
|
900 | |||
896 | with self.assertRaises(io.UnsupportedOperation): |
|
901 | with self.assertRaises(io.UnsupportedOperation): | |
897 | writer.fileno() |
|
902 | writer.fileno() | |
898 |
|
903 | |||
899 | self.assertFalse(writer.closed) |
|
904 | self.assertFalse(writer.closed) | |
900 |
|
905 | |||
901 | def test_fileno_file(self): |
|
906 | def test_fileno_file(self): | |
902 |
with tempfile.TemporaryFile( |
|
907 | with tempfile.TemporaryFile("wb") as tf: | |
903 | cctx = zstd.ZstdCompressor() |
|
908 | cctx = zstd.ZstdCompressor() | |
904 | writer = cctx.stream_writer(tf) |
|
909 | writer = cctx.stream_writer(tf) | |
905 |
|
910 | |||
906 | self.assertEqual(writer.fileno(), tf.fileno()) |
|
911 | self.assertEqual(writer.fileno(), tf.fileno()) | |
907 |
|
912 | |||
908 | def test_close(self): |
|
913 | def test_close(self): | |
909 | buffer = NonClosingBytesIO() |
|
914 | buffer = NonClosingBytesIO() | |
910 | cctx = zstd.ZstdCompressor(level=1) |
|
915 | cctx = zstd.ZstdCompressor(level=1) | |
911 | writer = cctx.stream_writer(buffer) |
|
916 | writer = cctx.stream_writer(buffer) | |
912 |
|
917 | |||
913 |
writer.write(b |
|
918 | writer.write(b"foo" * 1024) | |
914 | self.assertFalse(writer.closed) |
|
919 | self.assertFalse(writer.closed) | |
915 | self.assertFalse(buffer.closed) |
|
920 | self.assertFalse(buffer.closed) | |
916 | writer.close() |
|
921 | writer.close() | |
917 | self.assertTrue(writer.closed) |
|
922 | self.assertTrue(writer.closed) | |
918 | self.assertTrue(buffer.closed) |
|
923 | self.assertTrue(buffer.closed) | |
919 |
|
924 | |||
920 |
with self.assertRaisesRegex |
|
925 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
921 |
writer.write(b |
|
926 | writer.write(b"foo") | |
922 |
|
927 | |||
923 |
with self.assertRaisesRegex |
|
928 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
924 | writer.flush() |
|
929 | writer.flush() | |
925 |
|
930 | |||
926 |
with self.assertRaisesRegex |
|
931 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
927 | with writer: |
|
932 | with writer: | |
928 | pass |
|
933 | pass | |
929 |
|
934 | |||
930 |
self.assertEqual( |
|
935 | self.assertEqual( | |
931 | b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f' |
|
936 | buffer.getvalue(), | |
932 | b'\x6f\x01\x00\xfa\xd3\x77\x43') |
|
937 | b"\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f" | |
|
938 | b"\x6f\x01\x00\xfa\xd3\x77\x43", | |||
|
939 | ) | |||
933 |
|
940 | |||
934 | # Context manager exit should close stream. |
|
941 | # Context manager exit should close stream. | |
935 | buffer = io.BytesIO() |
|
942 | buffer = io.BytesIO() | |
936 | writer = cctx.stream_writer(buffer) |
|
943 | writer = cctx.stream_writer(buffer) | |
937 |
|
944 | |||
938 | with writer: |
|
945 | with writer: | |
939 |
writer.write(b |
|
946 | writer.write(b"foo") | |
940 |
|
947 | |||
941 | self.assertTrue(writer.closed) |
|
948 | self.assertTrue(writer.closed) | |
942 |
|
949 | |||
943 | def test_empty(self): |
|
950 | def test_empty(self): | |
944 | buffer = NonClosingBytesIO() |
|
951 | buffer = NonClosingBytesIO() | |
945 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
952 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
946 | with cctx.stream_writer(buffer) as compressor: |
|
953 | with cctx.stream_writer(buffer) as compressor: | |
947 |
compressor.write(b |
|
954 | compressor.write(b"") | |
948 |
|
955 | |||
949 | result = buffer.getvalue() |
|
956 | result = buffer.getvalue() | |
950 |
self.assertEqual(result, b |
|
957 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
951 |
|
958 | |||
952 | params = zstd.get_frame_parameters(result) |
|
959 | params = zstd.get_frame_parameters(result) | |
953 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
960 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
954 | self.assertEqual(params.window_size, 524288) |
|
961 | self.assertEqual(params.window_size, 524288) | |
955 | self.assertEqual(params.dict_id, 0) |
|
962 | self.assertEqual(params.dict_id, 0) | |
956 | self.assertFalse(params.has_checksum) |
|
963 | self.assertFalse(params.has_checksum) | |
957 |
|
964 | |||
958 | # Test without context manager. |
|
965 | # Test without context manager. | |
959 | buffer = io.BytesIO() |
|
966 | buffer = io.BytesIO() | |
960 | compressor = cctx.stream_writer(buffer) |
|
967 | compressor = cctx.stream_writer(buffer) | |
961 |
self.assertEqual(compressor.write(b |
|
968 | self.assertEqual(compressor.write(b""), 0) | |
962 |
self.assertEqual(buffer.getvalue(), b |
|
969 | self.assertEqual(buffer.getvalue(), b"") | |
963 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9) |
|
970 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9) | |
964 | result = buffer.getvalue() |
|
971 | result = buffer.getvalue() | |
965 |
self.assertEqual(result, b |
|
972 | self.assertEqual(result, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
966 |
|
973 | |||
967 | params = zstd.get_frame_parameters(result) |
|
974 | params = zstd.get_frame_parameters(result) | |
968 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
975 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
969 | self.assertEqual(params.window_size, 524288) |
|
976 | self.assertEqual(params.window_size, 524288) | |
970 | self.assertEqual(params.dict_id, 0) |
|
977 | self.assertEqual(params.dict_id, 0) | |
971 | self.assertFalse(params.has_checksum) |
|
978 | self.assertFalse(params.has_checksum) | |
972 |
|
979 | |||
973 | # Test write_return_read=True |
|
980 | # Test write_return_read=True | |
974 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
981 | compressor = cctx.stream_writer(buffer, write_return_read=True) | |
975 |
self.assertEqual(compressor.write(b |
|
982 | self.assertEqual(compressor.write(b""), 0) | |
976 |
|
983 | |||
977 | def test_input_types(self): |
|
984 | def test_input_types(self): | |
978 |
expected = b |
|
985 | expected = b"\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f" | |
979 | cctx = zstd.ZstdCompressor(level=1) |
|
986 | cctx = zstd.ZstdCompressor(level=1) | |
980 |
|
987 | |||
981 | mutable_array = bytearray(3) |
|
988 | mutable_array = bytearray(3) | |
982 |
mutable_array[:] = b |
|
989 | mutable_array[:] = b"foo" | |
983 |
|
990 | |||
984 | sources = [ |
|
991 | sources = [ | |
985 |
memoryview(b |
|
992 | memoryview(b"foo"), | |
986 |
bytearray(b |
|
993 | bytearray(b"foo"), | |
987 | mutable_array, |
|
994 | mutable_array, | |
988 | ] |
|
995 | ] | |
989 |
|
996 | |||
990 | for source in sources: |
|
997 | for source in sources: | |
991 | buffer = NonClosingBytesIO() |
|
998 | buffer = NonClosingBytesIO() | |
992 | with cctx.stream_writer(buffer) as compressor: |
|
999 | with cctx.stream_writer(buffer) as compressor: | |
993 | compressor.write(source) |
|
1000 | compressor.write(source) | |
994 |
|
1001 | |||
995 | self.assertEqual(buffer.getvalue(), expected) |
|
1002 | self.assertEqual(buffer.getvalue(), expected) | |
996 |
|
1003 | |||
997 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
1004 | compressor = cctx.stream_writer(buffer, write_return_read=True) | |
998 | self.assertEqual(compressor.write(source), len(source)) |
|
1005 | self.assertEqual(compressor.write(source), len(source)) | |
999 |
|
1006 | |||
1000 | def test_multiple_compress(self): |
|
1007 | def test_multiple_compress(self): | |
1001 | buffer = NonClosingBytesIO() |
|
1008 | buffer = NonClosingBytesIO() | |
1002 | cctx = zstd.ZstdCompressor(level=5) |
|
1009 | cctx = zstd.ZstdCompressor(level=5) | |
1003 | with cctx.stream_writer(buffer) as compressor: |
|
1010 | with cctx.stream_writer(buffer) as compressor: | |
1004 |
self.assertEqual(compressor.write(b |
|
1011 | self.assertEqual(compressor.write(b"foo"), 0) | |
1005 |
self.assertEqual(compressor.write(b |
|
1012 | self.assertEqual(compressor.write(b"bar"), 0) | |
1006 |
self.assertEqual(compressor.write(b |
|
1013 | self.assertEqual(compressor.write(b"x" * 8192), 0) | |
1007 |
|
1014 | |||
1008 | result = buffer.getvalue() |
|
1015 | result = buffer.getvalue() | |
1009 |
self.assertEqual( |
|
1016 | self.assertEqual( | |
1010 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' |
|
1017 | result, | |
1011 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') |
|
1018 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" | |
|
1019 | b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", | |||
|
1020 | ) | |||
1012 |
|
1021 | |||
1013 | # Test without context manager. |
|
1022 | # Test without context manager. | |
1014 | buffer = io.BytesIO() |
|
1023 | buffer = io.BytesIO() | |
1015 | compressor = cctx.stream_writer(buffer) |
|
1024 | compressor = cctx.stream_writer(buffer) | |
1016 |
self.assertEqual(compressor.write(b |
|
1025 | self.assertEqual(compressor.write(b"foo"), 0) | |
1017 |
self.assertEqual(compressor.write(b |
|
1026 | self.assertEqual(compressor.write(b"bar"), 0) | |
1018 |
self.assertEqual(compressor.write(b |
|
1027 | self.assertEqual(compressor.write(b"x" * 8192), 0) | |
1019 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) |
|
1028 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) | |
1020 | result = buffer.getvalue() |
|
1029 | result = buffer.getvalue() | |
1021 |
self.assertEqual( |
|
1030 | self.assertEqual( | |
1022 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f' |
|
1031 | result, | |
1023 | b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23') |
|
1032 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f" | |
|
1033 | b"\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23", | |||
|
1034 | ) | |||
1024 |
|
1035 | |||
1025 | # Test with write_return_read=True. |
|
1036 | # Test with write_return_read=True. | |
1026 | compressor = cctx.stream_writer(buffer, write_return_read=True) |
|
1037 | compressor = cctx.stream_writer(buffer, write_return_read=True) | |
1027 |
self.assertEqual(compressor.write(b |
|
1038 | self.assertEqual(compressor.write(b"foo"), 3) | |
1028 |
self.assertEqual(compressor.write(b |
|
1039 | self.assertEqual(compressor.write(b"barbiz"), 6) | |
1029 |
self.assertEqual(compressor.write(b |
|
1040 | self.assertEqual(compressor.write(b"x" * 8192), 8192) | |
1030 |
|
1041 | |||
1031 | def test_dictionary(self): |
|
1042 | def test_dictionary(self): | |
1032 | samples = [] |
|
1043 | samples = [] | |
1033 | for i in range(128): |
|
1044 | for i in range(128): | |
1034 |
samples.append(b |
|
1045 | samples.append(b"foo" * 64) | |
1035 |
samples.append(b |
|
1046 | samples.append(b"bar" * 64) | |
1036 |
samples.append(b |
|
1047 | samples.append(b"foobar" * 64) | |
1037 |
|
1048 | |||
1038 | d = zstd.train_dictionary(8192, samples) |
|
1049 | d = zstd.train_dictionary(8192, samples) | |
1039 |
|
1050 | |||
1040 | h = hashlib.sha1(d.as_bytes()).hexdigest() |
|
1051 | h = hashlib.sha1(d.as_bytes()).hexdigest() | |
1041 |
self.assertEqual(h, |
|
1052 | self.assertEqual(h, "7a2e59a876db958f74257141045af8f912e00d4e") | |
1042 |
|
1053 | |||
1043 | buffer = NonClosingBytesIO() |
|
1054 | buffer = NonClosingBytesIO() | |
1044 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
|
1055 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) | |
1045 | with cctx.stream_writer(buffer) as compressor: |
|
1056 | with cctx.stream_writer(buffer) as compressor: | |
1046 |
self.assertEqual(compressor.write(b |
|
1057 | self.assertEqual(compressor.write(b"foo"), 0) | |
1047 |
self.assertEqual(compressor.write(b |
|
1058 | self.assertEqual(compressor.write(b"bar"), 0) | |
1048 |
self.assertEqual(compressor.write(b |
|
1059 | self.assertEqual(compressor.write(b"foo" * 16384), 0) | |
1049 |
|
1060 | |||
1050 | compressed = buffer.getvalue() |
|
1061 | compressed = buffer.getvalue() | |
1051 |
|
1062 | |||
1052 | params = zstd.get_frame_parameters(compressed) |
|
1063 | params = zstd.get_frame_parameters(compressed) | |
1053 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1064 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1054 | self.assertEqual(params.window_size, 2097152) |
|
1065 | self.assertEqual(params.window_size, 2097152) | |
1055 | self.assertEqual(params.dict_id, d.dict_id()) |
|
1066 | self.assertEqual(params.dict_id, d.dict_id()) | |
1056 | self.assertFalse(params.has_checksum) |
|
1067 | self.assertFalse(params.has_checksum) | |
1057 |
|
1068 | |||
1058 | h = hashlib.sha1(compressed).hexdigest() |
|
1069 | h = hashlib.sha1(compressed).hexdigest() | |
1059 |
self.assertEqual(h, |
|
1070 | self.assertEqual(h, "0a7c05635061f58039727cdbe76388c6f4cfef06") | |
1060 |
|
1071 | |||
1061 |
source = b |
|
1072 | source = b"foo" + b"bar" + (b"foo" * 16384) | |
1062 |
|
1073 | |||
1063 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1074 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
1064 |
|
1075 | |||
1065 | self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)), |
|
1076 | self.assertEqual( | |
1066 | source) |
|
1077 | dctx.decompress(compressed, max_output_size=len(source)), source | |
|
1078 | ) | |||
1067 |
|
1079 | |||
1068 | def test_compression_params(self): |
|
1080 | def test_compression_params(self): | |
1069 | params = zstd.ZstdCompressionParameters( |
|
1081 | params = zstd.ZstdCompressionParameters( | |
1070 | window_log=20, |
|
1082 | window_log=20, | |
1071 | chain_log=6, |
|
1083 | chain_log=6, | |
1072 | hash_log=12, |
|
1084 | hash_log=12, | |
1073 | min_match=5, |
|
1085 | min_match=5, | |
1074 | search_log=4, |
|
1086 | search_log=4, | |
1075 | target_length=10, |
|
1087 | target_length=10, | |
1076 |
strategy=zstd.STRATEGY_FAST |
|
1088 | strategy=zstd.STRATEGY_FAST, | |
|
1089 | ) | |||
1077 |
|
1090 | |||
1078 | buffer = NonClosingBytesIO() |
|
1091 | buffer = NonClosingBytesIO() | |
1079 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1092 | cctx = zstd.ZstdCompressor(compression_params=params) | |
1080 | with cctx.stream_writer(buffer) as compressor: |
|
1093 | with cctx.stream_writer(buffer) as compressor: | |
1081 |
self.assertEqual(compressor.write(b |
|
1094 | self.assertEqual(compressor.write(b"foo"), 0) | |
1082 |
self.assertEqual(compressor.write(b |
|
1095 | self.assertEqual(compressor.write(b"bar"), 0) | |
1083 |
self.assertEqual(compressor.write(b |
|
1096 | self.assertEqual(compressor.write(b"foobar" * 16384), 0) | |
1084 |
|
1097 | |||
1085 | compressed = buffer.getvalue() |
|
1098 | compressed = buffer.getvalue() | |
1086 |
|
1099 | |||
1087 | params = zstd.get_frame_parameters(compressed) |
|
1100 | params = zstd.get_frame_parameters(compressed) | |
1088 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1101 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1089 | self.assertEqual(params.window_size, 1048576) |
|
1102 | self.assertEqual(params.window_size, 1048576) | |
1090 | self.assertEqual(params.dict_id, 0) |
|
1103 | self.assertEqual(params.dict_id, 0) | |
1091 | self.assertFalse(params.has_checksum) |
|
1104 | self.assertFalse(params.has_checksum) | |
1092 |
|
1105 | |||
1093 | h = hashlib.sha1(compressed).hexdigest() |
|
1106 | h = hashlib.sha1(compressed).hexdigest() | |
1094 |
self.assertEqual(h, |
|
1107 | self.assertEqual(h, "dd4bb7d37c1a0235b38a2f6b462814376843ef0b") | |
1095 |
|
1108 | |||
1096 | def test_write_checksum(self): |
|
1109 | def test_write_checksum(self): | |
1097 | no_checksum = NonClosingBytesIO() |
|
1110 | no_checksum = NonClosingBytesIO() | |
1098 | cctx = zstd.ZstdCompressor(level=1) |
|
1111 | cctx = zstd.ZstdCompressor(level=1) | |
1099 | with cctx.stream_writer(no_checksum) as compressor: |
|
1112 | with cctx.stream_writer(no_checksum) as compressor: | |
1100 |
self.assertEqual(compressor.write(b |
|
1113 | self.assertEqual(compressor.write(b"foobar"), 0) | |
1101 |
|
1114 | |||
1102 | with_checksum = NonClosingBytesIO() |
|
1115 | with_checksum = NonClosingBytesIO() | |
1103 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) |
|
1116 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True) | |
1104 | with cctx.stream_writer(with_checksum) as compressor: |
|
1117 | with cctx.stream_writer(with_checksum) as compressor: | |
1105 |
self.assertEqual(compressor.write(b |
|
1118 | self.assertEqual(compressor.write(b"foobar"), 0) | |
1106 |
|
1119 | |||
1107 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) |
|
1120 | no_params = zstd.get_frame_parameters(no_checksum.getvalue()) | |
1108 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) |
|
1121 | with_params = zstd.get_frame_parameters(with_checksum.getvalue()) | |
1109 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1122 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1110 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1123 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1111 | self.assertEqual(no_params.dict_id, 0) |
|
1124 | self.assertEqual(no_params.dict_id, 0) | |
1112 | self.assertEqual(with_params.dict_id, 0) |
|
1125 | self.assertEqual(with_params.dict_id, 0) | |
1113 | self.assertFalse(no_params.has_checksum) |
|
1126 | self.assertFalse(no_params.has_checksum) | |
1114 | self.assertTrue(with_params.has_checksum) |
|
1127 | self.assertTrue(with_params.has_checksum) | |
1115 |
|
1128 | |||
1116 | self.assertEqual(len(with_checksum.getvalue()), |
|
1129 | self.assertEqual(len(with_checksum.getvalue()), len(no_checksum.getvalue()) + 4) | |
1117 | len(no_checksum.getvalue()) + 4) |
|
|||
1118 |
|
1130 | |||
1119 | def test_write_content_size(self): |
|
1131 | def test_write_content_size(self): | |
1120 | no_size = NonClosingBytesIO() |
|
1132 | no_size = NonClosingBytesIO() | |
1121 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1133 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
1122 | with cctx.stream_writer(no_size) as compressor: |
|
1134 | with cctx.stream_writer(no_size) as compressor: | |
1123 |
self.assertEqual(compressor.write(b |
|
1135 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
1124 |
|
1136 | |||
1125 | with_size = NonClosingBytesIO() |
|
1137 | with_size = NonClosingBytesIO() | |
1126 | cctx = zstd.ZstdCompressor(level=1) |
|
1138 | cctx = zstd.ZstdCompressor(level=1) | |
1127 | with cctx.stream_writer(with_size) as compressor: |
|
1139 | with cctx.stream_writer(with_size) as compressor: | |
1128 |
self.assertEqual(compressor.write(b |
|
1140 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
1129 |
|
1141 | |||
1130 | # Source size is not known in streaming mode, so header not |
|
1142 | # Source size is not known in streaming mode, so header not | |
1131 | # written. |
|
1143 | # written. | |
1132 | self.assertEqual(len(with_size.getvalue()), |
|
1144 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue())) | |
1133 | len(no_size.getvalue())) |
|
|||
1134 |
|
1145 | |||
1135 | # Declaring size will write the header. |
|
1146 | # Declaring size will write the header. | |
1136 | with_size = NonClosingBytesIO() |
|
1147 | with_size = NonClosingBytesIO() | |
1137 |
with cctx.stream_writer(with_size, size=len(b |
|
1148 | with cctx.stream_writer(with_size, size=len(b"foobar" * 256)) as compressor: | |
1138 |
self.assertEqual(compressor.write(b |
|
1149 | self.assertEqual(compressor.write(b"foobar" * 256), 0) | |
1139 |
|
1150 | |||
1140 | no_params = zstd.get_frame_parameters(no_size.getvalue()) |
|
1151 | no_params = zstd.get_frame_parameters(no_size.getvalue()) | |
1141 | with_params = zstd.get_frame_parameters(with_size.getvalue()) |
|
1152 | with_params = zstd.get_frame_parameters(with_size.getvalue()) | |
1142 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1153 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1143 | self.assertEqual(with_params.content_size, 1536) |
|
1154 | self.assertEqual(with_params.content_size, 1536) | |
1144 | self.assertEqual(no_params.dict_id, 0) |
|
1155 | self.assertEqual(no_params.dict_id, 0) | |
1145 | self.assertEqual(with_params.dict_id, 0) |
|
1156 | self.assertEqual(with_params.dict_id, 0) | |
1146 | self.assertFalse(no_params.has_checksum) |
|
1157 | self.assertFalse(no_params.has_checksum) | |
1147 | self.assertFalse(with_params.has_checksum) |
|
1158 | self.assertFalse(with_params.has_checksum) | |
1148 |
|
1159 | |||
1149 | self.assertEqual(len(with_size.getvalue()), |
|
1160 | self.assertEqual(len(with_size.getvalue()), len(no_size.getvalue()) + 1) | |
1150 | len(no_size.getvalue()) + 1) |
|
|||
1151 |
|
1161 | |||
1152 | def test_no_dict_id(self): |
|
1162 | def test_no_dict_id(self): | |
1153 | samples = [] |
|
1163 | samples = [] | |
1154 | for i in range(128): |
|
1164 | for i in range(128): | |
1155 |
samples.append(b |
|
1165 | samples.append(b"foo" * 64) | |
1156 |
samples.append(b |
|
1166 | samples.append(b"bar" * 64) | |
1157 |
samples.append(b |
|
1167 | samples.append(b"foobar" * 64) | |
1158 |
|
1168 | |||
1159 | d = zstd.train_dictionary(1024, samples) |
|
1169 | d = zstd.train_dictionary(1024, samples) | |
1160 |
|
1170 | |||
1161 | with_dict_id = NonClosingBytesIO() |
|
1171 | with_dict_id = NonClosingBytesIO() | |
1162 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
1172 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
1163 | with cctx.stream_writer(with_dict_id) as compressor: |
|
1173 | with cctx.stream_writer(with_dict_id) as compressor: | |
1164 |
self.assertEqual(compressor.write(b |
|
1174 | self.assertEqual(compressor.write(b"foobarfoobar"), 0) | |
1165 |
|
1175 | |||
1166 |
self.assertEqual(with_dict_id.getvalue()[4:5], b |
|
1176 | self.assertEqual(with_dict_id.getvalue()[4:5], b"\x03") | |
1167 |
|
1177 | |||
1168 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) |
|
1178 | cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False) | |
1169 | no_dict_id = NonClosingBytesIO() |
|
1179 | no_dict_id = NonClosingBytesIO() | |
1170 | with cctx.stream_writer(no_dict_id) as compressor: |
|
1180 | with cctx.stream_writer(no_dict_id) as compressor: | |
1171 |
self.assertEqual(compressor.write(b |
|
1181 | self.assertEqual(compressor.write(b"foobarfoobar"), 0) | |
1172 |
|
1182 | |||
1173 |
self.assertEqual(no_dict_id.getvalue()[4:5], b |
|
1183 | self.assertEqual(no_dict_id.getvalue()[4:5], b"\x00") | |
1174 |
|
1184 | |||
1175 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) |
|
1185 | no_params = zstd.get_frame_parameters(no_dict_id.getvalue()) | |
1176 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) |
|
1186 | with_params = zstd.get_frame_parameters(with_dict_id.getvalue()) | |
1177 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1187 | self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1178 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1188 | self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1179 | self.assertEqual(no_params.dict_id, 0) |
|
1189 | self.assertEqual(no_params.dict_id, 0) | |
1180 | self.assertEqual(with_params.dict_id, d.dict_id()) |
|
1190 | self.assertEqual(with_params.dict_id, d.dict_id()) | |
1181 | self.assertFalse(no_params.has_checksum) |
|
1191 | self.assertFalse(no_params.has_checksum) | |
1182 | self.assertFalse(with_params.has_checksum) |
|
1192 | self.assertFalse(with_params.has_checksum) | |
1183 |
|
1193 | |||
1184 | self.assertEqual(len(with_dict_id.getvalue()), |
|
1194 | self.assertEqual(len(with_dict_id.getvalue()), len(no_dict_id.getvalue()) + 4) | |
1185 | len(no_dict_id.getvalue()) + 4) |
|
|||
1186 |
|
1195 | |||
1187 | def test_memory_size(self): |
|
1196 | def test_memory_size(self): | |
1188 | cctx = zstd.ZstdCompressor(level=3) |
|
1197 | cctx = zstd.ZstdCompressor(level=3) | |
1189 | buffer = io.BytesIO() |
|
1198 | buffer = io.BytesIO() | |
1190 | with cctx.stream_writer(buffer) as compressor: |
|
1199 | with cctx.stream_writer(buffer) as compressor: | |
1191 |
compressor.write(b |
|
1200 | compressor.write(b"foo") | |
1192 | size = compressor.memory_size() |
|
1201 | size = compressor.memory_size() | |
1193 |
|
1202 | |||
1194 | self.assertGreater(size, 100000) |
|
1203 | self.assertGreater(size, 100000) | |
1195 |
|
1204 | |||
1196 | def test_write_size(self): |
|
1205 | def test_write_size(self): | |
1197 | cctx = zstd.ZstdCompressor(level=3) |
|
1206 | cctx = zstd.ZstdCompressor(level=3) | |
1198 | dest = OpCountingBytesIO() |
|
1207 | dest = OpCountingBytesIO() | |
1199 | with cctx.stream_writer(dest, write_size=1) as compressor: |
|
1208 | with cctx.stream_writer(dest, write_size=1) as compressor: | |
1200 |
self.assertEqual(compressor.write(b |
|
1209 | self.assertEqual(compressor.write(b"foo"), 0) | |
1201 |
self.assertEqual(compressor.write(b |
|
1210 | self.assertEqual(compressor.write(b"bar"), 0) | |
1202 |
self.assertEqual(compressor.write(b |
|
1211 | self.assertEqual(compressor.write(b"foobar"), 0) | |
1203 |
|
1212 | |||
1204 | self.assertEqual(len(dest.getvalue()), dest._write_count) |
|
1213 | self.assertEqual(len(dest.getvalue()), dest._write_count) | |
1205 |
|
1214 | |||
1206 | def test_flush_repeated(self): |
|
1215 | def test_flush_repeated(self): | |
1207 | cctx = zstd.ZstdCompressor(level=3) |
|
1216 | cctx = zstd.ZstdCompressor(level=3) | |
1208 | dest = OpCountingBytesIO() |
|
1217 | dest = OpCountingBytesIO() | |
1209 | with cctx.stream_writer(dest) as compressor: |
|
1218 | with cctx.stream_writer(dest) as compressor: | |
1210 |
self.assertEqual(compressor.write(b |
|
1219 | self.assertEqual(compressor.write(b"foo"), 0) | |
1211 | self.assertEqual(dest._write_count, 0) |
|
1220 | self.assertEqual(dest._write_count, 0) | |
1212 | self.assertEqual(compressor.flush(), 12) |
|
1221 | self.assertEqual(compressor.flush(), 12) | |
1213 | self.assertEqual(dest._write_count, 1) |
|
1222 | self.assertEqual(dest._write_count, 1) | |
1214 |
self.assertEqual(compressor.write(b |
|
1223 | self.assertEqual(compressor.write(b"bar"), 0) | |
1215 | self.assertEqual(dest._write_count, 1) |
|
1224 | self.assertEqual(dest._write_count, 1) | |
1216 | self.assertEqual(compressor.flush(), 6) |
|
1225 | self.assertEqual(compressor.flush(), 6) | |
1217 | self.assertEqual(dest._write_count, 2) |
|
1226 | self.assertEqual(dest._write_count, 2) | |
1218 |
self.assertEqual(compressor.write(b |
|
1227 | self.assertEqual(compressor.write(b"baz"), 0) | |
1219 |
|
1228 | |||
1220 | self.assertEqual(dest._write_count, 3) |
|
1229 | self.assertEqual(dest._write_count, 3) | |
1221 |
|
1230 | |||
1222 | def test_flush_empty_block(self): |
|
1231 | def test_flush_empty_block(self): | |
1223 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) |
|
1232 | cctx = zstd.ZstdCompressor(level=3, write_checksum=True) | |
1224 | dest = OpCountingBytesIO() |
|
1233 | dest = OpCountingBytesIO() | |
1225 | with cctx.stream_writer(dest) as compressor: |
|
1234 | with cctx.stream_writer(dest) as compressor: | |
1226 |
self.assertEqual(compressor.write(b |
|
1235 | self.assertEqual(compressor.write(b"foobar" * 8192), 0) | |
1227 | count = dest._write_count |
|
1236 | count = dest._write_count | |
1228 | offset = dest.tell() |
|
1237 | offset = dest.tell() | |
1229 | self.assertEqual(compressor.flush(), 23) |
|
1238 | self.assertEqual(compressor.flush(), 23) | |
1230 | self.assertGreater(dest._write_count, count) |
|
1239 | self.assertGreater(dest._write_count, count) | |
1231 | self.assertGreater(dest.tell(), offset) |
|
1240 | self.assertGreater(dest.tell(), offset) | |
1232 | offset = dest.tell() |
|
1241 | offset = dest.tell() | |
1233 | # Ending the write here should cause an empty block to be written |
|
1242 | # Ending the write here should cause an empty block to be written | |
1234 | # to denote end of frame. |
|
1243 | # to denote end of frame. | |
1235 |
|
1244 | |||
1236 | trailing = dest.getvalue()[offset:] |
|
1245 | trailing = dest.getvalue()[offset:] | |
1237 | # 3 bytes block header + 4 bytes frame checksum |
|
1246 | # 3 bytes block header + 4 bytes frame checksum | |
1238 | self.assertEqual(len(trailing), 7) |
|
1247 | self.assertEqual(len(trailing), 7) | |
1239 |
|
1248 | |||
1240 | header = trailing[0:3] |
|
1249 | header = trailing[0:3] | |
1241 |
self.assertEqual(header, b |
|
1250 | self.assertEqual(header, b"\x01\x00\x00") | |
1242 |
|
1251 | |||
1243 | def test_flush_frame(self): |
|
1252 | def test_flush_frame(self): | |
1244 | cctx = zstd.ZstdCompressor(level=3) |
|
1253 | cctx = zstd.ZstdCompressor(level=3) | |
1245 | dest = OpCountingBytesIO() |
|
1254 | dest = OpCountingBytesIO() | |
1246 |
|
1255 | |||
1247 | with cctx.stream_writer(dest) as compressor: |
|
1256 | with cctx.stream_writer(dest) as compressor: | |
1248 |
self.assertEqual(compressor.write(b |
|
1257 | self.assertEqual(compressor.write(b"foobar" * 8192), 0) | |
1249 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) |
|
1258 | self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23) | |
1250 |
compressor.write(b |
|
1259 | compressor.write(b"biz" * 16384) | |
1251 |
|
1260 | |||
1252 |
self.assertEqual( |
|
1261 | self.assertEqual( | |
1253 | # Frame 1. |
|
1262 | dest.getvalue(), | |
1254 | b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f' |
|
1263 | # Frame 1. | |
1255 | b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08' |
|
1264 | b"\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f" | |
1256 | # Frame 2. |
|
1265 | b"\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08" | |
1257 | b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a' |
|
1266 | # Frame 2. | |
1258 | b'\x01\x00\xfa\x3f\x75\x37\x04') |
|
1267 | b"\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a" | |
|
1268 | b"\x01\x00\xfa\x3f\x75\x37\x04", | |||
|
1269 | ) | |||
1259 |
|
1270 | |||
1260 | def test_bad_flush_mode(self): |
|
1271 | def test_bad_flush_mode(self): | |
1261 | cctx = zstd.ZstdCompressor() |
|
1272 | cctx = zstd.ZstdCompressor() | |
1262 | dest = io.BytesIO() |
|
1273 | dest = io.BytesIO() | |
1263 | with cctx.stream_writer(dest) as compressor: |
|
1274 | with cctx.stream_writer(dest) as compressor: | |
1264 |
with self.assertRaisesRegex |
|
1275 | with self.assertRaisesRegex(ValueError, "unknown flush_mode: 42"): | |
1265 | compressor.flush(flush_mode=42) |
|
1276 | compressor.flush(flush_mode=42) | |
1266 |
|
1277 | |||
1267 | def test_multithreaded(self): |
|
1278 | def test_multithreaded(self): | |
1268 | dest = NonClosingBytesIO() |
|
1279 | dest = NonClosingBytesIO() | |
1269 | cctx = zstd.ZstdCompressor(threads=2) |
|
1280 | cctx = zstd.ZstdCompressor(threads=2) | |
1270 | with cctx.stream_writer(dest) as compressor: |
|
1281 | with cctx.stream_writer(dest) as compressor: | |
1271 |
compressor.write(b |
|
1282 | compressor.write(b"a" * 1048576) | |
1272 |
compressor.write(b |
|
1283 | compressor.write(b"b" * 1048576) | |
1273 |
compressor.write(b |
|
1284 | compressor.write(b"c" * 1048576) | |
1274 |
|
1285 | |||
1275 |
self.assertEqual(len(dest.getvalue()), |
|
1286 | self.assertEqual(len(dest.getvalue()), 111) | |
1276 |
|
1287 | |||
1277 | def test_tell(self): |
|
1288 | def test_tell(self): | |
1278 | dest = io.BytesIO() |
|
1289 | dest = io.BytesIO() | |
1279 | cctx = zstd.ZstdCompressor() |
|
1290 | cctx = zstd.ZstdCompressor() | |
1280 | with cctx.stream_writer(dest) as compressor: |
|
1291 | with cctx.stream_writer(dest) as compressor: | |
1281 | self.assertEqual(compressor.tell(), 0) |
|
1292 | self.assertEqual(compressor.tell(), 0) | |
1282 |
|
1293 | |||
1283 | for i in range(256): |
|
1294 | for i in range(256): | |
1284 |
compressor.write(b |
|
1295 | compressor.write(b"foo" * (i + 1)) | |
1285 | self.assertEqual(compressor.tell(), dest.tell()) |
|
1296 | self.assertEqual(compressor.tell(), dest.tell()) | |
1286 |
|
1297 | |||
1287 | def test_bad_size(self): |
|
1298 | def test_bad_size(self): | |
1288 | cctx = zstd.ZstdCompressor() |
|
1299 | cctx = zstd.ZstdCompressor() | |
1289 |
|
1300 | |||
1290 | dest = io.BytesIO() |
|
1301 | dest = io.BytesIO() | |
1291 |
|
1302 | |||
1292 |
with self.assertRaisesRegex |
|
1303 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
1293 | with cctx.stream_writer(dest, size=2) as compressor: |
|
1304 | with cctx.stream_writer(dest, size=2) as compressor: | |
1294 |
compressor.write(b |
|
1305 | compressor.write(b"foo") | |
1295 |
|
1306 | |||
1296 | # Test another operation. |
|
1307 | # Test another operation. | |
1297 | with cctx.stream_writer(dest, size=42): |
|
1308 | with cctx.stream_writer(dest, size=42): | |
1298 | pass |
|
1309 | pass | |
1299 |
|
1310 | |||
1300 | def test_tarfile_compat(self): |
|
1311 | def test_tarfile_compat(self): | |
1301 | dest = NonClosingBytesIO() |
|
1312 | dest = NonClosingBytesIO() | |
1302 | cctx = zstd.ZstdCompressor() |
|
1313 | cctx = zstd.ZstdCompressor() | |
1303 | with cctx.stream_writer(dest) as compressor: |
|
1314 | with cctx.stream_writer(dest) as compressor: | |
1304 |
with tarfile.open( |
|
1315 | with tarfile.open("tf", mode="w|", fileobj=compressor) as tf: | |
1305 |
tf.add(__file__, |
|
1316 | tf.add(__file__, "test_compressor.py") | |
1306 |
|
1317 | |||
1307 | dest = io.BytesIO(dest.getvalue()) |
|
1318 | dest = io.BytesIO(dest.getvalue()) | |
1308 |
|
1319 | |||
1309 | dctx = zstd.ZstdDecompressor() |
|
1320 | dctx = zstd.ZstdDecompressor() | |
1310 | with dctx.stream_reader(dest) as reader: |
|
1321 | with dctx.stream_reader(dest) as reader: | |
1311 |
with tarfile.open(mode= |
|
1322 | with tarfile.open(mode="r|", fileobj=reader) as tf: | |
1312 | for member in tf: |
|
1323 | for member in tf: | |
1313 |
self.assertEqual(member.name, |
|
1324 | self.assertEqual(member.name, "test_compressor.py") | |
1314 |
|
1325 | |||
1315 |
|
1326 | |||
1316 | @make_cffi |
|
1327 | @make_cffi | |
1317 |
class TestCompressor_read_to_iter( |
|
1328 | class TestCompressor_read_to_iter(TestCase): | |
1318 | def test_type_validation(self): |
|
1329 | def test_type_validation(self): | |
1319 | cctx = zstd.ZstdCompressor() |
|
1330 | cctx = zstd.ZstdCompressor() | |
1320 |
|
1331 | |||
1321 | # Object with read() works. |
|
1332 | # Object with read() works. | |
1322 | for chunk in cctx.read_to_iter(io.BytesIO()): |
|
1333 | for chunk in cctx.read_to_iter(io.BytesIO()): | |
1323 | pass |
|
1334 | pass | |
1324 |
|
1335 | |||
1325 | # Buffer protocol works. |
|
1336 | # Buffer protocol works. | |
1326 |
for chunk in cctx.read_to_iter(b |
|
1337 | for chunk in cctx.read_to_iter(b"foobar"): | |
1327 | pass |
|
1338 | pass | |
1328 |
|
1339 | |||
1329 |
with self.assertRaisesRegex |
|
1340 | with self.assertRaisesRegex(ValueError, "must pass an object with a read"): | |
1330 | for chunk in cctx.read_to_iter(True): |
|
1341 | for chunk in cctx.read_to_iter(True): | |
1331 | pass |
|
1342 | pass | |
1332 |
|
1343 | |||
1333 | def test_read_empty(self): |
|
1344 | def test_read_empty(self): | |
1334 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1345 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
1335 |
|
1346 | |||
1336 | source = io.BytesIO() |
|
1347 | source = io.BytesIO() | |
1337 | it = cctx.read_to_iter(source) |
|
1348 | it = cctx.read_to_iter(source) | |
1338 | chunks = list(it) |
|
1349 | chunks = list(it) | |
1339 | self.assertEqual(len(chunks), 1) |
|
1350 | self.assertEqual(len(chunks), 1) | |
1340 |
compressed = b |
|
1351 | compressed = b"".join(chunks) | |
1341 |
self.assertEqual(compressed, b |
|
1352 | self.assertEqual(compressed, b"\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00") | |
1342 |
|
1353 | |||
1343 | # And again with the buffer protocol. |
|
1354 | # And again with the buffer protocol. | |
1344 |
it = cctx.read_to_iter(b |
|
1355 | it = cctx.read_to_iter(b"") | |
1345 | chunks = list(it) |
|
1356 | chunks = list(it) | |
1346 | self.assertEqual(len(chunks), 1) |
|
1357 | self.assertEqual(len(chunks), 1) | |
1347 |
compressed2 = b |
|
1358 | compressed2 = b"".join(chunks) | |
1348 | self.assertEqual(compressed2, compressed) |
|
1359 | self.assertEqual(compressed2, compressed) | |
1349 |
|
1360 | |||
1350 | def test_read_large(self): |
|
1361 | def test_read_large(self): | |
1351 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1362 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
1352 |
|
1363 | |||
1353 | source = io.BytesIO() |
|
1364 | source = io.BytesIO() | |
1354 |
source.write(b |
|
1365 | source.write(b"f" * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
1355 |
source.write(b |
|
1366 | source.write(b"o") | |
1356 | source.seek(0) |
|
1367 | source.seek(0) | |
1357 |
|
1368 | |||
1358 | # Creating an iterator should not perform any compression until |
|
1369 | # Creating an iterator should not perform any compression until | |
1359 | # first read. |
|
1370 | # first read. | |
1360 | it = cctx.read_to_iter(source, size=len(source.getvalue())) |
|
1371 | it = cctx.read_to_iter(source, size=len(source.getvalue())) | |
1361 | self.assertEqual(source.tell(), 0) |
|
1372 | self.assertEqual(source.tell(), 0) | |
1362 |
|
1373 | |||
1363 | # We should have exactly 2 output chunks. |
|
1374 | # We should have exactly 2 output chunks. | |
1364 | chunks = [] |
|
1375 | chunks = [] | |
1365 | chunk = next(it) |
|
1376 | chunk = next(it) | |
1366 | self.assertIsNotNone(chunk) |
|
1377 | self.assertIsNotNone(chunk) | |
1367 | self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) |
|
1378 | self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE) | |
1368 | chunks.append(chunk) |
|
1379 | chunks.append(chunk) | |
1369 | chunk = next(it) |
|
1380 | chunk = next(it) | |
1370 | self.assertIsNotNone(chunk) |
|
1381 | self.assertIsNotNone(chunk) | |
1371 | chunks.append(chunk) |
|
1382 | chunks.append(chunk) | |
1372 |
|
1383 | |||
1373 | self.assertEqual(source.tell(), len(source.getvalue())) |
|
1384 | self.assertEqual(source.tell(), len(source.getvalue())) | |
1374 |
|
1385 | |||
1375 | with self.assertRaises(StopIteration): |
|
1386 | with self.assertRaises(StopIteration): | |
1376 | next(it) |
|
1387 | next(it) | |
1377 |
|
1388 | |||
1378 | # And again for good measure. |
|
1389 | # And again for good measure. | |
1379 | with self.assertRaises(StopIteration): |
|
1390 | with self.assertRaises(StopIteration): | |
1380 | next(it) |
|
1391 | next(it) | |
1381 |
|
1392 | |||
1382 | # We should get the same output as the one-shot compression mechanism. |
|
1393 | # We should get the same output as the one-shot compression mechanism. | |
1383 |
self.assertEqual(b |
|
1394 | self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) | |
1384 |
|
1395 | |||
1385 |
params = zstd.get_frame_parameters(b |
|
1396 | params = zstd.get_frame_parameters(b"".join(chunks)) | |
1386 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1397 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1387 | self.assertEqual(params.window_size, 262144) |
|
1398 | self.assertEqual(params.window_size, 262144) | |
1388 | self.assertEqual(params.dict_id, 0) |
|
1399 | self.assertEqual(params.dict_id, 0) | |
1389 | self.assertFalse(params.has_checksum) |
|
1400 | self.assertFalse(params.has_checksum) | |
1390 |
|
1401 | |||
1391 | # Now check the buffer protocol. |
|
1402 | # Now check the buffer protocol. | |
1392 | it = cctx.read_to_iter(source.getvalue()) |
|
1403 | it = cctx.read_to_iter(source.getvalue()) | |
1393 | chunks = list(it) |
|
1404 | chunks = list(it) | |
1394 | self.assertEqual(len(chunks), 2) |
|
1405 | self.assertEqual(len(chunks), 2) | |
1395 |
|
1406 | |||
1396 |
params = zstd.get_frame_parameters(b |
|
1407 | params = zstd.get_frame_parameters(b"".join(chunks)) | |
1397 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
1408 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
1398 | #self.assertEqual(params.window_size, 262144) |
|
1409 | # self.assertEqual(params.window_size, 262144) | |
1399 | self.assertEqual(params.dict_id, 0) |
|
1410 | self.assertEqual(params.dict_id, 0) | |
1400 | self.assertFalse(params.has_checksum) |
|
1411 | self.assertFalse(params.has_checksum) | |
1401 |
|
1412 | |||
1402 |
self.assertEqual(b |
|
1413 | self.assertEqual(b"".join(chunks), cctx.compress(source.getvalue())) | |
1403 |
|
1414 | |||
1404 | def test_read_write_size(self): |
|
1415 | def test_read_write_size(self): | |
1405 |
source = OpCountingBytesIO(b |
|
1416 | source = OpCountingBytesIO(b"foobarfoobar") | |
1406 | cctx = zstd.ZstdCompressor(level=3) |
|
1417 | cctx = zstd.ZstdCompressor(level=3) | |
1407 | for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): |
|
1418 | for chunk in cctx.read_to_iter(source, read_size=1, write_size=1): | |
1408 | self.assertEqual(len(chunk), 1) |
|
1419 | self.assertEqual(len(chunk), 1) | |
1409 |
|
1420 | |||
1410 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
1421 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) | |
1411 |
|
1422 | |||
1412 | def test_multithreaded(self): |
|
1423 | def test_multithreaded(self): | |
1413 | source = io.BytesIO() |
|
1424 | source = io.BytesIO() | |
1414 |
source.write(b |
|
1425 | source.write(b"a" * 1048576) | |
1415 |
source.write(b |
|
1426 | source.write(b"b" * 1048576) | |
1416 |
source.write(b |
|
1427 | source.write(b"c" * 1048576) | |
1417 | source.seek(0) |
|
1428 | source.seek(0) | |
1418 |
|
1429 | |||
1419 | cctx = zstd.ZstdCompressor(threads=2) |
|
1430 | cctx = zstd.ZstdCompressor(threads=2) | |
1420 |
|
1431 | |||
1421 |
compressed = b |
|
1432 | compressed = b"".join(cctx.read_to_iter(source)) | |
1422 |
self.assertEqual(len(compressed), |
|
1433 | self.assertEqual(len(compressed), 111) | |
1423 |
|
1434 | |||
1424 | def test_bad_size(self): |
|
1435 | def test_bad_size(self): | |
1425 | cctx = zstd.ZstdCompressor() |
|
1436 | cctx = zstd.ZstdCompressor() | |
1426 |
|
1437 | |||
1427 |
source = io.BytesIO(b |
|
1438 | source = io.BytesIO(b"a" * 42) | |
1428 |
|
1439 | |||
1429 |
with self.assertRaisesRegex |
|
1440 | with self.assertRaisesRegex(zstd.ZstdError, "Src size is incorrect"): | |
1430 |
b |
|
1441 | b"".join(cctx.read_to_iter(source, size=2)) | |
1431 |
|
1442 | |||
1432 | # Test another operation on errored compressor. |
|
1443 | # Test another operation on errored compressor. | |
1433 |
b |
|
1444 | b"".join(cctx.read_to_iter(source)) | |
1434 |
|
1445 | |||
1435 |
|
1446 | |||
1436 | @make_cffi |
|
1447 | @make_cffi | |
1437 |
class TestCompressor_chunker( |
|
1448 | class TestCompressor_chunker(TestCase): | |
1438 | def test_empty(self): |
|
1449 | def test_empty(self): | |
1439 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1450 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
1440 | chunker = cctx.chunker() |
|
1451 | chunker = cctx.chunker() | |
1441 |
|
1452 | |||
1442 |
it = chunker.compress(b |
|
1453 | it = chunker.compress(b"") | |
1443 |
|
1454 | |||
1444 | with self.assertRaises(StopIteration): |
|
1455 | with self.assertRaises(StopIteration): | |
1445 | next(it) |
|
1456 | next(it) | |
1446 |
|
1457 | |||
1447 | it = chunker.finish() |
|
1458 | it = chunker.finish() | |
1448 |
|
1459 | |||
1449 |
self.assertEqual(next(it), b |
|
1460 | self.assertEqual(next(it), b"\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00") | |
1450 |
|
1461 | |||
1451 | with self.assertRaises(StopIteration): |
|
1462 | with self.assertRaises(StopIteration): | |
1452 | next(it) |
|
1463 | next(it) | |
1453 |
|
1464 | |||
1454 | def test_simple_input(self): |
|
1465 | def test_simple_input(self): | |
1455 | cctx = zstd.ZstdCompressor() |
|
1466 | cctx = zstd.ZstdCompressor() | |
1456 | chunker = cctx.chunker() |
|
1467 | chunker = cctx.chunker() | |
1457 |
|
1468 | |||
1458 |
it = chunker.compress(b |
|
1469 | it = chunker.compress(b"foobar") | |
1459 |
|
1470 | |||
1460 | with self.assertRaises(StopIteration): |
|
1471 | with self.assertRaises(StopIteration): | |
1461 | next(it) |
|
1472 | next(it) | |
1462 |
|
1473 | |||
1463 |
it = chunker.compress(b |
|
1474 | it = chunker.compress(b"baz" * 30) | |
1464 |
|
1475 | |||
1465 | with self.assertRaises(StopIteration): |
|
1476 | with self.assertRaises(StopIteration): | |
1466 | next(it) |
|
1477 | next(it) | |
1467 |
|
1478 | |||
1468 | it = chunker.finish() |
|
1479 | it = chunker.finish() | |
1469 |
|
1480 | |||
1470 |
self.assertEqual( |
|
1481 | self.assertEqual( | |
1471 | b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f' |
|
1482 | next(it), | |
1472 | b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e') |
|
1483 | b"\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f" | |
|
1484 | b"\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e", | |||
|
1485 | ) | |||
1473 |
|
1486 | |||
1474 | with self.assertRaises(StopIteration): |
|
1487 | with self.assertRaises(StopIteration): | |
1475 | next(it) |
|
1488 | next(it) | |
1476 |
|
1489 | |||
1477 | def test_input_size(self): |
|
1490 | def test_input_size(self): | |
1478 | cctx = zstd.ZstdCompressor() |
|
1491 | cctx = zstd.ZstdCompressor() | |
1479 | chunker = cctx.chunker(size=1024) |
|
1492 | chunker = cctx.chunker(size=1024) | |
1480 |
|
1493 | |||
1481 |
it = chunker.compress(b |
|
1494 | it = chunker.compress(b"x" * 1000) | |
1482 |
|
1495 | |||
1483 | with self.assertRaises(StopIteration): |
|
1496 | with self.assertRaises(StopIteration): | |
1484 | next(it) |
|
1497 | next(it) | |
1485 |
|
1498 | |||
1486 |
it = chunker.compress(b |
|
1499 | it = chunker.compress(b"y" * 24) | |
1487 |
|
1500 | |||
1488 | with self.assertRaises(StopIteration): |
|
1501 | with self.assertRaises(StopIteration): | |
1489 | next(it) |
|
1502 | next(it) | |
1490 |
|
1503 | |||
1491 | chunks = list(chunker.finish()) |
|
1504 | chunks = list(chunker.finish()) | |
1492 |
|
1505 | |||
1493 |
self.assertEqual( |
|
1506 | self.assertEqual( | |
1494 | b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00' |
|
1507 | chunks, | |
1495 | b'\xa0\x16\xe3\x2b\x80\x05' |
|
1508 | [ | |
1496 | ]) |
|
1509 | b"\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00" | |
|
1510 | b"\xa0\x16\xe3\x2b\x80\x05" | |||
|
1511 | ], | |||
|
1512 | ) | |||
1497 |
|
1513 | |||
1498 | dctx = zstd.ZstdDecompressor() |
|
1514 | dctx = zstd.ZstdDecompressor() | |
1499 |
|
1515 | |||
1500 |
self.assertEqual(dctx.decompress(b |
|
1516 | self.assertEqual(dctx.decompress(b"".join(chunks)), (b"x" * 1000) + (b"y" * 24)) | |
1501 | (b'x' * 1000) + (b'y' * 24)) |
|
|||
1502 |
|
1517 | |||
1503 | def test_small_chunk_size(self): |
|
1518 | def test_small_chunk_size(self): | |
1504 | cctx = zstd.ZstdCompressor() |
|
1519 | cctx = zstd.ZstdCompressor() | |
1505 | chunker = cctx.chunker(chunk_size=1) |
|
1520 | chunker = cctx.chunker(chunk_size=1) | |
1506 |
|
1521 | |||
1507 |
chunks = list(chunker.compress(b |
|
1522 | chunks = list(chunker.compress(b"foo" * 1024)) | |
1508 | self.assertEqual(chunks, []) |
|
1523 | self.assertEqual(chunks, []) | |
1509 |
|
1524 | |||
1510 | chunks = list(chunker.finish()) |
|
1525 | chunks = list(chunker.finish()) | |
1511 | self.assertTrue(all(len(chunk) == 1 for chunk in chunks)) |
|
1526 | self.assertTrue(all(len(chunk) == 1 for chunk in chunks)) | |
1512 |
|
1527 | |||
1513 | self.assertEqual( |
|
1528 | self.assertEqual( | |
1514 |
b |
|
1529 | b"".join(chunks), | |
1515 |
b |
|
1530 | b"\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00" | |
1516 |
b |
|
1531 | b"\xfa\xd3\x77\x43", | |
|
1532 | ) | |||
1517 |
|
1533 | |||
1518 | dctx = zstd.ZstdDecompressor() |
|
1534 | dctx = zstd.ZstdDecompressor() | |
1519 |
self.assertEqual( |
|
1535 | self.assertEqual( | |
1520 | max_output_size=10000), |
|
1536 | dctx.decompress(b"".join(chunks), max_output_size=10000), b"foo" * 1024 | |
1521 | b'foo' * 1024) |
|
1537 | ) | |
1522 |
|
1538 | |||
1523 | def test_input_types(self): |
|
1539 | def test_input_types(self): | |
1524 | cctx = zstd.ZstdCompressor() |
|
1540 | cctx = zstd.ZstdCompressor() | |
1525 |
|
1541 | |||
1526 | mutable_array = bytearray(3) |
|
1542 | mutable_array = bytearray(3) | |
1527 |
mutable_array[:] = b |
|
1543 | mutable_array[:] = b"foo" | |
1528 |
|
1544 | |||
1529 | sources = [ |
|
1545 | sources = [ | |
1530 |
memoryview(b |
|
1546 | memoryview(b"foo"), | |
1531 |
bytearray(b |
|
1547 | bytearray(b"foo"), | |
1532 | mutable_array, |
|
1548 | mutable_array, | |
1533 | ] |
|
1549 | ] | |
1534 |
|
1550 | |||
1535 | for source in sources: |
|
1551 | for source in sources: | |
1536 | chunker = cctx.chunker() |
|
1552 | chunker = cctx.chunker() | |
1537 |
|
1553 | |||
1538 | self.assertEqual(list(chunker.compress(source)), []) |
|
1554 | self.assertEqual(list(chunker.compress(source)), []) | |
1539 |
self.assertEqual( |
|
1555 | self.assertEqual( | |
1540 | b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f' |
|
1556 | list(chunker.finish()), | |
1541 | ]) |
|
1557 | [b"\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f"], | |
|
1558 | ) | |||
1542 |
|
1559 | |||
1543 | def test_flush(self): |
|
1560 | def test_flush(self): | |
1544 | cctx = zstd.ZstdCompressor() |
|
1561 | cctx = zstd.ZstdCompressor() | |
1545 | chunker = cctx.chunker() |
|
1562 | chunker = cctx.chunker() | |
1546 |
|
1563 | |||
1547 |
self.assertEqual(list(chunker.compress(b |
|
1564 | self.assertEqual(list(chunker.compress(b"foo" * 1024)), []) | |
1548 |
self.assertEqual(list(chunker.compress(b |
|
1565 | self.assertEqual(list(chunker.compress(b"bar" * 1024)), []) | |
1549 |
|
1566 | |||
1550 | chunks1 = list(chunker.flush()) |
|
1567 | chunks1 = list(chunker.flush()) | |
1551 |
|
1568 | |||
1552 |
self.assertEqual( |
|
1569 | self.assertEqual( | |
1553 | b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72' |
|
1570 | chunks1, | |
1554 | b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02' |
|
1571 | [ | |
1555 | ]) |
|
1572 | b"\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72" | |
|
1573 | b"\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02" | |||
|
1574 | ], | |||
|
1575 | ) | |||
1556 |
|
1576 | |||
1557 | self.assertEqual(list(chunker.flush()), []) |
|
1577 | self.assertEqual(list(chunker.flush()), []) | |
1558 | self.assertEqual(list(chunker.flush()), []) |
|
1578 | self.assertEqual(list(chunker.flush()), []) | |
1559 |
|
1579 | |||
1560 |
self.assertEqual(list(chunker.compress(b |
|
1580 | self.assertEqual(list(chunker.compress(b"baz" * 1024)), []) | |
1561 |
|
1581 | |||
1562 | chunks2 = list(chunker.flush()) |
|
1582 | chunks2 = list(chunker.flush()) | |
1563 | self.assertEqual(len(chunks2), 1) |
|
1583 | self.assertEqual(len(chunks2), 1) | |
1564 |
|
1584 | |||
1565 | chunks3 = list(chunker.finish()) |
|
1585 | chunks3 = list(chunker.finish()) | |
1566 | self.assertEqual(len(chunks2), 1) |
|
1586 | self.assertEqual(len(chunks2), 1) | |
1567 |
|
1587 | |||
1568 | dctx = zstd.ZstdDecompressor() |
|
1588 | dctx = zstd.ZstdDecompressor() | |
1569 |
|
1589 | |||
1570 | self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3), |
|
1590 | self.assertEqual( | |
1571 | max_output_size=10000), |
|
1591 | dctx.decompress( | |
1572 | (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024)) |
|
1592 | b"".join(chunks1 + chunks2 + chunks3), max_output_size=10000 | |
|
1593 | ), | |||
|
1594 | (b"foo" * 1024) + (b"bar" * 1024) + (b"baz" * 1024), | |||
|
1595 | ) | |||
1573 |
|
1596 | |||
1574 | def test_compress_after_finish(self): |
|
1597 | def test_compress_after_finish(self): | |
1575 | cctx = zstd.ZstdCompressor() |
|
1598 | cctx = zstd.ZstdCompressor() | |
1576 | chunker = cctx.chunker() |
|
1599 | chunker = cctx.chunker() | |
1577 |
|
1600 | |||
1578 |
list(chunker.compress(b |
|
1601 | list(chunker.compress(b"foo")) | |
1579 | list(chunker.finish()) |
|
1602 | list(chunker.finish()) | |
1580 |
|
1603 | |||
1581 |
with self.assertRaisesRegex |
|
1604 | with self.assertRaisesRegex( | |
1582 | zstd.ZstdError, |
|
1605 | zstd.ZstdError, r"cannot call compress\(\) after compression finished" | |
1583 | r'cannot call compress\(\) after compression finished'): |
|
1606 | ): | |
1584 |
list(chunker.compress(b |
|
1607 | list(chunker.compress(b"foo")) | |
1585 |
|
1608 | |||
1586 | def test_flush_after_finish(self): |
|
1609 | def test_flush_after_finish(self): | |
1587 | cctx = zstd.ZstdCompressor() |
|
1610 | cctx = zstd.ZstdCompressor() | |
1588 | chunker = cctx.chunker() |
|
1611 | chunker = cctx.chunker() | |
1589 |
|
1612 | |||
1590 |
list(chunker.compress(b |
|
1613 | list(chunker.compress(b"foo")) | |
1591 | list(chunker.finish()) |
|
1614 | list(chunker.finish()) | |
1592 |
|
1615 | |||
1593 |
with self.assertRaisesRegex |
|
1616 | with self.assertRaisesRegex( | |
1594 | zstd.ZstdError, |
|
1617 | zstd.ZstdError, r"cannot call flush\(\) after compression finished" | |
1595 | r'cannot call flush\(\) after compression finished'): |
|
1618 | ): | |
1596 | list(chunker.flush()) |
|
1619 | list(chunker.flush()) | |
1597 |
|
1620 | |||
1598 | def test_finish_after_finish(self): |
|
1621 | def test_finish_after_finish(self): | |
1599 | cctx = zstd.ZstdCompressor() |
|
1622 | cctx = zstd.ZstdCompressor() | |
1600 | chunker = cctx.chunker() |
|
1623 | chunker = cctx.chunker() | |
1601 |
|
1624 | |||
1602 |
list(chunker.compress(b |
|
1625 | list(chunker.compress(b"foo")) | |
1603 | list(chunker.finish()) |
|
1626 | list(chunker.finish()) | |
1604 |
|
1627 | |||
1605 |
with self.assertRaisesRegex |
|
1628 | with self.assertRaisesRegex( | |
1606 | zstd.ZstdError, |
|
1629 | zstd.ZstdError, r"cannot call finish\(\) after compression finished" | |
1607 | r'cannot call finish\(\) after compression finished'): |
|
1630 | ): | |
1608 | list(chunker.finish()) |
|
1631 | list(chunker.finish()) | |
1609 |
|
1632 | |||
1610 |
|
1633 | |||
1611 |
class TestCompressor_multi_compress_to_buffer( |
|
1634 | class TestCompressor_multi_compress_to_buffer(TestCase): | |
1612 | def test_invalid_inputs(self): |
|
1635 | def test_invalid_inputs(self): | |
1613 | cctx = zstd.ZstdCompressor() |
|
1636 | cctx = zstd.ZstdCompressor() | |
1614 |
|
1637 | |||
1615 |
if not hasattr(cctx, |
|
1638 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
1616 |
self.skipTest( |
|
1639 | self.skipTest("multi_compress_to_buffer not available") | |
1617 |
|
1640 | |||
1618 | with self.assertRaises(TypeError): |
|
1641 | with self.assertRaises(TypeError): | |
1619 | cctx.multi_compress_to_buffer(True) |
|
1642 | cctx.multi_compress_to_buffer(True) | |
1620 |
|
1643 | |||
1621 | with self.assertRaises(TypeError): |
|
1644 | with self.assertRaises(TypeError): | |
1622 | cctx.multi_compress_to_buffer((1, 2)) |
|
1645 | cctx.multi_compress_to_buffer((1, 2)) | |
1623 |
|
1646 | |||
1624 |
with self.assertRaisesRegex |
|
1647 | with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): | |
1625 |
cctx.multi_compress_to_buffer([u |
|
1648 | cctx.multi_compress_to_buffer([u"foo"]) | |
1626 |
|
1649 | |||
1627 | def test_empty_input(self): |
|
1650 | def test_empty_input(self): | |
1628 | cctx = zstd.ZstdCompressor() |
|
1651 | cctx = zstd.ZstdCompressor() | |
1629 |
|
1652 | |||
1630 |
if not hasattr(cctx, |
|
1653 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
1631 |
self.skipTest( |
|
1654 | self.skipTest("multi_compress_to_buffer not available") | |
1632 |
|
1655 | |||
1633 |
with self.assertRaisesRegex |
|
1656 | with self.assertRaisesRegex(ValueError, "no source elements found"): | |
1634 | cctx.multi_compress_to_buffer([]) |
|
1657 | cctx.multi_compress_to_buffer([]) | |
1635 |
|
1658 | |||
1636 |
with self.assertRaisesRegex |
|
1659 | with self.assertRaisesRegex(ValueError, "source elements are empty"): | |
1637 |
cctx.multi_compress_to_buffer([b |
|
1660 | cctx.multi_compress_to_buffer([b"", b"", b""]) | |
1638 |
|
1661 | |||
1639 | def test_list_input(self): |
|
1662 | def test_list_input(self): | |
1640 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1663 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
1641 |
|
1664 | |||
1642 |
if not hasattr(cctx, |
|
1665 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
1643 |
self.skipTest( |
|
1666 | self.skipTest("multi_compress_to_buffer not available") | |
1644 |
|
1667 | |||
1645 |
original = [b |
|
1668 | original = [b"foo" * 12, b"bar" * 6] | |
1646 | frames = [cctx.compress(c) for c in original] |
|
1669 | frames = [cctx.compress(c) for c in original] | |
1647 | b = cctx.multi_compress_to_buffer(original) |
|
1670 | b = cctx.multi_compress_to_buffer(original) | |
1648 |
|
1671 | |||
1649 | self.assertIsInstance(b, zstd.BufferWithSegmentsCollection) |
|
1672 | self.assertIsInstance(b, zstd.BufferWithSegmentsCollection) | |
1650 |
|
1673 | |||
1651 | self.assertEqual(len(b), 2) |
|
1674 | self.assertEqual(len(b), 2) | |
1652 | self.assertEqual(b.size(), 44) |
|
1675 | self.assertEqual(b.size(), 44) | |
1653 |
|
1676 | |||
1654 | self.assertEqual(b[0].tobytes(), frames[0]) |
|
1677 | self.assertEqual(b[0].tobytes(), frames[0]) | |
1655 | self.assertEqual(b[1].tobytes(), frames[1]) |
|
1678 | self.assertEqual(b[1].tobytes(), frames[1]) | |
1656 |
|
1679 | |||
1657 | def test_buffer_with_segments_input(self): |
|
1680 | def test_buffer_with_segments_input(self): | |
1658 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1681 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
1659 |
|
1682 | |||
1660 |
if not hasattr(cctx, |
|
1683 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
1661 |
self.skipTest( |
|
1684 | self.skipTest("multi_compress_to_buffer not available") | |
1662 |
|
1685 | |||
1663 |
original = [b |
|
1686 | original = [b"foo" * 4, b"bar" * 6] | |
1664 | frames = [cctx.compress(c) for c in original] |
|
1687 | frames = [cctx.compress(c) for c in original] | |
1665 |
|
1688 | |||
1666 |
offsets = struct.pack( |
|
1689 | offsets = struct.pack( | |
1667 |
|
|
1690 | "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) | |
1668 | segments = zstd.BufferWithSegments(b''.join(original), offsets) |
|
1691 | ) | |
|
1692 | segments = zstd.BufferWithSegments(b"".join(original), offsets) | |||
1669 |
|
1693 | |||
1670 | result = cctx.multi_compress_to_buffer(segments) |
|
1694 | result = cctx.multi_compress_to_buffer(segments) | |
1671 |
|
1695 | |||
1672 | self.assertEqual(len(result), 2) |
|
1696 | self.assertEqual(len(result), 2) | |
1673 | self.assertEqual(result.size(), 47) |
|
1697 | self.assertEqual(result.size(), 47) | |
1674 |
|
1698 | |||
1675 | self.assertEqual(result[0].tobytes(), frames[0]) |
|
1699 | self.assertEqual(result[0].tobytes(), frames[0]) | |
1676 | self.assertEqual(result[1].tobytes(), frames[1]) |
|
1700 | self.assertEqual(result[1].tobytes(), frames[1]) | |
1677 |
|
1701 | |||
1678 | def test_buffer_with_segments_collection_input(self): |
|
1702 | def test_buffer_with_segments_collection_input(self): | |
1679 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1703 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
1680 |
|
1704 | |||
1681 |
if not hasattr(cctx, |
|
1705 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
1682 |
self.skipTest( |
|
1706 | self.skipTest("multi_compress_to_buffer not available") | |
1683 |
|
1707 | |||
1684 | original = [ |
|
1708 | original = [ | |
1685 |
b |
|
1709 | b"foo1", | |
1686 |
b |
|
1710 | b"foo2" * 2, | |
1687 |
b |
|
1711 | b"foo3" * 3, | |
1688 |
b |
|
1712 | b"foo4" * 4, | |
1689 |
b |
|
1713 | b"foo5" * 5, | |
1690 | ] |
|
1714 | ] | |
1691 |
|
1715 | |||
1692 | frames = [cctx.compress(c) for c in original] |
|
1716 | frames = [cctx.compress(c) for c in original] | |
1693 |
|
1717 | |||
1694 |
b = b |
|
1718 | b = b"".join([original[0], original[1]]) | |
1695 |
b1 = zstd.BufferWithSegments( |
|
1719 | b1 = zstd.BufferWithSegments( | |
1696 | 0, len(original[0]), |
|
1720 | b, | |
1697 | len(original[0]), len(original[1]))) |
|
1721 | struct.pack( | |
1698 |
|
|
1722 | "=QQQQ", 0, len(original[0]), len(original[0]), len(original[1]) | |
1699 | b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ', |
|
1723 | ), | |
1700 | 0, len(original[2]), |
|
1724 | ) | |
1701 | len(original[2]), len(original[3]), |
|
1725 | b = b"".join([original[2], original[3], original[4]]) | |
1702 | len(original[2]) + len(original[3]), len(original[4]))) |
|
1726 | b2 = zstd.BufferWithSegments( | |
|
1727 | b, | |||
|
1728 | struct.pack( | |||
|
1729 | "=QQQQQQ", | |||
|
1730 | 0, | |||
|
1731 | len(original[2]), | |||
|
1732 | len(original[2]), | |||
|
1733 | len(original[3]), | |||
|
1734 | len(original[2]) + len(original[3]), | |||
|
1735 | len(original[4]), | |||
|
1736 | ), | |||
|
1737 | ) | |||
1703 |
|
1738 | |||
1704 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1739 | c = zstd.BufferWithSegmentsCollection(b1, b2) | |
1705 |
|
1740 | |||
1706 | result = cctx.multi_compress_to_buffer(c) |
|
1741 | result = cctx.multi_compress_to_buffer(c) | |
1707 |
|
1742 | |||
1708 | self.assertEqual(len(result), len(frames)) |
|
1743 | self.assertEqual(len(result), len(frames)) | |
1709 |
|
1744 | |||
1710 | for i, frame in enumerate(frames): |
|
1745 | for i, frame in enumerate(frames): | |
1711 | self.assertEqual(result[i].tobytes(), frame) |
|
1746 | self.assertEqual(result[i].tobytes(), frame) | |
1712 |
|
1747 | |||
1713 | def test_multiple_threads(self): |
|
1748 | def test_multiple_threads(self): | |
1714 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will |
|
1749 | # threads argument will cause multi-threaded ZSTD APIs to be used, which will | |
1715 | # make output different. |
|
1750 | # make output different. | |
1716 | refcctx = zstd.ZstdCompressor(write_checksum=True) |
|
1751 | refcctx = zstd.ZstdCompressor(write_checksum=True) | |
1717 |
reference = [refcctx.compress(b |
|
1752 | reference = [refcctx.compress(b"x" * 64), refcctx.compress(b"y" * 64)] | |
1718 |
|
1753 | |||
1719 | cctx = zstd.ZstdCompressor(write_checksum=True) |
|
1754 | cctx = zstd.ZstdCompressor(write_checksum=True) | |
1720 |
|
1755 | |||
1721 |
if not hasattr(cctx, |
|
1756 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
1722 |
self.skipTest( |
|
1757 | self.skipTest("multi_compress_to_buffer not available") | |
1723 |
|
1758 | |||
1724 | frames = [] |
|
1759 | frames = [] | |
1725 |
frames.extend(b |
|
1760 | frames.extend(b"x" * 64 for i in range(256)) | |
1726 |
frames.extend(b |
|
1761 | frames.extend(b"y" * 64 for i in range(256)) | |
1727 |
|
1762 | |||
1728 | result = cctx.multi_compress_to_buffer(frames, threads=-1) |
|
1763 | result = cctx.multi_compress_to_buffer(frames, threads=-1) | |
1729 |
|
1764 | |||
1730 | self.assertEqual(len(result), 512) |
|
1765 | self.assertEqual(len(result), 512) | |
1731 | for i in range(512): |
|
1766 | for i in range(512): | |
1732 | if i < 256: |
|
1767 | if i < 256: | |
1733 | self.assertEqual(result[i].tobytes(), reference[0]) |
|
1768 | self.assertEqual(result[i].tobytes(), reference[0]) | |
1734 | else: |
|
1769 | else: | |
1735 | self.assertEqual(result[i].tobytes(), reference[1]) |
|
1770 | self.assertEqual(result[i].tobytes(), reference[1]) |
This diff has been collapsed as it changes many lines, (631 lines changed) Show them Hide them | |||||
@@ -1,711 +1,836 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 | import unittest |
|
3 | import unittest | |
4 |
|
4 | |||
5 | try: |
|
5 | try: | |
6 | import hypothesis |
|
6 | import hypothesis | |
7 | import hypothesis.strategies as strategies |
|
7 | import hypothesis.strategies as strategies | |
8 | except ImportError: |
|
8 | except ImportError: | |
9 |
raise unittest.SkipTest( |
|
9 | raise unittest.SkipTest("hypothesis not available") | |
10 |
|
10 | |||
11 | import zstandard as zstd |
|
11 | import zstandard as zstd | |
12 |
|
12 | |||
13 |
from . |
|
13 | from .common import ( | |
14 | make_cffi, |
|
14 | make_cffi, | |
15 | NonClosingBytesIO, |
|
15 | NonClosingBytesIO, | |
16 | random_input_data, |
|
16 | random_input_data, | |
|
17 | TestCase, | |||
17 | ) |
|
18 | ) | |
18 |
|
19 | |||
19 |
|
20 | |||
20 |
@unittest.skipUnless( |
|
21 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
21 | @make_cffi |
|
22 | @make_cffi | |
22 |
class TestCompressor_stream_reader_fuzzing( |
|
23 | class TestCompressor_stream_reader_fuzzing(TestCase): | |
23 | @hypothesis.settings( |
|
24 | @hypothesis.settings( | |
24 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
25 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
25 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
26 | ) | |
26 | level=strategies.integers(min_value=1, max_value=5), |
|
27 | @hypothesis.given( | |
27 | source_read_size=strategies.integers(1, 16384), |
|
28 | original=strategies.sampled_from(random_input_data()), | |
28 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
29 | level=strategies.integers(min_value=1, max_value=5), | |
29 | def test_stream_source_read(self, original, level, source_read_size, |
|
30 | source_read_size=strategies.integers(1, 16384), | |
30 | read_size): |
|
31 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
32 | ) | |||
|
33 | def test_stream_source_read(self, original, level, source_read_size, read_size): | |||
31 | if read_size == 0: |
|
34 | if read_size == 0: | |
32 | read_size = -1 |
|
35 | read_size = -1 | |
33 |
|
36 | |||
34 | refctx = zstd.ZstdCompressor(level=level) |
|
37 | refctx = zstd.ZstdCompressor(level=level) | |
35 | ref_frame = refctx.compress(original) |
|
38 | ref_frame = refctx.compress(original) | |
36 |
|
39 | |||
37 | cctx = zstd.ZstdCompressor(level=level) |
|
40 | cctx = zstd.ZstdCompressor(level=level) | |
38 |
with cctx.stream_reader( |
|
41 | with cctx.stream_reader( | |
39 | read_size=source_read_size) as reader: |
|
42 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
43 | ) as reader: | |||
40 | chunks = [] |
|
44 | chunks = [] | |
41 | while True: |
|
45 | while True: | |
42 | chunk = reader.read(read_size) |
|
46 | chunk = reader.read(read_size) | |
43 | if not chunk: |
|
47 | if not chunk: | |
44 | break |
|
48 | break | |
45 |
|
49 | |||
46 | chunks.append(chunk) |
|
50 | chunks.append(chunk) | |
47 |
|
51 | |||
48 |
self.assertEqual(b |
|
52 | self.assertEqual(b"".join(chunks), ref_frame) | |
49 |
|
53 | |||
50 | @hypothesis.settings( |
|
54 | @hypothesis.settings( | |
51 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
55 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
52 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
56 | ) | |
53 | level=strategies.integers(min_value=1, max_value=5), |
|
57 | @hypothesis.given( | |
54 | source_read_size=strategies.integers(1, 16384), |
|
58 | original=strategies.sampled_from(random_input_data()), | |
55 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
59 | level=strategies.integers(min_value=1, max_value=5), | |
56 | def test_buffer_source_read(self, original, level, source_read_size, |
|
60 | source_read_size=strategies.integers(1, 16384), | |
57 | read_size): |
|
61 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
62 | ) | |||
|
63 | def test_buffer_source_read(self, original, level, source_read_size, read_size): | |||
58 | if read_size == 0: |
|
64 | if read_size == 0: | |
59 | read_size = -1 |
|
65 | read_size = -1 | |
60 |
|
66 | |||
61 | refctx = zstd.ZstdCompressor(level=level) |
|
67 | refctx = zstd.ZstdCompressor(level=level) | |
62 | ref_frame = refctx.compress(original) |
|
68 | ref_frame = refctx.compress(original) | |
63 |
|
69 | |||
64 | cctx = zstd.ZstdCompressor(level=level) |
|
70 | cctx = zstd.ZstdCompressor(level=level) | |
65 |
with cctx.stream_reader( |
|
71 | with cctx.stream_reader( | |
66 | read_size=source_read_size) as reader: |
|
72 | original, size=len(original), read_size=source_read_size | |
|
73 | ) as reader: | |||
67 | chunks = [] |
|
74 | chunks = [] | |
68 | while True: |
|
75 | while True: | |
69 | chunk = reader.read(read_size) |
|
76 | chunk = reader.read(read_size) | |
70 | if not chunk: |
|
77 | if not chunk: | |
71 | break |
|
78 | break | |
72 |
|
79 | |||
73 | chunks.append(chunk) |
|
80 | chunks.append(chunk) | |
74 |
|
81 | |||
75 |
self.assertEqual(b |
|
82 | self.assertEqual(b"".join(chunks), ref_frame) | |
76 |
|
83 | |||
77 | @hypothesis.settings( |
|
84 | @hypothesis.settings( | |
78 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
85 | suppress_health_check=[ | |
79 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
86 | hypothesis.HealthCheck.large_base_example, | |
80 | level=strategies.integers(min_value=1, max_value=5), |
|
87 | hypothesis.HealthCheck.too_slow, | |
81 | source_read_size=strategies.integers(1, 16384), |
|
88 | ] | |
82 | read_sizes=strategies.data()) |
|
89 | ) | |
83 | def test_stream_source_read_variance(self, original, level, source_read_size, |
|
90 | @hypothesis.given( | |
84 | read_sizes): |
|
91 | original=strategies.sampled_from(random_input_data()), | |
|
92 | level=strategies.integers(min_value=1, max_value=5), | |||
|
93 | source_read_size=strategies.integers(1, 16384), | |||
|
94 | read_sizes=strategies.data(), | |||
|
95 | ) | |||
|
96 | def test_stream_source_read_variance( | |||
|
97 | self, original, level, source_read_size, read_sizes | |||
|
98 | ): | |||
85 | refctx = zstd.ZstdCompressor(level=level) |
|
99 | refctx = zstd.ZstdCompressor(level=level) | |
86 | ref_frame = refctx.compress(original) |
|
100 | ref_frame = refctx.compress(original) | |
87 |
|
101 | |||
88 | cctx = zstd.ZstdCompressor(level=level) |
|
102 | cctx = zstd.ZstdCompressor(level=level) | |
89 |
with cctx.stream_reader( |
|
103 | with cctx.stream_reader( | |
90 | read_size=source_read_size) as reader: |
|
104 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
105 | ) as reader: | |||
91 | chunks = [] |
|
106 | chunks = [] | |
92 | while True: |
|
107 | while True: | |
93 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
108 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) | |
94 | chunk = reader.read(read_size) |
|
109 | chunk = reader.read(read_size) | |
95 | if not chunk and read_size: |
|
110 | if not chunk and read_size: | |
96 | break |
|
111 | break | |
97 |
|
112 | |||
98 | chunks.append(chunk) |
|
113 | chunks.append(chunk) | |
99 |
|
114 | |||
100 |
self.assertEqual(b |
|
115 | self.assertEqual(b"".join(chunks), ref_frame) | |
101 |
|
116 | |||
102 | @hypothesis.settings( |
|
117 | @hypothesis.settings( | |
103 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
118 | suppress_health_check=[ | |
104 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
119 | hypothesis.HealthCheck.large_base_example, | |
105 | level=strategies.integers(min_value=1, max_value=5), |
|
120 | hypothesis.HealthCheck.too_slow, | |
106 | source_read_size=strategies.integers(1, 16384), |
|
121 | ] | |
107 | read_sizes=strategies.data()) |
|
122 | ) | |
108 | def test_buffer_source_read_variance(self, original, level, source_read_size, |
|
123 | @hypothesis.given( | |
109 | read_sizes): |
|
124 | original=strategies.sampled_from(random_input_data()), | |
|
125 | level=strategies.integers(min_value=1, max_value=5), | |||
|
126 | source_read_size=strategies.integers(1, 16384), | |||
|
127 | read_sizes=strategies.data(), | |||
|
128 | ) | |||
|
129 | def test_buffer_source_read_variance( | |||
|
130 | self, original, level, source_read_size, read_sizes | |||
|
131 | ): | |||
110 |
|
132 | |||
111 | refctx = zstd.ZstdCompressor(level=level) |
|
133 | refctx = zstd.ZstdCompressor(level=level) | |
112 | ref_frame = refctx.compress(original) |
|
134 | ref_frame = refctx.compress(original) | |
113 |
|
135 | |||
114 | cctx = zstd.ZstdCompressor(level=level) |
|
136 | cctx = zstd.ZstdCompressor(level=level) | |
115 |
with cctx.stream_reader( |
|
137 | with cctx.stream_reader( | |
116 | read_size=source_read_size) as reader: |
|
138 | original, size=len(original), read_size=source_read_size | |
|
139 | ) as reader: | |||
117 | chunks = [] |
|
140 | chunks = [] | |
118 | while True: |
|
141 | while True: | |
119 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
142 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) | |
120 | chunk = reader.read(read_size) |
|
143 | chunk = reader.read(read_size) | |
121 | if not chunk and read_size: |
|
144 | if not chunk and read_size: | |
122 | break |
|
145 | break | |
123 |
|
146 | |||
124 | chunks.append(chunk) |
|
147 | chunks.append(chunk) | |
125 |
|
148 | |||
126 |
self.assertEqual(b |
|
149 | self.assertEqual(b"".join(chunks), ref_frame) | |
127 |
|
150 | |||
128 | @hypothesis.settings( |
|
151 | @hypothesis.settings( | |
129 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
152 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
130 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
153 | ) | |
131 | level=strategies.integers(min_value=1, max_value=5), |
|
154 | @hypothesis.given( | |
132 | source_read_size=strategies.integers(1, 16384), |
|
155 | original=strategies.sampled_from(random_input_data()), | |
133 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
156 | level=strategies.integers(min_value=1, max_value=5), | |
134 | def test_stream_source_readinto(self, original, level, |
|
157 | source_read_size=strategies.integers(1, 16384), | |
135 | source_read_size, read_size): |
|
158 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
159 | ) | |||
|
160 | def test_stream_source_readinto(self, original, level, source_read_size, read_size): | |||
136 | refctx = zstd.ZstdCompressor(level=level) |
|
161 | refctx = zstd.ZstdCompressor(level=level) | |
137 | ref_frame = refctx.compress(original) |
|
162 | ref_frame = refctx.compress(original) | |
138 |
|
163 | |||
139 | cctx = zstd.ZstdCompressor(level=level) |
|
164 | cctx = zstd.ZstdCompressor(level=level) | |
140 |
with cctx.stream_reader( |
|
165 | with cctx.stream_reader( | |
141 | read_size=source_read_size) as reader: |
|
166 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
167 | ) as reader: | |||
142 | chunks = [] |
|
168 | chunks = [] | |
143 | while True: |
|
169 | while True: | |
144 | b = bytearray(read_size) |
|
170 | b = bytearray(read_size) | |
145 | count = reader.readinto(b) |
|
171 | count = reader.readinto(b) | |
146 |
|
172 | |||
147 | if not count: |
|
173 | if not count: | |
148 | break |
|
174 | break | |
149 |
|
175 | |||
150 | chunks.append(bytes(b[0:count])) |
|
176 | chunks.append(bytes(b[0:count])) | |
151 |
|
177 | |||
152 |
self.assertEqual(b |
|
178 | self.assertEqual(b"".join(chunks), ref_frame) | |
153 |
|
179 | |||
154 | @hypothesis.settings( |
|
180 | @hypothesis.settings( | |
155 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
181 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
156 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
182 | ) | |
157 | level=strategies.integers(min_value=1, max_value=5), |
|
183 | @hypothesis.given( | |
158 | source_read_size=strategies.integers(1, 16384), |
|
184 | original=strategies.sampled_from(random_input_data()), | |
159 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
185 | level=strategies.integers(min_value=1, max_value=5), | |
160 | def test_buffer_source_readinto(self, original, level, |
|
186 | source_read_size=strategies.integers(1, 16384), | |
161 | source_read_size, read_size): |
|
187 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
188 | ) | |||
|
189 | def test_buffer_source_readinto(self, original, level, source_read_size, read_size): | |||
162 |
|
190 | |||
163 | refctx = zstd.ZstdCompressor(level=level) |
|
191 | refctx = zstd.ZstdCompressor(level=level) | |
164 | ref_frame = refctx.compress(original) |
|
192 | ref_frame = refctx.compress(original) | |
165 |
|
193 | |||
166 | cctx = zstd.ZstdCompressor(level=level) |
|
194 | cctx = zstd.ZstdCompressor(level=level) | |
167 |
with cctx.stream_reader( |
|
195 | with cctx.stream_reader( | |
168 | read_size=source_read_size) as reader: |
|
196 | original, size=len(original), read_size=source_read_size | |
|
197 | ) as reader: | |||
169 | chunks = [] |
|
198 | chunks = [] | |
170 | while True: |
|
199 | while True: | |
171 | b = bytearray(read_size) |
|
200 | b = bytearray(read_size) | |
172 | count = reader.readinto(b) |
|
201 | count = reader.readinto(b) | |
173 |
|
202 | |||
174 | if not count: |
|
203 | if not count: | |
175 | break |
|
204 | break | |
176 |
|
205 | |||
177 | chunks.append(bytes(b[0:count])) |
|
206 | chunks.append(bytes(b[0:count])) | |
178 |
|
207 | |||
179 |
self.assertEqual(b |
|
208 | self.assertEqual(b"".join(chunks), ref_frame) | |
180 |
|
209 | |||
181 | @hypothesis.settings( |
|
210 | @hypothesis.settings( | |
182 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
211 | suppress_health_check=[ | |
183 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
212 | hypothesis.HealthCheck.large_base_example, | |
184 | level=strategies.integers(min_value=1, max_value=5), |
|
213 | hypothesis.HealthCheck.too_slow, | |
185 | source_read_size=strategies.integers(1, 16384), |
|
214 | ] | |
186 | read_sizes=strategies.data()) |
|
215 | ) | |
187 | def test_stream_source_readinto_variance(self, original, level, |
|
216 | @hypothesis.given( | |
188 | source_read_size, read_sizes): |
|
217 | original=strategies.sampled_from(random_input_data()), | |
|
218 | level=strategies.integers(min_value=1, max_value=5), | |||
|
219 | source_read_size=strategies.integers(1, 16384), | |||
|
220 | read_sizes=strategies.data(), | |||
|
221 | ) | |||
|
222 | def test_stream_source_readinto_variance( | |||
|
223 | self, original, level, source_read_size, read_sizes | |||
|
224 | ): | |||
189 | refctx = zstd.ZstdCompressor(level=level) |
|
225 | refctx = zstd.ZstdCompressor(level=level) | |
190 | ref_frame = refctx.compress(original) |
|
226 | ref_frame = refctx.compress(original) | |
191 |
|
227 | |||
192 | cctx = zstd.ZstdCompressor(level=level) |
|
228 | cctx = zstd.ZstdCompressor(level=level) | |
193 |
with cctx.stream_reader( |
|
229 | with cctx.stream_reader( | |
194 | read_size=source_read_size) as reader: |
|
230 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
231 | ) as reader: | |||
195 | chunks = [] |
|
232 | chunks = [] | |
196 | while True: |
|
233 | while True: | |
197 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
234 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |
198 | b = bytearray(read_size) |
|
235 | b = bytearray(read_size) | |
199 | count = reader.readinto(b) |
|
236 | count = reader.readinto(b) | |
200 |
|
237 | |||
201 | if not count: |
|
238 | if not count: | |
202 | break |
|
239 | break | |
203 |
|
240 | |||
204 | chunks.append(bytes(b[0:count])) |
|
241 | chunks.append(bytes(b[0:count])) | |
205 |
|
242 | |||
206 |
self.assertEqual(b |
|
243 | self.assertEqual(b"".join(chunks), ref_frame) | |
207 |
|
244 | |||
208 | @hypothesis.settings( |
|
245 | @hypothesis.settings( | |
209 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
246 | suppress_health_check=[ | |
210 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
247 | hypothesis.HealthCheck.large_base_example, | |
211 | level=strategies.integers(min_value=1, max_value=5), |
|
248 | hypothesis.HealthCheck.too_slow, | |
212 | source_read_size=strategies.integers(1, 16384), |
|
249 | ] | |
213 | read_sizes=strategies.data()) |
|
250 | ) | |
214 | def test_buffer_source_readinto_variance(self, original, level, |
|
251 | @hypothesis.given( | |
215 | source_read_size, read_sizes): |
|
252 | original=strategies.sampled_from(random_input_data()), | |
|
253 | level=strategies.integers(min_value=1, max_value=5), | |||
|
254 | source_read_size=strategies.integers(1, 16384), | |||
|
255 | read_sizes=strategies.data(), | |||
|
256 | ) | |||
|
257 | def test_buffer_source_readinto_variance( | |||
|
258 | self, original, level, source_read_size, read_sizes | |||
|
259 | ): | |||
216 |
|
260 | |||
217 | refctx = zstd.ZstdCompressor(level=level) |
|
261 | refctx = zstd.ZstdCompressor(level=level) | |
218 | ref_frame = refctx.compress(original) |
|
262 | ref_frame = refctx.compress(original) | |
219 |
|
263 | |||
220 | cctx = zstd.ZstdCompressor(level=level) |
|
264 | cctx = zstd.ZstdCompressor(level=level) | |
221 |
with cctx.stream_reader( |
|
265 | with cctx.stream_reader( | |
222 | read_size=source_read_size) as reader: |
|
266 | original, size=len(original), read_size=source_read_size | |
|
267 | ) as reader: | |||
223 | chunks = [] |
|
268 | chunks = [] | |
224 | while True: |
|
269 | while True: | |
225 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
270 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |
226 | b = bytearray(read_size) |
|
271 | b = bytearray(read_size) | |
227 | count = reader.readinto(b) |
|
272 | count = reader.readinto(b) | |
228 |
|
273 | |||
229 | if not count: |
|
274 | if not count: | |
230 | break |
|
275 | break | |
231 |
|
276 | |||
232 | chunks.append(bytes(b[0:count])) |
|
277 | chunks.append(bytes(b[0:count])) | |
233 |
|
278 | |||
234 |
self.assertEqual(b |
|
279 | self.assertEqual(b"".join(chunks), ref_frame) | |
235 |
|
280 | |||
236 | @hypothesis.settings( |
|
281 | @hypothesis.settings( | |
237 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
282 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
238 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
283 | ) | |
239 | level=strategies.integers(min_value=1, max_value=5), |
|
284 | @hypothesis.given( | |
240 | source_read_size=strategies.integers(1, 16384), |
|
285 | original=strategies.sampled_from(random_input_data()), | |
241 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
286 | level=strategies.integers(min_value=1, max_value=5), | |
242 | def test_stream_source_read1(self, original, level, source_read_size, |
|
287 | source_read_size=strategies.integers(1, 16384), | |
243 | read_size): |
|
288 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
289 | ) | |||
|
290 | def test_stream_source_read1(self, original, level, source_read_size, read_size): | |||
244 | if read_size == 0: |
|
291 | if read_size == 0: | |
245 | read_size = -1 |
|
292 | read_size = -1 | |
246 |
|
293 | |||
247 | refctx = zstd.ZstdCompressor(level=level) |
|
294 | refctx = zstd.ZstdCompressor(level=level) | |
248 | ref_frame = refctx.compress(original) |
|
295 | ref_frame = refctx.compress(original) | |
249 |
|
296 | |||
250 | cctx = zstd.ZstdCompressor(level=level) |
|
297 | cctx = zstd.ZstdCompressor(level=level) | |
251 |
with cctx.stream_reader( |
|
298 | with cctx.stream_reader( | |
252 | read_size=source_read_size) as reader: |
|
299 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
300 | ) as reader: | |||
253 | chunks = [] |
|
301 | chunks = [] | |
254 | while True: |
|
302 | while True: | |
255 | chunk = reader.read1(read_size) |
|
303 | chunk = reader.read1(read_size) | |
256 | if not chunk: |
|
304 | if not chunk: | |
257 | break |
|
305 | break | |
258 |
|
306 | |||
259 | chunks.append(chunk) |
|
307 | chunks.append(chunk) | |
260 |
|
308 | |||
261 |
self.assertEqual(b |
|
309 | self.assertEqual(b"".join(chunks), ref_frame) | |
262 |
|
310 | |||
263 | @hypothesis.settings( |
|
311 | @hypothesis.settings( | |
264 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
312 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
265 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
313 | ) | |
266 | level=strategies.integers(min_value=1, max_value=5), |
|
314 | @hypothesis.given( | |
267 | source_read_size=strategies.integers(1, 16384), |
|
315 | original=strategies.sampled_from(random_input_data()), | |
268 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
316 | level=strategies.integers(min_value=1, max_value=5), | |
269 | def test_buffer_source_read1(self, original, level, source_read_size, |
|
317 | source_read_size=strategies.integers(1, 16384), | |
270 | read_size): |
|
318 | read_size=strategies.integers(-1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
319 | ) | |||
|
320 | def test_buffer_source_read1(self, original, level, source_read_size, read_size): | |||
271 | if read_size == 0: |
|
321 | if read_size == 0: | |
272 | read_size = -1 |
|
322 | read_size = -1 | |
273 |
|
323 | |||
274 | refctx = zstd.ZstdCompressor(level=level) |
|
324 | refctx = zstd.ZstdCompressor(level=level) | |
275 | ref_frame = refctx.compress(original) |
|
325 | ref_frame = refctx.compress(original) | |
276 |
|
326 | |||
277 | cctx = zstd.ZstdCompressor(level=level) |
|
327 | cctx = zstd.ZstdCompressor(level=level) | |
278 |
with cctx.stream_reader( |
|
328 | with cctx.stream_reader( | |
279 | read_size=source_read_size) as reader: |
|
329 | original, size=len(original), read_size=source_read_size | |
|
330 | ) as reader: | |||
280 | chunks = [] |
|
331 | chunks = [] | |
281 | while True: |
|
332 | while True: | |
282 | chunk = reader.read1(read_size) |
|
333 | chunk = reader.read1(read_size) | |
283 | if not chunk: |
|
334 | if not chunk: | |
284 | break |
|
335 | break | |
285 |
|
336 | |||
286 | chunks.append(chunk) |
|
337 | chunks.append(chunk) | |
287 |
|
338 | |||
288 |
self.assertEqual(b |
|
339 | self.assertEqual(b"".join(chunks), ref_frame) | |
289 |
|
340 | |||
290 | @hypothesis.settings( |
|
341 | @hypothesis.settings( | |
291 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
342 | suppress_health_check=[ | |
292 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
343 | hypothesis.HealthCheck.large_base_example, | |
293 | level=strategies.integers(min_value=1, max_value=5), |
|
344 | hypothesis.HealthCheck.too_slow, | |
294 | source_read_size=strategies.integers(1, 16384), |
|
345 | ] | |
295 | read_sizes=strategies.data()) |
|
346 | ) | |
296 | def test_stream_source_read1_variance(self, original, level, source_read_size, |
|
347 | @hypothesis.given( | |
297 | read_sizes): |
|
348 | original=strategies.sampled_from(random_input_data()), | |
|
349 | level=strategies.integers(min_value=1, max_value=5), | |||
|
350 | source_read_size=strategies.integers(1, 16384), | |||
|
351 | read_sizes=strategies.data(), | |||
|
352 | ) | |||
|
353 | def test_stream_source_read1_variance( | |||
|
354 | self, original, level, source_read_size, read_sizes | |||
|
355 | ): | |||
298 | refctx = zstd.ZstdCompressor(level=level) |
|
356 | refctx = zstd.ZstdCompressor(level=level) | |
299 | ref_frame = refctx.compress(original) |
|
357 | ref_frame = refctx.compress(original) | |
300 |
|
358 | |||
301 | cctx = zstd.ZstdCompressor(level=level) |
|
359 | cctx = zstd.ZstdCompressor(level=level) | |
302 |
with cctx.stream_reader( |
|
360 | with cctx.stream_reader( | |
303 | read_size=source_read_size) as reader: |
|
361 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
362 | ) as reader: | |||
304 | chunks = [] |
|
363 | chunks = [] | |
305 | while True: |
|
364 | while True: | |
306 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
365 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) | |
307 | chunk = reader.read1(read_size) |
|
366 | chunk = reader.read1(read_size) | |
308 | if not chunk and read_size: |
|
367 | if not chunk and read_size: | |
309 | break |
|
368 | break | |
310 |
|
369 | |||
311 | chunks.append(chunk) |
|
370 | chunks.append(chunk) | |
312 |
|
371 | |||
313 |
self.assertEqual(b |
|
372 | self.assertEqual(b"".join(chunks), ref_frame) | |
314 |
|
373 | |||
315 | @hypothesis.settings( |
|
374 | @hypothesis.settings( | |
316 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
375 | suppress_health_check=[ | |
317 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
376 | hypothesis.HealthCheck.large_base_example, | |
318 | level=strategies.integers(min_value=1, max_value=5), |
|
377 | hypothesis.HealthCheck.too_slow, | |
319 | source_read_size=strategies.integers(1, 16384), |
|
378 | ] | |
320 | read_sizes=strategies.data()) |
|
379 | ) | |
321 | def test_buffer_source_read1_variance(self, original, level, source_read_size, |
|
380 | @hypothesis.given( | |
322 | read_sizes): |
|
381 | original=strategies.sampled_from(random_input_data()), | |
|
382 | level=strategies.integers(min_value=1, max_value=5), | |||
|
383 | source_read_size=strategies.integers(1, 16384), | |||
|
384 | read_sizes=strategies.data(), | |||
|
385 | ) | |||
|
386 | def test_buffer_source_read1_variance( | |||
|
387 | self, original, level, source_read_size, read_sizes | |||
|
388 | ): | |||
323 |
|
389 | |||
324 | refctx = zstd.ZstdCompressor(level=level) |
|
390 | refctx = zstd.ZstdCompressor(level=level) | |
325 | ref_frame = refctx.compress(original) |
|
391 | ref_frame = refctx.compress(original) | |
326 |
|
392 | |||
327 | cctx = zstd.ZstdCompressor(level=level) |
|
393 | cctx = zstd.ZstdCompressor(level=level) | |
328 |
with cctx.stream_reader( |
|
394 | with cctx.stream_reader( | |
329 | read_size=source_read_size) as reader: |
|
395 | original, size=len(original), read_size=source_read_size | |
|
396 | ) as reader: | |||
330 | chunks = [] |
|
397 | chunks = [] | |
331 | while True: |
|
398 | while True: | |
332 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) |
|
399 | read_size = read_sizes.draw(strategies.integers(-1, 16384)) | |
333 | chunk = reader.read1(read_size) |
|
400 | chunk = reader.read1(read_size) | |
334 | if not chunk and read_size: |
|
401 | if not chunk and read_size: | |
335 | break |
|
402 | break | |
336 |
|
403 | |||
337 | chunks.append(chunk) |
|
404 | chunks.append(chunk) | |
338 |
|
405 | |||
339 |
self.assertEqual(b |
|
406 | self.assertEqual(b"".join(chunks), ref_frame) | |
340 |
|
||||
341 |
|
407 | |||
342 | @hypothesis.settings( |
|
408 | @hypothesis.settings( | |
343 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
409 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
344 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
410 | ) | |
345 | level=strategies.integers(min_value=1, max_value=5), |
|
411 | @hypothesis.given( | |
346 | source_read_size=strategies.integers(1, 16384), |
|
412 | original=strategies.sampled_from(random_input_data()), | |
347 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
413 | level=strategies.integers(min_value=1, max_value=5), | |
348 | def test_stream_source_readinto1(self, original, level, source_read_size, |
|
414 | source_read_size=strategies.integers(1, 16384), | |
349 | read_size): |
|
415 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
416 | ) | |||
|
417 | def test_stream_source_readinto1( | |||
|
418 | self, original, level, source_read_size, read_size | |||
|
419 | ): | |||
350 | if read_size == 0: |
|
420 | if read_size == 0: | |
351 | read_size = -1 |
|
421 | read_size = -1 | |
352 |
|
422 | |||
353 | refctx = zstd.ZstdCompressor(level=level) |
|
423 | refctx = zstd.ZstdCompressor(level=level) | |
354 | ref_frame = refctx.compress(original) |
|
424 | ref_frame = refctx.compress(original) | |
355 |
|
425 | |||
356 | cctx = zstd.ZstdCompressor(level=level) |
|
426 | cctx = zstd.ZstdCompressor(level=level) | |
357 |
with cctx.stream_reader( |
|
427 | with cctx.stream_reader( | |
358 | read_size=source_read_size) as reader: |
|
428 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
429 | ) as reader: | |||
359 | chunks = [] |
|
430 | chunks = [] | |
360 | while True: |
|
431 | while True: | |
361 | b = bytearray(read_size) |
|
432 | b = bytearray(read_size) | |
362 | count = reader.readinto1(b) |
|
433 | count = reader.readinto1(b) | |
363 |
|
434 | |||
364 | if not count: |
|
435 | if not count: | |
365 | break |
|
436 | break | |
366 |
|
437 | |||
367 | chunks.append(bytes(b[0:count])) |
|
438 | chunks.append(bytes(b[0:count])) | |
368 |
|
439 | |||
369 |
self.assertEqual(b |
|
440 | self.assertEqual(b"".join(chunks), ref_frame) | |
370 |
|
441 | |||
371 | @hypothesis.settings( |
|
442 | @hypothesis.settings( | |
372 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
443 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
373 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
444 | ) | |
374 | level=strategies.integers(min_value=1, max_value=5), |
|
445 | @hypothesis.given( | |
375 | source_read_size=strategies.integers(1, 16384), |
|
446 | original=strategies.sampled_from(random_input_data()), | |
376 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE)) |
|
447 | level=strategies.integers(min_value=1, max_value=5), | |
377 | def test_buffer_source_readinto1(self, original, level, source_read_size, |
|
448 | source_read_size=strategies.integers(1, 16384), | |
378 | read_size): |
|
449 | read_size=strategies.integers(1, zstd.COMPRESSION_RECOMMENDED_OUTPUT_SIZE), | |
|
450 | ) | |||
|
451 | def test_buffer_source_readinto1( | |||
|
452 | self, original, level, source_read_size, read_size | |||
|
453 | ): | |||
379 | if read_size == 0: |
|
454 | if read_size == 0: | |
380 | read_size = -1 |
|
455 | read_size = -1 | |
381 |
|
456 | |||
382 | refctx = zstd.ZstdCompressor(level=level) |
|
457 | refctx = zstd.ZstdCompressor(level=level) | |
383 | ref_frame = refctx.compress(original) |
|
458 | ref_frame = refctx.compress(original) | |
384 |
|
459 | |||
385 | cctx = zstd.ZstdCompressor(level=level) |
|
460 | cctx = zstd.ZstdCompressor(level=level) | |
386 |
with cctx.stream_reader( |
|
461 | with cctx.stream_reader( | |
387 | read_size=source_read_size) as reader: |
|
462 | original, size=len(original), read_size=source_read_size | |
|
463 | ) as reader: | |||
388 | chunks = [] |
|
464 | chunks = [] | |
389 | while True: |
|
465 | while True: | |
390 | b = bytearray(read_size) |
|
466 | b = bytearray(read_size) | |
391 | count = reader.readinto1(b) |
|
467 | count = reader.readinto1(b) | |
392 |
|
468 | |||
393 | if not count: |
|
469 | if not count: | |
394 | break |
|
470 | break | |
395 |
|
471 | |||
396 | chunks.append(bytes(b[0:count])) |
|
472 | chunks.append(bytes(b[0:count])) | |
397 |
|
473 | |||
398 |
self.assertEqual(b |
|
474 | self.assertEqual(b"".join(chunks), ref_frame) | |
399 |
|
475 | |||
400 | @hypothesis.settings( |
|
476 | @hypothesis.settings( | |
401 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
477 | suppress_health_check=[ | |
402 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
478 | hypothesis.HealthCheck.large_base_example, | |
403 | level=strategies.integers(min_value=1, max_value=5), |
|
479 | hypothesis.HealthCheck.too_slow, | |
404 | source_read_size=strategies.integers(1, 16384), |
|
480 | ] | |
405 | read_sizes=strategies.data()) |
|
481 | ) | |
406 | def test_stream_source_readinto1_variance(self, original, level, source_read_size, |
|
482 | @hypothesis.given( | |
407 | read_sizes): |
|
483 | original=strategies.sampled_from(random_input_data()), | |
|
484 | level=strategies.integers(min_value=1, max_value=5), | |||
|
485 | source_read_size=strategies.integers(1, 16384), | |||
|
486 | read_sizes=strategies.data(), | |||
|
487 | ) | |||
|
488 | def test_stream_source_readinto1_variance( | |||
|
489 | self, original, level, source_read_size, read_sizes | |||
|
490 | ): | |||
408 | refctx = zstd.ZstdCompressor(level=level) |
|
491 | refctx = zstd.ZstdCompressor(level=level) | |
409 | ref_frame = refctx.compress(original) |
|
492 | ref_frame = refctx.compress(original) | |
410 |
|
493 | |||
411 | cctx = zstd.ZstdCompressor(level=level) |
|
494 | cctx = zstd.ZstdCompressor(level=level) | |
412 |
with cctx.stream_reader( |
|
495 | with cctx.stream_reader( | |
413 | read_size=source_read_size) as reader: |
|
496 | io.BytesIO(original), size=len(original), read_size=source_read_size | |
|
497 | ) as reader: | |||
414 | chunks = [] |
|
498 | chunks = [] | |
415 | while True: |
|
499 | while True: | |
416 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
500 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |
417 | b = bytearray(read_size) |
|
501 | b = bytearray(read_size) | |
418 | count = reader.readinto1(b) |
|
502 | count = reader.readinto1(b) | |
419 |
|
503 | |||
420 | if not count: |
|
504 | if not count: | |
421 | break |
|
505 | break | |
422 |
|
506 | |||
423 | chunks.append(bytes(b[0:count])) |
|
507 | chunks.append(bytes(b[0:count])) | |
424 |
|
508 | |||
425 |
self.assertEqual(b |
|
509 | self.assertEqual(b"".join(chunks), ref_frame) | |
426 |
|
510 | |||
427 | @hypothesis.settings( |
|
511 | @hypothesis.settings( | |
428 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
512 | suppress_health_check=[ | |
429 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
513 | hypothesis.HealthCheck.large_base_example, | |
430 | level=strategies.integers(min_value=1, max_value=5), |
|
514 | hypothesis.HealthCheck.too_slow, | |
431 | source_read_size=strategies.integers(1, 16384), |
|
515 | ] | |
432 | read_sizes=strategies.data()) |
|
516 | ) | |
433 | def test_buffer_source_readinto1_variance(self, original, level, source_read_size, |
|
517 | @hypothesis.given( | |
434 | read_sizes): |
|
518 | original=strategies.sampled_from(random_input_data()), | |
|
519 | level=strategies.integers(min_value=1, max_value=5), | |||
|
520 | source_read_size=strategies.integers(1, 16384), | |||
|
521 | read_sizes=strategies.data(), | |||
|
522 | ) | |||
|
523 | def test_buffer_source_readinto1_variance( | |||
|
524 | self, original, level, source_read_size, read_sizes | |||
|
525 | ): | |||
435 |
|
526 | |||
436 | refctx = zstd.ZstdCompressor(level=level) |
|
527 | refctx = zstd.ZstdCompressor(level=level) | |
437 | ref_frame = refctx.compress(original) |
|
528 | ref_frame = refctx.compress(original) | |
438 |
|
529 | |||
439 | cctx = zstd.ZstdCompressor(level=level) |
|
530 | cctx = zstd.ZstdCompressor(level=level) | |
440 |
with cctx.stream_reader( |
|
531 | with cctx.stream_reader( | |
441 | read_size=source_read_size) as reader: |
|
532 | original, size=len(original), read_size=source_read_size | |
|
533 | ) as reader: | |||
442 | chunks = [] |
|
534 | chunks = [] | |
443 | while True: |
|
535 | while True: | |
444 | read_size = read_sizes.draw(strategies.integers(1, 16384)) |
|
536 | read_size = read_sizes.draw(strategies.integers(1, 16384)) | |
445 | b = bytearray(read_size) |
|
537 | b = bytearray(read_size) | |
446 | count = reader.readinto1(b) |
|
538 | count = reader.readinto1(b) | |
447 |
|
539 | |||
448 | if not count: |
|
540 | if not count: | |
449 | break |
|
541 | break | |
450 |
|
542 | |||
451 | chunks.append(bytes(b[0:count])) |
|
543 | chunks.append(bytes(b[0:count])) | |
452 |
|
544 | |||
453 |
self.assertEqual(b |
|
545 | self.assertEqual(b"".join(chunks), ref_frame) | |
454 |
|
||||
455 |
|
546 | |||
456 |
|
547 | |||
457 |
@unittest.skipUnless( |
|
548 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
458 | @make_cffi |
|
549 | @make_cffi | |
459 |
class TestCompressor_stream_writer_fuzzing( |
|
550 | class TestCompressor_stream_writer_fuzzing(TestCase): | |
460 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
551 | @hypothesis.given( | |
461 | level=strategies.integers(min_value=1, max_value=5), |
|
552 | original=strategies.sampled_from(random_input_data()), | |
462 |
|
|
553 | level=strategies.integers(min_value=1, max_value=5), | |
|
554 | write_size=strategies.integers(min_value=1, max_value=1048576), | |||
|
555 | ) | |||
463 | def test_write_size_variance(self, original, level, write_size): |
|
556 | def test_write_size_variance(self, original, level, write_size): | |
464 | refctx = zstd.ZstdCompressor(level=level) |
|
557 | refctx = zstd.ZstdCompressor(level=level) | |
465 | ref_frame = refctx.compress(original) |
|
558 | ref_frame = refctx.compress(original) | |
466 |
|
559 | |||
467 | cctx = zstd.ZstdCompressor(level=level) |
|
560 | cctx = zstd.ZstdCompressor(level=level) | |
468 | b = NonClosingBytesIO() |
|
561 | b = NonClosingBytesIO() | |
469 | with cctx.stream_writer(b, size=len(original), write_size=write_size) as compressor: |
|
562 | with cctx.stream_writer( | |
|
563 | b, size=len(original), write_size=write_size | |||
|
564 | ) as compressor: | |||
470 | compressor.write(original) |
|
565 | compressor.write(original) | |
471 |
|
566 | |||
472 | self.assertEqual(b.getvalue(), ref_frame) |
|
567 | self.assertEqual(b.getvalue(), ref_frame) | |
473 |
|
568 | |||
474 |
|
569 | |||
475 |
@unittest.skipUnless( |
|
570 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
476 | @make_cffi |
|
571 | @make_cffi | |
477 |
class TestCompressor_copy_stream_fuzzing( |
|
572 | class TestCompressor_copy_stream_fuzzing(TestCase): | |
478 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
573 | @hypothesis.given( | |
479 | level=strategies.integers(min_value=1, max_value=5), |
|
574 | original=strategies.sampled_from(random_input_data()), | |
480 |
|
|
575 | level=strategies.integers(min_value=1, max_value=5), | |
481 |
|
|
576 | read_size=strategies.integers(min_value=1, max_value=1048576), | |
|
577 | write_size=strategies.integers(min_value=1, max_value=1048576), | |||
|
578 | ) | |||
482 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
579 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |
483 | refctx = zstd.ZstdCompressor(level=level) |
|
580 | refctx = zstd.ZstdCompressor(level=level) | |
484 | ref_frame = refctx.compress(original) |
|
581 | ref_frame = refctx.compress(original) | |
485 |
|
582 | |||
486 | cctx = zstd.ZstdCompressor(level=level) |
|
583 | cctx = zstd.ZstdCompressor(level=level) | |
487 | source = io.BytesIO(original) |
|
584 | source = io.BytesIO(original) | |
488 | dest = io.BytesIO() |
|
585 | dest = io.BytesIO() | |
489 |
|
586 | |||
490 | cctx.copy_stream(source, dest, size=len(original), read_size=read_size, |
|
587 | cctx.copy_stream( | |
491 | write_size=write_size) |
|
588 | source, dest, size=len(original), read_size=read_size, write_size=write_size | |
|
589 | ) | |||
492 |
|
590 | |||
493 | self.assertEqual(dest.getvalue(), ref_frame) |
|
591 | self.assertEqual(dest.getvalue(), ref_frame) | |
494 |
|
592 | |||
495 |
|
593 | |||
496 |
@unittest.skipUnless( |
|
594 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
497 | @make_cffi |
|
595 | @make_cffi | |
498 |
class TestCompressor_compressobj_fuzzing( |
|
596 | class TestCompressor_compressobj_fuzzing(TestCase): | |
499 | @hypothesis.settings( |
|
597 | @hypothesis.settings( | |
500 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
598 | suppress_health_check=[ | |
501 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
599 | hypothesis.HealthCheck.large_base_example, | |
502 | level=strategies.integers(min_value=1, max_value=5), |
|
600 | hypothesis.HealthCheck.too_slow, | |
503 | chunk_sizes=strategies.data()) |
|
601 | ] | |
|
602 | ) | |||
|
603 | @hypothesis.given( | |||
|
604 | original=strategies.sampled_from(random_input_data()), | |||
|
605 | level=strategies.integers(min_value=1, max_value=5), | |||
|
606 | chunk_sizes=strategies.data(), | |||
|
607 | ) | |||
504 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
608 | def test_random_input_sizes(self, original, level, chunk_sizes): | |
505 | refctx = zstd.ZstdCompressor(level=level) |
|
609 | refctx = zstd.ZstdCompressor(level=level) | |
506 | ref_frame = refctx.compress(original) |
|
610 | ref_frame = refctx.compress(original) | |
507 |
|
611 | |||
508 | cctx = zstd.ZstdCompressor(level=level) |
|
612 | cctx = zstd.ZstdCompressor(level=level) | |
509 | cobj = cctx.compressobj(size=len(original)) |
|
613 | cobj = cctx.compressobj(size=len(original)) | |
510 |
|
614 | |||
511 | chunks = [] |
|
615 | chunks = [] | |
512 | i = 0 |
|
616 | i = 0 | |
513 | while True: |
|
617 | while True: | |
514 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
618 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
515 | source = original[i:i + chunk_size] |
|
619 | source = original[i : i + chunk_size] | |
516 | if not source: |
|
620 | if not source: | |
517 | break |
|
621 | break | |
518 |
|
622 | |||
519 | chunks.append(cobj.compress(source)) |
|
623 | chunks.append(cobj.compress(source)) | |
520 | i += chunk_size |
|
624 | i += chunk_size | |
521 |
|
625 | |||
522 | chunks.append(cobj.flush()) |
|
626 | chunks.append(cobj.flush()) | |
523 |
|
627 | |||
524 |
self.assertEqual(b |
|
628 | self.assertEqual(b"".join(chunks), ref_frame) | |
525 |
|
629 | |||
526 | @hypothesis.settings( |
|
630 | @hypothesis.settings( | |
527 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
631 | suppress_health_check=[ | |
528 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
632 | hypothesis.HealthCheck.large_base_example, | |
529 | level=strategies.integers(min_value=1, max_value=5), |
|
633 | hypothesis.HealthCheck.too_slow, | |
530 | chunk_sizes=strategies.data(), |
|
634 | ] | |
531 | flushes=strategies.data()) |
|
635 | ) | |
|
636 | @hypothesis.given( | |||
|
637 | original=strategies.sampled_from(random_input_data()), | |||
|
638 | level=strategies.integers(min_value=1, max_value=5), | |||
|
639 | chunk_sizes=strategies.data(), | |||
|
640 | flushes=strategies.data(), | |||
|
641 | ) | |||
532 | def test_flush_block(self, original, level, chunk_sizes, flushes): |
|
642 | def test_flush_block(self, original, level, chunk_sizes, flushes): | |
533 | cctx = zstd.ZstdCompressor(level=level) |
|
643 | cctx = zstd.ZstdCompressor(level=level) | |
534 | cobj = cctx.compressobj() |
|
644 | cobj = cctx.compressobj() | |
535 |
|
645 | |||
536 | dctx = zstd.ZstdDecompressor() |
|
646 | dctx = zstd.ZstdDecompressor() | |
537 | dobj = dctx.decompressobj() |
|
647 | dobj = dctx.decompressobj() | |
538 |
|
648 | |||
539 | compressed_chunks = [] |
|
649 | compressed_chunks = [] | |
540 | decompressed_chunks = [] |
|
650 | decompressed_chunks = [] | |
541 | i = 0 |
|
651 | i = 0 | |
542 | while True: |
|
652 | while True: | |
543 | input_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
653 | input_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
544 | source = original[i:i + input_size] |
|
654 | source = original[i : i + input_size] | |
545 | if not source: |
|
655 | if not source: | |
546 | break |
|
656 | break | |
547 |
|
657 | |||
548 | i += input_size |
|
658 | i += input_size | |
549 |
|
659 | |||
550 | chunk = cobj.compress(source) |
|
660 | chunk = cobj.compress(source) | |
551 | compressed_chunks.append(chunk) |
|
661 | compressed_chunks.append(chunk) | |
552 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
662 | decompressed_chunks.append(dobj.decompress(chunk)) | |
553 |
|
663 | |||
554 | if not flushes.draw(strategies.booleans()): |
|
664 | if not flushes.draw(strategies.booleans()): | |
555 | continue |
|
665 | continue | |
556 |
|
666 | |||
557 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) |
|
667 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK) | |
558 | compressed_chunks.append(chunk) |
|
668 | compressed_chunks.append(chunk) | |
559 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
669 | decompressed_chunks.append(dobj.decompress(chunk)) | |
560 |
|
670 | |||
561 |
self.assertEqual(b |
|
671 | self.assertEqual(b"".join(decompressed_chunks), original[0:i]) | |
562 |
|
672 | |||
563 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH) |
|
673 | chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH) | |
564 | compressed_chunks.append(chunk) |
|
674 | compressed_chunks.append(chunk) | |
565 | decompressed_chunks.append(dobj.decompress(chunk)) |
|
675 | decompressed_chunks.append(dobj.decompress(chunk)) | |
566 |
|
676 | |||
567 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), |
|
677 | self.assertEqual( | |
568 | max_output_size=len(original)), |
|
678 | dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), | |
569 |
|
|
679 | original, | |
570 | self.assertEqual(b''.join(decompressed_chunks), original) |
|
680 | ) | |
|
681 | self.assertEqual(b"".join(decompressed_chunks), original) | |||
|
682 | ||||
571 |
|
683 | |||
572 |
@unittest.skipUnless( |
|
684 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
573 | @make_cffi |
|
685 | @make_cffi | |
574 |
class TestCompressor_read_to_iter_fuzzing( |
|
686 | class TestCompressor_read_to_iter_fuzzing(TestCase): | |
575 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
687 | @hypothesis.given( | |
576 | level=strategies.integers(min_value=1, max_value=5), |
|
688 | original=strategies.sampled_from(random_input_data()), | |
577 |
|
|
689 | level=strategies.integers(min_value=1, max_value=5), | |
578 |
|
|
690 | read_size=strategies.integers(min_value=1, max_value=4096), | |
|
691 | write_size=strategies.integers(min_value=1, max_value=4096), | |||
|
692 | ) | |||
579 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
693 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |
580 | refcctx = zstd.ZstdCompressor(level=level) |
|
694 | refcctx = zstd.ZstdCompressor(level=level) | |
581 | ref_frame = refcctx.compress(original) |
|
695 | ref_frame = refcctx.compress(original) | |
582 |
|
696 | |||
583 | source = io.BytesIO(original) |
|
697 | source = io.BytesIO(original) | |
584 |
|
698 | |||
585 | cctx = zstd.ZstdCompressor(level=level) |
|
699 | cctx = zstd.ZstdCompressor(level=level) | |
586 | chunks = list(cctx.read_to_iter(source, size=len(original), |
|
700 | chunks = list( | |
587 | read_size=read_size, |
|
701 | cctx.read_to_iter( | |
588 | write_size=write_size)) |
|
702 | source, size=len(original), read_size=read_size, write_size=write_size | |
|
703 | ) | |||
|
704 | ) | |||
589 |
|
705 | |||
590 |
self.assertEqual(b |
|
706 | self.assertEqual(b"".join(chunks), ref_frame) | |
591 |
|
707 | |||
592 |
|
708 | |||
593 |
@unittest.skipUnless( |
|
709 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
594 |
class TestCompressor_multi_compress_to_buffer_fuzzing( |
|
710 | class TestCompressor_multi_compress_to_buffer_fuzzing(TestCase): | |
595 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), |
|
711 | @hypothesis.given( | |
596 | min_size=1, max_size=1024), |
|
712 | original=strategies.lists( | |
597 | threads=strategies.integers(min_value=1, max_value=8), |
|
713 | strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 | |
598 | use_dict=strategies.booleans()) |
|
714 | ), | |
|
715 | threads=strategies.integers(min_value=1, max_value=8), | |||
|
716 | use_dict=strategies.booleans(), | |||
|
717 | ) | |||
599 | def test_data_equivalence(self, original, threads, use_dict): |
|
718 | def test_data_equivalence(self, original, threads, use_dict): | |
600 | kwargs = {} |
|
719 | kwargs = {} | |
601 |
|
720 | |||
602 | # Use a content dictionary because it is cheap to create. |
|
721 | # Use a content dictionary because it is cheap to create. | |
603 | if use_dict: |
|
722 | if use_dict: | |
604 |
kwargs[ |
|
723 | kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) | |
605 |
|
724 | |||
606 | cctx = zstd.ZstdCompressor(level=1, |
|
725 | cctx = zstd.ZstdCompressor(level=1, write_checksum=True, **kwargs) | |
607 | write_checksum=True, |
|
|||
608 | **kwargs) |
|
|||
609 |
|
726 | |||
610 |
if not hasattr(cctx, |
|
727 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
611 |
self.skipTest( |
|
728 | self.skipTest("multi_compress_to_buffer not available") | |
612 |
|
729 | |||
613 | result = cctx.multi_compress_to_buffer(original, threads=-1) |
|
730 | result = cctx.multi_compress_to_buffer(original, threads=-1) | |
614 |
|
731 | |||
615 | self.assertEqual(len(result), len(original)) |
|
732 | self.assertEqual(len(result), len(original)) | |
616 |
|
733 | |||
617 | # The frame produced via the batch APIs may not be bit identical to that |
|
734 | # The frame produced via the batch APIs may not be bit identical to that | |
618 | # produced by compress() because compression parameters are adjusted |
|
735 | # produced by compress() because compression parameters are adjusted | |
619 | # from the first input in batch mode. So the only thing we can do is |
|
736 | # from the first input in batch mode. So the only thing we can do is | |
620 | # verify the decompressed data matches the input. |
|
737 | # verify the decompressed data matches the input. | |
621 | dctx = zstd.ZstdDecompressor(**kwargs) |
|
738 | dctx = zstd.ZstdDecompressor(**kwargs) | |
622 |
|
739 | |||
623 | for i, frame in enumerate(result): |
|
740 | for i, frame in enumerate(result): | |
624 | self.assertEqual(dctx.decompress(frame), original[i]) |
|
741 | self.assertEqual(dctx.decompress(frame), original[i]) | |
625 |
|
742 | |||
626 |
|
743 | |||
627 |
@unittest.skipUnless( |
|
744 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
628 | @make_cffi |
|
745 | @make_cffi | |
629 |
class TestCompressor_chunker_fuzzing( |
|
746 | class TestCompressor_chunker_fuzzing(TestCase): | |
630 | @hypothesis.settings( |
|
747 | @hypothesis.settings( | |
631 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
748 | suppress_health_check=[ | |
632 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
749 | hypothesis.HealthCheck.large_base_example, | |
633 | level=strategies.integers(min_value=1, max_value=5), |
|
750 | hypothesis.HealthCheck.too_slow, | |
634 | chunk_size=strategies.integers( |
|
751 | ] | |
635 | min_value=1, |
|
752 | ) | |
636 | max_value=32 * 1048576), |
|
753 | @hypothesis.given( | |
637 | input_sizes=strategies.data()) |
|
754 | original=strategies.sampled_from(random_input_data()), | |
|
755 | level=strategies.integers(min_value=1, max_value=5), | |||
|
756 | chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), | |||
|
757 | input_sizes=strategies.data(), | |||
|
758 | ) | |||
638 | def test_random_input_sizes(self, original, level, chunk_size, input_sizes): |
|
759 | def test_random_input_sizes(self, original, level, chunk_size, input_sizes): | |
639 | cctx = zstd.ZstdCompressor(level=level) |
|
760 | cctx = zstd.ZstdCompressor(level=level) | |
640 | chunker = cctx.chunker(chunk_size=chunk_size) |
|
761 | chunker = cctx.chunker(chunk_size=chunk_size) | |
641 |
|
762 | |||
642 | chunks = [] |
|
763 | chunks = [] | |
643 | i = 0 |
|
764 | i = 0 | |
644 | while True: |
|
765 | while True: | |
645 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
766 | input_size = input_sizes.draw(strategies.integers(1, 4096)) | |
646 | source = original[i:i + input_size] |
|
767 | source = original[i : i + input_size] | |
647 | if not source: |
|
768 | if not source: | |
648 | break |
|
769 | break | |
649 |
|
770 | |||
650 | chunks.extend(chunker.compress(source)) |
|
771 | chunks.extend(chunker.compress(source)) | |
651 | i += input_size |
|
772 | i += input_size | |
652 |
|
773 | |||
653 | chunks.extend(chunker.finish()) |
|
774 | chunks.extend(chunker.finish()) | |
654 |
|
775 | |||
655 | dctx = zstd.ZstdDecompressor() |
|
776 | dctx = zstd.ZstdDecompressor() | |
656 |
|
777 | |||
657 |
self.assertEqual( |
|
778 | self.assertEqual( | |
658 | max_output_size=len(original)), |
|
779 | dctx.decompress(b"".join(chunks), max_output_size=len(original)), original | |
659 | original) |
|
780 | ) | |
660 |
|
781 | |||
661 | self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1])) |
|
782 | self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1])) | |
662 |
|
783 | |||
663 | @hypothesis.settings( |
|
784 | @hypothesis.settings( | |
664 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
785 | suppress_health_check=[ | |
665 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
786 | hypothesis.HealthCheck.large_base_example, | |
666 | level=strategies.integers(min_value=1, max_value=5), |
|
787 | hypothesis.HealthCheck.too_slow, | |
667 | chunk_size=strategies.integers( |
|
788 | ] | |
668 | min_value=1, |
|
789 | ) | |
669 | max_value=32 * 1048576), |
|
790 | @hypothesis.given( | |
670 | input_sizes=strategies.data(), |
|
791 | original=strategies.sampled_from(random_input_data()), | |
671 | flushes=strategies.data()) |
|
792 | level=strategies.integers(min_value=1, max_value=5), | |
672 | def test_flush_block(self, original, level, chunk_size, input_sizes, |
|
793 | chunk_size=strategies.integers(min_value=1, max_value=32 * 1048576), | |
673 | flushes): |
|
794 | input_sizes=strategies.data(), | |
|
795 | flushes=strategies.data(), | |||
|
796 | ) | |||
|
797 | def test_flush_block(self, original, level, chunk_size, input_sizes, flushes): | |||
674 | cctx = zstd.ZstdCompressor(level=level) |
|
798 | cctx = zstd.ZstdCompressor(level=level) | |
675 | chunker = cctx.chunker(chunk_size=chunk_size) |
|
799 | chunker = cctx.chunker(chunk_size=chunk_size) | |
676 |
|
800 | |||
677 | dctx = zstd.ZstdDecompressor() |
|
801 | dctx = zstd.ZstdDecompressor() | |
678 | dobj = dctx.decompressobj() |
|
802 | dobj = dctx.decompressobj() | |
679 |
|
803 | |||
680 | compressed_chunks = [] |
|
804 | compressed_chunks = [] | |
681 | decompressed_chunks = [] |
|
805 | decompressed_chunks = [] | |
682 | i = 0 |
|
806 | i = 0 | |
683 | while True: |
|
807 | while True: | |
684 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
808 | input_size = input_sizes.draw(strategies.integers(1, 4096)) | |
685 | source = original[i:i + input_size] |
|
809 | source = original[i : i + input_size] | |
686 | if not source: |
|
810 | if not source: | |
687 | break |
|
811 | break | |
688 |
|
812 | |||
689 | i += input_size |
|
813 | i += input_size | |
690 |
|
814 | |||
691 | chunks = list(chunker.compress(source)) |
|
815 | chunks = list(chunker.compress(source)) | |
692 | compressed_chunks.extend(chunks) |
|
816 | compressed_chunks.extend(chunks) | |
693 |
decompressed_chunks.append(dobj.decompress(b |
|
817 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
694 |
|
818 | |||
695 | if not flushes.draw(strategies.booleans()): |
|
819 | if not flushes.draw(strategies.booleans()): | |
696 | continue |
|
820 | continue | |
697 |
|
821 | |||
698 | chunks = list(chunker.flush()) |
|
822 | chunks = list(chunker.flush()) | |
699 | compressed_chunks.extend(chunks) |
|
823 | compressed_chunks.extend(chunks) | |
700 |
decompressed_chunks.append(dobj.decompress(b |
|
824 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
701 |
|
825 | |||
702 |
self.assertEqual(b |
|
826 | self.assertEqual(b"".join(decompressed_chunks), original[0:i]) | |
703 |
|
827 | |||
704 | chunks = list(chunker.finish()) |
|
828 | chunks = list(chunker.finish()) | |
705 | compressed_chunks.extend(chunks) |
|
829 | compressed_chunks.extend(chunks) | |
706 |
decompressed_chunks.append(dobj.decompress(b |
|
830 | decompressed_chunks.append(dobj.decompress(b"".join(chunks))) | |
707 |
|
831 | |||
708 | self.assertEqual(dctx.decompress(b''.join(compressed_chunks), |
|
832 | self.assertEqual( | |
709 | max_output_size=len(original)), |
|
833 | dctx.decompress(b"".join(compressed_chunks), max_output_size=len(original)), | |
710 |
|
|
834 | original, | |
711 | self.assertEqual(b''.join(decompressed_chunks), original) No newline at end of file |
|
835 | ) | |
|
836 | self.assertEqual(b"".join(decompressed_chunks), original) |
@@ -1,228 +1,241 b'' | |||||
1 | import sys |
|
1 | import sys | |
2 | import unittest |
|
2 | import unittest | |
3 |
|
3 | |||
4 | import zstandard as zstd |
|
4 | import zstandard as zstd | |
5 |
|
5 | |||
6 |
from . |
|
6 | from .common import ( | |
7 | make_cffi, |
|
7 | make_cffi, | |
|
8 | TestCase, | |||
8 | ) |
|
9 | ) | |
9 |
|
10 | |||
10 |
|
11 | |||
11 | @make_cffi |
|
12 | @make_cffi | |
12 |
class TestCompressionParameters( |
|
13 | class TestCompressionParameters(TestCase): | |
13 | def test_bounds(self): |
|
14 | def test_bounds(self): | |
14 |
zstd.ZstdCompressionParameters( |
|
15 | zstd.ZstdCompressionParameters( | |
15 |
|
|
16 | window_log=zstd.WINDOWLOG_MIN, | |
16 |
|
|
17 | chain_log=zstd.CHAINLOG_MIN, | |
17 |
|
|
18 | hash_log=zstd.HASHLOG_MIN, | |
18 | min_match=zstd.MINMATCH_MIN + 1, |
|
19 | search_log=zstd.SEARCHLOG_MIN, | |
19 | target_length=zstd.TARGETLENGTH_MIN, |
|
20 | min_match=zstd.MINMATCH_MIN + 1, | |
20 | strategy=zstd.STRATEGY_FAST) |
|
21 | target_length=zstd.TARGETLENGTH_MIN, | |
|
22 | strategy=zstd.STRATEGY_FAST, | |||
|
23 | ) | |||
21 |
|
24 | |||
22 |
zstd.ZstdCompressionParameters( |
|
25 | zstd.ZstdCompressionParameters( | |
23 |
|
|
26 | window_log=zstd.WINDOWLOG_MAX, | |
24 |
|
|
27 | chain_log=zstd.CHAINLOG_MAX, | |
25 |
|
|
28 | hash_log=zstd.HASHLOG_MAX, | |
26 | min_match=zstd.MINMATCH_MAX - 1, |
|
29 | search_log=zstd.SEARCHLOG_MAX, | |
27 | target_length=zstd.TARGETLENGTH_MAX, |
|
30 | min_match=zstd.MINMATCH_MAX - 1, | |
28 | strategy=zstd.STRATEGY_BTULTRA2) |
|
31 | target_length=zstd.TARGETLENGTH_MAX, | |
|
32 | strategy=zstd.STRATEGY_BTULTRA2, | |||
|
33 | ) | |||
29 |
|
34 | |||
30 | def test_from_level(self): |
|
35 | def test_from_level(self): | |
31 | p = zstd.ZstdCompressionParameters.from_level(1) |
|
36 | p = zstd.ZstdCompressionParameters.from_level(1) | |
32 | self.assertIsInstance(p, zstd.CompressionParameters) |
|
37 | self.assertIsInstance(p, zstd.CompressionParameters) | |
33 |
|
38 | |||
34 | self.assertEqual(p.window_log, 19) |
|
39 | self.assertEqual(p.window_log, 19) | |
35 |
|
40 | |||
36 | p = zstd.ZstdCompressionParameters.from_level(-4) |
|
41 | p = zstd.ZstdCompressionParameters.from_level(-4) | |
37 | self.assertEqual(p.window_log, 19) |
|
42 | self.assertEqual(p.window_log, 19) | |
38 |
|
43 | |||
39 | def test_members(self): |
|
44 | def test_members(self): | |
40 |
p = zstd.ZstdCompressionParameters( |
|
45 | p = zstd.ZstdCompressionParameters( | |
41 | chain_log=6, |
|
46 | window_log=10, | |
42 | hash_log=7, |
|
47 | chain_log=6, | |
43 | search_log=4, |
|
48 | hash_log=7, | |
44 | min_match=5, |
|
49 | search_log=4, | |
45 | target_length=8, |
|
50 | min_match=5, | |
46 | strategy=1) |
|
51 | target_length=8, | |
|
52 | strategy=1, | |||
|
53 | ) | |||
47 | self.assertEqual(p.window_log, 10) |
|
54 | self.assertEqual(p.window_log, 10) | |
48 | self.assertEqual(p.chain_log, 6) |
|
55 | self.assertEqual(p.chain_log, 6) | |
49 | self.assertEqual(p.hash_log, 7) |
|
56 | self.assertEqual(p.hash_log, 7) | |
50 | self.assertEqual(p.search_log, 4) |
|
57 | self.assertEqual(p.search_log, 4) | |
51 | self.assertEqual(p.min_match, 5) |
|
58 | self.assertEqual(p.min_match, 5) | |
52 | self.assertEqual(p.target_length, 8) |
|
59 | self.assertEqual(p.target_length, 8) | |
53 | self.assertEqual(p.compression_strategy, 1) |
|
60 | self.assertEqual(p.compression_strategy, 1) | |
54 |
|
61 | |||
55 | p = zstd.ZstdCompressionParameters(compression_level=2) |
|
62 | p = zstd.ZstdCompressionParameters(compression_level=2) | |
56 | self.assertEqual(p.compression_level, 2) |
|
63 | self.assertEqual(p.compression_level, 2) | |
57 |
|
64 | |||
58 | p = zstd.ZstdCompressionParameters(threads=4) |
|
65 | p = zstd.ZstdCompressionParameters(threads=4) | |
59 | self.assertEqual(p.threads, 4) |
|
66 | self.assertEqual(p.threads, 4) | |
60 |
|
67 | |||
61 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, |
|
68 | p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576, overlap_log=6) | |
62 | overlap_log=6) |
|
|||
63 | self.assertEqual(p.threads, 2) |
|
69 | self.assertEqual(p.threads, 2) | |
64 | self.assertEqual(p.job_size, 1048576) |
|
70 | self.assertEqual(p.job_size, 1048576) | |
65 | self.assertEqual(p.overlap_log, 6) |
|
71 | self.assertEqual(p.overlap_log, 6) | |
66 | self.assertEqual(p.overlap_size_log, 6) |
|
72 | self.assertEqual(p.overlap_size_log, 6) | |
67 |
|
73 | |||
68 | p = zstd.ZstdCompressionParameters(compression_level=-1) |
|
74 | p = zstd.ZstdCompressionParameters(compression_level=-1) | |
69 | self.assertEqual(p.compression_level, -1) |
|
75 | self.assertEqual(p.compression_level, -1) | |
70 |
|
76 | |||
71 | p = zstd.ZstdCompressionParameters(compression_level=-2) |
|
77 | p = zstd.ZstdCompressionParameters(compression_level=-2) | |
72 | self.assertEqual(p.compression_level, -2) |
|
78 | self.assertEqual(p.compression_level, -2) | |
73 |
|
79 | |||
74 | p = zstd.ZstdCompressionParameters(force_max_window=True) |
|
80 | p = zstd.ZstdCompressionParameters(force_max_window=True) | |
75 | self.assertEqual(p.force_max_window, 1) |
|
81 | self.assertEqual(p.force_max_window, 1) | |
76 |
|
82 | |||
77 | p = zstd.ZstdCompressionParameters(enable_ldm=True) |
|
83 | p = zstd.ZstdCompressionParameters(enable_ldm=True) | |
78 | self.assertEqual(p.enable_ldm, 1) |
|
84 | self.assertEqual(p.enable_ldm, 1) | |
79 |
|
85 | |||
80 | p = zstd.ZstdCompressionParameters(ldm_hash_log=7) |
|
86 | p = zstd.ZstdCompressionParameters(ldm_hash_log=7) | |
81 | self.assertEqual(p.ldm_hash_log, 7) |
|
87 | self.assertEqual(p.ldm_hash_log, 7) | |
82 |
|
88 | |||
83 | p = zstd.ZstdCompressionParameters(ldm_min_match=6) |
|
89 | p = zstd.ZstdCompressionParameters(ldm_min_match=6) | |
84 | self.assertEqual(p.ldm_min_match, 6) |
|
90 | self.assertEqual(p.ldm_min_match, 6) | |
85 |
|
91 | |||
86 | p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7) |
|
92 | p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7) | |
87 | self.assertEqual(p.ldm_bucket_size_log, 7) |
|
93 | self.assertEqual(p.ldm_bucket_size_log, 7) | |
88 |
|
94 | |||
89 | p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) |
|
95 | p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) | |
90 | self.assertEqual(p.ldm_hash_every_log, 8) |
|
96 | self.assertEqual(p.ldm_hash_every_log, 8) | |
91 | self.assertEqual(p.ldm_hash_rate_log, 8) |
|
97 | self.assertEqual(p.ldm_hash_rate_log, 8) | |
92 |
|
98 | |||
93 | def test_estimated_compression_context_size(self): |
|
99 | def test_estimated_compression_context_size(self): | |
94 |
p = zstd.ZstdCompressionParameters( |
|
100 | p = zstd.ZstdCompressionParameters( | |
95 | chain_log=16, |
|
101 | window_log=20, | |
96 | hash_log=17, |
|
102 | chain_log=16, | |
97 | search_log=1, |
|
103 | hash_log=17, | |
98 | min_match=5, |
|
104 | search_log=1, | |
99 | target_length=16, |
|
105 | min_match=5, | |
100 | strategy=zstd.STRATEGY_DFAST) |
|
106 | target_length=16, | |
|
107 | strategy=zstd.STRATEGY_DFAST, | |||
|
108 | ) | |||
101 |
|
109 | |||
102 | # 32-bit has slightly different values from 64-bit. |
|
110 | # 32-bit has slightly different values from 64-bit. | |
103 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144, |
|
111 | self.assertAlmostEqual( | |
104 | delta=250) |
|
112 | p.estimated_compression_context_size(), 1294464, delta=400 | |
|
113 | ) | |||
105 |
|
114 | |||
106 | def test_strategy(self): |
|
115 | def test_strategy(self): | |
107 | with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'): |
|
116 | with self.assertRaisesRegex( | |
|
117 | ValueError, "cannot specify both compression_strategy" | |||
|
118 | ): | |||
108 | zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0) |
|
119 | zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0) | |
109 |
|
120 | |||
110 | p = zstd.ZstdCompressionParameters(strategy=2) |
|
121 | p = zstd.ZstdCompressionParameters(strategy=2) | |
111 | self.assertEqual(p.compression_strategy, 2) |
|
122 | self.assertEqual(p.compression_strategy, 2) | |
112 |
|
123 | |||
113 | p = zstd.ZstdCompressionParameters(strategy=3) |
|
124 | p = zstd.ZstdCompressionParameters(strategy=3) | |
114 | self.assertEqual(p.compression_strategy, 3) |
|
125 | self.assertEqual(p.compression_strategy, 3) | |
115 |
|
126 | |||
116 | def test_ldm_hash_rate_log(self): |
|
127 | def test_ldm_hash_rate_log(self): | |
117 | with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'): |
|
128 | with self.assertRaisesRegex( | |
|
129 | ValueError, "cannot specify both ldm_hash_rate_log" | |||
|
130 | ): | |||
118 | zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4) |
|
131 | zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4) | |
119 |
|
132 | |||
120 | p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) |
|
133 | p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8) | |
121 | self.assertEqual(p.ldm_hash_every_log, 8) |
|
134 | self.assertEqual(p.ldm_hash_every_log, 8) | |
122 |
|
135 | |||
123 | p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16) |
|
136 | p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16) | |
124 | self.assertEqual(p.ldm_hash_every_log, 16) |
|
137 | self.assertEqual(p.ldm_hash_every_log, 16) | |
125 |
|
138 | |||
126 | def test_overlap_log(self): |
|
139 | def test_overlap_log(self): | |
127 |
with self.assertRaisesRegex |
|
140 | with self.assertRaisesRegex(ValueError, "cannot specify both overlap_log"): | |
128 | zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9) |
|
141 | zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9) | |
129 |
|
142 | |||
130 | p = zstd.ZstdCompressionParameters(overlap_log=2) |
|
143 | p = zstd.ZstdCompressionParameters(overlap_log=2) | |
131 | self.assertEqual(p.overlap_log, 2) |
|
144 | self.assertEqual(p.overlap_log, 2) | |
132 | self.assertEqual(p.overlap_size_log, 2) |
|
145 | self.assertEqual(p.overlap_size_log, 2) | |
133 |
|
146 | |||
134 | p = zstd.ZstdCompressionParameters(overlap_size_log=4) |
|
147 | p = zstd.ZstdCompressionParameters(overlap_size_log=4) | |
135 | self.assertEqual(p.overlap_log, 4) |
|
148 | self.assertEqual(p.overlap_log, 4) | |
136 | self.assertEqual(p.overlap_size_log, 4) |
|
149 | self.assertEqual(p.overlap_size_log, 4) | |
137 |
|
150 | |||
138 |
|
151 | |||
139 | @make_cffi |
|
152 | @make_cffi | |
140 |
class TestFrameParameters( |
|
153 | class TestFrameParameters(TestCase): | |
141 | def test_invalid_type(self): |
|
154 | def test_invalid_type(self): | |
142 | with self.assertRaises(TypeError): |
|
155 | with self.assertRaises(TypeError): | |
143 | zstd.get_frame_parameters(None) |
|
156 | zstd.get_frame_parameters(None) | |
144 |
|
157 | |||
145 | # Python 3 doesn't appear to convert unicode to Py_buffer. |
|
158 | # Python 3 doesn't appear to convert unicode to Py_buffer. | |
146 | if sys.version_info[0] >= 3: |
|
159 | if sys.version_info[0] >= 3: | |
147 | with self.assertRaises(TypeError): |
|
160 | with self.assertRaises(TypeError): | |
148 |
zstd.get_frame_parameters(u |
|
161 | zstd.get_frame_parameters(u"foobarbaz") | |
149 | else: |
|
162 | else: | |
150 | # CPython will convert unicode to Py_buffer. But CFFI won't. |
|
163 | # CPython will convert unicode to Py_buffer. But CFFI won't. | |
151 |
if zstd.backend == |
|
164 | if zstd.backend == "cffi": | |
152 | with self.assertRaises(TypeError): |
|
165 | with self.assertRaises(TypeError): | |
153 |
zstd.get_frame_parameters(u |
|
166 | zstd.get_frame_parameters(u"foobarbaz") | |
154 | else: |
|
167 | else: | |
155 | with self.assertRaises(zstd.ZstdError): |
|
168 | with self.assertRaises(zstd.ZstdError): | |
156 |
zstd.get_frame_parameters(u |
|
169 | zstd.get_frame_parameters(u"foobarbaz") | |
157 |
|
170 | |||
158 | def test_invalid_input_sizes(self): |
|
171 | def test_invalid_input_sizes(self): | |
159 |
with self.assertRaisesRegex |
|
172 | with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): | |
160 |
zstd.get_frame_parameters(b |
|
173 | zstd.get_frame_parameters(b"") | |
161 |
|
174 | |||
162 |
with self.assertRaisesRegex |
|
175 | with self.assertRaisesRegex(zstd.ZstdError, "not enough data for frame"): | |
163 | zstd.get_frame_parameters(zstd.FRAME_HEADER) |
|
176 | zstd.get_frame_parameters(zstd.FRAME_HEADER) | |
164 |
|
177 | |||
165 | def test_invalid_frame(self): |
|
178 | def test_invalid_frame(self): | |
166 |
with self.assertRaisesRegex |
|
179 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
167 |
zstd.get_frame_parameters(b |
|
180 | zstd.get_frame_parameters(b"foobarbaz") | |
168 |
|
181 | |||
169 | def test_attributes(self): |
|
182 | def test_attributes(self): | |
170 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
183 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x00") | |
171 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
184 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
172 | self.assertEqual(params.window_size, 1024) |
|
185 | self.assertEqual(params.window_size, 1024) | |
173 | self.assertEqual(params.dict_id, 0) |
|
186 | self.assertEqual(params.dict_id, 0) | |
174 | self.assertFalse(params.has_checksum) |
|
187 | self.assertFalse(params.has_checksum) | |
175 |
|
188 | |||
176 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. |
|
189 | # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte. | |
177 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
190 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x01\x00\xff") | |
178 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
191 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
179 | self.assertEqual(params.window_size, 1024) |
|
192 | self.assertEqual(params.window_size, 1024) | |
180 | self.assertEqual(params.dict_id, 255) |
|
193 | self.assertEqual(params.dict_id, 255) | |
181 | self.assertFalse(params.has_checksum) |
|
194 | self.assertFalse(params.has_checksum) | |
182 |
|
195 | |||
183 | # Lowest 3rd bit indicates if checksum is present. |
|
196 | # Lowest 3rd bit indicates if checksum is present. | |
184 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
197 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x04\x00") | |
185 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
198 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
186 | self.assertEqual(params.window_size, 1024) |
|
199 | self.assertEqual(params.window_size, 1024) | |
187 | self.assertEqual(params.dict_id, 0) |
|
200 | self.assertEqual(params.dict_id, 0) | |
188 | self.assertTrue(params.has_checksum) |
|
201 | self.assertTrue(params.has_checksum) | |
189 |
|
202 | |||
190 | # Upper 2 bits indicate content size. |
|
203 | # Upper 2 bits indicate content size. | |
191 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
204 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x40\x00\xff\x00") | |
192 | self.assertEqual(params.content_size, 511) |
|
205 | self.assertEqual(params.content_size, 511) | |
193 | self.assertEqual(params.window_size, 1024) |
|
206 | self.assertEqual(params.window_size, 1024) | |
194 | self.assertEqual(params.dict_id, 0) |
|
207 | self.assertEqual(params.dict_id, 0) | |
195 | self.assertFalse(params.has_checksum) |
|
208 | self.assertFalse(params.has_checksum) | |
196 |
|
209 | |||
197 | # Window descriptor is 2nd byte after frame header. |
|
210 | # Window descriptor is 2nd byte after frame header. | |
198 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
211 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x00\x40") | |
199 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
212 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
200 | self.assertEqual(params.window_size, 262144) |
|
213 | self.assertEqual(params.window_size, 262144) | |
201 | self.assertEqual(params.dict_id, 0) |
|
214 | self.assertEqual(params.dict_id, 0) | |
202 | self.assertFalse(params.has_checksum) |
|
215 | self.assertFalse(params.has_checksum) | |
203 |
|
216 | |||
204 | # Set multiple things. |
|
217 | # Set multiple things. | |
205 |
params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b |
|
218 | params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b"\x45\x40\x0f\x10\x00") | |
206 | self.assertEqual(params.content_size, 272) |
|
219 | self.assertEqual(params.content_size, 272) | |
207 | self.assertEqual(params.window_size, 262144) |
|
220 | self.assertEqual(params.window_size, 262144) | |
208 | self.assertEqual(params.dict_id, 15) |
|
221 | self.assertEqual(params.dict_id, 15) | |
209 | self.assertTrue(params.has_checksum) |
|
222 | self.assertTrue(params.has_checksum) | |
210 |
|
223 | |||
211 | def test_input_types(self): |
|
224 | def test_input_types(self): | |
212 |
v = zstd.FRAME_HEADER + b |
|
225 | v = zstd.FRAME_HEADER + b"\x00\x00" | |
213 |
|
226 | |||
214 | mutable_array = bytearray(len(v)) |
|
227 | mutable_array = bytearray(len(v)) | |
215 | mutable_array[:] = v |
|
228 | mutable_array[:] = v | |
216 |
|
229 | |||
217 | sources = [ |
|
230 | sources = [ | |
218 | memoryview(v), |
|
231 | memoryview(v), | |
219 | bytearray(v), |
|
232 | bytearray(v), | |
220 | mutable_array, |
|
233 | mutable_array, | |
221 | ] |
|
234 | ] | |
222 |
|
235 | |||
223 | for source in sources: |
|
236 | for source in sources: | |
224 | params = zstd.get_frame_parameters(source) |
|
237 | params = zstd.get_frame_parameters(source) | |
225 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) |
|
238 | self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN) | |
226 | self.assertEqual(params.window_size, 1024) |
|
239 | self.assertEqual(params.window_size, 1024) | |
227 | self.assertEqual(params.dict_id, 0) |
|
240 | self.assertEqual(params.dict_id, 0) | |
228 | self.assertFalse(params.has_checksum) |
|
241 | self.assertFalse(params.has_checksum) |
@@ -1,76 +1,105 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 | import sys |
|
3 | import sys | |
4 | import unittest |
|
4 | import unittest | |
5 |
|
5 | |||
6 | try: |
|
6 | try: | |
7 | import hypothesis |
|
7 | import hypothesis | |
8 | import hypothesis.strategies as strategies |
|
8 | import hypothesis.strategies as strategies | |
9 | except ImportError: |
|
9 | except ImportError: | |
10 |
raise unittest.SkipTest( |
|
10 | raise unittest.SkipTest("hypothesis not available") | |
11 |
|
11 | |||
12 | import zstandard as zstd |
|
12 | import zstandard as zstd | |
13 |
|
13 | |||
14 | from .common import ( |
|
14 | from .common import ( | |
15 | make_cffi, |
|
15 | make_cffi, | |
|
16 | TestCase, | |||
|
17 | ) | |||
|
18 | ||||
|
19 | ||||
|
20 | s_windowlog = strategies.integers( | |||
|
21 | min_value=zstd.WINDOWLOG_MIN, max_value=zstd.WINDOWLOG_MAX | |||
|
22 | ) | |||
|
23 | s_chainlog = strategies.integers( | |||
|
24 | min_value=zstd.CHAINLOG_MIN, max_value=zstd.CHAINLOG_MAX | |||
|
25 | ) | |||
|
26 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, max_value=zstd.HASHLOG_MAX) | |||
|
27 | s_searchlog = strategies.integers( | |||
|
28 | min_value=zstd.SEARCHLOG_MIN, max_value=zstd.SEARCHLOG_MAX | |||
|
29 | ) | |||
|
30 | s_minmatch = strategies.integers( | |||
|
31 | min_value=zstd.MINMATCH_MIN, max_value=zstd.MINMATCH_MAX | |||
|
32 | ) | |||
|
33 | s_targetlength = strategies.integers( | |||
|
34 | min_value=zstd.TARGETLENGTH_MIN, max_value=zstd.TARGETLENGTH_MAX | |||
|
35 | ) | |||
|
36 | s_strategy = strategies.sampled_from( | |||
|
37 | ( | |||
|
38 | zstd.STRATEGY_FAST, | |||
|
39 | zstd.STRATEGY_DFAST, | |||
|
40 | zstd.STRATEGY_GREEDY, | |||
|
41 | zstd.STRATEGY_LAZY, | |||
|
42 | zstd.STRATEGY_LAZY2, | |||
|
43 | zstd.STRATEGY_BTLAZY2, | |||
|
44 | zstd.STRATEGY_BTOPT, | |||
|
45 | zstd.STRATEGY_BTULTRA, | |||
|
46 | zstd.STRATEGY_BTULTRA2, | |||
|
47 | ) | |||
16 | ) |
|
48 | ) | |
17 |
|
49 | |||
18 |
|
50 | |||
19 | s_windowlog = strategies.integers(min_value=zstd.WINDOWLOG_MIN, |
|
51 | @make_cffi | |
20 | max_value=zstd.WINDOWLOG_MAX) |
|
52 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
21 | s_chainlog = strategies.integers(min_value=zstd.CHAINLOG_MIN, |
|
53 | class TestCompressionParametersHypothesis(TestCase): | |
22 | max_value=zstd.CHAINLOG_MAX) |
|
54 | @hypothesis.given( | |
23 | s_hashlog = strategies.integers(min_value=zstd.HASHLOG_MIN, |
|
55 | s_windowlog, | |
24 | max_value=zstd.HASHLOG_MAX) |
|
56 | s_chainlog, | |
25 | s_searchlog = strategies.integers(min_value=zstd.SEARCHLOG_MIN, |
|
57 | s_hashlog, | |
26 | max_value=zstd.SEARCHLOG_MAX) |
|
58 | s_searchlog, | |
27 | s_minmatch = strategies.integers(min_value=zstd.MINMATCH_MIN, |
|
59 | s_minmatch, | |
28 | max_value=zstd.MINMATCH_MAX) |
|
60 | s_targetlength, | |
29 | s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN, |
|
61 | s_strategy, | |
30 | max_value=zstd.TARGETLENGTH_MAX) |
|
62 | ) | |
31 | s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST, |
|
63 | def test_valid_init( | |
32 | zstd.STRATEGY_DFAST, |
|
64 | self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy | |
33 | zstd.STRATEGY_GREEDY, |
|
65 | ): | |
34 | zstd.STRATEGY_LAZY, |
|
66 | zstd.ZstdCompressionParameters( | |
35 | zstd.STRATEGY_LAZY2, |
|
67 | window_log=windowlog, | |
36 | zstd.STRATEGY_BTLAZY2, |
|
68 | chain_log=chainlog, | |
37 | zstd.STRATEGY_BTOPT, |
|
69 | hash_log=hashlog, | |
38 | zstd.STRATEGY_BTULTRA, |
|
70 | search_log=searchlog, | |
39 | zstd.STRATEGY_BTULTRA2)) |
|
71 | min_match=minmatch, | |
40 |
|
72 | target_length=targetlength, | ||
|
73 | strategy=strategy, | |||
|
74 | ) | |||
41 |
|
75 | |||
42 | @make_cffi |
|
76 | @hypothesis.given( | |
43 | @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set') |
|
77 | s_windowlog, | |
44 | class TestCompressionParametersHypothesis(unittest.TestCase): |
|
78 | s_chainlog, | |
45 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, |
|
79 | s_hashlog, | |
46 | s_minmatch, s_targetlength, s_strategy) |
|
80 | s_searchlog, | |
47 | def test_valid_init(self, windowlog, chainlog, hashlog, searchlog, |
|
81 | s_minmatch, | |
48 | minmatch, targetlength, strategy): |
|
82 | s_targetlength, | |
49 | zstd.ZstdCompressionParameters(window_log=windowlog, |
|
83 | s_strategy, | |
50 | chain_log=chainlog, |
|
84 | ) | |
51 | hash_log=hashlog, |
|
85 | def test_estimated_compression_context_size( | |
52 | search_log=searchlog, |
|
86 | self, windowlog, chainlog, hashlog, searchlog, minmatch, targetlength, strategy | |
53 | min_match=minmatch, |
|
87 | ): | |
54 | target_length=targetlength, |
|
88 | if minmatch == zstd.MINMATCH_MIN and strategy in ( | |
55 | strategy=strategy) |
|
89 | zstd.STRATEGY_FAST, | |
56 |
|
90 | zstd.STRATEGY_GREEDY, | ||
57 | @hypothesis.given(s_windowlog, s_chainlog, s_hashlog, s_searchlog, |
|
91 | ): | |
58 | s_minmatch, s_targetlength, s_strategy) |
|
|||
59 | def test_estimated_compression_context_size(self, windowlog, chainlog, |
|
|||
60 | hashlog, searchlog, |
|
|||
61 | minmatch, targetlength, |
|
|||
62 | strategy): |
|
|||
63 | if minmatch == zstd.MINMATCH_MIN and strategy in (zstd.STRATEGY_FAST, zstd.STRATEGY_GREEDY): |
|
|||
64 | minmatch += 1 |
|
92 | minmatch += 1 | |
65 | elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST: |
|
93 | elif minmatch == zstd.MINMATCH_MAX and strategy != zstd.STRATEGY_FAST: | |
66 | minmatch -= 1 |
|
94 | minmatch -= 1 | |
67 |
|
95 | |||
68 |
p = zstd.ZstdCompressionParameters( |
|
96 | p = zstd.ZstdCompressionParameters( | |
69 | chain_log=chainlog, |
|
97 | window_log=windowlog, | |
70 | hash_log=hashlog, |
|
98 | chain_log=chainlog, | |
71 | search_log=searchlog, |
|
99 | hash_log=hashlog, | |
72 | min_match=minmatch, |
|
100 | search_log=searchlog, | |
73 | target_length=targetlength, |
|
101 | min_match=minmatch, | |
74 | strategy=strategy) |
|
102 | target_length=targetlength, | |
|
103 | strategy=strategy, | |||
|
104 | ) | |||
75 | size = p.estimated_compression_context_size() |
|
105 | size = p.estimated_compression_context_size() | |
76 |
|
This diff has been collapsed as it changes many lines, (729 lines changed) Show them Hide them | |||||
@@ -1,1611 +1,1670 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 | import random |
|
3 | import random | |
4 | import struct |
|
4 | import struct | |
5 | import sys |
|
5 | import sys | |
6 | import tempfile |
|
6 | import tempfile | |
7 | import unittest |
|
7 | import unittest | |
8 |
|
8 | |||
9 | import zstandard as zstd |
|
9 | import zstandard as zstd | |
10 |
|
10 | |||
11 | from .common import ( |
|
11 | from .common import ( | |
12 | generate_samples, |
|
12 | generate_samples, | |
13 | make_cffi, |
|
13 | make_cffi, | |
14 | NonClosingBytesIO, |
|
14 | NonClosingBytesIO, | |
15 | OpCountingBytesIO, |
|
15 | OpCountingBytesIO, | |
|
16 | TestCase, | |||
16 | ) |
|
17 | ) | |
17 |
|
18 | |||
18 |
|
19 | |||
19 | if sys.version_info[0] >= 3: |
|
20 | if sys.version_info[0] >= 3: | |
20 | next = lambda it: it.__next__() |
|
21 | next = lambda it: it.__next__() | |
21 | else: |
|
22 | else: | |
22 | next = lambda it: it.next() |
|
23 | next = lambda it: it.next() | |
23 |
|
24 | |||
24 |
|
25 | |||
25 | @make_cffi |
|
26 | @make_cffi | |
26 |
class TestFrameHeaderSize( |
|
27 | class TestFrameHeaderSize(TestCase): | |
27 | def test_empty(self): |
|
28 | def test_empty(self): | |
28 |
with self.assertRaisesRegex |
|
29 | with self.assertRaisesRegex( | |
29 | zstd.ZstdError, 'could not determine frame header size: Src size ' |
|
30 | zstd.ZstdError, | |
30 | 'is incorrect'): |
|
31 | "could not determine frame header size: Src size " "is incorrect", | |
31 | zstd.frame_header_size(b'') |
|
32 | ): | |
|
33 | zstd.frame_header_size(b"") | |||
32 |
|
34 | |||
33 | def test_too_small(self): |
|
35 | def test_too_small(self): | |
34 |
with self.assertRaisesRegex |
|
36 | with self.assertRaisesRegex( | |
35 | zstd.ZstdError, 'could not determine frame header size: Src size ' |
|
37 | zstd.ZstdError, | |
36 | 'is incorrect'): |
|
38 | "could not determine frame header size: Src size " "is incorrect", | |
37 | zstd.frame_header_size(b'foob') |
|
39 | ): | |
|
40 | zstd.frame_header_size(b"foob") | |||
38 |
|
41 | |||
39 | def test_basic(self): |
|
42 | def test_basic(self): | |
40 | # It doesn't matter that it isn't a valid frame. |
|
43 | # It doesn't matter that it isn't a valid frame. | |
41 |
self.assertEqual(zstd.frame_header_size(b |
|
44 | self.assertEqual(zstd.frame_header_size(b"long enough but no magic"), 6) | |
42 |
|
45 | |||
43 |
|
46 | |||
44 | @make_cffi |
|
47 | @make_cffi | |
45 |
class TestFrameContentSize( |
|
48 | class TestFrameContentSize(TestCase): | |
46 | def test_empty(self): |
|
49 | def test_empty(self): | |
47 |
with self.assertRaisesRegex |
|
50 | with self.assertRaisesRegex( | |
48 |
|
|
51 | zstd.ZstdError, "error when determining content size" | |
49 | zstd.frame_content_size(b'') |
|
52 | ): | |
|
53 | zstd.frame_content_size(b"") | |||
50 |
|
54 | |||
51 | def test_too_small(self): |
|
55 | def test_too_small(self): | |
52 |
with self.assertRaisesRegex |
|
56 | with self.assertRaisesRegex( | |
53 |
|
|
57 | zstd.ZstdError, "error when determining content size" | |
54 | zstd.frame_content_size(b'foob') |
|
58 | ): | |
|
59 | zstd.frame_content_size(b"foob") | |||
55 |
|
60 | |||
56 | def test_bad_frame(self): |
|
61 | def test_bad_frame(self): | |
57 |
with self.assertRaisesRegex |
|
62 | with self.assertRaisesRegex( | |
58 |
|
|
63 | zstd.ZstdError, "error when determining content size" | |
59 | zstd.frame_content_size(b'invalid frame header') |
|
64 | ): | |
|
65 | zstd.frame_content_size(b"invalid frame header") | |||
60 |
|
66 | |||
61 | def test_unknown(self): |
|
67 | def test_unknown(self): | |
62 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
68 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
63 |
frame = cctx.compress(b |
|
69 | frame = cctx.compress(b"foobar") | |
64 |
|
70 | |||
65 | self.assertEqual(zstd.frame_content_size(frame), -1) |
|
71 | self.assertEqual(zstd.frame_content_size(frame), -1) | |
66 |
|
72 | |||
67 | def test_empty(self): |
|
73 | def test_empty(self): | |
68 | cctx = zstd.ZstdCompressor() |
|
74 | cctx = zstd.ZstdCompressor() | |
69 |
frame = cctx.compress(b |
|
75 | frame = cctx.compress(b"") | |
70 |
|
76 | |||
71 | self.assertEqual(zstd.frame_content_size(frame), 0) |
|
77 | self.assertEqual(zstd.frame_content_size(frame), 0) | |
72 |
|
78 | |||
73 | def test_basic(self): |
|
79 | def test_basic(self): | |
74 | cctx = zstd.ZstdCompressor() |
|
80 | cctx = zstd.ZstdCompressor() | |
75 |
frame = cctx.compress(b |
|
81 | frame = cctx.compress(b"foobar") | |
76 |
|
82 | |||
77 | self.assertEqual(zstd.frame_content_size(frame), 6) |
|
83 | self.assertEqual(zstd.frame_content_size(frame), 6) | |
78 |
|
84 | |||
79 |
|
85 | |||
80 | @make_cffi |
|
86 | @make_cffi | |
81 |
class TestDecompressor( |
|
87 | class TestDecompressor(TestCase): | |
82 | def test_memory_size(self): |
|
88 | def test_memory_size(self): | |
83 | dctx = zstd.ZstdDecompressor() |
|
89 | dctx = zstd.ZstdDecompressor() | |
84 |
|
90 | |||
85 | self.assertGreater(dctx.memory_size(), 100) |
|
91 | self.assertGreater(dctx.memory_size(), 100) | |
86 |
|
92 | |||
87 |
|
93 | |||
88 | @make_cffi |
|
94 | @make_cffi | |
89 |
class TestDecompressor_decompress( |
|
95 | class TestDecompressor_decompress(TestCase): | |
90 | def test_empty_input(self): |
|
96 | def test_empty_input(self): | |
91 | dctx = zstd.ZstdDecompressor() |
|
97 | dctx = zstd.ZstdDecompressor() | |
92 |
|
98 | |||
93 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): |
|
99 | with self.assertRaisesRegex( | |
94 | dctx.decompress(b'') |
|
100 | zstd.ZstdError, "error determining content size from frame header" | |
|
101 | ): | |||
|
102 | dctx.decompress(b"") | |||
95 |
|
103 | |||
96 | def test_invalid_input(self): |
|
104 | def test_invalid_input(self): | |
97 | dctx = zstd.ZstdDecompressor() |
|
105 | dctx = zstd.ZstdDecompressor() | |
98 |
|
106 | |||
99 | with self.assertRaisesRegexp(zstd.ZstdError, 'error determining content size from frame header'): |
|
107 | with self.assertRaisesRegex( | |
100 | dctx.decompress(b'foobar') |
|
108 | zstd.ZstdError, "error determining content size from frame header" | |
|
109 | ): | |||
|
110 | dctx.decompress(b"foobar") | |||
101 |
|
111 | |||
102 | def test_input_types(self): |
|
112 | def test_input_types(self): | |
103 | cctx = zstd.ZstdCompressor(level=1) |
|
113 | cctx = zstd.ZstdCompressor(level=1) | |
104 |
compressed = cctx.compress(b |
|
114 | compressed = cctx.compress(b"foo") | |
105 |
|
115 | |||
106 | mutable_array = bytearray(len(compressed)) |
|
116 | mutable_array = bytearray(len(compressed)) | |
107 | mutable_array[:] = compressed |
|
117 | mutable_array[:] = compressed | |
108 |
|
118 | |||
109 | sources = [ |
|
119 | sources = [ | |
110 | memoryview(compressed), |
|
120 | memoryview(compressed), | |
111 | bytearray(compressed), |
|
121 | bytearray(compressed), | |
112 | mutable_array, |
|
122 | mutable_array, | |
113 | ] |
|
123 | ] | |
114 |
|
124 | |||
115 | dctx = zstd.ZstdDecompressor() |
|
125 | dctx = zstd.ZstdDecompressor() | |
116 | for source in sources: |
|
126 | for source in sources: | |
117 |
self.assertEqual(dctx.decompress(source), b |
|
127 | self.assertEqual(dctx.decompress(source), b"foo") | |
118 |
|
128 | |||
119 | def test_no_content_size_in_frame(self): |
|
129 | def test_no_content_size_in_frame(self): | |
120 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
130 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
121 |
compressed = cctx.compress(b |
|
131 | compressed = cctx.compress(b"foobar") | |
122 |
|
132 | |||
123 | dctx = zstd.ZstdDecompressor() |
|
133 | dctx = zstd.ZstdDecompressor() | |
124 | with self.assertRaisesRegexp(zstd.ZstdError, 'could not determine content size in frame header'): |
|
134 | with self.assertRaisesRegex( | |
|
135 | zstd.ZstdError, "could not determine content size in frame header" | |||
|
136 | ): | |||
125 | dctx.decompress(compressed) |
|
137 | dctx.decompress(compressed) | |
126 |
|
138 | |||
127 | def test_content_size_present(self): |
|
139 | def test_content_size_present(self): | |
128 | cctx = zstd.ZstdCompressor() |
|
140 | cctx = zstd.ZstdCompressor() | |
129 |
compressed = cctx.compress(b |
|
141 | compressed = cctx.compress(b"foobar") | |
130 |
|
142 | |||
131 | dctx = zstd.ZstdDecompressor() |
|
143 | dctx = zstd.ZstdDecompressor() | |
132 | decompressed = dctx.decompress(compressed) |
|
144 | decompressed = dctx.decompress(compressed) | |
133 |
self.assertEqual(decompressed, b |
|
145 | self.assertEqual(decompressed, b"foobar") | |
134 |
|
146 | |||
135 | def test_empty_roundtrip(self): |
|
147 | def test_empty_roundtrip(self): | |
136 | cctx = zstd.ZstdCompressor() |
|
148 | cctx = zstd.ZstdCompressor() | |
137 |
compressed = cctx.compress(b |
|
149 | compressed = cctx.compress(b"") | |
138 |
|
150 | |||
139 | dctx = zstd.ZstdDecompressor() |
|
151 | dctx = zstd.ZstdDecompressor() | |
140 | decompressed = dctx.decompress(compressed) |
|
152 | decompressed = dctx.decompress(compressed) | |
141 |
|
153 | |||
142 |
self.assertEqual(decompressed, b |
|
154 | self.assertEqual(decompressed, b"") | |
143 |
|
155 | |||
144 | def test_max_output_size(self): |
|
156 | def test_max_output_size(self): | |
145 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
157 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
146 |
source = b |
|
158 | source = b"foobar" * 256 | |
147 | compressed = cctx.compress(source) |
|
159 | compressed = cctx.compress(source) | |
148 |
|
160 | |||
149 | dctx = zstd.ZstdDecompressor() |
|
161 | dctx = zstd.ZstdDecompressor() | |
150 | # Will fit into buffer exactly the size of input. |
|
162 | # Will fit into buffer exactly the size of input. | |
151 | decompressed = dctx.decompress(compressed, max_output_size=len(source)) |
|
163 | decompressed = dctx.decompress(compressed, max_output_size=len(source)) | |
152 | self.assertEqual(decompressed, source) |
|
164 | self.assertEqual(decompressed, source) | |
153 |
|
165 | |||
154 | # Input size - 1 fails |
|
166 | # Input size - 1 fails | |
155 |
with self.assertRaisesRegex |
|
167 | with self.assertRaisesRegex( | |
156 |
|
|
168 | zstd.ZstdError, "decompression error: did not decompress full frame" | |
|
169 | ): | |||
157 | dctx.decompress(compressed, max_output_size=len(source) - 1) |
|
170 | dctx.decompress(compressed, max_output_size=len(source) - 1) | |
158 |
|
171 | |||
159 | # Input size + 1 works |
|
172 | # Input size + 1 works | |
160 | decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1) |
|
173 | decompressed = dctx.decompress(compressed, max_output_size=len(source) + 1) | |
161 | self.assertEqual(decompressed, source) |
|
174 | self.assertEqual(decompressed, source) | |
162 |
|
175 | |||
163 | # A much larger buffer works. |
|
176 | # A much larger buffer works. | |
164 | decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64) |
|
177 | decompressed = dctx.decompress(compressed, max_output_size=len(source) * 64) | |
165 | self.assertEqual(decompressed, source) |
|
178 | self.assertEqual(decompressed, source) | |
166 |
|
179 | |||
167 | def test_stupidly_large_output_buffer(self): |
|
180 | def test_stupidly_large_output_buffer(self): | |
168 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
181 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
169 |
compressed = cctx.compress(b |
|
182 | compressed = cctx.compress(b"foobar" * 256) | |
170 | dctx = zstd.ZstdDecompressor() |
|
183 | dctx = zstd.ZstdDecompressor() | |
171 |
|
184 | |||
172 | # Will get OverflowError on some Python distributions that can't |
|
185 | # Will get OverflowError on some Python distributions that can't | |
173 | # handle really large integers. |
|
186 | # handle really large integers. | |
174 | with self.assertRaises((MemoryError, OverflowError)): |
|
187 | with self.assertRaises((MemoryError, OverflowError)): | |
175 | dctx.decompress(compressed, max_output_size=2**62) |
|
188 | dctx.decompress(compressed, max_output_size=2 ** 62) | |
176 |
|
189 | |||
177 | def test_dictionary(self): |
|
190 | def test_dictionary(self): | |
178 | samples = [] |
|
191 | samples = [] | |
179 | for i in range(128): |
|
192 | for i in range(128): | |
180 |
samples.append(b |
|
193 | samples.append(b"foo" * 64) | |
181 |
samples.append(b |
|
194 | samples.append(b"bar" * 64) | |
182 |
samples.append(b |
|
195 | samples.append(b"foobar" * 64) | |
183 |
|
196 | |||
184 | d = zstd.train_dictionary(8192, samples) |
|
197 | d = zstd.train_dictionary(8192, samples) | |
185 |
|
198 | |||
186 |
orig = b |
|
199 | orig = b"foobar" * 16384 | |
187 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
200 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
188 | compressed = cctx.compress(orig) |
|
201 | compressed = cctx.compress(orig) | |
189 |
|
202 | |||
190 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
203 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
191 | decompressed = dctx.decompress(compressed) |
|
204 | decompressed = dctx.decompress(compressed) | |
192 |
|
205 | |||
193 | self.assertEqual(decompressed, orig) |
|
206 | self.assertEqual(decompressed, orig) | |
194 |
|
207 | |||
195 | def test_dictionary_multiple(self): |
|
208 | def test_dictionary_multiple(self): | |
196 | samples = [] |
|
209 | samples = [] | |
197 | for i in range(128): |
|
210 | for i in range(128): | |
198 |
samples.append(b |
|
211 | samples.append(b"foo" * 64) | |
199 |
samples.append(b |
|
212 | samples.append(b"bar" * 64) | |
200 |
samples.append(b |
|
213 | samples.append(b"foobar" * 64) | |
201 |
|
214 | |||
202 | d = zstd.train_dictionary(8192, samples) |
|
215 | d = zstd.train_dictionary(8192, samples) | |
203 |
|
216 | |||
204 |
sources = (b |
|
217 | sources = (b"foobar" * 8192, b"foo" * 8192, b"bar" * 8192) | |
205 | compressed = [] |
|
218 | compressed = [] | |
206 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) |
|
219 | cctx = zstd.ZstdCompressor(level=1, dict_data=d) | |
207 | for source in sources: |
|
220 | for source in sources: | |
208 | compressed.append(cctx.compress(source)) |
|
221 | compressed.append(cctx.compress(source)) | |
209 |
|
222 | |||
210 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
223 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
211 | for i in range(len(sources)): |
|
224 | for i in range(len(sources)): | |
212 | decompressed = dctx.decompress(compressed[i]) |
|
225 | decompressed = dctx.decompress(compressed[i]) | |
213 | self.assertEqual(decompressed, sources[i]) |
|
226 | self.assertEqual(decompressed, sources[i]) | |
214 |
|
227 | |||
215 | def test_max_window_size(self): |
|
228 | def test_max_window_size(self): | |
216 |
with open(__file__, |
|
229 | with open(__file__, "rb") as fh: | |
217 | source = fh.read() |
|
230 | source = fh.read() | |
218 |
|
231 | |||
219 | # If we write a content size, the decompressor engages single pass |
|
232 | # If we write a content size, the decompressor engages single pass | |
220 | # mode and the window size doesn't come into play. |
|
233 | # mode and the window size doesn't come into play. | |
221 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
234 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
222 | frame = cctx.compress(source) |
|
235 | frame = cctx.compress(source) | |
223 |
|
236 | |||
224 | dctx = zstd.ZstdDecompressor(max_window_size=2**zstd.WINDOWLOG_MIN) |
|
237 | dctx = zstd.ZstdDecompressor(max_window_size=2 ** zstd.WINDOWLOG_MIN) | |
225 |
|
238 | |||
226 |
with self.assertRaisesRegex |
|
239 | with self.assertRaisesRegex( | |
227 |
zstd.ZstdError, |
|
240 | zstd.ZstdError, "decompression error: Frame requires too much memory" | |
|
241 | ): | |||
228 | dctx.decompress(frame, max_output_size=len(source)) |
|
242 | dctx.decompress(frame, max_output_size=len(source)) | |
229 |
|
243 | |||
230 |
|
244 | |||
231 | @make_cffi |
|
245 | @make_cffi | |
232 |
class TestDecompressor_copy_stream( |
|
246 | class TestDecompressor_copy_stream(TestCase): | |
233 | def test_no_read(self): |
|
247 | def test_no_read(self): | |
234 | source = object() |
|
248 | source = object() | |
235 | dest = io.BytesIO() |
|
249 | dest = io.BytesIO() | |
236 |
|
250 | |||
237 | dctx = zstd.ZstdDecompressor() |
|
251 | dctx = zstd.ZstdDecompressor() | |
238 | with self.assertRaises(ValueError): |
|
252 | with self.assertRaises(ValueError): | |
239 | dctx.copy_stream(source, dest) |
|
253 | dctx.copy_stream(source, dest) | |
240 |
|
254 | |||
241 | def test_no_write(self): |
|
255 | def test_no_write(self): | |
242 | source = io.BytesIO() |
|
256 | source = io.BytesIO() | |
243 | dest = object() |
|
257 | dest = object() | |
244 |
|
258 | |||
245 | dctx = zstd.ZstdDecompressor() |
|
259 | dctx = zstd.ZstdDecompressor() | |
246 | with self.assertRaises(ValueError): |
|
260 | with self.assertRaises(ValueError): | |
247 | dctx.copy_stream(source, dest) |
|
261 | dctx.copy_stream(source, dest) | |
248 |
|
262 | |||
249 | def test_empty(self): |
|
263 | def test_empty(self): | |
250 | source = io.BytesIO() |
|
264 | source = io.BytesIO() | |
251 | dest = io.BytesIO() |
|
265 | dest = io.BytesIO() | |
252 |
|
266 | |||
253 | dctx = zstd.ZstdDecompressor() |
|
267 | dctx = zstd.ZstdDecompressor() | |
254 | # TODO should this raise an error? |
|
268 | # TODO should this raise an error? | |
255 | r, w = dctx.copy_stream(source, dest) |
|
269 | r, w = dctx.copy_stream(source, dest) | |
256 |
|
270 | |||
257 | self.assertEqual(r, 0) |
|
271 | self.assertEqual(r, 0) | |
258 | self.assertEqual(w, 0) |
|
272 | self.assertEqual(w, 0) | |
259 |
self.assertEqual(dest.getvalue(), b |
|
273 | self.assertEqual(dest.getvalue(), b"") | |
260 |
|
274 | |||
261 | def test_large_data(self): |
|
275 | def test_large_data(self): | |
262 | source = io.BytesIO() |
|
276 | source = io.BytesIO() | |
263 | for i in range(255): |
|
277 | for i in range(255): | |
264 |
source.write(struct.Struct( |
|
278 | source.write(struct.Struct(">B").pack(i) * 16384) | |
265 | source.seek(0) |
|
279 | source.seek(0) | |
266 |
|
280 | |||
267 | compressed = io.BytesIO() |
|
281 | compressed = io.BytesIO() | |
268 | cctx = zstd.ZstdCompressor() |
|
282 | cctx = zstd.ZstdCompressor() | |
269 | cctx.copy_stream(source, compressed) |
|
283 | cctx.copy_stream(source, compressed) | |
270 |
|
284 | |||
271 | compressed.seek(0) |
|
285 | compressed.seek(0) | |
272 | dest = io.BytesIO() |
|
286 | dest = io.BytesIO() | |
273 | dctx = zstd.ZstdDecompressor() |
|
287 | dctx = zstd.ZstdDecompressor() | |
274 | r, w = dctx.copy_stream(compressed, dest) |
|
288 | r, w = dctx.copy_stream(compressed, dest) | |
275 |
|
289 | |||
276 | self.assertEqual(r, len(compressed.getvalue())) |
|
290 | self.assertEqual(r, len(compressed.getvalue())) | |
277 | self.assertEqual(w, len(source.getvalue())) |
|
291 | self.assertEqual(w, len(source.getvalue())) | |
278 |
|
292 | |||
279 | def test_read_write_size(self): |
|
293 | def test_read_write_size(self): | |
280 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress( |
|
294 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) | |
281 | b'foobarfoobar')) |
|
|||
282 |
|
295 | |||
283 | dest = OpCountingBytesIO() |
|
296 | dest = OpCountingBytesIO() | |
284 | dctx = zstd.ZstdDecompressor() |
|
297 | dctx = zstd.ZstdDecompressor() | |
285 | r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) |
|
298 | r, w = dctx.copy_stream(source, dest, read_size=1, write_size=1) | |
286 |
|
299 | |||
287 | self.assertEqual(r, len(source.getvalue())) |
|
300 | self.assertEqual(r, len(source.getvalue())) | |
288 |
self.assertEqual(w, len(b |
|
301 | self.assertEqual(w, len(b"foobarfoobar")) | |
289 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) |
|
302 | self.assertEqual(source._read_count, len(source.getvalue()) + 1) | |
290 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
303 | self.assertEqual(dest._write_count, len(dest.getvalue())) | |
291 |
|
304 | |||
292 |
|
305 | |||
293 | @make_cffi |
|
306 | @make_cffi | |
294 |
class TestDecompressor_stream_reader( |
|
307 | class TestDecompressor_stream_reader(TestCase): | |
295 | def test_context_manager(self): |
|
308 | def test_context_manager(self): | |
296 | dctx = zstd.ZstdDecompressor() |
|
309 | dctx = zstd.ZstdDecompressor() | |
297 |
|
310 | |||
298 |
with dctx.stream_reader(b |
|
311 | with dctx.stream_reader(b"foo") as reader: | |
299 |
with self.assertRaisesRegex |
|
312 | with self.assertRaisesRegex(ValueError, "cannot __enter__ multiple times"): | |
300 | with reader as reader2: |
|
313 | with reader as reader2: | |
301 | pass |
|
314 | pass | |
302 |
|
315 | |||
303 | def test_not_implemented(self): |
|
316 | def test_not_implemented(self): | |
304 | dctx = zstd.ZstdDecompressor() |
|
317 | dctx = zstd.ZstdDecompressor() | |
305 |
|
318 | |||
306 |
with dctx.stream_reader(b |
|
319 | with dctx.stream_reader(b"foo") as reader: | |
307 | with self.assertRaises(io.UnsupportedOperation): |
|
320 | with self.assertRaises(io.UnsupportedOperation): | |
308 | reader.readline() |
|
321 | reader.readline() | |
309 |
|
322 | |||
310 | with self.assertRaises(io.UnsupportedOperation): |
|
323 | with self.assertRaises(io.UnsupportedOperation): | |
311 | reader.readlines() |
|
324 | reader.readlines() | |
312 |
|
325 | |||
313 | with self.assertRaises(io.UnsupportedOperation): |
|
326 | with self.assertRaises(io.UnsupportedOperation): | |
314 | iter(reader) |
|
327 | iter(reader) | |
315 |
|
328 | |||
316 | with self.assertRaises(io.UnsupportedOperation): |
|
329 | with self.assertRaises(io.UnsupportedOperation): | |
317 | next(reader) |
|
330 | next(reader) | |
318 |
|
331 | |||
319 | with self.assertRaises(io.UnsupportedOperation): |
|
332 | with self.assertRaises(io.UnsupportedOperation): | |
320 |
reader.write(b |
|
333 | reader.write(b"foo") | |
321 |
|
334 | |||
322 | with self.assertRaises(io.UnsupportedOperation): |
|
335 | with self.assertRaises(io.UnsupportedOperation): | |
323 | reader.writelines([]) |
|
336 | reader.writelines([]) | |
324 |
|
337 | |||
325 | def test_constant_methods(self): |
|
338 | def test_constant_methods(self): | |
326 | dctx = zstd.ZstdDecompressor() |
|
339 | dctx = zstd.ZstdDecompressor() | |
327 |
|
340 | |||
328 |
with dctx.stream_reader(b |
|
341 | with dctx.stream_reader(b"foo") as reader: | |
329 | self.assertFalse(reader.closed) |
|
342 | self.assertFalse(reader.closed) | |
330 | self.assertTrue(reader.readable()) |
|
343 | self.assertTrue(reader.readable()) | |
331 | self.assertFalse(reader.writable()) |
|
344 | self.assertFalse(reader.writable()) | |
332 | self.assertTrue(reader.seekable()) |
|
345 | self.assertTrue(reader.seekable()) | |
333 | self.assertFalse(reader.isatty()) |
|
346 | self.assertFalse(reader.isatty()) | |
334 | self.assertFalse(reader.closed) |
|
347 | self.assertFalse(reader.closed) | |
335 | self.assertIsNone(reader.flush()) |
|
348 | self.assertIsNone(reader.flush()) | |
336 | self.assertFalse(reader.closed) |
|
349 | self.assertFalse(reader.closed) | |
337 |
|
350 | |||
338 | self.assertTrue(reader.closed) |
|
351 | self.assertTrue(reader.closed) | |
339 |
|
352 | |||
340 | def test_read_closed(self): |
|
353 | def test_read_closed(self): | |
341 | dctx = zstd.ZstdDecompressor() |
|
354 | dctx = zstd.ZstdDecompressor() | |
342 |
|
355 | |||
343 |
with dctx.stream_reader(b |
|
356 | with dctx.stream_reader(b"foo") as reader: | |
344 | reader.close() |
|
357 | reader.close() | |
345 | self.assertTrue(reader.closed) |
|
358 | self.assertTrue(reader.closed) | |
346 |
with self.assertRaisesRegex |
|
359 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
347 | reader.read(1) |
|
360 | reader.read(1) | |
348 |
|
361 | |||
349 | def test_read_sizes(self): |
|
362 | def test_read_sizes(self): | |
350 | cctx = zstd.ZstdCompressor() |
|
363 | cctx = zstd.ZstdCompressor() | |
351 |
foo = cctx.compress(b |
|
364 | foo = cctx.compress(b"foo") | |
352 |
|
365 | |||
353 | dctx = zstd.ZstdDecompressor() |
|
366 | dctx = zstd.ZstdDecompressor() | |
354 |
|
367 | |||
355 | with dctx.stream_reader(foo) as reader: |
|
368 | with dctx.stream_reader(foo) as reader: | |
356 | with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'): |
|
369 | with self.assertRaisesRegex( | |
|
370 | ValueError, "cannot read negative amounts less than -1" | |||
|
371 | ): | |||
357 | reader.read(-2) |
|
372 | reader.read(-2) | |
358 |
|
373 | |||
359 |
self.assertEqual(reader.read(0), b |
|
374 | self.assertEqual(reader.read(0), b"") | |
360 |
self.assertEqual(reader.read(), b |
|
375 | self.assertEqual(reader.read(), b"foo") | |
361 |
|
376 | |||
362 | def test_read_buffer(self): |
|
377 | def test_read_buffer(self): | |
363 | cctx = zstd.ZstdCompressor() |
|
378 | cctx = zstd.ZstdCompressor() | |
364 |
|
379 | |||
365 |
source = b |
|
380 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
366 | frame = cctx.compress(source) |
|
381 | frame = cctx.compress(source) | |
367 |
|
382 | |||
368 | dctx = zstd.ZstdDecompressor() |
|
383 | dctx = zstd.ZstdDecompressor() | |
369 |
|
384 | |||
370 | with dctx.stream_reader(frame) as reader: |
|
385 | with dctx.stream_reader(frame) as reader: | |
371 | self.assertEqual(reader.tell(), 0) |
|
386 | self.assertEqual(reader.tell(), 0) | |
372 |
|
387 | |||
373 | # We should get entire frame in one read. |
|
388 | # We should get entire frame in one read. | |
374 | result = reader.read(8192) |
|
389 | result = reader.read(8192) | |
375 | self.assertEqual(result, source) |
|
390 | self.assertEqual(result, source) | |
376 | self.assertEqual(reader.tell(), len(source)) |
|
391 | self.assertEqual(reader.tell(), len(source)) | |
377 |
|
392 | |||
378 | # Read after EOF should return empty bytes. |
|
393 | # Read after EOF should return empty bytes. | |
379 |
self.assertEqual(reader.read(1), b |
|
394 | self.assertEqual(reader.read(1), b"") | |
380 | self.assertEqual(reader.tell(), len(result)) |
|
395 | self.assertEqual(reader.tell(), len(result)) | |
381 |
|
396 | |||
382 | self.assertTrue(reader.closed) |
|
397 | self.assertTrue(reader.closed) | |
383 |
|
398 | |||
384 | def test_read_buffer_small_chunks(self): |
|
399 | def test_read_buffer_small_chunks(self): | |
385 | cctx = zstd.ZstdCompressor() |
|
400 | cctx = zstd.ZstdCompressor() | |
386 |
source = b |
|
401 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
387 | frame = cctx.compress(source) |
|
402 | frame = cctx.compress(source) | |
388 |
|
403 | |||
389 | dctx = zstd.ZstdDecompressor() |
|
404 | dctx = zstd.ZstdDecompressor() | |
390 | chunks = [] |
|
405 | chunks = [] | |
391 |
|
406 | |||
392 | with dctx.stream_reader(frame, read_size=1) as reader: |
|
407 | with dctx.stream_reader(frame, read_size=1) as reader: | |
393 | while True: |
|
408 | while True: | |
394 | chunk = reader.read(1) |
|
409 | chunk = reader.read(1) | |
395 | if not chunk: |
|
410 | if not chunk: | |
396 | break |
|
411 | break | |
397 |
|
412 | |||
398 | chunks.append(chunk) |
|
413 | chunks.append(chunk) | |
399 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
414 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |
400 |
|
415 | |||
401 |
self.assertEqual(b |
|
416 | self.assertEqual(b"".join(chunks), source) | |
402 |
|
417 | |||
403 | def test_read_stream(self): |
|
418 | def test_read_stream(self): | |
404 | cctx = zstd.ZstdCompressor() |
|
419 | cctx = zstd.ZstdCompressor() | |
405 |
source = b |
|
420 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
406 | frame = cctx.compress(source) |
|
421 | frame = cctx.compress(source) | |
407 |
|
422 | |||
408 | dctx = zstd.ZstdDecompressor() |
|
423 | dctx = zstd.ZstdDecompressor() | |
409 | with dctx.stream_reader(io.BytesIO(frame)) as reader: |
|
424 | with dctx.stream_reader(io.BytesIO(frame)) as reader: | |
410 | self.assertEqual(reader.tell(), 0) |
|
425 | self.assertEqual(reader.tell(), 0) | |
411 |
|
426 | |||
412 | chunk = reader.read(8192) |
|
427 | chunk = reader.read(8192) | |
413 | self.assertEqual(chunk, source) |
|
428 | self.assertEqual(chunk, source) | |
414 | self.assertEqual(reader.tell(), len(source)) |
|
429 | self.assertEqual(reader.tell(), len(source)) | |
415 |
self.assertEqual(reader.read(1), b |
|
430 | self.assertEqual(reader.read(1), b"") | |
416 | self.assertEqual(reader.tell(), len(source)) |
|
431 | self.assertEqual(reader.tell(), len(source)) | |
417 | self.assertFalse(reader.closed) |
|
432 | self.assertFalse(reader.closed) | |
418 |
|
433 | |||
419 | self.assertTrue(reader.closed) |
|
434 | self.assertTrue(reader.closed) | |
420 |
|
435 | |||
421 | def test_read_stream_small_chunks(self): |
|
436 | def test_read_stream_small_chunks(self): | |
422 | cctx = zstd.ZstdCompressor() |
|
437 | cctx = zstd.ZstdCompressor() | |
423 |
source = b |
|
438 | source = b"".join([b"foo" * 60, b"bar" * 60, b"baz" * 60]) | |
424 | frame = cctx.compress(source) |
|
439 | frame = cctx.compress(source) | |
425 |
|
440 | |||
426 | dctx = zstd.ZstdDecompressor() |
|
441 | dctx = zstd.ZstdDecompressor() | |
427 | chunks = [] |
|
442 | chunks = [] | |
428 |
|
443 | |||
429 | with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader: |
|
444 | with dctx.stream_reader(io.BytesIO(frame), read_size=1) as reader: | |
430 | while True: |
|
445 | while True: | |
431 | chunk = reader.read(1) |
|
446 | chunk = reader.read(1) | |
432 | if not chunk: |
|
447 | if not chunk: | |
433 | break |
|
448 | break | |
434 |
|
449 | |||
435 | chunks.append(chunk) |
|
450 | chunks.append(chunk) | |
436 | self.assertEqual(reader.tell(), sum(map(len, chunks))) |
|
451 | self.assertEqual(reader.tell(), sum(map(len, chunks))) | |
437 |
|
452 | |||
438 |
self.assertEqual(b |
|
453 | self.assertEqual(b"".join(chunks), source) | |
439 |
|
454 | |||
440 | def test_read_after_exit(self): |
|
455 | def test_read_after_exit(self): | |
441 | cctx = zstd.ZstdCompressor() |
|
456 | cctx = zstd.ZstdCompressor() | |
442 |
frame = cctx.compress(b |
|
457 | frame = cctx.compress(b"foo" * 60) | |
443 |
|
458 | |||
444 | dctx = zstd.ZstdDecompressor() |
|
459 | dctx = zstd.ZstdDecompressor() | |
445 |
|
460 | |||
446 | with dctx.stream_reader(frame) as reader: |
|
461 | with dctx.stream_reader(frame) as reader: | |
447 | while reader.read(16): |
|
462 | while reader.read(16): | |
448 | pass |
|
463 | pass | |
449 |
|
464 | |||
450 | self.assertTrue(reader.closed) |
|
465 | self.assertTrue(reader.closed) | |
451 |
|
466 | |||
452 |
with self.assertRaisesRegex |
|
467 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
453 | reader.read(10) |
|
468 | reader.read(10) | |
454 |
|
469 | |||
455 | def test_illegal_seeks(self): |
|
470 | def test_illegal_seeks(self): | |
456 | cctx = zstd.ZstdCompressor() |
|
471 | cctx = zstd.ZstdCompressor() | |
457 |
frame = cctx.compress(b |
|
472 | frame = cctx.compress(b"foo" * 60) | |
458 |
|
473 | |||
459 | dctx = zstd.ZstdDecompressor() |
|
474 | dctx = zstd.ZstdDecompressor() | |
460 |
|
475 | |||
461 | with dctx.stream_reader(frame) as reader: |
|
476 | with dctx.stream_reader(frame) as reader: | |
462 |
with self.assertRaisesRegex |
|
477 | with self.assertRaisesRegex(ValueError, "cannot seek to negative position"): | |
463 | 'cannot seek to negative position'): |
|
|||
464 | reader.seek(-1, os.SEEK_SET) |
|
478 | reader.seek(-1, os.SEEK_SET) | |
465 |
|
479 | |||
466 | reader.read(1) |
|
480 | reader.read(1) | |
467 |
|
481 | |||
468 |
with self.assertRaisesRegex |
|
482 | with self.assertRaisesRegex( | |
469 |
ValueError, |
|
483 | ValueError, "cannot seek zstd decompression stream backwards" | |
|
484 | ): | |||
470 | reader.seek(0, os.SEEK_SET) |
|
485 | reader.seek(0, os.SEEK_SET) | |
471 |
|
486 | |||
472 |
with self.assertRaisesRegex |
|
487 | with self.assertRaisesRegex( | |
473 |
ValueError, |
|
488 | ValueError, "cannot seek zstd decompression stream backwards" | |
|
489 | ): | |||
474 | reader.seek(-1, os.SEEK_CUR) |
|
490 | reader.seek(-1, os.SEEK_CUR) | |
475 |
|
491 | |||
476 |
with self.assertRaisesRegex |
|
492 | with self.assertRaisesRegex( | |
477 | ValueError, |
|
493 | ValueError, "zstd decompression streams cannot be seeked with SEEK_END" | |
478 | 'zstd decompression streams cannot be seeked with SEEK_END'): |
|
494 | ): | |
479 | reader.seek(0, os.SEEK_END) |
|
495 | reader.seek(0, os.SEEK_END) | |
480 |
|
496 | |||
481 | reader.close() |
|
497 | reader.close() | |
482 |
|
498 | |||
483 |
with self.assertRaisesRegex |
|
499 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
484 | reader.seek(4, os.SEEK_SET) |
|
500 | reader.seek(4, os.SEEK_SET) | |
485 |
|
501 | |||
486 |
with self.assertRaisesRegex |
|
502 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
487 | reader.seek(0) |
|
503 | reader.seek(0) | |
488 |
|
504 | |||
489 | def test_seek(self): |
|
505 | def test_seek(self): | |
490 |
source = b |
|
506 | source = b"foobar" * 60 | |
491 | cctx = zstd.ZstdCompressor() |
|
507 | cctx = zstd.ZstdCompressor() | |
492 | frame = cctx.compress(source) |
|
508 | frame = cctx.compress(source) | |
493 |
|
509 | |||
494 | dctx = zstd.ZstdDecompressor() |
|
510 | dctx = zstd.ZstdDecompressor() | |
495 |
|
511 | |||
496 | with dctx.stream_reader(frame) as reader: |
|
512 | with dctx.stream_reader(frame) as reader: | |
497 | reader.seek(3) |
|
513 | reader.seek(3) | |
498 |
self.assertEqual(reader.read(3), b |
|
514 | self.assertEqual(reader.read(3), b"bar") | |
499 |
|
515 | |||
500 | reader.seek(4, os.SEEK_CUR) |
|
516 | reader.seek(4, os.SEEK_CUR) | |
501 |
self.assertEqual(reader.read(2), b |
|
517 | self.assertEqual(reader.read(2), b"ar") | |
502 |
|
518 | |||
503 | def test_no_context_manager(self): |
|
519 | def test_no_context_manager(self): | |
504 |
source = b |
|
520 | source = b"foobar" * 60 | |
505 | cctx = zstd.ZstdCompressor() |
|
521 | cctx = zstd.ZstdCompressor() | |
506 | frame = cctx.compress(source) |
|
522 | frame = cctx.compress(source) | |
507 |
|
523 | |||
508 | dctx = zstd.ZstdDecompressor() |
|
524 | dctx = zstd.ZstdDecompressor() | |
509 | reader = dctx.stream_reader(frame) |
|
525 | reader = dctx.stream_reader(frame) | |
510 |
|
526 | |||
511 |
self.assertEqual(reader.read(6), b |
|
527 | self.assertEqual(reader.read(6), b"foobar") | |
512 |
self.assertEqual(reader.read(18), b |
|
528 | self.assertEqual(reader.read(18), b"foobar" * 3) | |
513 | self.assertFalse(reader.closed) |
|
529 | self.assertFalse(reader.closed) | |
514 |
|
530 | |||
515 | # Calling close prevents subsequent use. |
|
531 | # Calling close prevents subsequent use. | |
516 | reader.close() |
|
532 | reader.close() | |
517 | self.assertTrue(reader.closed) |
|
533 | self.assertTrue(reader.closed) | |
518 |
|
534 | |||
519 |
with self.assertRaisesRegex |
|
535 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
520 | reader.read(6) |
|
536 | reader.read(6) | |
521 |
|
537 | |||
522 | def test_read_after_error(self): |
|
538 | def test_read_after_error(self): | |
523 |
source = io.BytesIO(b |
|
539 | source = io.BytesIO(b"") | |
524 | dctx = zstd.ZstdDecompressor() |
|
540 | dctx = zstd.ZstdDecompressor() | |
525 |
|
541 | |||
526 | reader = dctx.stream_reader(source) |
|
542 | reader = dctx.stream_reader(source) | |
527 |
|
543 | |||
528 | with reader: |
|
544 | with reader: | |
529 | reader.read(0) |
|
545 | reader.read(0) | |
530 |
|
546 | |||
531 | with reader: |
|
547 | with reader: | |
532 |
with self.assertRaisesRegex |
|
548 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
533 | reader.read(100) |
|
549 | reader.read(100) | |
534 |
|
550 | |||
535 | def test_partial_read(self): |
|
551 | def test_partial_read(self): | |
536 | # Inspired by https://github.com/indygreg/python-zstandard/issues/71. |
|
552 | # Inspired by https://github.com/indygreg/python-zstandard/issues/71. | |
537 | buffer = io.BytesIO() |
|
553 | buffer = io.BytesIO() | |
538 | cctx = zstd.ZstdCompressor() |
|
554 | cctx = zstd.ZstdCompressor() | |
539 | writer = cctx.stream_writer(buffer) |
|
555 | writer = cctx.stream_writer(buffer) | |
540 | writer.write(bytearray(os.urandom(1000000))) |
|
556 | writer.write(bytearray(os.urandom(1000000))) | |
541 | writer.flush(zstd.FLUSH_FRAME) |
|
557 | writer.flush(zstd.FLUSH_FRAME) | |
542 | buffer.seek(0) |
|
558 | buffer.seek(0) | |
543 |
|
559 | |||
544 | dctx = zstd.ZstdDecompressor() |
|
560 | dctx = zstd.ZstdDecompressor() | |
545 | reader = dctx.stream_reader(buffer) |
|
561 | reader = dctx.stream_reader(buffer) | |
546 |
|
562 | |||
547 | while True: |
|
563 | while True: | |
548 | chunk = reader.read(8192) |
|
564 | chunk = reader.read(8192) | |
549 | if not chunk: |
|
565 | if not chunk: | |
550 | break |
|
566 | break | |
551 |
|
567 | |||
552 | def test_read_multiple_frames(self): |
|
568 | def test_read_multiple_frames(self): | |
553 | cctx = zstd.ZstdCompressor() |
|
569 | cctx = zstd.ZstdCompressor() | |
554 | source = io.BytesIO() |
|
570 | source = io.BytesIO() | |
555 | writer = cctx.stream_writer(source) |
|
571 | writer = cctx.stream_writer(source) | |
556 |
writer.write(b |
|
572 | writer.write(b"foo") | |
557 | writer.flush(zstd.FLUSH_FRAME) |
|
573 | writer.flush(zstd.FLUSH_FRAME) | |
558 |
writer.write(b |
|
574 | writer.write(b"bar") | |
559 | writer.flush(zstd.FLUSH_FRAME) |
|
575 | writer.flush(zstd.FLUSH_FRAME) | |
560 |
|
576 | |||
561 | dctx = zstd.ZstdDecompressor() |
|
577 | dctx = zstd.ZstdDecompressor() | |
562 |
|
578 | |||
563 | reader = dctx.stream_reader(source.getvalue()) |
|
579 | reader = dctx.stream_reader(source.getvalue()) | |
564 |
self.assertEqual(reader.read(2), b |
|
580 | self.assertEqual(reader.read(2), b"fo") | |
565 |
self.assertEqual(reader.read(2), b |
|
581 | self.assertEqual(reader.read(2), b"o") | |
566 |
self.assertEqual(reader.read(2), b |
|
582 | self.assertEqual(reader.read(2), b"ba") | |
567 |
self.assertEqual(reader.read(2), b |
|
583 | self.assertEqual(reader.read(2), b"r") | |
568 |
|
584 | |||
569 | source.seek(0) |
|
585 | source.seek(0) | |
570 | reader = dctx.stream_reader(source) |
|
586 | reader = dctx.stream_reader(source) | |
571 |
self.assertEqual(reader.read(2), b |
|
587 | self.assertEqual(reader.read(2), b"fo") | |
572 |
self.assertEqual(reader.read(2), b |
|
588 | self.assertEqual(reader.read(2), b"o") | |
573 |
self.assertEqual(reader.read(2), b |
|
589 | self.assertEqual(reader.read(2), b"ba") | |
574 |
self.assertEqual(reader.read(2), b |
|
590 | self.assertEqual(reader.read(2), b"r") | |
575 |
|
591 | |||
576 | reader = dctx.stream_reader(source.getvalue()) |
|
592 | reader = dctx.stream_reader(source.getvalue()) | |
577 |
self.assertEqual(reader.read(3), b |
|
593 | self.assertEqual(reader.read(3), b"foo") | |
578 |
self.assertEqual(reader.read(3), b |
|
594 | self.assertEqual(reader.read(3), b"bar") | |
579 |
|
595 | |||
580 | source.seek(0) |
|
596 | source.seek(0) | |
581 | reader = dctx.stream_reader(source) |
|
597 | reader = dctx.stream_reader(source) | |
582 |
self.assertEqual(reader.read(3), b |
|
598 | self.assertEqual(reader.read(3), b"foo") | |
583 |
self.assertEqual(reader.read(3), b |
|
599 | self.assertEqual(reader.read(3), b"bar") | |
584 |
|
600 | |||
585 | reader = dctx.stream_reader(source.getvalue()) |
|
601 | reader = dctx.stream_reader(source.getvalue()) | |
586 |
self.assertEqual(reader.read(4), b |
|
602 | self.assertEqual(reader.read(4), b"foo") | |
587 |
self.assertEqual(reader.read(4), b |
|
603 | self.assertEqual(reader.read(4), b"bar") | |
588 |
|
604 | |||
589 | source.seek(0) |
|
605 | source.seek(0) | |
590 | reader = dctx.stream_reader(source) |
|
606 | reader = dctx.stream_reader(source) | |
591 |
self.assertEqual(reader.read(4), b |
|
607 | self.assertEqual(reader.read(4), b"foo") | |
592 |
self.assertEqual(reader.read(4), b |
|
608 | self.assertEqual(reader.read(4), b"bar") | |
593 |
|
609 | |||
594 | reader = dctx.stream_reader(source.getvalue()) |
|
610 | reader = dctx.stream_reader(source.getvalue()) | |
595 |
self.assertEqual(reader.read(128), b |
|
611 | self.assertEqual(reader.read(128), b"foo") | |
596 |
self.assertEqual(reader.read(128), b |
|
612 | self.assertEqual(reader.read(128), b"bar") | |
597 |
|
613 | |||
598 | source.seek(0) |
|
614 | source.seek(0) | |
599 | reader = dctx.stream_reader(source) |
|
615 | reader = dctx.stream_reader(source) | |
600 |
self.assertEqual(reader.read(128), b |
|
616 | self.assertEqual(reader.read(128), b"foo") | |
601 |
self.assertEqual(reader.read(128), b |
|
617 | self.assertEqual(reader.read(128), b"bar") | |
602 |
|
618 | |||
603 | # Now tests for reads spanning frames. |
|
619 | # Now tests for reads spanning frames. | |
604 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
620 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) | |
605 |
self.assertEqual(reader.read(3), b |
|
621 | self.assertEqual(reader.read(3), b"foo") | |
606 |
self.assertEqual(reader.read(3), b |
|
622 | self.assertEqual(reader.read(3), b"bar") | |
607 |
|
623 | |||
608 | source.seek(0) |
|
624 | source.seek(0) | |
609 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
625 | reader = dctx.stream_reader(source, read_across_frames=True) | |
610 |
self.assertEqual(reader.read(3), b |
|
626 | self.assertEqual(reader.read(3), b"foo") | |
611 |
self.assertEqual(reader.read(3), b |
|
627 | self.assertEqual(reader.read(3), b"bar") | |
612 |
|
628 | |||
613 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
629 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) | |
614 |
self.assertEqual(reader.read(6), b |
|
630 | self.assertEqual(reader.read(6), b"foobar") | |
615 |
|
631 | |||
616 | source.seek(0) |
|
632 | source.seek(0) | |
617 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
633 | reader = dctx.stream_reader(source, read_across_frames=True) | |
618 |
self.assertEqual(reader.read(6), b |
|
634 | self.assertEqual(reader.read(6), b"foobar") | |
619 |
|
635 | |||
620 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
636 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) | |
621 |
self.assertEqual(reader.read(7), b |
|
637 | self.assertEqual(reader.read(7), b"foobar") | |
622 |
|
638 | |||
623 | source.seek(0) |
|
639 | source.seek(0) | |
624 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
640 | reader = dctx.stream_reader(source, read_across_frames=True) | |
625 |
self.assertEqual(reader.read(7), b |
|
641 | self.assertEqual(reader.read(7), b"foobar") | |
626 |
|
642 | |||
627 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) |
|
643 | reader = dctx.stream_reader(source.getvalue(), read_across_frames=True) | |
628 |
self.assertEqual(reader.read(128), b |
|
644 | self.assertEqual(reader.read(128), b"foobar") | |
629 |
|
645 | |||
630 | source.seek(0) |
|
646 | source.seek(0) | |
631 | reader = dctx.stream_reader(source, read_across_frames=True) |
|
647 | reader = dctx.stream_reader(source, read_across_frames=True) | |
632 |
self.assertEqual(reader.read(128), b |
|
648 | self.assertEqual(reader.read(128), b"foobar") | |
633 |
|
649 | |||
634 | def test_readinto(self): |
|
650 | def test_readinto(self): | |
635 | cctx = zstd.ZstdCompressor() |
|
651 | cctx = zstd.ZstdCompressor() | |
636 |
foo = cctx.compress(b |
|
652 | foo = cctx.compress(b"foo") | |
637 |
|
653 | |||
638 | dctx = zstd.ZstdDecompressor() |
|
654 | dctx = zstd.ZstdDecompressor() | |
639 |
|
655 | |||
640 | # Attempting to readinto() a non-writable buffer fails. |
|
656 | # Attempting to readinto() a non-writable buffer fails. | |
641 | # The exact exception varies based on the backend. |
|
657 | # The exact exception varies based on the backend. | |
642 | reader = dctx.stream_reader(foo) |
|
658 | reader = dctx.stream_reader(foo) | |
643 | with self.assertRaises(Exception): |
|
659 | with self.assertRaises(Exception): | |
644 |
reader.readinto(b |
|
660 | reader.readinto(b"foobar") | |
645 |
|
661 | |||
646 | # readinto() with sufficiently large destination. |
|
662 | # readinto() with sufficiently large destination. | |
647 | b = bytearray(1024) |
|
663 | b = bytearray(1024) | |
648 | reader = dctx.stream_reader(foo) |
|
664 | reader = dctx.stream_reader(foo) | |
649 | self.assertEqual(reader.readinto(b), 3) |
|
665 | self.assertEqual(reader.readinto(b), 3) | |
650 |
self.assertEqual(b[0:3], b |
|
666 | self.assertEqual(b[0:3], b"foo") | |
651 | self.assertEqual(reader.readinto(b), 0) |
|
667 | self.assertEqual(reader.readinto(b), 0) | |
652 |
self.assertEqual(b[0:3], b |
|
668 | self.assertEqual(b[0:3], b"foo") | |
653 |
|
669 | |||
654 | # readinto() with small reads. |
|
670 | # readinto() with small reads. | |
655 | b = bytearray(1024) |
|
671 | b = bytearray(1024) | |
656 | reader = dctx.stream_reader(foo, read_size=1) |
|
672 | reader = dctx.stream_reader(foo, read_size=1) | |
657 | self.assertEqual(reader.readinto(b), 3) |
|
673 | self.assertEqual(reader.readinto(b), 3) | |
658 |
self.assertEqual(b[0:3], b |
|
674 | self.assertEqual(b[0:3], b"foo") | |
659 |
|
675 | |||
660 | # Too small destination buffer. |
|
676 | # Too small destination buffer. | |
661 | b = bytearray(2) |
|
677 | b = bytearray(2) | |
662 | reader = dctx.stream_reader(foo) |
|
678 | reader = dctx.stream_reader(foo) | |
663 | self.assertEqual(reader.readinto(b), 2) |
|
679 | self.assertEqual(reader.readinto(b), 2) | |
664 |
self.assertEqual(b[:], b |
|
680 | self.assertEqual(b[:], b"fo") | |
665 |
|
681 | |||
666 | def test_readinto1(self): |
|
682 | def test_readinto1(self): | |
667 | cctx = zstd.ZstdCompressor() |
|
683 | cctx = zstd.ZstdCompressor() | |
668 |
foo = cctx.compress(b |
|
684 | foo = cctx.compress(b"foo") | |
669 |
|
685 | |||
670 | dctx = zstd.ZstdDecompressor() |
|
686 | dctx = zstd.ZstdDecompressor() | |
671 |
|
687 | |||
672 | reader = dctx.stream_reader(foo) |
|
688 | reader = dctx.stream_reader(foo) | |
673 | with self.assertRaises(Exception): |
|
689 | with self.assertRaises(Exception): | |
674 |
reader.readinto1(b |
|
690 | reader.readinto1(b"foobar") | |
675 |
|
691 | |||
676 | # Sufficiently large destination. |
|
692 | # Sufficiently large destination. | |
677 | b = bytearray(1024) |
|
693 | b = bytearray(1024) | |
678 | reader = dctx.stream_reader(foo) |
|
694 | reader = dctx.stream_reader(foo) | |
679 | self.assertEqual(reader.readinto1(b), 3) |
|
695 | self.assertEqual(reader.readinto1(b), 3) | |
680 |
self.assertEqual(b[0:3], b |
|
696 | self.assertEqual(b[0:3], b"foo") | |
681 | self.assertEqual(reader.readinto1(b), 0) |
|
697 | self.assertEqual(reader.readinto1(b), 0) | |
682 |
self.assertEqual(b[0:3], b |
|
698 | self.assertEqual(b[0:3], b"foo") | |
683 |
|
699 | |||
684 | # readinto() with small reads. |
|
700 | # readinto() with small reads. | |
685 | b = bytearray(1024) |
|
701 | b = bytearray(1024) | |
686 | reader = dctx.stream_reader(foo, read_size=1) |
|
702 | reader = dctx.stream_reader(foo, read_size=1) | |
687 | self.assertEqual(reader.readinto1(b), 3) |
|
703 | self.assertEqual(reader.readinto1(b), 3) | |
688 |
self.assertEqual(b[0:3], b |
|
704 | self.assertEqual(b[0:3], b"foo") | |
689 |
|
705 | |||
690 | # Too small destination buffer. |
|
706 | # Too small destination buffer. | |
691 | b = bytearray(2) |
|
707 | b = bytearray(2) | |
692 | reader = dctx.stream_reader(foo) |
|
708 | reader = dctx.stream_reader(foo) | |
693 | self.assertEqual(reader.readinto1(b), 2) |
|
709 | self.assertEqual(reader.readinto1(b), 2) | |
694 |
self.assertEqual(b[:], b |
|
710 | self.assertEqual(b[:], b"fo") | |
695 |
|
711 | |||
696 | def test_readall(self): |
|
712 | def test_readall(self): | |
697 | cctx = zstd.ZstdCompressor() |
|
713 | cctx = zstd.ZstdCompressor() | |
698 |
foo = cctx.compress(b |
|
714 | foo = cctx.compress(b"foo") | |
699 |
|
715 | |||
700 | dctx = zstd.ZstdDecompressor() |
|
716 | dctx = zstd.ZstdDecompressor() | |
701 | reader = dctx.stream_reader(foo) |
|
717 | reader = dctx.stream_reader(foo) | |
702 |
|
718 | |||
703 |
self.assertEqual(reader.readall(), b |
|
719 | self.assertEqual(reader.readall(), b"foo") | |
704 |
|
720 | |||
705 | def test_read1(self): |
|
721 | def test_read1(self): | |
706 | cctx = zstd.ZstdCompressor() |
|
722 | cctx = zstd.ZstdCompressor() | |
707 |
foo = cctx.compress(b |
|
723 | foo = cctx.compress(b"foo") | |
708 |
|
724 | |||
709 | dctx = zstd.ZstdDecompressor() |
|
725 | dctx = zstd.ZstdDecompressor() | |
710 |
|
726 | |||
711 | b = OpCountingBytesIO(foo) |
|
727 | b = OpCountingBytesIO(foo) | |
712 | reader = dctx.stream_reader(b) |
|
728 | reader = dctx.stream_reader(b) | |
713 |
|
729 | |||
714 |
self.assertEqual(reader.read1(), b |
|
730 | self.assertEqual(reader.read1(), b"foo") | |
715 | self.assertEqual(b._read_count, 1) |
|
731 | self.assertEqual(b._read_count, 1) | |
716 |
|
732 | |||
717 | b = OpCountingBytesIO(foo) |
|
733 | b = OpCountingBytesIO(foo) | |
718 | reader = dctx.stream_reader(b) |
|
734 | reader = dctx.stream_reader(b) | |
719 |
|
735 | |||
720 |
self.assertEqual(reader.read1(0), b |
|
736 | self.assertEqual(reader.read1(0), b"") | |
721 |
self.assertEqual(reader.read1(2), b |
|
737 | self.assertEqual(reader.read1(2), b"fo") | |
722 | self.assertEqual(b._read_count, 1) |
|
738 | self.assertEqual(b._read_count, 1) | |
723 |
self.assertEqual(reader.read1(1), b |
|
739 | self.assertEqual(reader.read1(1), b"o") | |
724 | self.assertEqual(b._read_count, 1) |
|
740 | self.assertEqual(b._read_count, 1) | |
725 |
self.assertEqual(reader.read1(1), b |
|
741 | self.assertEqual(reader.read1(1), b"") | |
726 | self.assertEqual(b._read_count, 2) |
|
742 | self.assertEqual(b._read_count, 2) | |
727 |
|
743 | |||
728 | def test_read_lines(self): |
|
744 | def test_read_lines(self): | |
729 | cctx = zstd.ZstdCompressor() |
|
745 | cctx = zstd.ZstdCompressor() | |
730 |
source = b |
|
746 | source = b"\n".join(("line %d" % i).encode("ascii") for i in range(1024)) | |
731 |
|
747 | |||
732 | frame = cctx.compress(source) |
|
748 | frame = cctx.compress(source) | |
733 |
|
749 | |||
734 | dctx = zstd.ZstdDecompressor() |
|
750 | dctx = zstd.ZstdDecompressor() | |
735 | reader = dctx.stream_reader(frame) |
|
751 | reader = dctx.stream_reader(frame) | |
736 |
tr = io.TextIOWrapper(reader, encoding= |
|
752 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
737 |
|
753 | |||
738 | lines = [] |
|
754 | lines = [] | |
739 | for line in tr: |
|
755 | for line in tr: | |
740 |
lines.append(line.encode( |
|
756 | lines.append(line.encode("utf-8")) | |
741 |
|
757 | |||
742 | self.assertEqual(len(lines), 1024) |
|
758 | self.assertEqual(len(lines), 1024) | |
743 |
self.assertEqual(b |
|
759 | self.assertEqual(b"".join(lines), source) | |
744 |
|
760 | |||
745 | reader = dctx.stream_reader(frame) |
|
761 | reader = dctx.stream_reader(frame) | |
746 |
tr = io.TextIOWrapper(reader, encoding= |
|
762 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
747 |
|
763 | |||
748 | lines = tr.readlines() |
|
764 | lines = tr.readlines() | |
749 | self.assertEqual(len(lines), 1024) |
|
765 | self.assertEqual(len(lines), 1024) | |
750 |
self.assertEqual( |
|
766 | self.assertEqual("".join(lines).encode("utf-8"), source) | |
751 |
|
767 | |||
752 | reader = dctx.stream_reader(frame) |
|
768 | reader = dctx.stream_reader(frame) | |
753 |
tr = io.TextIOWrapper(reader, encoding= |
|
769 | tr = io.TextIOWrapper(reader, encoding="utf-8") | |
754 |
|
770 | |||
755 | lines = [] |
|
771 | lines = [] | |
756 | while True: |
|
772 | while True: | |
757 | line = tr.readline() |
|
773 | line = tr.readline() | |
758 | if not line: |
|
774 | if not line: | |
759 | break |
|
775 | break | |
760 |
|
776 | |||
761 |
lines.append(line.encode( |
|
777 | lines.append(line.encode("utf-8")) | |
762 |
|
778 | |||
763 | self.assertEqual(len(lines), 1024) |
|
779 | self.assertEqual(len(lines), 1024) | |
764 |
self.assertEqual(b |
|
780 | self.assertEqual(b"".join(lines), source) | |
765 |
|
781 | |||
766 |
|
782 | |||
767 | @make_cffi |
|
783 | @make_cffi | |
768 |
class TestDecompressor_decompressobj( |
|
784 | class TestDecompressor_decompressobj(TestCase): | |
769 | def test_simple(self): |
|
785 | def test_simple(self): | |
770 |
data = zstd.ZstdCompressor(level=1).compress(b |
|
786 | data = zstd.ZstdCompressor(level=1).compress(b"foobar") | |
771 |
|
787 | |||
772 | dctx = zstd.ZstdDecompressor() |
|
788 | dctx = zstd.ZstdDecompressor() | |
773 | dobj = dctx.decompressobj() |
|
789 | dobj = dctx.decompressobj() | |
774 |
self.assertEqual(dobj.decompress(data), b |
|
790 | self.assertEqual(dobj.decompress(data), b"foobar") | |
775 | self.assertIsNone(dobj.flush()) |
|
791 | self.assertIsNone(dobj.flush()) | |
776 | self.assertIsNone(dobj.flush(10)) |
|
792 | self.assertIsNone(dobj.flush(10)) | |
777 | self.assertIsNone(dobj.flush(length=100)) |
|
793 | self.assertIsNone(dobj.flush(length=100)) | |
778 |
|
794 | |||
779 | def test_input_types(self): |
|
795 | def test_input_types(self): | |
780 |
compressed = zstd.ZstdCompressor(level=1).compress(b |
|
796 | compressed = zstd.ZstdCompressor(level=1).compress(b"foo") | |
781 |
|
797 | |||
782 | dctx = zstd.ZstdDecompressor() |
|
798 | dctx = zstd.ZstdDecompressor() | |
783 |
|
799 | |||
784 | mutable_array = bytearray(len(compressed)) |
|
800 | mutable_array = bytearray(len(compressed)) | |
785 | mutable_array[:] = compressed |
|
801 | mutable_array[:] = compressed | |
786 |
|
802 | |||
787 | sources = [ |
|
803 | sources = [ | |
788 | memoryview(compressed), |
|
804 | memoryview(compressed), | |
789 | bytearray(compressed), |
|
805 | bytearray(compressed), | |
790 | mutable_array, |
|
806 | mutable_array, | |
791 | ] |
|
807 | ] | |
792 |
|
808 | |||
793 | for source in sources: |
|
809 | for source in sources: | |
794 | dobj = dctx.decompressobj() |
|
810 | dobj = dctx.decompressobj() | |
795 | self.assertIsNone(dobj.flush()) |
|
811 | self.assertIsNone(dobj.flush()) | |
796 | self.assertIsNone(dobj.flush(10)) |
|
812 | self.assertIsNone(dobj.flush(10)) | |
797 | self.assertIsNone(dobj.flush(length=100)) |
|
813 | self.assertIsNone(dobj.flush(length=100)) | |
798 |
self.assertEqual(dobj.decompress(source), b |
|
814 | self.assertEqual(dobj.decompress(source), b"foo") | |
799 | self.assertIsNone(dobj.flush()) |
|
815 | self.assertIsNone(dobj.flush()) | |
800 |
|
816 | |||
801 | def test_reuse(self): |
|
817 | def test_reuse(self): | |
802 |
data = zstd.ZstdCompressor(level=1).compress(b |
|
818 | data = zstd.ZstdCompressor(level=1).compress(b"foobar") | |
803 |
|
819 | |||
804 | dctx = zstd.ZstdDecompressor() |
|
820 | dctx = zstd.ZstdDecompressor() | |
805 | dobj = dctx.decompressobj() |
|
821 | dobj = dctx.decompressobj() | |
806 | dobj.decompress(data) |
|
822 | dobj.decompress(data) | |
807 |
|
823 | |||
808 |
with self.assertRaisesRegex |
|
824 | with self.assertRaisesRegex(zstd.ZstdError, "cannot use a decompressobj"): | |
809 | dobj.decompress(data) |
|
825 | dobj.decompress(data) | |
810 | self.assertIsNone(dobj.flush()) |
|
826 | self.assertIsNone(dobj.flush()) | |
811 |
|
827 | |||
812 | def test_bad_write_size(self): |
|
828 | def test_bad_write_size(self): | |
813 | dctx = zstd.ZstdDecompressor() |
|
829 | dctx = zstd.ZstdDecompressor() | |
814 |
|
830 | |||
815 |
with self.assertRaisesRegex |
|
831 | with self.assertRaisesRegex(ValueError, "write_size must be positive"): | |
816 | dctx.decompressobj(write_size=0) |
|
832 | dctx.decompressobj(write_size=0) | |
817 |
|
833 | |||
818 | def test_write_size(self): |
|
834 | def test_write_size(self): | |
819 |
source = b |
|
835 | source = b"foo" * 64 + b"bar" * 128 | |
820 | data = zstd.ZstdCompressor(level=1).compress(source) |
|
836 | data = zstd.ZstdCompressor(level=1).compress(source) | |
821 |
|
837 | |||
822 | dctx = zstd.ZstdDecompressor() |
|
838 | dctx = zstd.ZstdDecompressor() | |
823 |
|
839 | |||
824 | for i in range(128): |
|
840 | for i in range(128): | |
825 | dobj = dctx.decompressobj(write_size=i + 1) |
|
841 | dobj = dctx.decompressobj(write_size=i + 1) | |
826 | self.assertEqual(dobj.decompress(data), source) |
|
842 | self.assertEqual(dobj.decompress(data), source) | |
827 |
|
843 | |||
828 |
|
844 | |||
829 | def decompress_via_writer(data): |
|
845 | def decompress_via_writer(data): | |
830 | buffer = io.BytesIO() |
|
846 | buffer = io.BytesIO() | |
831 | dctx = zstd.ZstdDecompressor() |
|
847 | dctx = zstd.ZstdDecompressor() | |
832 | decompressor = dctx.stream_writer(buffer) |
|
848 | decompressor = dctx.stream_writer(buffer) | |
833 | decompressor.write(data) |
|
849 | decompressor.write(data) | |
834 |
|
850 | |||
835 | return buffer.getvalue() |
|
851 | return buffer.getvalue() | |
836 |
|
852 | |||
837 |
|
853 | |||
838 | @make_cffi |
|
854 | @make_cffi | |
839 |
class TestDecompressor_stream_writer( |
|
855 | class TestDecompressor_stream_writer(TestCase): | |
840 | def test_io_api(self): |
|
856 | def test_io_api(self): | |
841 | buffer = io.BytesIO() |
|
857 | buffer = io.BytesIO() | |
842 | dctx = zstd.ZstdDecompressor() |
|
858 | dctx = zstd.ZstdDecompressor() | |
843 | writer = dctx.stream_writer(buffer) |
|
859 | writer = dctx.stream_writer(buffer) | |
844 |
|
860 | |||
845 | self.assertFalse(writer.closed) |
|
861 | self.assertFalse(writer.closed) | |
846 | self.assertFalse(writer.isatty()) |
|
862 | self.assertFalse(writer.isatty()) | |
847 | self.assertFalse(writer.readable()) |
|
863 | self.assertFalse(writer.readable()) | |
848 |
|
864 | |||
849 | with self.assertRaises(io.UnsupportedOperation): |
|
865 | with self.assertRaises(io.UnsupportedOperation): | |
850 | writer.readline() |
|
866 | writer.readline() | |
851 |
|
867 | |||
852 | with self.assertRaises(io.UnsupportedOperation): |
|
868 | with self.assertRaises(io.UnsupportedOperation): | |
853 | writer.readline(42) |
|
869 | writer.readline(42) | |
854 |
|
870 | |||
855 | with self.assertRaises(io.UnsupportedOperation): |
|
871 | with self.assertRaises(io.UnsupportedOperation): | |
856 | writer.readline(size=42) |
|
872 | writer.readline(size=42) | |
857 |
|
873 | |||
858 | with self.assertRaises(io.UnsupportedOperation): |
|
874 | with self.assertRaises(io.UnsupportedOperation): | |
859 | writer.readlines() |
|
875 | writer.readlines() | |
860 |
|
876 | |||
861 | with self.assertRaises(io.UnsupportedOperation): |
|
877 | with self.assertRaises(io.UnsupportedOperation): | |
862 | writer.readlines(42) |
|
878 | writer.readlines(42) | |
863 |
|
879 | |||
864 | with self.assertRaises(io.UnsupportedOperation): |
|
880 | with self.assertRaises(io.UnsupportedOperation): | |
865 | writer.readlines(hint=42) |
|
881 | writer.readlines(hint=42) | |
866 |
|
882 | |||
867 | with self.assertRaises(io.UnsupportedOperation): |
|
883 | with self.assertRaises(io.UnsupportedOperation): | |
868 | writer.seek(0) |
|
884 | writer.seek(0) | |
869 |
|
885 | |||
870 | with self.assertRaises(io.UnsupportedOperation): |
|
886 | with self.assertRaises(io.UnsupportedOperation): | |
871 | writer.seek(10, os.SEEK_SET) |
|
887 | writer.seek(10, os.SEEK_SET) | |
872 |
|
888 | |||
873 | self.assertFalse(writer.seekable()) |
|
889 | self.assertFalse(writer.seekable()) | |
874 |
|
890 | |||
875 | with self.assertRaises(io.UnsupportedOperation): |
|
891 | with self.assertRaises(io.UnsupportedOperation): | |
876 | writer.tell() |
|
892 | writer.tell() | |
877 |
|
893 | |||
878 | with self.assertRaises(io.UnsupportedOperation): |
|
894 | with self.assertRaises(io.UnsupportedOperation): | |
879 | writer.truncate() |
|
895 | writer.truncate() | |
880 |
|
896 | |||
881 | with self.assertRaises(io.UnsupportedOperation): |
|
897 | with self.assertRaises(io.UnsupportedOperation): | |
882 | writer.truncate(42) |
|
898 | writer.truncate(42) | |
883 |
|
899 | |||
884 | with self.assertRaises(io.UnsupportedOperation): |
|
900 | with self.assertRaises(io.UnsupportedOperation): | |
885 | writer.truncate(size=42) |
|
901 | writer.truncate(size=42) | |
886 |
|
902 | |||
887 | self.assertTrue(writer.writable()) |
|
903 | self.assertTrue(writer.writable()) | |
888 |
|
904 | |||
889 | with self.assertRaises(io.UnsupportedOperation): |
|
905 | with self.assertRaises(io.UnsupportedOperation): | |
890 | writer.writelines([]) |
|
906 | writer.writelines([]) | |
891 |
|
907 | |||
892 | with self.assertRaises(io.UnsupportedOperation): |
|
908 | with self.assertRaises(io.UnsupportedOperation): | |
893 | writer.read() |
|
909 | writer.read() | |
894 |
|
910 | |||
895 | with self.assertRaises(io.UnsupportedOperation): |
|
911 | with self.assertRaises(io.UnsupportedOperation): | |
896 | writer.read(42) |
|
912 | writer.read(42) | |
897 |
|
913 | |||
898 | with self.assertRaises(io.UnsupportedOperation): |
|
914 | with self.assertRaises(io.UnsupportedOperation): | |
899 | writer.read(size=42) |
|
915 | writer.read(size=42) | |
900 |
|
916 | |||
901 | with self.assertRaises(io.UnsupportedOperation): |
|
917 | with self.assertRaises(io.UnsupportedOperation): | |
902 | writer.readall() |
|
918 | writer.readall() | |
903 |
|
919 | |||
904 | with self.assertRaises(io.UnsupportedOperation): |
|
920 | with self.assertRaises(io.UnsupportedOperation): | |
905 | writer.readinto(None) |
|
921 | writer.readinto(None) | |
906 |
|
922 | |||
907 | with self.assertRaises(io.UnsupportedOperation): |
|
923 | with self.assertRaises(io.UnsupportedOperation): | |
908 | writer.fileno() |
|
924 | writer.fileno() | |
909 |
|
925 | |||
910 | def test_fileno_file(self): |
|
926 | def test_fileno_file(self): | |
911 |
with tempfile.TemporaryFile( |
|
927 | with tempfile.TemporaryFile("wb") as tf: | |
912 | dctx = zstd.ZstdDecompressor() |
|
928 | dctx = zstd.ZstdDecompressor() | |
913 | writer = dctx.stream_writer(tf) |
|
929 | writer = dctx.stream_writer(tf) | |
914 |
|
930 | |||
915 | self.assertEqual(writer.fileno(), tf.fileno()) |
|
931 | self.assertEqual(writer.fileno(), tf.fileno()) | |
916 |
|
932 | |||
917 | def test_close(self): |
|
933 | def test_close(self): | |
918 |
foo = zstd.ZstdCompressor().compress(b |
|
934 | foo = zstd.ZstdCompressor().compress(b"foo") | |
919 |
|
935 | |||
920 | buffer = NonClosingBytesIO() |
|
936 | buffer = NonClosingBytesIO() | |
921 | dctx = zstd.ZstdDecompressor() |
|
937 | dctx = zstd.ZstdDecompressor() | |
922 | writer = dctx.stream_writer(buffer) |
|
938 | writer = dctx.stream_writer(buffer) | |
923 |
|
939 | |||
924 | writer.write(foo) |
|
940 | writer.write(foo) | |
925 | self.assertFalse(writer.closed) |
|
941 | self.assertFalse(writer.closed) | |
926 | self.assertFalse(buffer.closed) |
|
942 | self.assertFalse(buffer.closed) | |
927 | writer.close() |
|
943 | writer.close() | |
928 | self.assertTrue(writer.closed) |
|
944 | self.assertTrue(writer.closed) | |
929 | self.assertTrue(buffer.closed) |
|
945 | self.assertTrue(buffer.closed) | |
930 |
|
946 | |||
931 |
with self.assertRaisesRegex |
|
947 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
932 |
writer.write(b |
|
948 | writer.write(b"") | |
933 |
|
949 | |||
934 |
with self.assertRaisesRegex |
|
950 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
935 | writer.flush() |
|
951 | writer.flush() | |
936 |
|
952 | |||
937 |
with self.assertRaisesRegex |
|
953 | with self.assertRaisesRegex(ValueError, "stream is closed"): | |
938 | with writer: |
|
954 | with writer: | |
939 | pass |
|
955 | pass | |
940 |
|
956 | |||
941 |
self.assertEqual(buffer.getvalue(), b |
|
957 | self.assertEqual(buffer.getvalue(), b"foo") | |
942 |
|
958 | |||
943 | # Context manager exit should close stream. |
|
959 | # Context manager exit should close stream. | |
944 | buffer = NonClosingBytesIO() |
|
960 | buffer = NonClosingBytesIO() | |
945 | writer = dctx.stream_writer(buffer) |
|
961 | writer = dctx.stream_writer(buffer) | |
946 |
|
962 | |||
947 | with writer: |
|
963 | with writer: | |
948 | writer.write(foo) |
|
964 | writer.write(foo) | |
949 |
|
965 | |||
950 | self.assertTrue(writer.closed) |
|
966 | self.assertTrue(writer.closed) | |
951 |
self.assertEqual(buffer.getvalue(), b |
|
967 | self.assertEqual(buffer.getvalue(), b"foo") | |
952 |
|
968 | |||
953 | def test_flush(self): |
|
969 | def test_flush(self): | |
954 | buffer = OpCountingBytesIO() |
|
970 | buffer = OpCountingBytesIO() | |
955 | dctx = zstd.ZstdDecompressor() |
|
971 | dctx = zstd.ZstdDecompressor() | |
956 | writer = dctx.stream_writer(buffer) |
|
972 | writer = dctx.stream_writer(buffer) | |
957 |
|
973 | |||
958 | writer.flush() |
|
974 | writer.flush() | |
959 | self.assertEqual(buffer._flush_count, 1) |
|
975 | self.assertEqual(buffer._flush_count, 1) | |
960 | writer.flush() |
|
976 | writer.flush() | |
961 | self.assertEqual(buffer._flush_count, 2) |
|
977 | self.assertEqual(buffer._flush_count, 2) | |
962 |
|
978 | |||
963 | def test_empty_roundtrip(self): |
|
979 | def test_empty_roundtrip(self): | |
964 | cctx = zstd.ZstdCompressor() |
|
980 | cctx = zstd.ZstdCompressor() | |
965 |
empty = cctx.compress(b |
|
981 | empty = cctx.compress(b"") | |
966 |
self.assertEqual(decompress_via_writer(empty), b |
|
982 | self.assertEqual(decompress_via_writer(empty), b"") | |
967 |
|
983 | |||
968 | def test_input_types(self): |
|
984 | def test_input_types(self): | |
969 | cctx = zstd.ZstdCompressor(level=1) |
|
985 | cctx = zstd.ZstdCompressor(level=1) | |
970 |
compressed = cctx.compress(b |
|
986 | compressed = cctx.compress(b"foo") | |
971 |
|
987 | |||
972 | mutable_array = bytearray(len(compressed)) |
|
988 | mutable_array = bytearray(len(compressed)) | |
973 | mutable_array[:] = compressed |
|
989 | mutable_array[:] = compressed | |
974 |
|
990 | |||
975 | sources = [ |
|
991 | sources = [ | |
976 | memoryview(compressed), |
|
992 | memoryview(compressed), | |
977 | bytearray(compressed), |
|
993 | bytearray(compressed), | |
978 | mutable_array, |
|
994 | mutable_array, | |
979 | ] |
|
995 | ] | |
980 |
|
996 | |||
981 | dctx = zstd.ZstdDecompressor() |
|
997 | dctx = zstd.ZstdDecompressor() | |
982 | for source in sources: |
|
998 | for source in sources: | |
983 | buffer = io.BytesIO() |
|
999 | buffer = io.BytesIO() | |
984 |
|
1000 | |||
985 | decompressor = dctx.stream_writer(buffer) |
|
1001 | decompressor = dctx.stream_writer(buffer) | |
986 | decompressor.write(source) |
|
1002 | decompressor.write(source) | |
987 |
self.assertEqual(buffer.getvalue(), b |
|
1003 | self.assertEqual(buffer.getvalue(), b"foo") | |
988 |
|
1004 | |||
989 | buffer = NonClosingBytesIO() |
|
1005 | buffer = NonClosingBytesIO() | |
990 |
|
1006 | |||
991 | with dctx.stream_writer(buffer) as decompressor: |
|
1007 | with dctx.stream_writer(buffer) as decompressor: | |
992 | self.assertEqual(decompressor.write(source), 3) |
|
1008 | self.assertEqual(decompressor.write(source), 3) | |
993 |
|
1009 | |||
994 |
self.assertEqual(buffer.getvalue(), b |
|
1010 | self.assertEqual(buffer.getvalue(), b"foo") | |
995 |
|
1011 | |||
996 | buffer = io.BytesIO() |
|
1012 | buffer = io.BytesIO() | |
997 | writer = dctx.stream_writer(buffer, write_return_read=True) |
|
1013 | writer = dctx.stream_writer(buffer, write_return_read=True) | |
998 | self.assertEqual(writer.write(source), len(source)) |
|
1014 | self.assertEqual(writer.write(source), len(source)) | |
999 |
self.assertEqual(buffer.getvalue(), b |
|
1015 | self.assertEqual(buffer.getvalue(), b"foo") | |
1000 |
|
1016 | |||
1001 | def test_large_roundtrip(self): |
|
1017 | def test_large_roundtrip(self): | |
1002 | chunks = [] |
|
1018 | chunks = [] | |
1003 | for i in range(255): |
|
1019 | for i in range(255): | |
1004 |
chunks.append(struct.Struct( |
|
1020 | chunks.append(struct.Struct(">B").pack(i) * 16384) | |
1005 |
orig = b |
|
1021 | orig = b"".join(chunks) | |
1006 | cctx = zstd.ZstdCompressor() |
|
1022 | cctx = zstd.ZstdCompressor() | |
1007 | compressed = cctx.compress(orig) |
|
1023 | compressed = cctx.compress(orig) | |
1008 |
|
1024 | |||
1009 | self.assertEqual(decompress_via_writer(compressed), orig) |
|
1025 | self.assertEqual(decompress_via_writer(compressed), orig) | |
1010 |
|
1026 | |||
1011 | def test_multiple_calls(self): |
|
1027 | def test_multiple_calls(self): | |
1012 | chunks = [] |
|
1028 | chunks = [] | |
1013 | for i in range(255): |
|
1029 | for i in range(255): | |
1014 | for j in range(255): |
|
1030 | for j in range(255): | |
1015 |
chunks.append(struct.Struct( |
|
1031 | chunks.append(struct.Struct(">B").pack(j) * i) | |
1016 |
|
1032 | |||
1017 |
orig = b |
|
1033 | orig = b"".join(chunks) | |
1018 | cctx = zstd.ZstdCompressor() |
|
1034 | cctx = zstd.ZstdCompressor() | |
1019 | compressed = cctx.compress(orig) |
|
1035 | compressed = cctx.compress(orig) | |
1020 |
|
1036 | |||
1021 | buffer = NonClosingBytesIO() |
|
1037 | buffer = NonClosingBytesIO() | |
1022 | dctx = zstd.ZstdDecompressor() |
|
1038 | dctx = zstd.ZstdDecompressor() | |
1023 | with dctx.stream_writer(buffer) as decompressor: |
|
1039 | with dctx.stream_writer(buffer) as decompressor: | |
1024 | pos = 0 |
|
1040 | pos = 0 | |
1025 | while pos < len(compressed): |
|
1041 | while pos < len(compressed): | |
1026 | pos2 = pos + 8192 |
|
1042 | pos2 = pos + 8192 | |
1027 | decompressor.write(compressed[pos:pos2]) |
|
1043 | decompressor.write(compressed[pos:pos2]) | |
1028 | pos += 8192 |
|
1044 | pos += 8192 | |
1029 | self.assertEqual(buffer.getvalue(), orig) |
|
1045 | self.assertEqual(buffer.getvalue(), orig) | |
1030 |
|
1046 | |||
1031 | # Again with write_return_read=True |
|
1047 | # Again with write_return_read=True | |
1032 | buffer = io.BytesIO() |
|
1048 | buffer = io.BytesIO() | |
1033 | writer = dctx.stream_writer(buffer, write_return_read=True) |
|
1049 | writer = dctx.stream_writer(buffer, write_return_read=True) | |
1034 | pos = 0 |
|
1050 | pos = 0 | |
1035 | while pos < len(compressed): |
|
1051 | while pos < len(compressed): | |
1036 | pos2 = pos + 8192 |
|
1052 | pos2 = pos + 8192 | |
1037 | chunk = compressed[pos:pos2] |
|
1053 | chunk = compressed[pos:pos2] | |
1038 | self.assertEqual(writer.write(chunk), len(chunk)) |
|
1054 | self.assertEqual(writer.write(chunk), len(chunk)) | |
1039 | pos += 8192 |
|
1055 | pos += 8192 | |
1040 | self.assertEqual(buffer.getvalue(), orig) |
|
1056 | self.assertEqual(buffer.getvalue(), orig) | |
1041 |
|
1057 | |||
1042 | def test_dictionary(self): |
|
1058 | def test_dictionary(self): | |
1043 | samples = [] |
|
1059 | samples = [] | |
1044 | for i in range(128): |
|
1060 | for i in range(128): | |
1045 |
samples.append(b |
|
1061 | samples.append(b"foo" * 64) | |
1046 |
samples.append(b |
|
1062 | samples.append(b"bar" * 64) | |
1047 |
samples.append(b |
|
1063 | samples.append(b"foobar" * 64) | |
1048 |
|
1064 | |||
1049 | d = zstd.train_dictionary(8192, samples) |
|
1065 | d = zstd.train_dictionary(8192, samples) | |
1050 |
|
1066 | |||
1051 |
orig = b |
|
1067 | orig = b"foobar" * 16384 | |
1052 | buffer = NonClosingBytesIO() |
|
1068 | buffer = NonClosingBytesIO() | |
1053 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
1069 | cctx = zstd.ZstdCompressor(dict_data=d) | |
1054 | with cctx.stream_writer(buffer) as compressor: |
|
1070 | with cctx.stream_writer(buffer) as compressor: | |
1055 | self.assertEqual(compressor.write(orig), 0) |
|
1071 | self.assertEqual(compressor.write(orig), 0) | |
1056 |
|
1072 | |||
1057 | compressed = buffer.getvalue() |
|
1073 | compressed = buffer.getvalue() | |
1058 | buffer = io.BytesIO() |
|
1074 | buffer = io.BytesIO() | |
1059 |
|
1075 | |||
1060 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1076 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
1061 | decompressor = dctx.stream_writer(buffer) |
|
1077 | decompressor = dctx.stream_writer(buffer) | |
1062 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
1078 | self.assertEqual(decompressor.write(compressed), len(orig)) | |
1063 | self.assertEqual(buffer.getvalue(), orig) |
|
1079 | self.assertEqual(buffer.getvalue(), orig) | |
1064 |
|
1080 | |||
1065 | buffer = NonClosingBytesIO() |
|
1081 | buffer = NonClosingBytesIO() | |
1066 |
|
1082 | |||
1067 | with dctx.stream_writer(buffer) as decompressor: |
|
1083 | with dctx.stream_writer(buffer) as decompressor: | |
1068 | self.assertEqual(decompressor.write(compressed), len(orig)) |
|
1084 | self.assertEqual(decompressor.write(compressed), len(orig)) | |
1069 |
|
1085 | |||
1070 | self.assertEqual(buffer.getvalue(), orig) |
|
1086 | self.assertEqual(buffer.getvalue(), orig) | |
1071 |
|
1087 | |||
1072 | def test_memory_size(self): |
|
1088 | def test_memory_size(self): | |
1073 | dctx = zstd.ZstdDecompressor() |
|
1089 | dctx = zstd.ZstdDecompressor() | |
1074 | buffer = io.BytesIO() |
|
1090 | buffer = io.BytesIO() | |
1075 |
|
1091 | |||
1076 | decompressor = dctx.stream_writer(buffer) |
|
1092 | decompressor = dctx.stream_writer(buffer) | |
1077 | size = decompressor.memory_size() |
|
1093 | size = decompressor.memory_size() | |
1078 | self.assertGreater(size, 100000) |
|
1094 | self.assertGreater(size, 100000) | |
1079 |
|
1095 | |||
1080 | with dctx.stream_writer(buffer) as decompressor: |
|
1096 | with dctx.stream_writer(buffer) as decompressor: | |
1081 | size = decompressor.memory_size() |
|
1097 | size = decompressor.memory_size() | |
1082 |
|
1098 | |||
1083 | self.assertGreater(size, 100000) |
|
1099 | self.assertGreater(size, 100000) | |
1084 |
|
1100 | |||
1085 | def test_write_size(self): |
|
1101 | def test_write_size(self): | |
1086 |
source = zstd.ZstdCompressor().compress(b |
|
1102 | source = zstd.ZstdCompressor().compress(b"foobarfoobar") | |
1087 | dest = OpCountingBytesIO() |
|
1103 | dest = OpCountingBytesIO() | |
1088 | dctx = zstd.ZstdDecompressor() |
|
1104 | dctx = zstd.ZstdDecompressor() | |
1089 | with dctx.stream_writer(dest, write_size=1) as decompressor: |
|
1105 | with dctx.stream_writer(dest, write_size=1) as decompressor: | |
1090 |
s = struct.Struct( |
|
1106 | s = struct.Struct(">B") | |
1091 | for c in source: |
|
1107 | for c in source: | |
1092 | if not isinstance(c, str): |
|
1108 | if not isinstance(c, str): | |
1093 | c = s.pack(c) |
|
1109 | c = s.pack(c) | |
1094 | decompressor.write(c) |
|
1110 | decompressor.write(c) | |
1095 |
|
1111 | |||
1096 |
self.assertEqual(dest.getvalue(), b |
|
1112 | self.assertEqual(dest.getvalue(), b"foobarfoobar") | |
1097 | self.assertEqual(dest._write_count, len(dest.getvalue())) |
|
1113 | self.assertEqual(dest._write_count, len(dest.getvalue())) | |
1098 |
|
1114 | |||
1099 |
|
1115 | |||
1100 | @make_cffi |
|
1116 | @make_cffi | |
1101 |
class TestDecompressor_read_to_iter( |
|
1117 | class TestDecompressor_read_to_iter(TestCase): | |
1102 | def test_type_validation(self): |
|
1118 | def test_type_validation(self): | |
1103 | dctx = zstd.ZstdDecompressor() |
|
1119 | dctx = zstd.ZstdDecompressor() | |
1104 |
|
1120 | |||
1105 | # Object with read() works. |
|
1121 | # Object with read() works. | |
1106 | dctx.read_to_iter(io.BytesIO()) |
|
1122 | dctx.read_to_iter(io.BytesIO()) | |
1107 |
|
1123 | |||
1108 | # Buffer protocol works. |
|
1124 | # Buffer protocol works. | |
1109 |
dctx.read_to_iter(b |
|
1125 | dctx.read_to_iter(b"foobar") | |
1110 |
|
1126 | |||
1111 |
with self.assertRaisesRegex |
|
1127 | with self.assertRaisesRegex(ValueError, "must pass an object with a read"): | |
1112 |
b |
|
1128 | b"".join(dctx.read_to_iter(True)) | |
1113 |
|
1129 | |||
1114 | def test_empty_input(self): |
|
1130 | def test_empty_input(self): | |
1115 | dctx = zstd.ZstdDecompressor() |
|
1131 | dctx = zstd.ZstdDecompressor() | |
1116 |
|
1132 | |||
1117 | source = io.BytesIO() |
|
1133 | source = io.BytesIO() | |
1118 | it = dctx.read_to_iter(source) |
|
1134 | it = dctx.read_to_iter(source) | |
1119 | # TODO this is arguably wrong. Should get an error about missing frame foo. |
|
1135 | # TODO this is arguably wrong. Should get an error about missing frame foo. | |
1120 | with self.assertRaises(StopIteration): |
|
1136 | with self.assertRaises(StopIteration): | |
1121 | next(it) |
|
1137 | next(it) | |
1122 |
|
1138 | |||
1123 |
it = dctx.read_to_iter(b |
|
1139 | it = dctx.read_to_iter(b"") | |
1124 | with self.assertRaises(StopIteration): |
|
1140 | with self.assertRaises(StopIteration): | |
1125 | next(it) |
|
1141 | next(it) | |
1126 |
|
1142 | |||
1127 | def test_invalid_input(self): |
|
1143 | def test_invalid_input(self): | |
1128 | dctx = zstd.ZstdDecompressor() |
|
1144 | dctx = zstd.ZstdDecompressor() | |
1129 |
|
1145 | |||
1130 |
source = io.BytesIO(b |
|
1146 | source = io.BytesIO(b"foobar") | |
1131 | it = dctx.read_to_iter(source) |
|
1147 | it = dctx.read_to_iter(source) | |
1132 |
with self.assertRaisesRegex |
|
1148 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
1133 | next(it) |
|
1149 | next(it) | |
1134 |
|
1150 | |||
1135 |
it = dctx.read_to_iter(b |
|
1151 | it = dctx.read_to_iter(b"foobar") | |
1136 |
with self.assertRaisesRegex |
|
1152 | with self.assertRaisesRegex(zstd.ZstdError, "Unknown frame descriptor"): | |
1137 | next(it) |
|
1153 | next(it) | |
1138 |
|
1154 | |||
1139 | def test_empty_roundtrip(self): |
|
1155 | def test_empty_roundtrip(self): | |
1140 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) |
|
1156 | cctx = zstd.ZstdCompressor(level=1, write_content_size=False) | |
1141 |
empty = cctx.compress(b |
|
1157 | empty = cctx.compress(b"") | |
1142 |
|
1158 | |||
1143 | source = io.BytesIO(empty) |
|
1159 | source = io.BytesIO(empty) | |
1144 | source.seek(0) |
|
1160 | source.seek(0) | |
1145 |
|
1161 | |||
1146 | dctx = zstd.ZstdDecompressor() |
|
1162 | dctx = zstd.ZstdDecompressor() | |
1147 | it = dctx.read_to_iter(source) |
|
1163 | it = dctx.read_to_iter(source) | |
1148 |
|
1164 | |||
1149 | # No chunks should be emitted since there is no data. |
|
1165 | # No chunks should be emitted since there is no data. | |
1150 | with self.assertRaises(StopIteration): |
|
1166 | with self.assertRaises(StopIteration): | |
1151 | next(it) |
|
1167 | next(it) | |
1152 |
|
1168 | |||
1153 | # Again for good measure. |
|
1169 | # Again for good measure. | |
1154 | with self.assertRaises(StopIteration): |
|
1170 | with self.assertRaises(StopIteration): | |
1155 | next(it) |
|
1171 | next(it) | |
1156 |
|
1172 | |||
1157 | def test_skip_bytes_too_large(self): |
|
1173 | def test_skip_bytes_too_large(self): | |
1158 | dctx = zstd.ZstdDecompressor() |
|
1174 | dctx = zstd.ZstdDecompressor() | |
1159 |
|
1175 | |||
1160 | with self.assertRaisesRegexp(ValueError, 'skip_bytes must be smaller than read_size'): |
|
1176 | with self.assertRaisesRegex( | |
1161 | b''.join(dctx.read_to_iter(b'', skip_bytes=1, read_size=1)) |
|
1177 | ValueError, "skip_bytes must be smaller than read_size" | |
|
1178 | ): | |||
|
1179 | b"".join(dctx.read_to_iter(b"", skip_bytes=1, read_size=1)) | |||
1162 |
|
1180 | |||
1163 | with self.assertRaisesRegexp(ValueError, 'skip_bytes larger than first input chunk'): |
|
1181 | with self.assertRaisesRegex( | |
1164 | b''.join(dctx.read_to_iter(b'foobar', skip_bytes=10)) |
|
1182 | ValueError, "skip_bytes larger than first input chunk" | |
|
1183 | ): | |||
|
1184 | b"".join(dctx.read_to_iter(b"foobar", skip_bytes=10)) | |||
1165 |
|
1185 | |||
1166 | def test_skip_bytes(self): |
|
1186 | def test_skip_bytes(self): | |
1167 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1187 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
1168 |
compressed = cctx.compress(b |
|
1188 | compressed = cctx.compress(b"foobar") | |
1169 |
|
1189 | |||
1170 | dctx = zstd.ZstdDecompressor() |
|
1190 | dctx = zstd.ZstdDecompressor() | |
1171 |
output = b |
|
1191 | output = b"".join(dctx.read_to_iter(b"hdr" + compressed, skip_bytes=3)) | |
1172 |
self.assertEqual(output, b |
|
1192 | self.assertEqual(output, b"foobar") | |
1173 |
|
1193 | |||
1174 | def test_large_output(self): |
|
1194 | def test_large_output(self): | |
1175 | source = io.BytesIO() |
|
1195 | source = io.BytesIO() | |
1176 |
source.write(b |
|
1196 | source.write(b"f" * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE) | |
1177 |
source.write(b |
|
1197 | source.write(b"o") | |
1178 | source.seek(0) |
|
1198 | source.seek(0) | |
1179 |
|
1199 | |||
1180 | cctx = zstd.ZstdCompressor(level=1) |
|
1200 | cctx = zstd.ZstdCompressor(level=1) | |
1181 | compressed = io.BytesIO(cctx.compress(source.getvalue())) |
|
1201 | compressed = io.BytesIO(cctx.compress(source.getvalue())) | |
1182 | compressed.seek(0) |
|
1202 | compressed.seek(0) | |
1183 |
|
1203 | |||
1184 | dctx = zstd.ZstdDecompressor() |
|
1204 | dctx = zstd.ZstdDecompressor() | |
1185 | it = dctx.read_to_iter(compressed) |
|
1205 | it = dctx.read_to_iter(compressed) | |
1186 |
|
1206 | |||
1187 | chunks = [] |
|
1207 | chunks = [] | |
1188 | chunks.append(next(it)) |
|
1208 | chunks.append(next(it)) | |
1189 | chunks.append(next(it)) |
|
1209 | chunks.append(next(it)) | |
1190 |
|
1210 | |||
1191 | with self.assertRaises(StopIteration): |
|
1211 | with self.assertRaises(StopIteration): | |
1192 | next(it) |
|
1212 | next(it) | |
1193 |
|
1213 | |||
1194 |
decompressed = b |
|
1214 | decompressed = b"".join(chunks) | |
1195 | self.assertEqual(decompressed, source.getvalue()) |
|
1215 | self.assertEqual(decompressed, source.getvalue()) | |
1196 |
|
1216 | |||
1197 | # And again with buffer protocol. |
|
1217 | # And again with buffer protocol. | |
1198 | it = dctx.read_to_iter(compressed.getvalue()) |
|
1218 | it = dctx.read_to_iter(compressed.getvalue()) | |
1199 | chunks = [] |
|
1219 | chunks = [] | |
1200 | chunks.append(next(it)) |
|
1220 | chunks.append(next(it)) | |
1201 | chunks.append(next(it)) |
|
1221 | chunks.append(next(it)) | |
1202 |
|
1222 | |||
1203 | with self.assertRaises(StopIteration): |
|
1223 | with self.assertRaises(StopIteration): | |
1204 | next(it) |
|
1224 | next(it) | |
1205 |
|
1225 | |||
1206 |
decompressed = b |
|
1226 | decompressed = b"".join(chunks) | |
1207 | self.assertEqual(decompressed, source.getvalue()) |
|
1227 | self.assertEqual(decompressed, source.getvalue()) | |
1208 |
|
1228 | |||
1209 |
@unittest.skipUnless( |
|
1229 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
1210 | def test_large_input(self): |
|
1230 | def test_large_input(self): | |
1211 |
bytes = list(struct.Struct( |
|
1231 | bytes = list(struct.Struct(">B").pack(i) for i in range(256)) | |
1212 | compressed = NonClosingBytesIO() |
|
1232 | compressed = NonClosingBytesIO() | |
1213 | input_size = 0 |
|
1233 | input_size = 0 | |
1214 | cctx = zstd.ZstdCompressor(level=1) |
|
1234 | cctx = zstd.ZstdCompressor(level=1) | |
1215 | with cctx.stream_writer(compressed) as compressor: |
|
1235 | with cctx.stream_writer(compressed) as compressor: | |
1216 | while True: |
|
1236 | while True: | |
1217 | compressor.write(random.choice(bytes)) |
|
1237 | compressor.write(random.choice(bytes)) | |
1218 | input_size += 1 |
|
1238 | input_size += 1 | |
1219 |
|
1239 | |||
1220 | have_compressed = len(compressed.getvalue()) > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE |
|
1240 | have_compressed = ( | |
|
1241 | len(compressed.getvalue()) | |||
|
1242 | > zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |||
|
1243 | ) | |||
1221 | have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 |
|
1244 | have_raw = input_size > zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE * 2 | |
1222 | if have_compressed and have_raw: |
|
1245 | if have_compressed and have_raw: | |
1223 | break |
|
1246 | break | |
1224 |
|
1247 | |||
1225 | compressed = io.BytesIO(compressed.getvalue()) |
|
1248 | compressed = io.BytesIO(compressed.getvalue()) | |
1226 |
self.assertGreater( |
|
1249 | self.assertGreater( | |
1227 |
|
|
1250 | len(compressed.getvalue()), zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE | |
|
1251 | ) | |||
1228 |
|
1252 | |||
1229 | dctx = zstd.ZstdDecompressor() |
|
1253 | dctx = zstd.ZstdDecompressor() | |
1230 | it = dctx.read_to_iter(compressed) |
|
1254 | it = dctx.read_to_iter(compressed) | |
1231 |
|
1255 | |||
1232 | chunks = [] |
|
1256 | chunks = [] | |
1233 | chunks.append(next(it)) |
|
1257 | chunks.append(next(it)) | |
1234 | chunks.append(next(it)) |
|
1258 | chunks.append(next(it)) | |
1235 | chunks.append(next(it)) |
|
1259 | chunks.append(next(it)) | |
1236 |
|
1260 | |||
1237 | with self.assertRaises(StopIteration): |
|
1261 | with self.assertRaises(StopIteration): | |
1238 | next(it) |
|
1262 | next(it) | |
1239 |
|
1263 | |||
1240 |
decompressed = b |
|
1264 | decompressed = b"".join(chunks) | |
1241 | self.assertEqual(len(decompressed), input_size) |
|
1265 | self.assertEqual(len(decompressed), input_size) | |
1242 |
|
1266 | |||
1243 | # And again with buffer protocol. |
|
1267 | # And again with buffer protocol. | |
1244 | it = dctx.read_to_iter(compressed.getvalue()) |
|
1268 | it = dctx.read_to_iter(compressed.getvalue()) | |
1245 |
|
1269 | |||
1246 | chunks = [] |
|
1270 | chunks = [] | |
1247 | chunks.append(next(it)) |
|
1271 | chunks.append(next(it)) | |
1248 | chunks.append(next(it)) |
|
1272 | chunks.append(next(it)) | |
1249 | chunks.append(next(it)) |
|
1273 | chunks.append(next(it)) | |
1250 |
|
1274 | |||
1251 | with self.assertRaises(StopIteration): |
|
1275 | with self.assertRaises(StopIteration): | |
1252 | next(it) |
|
1276 | next(it) | |
1253 |
|
1277 | |||
1254 |
decompressed = b |
|
1278 | decompressed = b"".join(chunks) | |
1255 | self.assertEqual(len(decompressed), input_size) |
|
1279 | self.assertEqual(len(decompressed), input_size) | |
1256 |
|
1280 | |||
1257 | def test_interesting(self): |
|
1281 | def test_interesting(self): | |
1258 | # Found this edge case via fuzzing. |
|
1282 | # Found this edge case via fuzzing. | |
1259 | cctx = zstd.ZstdCompressor(level=1) |
|
1283 | cctx = zstd.ZstdCompressor(level=1) | |
1260 |
|
1284 | |||
1261 | source = io.BytesIO() |
|
1285 | source = io.BytesIO() | |
1262 |
|
1286 | |||
1263 | compressed = NonClosingBytesIO() |
|
1287 | compressed = NonClosingBytesIO() | |
1264 | with cctx.stream_writer(compressed) as compressor: |
|
1288 | with cctx.stream_writer(compressed) as compressor: | |
1265 | for i in range(256): |
|
1289 | for i in range(256): | |
1266 |
chunk = b |
|
1290 | chunk = b"\0" * 1024 | |
1267 | compressor.write(chunk) |
|
1291 | compressor.write(chunk) | |
1268 | source.write(chunk) |
|
1292 | source.write(chunk) | |
1269 |
|
1293 | |||
1270 | dctx = zstd.ZstdDecompressor() |
|
1294 | dctx = zstd.ZstdDecompressor() | |
1271 |
|
1295 | |||
1272 |
simple = dctx.decompress( |
|
1296 | simple = dctx.decompress( | |
1273 |
|
|
1297 | compressed.getvalue(), max_output_size=len(source.getvalue()) | |
|
1298 | ) | |||
1274 | self.assertEqual(simple, source.getvalue()) |
|
1299 | self.assertEqual(simple, source.getvalue()) | |
1275 |
|
1300 | |||
1276 | compressed = io.BytesIO(compressed.getvalue()) |
|
1301 | compressed = io.BytesIO(compressed.getvalue()) | |
1277 |
streamed = b |
|
1302 | streamed = b"".join(dctx.read_to_iter(compressed)) | |
1278 | self.assertEqual(streamed, source.getvalue()) |
|
1303 | self.assertEqual(streamed, source.getvalue()) | |
1279 |
|
1304 | |||
1280 | def test_read_write_size(self): |
|
1305 | def test_read_write_size(self): | |
1281 |
source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b |
|
1306 | source = OpCountingBytesIO(zstd.ZstdCompressor().compress(b"foobarfoobar")) | |
1282 | dctx = zstd.ZstdDecompressor() |
|
1307 | dctx = zstd.ZstdDecompressor() | |
1283 | for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): |
|
1308 | for chunk in dctx.read_to_iter(source, read_size=1, write_size=1): | |
1284 | self.assertEqual(len(chunk), 1) |
|
1309 | self.assertEqual(len(chunk), 1) | |
1285 |
|
1310 | |||
1286 | self.assertEqual(source._read_count, len(source.getvalue())) |
|
1311 | self.assertEqual(source._read_count, len(source.getvalue())) | |
1287 |
|
1312 | |||
1288 | def test_magic_less(self): |
|
1313 | def test_magic_less(self): | |
1289 | params = zstd.CompressionParameters.from_level( |
|
1314 | params = zstd.CompressionParameters.from_level( | |
1290 |
1, format=zstd.FORMAT_ZSTD1_MAGICLESS |
|
1315 | 1, format=zstd.FORMAT_ZSTD1_MAGICLESS | |
|
1316 | ) | |||
1291 | cctx = zstd.ZstdCompressor(compression_params=params) |
|
1317 | cctx = zstd.ZstdCompressor(compression_params=params) | |
1292 |
frame = cctx.compress(b |
|
1318 | frame = cctx.compress(b"foobar") | |
1293 |
|
1319 | |||
1294 |
self.assertNotEqual(frame[0:4], b |
|
1320 | self.assertNotEqual(frame[0:4], b"\x28\xb5\x2f\xfd") | |
1295 |
|
1321 | |||
1296 | dctx = zstd.ZstdDecompressor() |
|
1322 | dctx = zstd.ZstdDecompressor() | |
1297 |
with self.assertRaisesRegex |
|
1323 | with self.assertRaisesRegex( | |
1298 |
zstd.ZstdError, |
|
1324 | zstd.ZstdError, "error determining content size from frame header" | |
|
1325 | ): | |||
1299 | dctx.decompress(frame) |
|
1326 | dctx.decompress(frame) | |
1300 |
|
1327 | |||
1301 | dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) |
|
1328 | dctx = zstd.ZstdDecompressor(format=zstd.FORMAT_ZSTD1_MAGICLESS) | |
1302 |
res = b |
|
1329 | res = b"".join(dctx.read_to_iter(frame)) | |
1303 |
self.assertEqual(res, b |
|
1330 | self.assertEqual(res, b"foobar") | |
1304 |
|
1331 | |||
1305 |
|
1332 | |||
1306 | @make_cffi |
|
1333 | @make_cffi | |
1307 |
class TestDecompressor_content_dict_chain( |
|
1334 | class TestDecompressor_content_dict_chain(TestCase): | |
1308 | def test_bad_inputs_simple(self): |
|
1335 | def test_bad_inputs_simple(self): | |
1309 | dctx = zstd.ZstdDecompressor() |
|
1336 | dctx = zstd.ZstdDecompressor() | |
1310 |
|
1337 | |||
1311 | with self.assertRaises(TypeError): |
|
1338 | with self.assertRaises(TypeError): | |
1312 |
dctx.decompress_content_dict_chain(b |
|
1339 | dctx.decompress_content_dict_chain(b"foo") | |
1313 |
|
1340 | |||
1314 | with self.assertRaises(TypeError): |
|
1341 | with self.assertRaises(TypeError): | |
1315 |
dctx.decompress_content_dict_chain((b |
|
1342 | dctx.decompress_content_dict_chain((b"foo", b"bar")) | |
1316 |
|
1343 | |||
1317 |
with self.assertRaisesRegex |
|
1344 | with self.assertRaisesRegex(ValueError, "empty input chain"): | |
1318 | dctx.decompress_content_dict_chain([]) |
|
1345 | dctx.decompress_content_dict_chain([]) | |
1319 |
|
1346 | |||
1320 |
with self.assertRaisesRegex |
|
1347 | with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): | |
1321 |
dctx.decompress_content_dict_chain([u |
|
1348 | dctx.decompress_content_dict_chain([u"foo"]) | |
1322 |
|
1349 | |||
1323 |
with self.assertRaisesRegex |
|
1350 | with self.assertRaisesRegex(ValueError, "chunk 0 must be bytes"): | |
1324 | dctx.decompress_content_dict_chain([True]) |
|
1351 | dctx.decompress_content_dict_chain([True]) | |
1325 |
|
1352 | |||
1326 | with self.assertRaisesRegexp(ValueError, 'chunk 0 is too small to contain a zstd frame'): |
|
1353 | with self.assertRaisesRegex( | |
|
1354 | ValueError, "chunk 0 is too small to contain a zstd frame" | |||
|
1355 | ): | |||
1327 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) |
|
1356 | dctx.decompress_content_dict_chain([zstd.FRAME_HEADER]) | |
1328 |
|
1357 | |||
1329 |
with self.assertRaisesRegex |
|
1358 | with self.assertRaisesRegex(ValueError, "chunk 0 is not a valid zstd frame"): | |
1330 |
dctx.decompress_content_dict_chain([b |
|
1359 | dctx.decompress_content_dict_chain([b"foo" * 8]) | |
1331 |
|
1360 | |||
1332 |
no_size = zstd.ZstdCompressor(write_content_size=False).compress(b |
|
1361 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) | |
1333 |
|
1362 | |||
1334 | with self.assertRaisesRegexp(ValueError, 'chunk 0 missing content size in frame'): |
|
1363 | with self.assertRaisesRegex( | |
|
1364 | ValueError, "chunk 0 missing content size in frame" | |||
|
1365 | ): | |||
1335 | dctx.decompress_content_dict_chain([no_size]) |
|
1366 | dctx.decompress_content_dict_chain([no_size]) | |
1336 |
|
1367 | |||
1337 | # Corrupt first frame. |
|
1368 | # Corrupt first frame. | |
1338 |
frame = zstd.ZstdCompressor().compress(b |
|
1369 | frame = zstd.ZstdCompressor().compress(b"foo" * 64) | |
1339 | frame = frame[0:12] + frame[15:] |
|
1370 | frame = frame[0:12] + frame[15:] | |
1340 |
with self.assertRaisesRegex |
|
1371 | with self.assertRaisesRegex( | |
1341 |
|
|
1372 | zstd.ZstdError, "chunk 0 did not decompress full frame" | |
|
1373 | ): | |||
1342 | dctx.decompress_content_dict_chain([frame]) |
|
1374 | dctx.decompress_content_dict_chain([frame]) | |
1343 |
|
1375 | |||
1344 | def test_bad_subsequent_input(self): |
|
1376 | def test_bad_subsequent_input(self): | |
1345 |
initial = zstd.ZstdCompressor().compress(b |
|
1377 | initial = zstd.ZstdCompressor().compress(b"foo" * 64) | |
1346 |
|
1378 | |||
1347 | dctx = zstd.ZstdDecompressor() |
|
1379 | dctx = zstd.ZstdDecompressor() | |
1348 |
|
1380 | |||
1349 |
with self.assertRaisesRegex |
|
1381 | with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): | |
1350 |
dctx.decompress_content_dict_chain([initial, u |
|
1382 | dctx.decompress_content_dict_chain([initial, u"foo"]) | |
1351 |
|
1383 | |||
1352 |
with self.assertRaisesRegex |
|
1384 | with self.assertRaisesRegex(ValueError, "chunk 1 must be bytes"): | |
1353 | dctx.decompress_content_dict_chain([initial, None]) |
|
1385 | dctx.decompress_content_dict_chain([initial, None]) | |
1354 |
|
1386 | |||
1355 | with self.assertRaisesRegexp(ValueError, 'chunk 1 is too small to contain a zstd frame'): |
|
1387 | with self.assertRaisesRegex( | |
|
1388 | ValueError, "chunk 1 is too small to contain a zstd frame" | |||
|
1389 | ): | |||
1356 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) |
|
1390 | dctx.decompress_content_dict_chain([initial, zstd.FRAME_HEADER]) | |
1357 |
|
1391 | |||
1358 |
with self.assertRaisesRegex |
|
1392 | with self.assertRaisesRegex(ValueError, "chunk 1 is not a valid zstd frame"): | |
1359 |
dctx.decompress_content_dict_chain([initial, b |
|
1393 | dctx.decompress_content_dict_chain([initial, b"foo" * 8]) | |
1360 |
|
1394 | |||
1361 |
no_size = zstd.ZstdCompressor(write_content_size=False).compress(b |
|
1395 | no_size = zstd.ZstdCompressor(write_content_size=False).compress(b"foo" * 64) | |
1362 |
|
1396 | |||
1363 | with self.assertRaisesRegexp(ValueError, 'chunk 1 missing content size in frame'): |
|
1397 | with self.assertRaisesRegex( | |
|
1398 | ValueError, "chunk 1 missing content size in frame" | |||
|
1399 | ): | |||
1364 | dctx.decompress_content_dict_chain([initial, no_size]) |
|
1400 | dctx.decompress_content_dict_chain([initial, no_size]) | |
1365 |
|
1401 | |||
1366 | # Corrupt second frame. |
|
1402 | # Corrupt second frame. | |
1367 |
cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b |
|
1403 | cctx = zstd.ZstdCompressor(dict_data=zstd.ZstdCompressionDict(b"foo" * 64)) | |
1368 |
frame = cctx.compress(b |
|
1404 | frame = cctx.compress(b"bar" * 64) | |
1369 | frame = frame[0:12] + frame[15:] |
|
1405 | frame = frame[0:12] + frame[15:] | |
1370 |
|
1406 | |||
1371 | with self.assertRaisesRegexp(zstd.ZstdError, 'chunk 1 did not decompress full frame'): |
|
1407 | with self.assertRaisesRegex( | |
|
1408 | zstd.ZstdError, "chunk 1 did not decompress full frame" | |||
|
1409 | ): | |||
1372 | dctx.decompress_content_dict_chain([initial, frame]) |
|
1410 | dctx.decompress_content_dict_chain([initial, frame]) | |
1373 |
|
1411 | |||
1374 | def test_simple(self): |
|
1412 | def test_simple(self): | |
1375 | original = [ |
|
1413 | original = [ | |
1376 |
b |
|
1414 | b"foo" * 64, | |
1377 |
b |
|
1415 | b"foobar" * 64, | |
1378 |
b |
|
1416 | b"baz" * 64, | |
1379 |
b |
|
1417 | b"foobaz" * 64, | |
1380 |
b |
|
1418 | b"foobarbaz" * 64, | |
1381 | ] |
|
1419 | ] | |
1382 |
|
1420 | |||
1383 | chunks = [] |
|
1421 | chunks = [] | |
1384 | chunks.append(zstd.ZstdCompressor().compress(original[0])) |
|
1422 | chunks.append(zstd.ZstdCompressor().compress(original[0])) | |
1385 | for i, chunk in enumerate(original[1:]): |
|
1423 | for i, chunk in enumerate(original[1:]): | |
1386 | d = zstd.ZstdCompressionDict(original[i]) |
|
1424 | d = zstd.ZstdCompressionDict(original[i]) | |
1387 | cctx = zstd.ZstdCompressor(dict_data=d) |
|
1425 | cctx = zstd.ZstdCompressor(dict_data=d) | |
1388 | chunks.append(cctx.compress(chunk)) |
|
1426 | chunks.append(cctx.compress(chunk)) | |
1389 |
|
1427 | |||
1390 | for i in range(1, len(original)): |
|
1428 | for i in range(1, len(original)): | |
1391 | chain = chunks[0:i] |
|
1429 | chain = chunks[0:i] | |
1392 | expected = original[i - 1] |
|
1430 | expected = original[i - 1] | |
1393 | dctx = zstd.ZstdDecompressor() |
|
1431 | dctx = zstd.ZstdDecompressor() | |
1394 | decompressed = dctx.decompress_content_dict_chain(chain) |
|
1432 | decompressed = dctx.decompress_content_dict_chain(chain) | |
1395 | self.assertEqual(decompressed, expected) |
|
1433 | self.assertEqual(decompressed, expected) | |
1396 |
|
1434 | |||
1397 |
|
1435 | |||
1398 | # TODO enable for CFFI |
|
1436 | # TODO enable for CFFI | |
1399 |
class TestDecompressor_multi_decompress_to_buffer( |
|
1437 | class TestDecompressor_multi_decompress_to_buffer(TestCase): | |
1400 | def test_invalid_inputs(self): |
|
1438 | def test_invalid_inputs(self): | |
1401 | dctx = zstd.ZstdDecompressor() |
|
1439 | dctx = zstd.ZstdDecompressor() | |
1402 |
|
1440 | |||
1403 |
if not hasattr(dctx, |
|
1441 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1404 |
self.skipTest( |
|
1442 | self.skipTest("multi_decompress_to_buffer not available") | |
1405 |
|
1443 | |||
1406 | with self.assertRaises(TypeError): |
|
1444 | with self.assertRaises(TypeError): | |
1407 | dctx.multi_decompress_to_buffer(True) |
|
1445 | dctx.multi_decompress_to_buffer(True) | |
1408 |
|
1446 | |||
1409 | with self.assertRaises(TypeError): |
|
1447 | with self.assertRaises(TypeError): | |
1410 | dctx.multi_decompress_to_buffer((1, 2)) |
|
1448 | dctx.multi_decompress_to_buffer((1, 2)) | |
1411 |
|
1449 | |||
1412 |
with self.assertRaisesRegex |
|
1450 | with self.assertRaisesRegex(TypeError, "item 0 not a bytes like object"): | |
1413 |
dctx.multi_decompress_to_buffer([u |
|
1451 | dctx.multi_decompress_to_buffer([u"foo"]) | |
1414 |
|
1452 | |||
1415 | with self.assertRaisesRegexp(ValueError, 'could not determine decompressed size of item 0'): |
|
1453 | with self.assertRaisesRegex( | |
1416 | dctx.multi_decompress_to_buffer([b'foobarbaz']) |
|
1454 | ValueError, "could not determine decompressed size of item 0" | |
|
1455 | ): | |||
|
1456 | dctx.multi_decompress_to_buffer([b"foobarbaz"]) | |||
1417 |
|
1457 | |||
1418 | def test_list_input(self): |
|
1458 | def test_list_input(self): | |
1419 | cctx = zstd.ZstdCompressor() |
|
1459 | cctx = zstd.ZstdCompressor() | |
1420 |
|
1460 | |||
1421 |
original = [b |
|
1461 | original = [b"foo" * 4, b"bar" * 6] | |
1422 | frames = [cctx.compress(d) for d in original] |
|
1462 | frames = [cctx.compress(d) for d in original] | |
1423 |
|
1463 | |||
1424 | dctx = zstd.ZstdDecompressor() |
|
1464 | dctx = zstd.ZstdDecompressor() | |
1425 |
|
1465 | |||
1426 |
if not hasattr(dctx, |
|
1466 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1427 |
self.skipTest( |
|
1467 | self.skipTest("multi_decompress_to_buffer not available") | |
1428 |
|
1468 | |||
1429 | result = dctx.multi_decompress_to_buffer(frames) |
|
1469 | result = dctx.multi_decompress_to_buffer(frames) | |
1430 |
|
1470 | |||
1431 | self.assertEqual(len(result), len(frames)) |
|
1471 | self.assertEqual(len(result), len(frames)) | |
1432 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1472 | self.assertEqual(result.size(), sum(map(len, original))) | |
1433 |
|
1473 | |||
1434 | for i, data in enumerate(original): |
|
1474 | for i, data in enumerate(original): | |
1435 | self.assertEqual(result[i].tobytes(), data) |
|
1475 | self.assertEqual(result[i].tobytes(), data) | |
1436 |
|
1476 | |||
1437 | self.assertEqual(result[0].offset, 0) |
|
1477 | self.assertEqual(result[0].offset, 0) | |
1438 | self.assertEqual(len(result[0]), 12) |
|
1478 | self.assertEqual(len(result[0]), 12) | |
1439 | self.assertEqual(result[1].offset, 12) |
|
1479 | self.assertEqual(result[1].offset, 12) | |
1440 | self.assertEqual(len(result[1]), 18) |
|
1480 | self.assertEqual(len(result[1]), 18) | |
1441 |
|
1481 | |||
1442 | def test_list_input_frame_sizes(self): |
|
1482 | def test_list_input_frame_sizes(self): | |
1443 | cctx = zstd.ZstdCompressor() |
|
1483 | cctx = zstd.ZstdCompressor() | |
1444 |
|
1484 | |||
1445 |
original = [b |
|
1485 | original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] | |
1446 | frames = [cctx.compress(d) for d in original] |
|
1486 | frames = [cctx.compress(d) for d in original] | |
1447 |
sizes = struct.pack( |
|
1487 | sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) | |
1448 |
|
1488 | |||
1449 | dctx = zstd.ZstdDecompressor() |
|
1489 | dctx = zstd.ZstdDecompressor() | |
1450 |
|
1490 | |||
1451 |
if not hasattr(dctx, |
|
1491 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1452 |
self.skipTest( |
|
1492 | self.skipTest("multi_decompress_to_buffer not available") | |
1453 |
|
1493 | |||
1454 | result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) |
|
1494 | result = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes) | |
1455 |
|
1495 | |||
1456 | self.assertEqual(len(result), len(frames)) |
|
1496 | self.assertEqual(len(result), len(frames)) | |
1457 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1497 | self.assertEqual(result.size(), sum(map(len, original))) | |
1458 |
|
1498 | |||
1459 | for i, data in enumerate(original): |
|
1499 | for i, data in enumerate(original): | |
1460 | self.assertEqual(result[i].tobytes(), data) |
|
1500 | self.assertEqual(result[i].tobytes(), data) | |
1461 |
|
1501 | |||
1462 | def test_buffer_with_segments_input(self): |
|
1502 | def test_buffer_with_segments_input(self): | |
1463 | cctx = zstd.ZstdCompressor() |
|
1503 | cctx = zstd.ZstdCompressor() | |
1464 |
|
1504 | |||
1465 |
original = [b |
|
1505 | original = [b"foo" * 4, b"bar" * 6] | |
1466 | frames = [cctx.compress(d) for d in original] |
|
1506 | frames = [cctx.compress(d) for d in original] | |
1467 |
|
1507 | |||
1468 | dctx = zstd.ZstdDecompressor() |
|
1508 | dctx = zstd.ZstdDecompressor() | |
1469 |
|
1509 | |||
1470 |
if not hasattr(dctx, |
|
1510 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1471 |
self.skipTest( |
|
1511 | self.skipTest("multi_decompress_to_buffer not available") | |
1472 |
|
1512 | |||
1473 | segments = struct.pack('=QQQQ', 0, len(frames[0]), len(frames[0]), len(frames[1])) |
|
1513 | segments = struct.pack( | |
1474 | b = zstd.BufferWithSegments(b''.join(frames), segments) |
|
1514 | "=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1]) | |
|
1515 | ) | |||
|
1516 | b = zstd.BufferWithSegments(b"".join(frames), segments) | |||
1475 |
|
1517 | |||
1476 | result = dctx.multi_decompress_to_buffer(b) |
|
1518 | result = dctx.multi_decompress_to_buffer(b) | |
1477 |
|
1519 | |||
1478 | self.assertEqual(len(result), len(frames)) |
|
1520 | self.assertEqual(len(result), len(frames)) | |
1479 | self.assertEqual(result[0].offset, 0) |
|
1521 | self.assertEqual(result[0].offset, 0) | |
1480 | self.assertEqual(len(result[0]), 12) |
|
1522 | self.assertEqual(len(result[0]), 12) | |
1481 | self.assertEqual(result[1].offset, 12) |
|
1523 | self.assertEqual(result[1].offset, 12) | |
1482 | self.assertEqual(len(result[1]), 18) |
|
1524 | self.assertEqual(len(result[1]), 18) | |
1483 |
|
1525 | |||
1484 | def test_buffer_with_segments_sizes(self): |
|
1526 | def test_buffer_with_segments_sizes(self): | |
1485 | cctx = zstd.ZstdCompressor(write_content_size=False) |
|
1527 | cctx = zstd.ZstdCompressor(write_content_size=False) | |
1486 |
original = [b |
|
1528 | original = [b"foo" * 4, b"bar" * 6, b"baz" * 8] | |
1487 | frames = [cctx.compress(d) for d in original] |
|
1529 | frames = [cctx.compress(d) for d in original] | |
1488 |
sizes = struct.pack( |
|
1530 | sizes = struct.pack("=" + "Q" * len(original), *map(len, original)) | |
1489 |
|
1531 | |||
1490 | dctx = zstd.ZstdDecompressor() |
|
1532 | dctx = zstd.ZstdDecompressor() | |
1491 |
|
1533 | |||
1492 |
if not hasattr(dctx, |
|
1534 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1493 |
self.skipTest( |
|
1535 | self.skipTest("multi_decompress_to_buffer not available") | |
1494 |
|
1536 | |||
1495 |
segments = struct.pack( |
|
1537 | segments = struct.pack( | |
1496 | len(frames[0]), len(frames[1]), |
|
1538 | "=QQQQQQ", | |
1497 | len(frames[0]) + len(frames[1]), len(frames[2])) |
|
1539 | 0, | |
1498 | b = zstd.BufferWithSegments(b''.join(frames), segments) |
|
1540 | len(frames[0]), | |
|
1541 | len(frames[0]), | |||
|
1542 | len(frames[1]), | |||
|
1543 | len(frames[0]) + len(frames[1]), | |||
|
1544 | len(frames[2]), | |||
|
1545 | ) | |||
|
1546 | b = zstd.BufferWithSegments(b"".join(frames), segments) | |||
1499 |
|
1547 | |||
1500 | result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes) |
|
1548 | result = dctx.multi_decompress_to_buffer(b, decompressed_sizes=sizes) | |
1501 |
|
1549 | |||
1502 | self.assertEqual(len(result), len(frames)) |
|
1550 | self.assertEqual(len(result), len(frames)) | |
1503 | self.assertEqual(result.size(), sum(map(len, original))) |
|
1551 | self.assertEqual(result.size(), sum(map(len, original))) | |
1504 |
|
1552 | |||
1505 | for i, data in enumerate(original): |
|
1553 | for i, data in enumerate(original): | |
1506 | self.assertEqual(result[i].tobytes(), data) |
|
1554 | self.assertEqual(result[i].tobytes(), data) | |
1507 |
|
1555 | |||
1508 | def test_buffer_with_segments_collection_input(self): |
|
1556 | def test_buffer_with_segments_collection_input(self): | |
1509 | cctx = zstd.ZstdCompressor() |
|
1557 | cctx = zstd.ZstdCompressor() | |
1510 |
|
1558 | |||
1511 | original = [ |
|
1559 | original = [ | |
1512 |
b |
|
1560 | b"foo0" * 2, | |
1513 |
b |
|
1561 | b"foo1" * 3, | |
1514 |
b |
|
1562 | b"foo2" * 4, | |
1515 |
b |
|
1563 | b"foo3" * 5, | |
1516 |
b |
|
1564 | b"foo4" * 6, | |
1517 | ] |
|
1565 | ] | |
1518 |
|
1566 | |||
1519 |
if not hasattr(cctx, |
|
1567 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
1520 |
self.skipTest( |
|
1568 | self.skipTest("multi_compress_to_buffer not available") | |
1521 |
|
1569 | |||
1522 | frames = cctx.multi_compress_to_buffer(original) |
|
1570 | frames = cctx.multi_compress_to_buffer(original) | |
1523 |
|
1571 | |||
1524 | # Check round trip. |
|
1572 | # Check round trip. | |
1525 | dctx = zstd.ZstdDecompressor() |
|
1573 | dctx = zstd.ZstdDecompressor() | |
1526 |
|
1574 | |||
1527 | decompressed = dctx.multi_decompress_to_buffer(frames, threads=3) |
|
1575 | decompressed = dctx.multi_decompress_to_buffer(frames, threads=3) | |
1528 |
|
1576 | |||
1529 | self.assertEqual(len(decompressed), len(original)) |
|
1577 | self.assertEqual(len(decompressed), len(original)) | |
1530 |
|
1578 | |||
1531 | for i, data in enumerate(original): |
|
1579 | for i, data in enumerate(original): | |
1532 | self.assertEqual(data, decompressed[i].tobytes()) |
|
1580 | self.assertEqual(data, decompressed[i].tobytes()) | |
1533 |
|
1581 | |||
1534 | # And a manual mode. |
|
1582 | # And a manual mode. | |
1535 |
b = b |
|
1583 | b = b"".join([frames[0].tobytes(), frames[1].tobytes()]) | |
1536 |
b1 = zstd.BufferWithSegments( |
|
1584 | b1 = zstd.BufferWithSegments( | |
1537 | 0, len(frames[0]), |
|
1585 | b, struct.pack("=QQQQ", 0, len(frames[0]), len(frames[0]), len(frames[1])) | |
1538 | len(frames[0]), len(frames[1]))) |
|
1586 | ) | |
1539 |
|
1587 | |||
1540 |
b = b |
|
1588 | b = b"".join([frames[2].tobytes(), frames[3].tobytes(), frames[4].tobytes()]) | |
1541 |
b2 = zstd.BufferWithSegments( |
|
1589 | b2 = zstd.BufferWithSegments( | |
1542 | 0, len(frames[2]), |
|
1590 | b, | |
1543 | len(frames[2]), len(frames[3]), |
|
1591 | struct.pack( | |
1544 | len(frames[2]) + len(frames[3]), len(frames[4]))) |
|
1592 | "=QQQQQQ", | |
|
1593 | 0, | |||
|
1594 | len(frames[2]), | |||
|
1595 | len(frames[2]), | |||
|
1596 | len(frames[3]), | |||
|
1597 | len(frames[2]) + len(frames[3]), | |||
|
1598 | len(frames[4]), | |||
|
1599 | ), | |||
|
1600 | ) | |||
1545 |
|
1601 | |||
1546 | c = zstd.BufferWithSegmentsCollection(b1, b2) |
|
1602 | c = zstd.BufferWithSegmentsCollection(b1, b2) | |
1547 |
|
1603 | |||
1548 | dctx = zstd.ZstdDecompressor() |
|
1604 | dctx = zstd.ZstdDecompressor() | |
1549 | decompressed = dctx.multi_decompress_to_buffer(c) |
|
1605 | decompressed = dctx.multi_decompress_to_buffer(c) | |
1550 |
|
1606 | |||
1551 | self.assertEqual(len(decompressed), 5) |
|
1607 | self.assertEqual(len(decompressed), 5) | |
1552 | for i in range(5): |
|
1608 | for i in range(5): | |
1553 | self.assertEqual(decompressed[i].tobytes(), original[i]) |
|
1609 | self.assertEqual(decompressed[i].tobytes(), original[i]) | |
1554 |
|
1610 | |||
1555 | def test_dict(self): |
|
1611 | def test_dict(self): | |
1556 | d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16) |
|
1612 | d = zstd.train_dictionary(16384, generate_samples(), k=64, d=16) | |
1557 |
|
1613 | |||
1558 | cctx = zstd.ZstdCompressor(dict_data=d, level=1) |
|
1614 | cctx = zstd.ZstdCompressor(dict_data=d, level=1) | |
1559 | frames = [cctx.compress(s) for s in generate_samples()] |
|
1615 | frames = [cctx.compress(s) for s in generate_samples()] | |
1560 |
|
1616 | |||
1561 | dctx = zstd.ZstdDecompressor(dict_data=d) |
|
1617 | dctx = zstd.ZstdDecompressor(dict_data=d) | |
1562 |
|
1618 | |||
1563 |
if not hasattr(dctx, |
|
1619 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1564 |
self.skipTest( |
|
1620 | self.skipTest("multi_decompress_to_buffer not available") | |
1565 |
|
1621 | |||
1566 | result = dctx.multi_decompress_to_buffer(frames) |
|
1622 | result = dctx.multi_decompress_to_buffer(frames) | |
1567 |
|
1623 | |||
1568 | self.assertEqual([o.tobytes() for o in result], generate_samples()) |
|
1624 | self.assertEqual([o.tobytes() for o in result], generate_samples()) | |
1569 |
|
1625 | |||
1570 | def test_multiple_threads(self): |
|
1626 | def test_multiple_threads(self): | |
1571 | cctx = zstd.ZstdCompressor() |
|
1627 | cctx = zstd.ZstdCompressor() | |
1572 |
|
1628 | |||
1573 | frames = [] |
|
1629 | frames = [] | |
1574 |
frames.extend(cctx.compress(b |
|
1630 | frames.extend(cctx.compress(b"x" * 64) for i in range(256)) | |
1575 |
frames.extend(cctx.compress(b |
|
1631 | frames.extend(cctx.compress(b"y" * 64) for i in range(256)) | |
1576 |
|
1632 | |||
1577 | dctx = zstd.ZstdDecompressor() |
|
1633 | dctx = zstd.ZstdDecompressor() | |
1578 |
|
1634 | |||
1579 |
if not hasattr(dctx, |
|
1635 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1580 |
self.skipTest( |
|
1636 | self.skipTest("multi_decompress_to_buffer not available") | |
1581 |
|
1637 | |||
1582 | result = dctx.multi_decompress_to_buffer(frames, threads=-1) |
|
1638 | result = dctx.multi_decompress_to_buffer(frames, threads=-1) | |
1583 |
|
1639 | |||
1584 | self.assertEqual(len(result), len(frames)) |
|
1640 | self.assertEqual(len(result), len(frames)) | |
1585 | self.assertEqual(result.size(), 2 * 64 * 256) |
|
1641 | self.assertEqual(result.size(), 2 * 64 * 256) | |
1586 |
self.assertEqual(result[0].tobytes(), b |
|
1642 | self.assertEqual(result[0].tobytes(), b"x" * 64) | |
1587 |
self.assertEqual(result[256].tobytes(), b |
|
1643 | self.assertEqual(result[256].tobytes(), b"y" * 64) | |
1588 |
|
1644 | |||
1589 | def test_item_failure(self): |
|
1645 | def test_item_failure(self): | |
1590 | cctx = zstd.ZstdCompressor() |
|
1646 | cctx = zstd.ZstdCompressor() | |
1591 |
frames = [cctx.compress(b |
|
1647 | frames = [cctx.compress(b"x" * 128), cctx.compress(b"y" * 128)] | |
1592 |
|
1648 | |||
1593 |
frames[1] = frames[1][0:15] + b |
|
1649 | frames[1] = frames[1][0:15] + b"extra" + frames[1][15:] | |
1594 |
|
1650 | |||
1595 | dctx = zstd.ZstdDecompressor() |
|
1651 | dctx = zstd.ZstdDecompressor() | |
1596 |
|
1652 | |||
1597 |
if not hasattr(dctx, |
|
1653 | if not hasattr(dctx, "multi_decompress_to_buffer"): | |
1598 |
self.skipTest( |
|
1654 | self.skipTest("multi_decompress_to_buffer not available") | |
1599 |
|
1655 | |||
1600 |
with self.assertRaisesRegex |
|
1656 | with self.assertRaisesRegex( | |
1601 | 'error decompressing item 1: (' |
|
1657 | zstd.ZstdError, | |
1602 | 'Corrupted block|' |
|
1658 | "error decompressing item 1: (" | |
1603 | 'Destination buffer is too small)'): |
|
1659 | "Corrupted block|" | |
|
1660 | "Destination buffer is too small)", | |||
|
1661 | ): | |||
1604 | dctx.multi_decompress_to_buffer(frames) |
|
1662 | dctx.multi_decompress_to_buffer(frames) | |
1605 |
|
1663 | |||
1606 |
with self.assertRaisesRegex |
|
1664 | with self.assertRaisesRegex( | |
1607 | 'error decompressing item 1: (' |
|
1665 | zstd.ZstdError, | |
1608 | 'Corrupted block|' |
|
1666 | "error decompressing item 1: (" | |
1609 | 'Destination buffer is too small)'): |
|
1667 | "Corrupted block|" | |
|
1668 | "Destination buffer is too small)", | |||
|
1669 | ): | |||
1610 | dctx.multi_decompress_to_buffer(frames, threads=2) |
|
1670 | dctx.multi_decompress_to_buffer(frames, threads=2) | |
1611 |
|
@@ -1,485 +1,576 b'' | |||||
1 | import io |
|
1 | import io | |
2 | import os |
|
2 | import os | |
3 | import unittest |
|
3 | import unittest | |
4 |
|
4 | |||
5 | try: |
|
5 | try: | |
6 | import hypothesis |
|
6 | import hypothesis | |
7 | import hypothesis.strategies as strategies |
|
7 | import hypothesis.strategies as strategies | |
8 | except ImportError: |
|
8 | except ImportError: | |
9 |
raise unittest.SkipTest( |
|
9 | raise unittest.SkipTest("hypothesis not available") | |
10 |
|
10 | |||
11 | import zstandard as zstd |
|
11 | import zstandard as zstd | |
12 |
|
12 | |||
13 |
from . |
|
13 | from .common import ( | |
14 | make_cffi, |
|
14 | make_cffi, | |
15 | NonClosingBytesIO, |
|
15 | NonClosingBytesIO, | |
16 | random_input_data, |
|
16 | random_input_data, | |
|
17 | TestCase, | |||
17 | ) |
|
18 | ) | |
18 |
|
19 | |||
19 |
|
20 | |||
20 |
@unittest.skipUnless( |
|
21 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
21 | @make_cffi |
|
22 | @make_cffi | |
22 |
class TestDecompressor_stream_reader_fuzzing( |
|
23 | class TestDecompressor_stream_reader_fuzzing(TestCase): | |
23 | @hypothesis.settings( |
|
24 | @hypothesis.settings( | |
24 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
25 | suppress_health_check=[ | |
25 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
26 | hypothesis.HealthCheck.large_base_example, | |
26 | level=strategies.integers(min_value=1, max_value=5), |
|
27 | hypothesis.HealthCheck.too_slow, | |
27 | streaming=strategies.booleans(), |
|
28 | ] | |
28 | source_read_size=strategies.integers(1, 1048576), |
|
29 | ) | |
29 | read_sizes=strategies.data()) |
|
30 | @hypothesis.given( | |
30 | def test_stream_source_read_variance(self, original, level, streaming, |
|
31 | original=strategies.sampled_from(random_input_data()), | |
31 | source_read_size, read_sizes): |
|
32 | level=strategies.integers(min_value=1, max_value=5), | |
|
33 | streaming=strategies.booleans(), | |||
|
34 | source_read_size=strategies.integers(1, 1048576), | |||
|
35 | read_sizes=strategies.data(), | |||
|
36 | ) | |||
|
37 | def test_stream_source_read_variance( | |||
|
38 | self, original, level, streaming, source_read_size, read_sizes | |||
|
39 | ): | |||
32 | cctx = zstd.ZstdCompressor(level=level) |
|
40 | cctx = zstd.ZstdCompressor(level=level) | |
33 |
|
41 | |||
34 | if streaming: |
|
42 | if streaming: | |
35 | source = io.BytesIO() |
|
43 | source = io.BytesIO() | |
36 | writer = cctx.stream_writer(source) |
|
44 | writer = cctx.stream_writer(source) | |
37 | writer.write(original) |
|
45 | writer.write(original) | |
38 | writer.flush(zstd.FLUSH_FRAME) |
|
46 | writer.flush(zstd.FLUSH_FRAME) | |
39 | source.seek(0) |
|
47 | source.seek(0) | |
40 | else: |
|
48 | else: | |
41 | frame = cctx.compress(original) |
|
49 | frame = cctx.compress(original) | |
42 | source = io.BytesIO(frame) |
|
50 | source = io.BytesIO(frame) | |
43 |
|
51 | |||
44 | dctx = zstd.ZstdDecompressor() |
|
52 | dctx = zstd.ZstdDecompressor() | |
45 |
|
53 | |||
46 | chunks = [] |
|
54 | chunks = [] | |
47 | with dctx.stream_reader(source, read_size=source_read_size) as reader: |
|
55 | with dctx.stream_reader(source, read_size=source_read_size) as reader: | |
48 | while True: |
|
56 | while True: | |
49 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) |
|
57 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) | |
50 | chunk = reader.read(read_size) |
|
58 | chunk = reader.read(read_size) | |
51 | if not chunk and read_size: |
|
59 | if not chunk and read_size: | |
52 | break |
|
60 | break | |
53 |
|
61 | |||
54 | chunks.append(chunk) |
|
62 | chunks.append(chunk) | |
55 |
|
63 | |||
56 |
self.assertEqual(b |
|
64 | self.assertEqual(b"".join(chunks), original) | |
57 |
|
65 | |||
58 | # Similar to above except we have a constant read() size. |
|
66 | # Similar to above except we have a constant read() size. | |
59 | @hypothesis.settings( |
|
67 | @hypothesis.settings( | |
60 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
68 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
61 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
69 | ) | |
62 | level=strategies.integers(min_value=1, max_value=5), |
|
70 | @hypothesis.given( | |
63 | streaming=strategies.booleans(), |
|
71 | original=strategies.sampled_from(random_input_data()), | |
64 | source_read_size=strategies.integers(1, 1048576), |
|
72 | level=strategies.integers(min_value=1, max_value=5), | |
65 | read_size=strategies.integers(-1, 131072)) |
|
73 | streaming=strategies.booleans(), | |
66 | def test_stream_source_read_size(self, original, level, streaming, |
|
74 | source_read_size=strategies.integers(1, 1048576), | |
67 | source_read_size, read_size): |
|
75 | read_size=strategies.integers(-1, 131072), | |
|
76 | ) | |||
|
77 | def test_stream_source_read_size( | |||
|
78 | self, original, level, streaming, source_read_size, read_size | |||
|
79 | ): | |||
68 | if read_size == 0: |
|
80 | if read_size == 0: | |
69 | read_size = 1 |
|
81 | read_size = 1 | |
70 |
|
82 | |||
71 | cctx = zstd.ZstdCompressor(level=level) |
|
83 | cctx = zstd.ZstdCompressor(level=level) | |
72 |
|
84 | |||
73 | if streaming: |
|
85 | if streaming: | |
74 | source = io.BytesIO() |
|
86 | source = io.BytesIO() | |
75 | writer = cctx.stream_writer(source) |
|
87 | writer = cctx.stream_writer(source) | |
76 | writer.write(original) |
|
88 | writer.write(original) | |
77 | writer.flush(zstd.FLUSH_FRAME) |
|
89 | writer.flush(zstd.FLUSH_FRAME) | |
78 | source.seek(0) |
|
90 | source.seek(0) | |
79 | else: |
|
91 | else: | |
80 | frame = cctx.compress(original) |
|
92 | frame = cctx.compress(original) | |
81 | source = io.BytesIO(frame) |
|
93 | source = io.BytesIO(frame) | |
82 |
|
94 | |||
83 | dctx = zstd.ZstdDecompressor() |
|
95 | dctx = zstd.ZstdDecompressor() | |
84 |
|
96 | |||
85 | chunks = [] |
|
97 | chunks = [] | |
86 | reader = dctx.stream_reader(source, read_size=source_read_size) |
|
98 | reader = dctx.stream_reader(source, read_size=source_read_size) | |
87 | while True: |
|
99 | while True: | |
88 | chunk = reader.read(read_size) |
|
100 | chunk = reader.read(read_size) | |
89 | if not chunk and read_size: |
|
101 | if not chunk and read_size: | |
90 | break |
|
102 | break | |
91 |
|
103 | |||
92 | chunks.append(chunk) |
|
104 | chunks.append(chunk) | |
93 |
|
105 | |||
94 |
self.assertEqual(b |
|
106 | self.assertEqual(b"".join(chunks), original) | |
95 |
|
107 | |||
96 | @hypothesis.settings( |
|
108 | @hypothesis.settings( | |
97 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
109 | suppress_health_check=[ | |
98 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
110 | hypothesis.HealthCheck.large_base_example, | |
99 | level=strategies.integers(min_value=1, max_value=5), |
|
111 | hypothesis.HealthCheck.too_slow, | |
100 | streaming=strategies.booleans(), |
|
112 | ] | |
101 | source_read_size=strategies.integers(1, 1048576), |
|
113 | ) | |
102 | read_sizes=strategies.data()) |
|
114 | @hypothesis.given( | |
103 | def test_buffer_source_read_variance(self, original, level, streaming, |
|
115 | original=strategies.sampled_from(random_input_data()), | |
104 | source_read_size, read_sizes): |
|
116 | level=strategies.integers(min_value=1, max_value=5), | |
|
117 | streaming=strategies.booleans(), | |||
|
118 | source_read_size=strategies.integers(1, 1048576), | |||
|
119 | read_sizes=strategies.data(), | |||
|
120 | ) | |||
|
121 | def test_buffer_source_read_variance( | |||
|
122 | self, original, level, streaming, source_read_size, read_sizes | |||
|
123 | ): | |||
105 | cctx = zstd.ZstdCompressor(level=level) |
|
124 | cctx = zstd.ZstdCompressor(level=level) | |
106 |
|
125 | |||
107 | if streaming: |
|
126 | if streaming: | |
108 | source = io.BytesIO() |
|
127 | source = io.BytesIO() | |
109 | writer = cctx.stream_writer(source) |
|
128 | writer = cctx.stream_writer(source) | |
110 | writer.write(original) |
|
129 | writer.write(original) | |
111 | writer.flush(zstd.FLUSH_FRAME) |
|
130 | writer.flush(zstd.FLUSH_FRAME) | |
112 | frame = source.getvalue() |
|
131 | frame = source.getvalue() | |
113 | else: |
|
132 | else: | |
114 | frame = cctx.compress(original) |
|
133 | frame = cctx.compress(original) | |
115 |
|
134 | |||
116 | dctx = zstd.ZstdDecompressor() |
|
135 | dctx = zstd.ZstdDecompressor() | |
117 | chunks = [] |
|
136 | chunks = [] | |
118 |
|
137 | |||
119 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: |
|
138 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: | |
120 | while True: |
|
139 | while True: | |
121 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) |
|
140 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) | |
122 | chunk = reader.read(read_size) |
|
141 | chunk = reader.read(read_size) | |
123 | if not chunk and read_size: |
|
142 | if not chunk and read_size: | |
124 | break |
|
143 | break | |
125 |
|
144 | |||
126 | chunks.append(chunk) |
|
145 | chunks.append(chunk) | |
127 |
|
146 | |||
128 |
self.assertEqual(b |
|
147 | self.assertEqual(b"".join(chunks), original) | |
129 |
|
148 | |||
130 | # Similar to above except we have a constant read() size. |
|
149 | # Similar to above except we have a constant read() size. | |
131 | @hypothesis.settings( |
|
150 | @hypothesis.settings( | |
132 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
151 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
133 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
152 | ) | |
134 | level=strategies.integers(min_value=1, max_value=5), |
|
153 | @hypothesis.given( | |
135 | streaming=strategies.booleans(), |
|
154 | original=strategies.sampled_from(random_input_data()), | |
136 | source_read_size=strategies.integers(1, 1048576), |
|
155 | level=strategies.integers(min_value=1, max_value=5), | |
137 | read_size=strategies.integers(-1, 131072)) |
|
156 | streaming=strategies.booleans(), | |
138 | def test_buffer_source_constant_read_size(self, original, level, streaming, |
|
157 | source_read_size=strategies.integers(1, 1048576), | |
139 | source_read_size, read_size): |
|
158 | read_size=strategies.integers(-1, 131072), | |
|
159 | ) | |||
|
160 | def test_buffer_source_constant_read_size( | |||
|
161 | self, original, level, streaming, source_read_size, read_size | |||
|
162 | ): | |||
140 | if read_size == 0: |
|
163 | if read_size == 0: | |
141 | read_size = -1 |
|
164 | read_size = -1 | |
142 |
|
165 | |||
143 | cctx = zstd.ZstdCompressor(level=level) |
|
166 | cctx = zstd.ZstdCompressor(level=level) | |
144 |
|
167 | |||
145 | if streaming: |
|
168 | if streaming: | |
146 | source = io.BytesIO() |
|
169 | source = io.BytesIO() | |
147 | writer = cctx.stream_writer(source) |
|
170 | writer = cctx.stream_writer(source) | |
148 | writer.write(original) |
|
171 | writer.write(original) | |
149 | writer.flush(zstd.FLUSH_FRAME) |
|
172 | writer.flush(zstd.FLUSH_FRAME) | |
150 | frame = source.getvalue() |
|
173 | frame = source.getvalue() | |
151 | else: |
|
174 | else: | |
152 | frame = cctx.compress(original) |
|
175 | frame = cctx.compress(original) | |
153 |
|
176 | |||
154 | dctx = zstd.ZstdDecompressor() |
|
177 | dctx = zstd.ZstdDecompressor() | |
155 | chunks = [] |
|
178 | chunks = [] | |
156 |
|
179 | |||
157 | reader = dctx.stream_reader(frame, read_size=source_read_size) |
|
180 | reader = dctx.stream_reader(frame, read_size=source_read_size) | |
158 | while True: |
|
181 | while True: | |
159 | chunk = reader.read(read_size) |
|
182 | chunk = reader.read(read_size) | |
160 | if not chunk and read_size: |
|
183 | if not chunk and read_size: | |
161 | break |
|
184 | break | |
162 |
|
185 | |||
163 | chunks.append(chunk) |
|
186 | chunks.append(chunk) | |
164 |
|
187 | |||
165 |
self.assertEqual(b |
|
188 | self.assertEqual(b"".join(chunks), original) | |
166 |
|
189 | |||
167 | @hypothesis.settings( |
|
190 | @hypothesis.settings( | |
168 |
suppress_health_check=[hypothesis.HealthCheck.large_base_example] |
|
191 | suppress_health_check=[hypothesis.HealthCheck.large_base_example] | |
169 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
192 | ) | |
170 | level=strategies.integers(min_value=1, max_value=5), |
|
193 | @hypothesis.given( | |
171 | streaming=strategies.booleans(), |
|
194 | original=strategies.sampled_from(random_input_data()), | |
172 | source_read_size=strategies.integers(1, 1048576)) |
|
195 | level=strategies.integers(min_value=1, max_value=5), | |
173 | def test_stream_source_readall(self, original, level, streaming, |
|
196 | streaming=strategies.booleans(), | |
174 | source_read_size): |
|
197 | source_read_size=strategies.integers(1, 1048576), | |
|
198 | ) | |||
|
199 | def test_stream_source_readall(self, original, level, streaming, source_read_size): | |||
175 | cctx = zstd.ZstdCompressor(level=level) |
|
200 | cctx = zstd.ZstdCompressor(level=level) | |
176 |
|
201 | |||
177 | if streaming: |
|
202 | if streaming: | |
178 | source = io.BytesIO() |
|
203 | source = io.BytesIO() | |
179 | writer = cctx.stream_writer(source) |
|
204 | writer = cctx.stream_writer(source) | |
180 | writer.write(original) |
|
205 | writer.write(original) | |
181 | writer.flush(zstd.FLUSH_FRAME) |
|
206 | writer.flush(zstd.FLUSH_FRAME) | |
182 | source.seek(0) |
|
207 | source.seek(0) | |
183 | else: |
|
208 | else: | |
184 | frame = cctx.compress(original) |
|
209 | frame = cctx.compress(original) | |
185 | source = io.BytesIO(frame) |
|
210 | source = io.BytesIO(frame) | |
186 |
|
211 | |||
187 | dctx = zstd.ZstdDecompressor() |
|
212 | dctx = zstd.ZstdDecompressor() | |
188 |
|
213 | |||
189 | data = dctx.stream_reader(source, read_size=source_read_size).readall() |
|
214 | data = dctx.stream_reader(source, read_size=source_read_size).readall() | |
190 | self.assertEqual(data, original) |
|
215 | self.assertEqual(data, original) | |
191 |
|
216 | |||
192 | @hypothesis.settings( |
|
217 | @hypothesis.settings( | |
193 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
218 | suppress_health_check=[ | |
194 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
219 | hypothesis.HealthCheck.large_base_example, | |
195 | level=strategies.integers(min_value=1, max_value=5), |
|
220 | hypothesis.HealthCheck.too_slow, | |
196 | streaming=strategies.booleans(), |
|
221 | ] | |
197 | source_read_size=strategies.integers(1, 1048576), |
|
222 | ) | |
198 | read_sizes=strategies.data()) |
|
223 | @hypothesis.given( | |
199 | def test_stream_source_read1_variance(self, original, level, streaming, |
|
224 | original=strategies.sampled_from(random_input_data()), | |
200 | source_read_size, read_sizes): |
|
225 | level=strategies.integers(min_value=1, max_value=5), | |
|
226 | streaming=strategies.booleans(), | |||
|
227 | source_read_size=strategies.integers(1, 1048576), | |||
|
228 | read_sizes=strategies.data(), | |||
|
229 | ) | |||
|
230 | def test_stream_source_read1_variance( | |||
|
231 | self, original, level, streaming, source_read_size, read_sizes | |||
|
232 | ): | |||
201 | cctx = zstd.ZstdCompressor(level=level) |
|
233 | cctx = zstd.ZstdCompressor(level=level) | |
202 |
|
234 | |||
203 | if streaming: |
|
235 | if streaming: | |
204 | source = io.BytesIO() |
|
236 | source = io.BytesIO() | |
205 | writer = cctx.stream_writer(source) |
|
237 | writer = cctx.stream_writer(source) | |
206 | writer.write(original) |
|
238 | writer.write(original) | |
207 | writer.flush(zstd.FLUSH_FRAME) |
|
239 | writer.flush(zstd.FLUSH_FRAME) | |
208 | source.seek(0) |
|
240 | source.seek(0) | |
209 | else: |
|
241 | else: | |
210 | frame = cctx.compress(original) |
|
242 | frame = cctx.compress(original) | |
211 | source = io.BytesIO(frame) |
|
243 | source = io.BytesIO(frame) | |
212 |
|
244 | |||
213 | dctx = zstd.ZstdDecompressor() |
|
245 | dctx = zstd.ZstdDecompressor() | |
214 |
|
246 | |||
215 | chunks = [] |
|
247 | chunks = [] | |
216 | with dctx.stream_reader(source, read_size=source_read_size) as reader: |
|
248 | with dctx.stream_reader(source, read_size=source_read_size) as reader: | |
217 | while True: |
|
249 | while True: | |
218 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) |
|
250 | read_size = read_sizes.draw(strategies.integers(-1, 131072)) | |
219 | chunk = reader.read1(read_size) |
|
251 | chunk = reader.read1(read_size) | |
220 | if not chunk and read_size: |
|
252 | if not chunk and read_size: | |
221 | break |
|
253 | break | |
222 |
|
254 | |||
223 | chunks.append(chunk) |
|
255 | chunks.append(chunk) | |
224 |
|
256 | |||
225 |
self.assertEqual(b |
|
257 | self.assertEqual(b"".join(chunks), original) | |
226 |
|
258 | |||
227 | @hypothesis.settings( |
|
259 | @hypothesis.settings( | |
228 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
260 | suppress_health_check=[ | |
229 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
261 | hypothesis.HealthCheck.large_base_example, | |
230 | level=strategies.integers(min_value=1, max_value=5), |
|
262 | hypothesis.HealthCheck.too_slow, | |
231 | streaming=strategies.booleans(), |
|
263 | ] | |
232 | source_read_size=strategies.integers(1, 1048576), |
|
264 | ) | |
233 | read_sizes=strategies.data()) |
|
265 | @hypothesis.given( | |
234 | def test_stream_source_readinto1_variance(self, original, level, streaming, |
|
266 | original=strategies.sampled_from(random_input_data()), | |
235 | source_read_size, read_sizes): |
|
267 | level=strategies.integers(min_value=1, max_value=5), | |
|
268 | streaming=strategies.booleans(), | |||
|
269 | source_read_size=strategies.integers(1, 1048576), | |||
|
270 | read_sizes=strategies.data(), | |||
|
271 | ) | |||
|
272 | def test_stream_source_readinto1_variance( | |||
|
273 | self, original, level, streaming, source_read_size, read_sizes | |||
|
274 | ): | |||
236 | cctx = zstd.ZstdCompressor(level=level) |
|
275 | cctx = zstd.ZstdCompressor(level=level) | |
237 |
|
276 | |||
238 | if streaming: |
|
277 | if streaming: | |
239 | source = io.BytesIO() |
|
278 | source = io.BytesIO() | |
240 | writer = cctx.stream_writer(source) |
|
279 | writer = cctx.stream_writer(source) | |
241 | writer.write(original) |
|
280 | writer.write(original) | |
242 | writer.flush(zstd.FLUSH_FRAME) |
|
281 | writer.flush(zstd.FLUSH_FRAME) | |
243 | source.seek(0) |
|
282 | source.seek(0) | |
244 | else: |
|
283 | else: | |
245 | frame = cctx.compress(original) |
|
284 | frame = cctx.compress(original) | |
246 | source = io.BytesIO(frame) |
|
285 | source = io.BytesIO(frame) | |
247 |
|
286 | |||
248 | dctx = zstd.ZstdDecompressor() |
|
287 | dctx = zstd.ZstdDecompressor() | |
249 |
|
288 | |||
250 | chunks = [] |
|
289 | chunks = [] | |
251 | with dctx.stream_reader(source, read_size=source_read_size) as reader: |
|
290 | with dctx.stream_reader(source, read_size=source_read_size) as reader: | |
252 | while True: |
|
291 | while True: | |
253 | read_size = read_sizes.draw(strategies.integers(1, 131072)) |
|
292 | read_size = read_sizes.draw(strategies.integers(1, 131072)) | |
254 | b = bytearray(read_size) |
|
293 | b = bytearray(read_size) | |
255 | count = reader.readinto1(b) |
|
294 | count = reader.readinto1(b) | |
256 |
|
295 | |||
257 | if not count: |
|
296 | if not count: | |
258 | break |
|
297 | break | |
259 |
|
298 | |||
260 | chunks.append(bytes(b[0:count])) |
|
299 | chunks.append(bytes(b[0:count])) | |
261 |
|
300 | |||
262 |
self.assertEqual(b |
|
301 | self.assertEqual(b"".join(chunks), original) | |
263 |
|
302 | |||
264 | @hypothesis.settings( |
|
303 | @hypothesis.settings( | |
265 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
304 | suppress_health_check=[ | |
|
305 | hypothesis.HealthCheck.large_base_example, | |||
|
306 | hypothesis.HealthCheck.too_slow, | |||
|
307 | ] | |||
|
308 | ) | |||
266 | @hypothesis.given( |
|
309 | @hypothesis.given( | |
267 | original=strategies.sampled_from(random_input_data()), |
|
310 | original=strategies.sampled_from(random_input_data()), | |
268 | level=strategies.integers(min_value=1, max_value=5), |
|
311 | level=strategies.integers(min_value=1, max_value=5), | |
269 | source_read_size=strategies.integers(1, 1048576), |
|
312 | source_read_size=strategies.integers(1, 1048576), | |
270 | seek_amounts=strategies.data(), |
|
313 | seek_amounts=strategies.data(), | |
271 |
read_sizes=strategies.data() |
|
314 | read_sizes=strategies.data(), | |
272 | def test_relative_seeks(self, original, level, source_read_size, seek_amounts, |
|
315 | ) | |
273 | read_sizes): |
|
316 | def test_relative_seeks( | |
|
317 | self, original, level, source_read_size, seek_amounts, read_sizes | |||
|
318 | ): | |||
274 | cctx = zstd.ZstdCompressor(level=level) |
|
319 | cctx = zstd.ZstdCompressor(level=level) | |
275 | frame = cctx.compress(original) |
|
320 | frame = cctx.compress(original) | |
276 |
|
321 | |||
277 | dctx = zstd.ZstdDecompressor() |
|
322 | dctx = zstd.ZstdDecompressor() | |
278 |
|
323 | |||
279 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: |
|
324 | with dctx.stream_reader(frame, read_size=source_read_size) as reader: | |
280 | while True: |
|
325 | while True: | |
281 | amount = seek_amounts.draw(strategies.integers(0, 16384)) |
|
326 | amount = seek_amounts.draw(strategies.integers(0, 16384)) | |
282 | reader.seek(amount, os.SEEK_CUR) |
|
327 | reader.seek(amount, os.SEEK_CUR) | |
283 |
|
328 | |||
284 | offset = reader.tell() |
|
329 | offset = reader.tell() | |
285 | read_amount = read_sizes.draw(strategies.integers(1, 16384)) |
|
330 | read_amount = read_sizes.draw(strategies.integers(1, 16384)) | |
286 | chunk = reader.read(read_amount) |
|
331 | chunk = reader.read(read_amount) | |
287 |
|
332 | |||
288 | if not chunk: |
|
333 | if not chunk: | |
289 | break |
|
334 | break | |
290 |
|
335 | |||
291 | self.assertEqual(original[offset:offset + len(chunk)], chunk) |
|
336 | self.assertEqual(original[offset : offset + len(chunk)], chunk) | |
292 |
|
337 | |||
293 | @hypothesis.settings( |
|
338 | @hypothesis.settings( | |
294 | suppress_health_check=[hypothesis.HealthCheck.large_base_example]) |
|
339 | suppress_health_check=[ | |
|
340 | hypothesis.HealthCheck.large_base_example, | |||
|
341 | hypothesis.HealthCheck.too_slow, | |||
|
342 | ] | |||
|
343 | ) | |||
295 | @hypothesis.given( |
|
344 | @hypothesis.given( | |
296 | originals=strategies.data(), |
|
345 | originals=strategies.data(), | |
297 | frame_count=strategies.integers(min_value=2, max_value=10), |
|
346 | frame_count=strategies.integers(min_value=2, max_value=10), | |
298 | level=strategies.integers(min_value=1, max_value=5), |
|
347 | level=strategies.integers(min_value=1, max_value=5), | |
299 | source_read_size=strategies.integers(1, 1048576), |
|
348 | source_read_size=strategies.integers(1, 1048576), | |
300 |
read_sizes=strategies.data() |
|
349 | read_sizes=strategies.data(), | |
301 | def test_multiple_frames(self, originals, frame_count, level, |
|
350 | ) | |
302 | source_read_size, read_sizes): |
|
351 | def test_multiple_frames( | |
|
352 | self, originals, frame_count, level, source_read_size, read_sizes | |||
|
353 | ): | |||
303 |
|
354 | |||
304 | cctx = zstd.ZstdCompressor(level=level) |
|
355 | cctx = zstd.ZstdCompressor(level=level) | |
305 | source = io.BytesIO() |
|
356 | source = io.BytesIO() | |
306 | buffer = io.BytesIO() |
|
357 | buffer = io.BytesIO() | |
307 | writer = cctx.stream_writer(buffer) |
|
358 | writer = cctx.stream_writer(buffer) | |
308 |
|
359 | |||
309 | for i in range(frame_count): |
|
360 | for i in range(frame_count): | |
310 | data = originals.draw(strategies.sampled_from(random_input_data())) |
|
361 | data = originals.draw(strategies.sampled_from(random_input_data())) | |
311 | source.write(data) |
|
362 | source.write(data) | |
312 | writer.write(data) |
|
363 | writer.write(data) | |
313 | writer.flush(zstd.FLUSH_FRAME) |
|
364 | writer.flush(zstd.FLUSH_FRAME) | |
314 |
|
365 | |||
315 | dctx = zstd.ZstdDecompressor() |
|
366 | dctx = zstd.ZstdDecompressor() | |
316 | buffer.seek(0) |
|
367 | buffer.seek(0) | |
317 |
reader = dctx.stream_reader( |
|
368 | reader = dctx.stream_reader( | |
318 | read_across_frames=True) |
|
369 | buffer, read_size=source_read_size, read_across_frames=True | |
|
370 | ) | |||
319 |
|
371 | |||
320 | chunks = [] |
|
372 | chunks = [] | |
321 |
|
373 | |||
322 | while True: |
|
374 | while True: | |
323 | read_amount = read_sizes.draw(strategies.integers(-1, 16384)) |
|
375 | read_amount = read_sizes.draw(strategies.integers(-1, 16384)) | |
324 | chunk = reader.read(read_amount) |
|
376 | chunk = reader.read(read_amount) | |
325 |
|
377 | |||
326 | if not chunk and read_amount: |
|
378 | if not chunk and read_amount: | |
327 | break |
|
379 | break | |
328 |
|
380 | |||
329 | chunks.append(chunk) |
|
381 | chunks.append(chunk) | |
330 |
|
382 | |||
331 |
self.assertEqual(source.getvalue(), b |
|
383 | self.assertEqual(source.getvalue(), b"".join(chunks)) | |
332 |
|
384 | |||
333 |
|
385 | |||
334 |
@unittest.skipUnless( |
|
386 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
335 | @make_cffi |
|
387 | @make_cffi | |
336 |
class TestDecompressor_stream_writer_fuzzing( |
|
388 | class TestDecompressor_stream_writer_fuzzing(TestCase): | |
337 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
389 | @hypothesis.settings( | |
338 | level=strategies.integers(min_value=1, max_value=5), |
|
390 | suppress_health_check=[ | |
339 | write_size=strategies.integers(min_value=1, max_value=8192), |
|
391 | hypothesis.HealthCheck.large_base_example, | |
340 | input_sizes=strategies.data()) |
|
392 | hypothesis.HealthCheck.too_slow, | |
|
393 | ] | |||
|
394 | ) | |||
|
395 | @hypothesis.given( | |||
|
396 | original=strategies.sampled_from(random_input_data()), | |||
|
397 | level=strategies.integers(min_value=1, max_value=5), | |||
|
398 | write_size=strategies.integers(min_value=1, max_value=8192), | |||
|
399 | input_sizes=strategies.data(), | |||
|
400 | ) | |||
341 | def test_write_size_variance(self, original, level, write_size, input_sizes): |
|
401 | def test_write_size_variance(self, original, level, write_size, input_sizes): | |
342 | cctx = zstd.ZstdCompressor(level=level) |
|
402 | cctx = zstd.ZstdCompressor(level=level) | |
343 | frame = cctx.compress(original) |
|
403 | frame = cctx.compress(original) | |
344 |
|
404 | |||
345 | dctx = zstd.ZstdDecompressor() |
|
405 | dctx = zstd.ZstdDecompressor() | |
346 | source = io.BytesIO(frame) |
|
406 | source = io.BytesIO(frame) | |
347 | dest = NonClosingBytesIO() |
|
407 | dest = NonClosingBytesIO() | |
348 |
|
408 | |||
349 | with dctx.stream_writer(dest, write_size=write_size) as decompressor: |
|
409 | with dctx.stream_writer(dest, write_size=write_size) as decompressor: | |
350 | while True: |
|
410 | while True: | |
351 | input_size = input_sizes.draw(strategies.integers(1, 4096)) |
|
411 | input_size = input_sizes.draw(strategies.integers(1, 4096)) | |
352 | chunk = source.read(input_size) |
|
412 | chunk = source.read(input_size) | |
353 | if not chunk: |
|
413 | if not chunk: | |
354 | break |
|
414 | break | |
355 |
|
415 | |||
356 | decompressor.write(chunk) |
|
416 | decompressor.write(chunk) | |
357 |
|
417 | |||
358 | self.assertEqual(dest.getvalue(), original) |
|
418 | self.assertEqual(dest.getvalue(), original) | |
359 |
|
419 | |||
360 |
|
420 | |||
361 |
@unittest.skipUnless( |
|
421 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
362 | @make_cffi |
|
422 | @make_cffi | |
363 |
class TestDecompressor_copy_stream_fuzzing( |
|
423 | class TestDecompressor_copy_stream_fuzzing(TestCase): | |
364 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
424 | @hypothesis.settings( | |
365 | level=strategies.integers(min_value=1, max_value=5), |
|
425 | suppress_health_check=[ | |
366 | read_size=strategies.integers(min_value=1, max_value=8192), |
|
426 | hypothesis.HealthCheck.large_base_example, | |
367 | write_size=strategies.integers(min_value=1, max_value=8192)) |
|
427 | hypothesis.HealthCheck.too_slow, | |
|
428 | ] | |||
|
429 | ) | |||
|
430 | @hypothesis.given( | |||
|
431 | original=strategies.sampled_from(random_input_data()), | |||
|
432 | level=strategies.integers(min_value=1, max_value=5), | |||
|
433 | read_size=strategies.integers(min_value=1, max_value=8192), | |||
|
434 | write_size=strategies.integers(min_value=1, max_value=8192), | |||
|
435 | ) | |||
368 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
436 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |
369 | cctx = zstd.ZstdCompressor(level=level) |
|
437 | cctx = zstd.ZstdCompressor(level=level) | |
370 | frame = cctx.compress(original) |
|
438 | frame = cctx.compress(original) | |
371 |
|
439 | |||
372 | source = io.BytesIO(frame) |
|
440 | source = io.BytesIO(frame) | |
373 | dest = io.BytesIO() |
|
441 | dest = io.BytesIO() | |
374 |
|
442 | |||
375 | dctx = zstd.ZstdDecompressor() |
|
443 | dctx = zstd.ZstdDecompressor() | |
376 | dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size) |
|
444 | dctx.copy_stream(source, dest, read_size=read_size, write_size=write_size) | |
377 |
|
445 | |||
378 | self.assertEqual(dest.getvalue(), original) |
|
446 | self.assertEqual(dest.getvalue(), original) | |
379 |
|
447 | |||
380 |
|
448 | |||
381 |
@unittest.skipUnless( |
|
449 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
382 | @make_cffi |
|
450 | @make_cffi | |
383 |
class TestDecompressor_decompressobj_fuzzing( |
|
451 | class TestDecompressor_decompressobj_fuzzing(TestCase): | |
384 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
452 | @hypothesis.settings( | |
385 | level=strategies.integers(min_value=1, max_value=5), |
|
453 | suppress_health_check=[ | |
386 | chunk_sizes=strategies.data()) |
|
454 | hypothesis.HealthCheck.large_base_example, | |
|
455 | hypothesis.HealthCheck.too_slow, | |||
|
456 | ] | |||
|
457 | ) | |||
|
458 | @hypothesis.given( | |||
|
459 | original=strategies.sampled_from(random_input_data()), | |||
|
460 | level=strategies.integers(min_value=1, max_value=5), | |||
|
461 | chunk_sizes=strategies.data(), | |||
|
462 | ) | |||
387 | def test_random_input_sizes(self, original, level, chunk_sizes): |
|
463 | def test_random_input_sizes(self, original, level, chunk_sizes): | |
388 | cctx = zstd.ZstdCompressor(level=level) |
|
464 | cctx = zstd.ZstdCompressor(level=level) | |
389 | frame = cctx.compress(original) |
|
465 | frame = cctx.compress(original) | |
390 |
|
466 | |||
391 | source = io.BytesIO(frame) |
|
467 | source = io.BytesIO(frame) | |
392 |
|
468 | |||
393 | dctx = zstd.ZstdDecompressor() |
|
469 | dctx = zstd.ZstdDecompressor() | |
394 | dobj = dctx.decompressobj() |
|
470 | dobj = dctx.decompressobj() | |
395 |
|
471 | |||
396 | chunks = [] |
|
472 | chunks = [] | |
397 | while True: |
|
473 | while True: | |
398 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
474 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
399 | chunk = source.read(chunk_size) |
|
475 | chunk = source.read(chunk_size) | |
400 | if not chunk: |
|
476 | if not chunk: | |
401 | break |
|
477 | break | |
402 |
|
478 | |||
403 | chunks.append(dobj.decompress(chunk)) |
|
479 | chunks.append(dobj.decompress(chunk)) | |
404 |
|
480 | |||
405 |
self.assertEqual(b |
|
481 | self.assertEqual(b"".join(chunks), original) | |
406 |
|
482 | |||
407 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
483 | @hypothesis.settings( | |
408 | level=strategies.integers(min_value=1, max_value=5), |
|
484 | suppress_health_check=[ | |
409 | write_size=strategies.integers(min_value=1, |
|
485 | hypothesis.HealthCheck.large_base_example, | |
410 | max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE), |
|
486 | hypothesis.HealthCheck.too_slow, | |
411 | chunk_sizes=strategies.data()) |
|
487 | ] | |
|
488 | ) | |||
|
489 | @hypothesis.given( | |||
|
490 | original=strategies.sampled_from(random_input_data()), | |||
|
491 | level=strategies.integers(min_value=1, max_value=5), | |||
|
492 | write_size=strategies.integers( | |||
|
493 | min_value=1, max_value=4 * zstd.DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE | |||
|
494 | ), | |||
|
495 | chunk_sizes=strategies.data(), | |||
|
496 | ) | |||
412 | def test_random_output_sizes(self, original, level, write_size, chunk_sizes): |
|
497 | def test_random_output_sizes(self, original, level, write_size, chunk_sizes): | |
413 | cctx = zstd.ZstdCompressor(level=level) |
|
498 | cctx = zstd.ZstdCompressor(level=level) | |
414 | frame = cctx.compress(original) |
|
499 | frame = cctx.compress(original) | |
415 |
|
500 | |||
416 | source = io.BytesIO(frame) |
|
501 | source = io.BytesIO(frame) | |
417 |
|
502 | |||
418 | dctx = zstd.ZstdDecompressor() |
|
503 | dctx = zstd.ZstdDecompressor() | |
419 | dobj = dctx.decompressobj(write_size=write_size) |
|
504 | dobj = dctx.decompressobj(write_size=write_size) | |
420 |
|
505 | |||
421 | chunks = [] |
|
506 | chunks = [] | |
422 | while True: |
|
507 | while True: | |
423 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) |
|
508 | chunk_size = chunk_sizes.draw(strategies.integers(1, 4096)) | |
424 | chunk = source.read(chunk_size) |
|
509 | chunk = source.read(chunk_size) | |
425 | if not chunk: |
|
510 | if not chunk: | |
426 | break |
|
511 | break | |
427 |
|
512 | |||
428 | chunks.append(dobj.decompress(chunk)) |
|
513 | chunks.append(dobj.decompress(chunk)) | |
429 |
|
514 | |||
430 |
self.assertEqual(b |
|
515 | self.assertEqual(b"".join(chunks), original) | |
431 |
|
516 | |||
432 |
|
517 | |||
433 |
@unittest.skipUnless( |
|
518 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
434 | @make_cffi |
|
519 | @make_cffi | |
435 |
class TestDecompressor_read_to_iter_fuzzing( |
|
520 | class TestDecompressor_read_to_iter_fuzzing(TestCase): | |
436 | @hypothesis.given(original=strategies.sampled_from(random_input_data()), |
|
521 | @hypothesis.given( | |
437 | level=strategies.integers(min_value=1, max_value=5), |
|
522 | original=strategies.sampled_from(random_input_data()), | |
438 |
|
|
523 | level=strategies.integers(min_value=1, max_value=5), | |
439 |
|
|
524 | read_size=strategies.integers(min_value=1, max_value=4096), | |
|
525 | write_size=strategies.integers(min_value=1, max_value=4096), | |||
|
526 | ) | |||
440 | def test_read_write_size_variance(self, original, level, read_size, write_size): |
|
527 | def test_read_write_size_variance(self, original, level, read_size, write_size): | |
441 | cctx = zstd.ZstdCompressor(level=level) |
|
528 | cctx = zstd.ZstdCompressor(level=level) | |
442 | frame = cctx.compress(original) |
|
529 | frame = cctx.compress(original) | |
443 |
|
530 | |||
444 | source = io.BytesIO(frame) |
|
531 | source = io.BytesIO(frame) | |
445 |
|
532 | |||
446 | dctx = zstd.ZstdDecompressor() |
|
533 | dctx = zstd.ZstdDecompressor() | |
447 | chunks = list(dctx.read_to_iter(source, read_size=read_size, write_size=write_size)) |
|
534 | chunks = list( | |
|
535 | dctx.read_to_iter(source, read_size=read_size, write_size=write_size) | |||
|
536 | ) | |||
448 |
|
537 | |||
449 |
self.assertEqual(b |
|
538 | self.assertEqual(b"".join(chunks), original) | |
450 |
|
539 | |||
451 |
|
540 | |||
452 |
@unittest.skipUnless( |
|
541 | @unittest.skipUnless("ZSTD_SLOW_TESTS" in os.environ, "ZSTD_SLOW_TESTS not set") | |
453 |
class TestDecompressor_multi_decompress_to_buffer_fuzzing( |
|
542 | class TestDecompressor_multi_decompress_to_buffer_fuzzing(TestCase): | |
454 | @hypothesis.given(original=strategies.lists(strategies.sampled_from(random_input_data()), |
|
543 | @hypothesis.given( | |
455 | min_size=1, max_size=1024), |
|
544 | original=strategies.lists( | |
456 | threads=strategies.integers(min_value=1, max_value=8), |
|
545 | strategies.sampled_from(random_input_data()), min_size=1, max_size=1024 | |
457 | use_dict=strategies.booleans()) |
|
546 | ), | |
|
547 | threads=strategies.integers(min_value=1, max_value=8), | |||
|
548 | use_dict=strategies.booleans(), | |||
|
549 | ) | |||
458 | def test_data_equivalence(self, original, threads, use_dict): |
|
550 | def test_data_equivalence(self, original, threads, use_dict): | |
459 | kwargs = {} |
|
551 | kwargs = {} | |
460 | if use_dict: |
|
552 | if use_dict: | |
461 |
kwargs[ |
|
553 | kwargs["dict_data"] = zstd.ZstdCompressionDict(original[0]) | |
462 |
|
554 | |||
463 |
cctx = zstd.ZstdCompressor( |
|
555 | cctx = zstd.ZstdCompressor( | |
464 | write_content_size=True, |
|
556 | level=1, write_content_size=True, write_checksum=True, **kwargs | |
465 | write_checksum=True, |
|
557 | ) | |
466 | **kwargs) |
|
|||
467 |
|
558 | |||
468 |
if not hasattr(cctx, |
|
559 | if not hasattr(cctx, "multi_compress_to_buffer"): | |
469 |
self.skipTest( |
|
560 | self.skipTest("multi_compress_to_buffer not available") | |
470 |
|
561 | |||
471 | frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1) |
|
562 | frames_buffer = cctx.multi_compress_to_buffer(original, threads=-1) | |
472 |
|
563 | |||
473 | dctx = zstd.ZstdDecompressor(**kwargs) |
|
564 | dctx = zstd.ZstdDecompressor(**kwargs) | |
474 | result = dctx.multi_decompress_to_buffer(frames_buffer) |
|
565 | result = dctx.multi_decompress_to_buffer(frames_buffer) | |
475 |
|
566 | |||
476 | self.assertEqual(len(result), len(original)) |
|
567 | self.assertEqual(len(result), len(original)) | |
477 | for i, frame in enumerate(result): |
|
568 | for i, frame in enumerate(result): | |
478 | self.assertEqual(frame.tobytes(), original[i]) |
|
569 | self.assertEqual(frame.tobytes(), original[i]) | |
479 |
|
570 | |||
480 | frames_list = [f.tobytes() for f in frames_buffer] |
|
571 | frames_list = [f.tobytes() for f in frames_buffer] | |
481 | result = dctx.multi_decompress_to_buffer(frames_list) |
|
572 | result = dctx.multi_decompress_to_buffer(frames_list) | |
482 |
|
573 | |||
483 | self.assertEqual(len(result), len(original)) |
|
574 | self.assertEqual(len(result), len(original)) | |
484 | for i, frame in enumerate(result): |
|
575 | for i, frame in enumerate(result): | |
485 | self.assertEqual(frame.tobytes(), original[i]) |
|
576 | self.assertEqual(frame.tobytes(), original[i]) |
@@ -1,15 +1,15 b'' | |||||
1 | import unittest |
|
1 | import unittest | |
2 |
|
2 | |||
3 | import zstandard as zstd |
|
3 | import zstandard as zstd | |
4 |
|
4 | |||
5 |
from . |
|
5 | from .common import ( | |
6 | make_cffi, |
|
6 | make_cffi, | |
|
7 | TestCase, | |||
7 | ) |
|
8 | ) | |
8 |
|
9 | |||
9 |
|
10 | |||
10 | @make_cffi |
|
11 | @make_cffi | |
11 |
class TestSizes( |
|
12 | class TestSizes(TestCase): | |
12 | def test_decompression_size(self): |
|
13 | def test_decompression_size(self): | |
13 | size = zstd.estimate_decompression_context_size() |
|
14 | size = zstd.estimate_decompression_context_size() | |
14 | self.assertGreater(size, 100000) |
|
15 | self.assertGreater(size, 100000) | |
15 |
|
@@ -1,69 +1,70 b'' | |||||
1 | from __future__ import unicode_literals |
|
1 | from __future__ import unicode_literals | |
2 |
|
2 | |||
3 | import unittest |
|
3 | import unittest | |
4 |
|
4 | |||
5 | import zstandard as zstd |
|
5 | import zstandard as zstd | |
6 |
|
6 | |||
7 |
from . |
|
7 | from .common import ( | |
8 | make_cffi, |
|
8 | make_cffi, | |
|
9 | TestCase, | |||
9 | ) |
|
10 | ) | |
10 |
|
11 | |||
11 |
|
12 | |||
12 | @make_cffi |
|
13 | @make_cffi | |
13 |
class TestModuleAttributes( |
|
14 | class TestModuleAttributes(TestCase): | |
14 | def test_version(self): |
|
15 | def test_version(self): | |
15 |
self.assertEqual(zstd.ZSTD_VERSION, (1, 4, |
|
16 | self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 4)) | |
16 |
|
17 | |||
17 |
self.assertEqual(zstd.__version__, |
|
18 | self.assertEqual(zstd.__version__, "0.13.0") | |
18 |
|
19 | |||
19 | def test_constants(self): |
|
20 | def test_constants(self): | |
20 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
|
21 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) | |
21 |
self.assertEqual(zstd.FRAME_HEADER, b |
|
22 | self.assertEqual(zstd.FRAME_HEADER, b"\x28\xb5\x2f\xfd") | |
22 |
|
23 | |||
23 | def test_hasattr(self): |
|
24 | def test_hasattr(self): | |
24 | attrs = ( |
|
25 | attrs = ( | |
25 |
|
|
26 | "CONTENTSIZE_UNKNOWN", | |
26 |
|
|
27 | "CONTENTSIZE_ERROR", | |
27 |
|
|
28 | "COMPRESSION_RECOMMENDED_INPUT_SIZE", | |
28 |
|
|
29 | "COMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
29 |
|
|
30 | "DECOMPRESSION_RECOMMENDED_INPUT_SIZE", | |
30 |
|
|
31 | "DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE", | |
31 |
|
|
32 | "MAGIC_NUMBER", | |
32 |
|
|
33 | "FLUSH_BLOCK", | |
33 |
|
|
34 | "FLUSH_FRAME", | |
34 |
|
|
35 | "BLOCKSIZELOG_MAX", | |
35 |
|
|
36 | "BLOCKSIZE_MAX", | |
36 |
|
|
37 | "WINDOWLOG_MIN", | |
37 |
|
|
38 | "WINDOWLOG_MAX", | |
38 |
|
|
39 | "CHAINLOG_MIN", | |
39 |
|
|
40 | "CHAINLOG_MAX", | |
40 |
|
|
41 | "HASHLOG_MIN", | |
41 |
|
|
42 | "HASHLOG_MAX", | |
42 |
|
|
43 | "HASHLOG3_MAX", | |
43 |
|
|
44 | "MINMATCH_MIN", | |
44 |
|
|
45 | "MINMATCH_MAX", | |
45 |
|
|
46 | "SEARCHLOG_MIN", | |
46 |
|
|
47 | "SEARCHLOG_MAX", | |
47 |
|
|
48 | "SEARCHLENGTH_MIN", | |
48 |
|
|
49 | "SEARCHLENGTH_MAX", | |
49 |
|
|
50 | "TARGETLENGTH_MIN", | |
50 |
|
|
51 | "TARGETLENGTH_MAX", | |
51 |
|
|
52 | "LDM_MINMATCH_MIN", | |
52 |
|
|
53 | "LDM_MINMATCH_MAX", | |
53 |
|
|
54 | "LDM_BUCKETSIZELOG_MAX", | |
54 |
|
|
55 | "STRATEGY_FAST", | |
55 |
|
|
56 | "STRATEGY_DFAST", | |
56 |
|
|
57 | "STRATEGY_GREEDY", | |
57 |
|
|
58 | "STRATEGY_LAZY", | |
58 |
|
|
59 | "STRATEGY_LAZY2", | |
59 |
|
|
60 | "STRATEGY_BTLAZY2", | |
60 |
|
|
61 | "STRATEGY_BTOPT", | |
61 |
|
|
62 | "STRATEGY_BTULTRA", | |
62 |
|
|
63 | "STRATEGY_BTULTRA2", | |
63 |
|
|
64 | "DICT_TYPE_AUTO", | |
64 |
|
|
65 | "DICT_TYPE_RAWCONTENT", | |
65 |
|
|
66 | "DICT_TYPE_FULLDICT", | |
66 | ) |
|
67 | ) | |
67 |
|
68 | |||
68 | for a in attrs: |
|
69 | for a in attrs: | |
69 | self.assertTrue(hasattr(zstd, a), a) |
|
70 | self.assertTrue(hasattr(zstd, a), a) |
@@ -1,89 +1,92 b'' | |||||
1 | import struct |
|
1 | import struct | |
2 | import sys |
|
2 | import sys | |
3 | import unittest |
|
3 | import unittest | |
4 |
|
4 | |||
5 | import zstandard as zstd |
|
5 | import zstandard as zstd | |
6 |
|
6 | |||
7 |
from . |
|
7 | from .common import ( | |
8 | generate_samples, |
|
8 | generate_samples, | |
9 | make_cffi, |
|
9 | make_cffi, | |
10 | random_input_data, |
|
10 | random_input_data, | |
|
11 | TestCase, | |||
11 | ) |
|
12 | ) | |
12 |
|
13 | |||
13 | if sys.version_info[0] >= 3: |
|
14 | if sys.version_info[0] >= 3: | |
14 | int_type = int |
|
15 | int_type = int | |
15 | else: |
|
16 | else: | |
16 | int_type = long |
|
17 | int_type = long | |
17 |
|
18 | |||
18 |
|
19 | |||
19 | @make_cffi |
|
20 | @make_cffi | |
20 |
class TestTrainDictionary( |
|
21 | class TestTrainDictionary(TestCase): | |
21 | def test_no_args(self): |
|
22 | def test_no_args(self): | |
22 | with self.assertRaises(TypeError): |
|
23 | with self.assertRaises(TypeError): | |
23 | zstd.train_dictionary() |
|
24 | zstd.train_dictionary() | |
24 |
|
25 | |||
25 | def test_bad_args(self): |
|
26 | def test_bad_args(self): | |
26 | with self.assertRaises(TypeError): |
|
27 | with self.assertRaises(TypeError): | |
27 |
zstd.train_dictionary(8192, u |
|
28 | zstd.train_dictionary(8192, u"foo") | |
28 |
|
29 | |||
29 | with self.assertRaises(ValueError): |
|
30 | with self.assertRaises(ValueError): | |
30 |
zstd.train_dictionary(8192, [u |
|
31 | zstd.train_dictionary(8192, [u"foo"]) | |
31 |
|
32 | |||
32 | def test_no_params(self): |
|
33 | def test_no_params(self): | |
33 | d = zstd.train_dictionary(8192, random_input_data()) |
|
34 | d = zstd.train_dictionary(8192, random_input_data()) | |
34 | self.assertIsInstance(d.dict_id(), int_type) |
|
35 | self.assertIsInstance(d.dict_id(), int_type) | |
35 |
|
36 | |||
36 | # The dictionary ID may be different across platforms. |
|
37 | # The dictionary ID may be different across platforms. | |
37 |
expected = b |
|
38 | expected = b"\x37\xa4\x30\xec" + struct.pack("<I", d.dict_id()) | |
38 |
|
39 | |||
39 | data = d.as_bytes() |
|
40 | data = d.as_bytes() | |
40 | self.assertEqual(data[0:8], expected) |
|
41 | self.assertEqual(data[0:8], expected) | |
41 |
|
42 | |||
42 | def test_basic(self): |
|
43 | def test_basic(self): | |
43 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) |
|
44 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) | |
44 | self.assertIsInstance(d.dict_id(), int_type) |
|
45 | self.assertIsInstance(d.dict_id(), int_type) | |
45 |
|
46 | |||
46 | data = d.as_bytes() |
|
47 | data = d.as_bytes() | |
47 |
self.assertEqual(data[0:4], b |
|
48 | self.assertEqual(data[0:4], b"\x37\xa4\x30\xec") | |
48 |
|
49 | |||
49 | self.assertEqual(d.k, 64) |
|
50 | self.assertEqual(d.k, 64) | |
50 | self.assertEqual(d.d, 16) |
|
51 | self.assertEqual(d.d, 16) | |
51 |
|
52 | |||
52 | def test_set_dict_id(self): |
|
53 | def test_set_dict_id(self): | |
53 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, |
|
54 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16, dict_id=42) | |
54 | dict_id=42) |
|
|||
55 | self.assertEqual(d.dict_id(), 42) |
|
55 | self.assertEqual(d.dict_id(), 42) | |
56 |
|
56 | |||
57 | def test_optimize(self): |
|
57 | def test_optimize(self): | |
58 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, |
|
58 | d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1, d=16) | |
59 | d=16) |
|
|||
60 |
|
59 | |||
61 | # This varies by platform. |
|
60 | # This varies by platform. | |
62 | self.assertIn(d.k, (50, 2000)) |
|
61 | self.assertIn(d.k, (50, 2000)) | |
63 | self.assertEqual(d.d, 16) |
|
62 | self.assertEqual(d.d, 16) | |
64 |
|
63 | |||
|
64 | ||||
65 | @make_cffi |
|
65 | @make_cffi | |
66 |
class TestCompressionDict( |
|
66 | class TestCompressionDict(TestCase): | |
67 | def test_bad_mode(self): |
|
67 | def test_bad_mode(self): | |
68 |
with self.assertRaisesRegex |
|
68 | with self.assertRaisesRegex(ValueError, "invalid dictionary load mode"): | |
69 |
zstd.ZstdCompressionDict(b |
|
69 | zstd.ZstdCompressionDict(b"foo", dict_type=42) | |
70 |
|
70 | |||
71 | def test_bad_precompute_compress(self): |
|
71 | def test_bad_precompute_compress(self): | |
72 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) |
|
72 | d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16) | |
73 |
|
73 | |||
74 |
with self.assertRaisesRegex |
|
74 | with self.assertRaisesRegex(ValueError, "must specify one of level or "): | |
75 | d.precompute_compress() |
|
75 | d.precompute_compress() | |
76 |
|
76 | |||
77 |
with self.assertRaisesRegex |
|
77 | with self.assertRaisesRegex(ValueError, "must only specify one of level or "): | |
78 |
d.precompute_compress( |
|
78 | d.precompute_compress( | |
79 |
|
|
79 | level=3, compression_params=zstd.CompressionParameters() | |
|
80 | ) | |||
80 |
|
81 | |||
81 | def test_precompute_compress_rawcontent(self): |
|
82 | def test_precompute_compress_rawcontent(self): | |
82 |
d = zstd.ZstdCompressionDict( |
|
83 | d = zstd.ZstdCompressionDict( | |
83 |
|
|
84 | b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_RAWCONTENT | |
|
85 | ) | |||
84 | d.precompute_compress(level=1) |
|
86 | d.precompute_compress(level=1) | |
85 |
|
87 | |||
86 |
d = zstd.ZstdCompressionDict( |
|
88 | d = zstd.ZstdCompressionDict( | |
87 |
|
|
89 | b"dictcontent" * 64, dict_type=zstd.DICT_TYPE_FULLDICT | |
88 | with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'): |
|
90 | ) | |
|
91 | with self.assertRaisesRegex(zstd.ZstdError, "unable to precompute dictionary"): | |||
89 | d.precompute_compress(level=1) |
|
92 | d.precompute_compress(level=1) |
@@ -1,65 +1,75 b'' | |||||
1 | # Copyright (c) 2017-present, Gregory Szorc |
|
1 | # Copyright (c) 2017-present, Gregory Szorc | |
2 | # All rights reserved. |
|
2 | # All rights reserved. | |
3 | # |
|
3 | # | |
4 | # This software may be modified and distributed under the terms |
|
4 | # This software may be modified and distributed under the terms | |
5 | # of the BSD license. See the LICENSE file for details. |
|
5 | # of the BSD license. See the LICENSE file for details. | |
6 |
|
6 | |||
7 | """Python interface to the Zstandard (zstd) compression library.""" |
|
7 | """Python interface to the Zstandard (zstd) compression library.""" | |
8 |
|
8 | |||
9 | from __future__ import absolute_import, unicode_literals |
|
9 | from __future__ import absolute_import, unicode_literals | |
10 |
|
10 | |||
11 | # This module serves 2 roles: |
|
11 | # This module serves 2 roles: | |
12 | # |
|
12 | # | |
13 | # 1) Export the C or CFFI "backend" through a central module. |
|
13 | # 1) Export the C or CFFI "backend" through a central module. | |
14 | # 2) Implement additional functionality built on top of C or CFFI backend. |
|
14 | # 2) Implement additional functionality built on top of C or CFFI backend. | |
15 |
|
15 | |||
16 | import os |
|
16 | import os | |
17 | import platform |
|
17 | import platform | |
18 |
|
18 | |||
19 | # Some Python implementations don't support C extensions. That's why we have |
|
19 | # Some Python implementations don't support C extensions. That's why we have | |
20 | # a CFFI implementation in the first place. The code here import one of our |
|
20 | # a CFFI implementation in the first place. The code here import one of our | |
21 | # "backends" then re-exports the symbols from this module. For convenience, |
|
21 | # "backends" then re-exports the symbols from this module. For convenience, | |
22 | # we support falling back to the CFFI backend if the C extension can't be |
|
22 | # we support falling back to the CFFI backend if the C extension can't be | |
23 | # imported. But for performance reasons, we only do this on unknown Python |
|
23 | # imported. But for performance reasons, we only do this on unknown Python | |
24 | # implementation. Notably, for CPython we require the C extension by default. |
|
24 | # implementation. Notably, for CPython we require the C extension by default. | |
25 | # Because someone will inevitably want special behavior, the behavior is |
|
25 | # Because someone will inevitably want special behavior, the behavior is | |
26 | # configurable via an environment variable. A potentially better way to handle |
|
26 | # configurable via an environment variable. A potentially better way to handle | |
27 | # this is to import a special ``__importpolicy__`` module or something |
|
27 | # this is to import a special ``__importpolicy__`` module or something | |
28 | # defining a variable and `setup.py` could write the file with whatever |
|
28 | # defining a variable and `setup.py` could write the file with whatever | |
29 | # policy was specified at build time. Until someone needs it, we go with |
|
29 | # policy was specified at build time. Until someone needs it, we go with | |
30 | # the hacky but simple environment variable approach. |
|
30 | # the hacky but simple environment variable approach. | |
31 |
_module_policy = os.environ.get( |
|
31 | _module_policy = os.environ.get("PYTHON_ZSTANDARD_IMPORT_POLICY", "default") | |
32 |
|
32 | |||
33 |
if _module_policy == |
|
33 | if _module_policy == "default": | |
34 |
if platform.python_implementation() in ( |
|
34 | if platform.python_implementation() in ("CPython",): | |
35 | from zstd import * |
|
35 | from zstd import * | |
36 | backend = 'cext' |
|
36 | ||
37 | elif platform.python_implementation() in ('PyPy',): |
|
37 | backend = "cext" | |
|
38 | elif platform.python_implementation() in ("PyPy",): | |||
38 | from .cffi import * |
|
39 | from .cffi import * | |
39 | backend = 'cffi' |
|
40 | ||
|
41 | backend = "cffi" | |||
40 | else: |
|
42 | else: | |
41 | try: |
|
43 | try: | |
42 | from zstd import * |
|
44 | from zstd import * | |
43 | backend = 'cext' |
|
45 | ||
|
46 | backend = "cext" | |||
44 | except ImportError: |
|
47 | except ImportError: | |
45 | from .cffi import * |
|
48 | from .cffi import * | |
46 | backend = 'cffi' |
|
49 | ||
47 | elif _module_policy == 'cffi_fallback': |
|
50 | backend = "cffi" | |
|
51 | elif _module_policy == "cffi_fallback": | |||
48 | try: |
|
52 | try: | |
49 | from zstd import * |
|
53 | from zstd import * | |
50 | backend = 'cext' |
|
54 | ||
|
55 | backend = "cext" | |||
51 | except ImportError: |
|
56 | except ImportError: | |
52 | from .cffi import * |
|
57 | from .cffi import * | |
53 | backend = 'cffi' |
|
58 | ||
54 | elif _module_policy == 'cext': |
|
59 | backend = "cffi" | |
|
60 | elif _module_policy == "cext": | |||
55 | from zstd import * |
|
61 | from zstd import * | |
56 | backend = 'cext' |
|
62 | ||
57 | elif _module_policy == 'cffi': |
|
63 | backend = "cext" | |
|
64 | elif _module_policy == "cffi": | |||
58 | from .cffi import * |
|
65 | from .cffi import * | |
59 | backend = 'cffi' |
|
66 | ||
|
67 | backend = "cffi" | |||
60 | else: |
|
68 | else: | |
61 | raise ImportError('unknown module import policy: %s; use default, cffi_fallback, ' |
|
69 | raise ImportError( | |
62 | 'cext, or cffi' % _module_policy) |
|
70 | "unknown module import policy: %s; use default, cffi_fallback, " | |
|
71 | "cext, or cffi" % _module_policy | |||
|
72 | ) | |||
63 |
|
73 | |||
64 | # Keep this in sync with python-zstandard.h. |
|
74 | # Keep this in sync with python-zstandard.h. | |
65 |
__version__ = |
|
75 | __version__ = "0.13.0" |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
1 | NO CONTENT: modified file |
|
NO CONTENT: modified file | ||
The requested commit or file is too big and content was truncated. Show full diff |
General Comments 0
You need to be logged in to leave comments.
Login now