##// END OF EJS Templates
zstandard: vendor python-zstandard 0.12...
Gregory Szorc -
r43207:69de49c4 default
parent child Browse files
Show More

The requested changes are too big and content was truncated. Show full diff

1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: new file 100644
NO CONTENT: new file 100644
The requested commit or file is too big and content was truncated. Show full diff
@@ -1,99 +1,103 b''
1 # Files that just need to be migrated to the formatter.
1 # Files that just need to be migrated to the formatter.
2 # Do not add new files here!
2 # Do not add new files here!
3 mercurial/cext/dirs.c
3 mercurial/cext/dirs.c
4 mercurial/cext/manifest.c
4 mercurial/cext/manifest.c
5 mercurial/cext/osutil.c
5 mercurial/cext/osutil.c
6 # Vendored code that we should never format:
6 # Vendored code that we should never format:
7 contrib/python-zstandard/c-ext/bufferutil.c
7 contrib/python-zstandard/c-ext/bufferutil.c
8 contrib/python-zstandard/c-ext/compressionchunker.c
8 contrib/python-zstandard/c-ext/compressionchunker.c
9 contrib/python-zstandard/c-ext/compressiondict.c
9 contrib/python-zstandard/c-ext/compressiondict.c
10 contrib/python-zstandard/c-ext/compressionparams.c
10 contrib/python-zstandard/c-ext/compressionparams.c
11 contrib/python-zstandard/c-ext/compressionreader.c
11 contrib/python-zstandard/c-ext/compressionreader.c
12 contrib/python-zstandard/c-ext/compressionwriter.c
12 contrib/python-zstandard/c-ext/compressionwriter.c
13 contrib/python-zstandard/c-ext/compressobj.c
13 contrib/python-zstandard/c-ext/compressobj.c
14 contrib/python-zstandard/c-ext/compressor.c
14 contrib/python-zstandard/c-ext/compressor.c
15 contrib/python-zstandard/c-ext/compressoriterator.c
15 contrib/python-zstandard/c-ext/compressoriterator.c
16 contrib/python-zstandard/c-ext/constants.c
16 contrib/python-zstandard/c-ext/constants.c
17 contrib/python-zstandard/c-ext/decompressionreader.c
17 contrib/python-zstandard/c-ext/decompressionreader.c
18 contrib/python-zstandard/c-ext/decompressionwriter.c
18 contrib/python-zstandard/c-ext/decompressionwriter.c
19 contrib/python-zstandard/c-ext/decompressobj.c
19 contrib/python-zstandard/c-ext/decompressobj.c
20 contrib/python-zstandard/c-ext/decompressor.c
20 contrib/python-zstandard/c-ext/decompressor.c
21 contrib/python-zstandard/c-ext/decompressoriterator.c
21 contrib/python-zstandard/c-ext/decompressoriterator.c
22 contrib/python-zstandard/c-ext/frameparams.c
22 contrib/python-zstandard/c-ext/frameparams.c
23 contrib/python-zstandard/c-ext/python-zstandard.h
23 contrib/python-zstandard/c-ext/python-zstandard.h
24 contrib/python-zstandard/zstd.c
24 contrib/python-zstandard/zstd.c
25 contrib/python-zstandard/zstd/common/bitstream.h
25 contrib/python-zstandard/zstd/common/bitstream.h
26 contrib/python-zstandard/zstd/common/compiler.h
26 contrib/python-zstandard/zstd/common/compiler.h
27 contrib/python-zstandard/zstd/common/cpu.h
27 contrib/python-zstandard/zstd/common/cpu.h
28 contrib/python-zstandard/zstd/common/debug.c
28 contrib/python-zstandard/zstd/common/debug.c
29 contrib/python-zstandard/zstd/common/debug.h
29 contrib/python-zstandard/zstd/common/debug.h
30 contrib/python-zstandard/zstd/common/entropy_common.c
30 contrib/python-zstandard/zstd/common/entropy_common.c
31 contrib/python-zstandard/zstd/common/error_private.c
31 contrib/python-zstandard/zstd/common/error_private.c
32 contrib/python-zstandard/zstd/common/error_private.h
32 contrib/python-zstandard/zstd/common/error_private.h
33 contrib/python-zstandard/zstd/common/fse_decompress.c
33 contrib/python-zstandard/zstd/common/fse_decompress.c
34 contrib/python-zstandard/zstd/common/fse.h
34 contrib/python-zstandard/zstd/common/fse.h
35 contrib/python-zstandard/zstd/common/huf.h
35 contrib/python-zstandard/zstd/common/huf.h
36 contrib/python-zstandard/zstd/common/mem.h
36 contrib/python-zstandard/zstd/common/mem.h
37 contrib/python-zstandard/zstd/common/pool.c
37 contrib/python-zstandard/zstd/common/pool.c
38 contrib/python-zstandard/zstd/common/pool.h
38 contrib/python-zstandard/zstd/common/pool.h
39 contrib/python-zstandard/zstd/common/threading.c
39 contrib/python-zstandard/zstd/common/threading.c
40 contrib/python-zstandard/zstd/common/threading.h
40 contrib/python-zstandard/zstd/common/threading.h
41 contrib/python-zstandard/zstd/common/xxhash.c
41 contrib/python-zstandard/zstd/common/xxhash.c
42 contrib/python-zstandard/zstd/common/xxhash.h
42 contrib/python-zstandard/zstd/common/xxhash.h
43 contrib/python-zstandard/zstd/common/zstd_common.c
43 contrib/python-zstandard/zstd/common/zstd_common.c
44 contrib/python-zstandard/zstd/common/zstd_errors.h
44 contrib/python-zstandard/zstd/common/zstd_errors.h
45 contrib/python-zstandard/zstd/common/zstd_internal.h
45 contrib/python-zstandard/zstd/common/zstd_internal.h
46 contrib/python-zstandard/zstd/compress/fse_compress.c
46 contrib/python-zstandard/zstd/compress/fse_compress.c
47 contrib/python-zstandard/zstd/compress/hist.c
47 contrib/python-zstandard/zstd/compress/hist.c
48 contrib/python-zstandard/zstd/compress/hist.h
48 contrib/python-zstandard/zstd/compress/hist.h
49 contrib/python-zstandard/zstd/compress/huf_compress.c
49 contrib/python-zstandard/zstd/compress/huf_compress.c
50 contrib/python-zstandard/zstd/compress/zstd_compress.c
50 contrib/python-zstandard/zstd/compress/zstd_compress.c
51 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
51 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
52 contrib/python-zstandard/zstd/compress/zstd_compress_literals.c
53 contrib/python-zstandard/zstd/compress/zstd_compress_literals.h
54 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c
55 contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h
52 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
56 contrib/python-zstandard/zstd/compress/zstd_double_fast.c
53 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
57 contrib/python-zstandard/zstd/compress/zstd_double_fast.h
54 contrib/python-zstandard/zstd/compress/zstd_fast.c
58 contrib/python-zstandard/zstd/compress/zstd_fast.c
55 contrib/python-zstandard/zstd/compress/zstd_fast.h
59 contrib/python-zstandard/zstd/compress/zstd_fast.h
56 contrib/python-zstandard/zstd/compress/zstd_lazy.c
60 contrib/python-zstandard/zstd/compress/zstd_lazy.c
57 contrib/python-zstandard/zstd/compress/zstd_lazy.h
61 contrib/python-zstandard/zstd/compress/zstd_lazy.h
58 contrib/python-zstandard/zstd/compress/zstd_ldm.c
62 contrib/python-zstandard/zstd/compress/zstd_ldm.c
59 contrib/python-zstandard/zstd/compress/zstd_ldm.h
63 contrib/python-zstandard/zstd/compress/zstd_ldm.h
60 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
64 contrib/python-zstandard/zstd/compress/zstdmt_compress.c
61 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
65 contrib/python-zstandard/zstd/compress/zstdmt_compress.h
62 contrib/python-zstandard/zstd/compress/zstd_opt.c
66 contrib/python-zstandard/zstd/compress/zstd_opt.c
63 contrib/python-zstandard/zstd/compress/zstd_opt.h
67 contrib/python-zstandard/zstd/compress/zstd_opt.h
64 contrib/python-zstandard/zstd/decompress/huf_decompress.c
68 contrib/python-zstandard/zstd/decompress/huf_decompress.c
65 contrib/python-zstandard/zstd/decompress/zstd_ddict.c
69 contrib/python-zstandard/zstd/decompress/zstd_ddict.c
66 contrib/python-zstandard/zstd/decompress/zstd_ddict.h
70 contrib/python-zstandard/zstd/decompress/zstd_ddict.h
67 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c
71 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.c
68 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h
72 contrib/python-zstandard/zstd/decompress/zstd_decompress_block.h
69 contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h
73 contrib/python-zstandard/zstd/decompress/zstd_decompress_internal.h
70 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
74 contrib/python-zstandard/zstd/decompress/zstd_decompress.c
71 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
75 contrib/python-zstandard/zstd/deprecated/zbuff_common.c
72 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
76 contrib/python-zstandard/zstd/deprecated/zbuff_compress.c
73 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
77 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
74 contrib/python-zstandard/zstd/deprecated/zbuff.h
78 contrib/python-zstandard/zstd/deprecated/zbuff.h
75 contrib/python-zstandard/zstd/dictBuilder/cover.c
79 contrib/python-zstandard/zstd/dictBuilder/cover.c
76 contrib/python-zstandard/zstd/dictBuilder/cover.h
80 contrib/python-zstandard/zstd/dictBuilder/cover.h
77 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
81 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
78 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
82 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
79 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
83 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
80 contrib/python-zstandard/zstd/dictBuilder/zdict.c
84 contrib/python-zstandard/zstd/dictBuilder/zdict.c
81 contrib/python-zstandard/zstd/dictBuilder/zdict.h
85 contrib/python-zstandard/zstd/dictBuilder/zdict.h
82 contrib/python-zstandard/zstd/zstd.h
86 contrib/python-zstandard/zstd/zstd.h
83 hgext/fsmonitor/pywatchman/bser.c
87 hgext/fsmonitor/pywatchman/bser.c
84 mercurial/thirdparty/xdiff/xdiff.h
88 mercurial/thirdparty/xdiff/xdiff.h
85 mercurial/thirdparty/xdiff/xdiffi.c
89 mercurial/thirdparty/xdiff/xdiffi.c
86 mercurial/thirdparty/xdiff/xdiffi.h
90 mercurial/thirdparty/xdiff/xdiffi.h
87 mercurial/thirdparty/xdiff/xemit.c
91 mercurial/thirdparty/xdiff/xemit.c
88 mercurial/thirdparty/xdiff/xemit.h
92 mercurial/thirdparty/xdiff/xemit.h
89 mercurial/thirdparty/xdiff/xhistogram.c
93 mercurial/thirdparty/xdiff/xhistogram.c
90 mercurial/thirdparty/xdiff/xinclude.h
94 mercurial/thirdparty/xdiff/xinclude.h
91 mercurial/thirdparty/xdiff/xmacros.h
95 mercurial/thirdparty/xdiff/xmacros.h
92 mercurial/thirdparty/xdiff/xmerge.c
96 mercurial/thirdparty/xdiff/xmerge.c
93 mercurial/thirdparty/xdiff/xpatience.c
97 mercurial/thirdparty/xdiff/xpatience.c
94 mercurial/thirdparty/xdiff/xprepare.c
98 mercurial/thirdparty/xdiff/xprepare.c
95 mercurial/thirdparty/xdiff/xprepare.h
99 mercurial/thirdparty/xdiff/xprepare.h
96 mercurial/thirdparty/xdiff/xtypes.h
100 mercurial/thirdparty/xdiff/xtypes.h
97 mercurial/thirdparty/xdiff/xutils.c
101 mercurial/thirdparty/xdiff/xutils.c
98 mercurial/thirdparty/xdiff/xutils.h
102 mercurial/thirdparty/xdiff/xutils.h
99 mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c
103 mercurial/thirdparty/zope/interface/_zope_interface_coptimizations.c
@@ -1,672 +1,702 b''
1 ===============
1 ===============
2 Version History
2 Version History
3 ===============
3 ===============
4
4
5 1.0.0 (not yet released)
5 1.0.0 (not yet released)
6 ========================
6 ========================
7
7
8 Actions Blocking Release
8 Actions Blocking Release
9 ------------------------
9 ------------------------
10
10
11 * compression and decompression APIs that support ``io.RawIOBase`` interface
11 * compression and decompression APIs that support ``io.RawIOBase`` interface
12 (#13).
12 (#13).
13 * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface.
13 * ``stream_writer()`` APIs should support ``io.RawIOBase`` interface.
14 * Properly handle non-blocking I/O and partial writes for objects implementing
14 * Properly handle non-blocking I/O and partial writes for objects implementing
15 ``io.RawIOBase``.
15 ``io.RawIOBase``.
16 * Make ``write_return_read=True`` the default for objects implementing
16 * Make ``write_return_read=True`` the default for objects implementing
17 ``io.RawIOBase``.
17 ``io.RawIOBase``.
18 * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for
18 * Audit for consistent and proper behavior of ``flush()`` and ``close()`` for
19 all objects implementing ``io.RawIOBase``. Is calling ``close()`` on
19 all objects implementing ``io.RawIOBase``. Is calling ``close()`` on
20 wrapped stream acceptable, should ``__exit__`` always call ``close()``,
20 wrapped stream acceptable, should ``__exit__`` always call ``close()``,
21 should ``close()`` imply ``flush()``, etc.
21 should ``close()`` imply ``flush()``, etc.
22 * Consider making reads across frames configurable behavior.
22 * Consider making reads across frames configurable behavior.
23 * Refactor module names so C and CFFI extensions live under ``zstandard``
23 * Refactor module names so C and CFFI extensions live under ``zstandard``
24 package.
24 package.
25 * Overall API design review.
25 * Overall API design review.
26 * Use Python allocator where possible.
26 * Use Python allocator where possible.
27 * Figure out what to do about experimental APIs not implemented by CFFI.
27 * Figure out what to do about experimental APIs not implemented by CFFI.
28 * APIs for auto adjusting compression parameters based on input size. e.g.
28 * APIs for auto adjusting compression parameters based on input size. e.g.
29 clamping the window log so it isn't too large for input.
29 clamping the window log so it isn't too large for input.
30 * Consider allowing compressor and decompressor instances to be thread safe,
30 * Consider allowing compressor and decompressor instances to be thread safe,
31 support concurrent operations. Or track when an operation is in progress and
31 support concurrent operations. Or track when an operation is in progress and
32 refuse to let concurrent operations use the same instance.
32 refuse to let concurrent operations use the same instance.
33 * Support for magic-less frames for all decompression operations (``decompress()``
33 * Support for magic-less frames for all decompression operations (``decompress()``
34 doesn't work due to sniffing the content size and the lack of a ZSTD API to
34 doesn't work due to sniffing the content size and the lack of a ZSTD API to
35 sniff magic-less frames - this should be fixed in 1.3.5.).
35 sniff magic-less frames - this should be fixed in 1.3.5.).
36 * Audit for complete flushing when ending compression streams.
36 * Audit for complete flushing when ending compression streams.
37 * Deprecate legacy APIs.
37 * Deprecate legacy APIs.
38 * Audit for ability to control read/write sizes on all APIs.
38 * Audit for ability to control read/write sizes on all APIs.
39 * Detect memory leaks via bench.py.
39 * Detect memory leaks via bench.py.
40 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
40 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
41 require use of ``CompressionParameters``.
41 require use of ``CompressionParameters``.
42 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
42 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
43 * Support modifying compression parameters mid operation when supported by
43 * Support modifying compression parameters mid operation when supported by
44 zstd API.
44 zstd API.
45 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
45 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
46 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
46 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
47 * Support ``ZSTD_c_literalCompressionMode `` compression parameter.
47 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
48 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
48 compression parameters.
49 compression parameters.
49 * Consider exposing ``ZSTDMT_toFlushNow()``.
50 * Consider exposing ``ZSTDMT_toFlushNow()``.
50 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
51 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
51 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
52 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
52 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
53 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
53 * Consider a ``chunker()`` API for decompression.
54 * Consider a ``chunker()`` API for decompression.
54 * Consider stats for ``chunker()`` API, including finding the last consumed
55 * Consider stats for ``chunker()`` API, including finding the last consumed
55 offset of input data.
56 offset of input data.
56 * Consider exposing ``ZSTD_cParam_getBounds()`` and
57 * Consider exposing ``ZSTD_cParam_getBounds()`` and
57 ``ZSTD_dParam_getBounds()`` APIs.
58 ``ZSTD_dParam_getBounds()`` APIs.
58 * Consider controls over resetting compression contexts (session only, parameters,
59 * Consider controls over resetting compression contexts (session only, parameters,
59 or session and parameters).
60 or session and parameters).
60 * Actually use the CFFI backend in fuzzing tests.
61 * Actually use the CFFI backend in fuzzing tests.
61
62
62 Other Actions Not Blocking Release
63 Other Actions Not Blocking Release
63 ---------------------------------------
64 ---------------------------------------
64
65
65 * Support for block compression APIs.
66 * Support for block compression APIs.
66 * API for ensuring max memory ceiling isn't exceeded.
67 * API for ensuring max memory ceiling isn't exceeded.
67 * Move off nose for testing.
68 * Move off nose for testing.
68
69
70 0.12.0 (released 2019-09-15)
71 ============================
72
73 Backwards Compatibility Notes
74 -----------------------------
75
76 * Support for Python 3.4 has been dropped since Python 3.4 is no longer
77 a supported Python version upstream. (But it will likely continue to
78 work until Python 2.7 support is dropped and we port to Python 3.5+
79 APIs.)
80
81 Bug Fixes
82 ---------
83
84 * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91).
85 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
86
87 Changes
88 -------
89
90 * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI).
91 * Switched to ``pytest`` for running tests (from ``nose``).
92 * Bundled zstandard library upgraded from 1.3.8 to 1.4.3.
93
94 0.11.1 (released 2019-05-14)
95 ============================
96
97 * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82).
98
69 0.11.0 (released 2019-02-24)
99 0.11.0 (released 2019-02-24)
70 ============================
100 ============================
71
101
72 Backwards Compatibility Nodes
102 Backwards Compatibility Notes
73 -----------------------------
103 -----------------------------
74
104
75 * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0``
105 * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0``
76 and defaults to ``-1``, per the documented behavior of
106 and defaults to ``-1``, per the documented behavior of
77 ``io.RawIOBase.read()``. Previously, we required an argument that was
107 ``io.RawIOBase.read()``. Previously, we required an argument that was
78 a positive value.
108 a positive value.
79 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
109 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
80 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
110 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
81 instead of ``NotImplementedError``.
111 instead of ``NotImplementedError``.
82 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
112 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
83 argument. The default value will likely be changed in a future release
113 argument. The default value will likely be changed in a future release
84 and consumers are advised to pass the argument to avoid unwanted change
114 and consumers are advised to pass the argument to avoid unwanted change
85 of behavior in the future.
115 of behavior in the future.
86 * ``setup.py`` now always disables the CFFI backend if the installed
116 * ``setup.py`` now always disables the CFFI backend if the installed
87 CFFI package does not meet the minimum version requirements. Before, it was
117 CFFI package does not meet the minimum version requirements. Before, it was
88 possible for the CFFI backend to be generated and a run-time error to
118 possible for the CFFI backend to be generated and a run-time error to
89 occur.
119 occur.
90 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
120 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
91 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
121 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
92 respectively so naming is identical to the C extension. This should have
122 respectively so naming is identical to the C extension. This should have
93 no meaningful end-user impact, as instances aren't meant to be
123 no meaningful end-user impact, as instances aren't meant to be
94 constructed directly.
124 constructed directly.
95 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
125 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
96 argument to control whether ``write()`` returns the number of bytes
126 argument to control whether ``write()`` returns the number of bytes
97 read from the source / written to the decompressor. It defaults to off,
127 read from the source / written to the decompressor. It defaults to off,
98 which preserves the existing behavior of returning the number of bytes
128 which preserves the existing behavior of returning the number of bytes
99 emitted from the decompressor. The default will change in a future release
129 emitted from the decompressor. The default will change in a future release
100 so behavior aligns with the specified behavior of ``io.RawIOBase``.
130 so behavior aligns with the specified behavior of ``io.RawIOBase``.
101 * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This
131 * ``ZstdDecompressionWriter.__exit__`` now calls ``self.close()``. This
102 will result in that stream plus the underlying stream being closed as
132 will result in that stream plus the underlying stream being closed as
103 well. If this behavior is not desirable, do not use instances as
133 well. If this behavior is not desirable, do not use instances as
104 context managers.
134 context managers.
105 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
135 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
106 argument to control whether ``write()`` returns the number of bytes read
136 argument to control whether ``write()`` returns the number of bytes read
107 from the source / written to the compressor. It defaults to off, which
137 from the source / written to the compressor. It defaults to off, which
108 preserves the existing behavior of returning the number of bytes emitted
138 preserves the existing behavior of returning the number of bytes emitted
109 from the compressor. The default will change in a future release so
139 from the compressor. The default will change in a future release so
110 behavior aligns with the specified behavior of ``io.RawIOBase``.
140 behavior aligns with the specified behavior of ``io.RawIOBase``.
111 * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will
141 * ``ZstdCompressionWriter.__exit__`` now calls ``self.close()``. This will
112 result in that stream plus any underlying stream being closed as well. If
142 result in that stream plus any underlying stream being closed as well. If
113 this behavior is not desirable, do not use instances as context managers.
143 this behavior is not desirable, do not use instances as context managers.
114 * ``ZstdDecompressionWriter`` no longer requires being used as a context
144 * ``ZstdDecompressionWriter`` no longer requires being used as a context
115 manager (#57).
145 manager (#57).
116 * ``ZstdCompressionWriter`` no longer requires being used as a context
146 * ``ZstdCompressionWriter`` no longer requires being used as a context
117 manager (#57).
147 manager (#57).
118 * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances
148 * The ``overlap_size_log`` attribute on ``CompressionParameters`` instances
119 has been deprecated and will be removed in a future release. The
149 has been deprecated and will be removed in a future release. The
120 ``overlap_log`` attribute should be used instead.
150 ``overlap_log`` attribute should be used instead.
121 * The ``overlap_size_log`` argument to ``CompressionParameters`` has been
151 * The ``overlap_size_log`` argument to ``CompressionParameters`` has been
122 deprecated and will be removed in a future release. The ``overlap_log``
152 deprecated and will be removed in a future release. The ``overlap_log``
123 argument should be used instead.
153 argument should be used instead.
124 * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances
154 * The ``ldm_hash_every_log`` attribute on ``CompressionParameters`` instances
125 has been deprecated and will be removed in a future release. The
155 has been deprecated and will be removed in a future release. The
126 ``ldm_hash_rate_log`` attribute should be used instead.
156 ``ldm_hash_rate_log`` attribute should be used instead.
127 * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been
157 * The ``ldm_hash_every_log`` argument to ``CompressionParameters`` has been
128 deprecated and will be removed in a future release. The ``ldm_hash_rate_log``
158 deprecated and will be removed in a future release. The ``ldm_hash_rate_log``
129 argument should be used instead.
159 argument should be used instead.
130 * The ``compression_strategy`` argument to ``CompressionParameters`` has been
160 * The ``compression_strategy`` argument to ``CompressionParameters`` has been
131 deprecated and will be removed in a future release. The ``strategy``
161 deprecated and will be removed in a future release. The ``strategy``
132 argument should be used instead.
162 argument should be used instead.
133 * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated
163 * The ``SEARCHLENGTH_MIN`` and ``SEARCHLENGTH_MAX`` constants are deprecated
134 and will be removed in a future release. Use ``MINMATCH_MIN`` and
164 and will be removed in a future release. Use ``MINMATCH_MIN`` and
135 ``MINMATCH_MAX`` instead.
165 ``MINMATCH_MAX`` instead.
136 * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had
166 * The ``zstd_cffi`` module has been renamed to ``zstandard.cffi``. As had
137 been documented in the ``README`` file since the ``0.9.0`` release, the
167 been documented in the ``README`` file since the ``0.9.0`` release, the
138 module should not be imported directly at its new location. Instead,
168 module should not be imported directly at its new location. Instead,
139 ``import zstandard`` to cause an appropriate backend module to be loaded
169 ``import zstandard`` to cause an appropriate backend module to be loaded
140 automatically.
170 automatically.
141
171
142 Bug Fixes
172 Bug Fixes
143 ---------
173 ---------
144
174
145 * CFFI backend could encounter a failure when sending an empty chunk into
175 * CFFI backend could encounter a failure when sending an empty chunk into
146 ``ZstdDecompressionObj.decompress()``. The issue has been fixed.
176 ``ZstdDecompressionObj.decompress()``. The issue has been fixed.
147 * CFFI backend could encounter an error when calling
177 * CFFI backend could encounter an error when calling
148 ``ZstdDecompressionReader.read()`` if there was data remaining in an
178 ``ZstdDecompressionReader.read()`` if there was data remaining in an
149 internal buffer. The issue has been fixed. (#71)
179 internal buffer. The issue has been fixed. (#71)
150
180
151 Changes
181 Changes
152 -------
182 -------
153
183
154 * ``ZstDecompressionObj.decompress()`` now properly handles empty inputs in
184 * ``ZstDecompressionObj.decompress()`` now properly handles empty inputs in
155 the CFFI backend.
185 the CFFI backend.
156 * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``.
186 * ``ZstdCompressionReader`` now implements ``read1()`` and ``readinto1()``.
157 These are part of the ``io.BufferedIOBase`` interface.
187 These are part of the ``io.BufferedIOBase`` interface.
158 * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading
188 * ``ZstdCompressionReader`` has gained a ``readinto(b)`` method for reading
159 compressed output into an existing buffer.
189 compressed output into an existing buffer.
160 * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts
190 * ``ZstdCompressionReader.read()`` now defaults to ``size=-1`` and accepts
161 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
191 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
162 behavior of ``io.RawIOBase``.
192 behavior of ``io.RawIOBase``.
163 * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this
193 * ``ZstdCompressionReader`` now implements ``readall()``. Previously, this
164 method raised ``NotImplementedError``.
194 method raised ``NotImplementedError``.
165 * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``.
195 * ``ZstdDecompressionReader`` now implements ``read1()`` and ``readinto1()``.
166 These are part of the ``io.BufferedIOBase`` interface.
196 These are part of the ``io.BufferedIOBase`` interface.
167 * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts
197 * ``ZstdDecompressionReader.read()`` now defaults to ``size=-1`` and accepts
168 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
198 read sizes of ``-1`` and ``0``. The new behavior aligns with the documented
169 behavior of ``io.RawIOBase``.
199 behavior of ``io.RawIOBase``.
170 * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this
200 * ``ZstdDecompressionReader()`` now implements ``readall()``. Previously, this
171 method raised ``NotImplementedError``.
201 method raised ``NotImplementedError``.
172 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
202 * The ``readline()``, ``readlines()``, ``__iter__``, and ``__next__`` methods
173 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
203 of ``ZstdDecompressionReader()`` now raise ``io.UnsupportedOperation``
174 instead of ``NotImplementedError``. This reflects a decision to never
204 instead of ``NotImplementedError``. This reflects a decision to never
175 implement text-based I/O on (de)compressors and keep the low-level API
205 implement text-based I/O on (de)compressors and keep the low-level API
176 operating in the binary domain. (#13)
206 operating in the binary domain. (#13)
177 * ``README.rst`` now documented how to achieve linewise iteration using
207 * ``README.rst`` now documented how to achieve linewise iteration using
178 an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``.
208 an ``io.TextIOWrapper`` with a ``ZstdDecompressionReader``.
179 * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for
209 * ``ZstdDecompressionReader`` has gained a ``readinto(b)`` method for
180 reading decompressed output into an existing buffer. This allows chaining
210 reading decompressed output into an existing buffer. This allows chaining
181 to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``.
211 to an ``io.TextIOWrapper`` on Python 3 without using an ``io.BufferedReader``.
182 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
212 * ``ZstdDecompressor.stream_reader()`` now accepts a ``read_across_frames``
183 argument to control behavior when the input data has multiple zstd
213 argument to control behavior when the input data has multiple zstd
184 *frames*. When ``False`` (the default for backwards compatibility), a
214 *frames*. When ``False`` (the default for backwards compatibility), a
185 ``read()`` will stop when the end of a zstd *frame* is encountered. When
215 ``read()`` will stop when the end of a zstd *frame* is encountered. When
186 ``True``, ``read()`` can potentially return data spanning multiple zstd
216 ``True``, ``read()`` can potentially return data spanning multiple zstd
187 *frames*. The default will likely be changed to ``True`` in a future
217 *frames*. The default will likely be changed to ``True`` in a future
188 release.
218 release.
189 * ``setup.py`` now performs CFFI version sniffing and disables the CFFI
219 * ``setup.py`` now performs CFFI version sniffing and disables the CFFI
190 backend if CFFI is too old. Previously, we only used ``install_requires``
220 backend if CFFI is too old. Previously, we only used ``install_requires``
191 to enforce the CFFI version and not all build modes would properly enforce
221 to enforce the CFFI version and not all build modes would properly enforce
192 the minimum CFFI version. (#69)
222 the minimum CFFI version. (#69)
193 * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data
223 * CFFI's ``ZstdDecompressionReader.read()`` now properly handles data
194 remaining in any internal buffer. Before, repeated ``read()`` could
224 remaining in any internal buffer. Before, repeated ``read()`` could
195 result in *random* errors. (#71)
225 result in *random* errors. (#71)
196 * Upgraded various Python packages in CI environment.
226 * Upgraded various Python packages in CI environment.
197 * Upgrade to hypothesis 4.5.11.
227 * Upgrade to hypothesis 4.5.11.
198 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
228 * In the CFFI backend, ``CompressionReader`` and ``DecompressionReader``
199 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
229 were renamed to ``ZstdCompressionReader`` and ``ZstdDecompressionReader``,
200 respectively.
230 respectively.
201 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
231 * ``ZstdDecompressor.stream_writer()`` now accepts a ``write_return_read``
202 argument to control whether ``write()`` returns the number of bytes read
232 argument to control whether ``write()`` returns the number of bytes read
203 from the source. It defaults to ``False`` to preserve backwards
233 from the source. It defaults to ``False`` to preserve backwards
204 compatibility.
234 compatibility.
205 * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase``
235 * ``ZstdDecompressor.stream_writer()`` now implements the ``io.RawIOBase``
206 interface and behaves as a proper stream object.
236 interface and behaves as a proper stream object.
207 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
237 * ``ZstdCompressor.stream_writer()`` now accepts a ``write_return_read``
208 argument to control whether ``write()`` returns the number of bytes read
238 argument to control whether ``write()`` returns the number of bytes read
209 from the source. It defaults to ``False`` to preserve backwards
239 from the source. It defaults to ``False`` to preserve backwards
210 compatibility.
240 compatibility.
211 * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and
241 * ``ZstdCompressionWriter`` now implements the ``io.RawIOBase`` interface and
212 behaves as a proper stream object. ``close()`` will now close the stream
242 behaves as a proper stream object. ``close()`` will now close the stream
213 and the underlying stream (if possible). ``__exit__`` will now call
243 and the underlying stream (if possible). ``__exit__`` will now call
214 ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented.
244 ``close()``. Methods like ``writable()`` and ``fileno()`` are implemented.
215 * ``ZstdDecompressionWriter`` no longer must be used as a context manager.
245 * ``ZstdDecompressionWriter`` no longer must be used as a context manager.
216 * ``ZstdCompressionWriter`` no longer must be used as a context manager.
246 * ``ZstdCompressionWriter`` no longer must be used as a context manager.
217 When not using as a context manager, it is important to call
247 When not using as a context manager, it is important to call
218 ``flush(FLUSH_FRAME)`` or the compression stream won't be properly
248 ``flush(FLUSH_FRAME)`` or the compression stream won't be properly
219 terminated and decoders may complain about malformed input.
249 terminated and decoders may complain about malformed input.
220 * ``ZstdCompressionWriter.flush()`` (what is returned from
250 * ``ZstdCompressionWriter.flush()`` (what is returned from
221 ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the
251 ``ZstdCompressor.stream_writer()``) now accepts an argument controlling the
222 flush behavior. Its value can be one of the new constants
252 flush behavior. Its value can be one of the new constants
223 ``FLUSH_BLOCK`` or ``FLUSH_FRAME``.
253 ``FLUSH_BLOCK`` or ``FLUSH_FRAME``.
224 * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method.
254 * ``ZstdDecompressionObj`` instances now have a ``flush([length=None])`` method.
225 This provides parity with standard library equivalent types. (#65)
255 This provides parity with standard library equivalent types. (#65)
226 * ``CompressionParameters`` no longer redundantly store individual compression
256 * ``CompressionParameters`` no longer redundantly store individual compression
227 parameters on each instance. Instead, compression parameters are stored inside
257 parameters on each instance. Instead, compression parameters are stored inside
228 the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining
258 the underlying ``ZSTD_CCtx_params`` instance. Attributes for obtaining
229 parameters are now properties rather than instance variables.
259 parameters are now properties rather than instance variables.
230 * Exposed the ``STRATEGY_BTULTRA2`` constant.
260 * Exposed the ``STRATEGY_BTULTRA2`` constant.
231 * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute.
261 * ``CompressionParameters`` instances now expose an ``overlap_log`` attribute.
232 This behaves identically to the ``overlap_size_log`` attribute.
262 This behaves identically to the ``overlap_size_log`` attribute.
233 * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that
263 * ``CompressionParameters()`` now accepts an ``overlap_log`` argument that
234 behaves identically to the ``overlap_size_log`` argument. An error will be
264 behaves identically to the ``overlap_size_log`` argument. An error will be
235 raised if both arguments are specified.
265 raised if both arguments are specified.
236 * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log``
266 * ``CompressionParameters`` instances now expose an ``ldm_hash_rate_log``
237 attribute. This behaves identically to the ``ldm_hash_every_log`` attribute.
267 attribute. This behaves identically to the ``ldm_hash_every_log`` attribute.
238 * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that
268 * ``CompressionParameters()`` now accepts a ``ldm_hash_rate_log`` argument that
239 behaves identically to the ``ldm_hash_every_log`` argument. An error will be
269 behaves identically to the ``ldm_hash_every_log`` argument. An error will be
240 raised if both arguments are specified.
270 raised if both arguments are specified.
241 * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves
271 * ``CompressionParameters()`` now accepts a ``strategy`` argument that behaves
242 identically to the ``compression_strategy`` argument. An error will be raised
272 identically to the ``compression_strategy`` argument. An error will be raised
243 if both arguments are specified.
273 if both arguments are specified.
244 * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are
274 * The ``MINMATCH_MIN`` and ``MINMATCH_MAX`` constants were added. They are
245 semantically equivalent to the old ``SEARCHLENGTH_MIN`` and
275 semantically equivalent to the old ``SEARCHLENGTH_MIN`` and
246 ``SEARCHLENGTH_MAX`` constants.
276 ``SEARCHLENGTH_MAX`` constants.
247 * Bundled zstandard library upgraded from 1.3.7 to 1.3.8.
277 * Bundled zstandard library upgraded from 1.3.7 to 1.3.8.
248 * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and
278 * ``setup.py`` denotes support for Python 3.7 (Python 3.7 was supported and
249 tested in the 0.10 release).
279 tested in the 0.10 release).
250 * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``.
280 * ``zstd_cffi`` module has been renamed to ``zstandard.cffi``.
251 * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid
281 * ``ZstdCompressor.stream_writer()`` now reuses a buffer in order to avoid
252 allocating a new buffer for every operation. This should result in faster
282 allocating a new buffer for every operation. This should result in faster
253 performance in cases where ``write()`` or ``flush()`` are being called
283 performance in cases where ``write()`` or ``flush()`` are being called
254 frequently. (#62)
284 frequently. (#62)
255 * Bundled zstandard library upgraded from 1.3.6 to 1.3.7.
285 * Bundled zstandard library upgraded from 1.3.6 to 1.3.7.
256
286
257 0.10.2 (released 2018-11-03)
287 0.10.2 (released 2018-11-03)
258 ============================
288 ============================
259
289
260 Bug Fixes
290 Bug Fixes
261 ---------
291 ---------
262
292
263 * ``zstd_cffi.py`` added to ``setup.py`` (#60).
293 * ``zstd_cffi.py`` added to ``setup.py`` (#60).
264
294
265 Changes
295 Changes
266 -------
296 -------
267
297
268 * Change some integer casts to avoid ``ssize_t`` (#61).
298 * Change some integer casts to avoid ``ssize_t`` (#61).
269
299
270 0.10.1 (released 2018-10-08)
300 0.10.1 (released 2018-10-08)
271 ============================
301 ============================
272
302
273 Backwards Compatibility Notes
303 Backwards Compatibility Notes
274 -----------------------------
304 -----------------------------
275
305
276 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
306 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
277 method (#58).
307 method (#58).
278 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
308 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
279 method (#58).
309 method (#58).
280
310
281 Changes
311 Changes
282 -------
312 -------
283
313
284 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
314 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
285 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
315 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
286 since these were built against Python 3.7.
316 since these were built against Python 3.7.
287 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
317 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
288 ``closed`` attribute is now a read-only property instead of a method. This now
318 ``closed`` attribute is now a read-only property instead of a method. This now
289 properly matches the ``IOBase`` API and allows instances to be used in more
319 properly matches the ``IOBase`` API and allows instances to be used in more
290 places that accept ``IOBase`` instances.
320 places that accept ``IOBase`` instances.
291
321
292 0.10.0 (released 2018-10-08)
322 0.10.0 (released 2018-10-08)
293 ============================
323 ============================
294
324
295 Backwards Compatibility Notes
325 Backwards Compatibility Notes
296 -----------------------------
326 -----------------------------
297
327
298 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
328 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
299 argument in both the C and CFFI backends. Before, the CFFI implementation
329 argument in both the C and CFFI backends. Before, the CFFI implementation
300 would assume a default value of ``-1``, which was later rejected.
330 would assume a default value of ``-1``, which was later rejected.
301 * The ``compress_literals`` argument and attribute has been removed from
331 * The ``compress_literals`` argument and attribute has been removed from
302 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
332 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
303 API.
333 API.
304 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
334 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
305 operation performed against ``ZstdCompressor`` instances. The reason for this
335 operation performed against ``ZstdCompressor`` instances. The reason for this
306 change is that the zstd 1.3.5 API no longer allows this without calling
336 change is that the zstd 1.3.5 API no longer allows this without calling
307 ``ZSTD_CCtx_resetParameters()`` first. But if we called
337 ``ZSTD_CCtx_resetParameters()`` first. But if we called
308 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
338 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
309 potentially expensive setup when using dictionaries. We now call
339 potentially expensive setup when using dictionaries. We now call
310 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
340 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
311 compression parameters.
341 compression parameters.
312 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
342 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
313 used as a context manager. The context manager interface still exists and its
343 used as a context manager. The context manager interface still exists and its
314 behavior is unchanged.
344 behavior is unchanged.
315 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
345 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
316 used as a context manager. The context manager interface still exists and its
346 used as a context manager. The context manager interface still exists and its
317 behavior is unchanged.
347 behavior is unchanged.
318
348
319 Bug Fixes
349 Bug Fixes
320 ---------
350 ---------
321
351
322 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
352 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
323 from internal buffers in more scenarios. Before, it was possible for data to
353 from internal buffers in more scenarios. Before, it was possible for data to
324 remain in internal buffers. This data would be emitted on a subsequent call
354 remain in internal buffers. This data would be emitted on a subsequent call
325 to ``decompress()``. The overall output stream would still be valid. But if
355 to ``decompress()``. The overall output stream would still be valid. But if
326 callers were expecting input data to exactly map to output data (say the
356 callers were expecting input data to exactly map to output data (say the
327 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
357 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
328 map input chunks to output chunks), then the previous behavior would be
358 map input chunks to output chunks), then the previous behavior would be
329 wrong. The new behavior is such that output from
359 wrong. The new behavior is such that output from
330 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
360 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
331 should produce all available compressed input.
361 should produce all available compressed input.
332 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
362 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
333 a previous context manager resulted in error (#56).
363 a previous context manager resulted in error (#56).
334 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
364 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
335 all data necessary to flush a block. Before, it was possible for the
365 all data necessary to flush a block. Before, it was possible for the
336 ``flush()`` to not emit all data necessary to fully represent a block. This
366 ``flush()`` to not emit all data necessary to fully represent a block. This
337 would mean decompressors wouldn't be able to decompress all data that had been
367 would mean decompressors wouldn't be able to decompress all data that had been
338 fed into the compressor and ``flush()``ed. (#55).
368 fed into the compressor and ``flush()``ed. (#55).
339
369
340 New Features
370 New Features
341 ------------
371 ------------
342
372
343 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
373 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
344 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
374 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
345 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
375 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
346 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
376 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
347 API doesn't impose restrictions on the input or output types for the
377 API doesn't impose restrictions on the input or output types for the
348 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
378 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
349 fixed size. This makes this API useful when the compressed output is being
379 fixed size. This makes this API useful when the compressed output is being
350 fed into an I/O layer, where uniform write sizes are useful.
380 fed into an I/O layer, where uniform write sizes are useful.
351 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
381 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
352 manager (#34).
382 manager (#34).
353 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
383 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
354 manager (#34).
384 manager (#34).
355 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
385 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
356
386
357 Changes
387 Changes
358 -------
388 -------
359
389
360 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
390 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
361 * ``zstandard.__version__`` is now defined (#50).
391 * ``zstandard.__version__`` is now defined (#50).
362 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
392 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
363 * Upgrade various packages used in CI to latest versions. Notably tox (in
393 * Upgrade various packages used in CI to latest versions. Notably tox (in
364 order to support Python 3.7).
394 order to support Python 3.7).
365 * Use relative paths in setup.py to appease Python 3.7 (#51).
395 * Use relative paths in setup.py to appease Python 3.7 (#51).
366 * Added CI for Python 3.7.
396 * Added CI for Python 3.7.
367
397
368 0.9.1 (released 2018-06-04)
398 0.9.1 (released 2018-06-04)
369 ===========================
399 ===========================
370
400
371 * Debian packaging support.
401 * Debian packaging support.
372 * Fix typo in setup.py (#44).
402 * Fix typo in setup.py (#44).
373 * Support building with mingw compiler (#46).
403 * Support building with mingw compiler (#46).
374
404
375 0.9.0 (released 2018-04-08)
405 0.9.0 (released 2018-04-08)
376 ===========================
406 ===========================
377
407
378 Backwards Compatibility Notes
408 Backwards Compatibility Notes
379 -----------------------------
409 -----------------------------
380
410
381 * CFFI 1.11 or newer is now required (previous requirement was 1.8).
411 * CFFI 1.11 or newer is now required (previous requirement was 1.8).
382 * The primary module is now ``zstandard``. Please change imports of ``zstd``
412 * The primary module is now ``zstandard``. Please change imports of ``zstd``
383 and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support
413 and ``zstd_cffi`` to ``import zstandard``. See the README for more. Support
384 for importing the old names will be dropped in the next release.
414 for importing the old names will be dropped in the next release.
385 * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have
415 * ``ZstdCompressor.read_from()`` and ``ZstdDecompressor.read_from()`` have
386 been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new
416 been renamed to ``read_to_iter()``. ``read_from()`` is aliased to the new
387 name and will be deleted in a future release.
417 name and will be deleted in a future release.
388 * Support for Python 2.6 has been removed.
418 * Support for Python 2.6 has been removed.
389 * Support for Python 3.3 has been removed.
419 * Support for Python 3.3 has been removed.
390 * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as
420 * The ``selectivity`` argument to ``train_dictionary()`` has been removed, as
391 the feature disappeared from zstd 1.3.
421 the feature disappeared from zstd 1.3.
392 * Support for legacy dictionaries has been removed. Cover dictionaries are now
422 * Support for legacy dictionaries has been removed. Cover dictionaries are now
393 the default. ``train_cover_dictionary()`` has effectively been renamed to
423 the default. ``train_cover_dictionary()`` has effectively been renamed to
394 ``train_dictionary()``.
424 ``train_dictionary()``.
395 * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been
425 * The ``allow_empty`` argument from ``ZstdCompressor.compress()`` has been
396 deleted and the method now allows empty inputs to be compressed by default.
426 deleted and the method now allows empty inputs to be compressed by default.
397 * ``estimate_compression_context_size()`` has been removed. Use
427 * ``estimate_compression_context_size()`` has been removed. Use
398 ``CompressionParameters.estimated_compression_context_size()`` instead.
428 ``CompressionParameters.estimated_compression_context_size()`` instead.
399 * ``get_compression_parameters()`` has been removed. Use
429 * ``get_compression_parameters()`` has been removed. Use
400 ``CompressionParameters.from_level()`` instead.
430 ``CompressionParameters.from_level()`` instead.
401 * The arguments to ``CompressionParameters.__init__()`` have changed. If you
431 * The arguments to ``CompressionParameters.__init__()`` have changed. If you
402 were using positional arguments before, the positions now map to different
432 were using positional arguments before, the positions now map to different
403 arguments. It is recommended to use keyword arguments to construct
433 arguments. It is recommended to use keyword arguments to construct
404 ``CompressionParameters`` instances.
434 ``CompressionParameters`` instances.
405 * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard
435 * ``TARGETLENGTH_MAX`` constant has been removed (it disappeared from zstandard
406 1.3.4).
436 1.3.4).
407 * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been
437 * ``ZstdCompressor.write_to()`` and ``ZstdDecompressor.write_to()`` have been
408 renamed to ``ZstdCompressor.stream_writer()`` and
438 renamed to ``ZstdCompressor.stream_writer()`` and
409 ``ZstdDecompressor.stream_writer()``, respectively. The old names are still
439 ``ZstdDecompressor.stream_writer()``, respectively. The old names are still
410 aliased, but will be removed in the next major release.
440 aliased, but will be removed in the next major release.
411 * Content sizes are written into frame headers by default
441 * Content sizes are written into frame headers by default
412 (``ZstdCompressor(write_content_size=True)`` is now the default).
442 (``ZstdCompressor(write_content_size=True)`` is now the default).
413 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``
443 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``
414 for consistency with other types. The old name is an alias and will be removed
444 for consistency with other types. The old name is an alias and will be removed
415 in the next major release.
445 in the next major release.
416
446
417 Bug Fixes
447 Bug Fixes
418 ---------
448 ---------
419
449
420 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2).
450 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40) (from 0.8.2).
421 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2).
451 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35) (from 0.8.2).
422 * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``.
452 * Fixed memory leak of ``ZSTD_DDict`` instances in CFFI's ``ZstdDecompressor``.
423
453
424 New Features
454 New Features
425 ------------
455 ------------
426
456
427 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
457 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
428 bug fixes and performance improvements. It also gives us access to newer
458 bug fixes and performance improvements. It also gives us access to newer
429 features.
459 features.
430 * Support for negative compression levels.
460 * Support for negative compression levels.
431 * Support for *long distance matching* (facilitates compression ratios that approach
461 * Support for *long distance matching* (facilitates compression ratios that approach
432 LZMA).
462 LZMA).
433 * Support for reading empty zstandard frames (with an embedded content size
463 * Support for reading empty zstandard frames (with an embedded content size
434 of 0).
464 of 0).
435 * Support for writing and partial support for reading zstandard frames without a
465 * Support for writing and partial support for reading zstandard frames without a
436 magic header.
466 magic header.
437 * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows
467 * New ``stream_reader()`` API that exposes the ``io.RawIOBase`` interface (allows
438 you to ``.read()`` from a file-like object).
468 you to ``.read()`` from a file-like object).
439 * Several minor features, bug fixes, and performance enhancements.
469 * Several minor features, bug fixes, and performance enhancements.
440 * Wheels for Linux and macOS are now provided with releases.
470 * Wheels for Linux and macOS are now provided with releases.
441
471
442 Changes
472 Changes
443 -------
473 -------
444
474
445 * Functions accepting bytes data now use the buffer protocol and can accept
475 * Functions accepting bytes data now use the buffer protocol and can accept
446 more types (like ``memoryview`` and ``bytearray``) (#26).
476 more types (like ``memoryview`` and ``bytearray``) (#26).
447 * Add #includes so compilation on OS X and BSDs works (#20).
477 * Add #includes so compilation on OS X and BSDs works (#20).
448 * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream
478 * New ``ZstdDecompressor.stream_reader()`` API to obtain a read-only i/o stream
449 of decompressed data for a source.
479 of decompressed data for a source.
450 * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of
480 * New ``ZstdCompressor.stream_reader()`` API to obtain a read-only i/o stream of
451 compressed data for a source.
481 compressed data for a source.
452 * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``.
482 * Renamed ``ZstdDecompressor.read_from()`` to ``ZstdDecompressor.read_to_iter()``.
453 The old name is still available.
483 The old name is still available.
454 * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``.
484 * Renamed ``ZstdCompressor.read_from()`` to ``ZstdCompressor.read_to_iter()``.
455 ``read_from()`` is still available at its old location.
485 ``read_from()`` is still available at its old location.
456 * Introduce the ``zstandard`` module to import and re-export the C or CFFI
486 * Introduce the ``zstandard`` module to import and re-export the C or CFFI
457 *backend* as appropriate. Behavior can be controlled via the
487 *backend* as appropriate. Behavior can be controlled via the
458 ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for
488 ``PYTHON_ZSTANDARD_IMPORT_POLICY`` environment variable. See README for
459 usage info.
489 usage info.
460 * Vendored version of zstd upgraded to 1.3.4.
490 * Vendored version of zstd upgraded to 1.3.4.
461 * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``.
491 * Added module constants ``CONTENTSIZE_UNKNOWN`` and ``CONTENTSIZE_ERROR``.
462 * Add ``STRATEGY_BTULTRA`` compression strategy constant.
492 * Add ``STRATEGY_BTULTRA`` compression strategy constant.
463 * Switch from deprecated ``ZSTD_getDecompressedSize()`` to
493 * Switch from deprecated ``ZSTD_getDecompressedSize()`` to
464 ``ZSTD_getFrameContentSize()`` replacement.
494 ``ZSTD_getFrameContentSize()`` replacement.
465 * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring
495 * ``ZstdCompressor.compress()`` can now compress empty inputs without requiring
466 special handling.
496 special handling.
467 * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()``
497 * ``ZstdCompressor`` and ``ZstdDecompressor`` now have a ``memory_size()``
468 method for determining the current memory utilization of the underlying zstd
498 method for determining the current memory utilization of the underlying zstd
469 primitive.
499 primitive.
470 * ``train_dictionary()`` has new arguments and functionality for trying multiple
500 * ``train_dictionary()`` has new arguments and functionality for trying multiple
471 variations of COVER parameters and selecting the best one.
501 variations of COVER parameters and selecting the best one.
472 * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and
502 * Added module constants ``LDM_MINMATCH_MIN``, ``LDM_MINMATCH_MAX``, and
473 ``LDM_BUCKETSIZELOG_MAX``.
503 ``LDM_BUCKETSIZELOG_MAX``.
474 * Converted all consumers to the zstandard *new advanced API*, which uses
504 * Converted all consumers to the zstandard *new advanced API*, which uses
475 ``ZSTD_compress_generic()``
505 ``ZSTD_compress_generic()``
476 * ``CompressionParameters.__init__`` now accepts several more arguments,
506 * ``CompressionParameters.__init__`` now accepts several more arguments,
477 including support for *long distance matching*.
507 including support for *long distance matching*.
478 * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that
508 * ``ZstdCompressionDict.__init__`` now accepts a ``dict_type`` argument that
479 controls how the dictionary should be interpreted. This can be used to
509 controls how the dictionary should be interpreted. This can be used to
480 force the use of *content-only* dictionaries or to require the presence
510 force the use of *content-only* dictionaries or to require the presence
481 of the dictionary magic header.
511 of the dictionary magic header.
482 * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the
512 * ``ZstdCompressionDict.precompute_compress()`` can be used to precompute the
483 compression dictionary so it can efficiently be used with multiple
513 compression dictionary so it can efficiently be used with multiple
484 ``ZstdCompressor`` instances.
514 ``ZstdCompressor`` instances.
485 * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances,
515 * Digested dictionaries are now stored in ``ZstdCompressionDict`` instances,
486 created automatically on first use, and automatically reused by all
516 created automatically on first use, and automatically reused by all
487 ``ZstdDecompressor`` instances bound to that dictionary.
517 ``ZstdDecompressor`` instances bound to that dictionary.
488 * All meaningful functions now accept keyword arguments.
518 * All meaningful functions now accept keyword arguments.
489 * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument
519 * ``ZstdDecompressor.decompressobj()`` now accepts a ``write_size`` argument
490 to control how much work to perform on every decompressor invocation.
520 to control how much work to perform on every decompressor invocation.
491 * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the
521 * ``ZstdCompressor.write_to()`` now exposes a ``tell()``, which exposes the
492 total number of bytes written so far.
522 total number of bytes written so far.
493 * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving
523 * ``ZstdDecompressor.stream_reader()`` now supports ``seek()`` when moving
494 forward in the stream.
524 forward in the stream.
495 * Removed ``TARGETLENGTH_MAX`` constant.
525 * Removed ``TARGETLENGTH_MAX`` constant.
496 * Added ``frame_header_size(data)`` function.
526 * Added ``frame_header_size(data)`` function.
497 * Added ``frame_content_size(data)`` function.
527 * Added ``frame_content_size(data)`` function.
498 * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced
528 * Consumers of ``ZSTD_decompress*`` have been switched to the new *advanced
499 decompression* API.
529 decompression* API.
500 * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with
530 * ``ZstdCompressor`` and ``ZstdCompressionParams`` can now be constructed with
501 negative compression levels.
531 negative compression levels.
502 * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the
532 * ``ZstdDecompressor`` now accepts a ``max_window_size`` argument to limit the
503 amount of memory required for decompression operations.
533 amount of memory required for decompression operations.
504 * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with
534 * ``FORMAT_ZSTD1`` and ``FORMAT_ZSTD1_MAGICLESS`` constants to be used with
505 the ``format`` compression parameter to control whether the frame magic
535 the ``format`` compression parameter to control whether the frame magic
506 header is written.
536 header is written.
507 * ``ZstdDecompressor`` now accepts a ``format`` argument to control the
537 * ``ZstdDecompressor`` now accepts a ``format`` argument to control the
508 expected frame format.
538 expected frame format.
509 * ``ZstdCompressor`` now has a ``frame_progression()`` method to return
539 * ``ZstdCompressor`` now has a ``frame_progression()`` method to return
510 information about the current compression operation.
540 information about the current compression operation.
511 * Error messages in CFFI no longer have ``b''`` literals.
541 * Error messages in CFFI no longer have ``b''`` literals.
512 * Compiler warnings and underlying overflow issues on 32-bit platforms have been
542 * Compiler warnings and underlying overflow issues on 32-bit platforms have been
513 fixed.
543 fixed.
514 * Builds in CI now build with compiler warnings as errors. This should hopefully
544 * Builds in CI now build with compiler warnings as errors. This should hopefully
515 prevent new compiler warnings from being introduced.
545 prevent new compiler warnings from being introduced.
516 * Make ``ZstdCompressor(write_content_size=True)`` and
546 * Make ``ZstdCompressor(write_content_size=True)`` and
517 ``CompressionParameters(write_content_size=True)`` the default.
547 ``CompressionParameters(write_content_size=True)`` the default.
518 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``.
548 * ``CompressionParameters`` has been renamed to ``ZstdCompressionParameters``.
519
549
520 0.8.2 (released 2018-02-22)
550 0.8.2 (released 2018-02-22)
521 ---------------------------
551 ---------------------------
522
552
523 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40).
553 * Fixed memory leak in ``ZstdCompressor.copy_stream()`` (#40).
524 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35).
554 * Fixed memory leak in ``ZstdDecompressor.copy_stream()`` (#35).
525
555
526 0.8.1 (released 2017-04-08)
556 0.8.1 (released 2017-04-08)
527 ---------------------------
557 ---------------------------
528
558
529 * Add #includes so compilation on OS X and BSDs works (#20).
559 * Add #includes so compilation on OS X and BSDs works (#20).
530
560
531 0.8.0 (released 2017-03-08)
561 0.8.0 (released 2017-03-08)
532 ===========================
562 ===========================
533
563
534 * CompressionParameters now has an estimated_compression_context_size() method.
564 * CompressionParameters now has an estimated_compression_context_size() method.
535 zstd.estimate_compression_context_size() is now deprecated and slated for
565 zstd.estimate_compression_context_size() is now deprecated and slated for
536 removal.
566 removal.
537 * Implemented a lot of fuzzing tests.
567 * Implemented a lot of fuzzing tests.
538 * CompressionParameters instances now perform extra validation by calling
568 * CompressionParameters instances now perform extra validation by calling
539 ZSTD_checkCParams() at construction time.
569 ZSTD_checkCParams() at construction time.
540 * multi_compress_to_buffer() API for compressing multiple inputs as a
570 * multi_compress_to_buffer() API for compressing multiple inputs as a
541 single operation, as efficiently as possible.
571 single operation, as efficiently as possible.
542 * ZSTD_CStream instances are now used across multiple operations on
572 * ZSTD_CStream instances are now used across multiple operations on
543 ZstdCompressor instances, resulting in much better performance for
573 ZstdCompressor instances, resulting in much better performance for
544 APIs that do streaming.
574 APIs that do streaming.
545 * ZSTD_DStream instances are now used across multiple operations on
575 * ZSTD_DStream instances are now used across multiple operations on
546 ZstdDecompressor instances, resulting in much better performance for
576 ZstdDecompressor instances, resulting in much better performance for
547 APIs that do streaming.
577 APIs that do streaming.
548 * train_dictionary() now releases the GIL.
578 * train_dictionary() now releases the GIL.
549 * Support for training dictionaries using the COVER algorithm.
579 * Support for training dictionaries using the COVER algorithm.
550 * multi_decompress_to_buffer() API for decompressing multiple frames as a
580 * multi_decompress_to_buffer() API for decompressing multiple frames as a
551 single operation, as efficiently as possible.
581 single operation, as efficiently as possible.
552 * Support for multi-threaded compression.
582 * Support for multi-threaded compression.
553 * Disable deprecation warnings when compiling CFFI module.
583 * Disable deprecation warnings when compiling CFFI module.
554 * Fixed memory leak in train_dictionary().
584 * Fixed memory leak in train_dictionary().
555 * Removed DictParameters type.
585 * Removed DictParameters type.
556 * train_dictionary() now accepts keyword arguments instead of a
586 * train_dictionary() now accepts keyword arguments instead of a
557 DictParameters instance to control dictionary generation.
587 DictParameters instance to control dictionary generation.
558
588
559 0.7.0 (released 2017-02-07)
589 0.7.0 (released 2017-02-07)
560 ===========================
590 ===========================
561
591
562 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
592 * Added zstd.get_frame_parameters() to obtain info about a zstd frame.
563 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
593 * Added ZstdDecompressor.decompress_content_dict_chain() for efficient
564 decompression of *content-only dictionary chains*.
594 decompression of *content-only dictionary chains*.
565 * CFFI module fully implemented; all tests run against both C extension and
595 * CFFI module fully implemented; all tests run against both C extension and
566 CFFI implementation.
596 CFFI implementation.
567 * Vendored version of zstd updated to 1.1.3.
597 * Vendored version of zstd updated to 1.1.3.
568 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
598 * ZstdDecompressor.decompress() now uses ZSTD_createDDict_byReference()
569 to avoid extra memory allocation of dict data.
599 to avoid extra memory allocation of dict data.
570 * Add function names to error messages (by using ":name" in PyArg_Parse*
600 * Add function names to error messages (by using ":name" in PyArg_Parse*
571 functions).
601 functions).
572 * Reuse decompression context across operations. Previously, we created a
602 * Reuse decompression context across operations. Previously, we created a
573 new ZSTD_DCtx for each decompress(). This was measured to slow down
603 new ZSTD_DCtx for each decompress(). This was measured to slow down
574 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
604 decompression by 40-200MB/s. The API guarantees say ZstdDecompressor
575 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
605 is not thread safe. So we reuse the ZSTD_DCtx across operations and make
576 things faster in the process.
606 things faster in the process.
577 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
607 * ZstdCompressor.write_to()'s compress() and flush() methods now return number
578 of bytes written.
608 of bytes written.
579 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
609 * ZstdDecompressor.write_to()'s write() method now returns the number of bytes
580 written to the underlying output object.
610 written to the underlying output object.
581 * CompressionParameters instances now expose their values as attributes.
611 * CompressionParameters instances now expose their values as attributes.
582 * CompressionParameters instances no longer are subscriptable nor behave
612 * CompressionParameters instances no longer are subscriptable nor behave
583 as tuples (backwards incompatible). Use attributes to obtain values.
613 as tuples (backwards incompatible). Use attributes to obtain values.
584 * DictParameters instances now expose their values as attributes.
614 * DictParameters instances now expose their values as attributes.
585
615
586 0.6.0 (released 2017-01-14)
616 0.6.0 (released 2017-01-14)
587 ===========================
617 ===========================
588
618
589 * Support for legacy zstd protocols (build time opt in feature).
619 * Support for legacy zstd protocols (build time opt in feature).
590 * Automation improvements to test against Python 3.6, latest versions
620 * Automation improvements to test against Python 3.6, latest versions
591 of Tox, more deterministic AppVeyor behavior.
621 of Tox, more deterministic AppVeyor behavior.
592 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
622 * CFFI "parser" improved to use a compiler preprocessor instead of rewriting
593 source code manually.
623 source code manually.
594 * Vendored version of zstd updated to 1.1.2.
624 * Vendored version of zstd updated to 1.1.2.
595 * Documentation improvements.
625 * Documentation improvements.
596 * Introduce a bench.py script for performing (crude) benchmarks.
626 * Introduce a bench.py script for performing (crude) benchmarks.
597 * ZSTD_CCtx instances are now reused across multiple compress() operations.
627 * ZSTD_CCtx instances are now reused across multiple compress() operations.
598 * ZstdCompressor.write_to() now has a flush() method.
628 * ZstdCompressor.write_to() now has a flush() method.
599 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
629 * ZstdCompressor.compressobj()'s flush() method now accepts an argument to
600 flush a block (as opposed to ending the stream).
630 flush a block (as opposed to ending the stream).
601 * Disallow compress(b'') when writing content sizes by default (issue #11).
631 * Disallow compress(b'') when writing content sizes by default (issue #11).
602
632
603 0.5.2 (released 2016-11-12)
633 0.5.2 (released 2016-11-12)
604 ===========================
634 ===========================
605
635
606 * more packaging fixes for source distribution
636 * more packaging fixes for source distribution
607
637
608 0.5.1 (released 2016-11-12)
638 0.5.1 (released 2016-11-12)
609 ===========================
639 ===========================
610
640
611 * setup_zstd.py is included in the source distribution
641 * setup_zstd.py is included in the source distribution
612
642
613 0.5.0 (released 2016-11-10)
643 0.5.0 (released 2016-11-10)
614 ===========================
644 ===========================
615
645
616 * Vendored version of zstd updated to 1.1.1.
646 * Vendored version of zstd updated to 1.1.1.
617 * Continuous integration for Python 3.6 and 3.7
647 * Continuous integration for Python 3.6 and 3.7
618 * Continuous integration for Conda
648 * Continuous integration for Conda
619 * Added compression and decompression APIs providing similar interfaces
649 * Added compression and decompression APIs providing similar interfaces
620 to the standard library ``zlib`` and ``bz2`` modules. This allows
650 to the standard library ``zlib`` and ``bz2`` modules. This allows
621 coding to a common interface.
651 coding to a common interface.
622 * ``zstd.__version__`` is now defined.
652 * ``zstd.__version__`` is now defined.
623 * ``read_from()`` on various APIs now accepts objects implementing the buffer
653 * ``read_from()`` on various APIs now accepts objects implementing the buffer
624 protocol.
654 protocol.
625 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
655 * ``read_from()`` has gained a ``skip_bytes`` argument. This allows callers
626 to pass in an existing buffer with a header without having to create a
656 to pass in an existing buffer with a header without having to create a
627 slice or a new object.
657 slice or a new object.
628 * Implemented ``ZstdCompressionDict.as_bytes()``.
658 * Implemented ``ZstdCompressionDict.as_bytes()``.
629 * Python's memory allocator is now used instead of ``malloc()``.
659 * Python's memory allocator is now used instead of ``malloc()``.
630 * Low-level zstd data structures are reused in more instances, cutting down
660 * Low-level zstd data structures are reused in more instances, cutting down
631 on overhead for certain operations.
661 on overhead for certain operations.
632 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
662 * ``distutils`` boilerplate for obtaining an ``Extension`` instance
633 has now been refactored into a standalone ``setup_zstd.py`` file. This
663 has now been refactored into a standalone ``setup_zstd.py`` file. This
634 allows other projects with ``setup.py`` files to reuse the
664 allows other projects with ``setup.py`` files to reuse the
635 ``distutils`` code for this project without copying code.
665 ``distutils`` code for this project without copying code.
636 * The monolithic ``zstd.c`` file has been split into a header file defining
666 * The monolithic ``zstd.c`` file has been split into a header file defining
637 types and separate ``.c`` source files for the implementation.
667 types and separate ``.c`` source files for the implementation.
638
668
639 Older History
669 Older History
640 =============
670 =============
641
671
642 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
672 2016-08-31 - Zstandard 1.0.0 is released and Gregory starts hacking on a
643 Python extension for use by the Mercurial project. A very hacky prototype
673 Python extension for use by the Mercurial project. A very hacky prototype
644 is sent to the mercurial-devel list for RFC.
674 is sent to the mercurial-devel list for RFC.
645
675
646 2016-09-03 - Most functionality from Zstandard C API implemented. Source
676 2016-09-03 - Most functionality from Zstandard C API implemented. Source
647 code published on https://github.com/indygreg/python-zstandard. Travis-CI
677 code published on https://github.com/indygreg/python-zstandard. Travis-CI
648 automation configured. 0.0.1 release on PyPI.
678 automation configured. 0.0.1 release on PyPI.
649
679
650 2016-09-05 - After the API was rounded out a bit and support for Python
680 2016-09-05 - After the API was rounded out a bit and support for Python
651 2.6 and 2.7 was added, version 0.1 was released to PyPI.
681 2.6 and 2.7 was added, version 0.1 was released to PyPI.
652
682
653 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
683 2016-09-05 - After the compressor and decompressor APIs were changed, 0.2
654 was released to PyPI.
684 was released to PyPI.
655
685
656 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
686 2016-09-10 - 0.3 is released with a bunch of new features. ZstdCompressor
657 now accepts arguments controlling frame parameters. The source size can now
687 now accepts arguments controlling frame parameters. The source size can now
658 be declared when performing streaming compression. ZstdDecompressor.decompress()
688 be declared when performing streaming compression. ZstdDecompressor.decompress()
659 is implemented. Compression dictionaries are now cached when using the simple
689 is implemented. Compression dictionaries are now cached when using the simple
660 compression and decompression APIs. Memory size APIs added.
690 compression and decompression APIs. Memory size APIs added.
661 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
691 ZstdCompressor.read_from() and ZstdDecompressor.read_from() have been
662 implemented. This rounds out the major compression/decompression APIs planned
692 implemented. This rounds out the major compression/decompression APIs planned
663 by the author.
693 by the author.
664
694
665 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
695 2016-10-02 - 0.3.3 is released with a bug fix for read_from not fully
666 decoding a zstd frame (issue #2).
696 decoding a zstd frame (issue #2).
667
697
668 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
698 2016-10-02 - 0.4.0 is released with zstd 1.1.0, support for custom read and
669 write buffer sizes, and a few bug fixes involving failure to read/write
699 write buffer sizes, and a few bug fixes involving failure to read/write
670 all data when buffer sizes were too small to hold remaining data.
700 all data when buffer sizes were too small to hold remaining data.
671
701
672 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
702 2016-11-10 - 0.5.0 is released with zstd 1.1.1 and other enhancements.
@@ -1,1606 +1,1602 b''
1 ================
1 ================
2 python-zstandard
2 python-zstandard
3 ================
3 ================
4
4
5 This project provides Python bindings for interfacing with the
5 This project provides Python bindings for interfacing with the
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
6 `Zstandard <http://www.zstd.net>`_ compression library. A C extension
7 and CFFI interface are provided.
7 and CFFI interface are provided.
8
8
9 The primary goal of the project is to provide a rich interface to the
9 The primary goal of the project is to provide a rich interface to the
10 underlying C API through a Pythonic interface while not sacrificing
10 underlying C API through a Pythonic interface while not sacrificing
11 performance. This means exposing most of the features and flexibility
11 performance. This means exposing most of the features and flexibility
12 of the C API while not sacrificing usability or safety that Python provides.
12 of the C API while not sacrificing usability or safety that Python provides.
13
13
14 The canonical home for this project lives in a Mercurial repository run by
14 The canonical home for this project lives in a Mercurial repository run by
15 the author. For convenience, that repository is frequently synchronized to
15 the author. For convenience, that repository is frequently synchronized to
16 https://github.com/indygreg/python-zstandard.
16 https://github.com/indygreg/python-zstandard.
17
17
18 | |ci-status| |win-ci-status|
18 | |ci-status|
19
19
20 Requirements
20 Requirements
21 ============
21 ============
22
22
23 This extension is designed to run with Python 2.7, 3.4, 3.5, 3.6, and 3.7
23 This extension is designed to run with Python 2.7, 3.4, 3.5, 3.6, and 3.7
24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
24 on common platforms (Linux, Windows, and OS X). On PyPy (both PyPy2 and PyPy3) we support version 6.0.0 and above.
25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
25 x86 and x86_64 are well-tested on Windows. Only x86_64 is well-tested on Linux and macOS.
26
26
27 Installing
27 Installing
28 ==========
28 ==========
29
29
30 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
30 This package is uploaded to PyPI at https://pypi.python.org/pypi/zstandard.
31 So, to install this package::
31 So, to install this package::
32
32
33 $ pip install zstandard
33 $ pip install zstandard
34
34
35 Binary wheels are made available for some platforms. If you need to
35 Binary wheels are made available for some platforms. If you need to
36 install from a source distribution, all you should need is a working C
36 install from a source distribution, all you should need is a working C
37 compiler and the Python development headers/libraries. On many Linux
37 compiler and the Python development headers/libraries. On many Linux
38 distributions, you can install a ``python-dev`` or ``python-devel``
38 distributions, you can install a ``python-dev`` or ``python-devel``
39 package to provide these dependencies.
39 package to provide these dependencies.
40
40
41 Packages are also uploaded to Anaconda Cloud at
41 Packages are also uploaded to Anaconda Cloud at
42 https://anaconda.org/indygreg/zstandard. See that URL for how to install
42 https://anaconda.org/indygreg/zstandard. See that URL for how to install
43 this package with ``conda``.
43 this package with ``conda``.
44
44
45 Performance
45 Performance
46 ===========
46 ===========
47
47
48 zstandard is a highly tunable compression algorithm. In its default settings
48 zstandard is a highly tunable compression algorithm. In its default settings
49 (compression level 3), it will be faster at compression and decompression and
49 (compression level 3), it will be faster at compression and decompression and
50 will have better compression ratios than zlib on most data sets. When tuned
50 will have better compression ratios than zlib on most data sets. When tuned
51 for speed, it approaches lz4's speed and ratios. When tuned for compression
51 for speed, it approaches lz4's speed and ratios. When tuned for compression
52 ratio, it approaches lzma ratios and compression speed, but decompression
52 ratio, it approaches lzma ratios and compression speed, but decompression
53 speed is much faster. See the official zstandard documentation for more.
53 speed is much faster. See the official zstandard documentation for more.
54
54
55 zstandard and this library support multi-threaded compression. There is a
55 zstandard and this library support multi-threaded compression. There is a
56 mechanism to compress large inputs using multiple threads.
56 mechanism to compress large inputs using multiple threads.
57
57
58 The performance of this library is usually very similar to what the zstandard
58 The performance of this library is usually very similar to what the zstandard
59 C API can deliver. Overhead in this library is due to general Python overhead
59 C API can deliver. Overhead in this library is due to general Python overhead
60 and can't easily be avoided by *any* zstandard Python binding. This library
60 and can't easily be avoided by *any* zstandard Python binding. This library
61 exposes multiple APIs for performing compression and decompression so callers
61 exposes multiple APIs for performing compression and decompression so callers
62 can pick an API suitable for their need. Contrast with the compression
62 can pick an API suitable for their need. Contrast with the compression
63 modules in Python's standard library (like ``zlib``), which only offer limited
63 modules in Python's standard library (like ``zlib``), which only offer limited
64 mechanisms for performing operations. The API flexibility means consumers can
64 mechanisms for performing operations. The API flexibility means consumers can
65 choose to use APIs that facilitate zero copying or minimize Python object
65 choose to use APIs that facilitate zero copying or minimize Python object
66 creation and garbage collection overhead.
66 creation and garbage collection overhead.
67
67
68 This library is capable of single-threaded throughputs well over 1 GB/s. For
68 This library is capable of single-threaded throughputs well over 1 GB/s. For
69 exact numbers, measure yourself. The source code repository has a ``bench.py``
69 exact numbers, measure yourself. The source code repository has a ``bench.py``
70 script that can be used to measure things.
70 script that can be used to measure things.
71
71
72 API
72 API
73 ===
73 ===
74
74
75 To interface with Zstandard, simply import the ``zstandard`` module::
75 To interface with Zstandard, simply import the ``zstandard`` module::
76
76
77 import zstandard
77 import zstandard
78
78
79 It is a popular convention to alias the module as a different name for
79 It is a popular convention to alias the module as a different name for
80 brevity::
80 brevity::
81
81
82 import zstandard as zstd
82 import zstandard as zstd
83
83
84 This module attempts to import and use either the C extension or CFFI
84 This module attempts to import and use either the C extension or CFFI
85 implementation. On Python platforms known to support C extensions (like
85 implementation. On Python platforms known to support C extensions (like
86 CPython), it raises an ImportError if the C extension cannot be imported.
86 CPython), it raises an ImportError if the C extension cannot be imported.
87 On Python platforms known to not support C extensions (like PyPy), it only
87 On Python platforms known to not support C extensions (like PyPy), it only
88 attempts to import the CFFI implementation and raises ImportError if that
88 attempts to import the CFFI implementation and raises ImportError if that
89 can't be done. On other platforms, it first tries to import the C extension
89 can't be done. On other platforms, it first tries to import the C extension
90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
90 then falls back to CFFI if that fails and raises ImportError if CFFI fails.
91
91
92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
92 To change the module import behavior, a ``PYTHON_ZSTANDARD_IMPORT_POLICY``
93 environment variable can be set. The following values are accepted:
93 environment variable can be set. The following values are accepted:
94
94
95 default
95 default
96 The behavior described above.
96 The behavior described above.
97 cffi_fallback
97 cffi_fallback
98 Always try to import the C extension then fall back to CFFI if that
98 Always try to import the C extension then fall back to CFFI if that
99 fails.
99 fails.
100 cext
100 cext
101 Only attempt to import the C extension.
101 Only attempt to import the C extension.
102 cffi
102 cffi
103 Only attempt to import the CFFI implementation.
103 Only attempt to import the CFFI implementation.
104
104
105 In addition, the ``zstandard`` module exports a ``backend`` attribute
105 In addition, the ``zstandard`` module exports a ``backend`` attribute
106 containing the string name of the backend being used. It will be one
106 containing the string name of the backend being used. It will be one
107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
107 of ``cext`` or ``cffi`` (for *C extension* and *cffi*, respectively).
108
108
109 The types, functions, and attributes exposed by the ``zstandard`` module
109 The types, functions, and attributes exposed by the ``zstandard`` module
110 are documented in the sections below.
110 are documented in the sections below.
111
111
112 .. note::
112 .. note::
113
113
114 The documentation in this section makes references to various zstd
114 The documentation in this section makes references to various zstd
115 concepts and functionality. The source repository contains a
115 concepts and functionality. The source repository contains a
116 ``docs/concepts.rst`` file explaining these in more detail.
116 ``docs/concepts.rst`` file explaining these in more detail.
117
117
118 ZstdCompressor
118 ZstdCompressor
119 --------------
119 --------------
120
120
121 The ``ZstdCompressor`` class provides an interface for performing
121 The ``ZstdCompressor`` class provides an interface for performing
122 compression operations. Each instance is essentially a wrapper around a
122 compression operations. Each instance is essentially a wrapper around a
123 ``ZSTD_CCtx`` from the C API.
123 ``ZSTD_CCtx`` from the C API.
124
124
125 Each instance is associated with parameters that control compression
125 Each instance is associated with parameters that control compression
126 behavior. These come from the following named arguments (all optional):
126 behavior. These come from the following named arguments (all optional):
127
127
128 level
128 level
129 Integer compression level. Valid values are between 1 and 22.
129 Integer compression level. Valid values are between 1 and 22.
130 dict_data
130 dict_data
131 Compression dictionary to use.
131 Compression dictionary to use.
132
132
133 Note: When using dictionary data and ``compress()`` is called multiple
133 Note: When using dictionary data and ``compress()`` is called multiple
134 times, the ``ZstdCompressionParameters`` derived from an integer
134 times, the ``ZstdCompressionParameters`` derived from an integer
135 compression ``level`` and the first compressed data's size will be reused
135 compression ``level`` and the first compressed data's size will be reused
136 for all subsequent operations. This may not be desirable if source data
136 for all subsequent operations. This may not be desirable if source data
137 size varies significantly.
137 size varies significantly.
138 compression_params
138 compression_params
139 A ``ZstdCompressionParameters`` instance defining compression settings.
139 A ``ZstdCompressionParameters`` instance defining compression settings.
140 write_checksum
140 write_checksum
141 Whether a 4 byte checksum should be written with the compressed data.
141 Whether a 4 byte checksum should be written with the compressed data.
142 Defaults to False. If True, the decompressor can verify that decompressed
142 Defaults to False. If True, the decompressor can verify that decompressed
143 data matches the original input data.
143 data matches the original input data.
144 write_content_size
144 write_content_size
145 Whether the size of the uncompressed data will be written into the
145 Whether the size of the uncompressed data will be written into the
146 header of compressed data. Defaults to True. The data will only be
146 header of compressed data. Defaults to True. The data will only be
147 written if the compressor knows the size of the input data. This is
147 written if the compressor knows the size of the input data. This is
148 often not true for streaming compression.
148 often not true for streaming compression.
149 write_dict_id
149 write_dict_id
150 Whether to write the dictionary ID into the compressed data.
150 Whether to write the dictionary ID into the compressed data.
151 Defaults to True. The dictionary ID is only written if a dictionary
151 Defaults to True. The dictionary ID is only written if a dictionary
152 is being used.
152 is being used.
153 threads
153 threads
154 Enables and sets the number of threads to use for multi-threaded compression
154 Enables and sets the number of threads to use for multi-threaded compression
155 operations. Defaults to 0, which means to use single-threaded compression.
155 operations. Defaults to 0, which means to use single-threaded compression.
156 Negative values will resolve to the number of logical CPUs in the system.
156 Negative values will resolve to the number of logical CPUs in the system.
157 Read below for more info on multi-threaded compression. This argument only
157 Read below for more info on multi-threaded compression. This argument only
158 controls thread count for operations that operate on individual pieces of
158 controls thread count for operations that operate on individual pieces of
159 data. APIs that spawn multiple threads for working on multiple pieces of
159 data. APIs that spawn multiple threads for working on multiple pieces of
160 data have their own ``threads`` argument.
160 data have their own ``threads`` argument.
161
161
162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
162 ``compression_params`` is mutually exclusive with ``level``, ``write_checksum``,
163 ``write_content_size``, ``write_dict_id``, and ``threads``.
163 ``write_content_size``, ``write_dict_id``, and ``threads``.
164
164
165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
165 Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
166 instances can be called from multiple Python threads simultaneously. In other
166 instances can be called from multiple Python threads simultaneously. In other
167 words, assume instances are not thread safe unless stated otherwise.
167 words, assume instances are not thread safe unless stated otherwise.
168
168
169 Utility Methods
169 Utility Methods
170 ^^^^^^^^^^^^^^^
170 ^^^^^^^^^^^^^^^
171
171
172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
172 ``frame_progression()`` returns a 3-tuple containing the number of bytes
173 ingested, consumed, and produced by the current compression operation.
173 ingested, consumed, and produced by the current compression operation.
174
174
175 ``memory_size()`` obtains the memory utilization of the underlying zstd
175 ``memory_size()`` obtains the memory utilization of the underlying zstd
176 compression context, in bytes.::
176 compression context, in bytes.::
177
177
178 cctx = zstd.ZstdCompressor()
178 cctx = zstd.ZstdCompressor()
179 memory = cctx.memory_size()
179 memory = cctx.memory_size()
180
180
181 Simple API
181 Simple API
182 ^^^^^^^^^^
182 ^^^^^^^^^^
183
183
184 ``compress(data)`` compresses and returns data as a one-shot operation.::
184 ``compress(data)`` compresses and returns data as a one-shot operation.::
185
185
186 cctx = zstd.ZstdCompressor()
186 cctx = zstd.ZstdCompressor()
187 compressed = cctx.compress(b'data to compress')
187 compressed = cctx.compress(b'data to compress')
188
188
189 The ``data`` argument can be any object that implements the *buffer protocol*.
189 The ``data`` argument can be any object that implements the *buffer protocol*.
190
190
191 Stream Reader API
191 Stream Reader API
192 ^^^^^^^^^^^^^^^^^
192 ^^^^^^^^^^^^^^^^^
193
193
194 ``stream_reader(source)`` can be used to obtain an object conforming to the
194 ``stream_reader(source)`` can be used to obtain an object conforming to the
195 ``io.RawIOBase`` interface for reading compressed output as a stream::
195 ``io.RawIOBase`` interface for reading compressed output as a stream::
196
196
197 with open(path, 'rb') as fh:
197 with open(path, 'rb') as fh:
198 cctx = zstd.ZstdCompressor()
198 cctx = zstd.ZstdCompressor()
199 reader = cctx.stream_reader(fh)
199 reader = cctx.stream_reader(fh)
200 while True:
200 while True:
201 chunk = reader.read(16384)
201 chunk = reader.read(16384)
202 if not chunk:
202 if not chunk:
203 break
203 break
204
204
205 # Do something with compressed chunk.
205 # Do something with compressed chunk.
206
206
207 Instances can also be used as context managers::
207 Instances can also be used as context managers::
208
208
209 with open(path, 'rb') as fh:
209 with open(path, 'rb') as fh:
210 with cctx.stream_reader(fh) as reader:
210 with cctx.stream_reader(fh) as reader:
211 while True:
211 while True:
212 chunk = reader.read(16384)
212 chunk = reader.read(16384)
213 if not chunk:
213 if not chunk:
214 break
214 break
215
215
216 # Do something with compressed chunk.
216 # Do something with compressed chunk.
217
217
218 When the context manager exits or ``close()`` is called, the stream is closed,
218 When the context manager exits or ``close()`` is called, the stream is closed,
219 underlying resources are released, and future operations against the compression
219 underlying resources are released, and future operations against the compression
220 stream will fail.
220 stream will fail.
221
221
222 The ``source`` argument to ``stream_reader()`` can be any object with a
222 The ``source`` argument to ``stream_reader()`` can be any object with a
223 ``read(size)`` method or any object implementing the *buffer protocol*.
223 ``read(size)`` method or any object implementing the *buffer protocol*.
224
224
225 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
225 ``stream_reader()`` accepts a ``size`` argument specifying how large the input
226 stream is. This is used to adjust compression parameters so they are
226 stream is. This is used to adjust compression parameters so they are
227 tailored to the source size.::
227 tailored to the source size.::
228
228
229 with open(path, 'rb') as fh:
229 with open(path, 'rb') as fh:
230 cctx = zstd.ZstdCompressor()
230 cctx = zstd.ZstdCompressor()
231 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
231 with cctx.stream_reader(fh, size=os.stat(path).st_size) as reader:
232 ...
232 ...
233
233
234 If the ``source`` is a stream, you can specify how large ``read()`` requests
234 If the ``source`` is a stream, you can specify how large ``read()`` requests
235 to that stream should be via the ``read_size`` argument. It defaults to
235 to that stream should be via the ``read_size`` argument. It defaults to
236 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
236 ``zstandard.COMPRESSION_RECOMMENDED_INPUT_SIZE``.::
237
237
238 with open(path, 'rb') as fh:
238 with open(path, 'rb') as fh:
239 cctx = zstd.ZstdCompressor()
239 cctx = zstd.ZstdCompressor()
240 # Will perform fh.read(8192) when obtaining data to feed into the
240 # Will perform fh.read(8192) when obtaining data to feed into the
241 # compressor.
241 # compressor.
242 with cctx.stream_reader(fh, read_size=8192) as reader:
242 with cctx.stream_reader(fh, read_size=8192) as reader:
243 ...
243 ...
244
244
245 The stream returned by ``stream_reader()`` is neither writable nor seekable
245 The stream returned by ``stream_reader()`` is neither writable nor seekable
246 (even if the underlying source is seekable). ``readline()`` and
246 (even if the underlying source is seekable). ``readline()`` and
247 ``readlines()`` are not implemented because they don't make sense for
247 ``readlines()`` are not implemented because they don't make sense for
248 compressed data. ``tell()`` returns the number of compressed bytes
248 compressed data. ``tell()`` returns the number of compressed bytes
249 emitted so far.
249 emitted so far.
250
250
251 Streaming Input API
251 Streaming Input API
252 ^^^^^^^^^^^^^^^^^^^
252 ^^^^^^^^^^^^^^^^^^^
253
253
254 ``stream_writer(fh)`` allows you to *stream* data into a compressor.
254 ``stream_writer(fh)`` allows you to *stream* data into a compressor.
255
255
256 Returned instances implement the ``io.RawIOBase`` interface. Only methods
256 Returned instances implement the ``io.RawIOBase`` interface. Only methods
257 that involve writing will do useful things.
257 that involve writing will do useful things.
258
258
259 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
259 The argument to ``stream_writer()`` must have a ``write(data)`` method. As
260 compressed data is available, ``write()`` will be called with the compressed
260 compressed data is available, ``write()`` will be called with the compressed
261 data as its argument. Many common Python types implement ``write()``, including
261 data as its argument. Many common Python types implement ``write()``, including
262 open file handles and ``io.BytesIO``.
262 open file handles and ``io.BytesIO``.
263
263
264 The ``write(data)`` method is used to feed data into the compressor.
264 The ``write(data)`` method is used to feed data into the compressor.
265
265
266 The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
266 The ``flush([flush_mode=FLUSH_BLOCK])`` method can be called to evict whatever
267 data remains within the compressor's internal state into the output object. This
267 data remains within the compressor's internal state into the output object. This
268 may result in 0 or more ``write()`` calls to the output object. This method
268 may result in 0 or more ``write()`` calls to the output object. This method
269 accepts an optional ``flush_mode`` argument to control the flushing behavior.
269 accepts an optional ``flush_mode`` argument to control the flushing behavior.
270 Its value can be any of the ``FLUSH_*`` constants.
270 Its value can be any of the ``FLUSH_*`` constants.
271
271
272 Both ``write()`` and ``flush()`` return the number of bytes written to the
272 Both ``write()`` and ``flush()`` return the number of bytes written to the
273 object's ``write()``. In many cases, small inputs do not accumulate enough
273 object's ``write()``. In many cases, small inputs do not accumulate enough
274 data to cause a write and ``write()`` will return ``0``.
274 data to cause a write and ``write()`` will return ``0``.
275
275
276 Calling ``close()`` will mark the stream as closed and subsequent I/O
276 Calling ``close()`` will mark the stream as closed and subsequent I/O
277 operations will raise ``ValueError`` (per the documented behavior of
277 operations will raise ``ValueError`` (per the documented behavior of
278 ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
278 ``io.RawIOBase``). ``close()`` will also call ``close()`` on the underlying
279 stream if such a method exists.
279 stream if such a method exists.
280
280
281 Typically usage is as follows::
281 Typically usage is as follows::
282
282
283 cctx = zstd.ZstdCompressor(level=10)
283 cctx = zstd.ZstdCompressor(level=10)
284 compressor = cctx.stream_writer(fh)
284 compressor = cctx.stream_writer(fh)
285
285
286 compressor.write(b'chunk 0\n')
286 compressor.write(b'chunk 0\n')
287 compressor.write(b'chunk 1\n')
287 compressor.write(b'chunk 1\n')
288 compressor.flush()
288 compressor.flush()
289 # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
289 # Receiver will be able to decode ``chunk 0\nchunk 1\n`` at this point.
290 # Receiver is also expecting more data in the zstd *frame*.
290 # Receiver is also expecting more data in the zstd *frame*.
291
291
292 compressor.write(b'chunk 2\n')
292 compressor.write(b'chunk 2\n')
293 compressor.flush(zstd.FLUSH_FRAME)
293 compressor.flush(zstd.FLUSH_FRAME)
294 # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``.
294 # Receiver will be able to decode ``chunk 0\nchunk 1\nchunk 2``.
295 # Receiver is expecting no more data, as the zstd frame is closed.
295 # Receiver is expecting no more data, as the zstd frame is closed.
296 # Any future calls to ``write()`` at this point will construct a new
296 # Any future calls to ``write()`` at this point will construct a new
297 # zstd frame.
297 # zstd frame.
298
298
299 Instances can be used as context managers. Exiting the context manager is
299 Instances can be used as context managers. Exiting the context manager is
300 the equivalent of calling ``close()``, which is equivalent to calling
300 the equivalent of calling ``close()``, which is equivalent to calling
301 ``flush(zstd.FLUSH_FRAME)``::
301 ``flush(zstd.FLUSH_FRAME)``::
302
302
303 cctx = zstd.ZstdCompressor(level=10)
303 cctx = zstd.ZstdCompressor(level=10)
304 with cctx.stream_writer(fh) as compressor:
304 with cctx.stream_writer(fh) as compressor:
305 compressor.write(b'chunk 0')
305 compressor.write(b'chunk 0')
306 compressor.write(b'chunk 1')
306 compressor.write(b'chunk 1')
307 ...
307 ...
308
308
309 .. important::
309 .. important::
310
310
311 If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
311 If ``flush(FLUSH_FRAME)`` is not called, emitted data doesn't constitute
312 a full zstd *frame* and consumers of this data may complain about malformed
312 a full zstd *frame* and consumers of this data may complain about malformed
313 input. It is recommended to use instances as a context manager to ensure
313 input. It is recommended to use instances as a context manager to ensure
314 *frames* are properly finished.
314 *frames* are properly finished.
315
315
316 If the size of the data being fed to this streaming compressor is known,
316 If the size of the data being fed to this streaming compressor is known,
317 you can declare it before compression begins::
317 you can declare it before compression begins::
318
318
319 cctx = zstd.ZstdCompressor()
319 cctx = zstd.ZstdCompressor()
320 with cctx.stream_writer(fh, size=data_len) as compressor:
320 with cctx.stream_writer(fh, size=data_len) as compressor:
321 compressor.write(chunk0)
321 compressor.write(chunk0)
322 compressor.write(chunk1)
322 compressor.write(chunk1)
323 ...
323 ...
324
324
325 Declaring the size of the source data allows compression parameters to
325 Declaring the size of the source data allows compression parameters to
326 be tuned. And if ``write_content_size`` is used, it also results in the
326 be tuned. And if ``write_content_size`` is used, it also results in the
327 content size being written into the frame header of the output data.
327 content size being written into the frame header of the output data.
328
328
329 The size of chunks being ``write()`` to the destination can be specified::
329 The size of chunks being ``write()`` to the destination can be specified::
330
330
331 cctx = zstd.ZstdCompressor()
331 cctx = zstd.ZstdCompressor()
332 with cctx.stream_writer(fh, write_size=32768) as compressor:
332 with cctx.stream_writer(fh, write_size=32768) as compressor:
333 ...
333 ...
334
334
335 To see how much memory is being used by the streaming compressor::
335 To see how much memory is being used by the streaming compressor::
336
336
337 cctx = zstd.ZstdCompressor()
337 cctx = zstd.ZstdCompressor()
338 with cctx.stream_writer(fh) as compressor:
338 with cctx.stream_writer(fh) as compressor:
339 ...
339 ...
340 byte_size = compressor.memory_size()
340 byte_size = compressor.memory_size()
341
341
342 The total number of bytes written so far is exposed via ``tell()``::
342 The total number of bytes written so far is exposed via ``tell()``::
343
343
344 cctx = zstd.ZstdCompressor()
344 cctx = zstd.ZstdCompressor()
345 with cctx.stream_writer(fh) as compressor:
345 with cctx.stream_writer(fh) as compressor:
346 ...
346 ...
347 total_written = compressor.tell()
347 total_written = compressor.tell()
348
348
349 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
349 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
350 the return value of ``write()``. When ``False`` (the default), ``write()`` returns
350 the return value of ``write()``. When ``False`` (the default), ``write()`` returns
351 the number of bytes that were ``write()``en to the underlying object. When
351 the number of bytes that were ``write()``en to the underlying object. When
352 ``True``, ``write()`` returns the number of bytes read from the input that
352 ``True``, ``write()`` returns the number of bytes read from the input that
353 were subsequently written to the compressor. ``True`` is the *proper* behavior
353 were subsequently written to the compressor. ``True`` is the *proper* behavior
354 for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
354 for ``write()`` as specified by the ``io.RawIOBase`` interface and will become
355 the default value in a future release.
355 the default value in a future release.
356
356
357 Streaming Output API
357 Streaming Output API
358 ^^^^^^^^^^^^^^^^^^^^
358 ^^^^^^^^^^^^^^^^^^^^
359
359
360 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
360 ``read_to_iter(reader)`` provides a mechanism to stream data out of a
361 compressor as an iterator of data chunks.::
361 compressor as an iterator of data chunks.::
362
362
363 cctx = zstd.ZstdCompressor()
363 cctx = zstd.ZstdCompressor()
364 for chunk in cctx.read_to_iter(fh):
364 for chunk in cctx.read_to_iter(fh):
365 # Do something with emitted data.
365 # Do something with emitted data.
366
366
367 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
367 ``read_to_iter()`` accepts an object that has a ``read(size)`` method or
368 conforms to the buffer protocol.
368 conforms to the buffer protocol.
369
369
370 Uncompressed data is fetched from the source either by calling ``read(size)``
370 Uncompressed data is fetched from the source either by calling ``read(size)``
371 or by fetching a slice of data from the object directly (in the case where
371 or by fetching a slice of data from the object directly (in the case where
372 the buffer protocol is being used). The returned iterator consists of chunks
372 the buffer protocol is being used). The returned iterator consists of chunks
373 of compressed data.
373 of compressed data.
374
374
375 If reading from the source via ``read()``, ``read()`` will be called until
375 If reading from the source via ``read()``, ``read()`` will be called until
376 it raises or returns an empty bytes (``b''``). It is perfectly valid for
376 it raises or returns an empty bytes (``b''``). It is perfectly valid for
377 the source to deliver fewer bytes than what was requested by ``read(size)``.
377 the source to deliver fewer bytes than what was requested by ``read(size)``.
378
378
379 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
379 Like ``stream_writer()``, ``read_to_iter()`` also accepts a ``size`` argument
380 declaring the size of the input stream::
380 declaring the size of the input stream::
381
381
382 cctx = zstd.ZstdCompressor()
382 cctx = zstd.ZstdCompressor()
383 for chunk in cctx.read_to_iter(fh, size=some_int):
383 for chunk in cctx.read_to_iter(fh, size=some_int):
384 pass
384 pass
385
385
386 You can also control the size that data is ``read()`` from the source and
386 You can also control the size that data is ``read()`` from the source and
387 the ideal size of output chunks::
387 the ideal size of output chunks::
388
388
389 cctx = zstd.ZstdCompressor()
389 cctx = zstd.ZstdCompressor()
390 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
390 for chunk in cctx.read_to_iter(fh, read_size=16384, write_size=8192):
391 pass
391 pass
392
392
393 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
393 Unlike ``stream_writer()``, ``read_to_iter()`` does not give direct control
394 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
394 over the sizes of chunks fed into the compressor. Instead, chunk sizes will
395 be whatever the object being read from delivers. These will often be of a
395 be whatever the object being read from delivers. These will often be of a
396 uniform size.
396 uniform size.
397
397
398 Stream Copying API
398 Stream Copying API
399 ^^^^^^^^^^^^^^^^^^
399 ^^^^^^^^^^^^^^^^^^
400
400
401 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
401 ``copy_stream(ifh, ofh)`` can be used to copy data between 2 streams while
402 compressing it.::
402 compressing it.::
403
403
404 cctx = zstd.ZstdCompressor()
404 cctx = zstd.ZstdCompressor()
405 cctx.copy_stream(ifh, ofh)
405 cctx.copy_stream(ifh, ofh)
406
406
407 For example, say you wish to compress a file::
407 For example, say you wish to compress a file::
408
408
409 cctx = zstd.ZstdCompressor()
409 cctx = zstd.ZstdCompressor()
410 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
410 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
411 cctx.copy_stream(ifh, ofh)
411 cctx.copy_stream(ifh, ofh)
412
412
413 It is also possible to declare the size of the source stream::
413 It is also possible to declare the size of the source stream::
414
414
415 cctx = zstd.ZstdCompressor()
415 cctx = zstd.ZstdCompressor()
416 cctx.copy_stream(ifh, ofh, size=len_of_input)
416 cctx.copy_stream(ifh, ofh, size=len_of_input)
417
417
418 You can also specify how large the chunks that are ``read()`` and ``write()``
418 You can also specify how large the chunks that are ``read()`` and ``write()``
419 from and to the streams::
419 from and to the streams::
420
420
421 cctx = zstd.ZstdCompressor()
421 cctx = zstd.ZstdCompressor()
422 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
422 cctx.copy_stream(ifh, ofh, read_size=32768, write_size=16384)
423
423
424 The stream copier returns a 2-tuple of bytes read and written::
424 The stream copier returns a 2-tuple of bytes read and written::
425
425
426 cctx = zstd.ZstdCompressor()
426 cctx = zstd.ZstdCompressor()
427 read_count, write_count = cctx.copy_stream(ifh, ofh)
427 read_count, write_count = cctx.copy_stream(ifh, ofh)
428
428
429 Compressor API
429 Compressor API
430 ^^^^^^^^^^^^^^
430 ^^^^^^^^^^^^^^
431
431
432 ``compressobj()`` returns an object that exposes ``compress(data)`` and
432 ``compressobj()`` returns an object that exposes ``compress(data)`` and
433 ``flush()`` methods. Each returns compressed data or an empty bytes.
433 ``flush()`` methods. Each returns compressed data or an empty bytes.
434
434
435 The purpose of ``compressobj()`` is to provide an API-compatible interface
435 The purpose of ``compressobj()`` is to provide an API-compatible interface
436 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
436 with ``zlib.compressobj``, ``bz2.BZ2Compressor``, etc. This allows callers to
437 swap in different compressor objects while using the same API.
437 swap in different compressor objects while using the same API.
438
438
439 ``flush()`` accepts an optional argument indicating how to end the stream.
439 ``flush()`` accepts an optional argument indicating how to end the stream.
440 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
440 ``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
441 Once this type of flush is performed, ``compress()`` and ``flush()`` can
441 Once this type of flush is performed, ``compress()`` and ``flush()`` can
442 no longer be called. This type of flush **must** be called to end the
442 no longer be called. This type of flush **must** be called to end the
443 compression context. If not called, returned data may be incomplete.
443 compression context. If not called, returned data may be incomplete.
444
444
445 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
445 A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
446 zstd block. Flushes of this type can be performed multiple times. The next
446 zstd block. Flushes of this type can be performed multiple times. The next
447 call to ``compress()`` will begin a new zstd block.
447 call to ``compress()`` will begin a new zstd block.
448
448
449 Here is how this API should be used::
449 Here is how this API should be used::
450
450
451 cctx = zstd.ZstdCompressor()
451 cctx = zstd.ZstdCompressor()
452 cobj = cctx.compressobj()
452 cobj = cctx.compressobj()
453 data = cobj.compress(b'raw input 0')
453 data = cobj.compress(b'raw input 0')
454 data = cobj.compress(b'raw input 1')
454 data = cobj.compress(b'raw input 1')
455 data = cobj.flush()
455 data = cobj.flush()
456
456
457 Or to flush blocks::
457 Or to flush blocks::
458
458
459 cctx = zstd.ZstdCompressor()
459 cctx = zstd.ZstdCompressor()
460 cobj = cctx.compressobj()
460 cobj = cctx.compressobj()
461 data = cobj.compress(b'chunk in first block')
461 data = cobj.compress(b'chunk in first block')
462 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
462 data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
463 data = cobj.compress(b'chunk in second block')
463 data = cobj.compress(b'chunk in second block')
464 data = cobj.flush()
464 data = cobj.flush()
465
465
466 For best performance results, keep input chunks under 256KB. This avoids
466 For best performance results, keep input chunks under 256KB. This avoids
467 extra allocations for a large output object.
467 extra allocations for a large output object.
468
468
469 It is possible to declare the input size of the data that will be fed into
469 It is possible to declare the input size of the data that will be fed into
470 the compressor::
470 the compressor::
471
471
472 cctx = zstd.ZstdCompressor()
472 cctx = zstd.ZstdCompressor()
473 cobj = cctx.compressobj(size=6)
473 cobj = cctx.compressobj(size=6)
474 data = cobj.compress(b'foobar')
474 data = cobj.compress(b'foobar')
475 data = cobj.flush()
475 data = cobj.flush()
476
476
477 Chunker API
477 Chunker API
478 ^^^^^^^^^^^
478 ^^^^^^^^^^^
479
479
480 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
480 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
481 an object that can be used to iteratively feed chunks of data into a compressor
481 an object that can be used to iteratively feed chunks of data into a compressor
482 and produce output chunks of a uniform size.
482 and produce output chunks of a uniform size.
483
483
484 The object returned by ``chunker()`` exposes the following methods:
484 The object returned by ``chunker()`` exposes the following methods:
485
485
486 ``compress(data)``
486 ``compress(data)``
487 Feeds new input data into the compressor.
487 Feeds new input data into the compressor.
488
488
489 ``flush()``
489 ``flush()``
490 Flushes all data currently in the compressor.
490 Flushes all data currently in the compressor.
491
491
492 ``finish()``
492 ``finish()``
493 Signals the end of input data. No new data can be compressed after this
493 Signals the end of input data. No new data can be compressed after this
494 method is called.
494 method is called.
495
495
496 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
496 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
497 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
497 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
498 MUST iterate through all elements of the returned iterator before performing
498 MUST iterate through all elements of the returned iterator before performing
499 another operation on the object.
499 another operation on the object.
500
500
501 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
501 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
502
502
503 ``flush()`` and ``finish()`` may return a final chunk smaller than
503 ``flush()`` and ``finish()`` may return a final chunk smaller than
504 ``chunk_size``.
504 ``chunk_size``.
505
505
506 Here is how the API should be used::
506 Here is how the API should be used::
507
507
508 cctx = zstd.ZstdCompressor()
508 cctx = zstd.ZstdCompressor()
509 chunker = cctx.chunker(chunk_size=32768)
509 chunker = cctx.chunker(chunk_size=32768)
510
510
511 with open(path, 'rb') as fh:
511 with open(path, 'rb') as fh:
512 while True:
512 while True:
513 in_chunk = fh.read(32768)
513 in_chunk = fh.read(32768)
514 if not in_chunk:
514 if not in_chunk:
515 break
515 break
516
516
517 for out_chunk in chunker.compress(in_chunk):
517 for out_chunk in chunker.compress(in_chunk):
518 # Do something with output chunk of size 32768.
518 # Do something with output chunk of size 32768.
519
519
520 for out_chunk in chunker.finish():
520 for out_chunk in chunker.finish():
521 # Do something with output chunks that finalize the zstd frame.
521 # Do something with output chunks that finalize the zstd frame.
522
522
523 The ``chunker()`` API is often a better alternative to ``compressobj()``.
523 The ``chunker()`` API is often a better alternative to ``compressobj()``.
524
524
525 ``compressobj()`` will emit output data as it is available. This results in a
525 ``compressobj()`` will emit output data as it is available. This results in a
526 *stream* of output chunks of varying sizes. The consistency of the output chunk
526 *stream* of output chunks of varying sizes. The consistency of the output chunk
527 size with ``chunker()`` is more appropriate for many usages, such as sending
527 size with ``chunker()`` is more appropriate for many usages, such as sending
528 compressed data to a socket.
528 compressed data to a socket.
529
529
530 ``compressobj()`` may also perform extra memory reallocations in order to
530 ``compressobj()`` may also perform extra memory reallocations in order to
531 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
531 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
532 chunks are all the same size (except for flushed or final chunks), there is
532 chunks are all the same size (except for flushed or final chunks), there is
533 less memory allocation overhead.
533 less memory allocation overhead.
534
534
535 Batch Compression API
535 Batch Compression API
536 ^^^^^^^^^^^^^^^^^^^^^
536 ^^^^^^^^^^^^^^^^^^^^^
537
537
538 (Experimental. Not yet supported in CFFI bindings.)
538 (Experimental. Not yet supported in CFFI bindings.)
539
539
540 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
540 ``multi_compress_to_buffer(data, [threads=0])`` performs compression of multiple
541 inputs as a single operation.
541 inputs as a single operation.
542
542
543 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
543 Data to be compressed can be passed as a ``BufferWithSegmentsCollection``, a
544 ``BufferWithSegments``, or a list containing byte like objects. Each element of
544 ``BufferWithSegments``, or a list containing byte like objects. Each element of
545 the container will be compressed individually using the configured parameters
545 the container will be compressed individually using the configured parameters
546 on the ``ZstdCompressor`` instance.
546 on the ``ZstdCompressor`` instance.
547
547
548 The ``threads`` argument controls how many threads to use for compression. The
548 The ``threads`` argument controls how many threads to use for compression. The
549 default is ``0`` which means to use a single thread. Negative values use the
549 default is ``0`` which means to use a single thread. Negative values use the
550 number of logical CPUs in the machine.
550 number of logical CPUs in the machine.
551
551
552 The function returns a ``BufferWithSegmentsCollection``. This type represents
552 The function returns a ``BufferWithSegmentsCollection``. This type represents
553 N discrete memory allocations, each holding 1 or more compressed frames.
553 N discrete memory allocations, each holding 1 or more compressed frames.
554
554
555 Output data is written to shared memory buffers. This means that unlike
555 Output data is written to shared memory buffers. This means that unlike
556 regular Python objects, a reference to *any* object within the collection
556 regular Python objects, a reference to *any* object within the collection
557 keeps the shared buffer and therefore memory backing it alive. This can have
557 keeps the shared buffer and therefore memory backing it alive. This can have
558 undesirable effects on process memory usage.
558 undesirable effects on process memory usage.
559
559
560 The API and behavior of this function is experimental and will likely change.
560 The API and behavior of this function is experimental and will likely change.
561 Known deficiencies include:
561 Known deficiencies include:
562
562
563 * If asked to use multiple threads, it will always spawn that many threads,
563 * If asked to use multiple threads, it will always spawn that many threads,
564 even if the input is too small to use them. It should automatically lower
564 even if the input is too small to use them. It should automatically lower
565 the thread count when the extra threads would just add overhead.
565 the thread count when the extra threads would just add overhead.
566 * The buffer allocation strategy is fixed. There is room to make it dynamic,
566 * The buffer allocation strategy is fixed. There is room to make it dynamic,
567 perhaps even to allow one output buffer per input, facilitating a variation
567 perhaps even to allow one output buffer per input, facilitating a variation
568 of the API to return a list without the adverse effects of shared memory
568 of the API to return a list without the adverse effects of shared memory
569 buffers.
569 buffers.
570
570
571 ZstdDecompressor
571 ZstdDecompressor
572 ----------------
572 ----------------
573
573
574 The ``ZstdDecompressor`` class provides an interface for performing
574 The ``ZstdDecompressor`` class provides an interface for performing
575 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
575 decompression. It is effectively a wrapper around the ``ZSTD_DCtx`` type from
576 the C API.
576 the C API.
577
577
578 Each instance is associated with parameters that control decompression. These
578 Each instance is associated with parameters that control decompression. These
579 come from the following named arguments (all optional):
579 come from the following named arguments (all optional):
580
580
581 dict_data
581 dict_data
582 Compression dictionary to use.
582 Compression dictionary to use.
583 max_window_size
583 max_window_size
584 Sets an upper limit on the window size for decompression operations in
584 Sets an upper limit on the window size for decompression operations in
585 kibibytes. This setting can be used to prevent large memory allocations
585 kibibytes. This setting can be used to prevent large memory allocations
586 for inputs using large compression windows.
586 for inputs using large compression windows.
587 format
587 format
588 Set the format of data for the decoder. By default, this is
588 Set the format of data for the decoder. By default, this is
589 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
589 ``zstd.FORMAT_ZSTD1``. It can be set to ``zstd.FORMAT_ZSTD1_MAGICLESS`` to
590 allow decoding frames without the 4 byte magic header. Not all decompression
590 allow decoding frames without the 4 byte magic header. Not all decompression
591 APIs support this mode.
591 APIs support this mode.
592
592
593 The interface of this class is very similar to ``ZstdCompressor`` (by design).
593 The interface of this class is very similar to ``ZstdCompressor`` (by design).
594
594
595 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
595 Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
596 instances can be called from multiple Python threads simultaneously. In other
596 instances can be called from multiple Python threads simultaneously. In other
597 words, assume instances are not thread safe unless stated otherwise.
597 words, assume instances are not thread safe unless stated otherwise.
598
598
599 Utility Methods
599 Utility Methods
600 ^^^^^^^^^^^^^^^
600 ^^^^^^^^^^^^^^^
601
601
602 ``memory_size()`` obtains the size of the underlying zstd decompression context,
602 ``memory_size()`` obtains the size of the underlying zstd decompression context,
603 in bytes.::
603 in bytes.::
604
604
605 dctx = zstd.ZstdDecompressor()
605 dctx = zstd.ZstdDecompressor()
606 size = dctx.memory_size()
606 size = dctx.memory_size()
607
607
608 Simple API
608 Simple API
609 ^^^^^^^^^^
609 ^^^^^^^^^^
610
610
611 ``decompress(data)`` can be used to decompress an entire compressed zstd
611 ``decompress(data)`` can be used to decompress an entire compressed zstd
612 frame in a single operation.::
612 frame in a single operation.::
613
613
614 dctx = zstd.ZstdDecompressor()
614 dctx = zstd.ZstdDecompressor()
615 decompressed = dctx.decompress(data)
615 decompressed = dctx.decompress(data)
616
616
617 By default, ``decompress(data)`` will only work on data written with the content
617 By default, ``decompress(data)`` will only work on data written with the content
618 size encoded in its header (this is the default behavior of
618 size encoded in its header (this is the default behavior of
619 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
619 ``ZstdCompressor().compress()`` but may not be true for streaming compression). If
620 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
620 compressed data without an embedded content size is seen, ``zstd.ZstdError`` will
621 be raised.
621 be raised.
622
622
623 If the compressed data doesn't have its content size embedded within it,
623 If the compressed data doesn't have its content size embedded within it,
624 decompression can be attempted by specifying the ``max_output_size``
624 decompression can be attempted by specifying the ``max_output_size``
625 argument.::
625 argument.::
626
626
627 dctx = zstd.ZstdDecompressor()
627 dctx = zstd.ZstdDecompressor()
628 uncompressed = dctx.decompress(data, max_output_size=1048576)
628 uncompressed = dctx.decompress(data, max_output_size=1048576)
629
629
630 Ideally, ``max_output_size`` will be identical to the decompressed output
630 Ideally, ``max_output_size`` will be identical to the decompressed output
631 size.
631 size.
632
632
633 If ``max_output_size`` is too small to hold the decompressed data,
633 If ``max_output_size`` is too small to hold the decompressed data,
634 ``zstd.ZstdError`` will be raised.
634 ``zstd.ZstdError`` will be raised.
635
635
636 If ``max_output_size`` is larger than the decompressed data, the allocated
636 If ``max_output_size`` is larger than the decompressed data, the allocated
637 output buffer will be resized to only use the space required.
637 output buffer will be resized to only use the space required.
638
638
639 Please note that an allocation of the requested ``max_output_size`` will be
639 Please note that an allocation of the requested ``max_output_size`` will be
640 performed every time the method is called. Setting to a very large value could
640 performed every time the method is called. Setting to a very large value could
641 result in a lot of work for the memory allocator and may result in
641 result in a lot of work for the memory allocator and may result in
642 ``MemoryError`` being raised if the allocation fails.
642 ``MemoryError`` being raised if the allocation fails.
643
643
644 .. important::
644 .. important::
645
645
646 If the exact size of decompressed data is unknown (not passed in explicitly
646 If the exact size of decompressed data is unknown (not passed in explicitly
647 and not stored in the zstandard frame), for performance reasons it is
647 and not stored in the zstandard frame), for performance reasons it is
648 encouraged to use a streaming API.
648 encouraged to use a streaming API.
649
649
650 Stream Reader API
650 Stream Reader API
651 ^^^^^^^^^^^^^^^^^
651 ^^^^^^^^^^^^^^^^^
652
652
653 ``stream_reader(source)`` can be used to obtain an object conforming to the
653 ``stream_reader(source)`` can be used to obtain an object conforming to the
654 ``io.RawIOBase`` interface for reading decompressed output as a stream::
654 ``io.RawIOBase`` interface for reading decompressed output as a stream::
655
655
656 with open(path, 'rb') as fh:
656 with open(path, 'rb') as fh:
657 dctx = zstd.ZstdDecompressor()
657 dctx = zstd.ZstdDecompressor()
658 reader = dctx.stream_reader(fh)
658 reader = dctx.stream_reader(fh)
659 while True:
659 while True:
660 chunk = reader.read(16384)
660 chunk = reader.read(16384)
661 if not chunk:
661 if not chunk:
662 break
662 break
663
663
664 # Do something with decompressed chunk.
664 # Do something with decompressed chunk.
665
665
666 The stream can also be used as a context manager::
666 The stream can also be used as a context manager::
667
667
668 with open(path, 'rb') as fh:
668 with open(path, 'rb') as fh:
669 dctx = zstd.ZstdDecompressor()
669 dctx = zstd.ZstdDecompressor()
670 with dctx.stream_reader(fh) as reader:
670 with dctx.stream_reader(fh) as reader:
671 ...
671 ...
672
672
673 When used as a context manager, the stream is closed and the underlying
673 When used as a context manager, the stream is closed and the underlying
674 resources are released when the context manager exits. Future operations against
674 resources are released when the context manager exits. Future operations against
675 the stream will fail.
675 the stream will fail.
676
676
677 The ``source`` argument to ``stream_reader()`` can be any object with a
677 The ``source`` argument to ``stream_reader()`` can be any object with a
678 ``read(size)`` method or any object implementing the *buffer protocol*.
678 ``read(size)`` method or any object implementing the *buffer protocol*.
679
679
680 If the ``source`` is a stream, you can specify how large ``read()`` requests
680 If the ``source`` is a stream, you can specify how large ``read()`` requests
681 to that stream should be via the ``read_size`` argument. It defaults to
681 to that stream should be via the ``read_size`` argument. It defaults to
682 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
682 ``zstandard.DECOMPRESSION_RECOMMENDED_INPUT_SIZE``.::
683
683
684 with open(path, 'rb') as fh:
684 with open(path, 'rb') as fh:
685 dctx = zstd.ZstdDecompressor()
685 dctx = zstd.ZstdDecompressor()
686 # Will perform fh.read(8192) when obtaining data for the decompressor.
686 # Will perform fh.read(8192) when obtaining data for the decompressor.
687 with dctx.stream_reader(fh, read_size=8192) as reader:
687 with dctx.stream_reader(fh, read_size=8192) as reader:
688 ...
688 ...
689
689
690 The stream returned by ``stream_reader()`` is not writable.
690 The stream returned by ``stream_reader()`` is not writable.
691
691
692 The stream returned by ``stream_reader()`` is *partially* seekable.
692 The stream returned by ``stream_reader()`` is *partially* seekable.
693 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
693 Absolute and relative positions (``SEEK_SET`` and ``SEEK_CUR``) forward
694 of the current position are allowed. Offsets behind the current read
694 of the current position are allowed. Offsets behind the current read
695 position and offsets relative to the end of stream are not allowed and
695 position and offsets relative to the end of stream are not allowed and
696 will raise ``ValueError`` if attempted.
696 will raise ``ValueError`` if attempted.
697
697
698 ``tell()`` returns the number of decompressed bytes read so far.
698 ``tell()`` returns the number of decompressed bytes read so far.
699
699
700 Not all I/O methods are implemented. Notably missing is support for
700 Not all I/O methods are implemented. Notably missing is support for
701 ``readline()``, ``readlines()``, and linewise iteration support. This is
701 ``readline()``, ``readlines()``, and linewise iteration support. This is
702 because streams operate on binary data - not text data. If you want to
702 because streams operate on binary data - not text data. If you want to
703 convert decompressed output to text, you can chain an ``io.TextIOWrapper``
703 convert decompressed output to text, you can chain an ``io.TextIOWrapper``
704 to the stream::
704 to the stream::
705
705
706 with open(path, 'rb') as fh:
706 with open(path, 'rb') as fh:
707 dctx = zstd.ZstdDecompressor()
707 dctx = zstd.ZstdDecompressor()
708 stream_reader = dctx.stream_reader(fh)
708 stream_reader = dctx.stream_reader(fh)
709 text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
709 text_stream = io.TextIOWrapper(stream_reader, encoding='utf-8')
710
710
711 for line in text_stream:
711 for line in text_stream:
712 ...
712 ...
713
713
714 The ``read_across_frames`` argument to ``stream_reader()`` controls the
714 The ``read_across_frames`` argument to ``stream_reader()`` controls the
715 behavior of read operations when the end of a zstd *frame* is encountered.
715 behavior of read operations when the end of a zstd *frame* is encountered.
716 When ``False`` (the default), a read will complete when the end of a
716 When ``False`` (the default), a read will complete when the end of a
717 zstd *frame* is encountered. When ``True``, a read can potentially
717 zstd *frame* is encountered. When ``True``, a read can potentially
718 return data spanning multiple zstd *frames*.
718 return data spanning multiple zstd *frames*.
719
719
720 Streaming Input API
720 Streaming Input API
721 ^^^^^^^^^^^^^^^^^^^
721 ^^^^^^^^^^^^^^^^^^^
722
722
723 ``stream_writer(fh)`` allows you to *stream* data into a decompressor.
723 ``stream_writer(fh)`` allows you to *stream* data into a decompressor.
724
724
725 Returned instances implement the ``io.RawIOBase`` interface. Only methods
725 Returned instances implement the ``io.RawIOBase`` interface. Only methods
726 that involve writing will do useful things.
726 that involve writing will do useful things.
727
727
728 The argument to ``stream_writer()`` is typically an object that also implements
728 The argument to ``stream_writer()`` is typically an object that also implements
729 ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
729 ``io.RawIOBase``. But any object with a ``write(data)`` method will work. Many
730 common Python types conform to this interface, including open file handles
730 common Python types conform to this interface, including open file handles
731 and ``io.BytesIO``.
731 and ``io.BytesIO``.
732
732
733 Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
733 Behavior is similar to ``ZstdCompressor.stream_writer()``: compressed data
734 is sent to the decompressor by calling ``write(data)`` and decompressed
734 is sent to the decompressor by calling ``write(data)`` and decompressed
735 output is written to the underlying stream by calling its ``write(data)``
735 output is written to the underlying stream by calling its ``write(data)``
736 method.::
736 method.::
737
737
738 dctx = zstd.ZstdDecompressor()
738 dctx = zstd.ZstdDecompressor()
739 decompressor = dctx.stream_writer(fh)
739 decompressor = dctx.stream_writer(fh)
740
740
741 decompressor.write(compressed_data)
741 decompressor.write(compressed_data)
742 ...
742 ...
743
743
744
744
745 Calls to ``write()`` will return the number of bytes written to the output
745 Calls to ``write()`` will return the number of bytes written to the output
746 object. Not all inputs will result in bytes being written, so return values
746 object. Not all inputs will result in bytes being written, so return values
747 of ``0`` are possible.
747 of ``0`` are possible.
748
748
749 Like the ``stream_writer()`` compressor, instances can be used as context
749 Like the ``stream_writer()`` compressor, instances can be used as context
750 managers. However, context managers add no extra special behavior and offer
750 managers. However, context managers add no extra special behavior and offer
751 little to no benefit to being used.
751 little to no benefit to being used.
752
752
753 Calling ``close()`` will mark the stream as closed and subsequent I/O operations
753 Calling ``close()`` will mark the stream as closed and subsequent I/O operations
754 will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
754 will raise ``ValueError`` (per the documented behavior of ``io.RawIOBase``).
755 ``close()`` will also call ``close()`` on the underlying stream if such a
755 ``close()`` will also call ``close()`` on the underlying stream if such a
756 method exists.
756 method exists.
757
757
758 The size of chunks being ``write()`` to the destination can be specified::
758 The size of chunks being ``write()`` to the destination can be specified::
759
759
760 dctx = zstd.ZstdDecompressor()
760 dctx = zstd.ZstdDecompressor()
761 with dctx.stream_writer(fh, write_size=16384) as decompressor:
761 with dctx.stream_writer(fh, write_size=16384) as decompressor:
762 pass
762 pass
763
763
764 You can see how much memory is being used by the decompressor::
764 You can see how much memory is being used by the decompressor::
765
765
766 dctx = zstd.ZstdDecompressor()
766 dctx = zstd.ZstdDecompressor()
767 with dctx.stream_writer(fh) as decompressor:
767 with dctx.stream_writer(fh) as decompressor:
768 byte_size = decompressor.memory_size()
768 byte_size = decompressor.memory_size()
769
769
770 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
770 ``stream_writer()`` accepts a ``write_return_read`` boolean argument to control
771 the return value of ``write()``. When ``False`` (the default), ``write()``
771 the return value of ``write()``. When ``False`` (the default), ``write()``
772 returns the number of bytes that were ``write()``en to the underlying stream.
772 returns the number of bytes that were ``write()``en to the underlying stream.
773 When ``True``, ``write()`` returns the number of bytes read from the input.
773 When ``True``, ``write()`` returns the number of bytes read from the input.
774 ``True`` is the *proper* behavior for ``write()`` as specified by the
774 ``True`` is the *proper* behavior for ``write()`` as specified by the
775 ``io.RawIOBase`` interface and will become the default in a future release.
775 ``io.RawIOBase`` interface and will become the default in a future release.
776
776
777 Streaming Output API
777 Streaming Output API
778 ^^^^^^^^^^^^^^^^^^^^
778 ^^^^^^^^^^^^^^^^^^^^
779
779
780 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
780 ``read_to_iter(fh)`` provides a mechanism to stream decompressed data out of a
781 compressed source as an iterator of data chunks.::
781 compressed source as an iterator of data chunks.::
782
782
783 dctx = zstd.ZstdDecompressor()
783 dctx = zstd.ZstdDecompressor()
784 for chunk in dctx.read_to_iter(fh):
784 for chunk in dctx.read_to_iter(fh):
785 # Do something with original data.
785 # Do something with original data.
786
786
787 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
787 ``read_to_iter()`` accepts an object with a ``read(size)`` method that will
788 return compressed bytes or an object conforming to the buffer protocol that
788 return compressed bytes or an object conforming to the buffer protocol that
789 can expose its data as a contiguous range of bytes.
789 can expose its data as a contiguous range of bytes.
790
790
791 ``read_to_iter()`` returns an iterator whose elements are chunks of the
791 ``read_to_iter()`` returns an iterator whose elements are chunks of the
792 decompressed data.
792 decompressed data.
793
793
794 The size of requested ``read()`` from the source can be specified::
794 The size of requested ``read()`` from the source can be specified::
795
795
796 dctx = zstd.ZstdDecompressor()
796 dctx = zstd.ZstdDecompressor()
797 for chunk in dctx.read_to_iter(fh, read_size=16384):
797 for chunk in dctx.read_to_iter(fh, read_size=16384):
798 pass
798 pass
799
799
800 It is also possible to skip leading bytes in the input data::
800 It is also possible to skip leading bytes in the input data::
801
801
802 dctx = zstd.ZstdDecompressor()
802 dctx = zstd.ZstdDecompressor()
803 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
803 for chunk in dctx.read_to_iter(fh, skip_bytes=1):
804 pass
804 pass
805
805
806 .. tip::
806 .. tip::
807
807
808 Skipping leading bytes is useful if the source data contains extra
808 Skipping leading bytes is useful if the source data contains extra
809 *header* data. Traditionally, you would need to create a slice or
809 *header* data. Traditionally, you would need to create a slice or
810 ``memoryview`` of the data you want to decompress. This would create
810 ``memoryview`` of the data you want to decompress. This would create
811 overhead. It is more efficient to pass the offset into this API.
811 overhead. It is more efficient to pass the offset into this API.
812
812
813 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
813 Similarly to ``ZstdCompressor.read_to_iter()``, the consumer of the iterator
814 controls when data is decompressed. If the iterator isn't consumed,
814 controls when data is decompressed. If the iterator isn't consumed,
815 decompression is put on hold.
815 decompression is put on hold.
816
816
817 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
817 When ``read_to_iter()`` is passed an object conforming to the buffer protocol,
818 the behavior may seem similar to what occurs when the simple decompression
818 the behavior may seem similar to what occurs when the simple decompression
819 API is used. However, this API works when the decompressed size is unknown.
819 API is used. However, this API works when the decompressed size is unknown.
820 Furthermore, if feeding large inputs, the decompressor will work in chunks
820 Furthermore, if feeding large inputs, the decompressor will work in chunks
821 instead of performing a single operation.
821 instead of performing a single operation.
822
822
823 Stream Copying API
823 Stream Copying API
824 ^^^^^^^^^^^^^^^^^^
824 ^^^^^^^^^^^^^^^^^^
825
825
826 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
826 ``copy_stream(ifh, ofh)`` can be used to copy data across 2 streams while
827 performing decompression.::
827 performing decompression.::
828
828
829 dctx = zstd.ZstdDecompressor()
829 dctx = zstd.ZstdDecompressor()
830 dctx.copy_stream(ifh, ofh)
830 dctx.copy_stream(ifh, ofh)
831
831
832 e.g. to decompress a file to another file::
832 e.g. to decompress a file to another file::
833
833
834 dctx = zstd.ZstdDecompressor()
834 dctx = zstd.ZstdDecompressor()
835 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
835 with open(input_path, 'rb') as ifh, open(output_path, 'wb') as ofh:
836 dctx.copy_stream(ifh, ofh)
836 dctx.copy_stream(ifh, ofh)
837
837
838 The size of chunks being ``read()`` and ``write()`` from and to the streams
838 The size of chunks being ``read()`` and ``write()`` from and to the streams
839 can be specified::
839 can be specified::
840
840
841 dctx = zstd.ZstdDecompressor()
841 dctx = zstd.ZstdDecompressor()
842 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
842 dctx.copy_stream(ifh, ofh, read_size=8192, write_size=16384)
843
843
844 Decompressor API
844 Decompressor API
845 ^^^^^^^^^^^^^^^^
845 ^^^^^^^^^^^^^^^^
846
846
847 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
847 ``decompressobj()`` returns an object that exposes a ``decompress(data)``
848 method. Compressed data chunks are fed into ``decompress(data)`` and
848 method. Compressed data chunks are fed into ``decompress(data)`` and
849 uncompressed output (or an empty bytes) is returned. Output from subsequent
849 uncompressed output (or an empty bytes) is returned. Output from subsequent
850 calls needs to be concatenated to reassemble the full decompressed byte
850 calls needs to be concatenated to reassemble the full decompressed byte
851 sequence.
851 sequence.
852
852
853 The purpose of ``decompressobj()`` is to provide an API-compatible interface
853 The purpose of ``decompressobj()`` is to provide an API-compatible interface
854 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
854 with ``zlib.decompressobj`` and ``bz2.BZ2Decompressor``. This allows callers
855 to swap in different decompressor objects while using the same API.
855 to swap in different decompressor objects while using the same API.
856
856
857 Each object is single use: once an input frame is decoded, ``decompress()``
857 Each object is single use: once an input frame is decoded, ``decompress()``
858 can no longer be called.
858 can no longer be called.
859
859
860 Here is how this API should be used::
860 Here is how this API should be used::
861
861
862 dctx = zstd.ZstdDecompressor()
862 dctx = zstd.ZstdDecompressor()
863 dobj = dctx.decompressobj()
863 dobj = dctx.decompressobj()
864 data = dobj.decompress(compressed_chunk_0)
864 data = dobj.decompress(compressed_chunk_0)
865 data = dobj.decompress(compressed_chunk_1)
865 data = dobj.decompress(compressed_chunk_1)
866
866
867 By default, calls to ``decompress()`` write output data in chunks of size
867 By default, calls to ``decompress()`` write output data in chunks of size
868 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
868 ``DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE``. These chunks are concatenated
869 before being returned to the caller. It is possible to define the size of
869 before being returned to the caller. It is possible to define the size of
870 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
870 these temporary chunks by passing ``write_size`` to ``decompressobj()``::
871
871
872 dctx = zstd.ZstdDecompressor()
872 dctx = zstd.ZstdDecompressor()
873 dobj = dctx.decompressobj(write_size=1048576)
873 dobj = dctx.decompressobj(write_size=1048576)
874
874
875 .. note::
875 .. note::
876
876
877 Because calls to ``decompress()`` may need to perform multiple
877 Because calls to ``decompress()`` may need to perform multiple
878 memory (re)allocations, this streaming decompression API isn't as
878 memory (re)allocations, this streaming decompression API isn't as
879 efficient as other APIs.
879 efficient as other APIs.
880
880
881 For compatibility with the standard library APIs, instances expose a
881 For compatibility with the standard library APIs, instances expose a
882 ``flush([length=None])`` method. This method no-ops and has no meaningful
882 ``flush([length=None])`` method. This method no-ops and has no meaningful
883 side-effects, making it safe to call any time.
883 side-effects, making it safe to call any time.
884
884
885 Batch Decompression API
885 Batch Decompression API
886 ^^^^^^^^^^^^^^^^^^^^^^^
886 ^^^^^^^^^^^^^^^^^^^^^^^
887
887
888 (Experimental. Not yet supported in CFFI bindings.)
888 (Experimental. Not yet supported in CFFI bindings.)
889
889
890 ``multi_decompress_to_buffer()`` performs decompression of multiple
890 ``multi_decompress_to_buffer()`` performs decompression of multiple
891 frames as a single operation and returns a ``BufferWithSegmentsCollection``
891 frames as a single operation and returns a ``BufferWithSegmentsCollection``
892 containing decompressed data for all inputs.
892 containing decompressed data for all inputs.
893
893
894 Compressed frames can be passed to the function as a ``BufferWithSegments``,
894 Compressed frames can be passed to the function as a ``BufferWithSegments``,
895 a ``BufferWithSegmentsCollection``, or as a list containing objects that
895 a ``BufferWithSegmentsCollection``, or as a list containing objects that
896 conform to the buffer protocol. For best performance, pass a
896 conform to the buffer protocol. For best performance, pass a
897 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
897 ``BufferWithSegmentsCollection`` or a ``BufferWithSegments``, as
898 minimal input validation will be done for that type. If calling from
898 minimal input validation will be done for that type. If calling from
899 Python (as opposed to C), constructing one of these instances may add
899 Python (as opposed to C), constructing one of these instances may add
900 overhead cancelling out the performance overhead of validation for list
900 overhead cancelling out the performance overhead of validation for list
901 inputs.::
901 inputs.::
902
902
903 dctx = zstd.ZstdDecompressor()
903 dctx = zstd.ZstdDecompressor()
904 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
904 results = dctx.multi_decompress_to_buffer([b'...', b'...'])
905
905
906 The decompressed size of each frame MUST be discoverable. It can either be
906 The decompressed size of each frame MUST be discoverable. It can either be
907 embedded within the zstd frame (``write_content_size=True`` argument to
907 embedded within the zstd frame (``write_content_size=True`` argument to
908 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
908 ``ZstdCompressor``) or passed in via the ``decompressed_sizes`` argument.
909
909
910 The ``decompressed_sizes`` argument is an object conforming to the buffer
910 The ``decompressed_sizes`` argument is an object conforming to the buffer
911 protocol which holds an array of 64-bit unsigned integers in the machine's
911 protocol which holds an array of 64-bit unsigned integers in the machine's
912 native format defining the decompressed sizes of each frame. If this argument
912 native format defining the decompressed sizes of each frame. If this argument
913 is passed, it avoids having to scan each frame for its decompressed size.
913 is passed, it avoids having to scan each frame for its decompressed size.
914 This frame scanning can add noticeable overhead in some scenarios.::
914 This frame scanning can add noticeable overhead in some scenarios.::
915
915
916 frames = [...]
916 frames = [...]
917 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
917 sizes = struct.pack('=QQQQ', len0, len1, len2, len3)
918
918
919 dctx = zstd.ZstdDecompressor()
919 dctx = zstd.ZstdDecompressor()
920 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
920 results = dctx.multi_decompress_to_buffer(frames, decompressed_sizes=sizes)
921
921
922 The ``threads`` argument controls the number of threads to use to perform
922 The ``threads`` argument controls the number of threads to use to perform
923 decompression operations. The default (``0``) or the value ``1`` means to
923 decompression operations. The default (``0``) or the value ``1`` means to
924 use a single thread. Negative values use the number of logical CPUs in the
924 use a single thread. Negative values use the number of logical CPUs in the
925 machine.
925 machine.
926
926
927 .. note::
927 .. note::
928
928
929 It is possible to pass a ``mmap.mmap()`` instance into this function by
929 It is possible to pass a ``mmap.mmap()`` instance into this function by
930 wrapping it with a ``BufferWithSegments`` instance (which will define the
930 wrapping it with a ``BufferWithSegments`` instance (which will define the
931 offsets of frames within the memory mapped region).
931 offsets of frames within the memory mapped region).
932
932
933 This function is logically equivalent to performing ``dctx.decompress()``
933 This function is logically equivalent to performing ``dctx.decompress()``
934 on each input frame and returning the result.
934 on each input frame and returning the result.
935
935
936 This function exists to perform decompression on multiple frames as fast
936 This function exists to perform decompression on multiple frames as fast
937 as possible by having as little overhead as possible. Since decompression is
937 as possible by having as little overhead as possible. Since decompression is
938 performed as a single operation and since the decompressed output is stored in
938 performed as a single operation and since the decompressed output is stored in
939 a single buffer, extra memory allocations, Python objects, and Python function
939 a single buffer, extra memory allocations, Python objects, and Python function
940 calls are avoided. This is ideal for scenarios where callers know up front that
940 calls are avoided. This is ideal for scenarios where callers know up front that
941 they need to access data for multiple frames, such as when *delta chains* are
941 they need to access data for multiple frames, such as when *delta chains* are
942 being used.
942 being used.
943
943
944 Currently, the implementation always spawns multiple threads when requested,
944 Currently, the implementation always spawns multiple threads when requested,
945 even if the amount of work to do is small. In the future, it will be smarter
945 even if the amount of work to do is small. In the future, it will be smarter
946 about avoiding threads and their associated overhead when the amount of
946 about avoiding threads and their associated overhead when the amount of
947 work to do is small.
947 work to do is small.
948
948
949 Prefix Dictionary Chain Decompression
949 Prefix Dictionary Chain Decompression
950 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
950 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
951
951
952 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
952 ``decompress_content_dict_chain(frames)`` performs decompression of a list of
953 zstd frames produced using chained *prefix* dictionary compression. Such
953 zstd frames produced using chained *prefix* dictionary compression. Such
954 a list of frames is produced by compressing discrete inputs where each
954 a list of frames is produced by compressing discrete inputs where each
955 non-initial input is compressed with a *prefix* dictionary consisting of the
955 non-initial input is compressed with a *prefix* dictionary consisting of the
956 content of the previous input.
956 content of the previous input.
957
957
958 For example, say you have the following inputs::
958 For example, say you have the following inputs::
959
959
960 inputs = [b'input 1', b'input 2', b'input 3']
960 inputs = [b'input 1', b'input 2', b'input 3']
961
961
962 The zstd frame chain consists of:
962 The zstd frame chain consists of:
963
963
964 1. ``b'input 1'`` compressed in standalone/discrete mode
964 1. ``b'input 1'`` compressed in standalone/discrete mode
965 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
965 2. ``b'input 2'`` compressed using ``b'input 1'`` as a *prefix* dictionary
966 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
966 3. ``b'input 3'`` compressed using ``b'input 2'`` as a *prefix* dictionary
967
967
968 Each zstd frame **must** have the content size written.
968 Each zstd frame **must** have the content size written.
969
969
970 The following Python code can be used to produce a *prefix dictionary chain*::
970 The following Python code can be used to produce a *prefix dictionary chain*::
971
971
972 def make_chain(inputs):
972 def make_chain(inputs):
973 frames = []
973 frames = []
974
974
975 # First frame is compressed in standalone/discrete mode.
975 # First frame is compressed in standalone/discrete mode.
976 zctx = zstd.ZstdCompressor()
976 zctx = zstd.ZstdCompressor()
977 frames.append(zctx.compress(inputs[0]))
977 frames.append(zctx.compress(inputs[0]))
978
978
979 # Subsequent frames use the previous fulltext as a prefix dictionary
979 # Subsequent frames use the previous fulltext as a prefix dictionary
980 for i, raw in enumerate(inputs[1:]):
980 for i, raw in enumerate(inputs[1:]):
981 dict_data = zstd.ZstdCompressionDict(
981 dict_data = zstd.ZstdCompressionDict(
982 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
982 inputs[i], dict_type=zstd.DICT_TYPE_RAWCONTENT)
983 zctx = zstd.ZstdCompressor(dict_data=dict_data)
983 zctx = zstd.ZstdCompressor(dict_data=dict_data)
984 frames.append(zctx.compress(raw))
984 frames.append(zctx.compress(raw))
985
985
986 return frames
986 return frames
987
987
988 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
988 ``decompress_content_dict_chain()`` returns the uncompressed data of the last
989 element in the input chain.
989 element in the input chain.
990
990
991
991
992 .. note::
992 .. note::
993
993
994 It is possible to implement *prefix dictionary chain* decompression
994 It is possible to implement *prefix dictionary chain* decompression
995 on top of other APIs. However, this function will likely be faster -
995 on top of other APIs. However, this function will likely be faster -
996 especially for long input chains - as it avoids the overhead of instantiating
996 especially for long input chains - as it avoids the overhead of instantiating
997 and passing around intermediate objects between C and Python.
997 and passing around intermediate objects between C and Python.
998
998
999 Multi-Threaded Compression
999 Multi-Threaded Compression
1000 --------------------------
1000 --------------------------
1001
1001
1002 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
1002 ``ZstdCompressor`` accepts a ``threads`` argument that controls the number
1003 of threads to use for compression. The way this works is that input is split
1003 of threads to use for compression. The way this works is that input is split
1004 into segments and each segment is fed into a worker pool for compression. Once
1004 into segments and each segment is fed into a worker pool for compression. Once
1005 a segment is compressed, it is flushed/appended to the output.
1005 a segment is compressed, it is flushed/appended to the output.
1006
1006
1007 .. note::
1007 .. note::
1008
1008
1009 These threads are created at the C layer and are not Python threads. So they
1009 These threads are created at the C layer and are not Python threads. So they
1010 work outside the GIL. It is therefore possible to CPU saturate multiple cores
1010 work outside the GIL. It is therefore possible to CPU saturate multiple cores
1011 from Python.
1011 from Python.
1012
1012
1013 The segment size for multi-threaded compression is chosen from the window size
1013 The segment size for multi-threaded compression is chosen from the window size
1014 of the compressor. This is derived from the ``window_log`` attribute of a
1014 of the compressor. This is derived from the ``window_log`` attribute of a
1015 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
1015 ``ZstdCompressionParameters`` instance. By default, segment sizes are in the 1+MB
1016 range.
1016 range.
1017
1017
1018 If multi-threaded compression is requested and the input is smaller than the
1018 If multi-threaded compression is requested and the input is smaller than the
1019 configured segment size, only a single compression thread will be used. If the
1019 configured segment size, only a single compression thread will be used. If the
1020 input is smaller than the segment size multiplied by the thread pool size or
1020 input is smaller than the segment size multiplied by the thread pool size or
1021 if data cannot be delivered to the compressor fast enough, not all requested
1021 if data cannot be delivered to the compressor fast enough, not all requested
1022 compressor threads may be active simultaneously.
1022 compressor threads may be active simultaneously.
1023
1023
1024 Compared to non-multi-threaded compression, multi-threaded compression has
1024 Compared to non-multi-threaded compression, multi-threaded compression has
1025 higher per-operation overhead. This includes extra memory operations,
1025 higher per-operation overhead. This includes extra memory operations,
1026 thread creation, lock acquisition, etc.
1026 thread creation, lock acquisition, etc.
1027
1027
1028 Due to the nature of multi-threaded compression using *N* compression
1028 Due to the nature of multi-threaded compression using *N* compression
1029 *states*, the output from multi-threaded compression will likely be larger
1029 *states*, the output from multi-threaded compression will likely be larger
1030 than non-multi-threaded compression. The difference is usually small. But
1030 than non-multi-threaded compression. The difference is usually small. But
1031 there is a CPU/wall time versus size trade off that may warrant investigation.
1031 there is a CPU/wall time versus size trade off that may warrant investigation.
1032
1032
1033 Output from multi-threaded compression does not require any special handling
1033 Output from multi-threaded compression does not require any special handling
1034 on the decompression side. To the decompressor, data generated with single
1034 on the decompression side. To the decompressor, data generated with single
1035 threaded compressor looks the same as data generated by a multi-threaded
1035 threaded compressor looks the same as data generated by a multi-threaded
1036 compressor and does not require any special handling or additional resource
1036 compressor and does not require any special handling or additional resource
1037 requirements.
1037 requirements.
1038
1038
1039 Dictionary Creation and Management
1039 Dictionary Creation and Management
1040 ----------------------------------
1040 ----------------------------------
1041
1041
1042 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
1042 Compression dictionaries are represented with the ``ZstdCompressionDict`` type.
1043
1043
1044 Instances can be constructed from bytes::
1044 Instances can be constructed from bytes::
1045
1045
1046 dict_data = zstd.ZstdCompressionDict(data)
1046 dict_data = zstd.ZstdCompressionDict(data)
1047
1047
1048 It is possible to construct a dictionary from *any* data. If the data doesn't
1048 It is possible to construct a dictionary from *any* data. If the data doesn't
1049 begin with a magic header, it will be treated as a *prefix* dictionary.
1049 begin with a magic header, it will be treated as a *prefix* dictionary.
1050 *Prefix* dictionaries allow compression operations to reference raw data
1050 *Prefix* dictionaries allow compression operations to reference raw data
1051 within the dictionary.
1051 within the dictionary.
1052
1052
1053 It is possible to force the use of *prefix* dictionaries or to require a
1053 It is possible to force the use of *prefix* dictionaries or to require a
1054 dictionary header:
1054 dictionary header:
1055
1055
1056 dict_data = zstd.ZstdCompressionDict(data,
1056 dict_data = zstd.ZstdCompressionDict(data,
1057 dict_type=zstd.DICT_TYPE_RAWCONTENT)
1057 dict_type=zstd.DICT_TYPE_RAWCONTENT)
1058
1058
1059 dict_data = zstd.ZstdCompressionDict(data,
1059 dict_data = zstd.ZstdCompressionDict(data,
1060 dict_type=zstd.DICT_TYPE_FULLDICT)
1060 dict_type=zstd.DICT_TYPE_FULLDICT)
1061
1061
1062 You can see how many bytes are in the dictionary by calling ``len()``::
1062 You can see how many bytes are in the dictionary by calling ``len()``::
1063
1063
1064 dict_data = zstd.train_dictionary(size, samples)
1064 dict_data = zstd.train_dictionary(size, samples)
1065 dict_size = len(dict_data) # will not be larger than ``size``
1065 dict_size = len(dict_data) # will not be larger than ``size``
1066
1066
1067 Once you have a dictionary, you can pass it to the objects performing
1067 Once you have a dictionary, you can pass it to the objects performing
1068 compression and decompression::
1068 compression and decompression::
1069
1069
1070 dict_data = zstd.train_dictionary(131072, samples)
1070 dict_data = zstd.train_dictionary(131072, samples)
1071
1071
1072 cctx = zstd.ZstdCompressor(dict_data=dict_data)
1072 cctx = zstd.ZstdCompressor(dict_data=dict_data)
1073 for source_data in input_data:
1073 for source_data in input_data:
1074 compressed = cctx.compress(source_data)
1074 compressed = cctx.compress(source_data)
1075 # Do something with compressed data.
1075 # Do something with compressed data.
1076
1076
1077 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
1077 dctx = zstd.ZstdDecompressor(dict_data=dict_data)
1078 for compressed_data in input_data:
1078 for compressed_data in input_data:
1079 buffer = io.BytesIO()
1079 buffer = io.BytesIO()
1080 with dctx.stream_writer(buffer) as decompressor:
1080 with dctx.stream_writer(buffer) as decompressor:
1081 decompressor.write(compressed_data)
1081 decompressor.write(compressed_data)
1082 # Do something with raw data in ``buffer``.
1082 # Do something with raw data in ``buffer``.
1083
1083
1084 Dictionaries have unique integer IDs. You can retrieve this ID via::
1084 Dictionaries have unique integer IDs. You can retrieve this ID via::
1085
1085
1086 dict_id = zstd.dictionary_id(dict_data)
1086 dict_id = zstd.dictionary_id(dict_data)
1087
1087
1088 You can obtain the raw data in the dict (useful for persisting and constructing
1088 You can obtain the raw data in the dict (useful for persisting and constructing
1089 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
1089 a ``ZstdCompressionDict`` later) via ``as_bytes()``::
1090
1090
1091 dict_data = zstd.train_dictionary(size, samples)
1091 dict_data = zstd.train_dictionary(size, samples)
1092 raw_data = dict_data.as_bytes()
1092 raw_data = dict_data.as_bytes()
1093
1093
1094 By default, when a ``ZstdCompressionDict`` is *attached* to a
1094 By default, when a ``ZstdCompressionDict`` is *attached* to a
1095 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
1095 ``ZstdCompressor``, each ``ZstdCompressor`` performs work to prepare the
1096 dictionary for use. This is fine if only 1 compression operation is being
1096 dictionary for use. This is fine if only 1 compression operation is being
1097 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
1097 performed or if the ``ZstdCompressor`` is being reused for multiple operations.
1098 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
1098 But if multiple ``ZstdCompressor`` instances are being used with the dictionary,
1099 this can add overhead.
1099 this can add overhead.
1100
1100
1101 It is possible to *precompute* the dictionary so it can readily be consumed
1101 It is possible to *precompute* the dictionary so it can readily be consumed
1102 by multiple ``ZstdCompressor`` instances::
1102 by multiple ``ZstdCompressor`` instances::
1103
1103
1104 d = zstd.ZstdCompressionDict(data)
1104 d = zstd.ZstdCompressionDict(data)
1105
1105
1106 # Precompute for compression level 3.
1106 # Precompute for compression level 3.
1107 d.precompute_compress(level=3)
1107 d.precompute_compress(level=3)
1108
1108
1109 # Precompute with specific compression parameters.
1109 # Precompute with specific compression parameters.
1110 params = zstd.ZstdCompressionParameters(...)
1110 params = zstd.ZstdCompressionParameters(...)
1111 d.precompute_compress(compression_params=params)
1111 d.precompute_compress(compression_params=params)
1112
1112
1113 .. note::
1113 .. note::
1114
1114
1115 When a dictionary is precomputed, the compression parameters used to
1115 When a dictionary is precomputed, the compression parameters used to
1116 precompute the dictionary overwrite some of the compression parameters
1116 precompute the dictionary overwrite some of the compression parameters
1117 specified to ``ZstdCompressor.__init__``.
1117 specified to ``ZstdCompressor.__init__``.
1118
1118
1119 Training Dictionaries
1119 Training Dictionaries
1120 ^^^^^^^^^^^^^^^^^^^^^
1120 ^^^^^^^^^^^^^^^^^^^^^
1121
1121
1122 Unless using *prefix* dictionaries, dictionary data is produced by *training*
1122 Unless using *prefix* dictionaries, dictionary data is produced by *training*
1123 on existing data::
1123 on existing data::
1124
1124
1125 dict_data = zstd.train_dictionary(size, samples)
1125 dict_data = zstd.train_dictionary(size, samples)
1126
1126
1127 This takes a target dictionary size and list of bytes instances and creates and
1127 This takes a target dictionary size and list of bytes instances and creates and
1128 returns a ``ZstdCompressionDict``.
1128 returns a ``ZstdCompressionDict``.
1129
1129
1130 The dictionary training mechanism is known as *cover*. More details about it are
1130 The dictionary training mechanism is known as *cover*. More details about it are
1131 available in the paper *Effective Construction of Relative Lempel-Ziv
1131 available in the paper *Effective Construction of Relative Lempel-Ziv
1132 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
1132 Dictionaries* (authors: Liao, Petri, Moffat, Wirth).
1133
1133
1134 The cover algorithm takes parameters ``k`` and ``d``. These are the
1134 The cover algorithm takes parameters ``k`` and ``d``. These are the
1135 *segment size* and *dmer size*, respectively. The returned dictionary
1135 *segment size* and *dmer size*, respectively. The returned dictionary
1136 instance created by this function has ``k`` and ``d`` attributes
1136 instance created by this function has ``k`` and ``d`` attributes
1137 containing the values for these parameters. If a ``ZstdCompressionDict``
1137 containing the values for these parameters. If a ``ZstdCompressionDict``
1138 is constructed from raw bytes data (a content-only dictionary), the
1138 is constructed from raw bytes data (a content-only dictionary), the
1139 ``k`` and ``d`` attributes will be ``0``.
1139 ``k`` and ``d`` attributes will be ``0``.
1140
1140
1141 The segment and dmer size parameters to the cover algorithm can either be
1141 The segment and dmer size parameters to the cover algorithm can either be
1142 specified manually or ``train_dictionary()`` can try multiple values
1142 specified manually or ``train_dictionary()`` can try multiple values
1143 and pick the best one, where *best* means the smallest compressed data size.
1143 and pick the best one, where *best* means the smallest compressed data size.
1144 This latter mode is called *optimization* mode.
1144 This latter mode is called *optimization* mode.
1145
1145
1146 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
1146 If none of ``k``, ``d``, ``steps``, ``threads``, ``level``, ``notifications``,
1147 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
1147 or ``dict_id`` (basically anything from the underlying ``ZDICT_cover_params_t``
1148 struct) are defined, *optimization* mode is used with default parameter
1148 struct) are defined, *optimization* mode is used with default parameter
1149 values.
1149 values.
1150
1150
1151 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
1151 If ``steps`` or ``threads`` are defined, then *optimization* mode is engaged
1152 with explicit control over those parameters. Specifying ``threads=0`` or
1152 with explicit control over those parameters. Specifying ``threads=0`` or
1153 ``threads=1`` can be used to engage *optimization* mode if other parameters
1153 ``threads=1`` can be used to engage *optimization* mode if other parameters
1154 are not defined.
1154 are not defined.
1155
1155
1156 Otherwise, non-*optimization* mode is used with the parameters specified.
1156 Otherwise, non-*optimization* mode is used with the parameters specified.
1157
1157
1158 This function takes the following arguments:
1158 This function takes the following arguments:
1159
1159
1160 dict_size
1160 dict_size
1161 Target size in bytes of the dictionary to generate.
1161 Target size in bytes of the dictionary to generate.
1162 samples
1162 samples
1163 A list of bytes holding samples the dictionary will be trained from.
1163 A list of bytes holding samples the dictionary will be trained from.
1164 k
1164 k
1165 Parameter to cover algorithm defining the segment size. A reasonable range
1165 Parameter to cover algorithm defining the segment size. A reasonable range
1166 is [16, 2048+].
1166 is [16, 2048+].
1167 d
1167 d
1168 Parameter to cover algorithm defining the dmer size. A reasonable range is
1168 Parameter to cover algorithm defining the dmer size. A reasonable range is
1169 [6, 16]. ``d`` must be less than or equal to ``k``.
1169 [6, 16]. ``d`` must be less than or equal to ``k``.
1170 dict_id
1170 dict_id
1171 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1171 Integer dictionary ID for the produced dictionary. Default is 0, which uses
1172 a random value.
1172 a random value.
1173 steps
1173 steps
1174 Number of steps through ``k`` values to perform when trying parameter
1174 Number of steps through ``k`` values to perform when trying parameter
1175 variations.
1175 variations.
1176 threads
1176 threads
1177 Number of threads to use when trying parameter variations. Default is 0,
1177 Number of threads to use when trying parameter variations. Default is 0,
1178 which means to use a single thread. A negative value can be specified to
1178 which means to use a single thread. A negative value can be specified to
1179 use as many threads as there are detected logical CPUs.
1179 use as many threads as there are detected logical CPUs.
1180 level
1180 level
1181 Integer target compression level when trying parameter variations.
1181 Integer target compression level when trying parameter variations.
1182 notifications
1182 notifications
1183 Controls writing of informational messages to ``stderr``. ``0`` (the
1183 Controls writing of informational messages to ``stderr``. ``0`` (the
1184 default) means to write nothing. ``1`` writes errors. ``2`` writes
1184 default) means to write nothing. ``1`` writes errors. ``2`` writes
1185 progression info. ``3`` writes more details. And ``4`` writes all info.
1185 progression info. ``3`` writes more details. And ``4`` writes all info.
1186
1186
1187 Explicit Compression Parameters
1187 Explicit Compression Parameters
1188 -------------------------------
1188 -------------------------------
1189
1189
1190 Zstandard offers a high-level *compression level* that maps to lower-level
1190 Zstandard offers a high-level *compression level* that maps to lower-level
1191 compression parameters. For many consumers, this numeric level is the only
1191 compression parameters. For many consumers, this numeric level is the only
1192 compression setting you'll need to touch.
1192 compression setting you'll need to touch.
1193
1193
1194 But for advanced use cases, it might be desirable to tweak these lower-level
1194 But for advanced use cases, it might be desirable to tweak these lower-level
1195 settings.
1195 settings.
1196
1196
1197 The ``ZstdCompressionParameters`` type represents these low-level compression
1197 The ``ZstdCompressionParameters`` type represents these low-level compression
1198 settings.
1198 settings.
1199
1199
1200 Instances of this type can be constructed from a myriad of keyword arguments
1200 Instances of this type can be constructed from a myriad of keyword arguments
1201 (defined below) for complete low-level control over each adjustable
1201 (defined below) for complete low-level control over each adjustable
1202 compression setting.
1202 compression setting.
1203
1203
1204 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1204 From a higher level, one can construct a ``ZstdCompressionParameters`` instance
1205 given a desired compression level and target input and dictionary size
1205 given a desired compression level and target input and dictionary size
1206 using ``ZstdCompressionParameters.from_level()``. e.g.::
1206 using ``ZstdCompressionParameters.from_level()``. e.g.::
1207
1207
1208 # Derive compression settings for compression level 7.
1208 # Derive compression settings for compression level 7.
1209 params = zstd.ZstdCompressionParameters.from_level(7)
1209 params = zstd.ZstdCompressionParameters.from_level(7)
1210
1210
1211 # With an input size of 1MB
1211 # With an input size of 1MB
1212 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1212 params = zstd.ZstdCompressionParameters.from_level(7, source_size=1048576)
1213
1213
1214 Using ``from_level()``, it is also possible to override individual compression
1214 Using ``from_level()``, it is also possible to override individual compression
1215 parameters or to define additional settings that aren't automatically derived.
1215 parameters or to define additional settings that aren't automatically derived.
1216 e.g.::
1216 e.g.::
1217
1217
1218 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1218 params = zstd.ZstdCompressionParameters.from_level(4, window_log=10)
1219 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1219 params = zstd.ZstdCompressionParameters.from_level(5, threads=4)
1220
1220
1221 Or you can define low-level compression settings directly::
1221 Or you can define low-level compression settings directly::
1222
1222
1223 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
1223 params = zstd.ZstdCompressionParameters(window_log=12, enable_ldm=True)
1224
1224
1225 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1225 Once a ``ZstdCompressionParameters`` instance is obtained, it can be used to
1226 configure a compressor::
1226 configure a compressor::
1227
1227
1228 cctx = zstd.ZstdCompressor(compression_params=params)
1228 cctx = zstd.ZstdCompressor(compression_params=params)
1229
1229
1230 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1230 The named arguments and attributes of ``ZstdCompressionParameters`` are as
1231 follows:
1231 follows:
1232
1232
1233 * format
1233 * format
1234 * compression_level
1234 * compression_level
1235 * window_log
1235 * window_log
1236 * hash_log
1236 * hash_log
1237 * chain_log
1237 * chain_log
1238 * search_log
1238 * search_log
1239 * min_match
1239 * min_match
1240 * target_length
1240 * target_length
1241 * strategy
1241 * strategy
1242 * compression_strategy (deprecated: same as ``strategy``)
1242 * compression_strategy (deprecated: same as ``strategy``)
1243 * write_content_size
1243 * write_content_size
1244 * write_checksum
1244 * write_checksum
1245 * write_dict_id
1245 * write_dict_id
1246 * job_size
1246 * job_size
1247 * overlap_log
1247 * overlap_log
1248 * overlap_size_log (deprecated: same as ``overlap_log``)
1248 * overlap_size_log (deprecated: same as ``overlap_log``)
1249 * force_max_window
1249 * force_max_window
1250 * enable_ldm
1250 * enable_ldm
1251 * ldm_hash_log
1251 * ldm_hash_log
1252 * ldm_min_match
1252 * ldm_min_match
1253 * ldm_bucket_size_log
1253 * ldm_bucket_size_log
1254 * ldm_hash_rate_log
1254 * ldm_hash_rate_log
1255 * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
1255 * ldm_hash_every_log (deprecated: same as ``ldm_hash_rate_log``)
1256 * threads
1256 * threads
1257
1257
1258 Some of these are very low-level settings. It may help to consult the official
1258 Some of these are very low-level settings. It may help to consult the official
1259 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
1259 zstandard documentation for their behavior. Look for the ``ZSTD_p_*`` constants
1260 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
1260 in ``zstd.h`` (https://github.com/facebook/zstd/blob/dev/lib/zstd.h).
1261
1261
1262 Frame Inspection
1262 Frame Inspection
1263 ----------------
1263 ----------------
1264
1264
1265 Data emitted from zstd compression is encapsulated in a *frame*. This frame
1265 Data emitted from zstd compression is encapsulated in a *frame*. This frame
1266 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
1266 begins with a 4 byte *magic number* header followed by 2 to 14 bytes describing
1267 the frame in more detail. For more info, see
1267 the frame in more detail. For more info, see
1268 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
1268 https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md.
1269
1269
1270 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
1270 ``zstd.get_frame_parameters(data)`` parses a zstd *frame* header from a bytes
1271 instance and returns a ``FrameParameters`` object describing the frame.
1271 instance and returns a ``FrameParameters`` object describing the frame.
1272
1272
1273 Depending on which fields are present in the frame and their values, the
1273 Depending on which fields are present in the frame and their values, the
1274 length of the frame parameters varies. If insufficient bytes are passed
1274 length of the frame parameters varies. If insufficient bytes are passed
1275 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
1275 in to fully parse the frame parameters, ``ZstdError`` is raised. To ensure
1276 frame parameters can be parsed, pass in at least 18 bytes.
1276 frame parameters can be parsed, pass in at least 18 bytes.
1277
1277
1278 ``FrameParameters`` instances have the following attributes:
1278 ``FrameParameters`` instances have the following attributes:
1279
1279
1280 content_size
1280 content_size
1281 Integer size of original, uncompressed content. This will be ``0`` if the
1281 Integer size of original, uncompressed content. This will be ``0`` if the
1282 original content size isn't written to the frame (controlled with the
1282 original content size isn't written to the frame (controlled with the
1283 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
1283 ``write_content_size`` argument to ``ZstdCompressor``) or if the input
1284 content size was ``0``.
1284 content size was ``0``.
1285
1285
1286 window_size
1286 window_size
1287 Integer size of maximum back-reference distance in compressed data.
1287 Integer size of maximum back-reference distance in compressed data.
1288
1288
1289 dict_id
1289 dict_id
1290 Integer of dictionary ID used for compression. ``0`` if no dictionary
1290 Integer of dictionary ID used for compression. ``0`` if no dictionary
1291 ID was used or if the dictionary ID was ``0``.
1291 ID was used or if the dictionary ID was ``0``.
1292
1292
1293 has_checksum
1293 has_checksum
1294 Bool indicating whether a 4 byte content checksum is stored at the end
1294 Bool indicating whether a 4 byte content checksum is stored at the end
1295 of the frame.
1295 of the frame.
1296
1296
1297 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1297 ``zstd.frame_header_size(data)`` returns the size of the zstandard frame
1298 header.
1298 header.
1299
1299
1300 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1300 ``zstd.frame_content_size(data)`` returns the content size as parsed from
1301 the frame header. ``-1`` means the content size is unknown. ``0`` means
1301 the frame header. ``-1`` means the content size is unknown. ``0`` means
1302 an empty frame. The content size is usually correct. However, it may not
1302 an empty frame. The content size is usually correct. However, it may not
1303 be accurate.
1303 be accurate.
1304
1304
1305 Misc Functionality
1305 Misc Functionality
1306 ------------------
1306 ------------------
1307
1307
1308 estimate_decompression_context_size()
1308 estimate_decompression_context_size()
1309 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1309 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1310
1310
1311 Estimate the memory size requirements for a decompressor instance.
1311 Estimate the memory size requirements for a decompressor instance.
1312
1312
1313 Constants
1313 Constants
1314 ---------
1314 ---------
1315
1315
1316 The following module constants/attributes are exposed:
1316 The following module constants/attributes are exposed:
1317
1317
1318 ZSTD_VERSION
1318 ZSTD_VERSION
1319 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1319 This module attribute exposes a 3-tuple of the Zstandard version. e.g.
1320 ``(1, 0, 0)``
1320 ``(1, 0, 0)``
1321 MAX_COMPRESSION_LEVEL
1321 MAX_COMPRESSION_LEVEL
1322 Integer max compression level accepted by compression functions
1322 Integer max compression level accepted by compression functions
1323 COMPRESSION_RECOMMENDED_INPUT_SIZE
1323 COMPRESSION_RECOMMENDED_INPUT_SIZE
1324 Recommended chunk size to feed to compressor functions
1324 Recommended chunk size to feed to compressor functions
1325 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1325 COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1326 Recommended chunk size for compression output
1326 Recommended chunk size for compression output
1327 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1327 DECOMPRESSION_RECOMMENDED_INPUT_SIZE
1328 Recommended chunk size to feed into decompresor functions
1328 Recommended chunk size to feed into decompresor functions
1329 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1329 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1330 Recommended chunk size for decompression output
1330 Recommended chunk size for decompression output
1331
1331
1332 FRAME_HEADER
1332 FRAME_HEADER
1333 bytes containing header of the Zstandard frame
1333 bytes containing header of the Zstandard frame
1334 MAGIC_NUMBER
1334 MAGIC_NUMBER
1335 Frame header as an integer
1335 Frame header as an integer
1336
1336
1337 FLUSH_BLOCK
1337 FLUSH_BLOCK
1338 Flushing behavior that denotes to flush a zstd block. A decompressor will
1338 Flushing behavior that denotes to flush a zstd block. A decompressor will
1339 be able to decode all data fed into the compressor so far.
1339 be able to decode all data fed into the compressor so far.
1340 FLUSH_FRAME
1340 FLUSH_FRAME
1341 Flushing behavior that denotes to end a zstd frame. Any new data fed
1341 Flushing behavior that denotes to end a zstd frame. Any new data fed
1342 to the compressor will start a new frame.
1342 to the compressor will start a new frame.
1343
1343
1344 CONTENTSIZE_UNKNOWN
1344 CONTENTSIZE_UNKNOWN
1345 Value for content size when the content size is unknown.
1345 Value for content size when the content size is unknown.
1346 CONTENTSIZE_ERROR
1346 CONTENTSIZE_ERROR
1347 Value for content size when content size couldn't be determined.
1347 Value for content size when content size couldn't be determined.
1348
1348
1349 WINDOWLOG_MIN
1349 WINDOWLOG_MIN
1350 Minimum value for compression parameter
1350 Minimum value for compression parameter
1351 WINDOWLOG_MAX
1351 WINDOWLOG_MAX
1352 Maximum value for compression parameter
1352 Maximum value for compression parameter
1353 CHAINLOG_MIN
1353 CHAINLOG_MIN
1354 Minimum value for compression parameter
1354 Minimum value for compression parameter
1355 CHAINLOG_MAX
1355 CHAINLOG_MAX
1356 Maximum value for compression parameter
1356 Maximum value for compression parameter
1357 HASHLOG_MIN
1357 HASHLOG_MIN
1358 Minimum value for compression parameter
1358 Minimum value for compression parameter
1359 HASHLOG_MAX
1359 HASHLOG_MAX
1360 Maximum value for compression parameter
1360 Maximum value for compression parameter
1361 SEARCHLOG_MIN
1361 SEARCHLOG_MIN
1362 Minimum value for compression parameter
1362 Minimum value for compression parameter
1363 SEARCHLOG_MAX
1363 SEARCHLOG_MAX
1364 Maximum value for compression parameter
1364 Maximum value for compression parameter
1365 MINMATCH_MIN
1365 MINMATCH_MIN
1366 Minimum value for compression parameter
1366 Minimum value for compression parameter
1367 MINMATCH_MAX
1367 MINMATCH_MAX
1368 Maximum value for compression parameter
1368 Maximum value for compression parameter
1369 SEARCHLENGTH_MIN
1369 SEARCHLENGTH_MIN
1370 Minimum value for compression parameter
1370 Minimum value for compression parameter
1371
1371
1372 Deprecated: use ``MINMATCH_MIN``
1372 Deprecated: use ``MINMATCH_MIN``
1373 SEARCHLENGTH_MAX
1373 SEARCHLENGTH_MAX
1374 Maximum value for compression parameter
1374 Maximum value for compression parameter
1375
1375
1376 Deprecated: use ``MINMATCH_MAX``
1376 Deprecated: use ``MINMATCH_MAX``
1377 TARGETLENGTH_MIN
1377 TARGETLENGTH_MIN
1378 Minimum value for compression parameter
1378 Minimum value for compression parameter
1379 STRATEGY_FAST
1379 STRATEGY_FAST
1380 Compression strategy
1380 Compression strategy
1381 STRATEGY_DFAST
1381 STRATEGY_DFAST
1382 Compression strategy
1382 Compression strategy
1383 STRATEGY_GREEDY
1383 STRATEGY_GREEDY
1384 Compression strategy
1384 Compression strategy
1385 STRATEGY_LAZY
1385 STRATEGY_LAZY
1386 Compression strategy
1386 Compression strategy
1387 STRATEGY_LAZY2
1387 STRATEGY_LAZY2
1388 Compression strategy
1388 Compression strategy
1389 STRATEGY_BTLAZY2
1389 STRATEGY_BTLAZY2
1390 Compression strategy
1390 Compression strategy
1391 STRATEGY_BTOPT
1391 STRATEGY_BTOPT
1392 Compression strategy
1392 Compression strategy
1393 STRATEGY_BTULTRA
1393 STRATEGY_BTULTRA
1394 Compression strategy
1394 Compression strategy
1395 STRATEGY_BTULTRA2
1395 STRATEGY_BTULTRA2
1396 Compression strategy
1396 Compression strategy
1397
1397
1398 FORMAT_ZSTD1
1398 FORMAT_ZSTD1
1399 Zstandard frame format
1399 Zstandard frame format
1400 FORMAT_ZSTD1_MAGICLESS
1400 FORMAT_ZSTD1_MAGICLESS
1401 Zstandard frame format without magic header
1401 Zstandard frame format without magic header
1402
1402
1403 Performance Considerations
1403 Performance Considerations
1404 --------------------------
1404 --------------------------
1405
1405
1406 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1406 The ``ZstdCompressor`` and ``ZstdDecompressor`` types maintain state to a
1407 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1407 persistent compression or decompression *context*. Reusing a ``ZstdCompressor``
1408 or ``ZstdDecompressor`` instance for multiple operations is faster than
1408 or ``ZstdDecompressor`` instance for multiple operations is faster than
1409 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1409 instantiating a new ``ZstdCompressor`` or ``ZstdDecompressor`` for each
1410 operation. The differences are magnified as the size of data decreases. For
1410 operation. The differences are magnified as the size of data decreases. For
1411 example, the difference between *context* reuse and non-reuse for 100,000
1411 example, the difference between *context* reuse and non-reuse for 100,000
1412 100 byte inputs will be significant (possiby over 10x faster to reuse contexts)
1412 100 byte inputs will be significant (possiby over 10x faster to reuse contexts)
1413 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1413 whereas 10 100,000,000 byte inputs will be more similar in speed (because the
1414 time spent doing compression dwarfs time spent creating new *contexts*).
1414 time spent doing compression dwarfs time spent creating new *contexts*).
1415
1415
1416 Buffer Types
1416 Buffer Types
1417 ------------
1417 ------------
1418
1418
1419 The API exposes a handful of custom types for interfacing with memory buffers.
1419 The API exposes a handful of custom types for interfacing with memory buffers.
1420 The primary goal of these types is to facilitate efficient multi-object
1420 The primary goal of these types is to facilitate efficient multi-object
1421 operations.
1421 operations.
1422
1422
1423 The essential idea is to have a single memory allocation provide backing
1423 The essential idea is to have a single memory allocation provide backing
1424 storage for multiple logical objects. This has 2 main advantages: fewer
1424 storage for multiple logical objects. This has 2 main advantages: fewer
1425 allocations and optimal memory access patterns. This avoids having to allocate
1425 allocations and optimal memory access patterns. This avoids having to allocate
1426 a Python object for each logical object and furthermore ensures that access of
1426 a Python object for each logical object and furthermore ensures that access of
1427 data for objects can be sequential (read: fast) in memory.
1427 data for objects can be sequential (read: fast) in memory.
1428
1428
1429 BufferWithSegments
1429 BufferWithSegments
1430 ^^^^^^^^^^^^^^^^^^
1430 ^^^^^^^^^^^^^^^^^^
1431
1431
1432 The ``BufferWithSegments`` type represents a memory buffer containing N
1432 The ``BufferWithSegments`` type represents a memory buffer containing N
1433 discrete items of known lengths (segments). It is essentially a fixed size
1433 discrete items of known lengths (segments). It is essentially a fixed size
1434 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1434 memory address and an array of 2-tuples of ``(offset, length)`` 64-bit
1435 unsigned native endian integers defining the byte offset and length of each
1435 unsigned native endian integers defining the byte offset and length of each
1436 segment within the buffer.
1436 segment within the buffer.
1437
1437
1438 Instances behave like containers.
1438 Instances behave like containers.
1439
1439
1440 ``len()`` returns the number of segments within the instance.
1440 ``len()`` returns the number of segments within the instance.
1441
1441
1442 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1442 ``o[index]`` or ``__getitem__`` obtains a ``BufferSegment`` representing an
1443 individual segment within the backing buffer. That returned object references
1443 individual segment within the backing buffer. That returned object references
1444 (not copies) memory. This means that iterating all objects doesn't copy
1444 (not copies) memory. This means that iterating all objects doesn't copy
1445 data within the buffer.
1445 data within the buffer.
1446
1446
1447 The ``.size`` attribute contains the total size in bytes of the backing
1447 The ``.size`` attribute contains the total size in bytes of the backing
1448 buffer.
1448 buffer.
1449
1449
1450 Instances conform to the buffer protocol. So a reference to the backing bytes
1450 Instances conform to the buffer protocol. So a reference to the backing bytes
1451 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1451 can be obtained via ``memoryview(o)``. A *copy* of the backing bytes can also
1452 be obtained via ``.tobytes()``.
1452 be obtained via ``.tobytes()``.
1453
1453
1454 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1454 The ``.segments`` attribute exposes the array of ``(offset, length)`` for
1455 segments within the buffer. It is a ``BufferSegments`` type.
1455 segments within the buffer. It is a ``BufferSegments`` type.
1456
1456
1457 BufferSegment
1457 BufferSegment
1458 ^^^^^^^^^^^^^
1458 ^^^^^^^^^^^^^
1459
1459
1460 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1460 The ``BufferSegment`` type represents a segment within a ``BufferWithSegments``.
1461 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1461 It is essentially a reference to N bytes within a ``BufferWithSegments``.
1462
1462
1463 ``len()`` returns the length of the segment in bytes.
1463 ``len()`` returns the length of the segment in bytes.
1464
1464
1465 ``.offset`` contains the byte offset of this segment within its parent
1465 ``.offset`` contains the byte offset of this segment within its parent
1466 ``BufferWithSegments`` instance.
1466 ``BufferWithSegments`` instance.
1467
1467
1468 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1468 The object conforms to the buffer protocol. ``.tobytes()`` can be called to
1469 obtain a ``bytes`` instance with a copy of the backing bytes.
1469 obtain a ``bytes`` instance with a copy of the backing bytes.
1470
1470
1471 BufferSegments
1471 BufferSegments
1472 ^^^^^^^^^^^^^^
1472 ^^^^^^^^^^^^^^
1473
1473
1474 This type represents an array of ``(offset, length)`` integers defining segments
1474 This type represents an array of ``(offset, length)`` integers defining segments
1475 within a ``BufferWithSegments``.
1475 within a ``BufferWithSegments``.
1476
1476
1477 The array members are 64-bit unsigned integers using host/native bit order.
1477 The array members are 64-bit unsigned integers using host/native bit order.
1478
1478
1479 Instances conform to the buffer protocol.
1479 Instances conform to the buffer protocol.
1480
1480
1481 BufferWithSegmentsCollection
1481 BufferWithSegmentsCollection
1482 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1482 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1483
1483
1484 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1484 The ``BufferWithSegmentsCollection`` type represents a virtual spanning view
1485 of multiple ``BufferWithSegments`` instances.
1485 of multiple ``BufferWithSegments`` instances.
1486
1486
1487 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1487 Instances are constructed from 1 or more ``BufferWithSegments`` instances. The
1488 resulting object behaves like an ordered sequence whose members are the
1488 resulting object behaves like an ordered sequence whose members are the
1489 segments within each ``BufferWithSegments``.
1489 segments within each ``BufferWithSegments``.
1490
1490
1491 ``len()`` returns the number of segments within all ``BufferWithSegments``
1491 ``len()`` returns the number of segments within all ``BufferWithSegments``
1492 instances.
1492 instances.
1493
1493
1494 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1494 ``o[index]`` and ``__getitem__(index)`` return the ``BufferSegment`` at
1495 that offset as if all ``BufferWithSegments`` instances were a single
1495 that offset as if all ``BufferWithSegments`` instances were a single
1496 entity.
1496 entity.
1497
1497
1498 If the object is composed of 2 ``BufferWithSegments`` instances with the
1498 If the object is composed of 2 ``BufferWithSegments`` instances with the
1499 first having 2 segments and the second have 3 segments, then ``b[0]``
1499 first having 2 segments and the second have 3 segments, then ``b[0]``
1500 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1500 and ``b[1]`` access segments in the first object and ``b[2]``, ``b[3]``,
1501 and ``b[4]`` access segments from the second.
1501 and ``b[4]`` access segments from the second.
1502
1502
1503 Choosing an API
1503 Choosing an API
1504 ===============
1504 ===============
1505
1505
1506 There are multiple APIs for performing compression and decompression. This is
1506 There are multiple APIs for performing compression and decompression. This is
1507 because different applications have different needs and the library wants to
1507 because different applications have different needs and the library wants to
1508 facilitate optimal use in as many use cases as possible.
1508 facilitate optimal use in as many use cases as possible.
1509
1509
1510 From a high-level, APIs are divided into *one-shot* and *streaming*: either you
1510 From a high-level, APIs are divided into *one-shot* and *streaming*: either you
1511 are operating on all data at once or you operate on it piecemeal.
1511 are operating on all data at once or you operate on it piecemeal.
1512
1512
1513 The *one-shot* APIs are useful for small data, where the input or output
1513 The *one-shot* APIs are useful for small data, where the input or output
1514 size is known. (The size can come from a buffer length, file size, or
1514 size is known. (The size can come from a buffer length, file size, or
1515 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1515 stored in the zstd frame header.) A limitation of the *one-shot* APIs is that
1516 input and output must fit in memory simultaneously. For say a 4 GB input,
1516 input and output must fit in memory simultaneously. For say a 4 GB input,
1517 this is often not feasible.
1517 this is often not feasible.
1518
1518
1519 The *one-shot* APIs also perform all work as a single operation. So, if you
1519 The *one-shot* APIs also perform all work as a single operation. So, if you
1520 feed it large input, it could take a long time for the function to return.
1520 feed it large input, it could take a long time for the function to return.
1521
1521
1522 The streaming APIs do not have the limitations of the simple API. But the
1522 The streaming APIs do not have the limitations of the simple API. But the
1523 price you pay for this flexibility is that they are more complex than a
1523 price you pay for this flexibility is that they are more complex than a
1524 single function call.
1524 single function call.
1525
1525
1526 The streaming APIs put the caller in control of compression and decompression
1526 The streaming APIs put the caller in control of compression and decompression
1527 behavior by allowing them to directly control either the input or output side
1527 behavior by allowing them to directly control either the input or output side
1528 of the operation.
1528 of the operation.
1529
1529
1530 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1530 With the *streaming input*, *compressor*, and *decompressor* APIs, the caller
1531 has full control over the input to the compression or decompression stream.
1531 has full control over the input to the compression or decompression stream.
1532 They can directly choose when new data is operated on.
1532 They can directly choose when new data is operated on.
1533
1533
1534 With the *streaming ouput* APIs, the caller has full control over the output
1534 With the *streaming ouput* APIs, the caller has full control over the output
1535 of the compression or decompression stream. It can choose when to receive
1535 of the compression or decompression stream. It can choose when to receive
1536 new data.
1536 new data.
1537
1537
1538 When using the *streaming* APIs that operate on file-like or stream objects,
1538 When using the *streaming* APIs that operate on file-like or stream objects,
1539 it is important to consider what happens in that object when I/O is requested.
1539 it is important to consider what happens in that object when I/O is requested.
1540 There is potential for long pauses as data is read or written from the
1540 There is potential for long pauses as data is read or written from the
1541 underlying stream (say from interacting with a filesystem or network). This
1541 underlying stream (say from interacting with a filesystem or network). This
1542 could add considerable overhead.
1542 could add considerable overhead.
1543
1543
1544 Thread Safety
1544 Thread Safety
1545 =============
1545 =============
1546
1546
1547 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1547 ``ZstdCompressor`` and ``ZstdDecompressor`` instances have no guarantees
1548 about thread safety. Do not operate on the same ``ZstdCompressor`` and
1548 about thread safety. Do not operate on the same ``ZstdCompressor`` and
1549 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1549 ``ZstdDecompressor`` instance simultaneously from different threads. It is
1550 fine to have different threads call into a single instance, just not at the
1550 fine to have different threads call into a single instance, just not at the
1551 same time.
1551 same time.
1552
1552
1553 Some operations require multiple function calls to complete. e.g. streaming
1553 Some operations require multiple function calls to complete. e.g. streaming
1554 operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1554 operations. A single ``ZstdCompressor`` or ``ZstdDecompressor`` cannot be used
1555 for simultaneously active operations. e.g. you must not start a streaming
1555 for simultaneously active operations. e.g. you must not start a streaming
1556 operation when another streaming operation is already active.
1556 operation when another streaming operation is already active.
1557
1557
1558 The C extension releases the GIL during non-trivial calls into the zstd C
1558 The C extension releases the GIL during non-trivial calls into the zstd C
1559 API. Non-trivial calls are notably compression and decompression. Trivial
1559 API. Non-trivial calls are notably compression and decompression. Trivial
1560 calls are things like parsing frame parameters. Where the GIL is released
1560 calls are things like parsing frame parameters. Where the GIL is released
1561 is considered an implementation detail and can change in any release.
1561 is considered an implementation detail and can change in any release.
1562
1562
1563 APIs that accept bytes-like objects don't enforce that the underlying object
1563 APIs that accept bytes-like objects don't enforce that the underlying object
1564 is read-only. However, it is assumed that the passed object is read-only for
1564 is read-only. However, it is assumed that the passed object is read-only for
1565 the duration of the function call. It is possible to pass a mutable object
1565 the duration of the function call. It is possible to pass a mutable object
1566 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1566 (like a ``bytearray``) to e.g. ``ZstdCompressor.compress()``, have the GIL
1567 released, and mutate the object from another thread. Such a race condition
1567 released, and mutate the object from another thread. Such a race condition
1568 is a bug in the consumer of python-zstandard. Most Python data types are
1568 is a bug in the consumer of python-zstandard. Most Python data types are
1569 immutable, so unless you are doing something fancy, you don't need to
1569 immutable, so unless you are doing something fancy, you don't need to
1570 worry about this.
1570 worry about this.
1571
1571
1572 Note on Zstandard's *Experimental* API
1572 Note on Zstandard's *Experimental* API
1573 ======================================
1573 ======================================
1574
1574
1575 Many of the Zstandard APIs used by this module are marked as *experimental*
1575 Many of the Zstandard APIs used by this module are marked as *experimental*
1576 within the Zstandard project.
1576 within the Zstandard project.
1577
1577
1578 It is unclear how Zstandard's C API will evolve over time, especially with
1578 It is unclear how Zstandard's C API will evolve over time, especially with
1579 regards to this *experimental* functionality. We will try to maintain
1579 regards to this *experimental* functionality. We will try to maintain
1580 backwards compatibility at the Python API level. However, we cannot
1580 backwards compatibility at the Python API level. However, we cannot
1581 guarantee this for things not under our control.
1581 guarantee this for things not under our control.
1582
1582
1583 Since a copy of the Zstandard source code is distributed with this
1583 Since a copy of the Zstandard source code is distributed with this
1584 module and since we compile against it, the behavior of a specific
1584 module and since we compile against it, the behavior of a specific
1585 version of this module should be constant for all of time. So if you
1585 version of this module should be constant for all of time. So if you
1586 pin the version of this module used in your projects (which is a Python
1586 pin the version of this module used in your projects (which is a Python
1587 best practice), you should be shielded from unwanted future changes.
1587 best practice), you should be shielded from unwanted future changes.
1588
1588
1589 Donate
1589 Donate
1590 ======
1590 ======
1591
1591
1592 A lot of time has been invested into this project by the author.
1592 A lot of time has been invested into this project by the author.
1593
1593
1594 If you find this project useful and would like to thank the author for
1594 If you find this project useful and would like to thank the author for
1595 their work, consider donating some money. Any amount is appreciated.
1595 their work, consider donating some money. Any amount is appreciated.
1596
1596
1597 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1597 .. image:: https://www.paypalobjects.com/en_US/i/btn/btn_donate_LG.gif
1598 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1598 :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted
1599 :alt: Donate via PayPal
1599 :alt: Donate via PayPal
1600
1600
1601 .. |ci-status| image:: https://travis-ci.org/indygreg/python-zstandard.svg?branch=master
1601 .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
1602 :target: https://travis-ci.org/indygreg/python-zstandard
1602 :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master
1603
1604 .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true
1605 :target: https://ci.appveyor.com/project/indygreg/python-zstandard
1606 :alt: Windows build status
@@ -1,572 +1,572 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
13 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) {
14 size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value);
14 size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value);
15 if (ZSTD_isError(zresult)) {
15 if (ZSTD_isError(zresult)) {
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
16 PyErr_Format(ZstdError, "unable to set compression context parameter: %s",
17 ZSTD_getErrorName(zresult));
17 ZSTD_getErrorName(zresult));
18 return 1;
18 return 1;
19 }
19 }
20
20
21 return 0;
21 return 0;
22 }
22 }
23
23
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
24 #define TRY_SET_PARAMETER(params, param, value) if (set_parameter(params, param, value)) return -1;
25
25
26 #define TRY_COPY_PARAMETER(source, dest, param) { \
26 #define TRY_COPY_PARAMETER(source, dest, param) { \
27 int result; \
27 int result; \
28 size_t zresult = ZSTD_CCtxParam_getParameter(source, param, &result); \
28 size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \
29 if (ZSTD_isError(zresult)) { \
29 if (ZSTD_isError(zresult)) { \
30 return 1; \
30 return 1; \
31 } \
31 } \
32 zresult = ZSTD_CCtxParam_setParameter(dest, param, result); \
32 zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \
33 if (ZSTD_isError(zresult)) { \
33 if (ZSTD_isError(zresult)) { \
34 return 1; \
34 return 1; \
35 } \
35 } \
36 }
36 }
37
37
38 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
38 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj) {
39 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
39 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_nbWorkers);
40
40
41 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
41 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_format);
42 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
42 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_compressionLevel);
43 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
43 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_windowLog);
44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
44 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_hashLog);
45 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
45 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_chainLog);
46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
46 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_searchLog);
47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
47 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_minMatch);
48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
48 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_targetLength);
49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
49 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_strategy);
50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
50 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_contentSizeFlag);
51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
51 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_checksumFlag);
52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
52 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_dictIDFlag);
53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
53 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_jobSize);
54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
54 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_overlapLog);
55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
55 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_forceMaxWindow);
56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
56 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_enableLongDistanceMatching);
57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
57 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashLog);
58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
58 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmMinMatch);
59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
59 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmBucketSizeLog);
60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
60 TRY_COPY_PARAMETER(obj->params, params, ZSTD_c_ldmHashRateLog);
61
61
62 return 0;
62 return 0;
63 }
63 }
64
64
65 int reset_params(ZstdCompressionParametersObject* params) {
65 int reset_params(ZstdCompressionParametersObject* params) {
66 if (params->params) {
66 if (params->params) {
67 ZSTD_CCtxParams_reset(params->params);
67 ZSTD_CCtxParams_reset(params->params);
68 }
68 }
69 else {
69 else {
70 params->params = ZSTD_createCCtxParams();
70 params->params = ZSTD_createCCtxParams();
71 if (!params->params) {
71 if (!params->params) {
72 PyErr_NoMemory();
72 PyErr_NoMemory();
73 return 1;
73 return 1;
74 }
74 }
75 }
75 }
76
76
77 return set_parameters(params->params, params);
77 return set_parameters(params->params, params);
78 }
78 }
79
79
80 #define TRY_GET_PARAMETER(params, param, value) { \
80 #define TRY_GET_PARAMETER(params, param, value) { \
81 size_t zresult = ZSTD_CCtxParam_getParameter(params, param, value); \
81 size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \
82 if (ZSTD_isError(zresult)) { \
82 if (ZSTD_isError(zresult)) { \
83 PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
83 PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \
84 return 1; \
84 return 1; \
85 } \
85 } \
86 }
86 }
87
87
88 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
88 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams) {
89 int value;
89 int value;
90
90
91 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
91 TRY_GET_PARAMETER(params->params, ZSTD_c_windowLog, &value);
92 cparams->windowLog = value;
92 cparams->windowLog = value;
93
93
94 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
94 TRY_GET_PARAMETER(params->params, ZSTD_c_chainLog, &value);
95 cparams->chainLog = value;
95 cparams->chainLog = value;
96
96
97 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
97 TRY_GET_PARAMETER(params->params, ZSTD_c_hashLog, &value);
98 cparams->hashLog = value;
98 cparams->hashLog = value;
99
99
100 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
100 TRY_GET_PARAMETER(params->params, ZSTD_c_searchLog, &value);
101 cparams->searchLog = value;
101 cparams->searchLog = value;
102
102
103 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
103 TRY_GET_PARAMETER(params->params, ZSTD_c_minMatch, &value);
104 cparams->minMatch = value;
104 cparams->minMatch = value;
105
105
106 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
106 TRY_GET_PARAMETER(params->params, ZSTD_c_targetLength, &value);
107 cparams->targetLength = value;
107 cparams->targetLength = value;
108
108
109 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
109 TRY_GET_PARAMETER(params->params, ZSTD_c_strategy, &value);
110 cparams->strategy = value;
110 cparams->strategy = value;
111
111
112 return 0;
112 return 0;
113 }
113 }
114
114
115 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
115 static int ZstdCompressionParameters_init(ZstdCompressionParametersObject* self, PyObject* args, PyObject* kwargs) {
116 static char* kwlist[] = {
116 static char* kwlist[] = {
117 "format",
117 "format",
118 "compression_level",
118 "compression_level",
119 "window_log",
119 "window_log",
120 "hash_log",
120 "hash_log",
121 "chain_log",
121 "chain_log",
122 "search_log",
122 "search_log",
123 "min_match",
123 "min_match",
124 "target_length",
124 "target_length",
125 "compression_strategy",
125 "compression_strategy",
126 "strategy",
126 "strategy",
127 "write_content_size",
127 "write_content_size",
128 "write_checksum",
128 "write_checksum",
129 "write_dict_id",
129 "write_dict_id",
130 "job_size",
130 "job_size",
131 "overlap_log",
131 "overlap_log",
132 "overlap_size_log",
132 "overlap_size_log",
133 "force_max_window",
133 "force_max_window",
134 "enable_ldm",
134 "enable_ldm",
135 "ldm_hash_log",
135 "ldm_hash_log",
136 "ldm_min_match",
136 "ldm_min_match",
137 "ldm_bucket_size_log",
137 "ldm_bucket_size_log",
138 "ldm_hash_rate_log",
138 "ldm_hash_rate_log",
139 "ldm_hash_every_log",
139 "ldm_hash_every_log",
140 "threads",
140 "threads",
141 NULL
141 NULL
142 };
142 };
143
143
144 int format = 0;
144 int format = 0;
145 int compressionLevel = 0;
145 int compressionLevel = 0;
146 int windowLog = 0;
146 int windowLog = 0;
147 int hashLog = 0;
147 int hashLog = 0;
148 int chainLog = 0;
148 int chainLog = 0;
149 int searchLog = 0;
149 int searchLog = 0;
150 int minMatch = 0;
150 int minMatch = 0;
151 int targetLength = 0;
151 int targetLength = 0;
152 int compressionStrategy = -1;
152 int compressionStrategy = -1;
153 int strategy = -1;
153 int strategy = -1;
154 int contentSizeFlag = 1;
154 int contentSizeFlag = 1;
155 int checksumFlag = 0;
155 int checksumFlag = 0;
156 int dictIDFlag = 0;
156 int dictIDFlag = 0;
157 int jobSize = 0;
157 int jobSize = 0;
158 int overlapLog = -1;
158 int overlapLog = -1;
159 int overlapSizeLog = -1;
159 int overlapSizeLog = -1;
160 int forceMaxWindow = 0;
160 int forceMaxWindow = 0;
161 int enableLDM = 0;
161 int enableLDM = 0;
162 int ldmHashLog = 0;
162 int ldmHashLog = 0;
163 int ldmMinMatch = 0;
163 int ldmMinMatch = 0;
164 int ldmBucketSizeLog = 0;
164 int ldmBucketSizeLog = 0;
165 int ldmHashRateLog = -1;
165 int ldmHashRateLog = -1;
166 int ldmHashEveryLog = -1;
166 int ldmHashEveryLog = -1;
167 int threads = 0;
167 int threads = 0;
168
168
169 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
169 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
170 "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
170 "|iiiiiiiiiiiiiiiiiiiiiiii:CompressionParameters",
171 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
171 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
172 &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
172 &searchLog, &minMatch, &targetLength, &compressionStrategy, &strategy,
173 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
173 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapLog,
174 &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
174 &overlapSizeLog, &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch,
175 &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
175 &ldmBucketSizeLog, &ldmHashRateLog, &ldmHashEveryLog, &threads)) {
176 return -1;
176 return -1;
177 }
177 }
178
178
179 if (reset_params(self)) {
179 if (reset_params(self)) {
180 return -1;
180 return -1;
181 }
181 }
182
182
183 if (threads < 0) {
183 if (threads < 0) {
184 threads = cpu_count();
184 threads = cpu_count();
185 }
185 }
186
186
187 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
187 /* We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
188 * because setting ZSTD_c_nbWorkers resets the other parameters. */
188 * because setting ZSTD_c_nbWorkers resets the other parameters. */
189 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
189 TRY_SET_PARAMETER(self->params, ZSTD_c_nbWorkers, threads);
190
190
191 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
191 TRY_SET_PARAMETER(self->params, ZSTD_c_format, format);
192 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
192 TRY_SET_PARAMETER(self->params, ZSTD_c_compressionLevel, compressionLevel);
193 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
193 TRY_SET_PARAMETER(self->params, ZSTD_c_windowLog, windowLog);
194 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
194 TRY_SET_PARAMETER(self->params, ZSTD_c_hashLog, hashLog);
195 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
195 TRY_SET_PARAMETER(self->params, ZSTD_c_chainLog, chainLog);
196 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
196 TRY_SET_PARAMETER(self->params, ZSTD_c_searchLog, searchLog);
197 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
197 TRY_SET_PARAMETER(self->params, ZSTD_c_minMatch, minMatch);
198 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
198 TRY_SET_PARAMETER(self->params, ZSTD_c_targetLength, targetLength);
199
199
200 if (compressionStrategy != -1 && strategy != -1) {
200 if (compressionStrategy != -1 && strategy != -1) {
201 PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
201 PyErr_SetString(PyExc_ValueError, "cannot specify both compression_strategy and strategy");
202 return -1;
202 return -1;
203 }
203 }
204
204
205 if (compressionStrategy != -1) {
205 if (compressionStrategy != -1) {
206 strategy = compressionStrategy;
206 strategy = compressionStrategy;
207 }
207 }
208 else if (strategy == -1) {
208 else if (strategy == -1) {
209 strategy = 0;
209 strategy = 0;
210 }
210 }
211
211
212 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
212 TRY_SET_PARAMETER(self->params, ZSTD_c_strategy, strategy);
213 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
213 TRY_SET_PARAMETER(self->params, ZSTD_c_contentSizeFlag, contentSizeFlag);
214 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
214 TRY_SET_PARAMETER(self->params, ZSTD_c_checksumFlag, checksumFlag);
215 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
215 TRY_SET_PARAMETER(self->params, ZSTD_c_dictIDFlag, dictIDFlag);
216 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
216 TRY_SET_PARAMETER(self->params, ZSTD_c_jobSize, jobSize);
217
217
218 if (overlapLog != -1 && overlapSizeLog != -1) {
218 if (overlapLog != -1 && overlapSizeLog != -1) {
219 PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
219 PyErr_SetString(PyExc_ValueError, "cannot specify both overlap_log and overlap_size_log");
220 return -1;
220 return -1;
221 }
221 }
222
222
223 if (overlapSizeLog != -1) {
223 if (overlapSizeLog != -1) {
224 overlapLog = overlapSizeLog;
224 overlapLog = overlapSizeLog;
225 }
225 }
226 else if (overlapLog == -1) {
226 else if (overlapLog == -1) {
227 overlapLog = 0;
227 overlapLog = 0;
228 }
228 }
229
229
230 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
230 TRY_SET_PARAMETER(self->params, ZSTD_c_overlapLog, overlapLog);
231 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
231 TRY_SET_PARAMETER(self->params, ZSTD_c_forceMaxWindow, forceMaxWindow);
232 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
232 TRY_SET_PARAMETER(self->params, ZSTD_c_enableLongDistanceMatching, enableLDM);
233 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
233 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashLog, ldmHashLog);
234 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
234 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmMinMatch, ldmMinMatch);
235 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
235 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmBucketSizeLog, ldmBucketSizeLog);
236
236
237 if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
237 if (ldmHashRateLog != -1 && ldmHashEveryLog != -1) {
238 PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
238 PyErr_SetString(PyExc_ValueError, "cannot specify both ldm_hash_rate_log and ldm_hash_everyLog");
239 return -1;
239 return -1;
240 }
240 }
241
241
242 if (ldmHashEveryLog != -1) {
242 if (ldmHashEveryLog != -1) {
243 ldmHashRateLog = ldmHashEveryLog;
243 ldmHashRateLog = ldmHashEveryLog;
244 }
244 }
245 else if (ldmHashRateLog == -1) {
245 else if (ldmHashRateLog == -1) {
246 ldmHashRateLog = 0;
246 ldmHashRateLog = 0;
247 }
247 }
248
248
249 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
249 TRY_SET_PARAMETER(self->params, ZSTD_c_ldmHashRateLog, ldmHashRateLog);
250
250
251 return 0;
251 return 0;
252 }
252 }
253
253
254 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
254 PyDoc_STRVAR(ZstdCompressionParameters_from_level__doc__,
255 "Create a CompressionParameters from a compression level and target sizes\n"
255 "Create a CompressionParameters from a compression level and target sizes\n"
256 );
256 );
257
257
258 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
258 ZstdCompressionParametersObject* CompressionParameters_from_level(PyObject* undef, PyObject* args, PyObject* kwargs) {
259 int managedKwargs = 0;
259 int managedKwargs = 0;
260 int level;
260 int level;
261 PyObject* sourceSize = NULL;
261 PyObject* sourceSize = NULL;
262 PyObject* dictSize = NULL;
262 PyObject* dictSize = NULL;
263 unsigned PY_LONG_LONG iSourceSize = 0;
263 unsigned PY_LONG_LONG iSourceSize = 0;
264 Py_ssize_t iDictSize = 0;
264 Py_ssize_t iDictSize = 0;
265 PyObject* val;
265 PyObject* val;
266 ZSTD_compressionParameters params;
266 ZSTD_compressionParameters params;
267 ZstdCompressionParametersObject* result = NULL;
267 ZstdCompressionParametersObject* result = NULL;
268 int res;
268 int res;
269
269
270 if (!PyArg_ParseTuple(args, "i:from_level",
270 if (!PyArg_ParseTuple(args, "i:from_level",
271 &level)) {
271 &level)) {
272 return NULL;
272 return NULL;
273 }
273 }
274
274
275 if (!kwargs) {
275 if (!kwargs) {
276 kwargs = PyDict_New();
276 kwargs = PyDict_New();
277 if (!kwargs) {
277 if (!kwargs) {
278 return NULL;
278 return NULL;
279 }
279 }
280 managedKwargs = 1;
280 managedKwargs = 1;
281 }
281 }
282
282
283 sourceSize = PyDict_GetItemString(kwargs, "source_size");
283 sourceSize = PyDict_GetItemString(kwargs, "source_size");
284 if (sourceSize) {
284 if (sourceSize) {
285 #if PY_MAJOR_VERSION >= 3
285 #if PY_MAJOR_VERSION >= 3
286 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
286 iSourceSize = PyLong_AsUnsignedLongLong(sourceSize);
287 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
287 if (iSourceSize == (unsigned PY_LONG_LONG)(-1)) {
288 goto cleanup;
288 goto cleanup;
289 }
289 }
290 #else
290 #else
291 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
291 iSourceSize = PyInt_AsUnsignedLongLongMask(sourceSize);
292 #endif
292 #endif
293
293
294 PyDict_DelItemString(kwargs, "source_size");
294 PyDict_DelItemString(kwargs, "source_size");
295 }
295 }
296
296
297 dictSize = PyDict_GetItemString(kwargs, "dict_size");
297 dictSize = PyDict_GetItemString(kwargs, "dict_size");
298 if (dictSize) {
298 if (dictSize) {
299 #if PY_MAJOR_VERSION >= 3
299 #if PY_MAJOR_VERSION >= 3
300 iDictSize = PyLong_AsSsize_t(dictSize);
300 iDictSize = PyLong_AsSsize_t(dictSize);
301 #else
301 #else
302 iDictSize = PyInt_AsSsize_t(dictSize);
302 iDictSize = PyInt_AsSsize_t(dictSize);
303 #endif
303 #endif
304 if (iDictSize == -1) {
304 if (iDictSize == -1) {
305 goto cleanup;
305 goto cleanup;
306 }
306 }
307
307
308 PyDict_DelItemString(kwargs, "dict_size");
308 PyDict_DelItemString(kwargs, "dict_size");
309 }
309 }
310
310
311
311
312 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
312 params = ZSTD_getCParams(level, iSourceSize, iDictSize);
313
313
314 /* Values derived from the input level and sizes are passed along to the
314 /* Values derived from the input level and sizes are passed along to the
315 constructor. But only if a value doesn't already exist. */
315 constructor. But only if a value doesn't already exist. */
316 val = PyDict_GetItemString(kwargs, "window_log");
316 val = PyDict_GetItemString(kwargs, "window_log");
317 if (!val) {
317 if (!val) {
318 val = PyLong_FromUnsignedLong(params.windowLog);
318 val = PyLong_FromUnsignedLong(params.windowLog);
319 if (!val) {
319 if (!val) {
320 goto cleanup;
320 goto cleanup;
321 }
321 }
322 PyDict_SetItemString(kwargs, "window_log", val);
322 PyDict_SetItemString(kwargs, "window_log", val);
323 Py_DECREF(val);
323 Py_DECREF(val);
324 }
324 }
325
325
326 val = PyDict_GetItemString(kwargs, "chain_log");
326 val = PyDict_GetItemString(kwargs, "chain_log");
327 if (!val) {
327 if (!val) {
328 val = PyLong_FromUnsignedLong(params.chainLog);
328 val = PyLong_FromUnsignedLong(params.chainLog);
329 if (!val) {
329 if (!val) {
330 goto cleanup;
330 goto cleanup;
331 }
331 }
332 PyDict_SetItemString(kwargs, "chain_log", val);
332 PyDict_SetItemString(kwargs, "chain_log", val);
333 Py_DECREF(val);
333 Py_DECREF(val);
334 }
334 }
335
335
336 val = PyDict_GetItemString(kwargs, "hash_log");
336 val = PyDict_GetItemString(kwargs, "hash_log");
337 if (!val) {
337 if (!val) {
338 val = PyLong_FromUnsignedLong(params.hashLog);
338 val = PyLong_FromUnsignedLong(params.hashLog);
339 if (!val) {
339 if (!val) {
340 goto cleanup;
340 goto cleanup;
341 }
341 }
342 PyDict_SetItemString(kwargs, "hash_log", val);
342 PyDict_SetItemString(kwargs, "hash_log", val);
343 Py_DECREF(val);
343 Py_DECREF(val);
344 }
344 }
345
345
346 val = PyDict_GetItemString(kwargs, "search_log");
346 val = PyDict_GetItemString(kwargs, "search_log");
347 if (!val) {
347 if (!val) {
348 val = PyLong_FromUnsignedLong(params.searchLog);
348 val = PyLong_FromUnsignedLong(params.searchLog);
349 if (!val) {
349 if (!val) {
350 goto cleanup;
350 goto cleanup;
351 }
351 }
352 PyDict_SetItemString(kwargs, "search_log", val);
352 PyDict_SetItemString(kwargs, "search_log", val);
353 Py_DECREF(val);
353 Py_DECREF(val);
354 }
354 }
355
355
356 val = PyDict_GetItemString(kwargs, "min_match");
356 val = PyDict_GetItemString(kwargs, "min_match");
357 if (!val) {
357 if (!val) {
358 val = PyLong_FromUnsignedLong(params.minMatch);
358 val = PyLong_FromUnsignedLong(params.minMatch);
359 if (!val) {
359 if (!val) {
360 goto cleanup;
360 goto cleanup;
361 }
361 }
362 PyDict_SetItemString(kwargs, "min_match", val);
362 PyDict_SetItemString(kwargs, "min_match", val);
363 Py_DECREF(val);
363 Py_DECREF(val);
364 }
364 }
365
365
366 val = PyDict_GetItemString(kwargs, "target_length");
366 val = PyDict_GetItemString(kwargs, "target_length");
367 if (!val) {
367 if (!val) {
368 val = PyLong_FromUnsignedLong(params.targetLength);
368 val = PyLong_FromUnsignedLong(params.targetLength);
369 if (!val) {
369 if (!val) {
370 goto cleanup;
370 goto cleanup;
371 }
371 }
372 PyDict_SetItemString(kwargs, "target_length", val);
372 PyDict_SetItemString(kwargs, "target_length", val);
373 Py_DECREF(val);
373 Py_DECREF(val);
374 }
374 }
375
375
376 val = PyDict_GetItemString(kwargs, "compression_strategy");
376 val = PyDict_GetItemString(kwargs, "compression_strategy");
377 if (!val) {
377 if (!val) {
378 val = PyLong_FromUnsignedLong(params.strategy);
378 val = PyLong_FromUnsignedLong(params.strategy);
379 if (!val) {
379 if (!val) {
380 goto cleanup;
380 goto cleanup;
381 }
381 }
382 PyDict_SetItemString(kwargs, "compression_strategy", val);
382 PyDict_SetItemString(kwargs, "compression_strategy", val);
383 Py_DECREF(val);
383 Py_DECREF(val);
384 }
384 }
385
385
386 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
386 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
387 if (!result) {
387 if (!result) {
388 goto cleanup;
388 goto cleanup;
389 }
389 }
390
390
391 result->params = NULL;
391 result->params = NULL;
392
392
393 val = PyTuple_New(0);
393 val = PyTuple_New(0);
394 if (!val) {
394 if (!val) {
395 Py_CLEAR(result);
395 Py_CLEAR(result);
396 goto cleanup;
396 goto cleanup;
397 }
397 }
398
398
399 res = ZstdCompressionParameters_init(result, val, kwargs);
399 res = ZstdCompressionParameters_init(result, val, kwargs);
400 Py_DECREF(val);
400 Py_DECREF(val);
401
401
402 if (res) {
402 if (res) {
403 Py_CLEAR(result);
403 Py_CLEAR(result);
404 goto cleanup;
404 goto cleanup;
405 }
405 }
406
406
407 cleanup:
407 cleanup:
408 if (managedKwargs) {
408 if (managedKwargs) {
409 Py_DECREF(kwargs);
409 Py_DECREF(kwargs);
410 }
410 }
411
411
412 return result;
412 return result;
413 }
413 }
414
414
415 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
415 PyDoc_STRVAR(ZstdCompressionParameters_estimated_compression_context_size__doc__,
416 "Estimate the size in bytes of a compression context for compression parameters\n"
416 "Estimate the size in bytes of a compression context for compression parameters\n"
417 );
417 );
418
418
419 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
419 PyObject* ZstdCompressionParameters_estimated_compression_context_size(ZstdCompressionParametersObject* self) {
420 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
420 return PyLong_FromSize_t(ZSTD_estimateCCtxSize_usingCCtxParams(self->params));
421 }
421 }
422
422
423 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
423 PyDoc_STRVAR(ZstdCompressionParameters__doc__,
424 "ZstdCompressionParameters: low-level control over zstd compression");
424 "ZstdCompressionParameters: low-level control over zstd compression");
425
425
426 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
426 static void ZstdCompressionParameters_dealloc(ZstdCompressionParametersObject* self) {
427 if (self->params) {
427 if (self->params) {
428 ZSTD_freeCCtxParams(self->params);
428 ZSTD_freeCCtxParams(self->params);
429 self->params = NULL;
429 self->params = NULL;
430 }
430 }
431
431
432 PyObject_Del(self);
432 PyObject_Del(self);
433 }
433 }
434
434
435 #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
435 #define PARAM_GETTER(name, param) PyObject* ZstdCompressionParameters_get_##name(PyObject* self, void* unused) { \
436 int result; \
436 int result; \
437 size_t zresult; \
437 size_t zresult; \
438 ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
438 ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \
439 zresult = ZSTD_CCtxParam_getParameter(p->params, param, &result); \
439 zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \
440 if (ZSTD_isError(zresult)) { \
440 if (ZSTD_isError(zresult)) { \
441 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
441 PyErr_Format(ZstdError, "unable to get compression parameter: %s", \
442 ZSTD_getErrorName(zresult)); \
442 ZSTD_getErrorName(zresult)); \
443 return NULL; \
443 return NULL; \
444 } \
444 } \
445 return PyLong_FromLong(result); \
445 return PyLong_FromLong(result); \
446 }
446 }
447
447
448 PARAM_GETTER(format, ZSTD_c_format)
448 PARAM_GETTER(format, ZSTD_c_format)
449 PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
449 PARAM_GETTER(compression_level, ZSTD_c_compressionLevel)
450 PARAM_GETTER(window_log, ZSTD_c_windowLog)
450 PARAM_GETTER(window_log, ZSTD_c_windowLog)
451 PARAM_GETTER(hash_log, ZSTD_c_hashLog)
451 PARAM_GETTER(hash_log, ZSTD_c_hashLog)
452 PARAM_GETTER(chain_log, ZSTD_c_chainLog)
452 PARAM_GETTER(chain_log, ZSTD_c_chainLog)
453 PARAM_GETTER(search_log, ZSTD_c_searchLog)
453 PARAM_GETTER(search_log, ZSTD_c_searchLog)
454 PARAM_GETTER(min_match, ZSTD_c_minMatch)
454 PARAM_GETTER(min_match, ZSTD_c_minMatch)
455 PARAM_GETTER(target_length, ZSTD_c_targetLength)
455 PARAM_GETTER(target_length, ZSTD_c_targetLength)
456 PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
456 PARAM_GETTER(compression_strategy, ZSTD_c_strategy)
457 PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
457 PARAM_GETTER(write_content_size, ZSTD_c_contentSizeFlag)
458 PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
458 PARAM_GETTER(write_checksum, ZSTD_c_checksumFlag)
459 PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
459 PARAM_GETTER(write_dict_id, ZSTD_c_dictIDFlag)
460 PARAM_GETTER(job_size, ZSTD_c_jobSize)
460 PARAM_GETTER(job_size, ZSTD_c_jobSize)
461 PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
461 PARAM_GETTER(overlap_log, ZSTD_c_overlapLog)
462 PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
462 PARAM_GETTER(force_max_window, ZSTD_c_forceMaxWindow)
463 PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
463 PARAM_GETTER(enable_ldm, ZSTD_c_enableLongDistanceMatching)
464 PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
464 PARAM_GETTER(ldm_hash_log, ZSTD_c_ldmHashLog)
465 PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
465 PARAM_GETTER(ldm_min_match, ZSTD_c_ldmMinMatch)
466 PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
466 PARAM_GETTER(ldm_bucket_size_log, ZSTD_c_ldmBucketSizeLog)
467 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
467 PARAM_GETTER(ldm_hash_rate_log, ZSTD_c_ldmHashRateLog)
468 PARAM_GETTER(threads, ZSTD_c_nbWorkers)
468 PARAM_GETTER(threads, ZSTD_c_nbWorkers)
469
469
470 static PyMethodDef ZstdCompressionParameters_methods[] = {
470 static PyMethodDef ZstdCompressionParameters_methods[] = {
471 {
471 {
472 "from_level",
472 "from_level",
473 (PyCFunction)CompressionParameters_from_level,
473 (PyCFunction)CompressionParameters_from_level,
474 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
474 METH_VARARGS | METH_KEYWORDS | METH_STATIC,
475 ZstdCompressionParameters_from_level__doc__
475 ZstdCompressionParameters_from_level__doc__
476 },
476 },
477 {
477 {
478 "estimated_compression_context_size",
478 "estimated_compression_context_size",
479 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
479 (PyCFunction)ZstdCompressionParameters_estimated_compression_context_size,
480 METH_NOARGS,
480 METH_NOARGS,
481 ZstdCompressionParameters_estimated_compression_context_size__doc__
481 ZstdCompressionParameters_estimated_compression_context_size__doc__
482 },
482 },
483 { NULL, NULL }
483 { NULL, NULL }
484 };
484 };
485
485
486 #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
486 #define GET_SET_ENTRY(name) { #name, ZstdCompressionParameters_get_##name, NULL, NULL, NULL }
487
487
488 static PyGetSetDef ZstdCompressionParameters_getset[] = {
488 static PyGetSetDef ZstdCompressionParameters_getset[] = {
489 GET_SET_ENTRY(format),
489 GET_SET_ENTRY(format),
490 GET_SET_ENTRY(compression_level),
490 GET_SET_ENTRY(compression_level),
491 GET_SET_ENTRY(window_log),
491 GET_SET_ENTRY(window_log),
492 GET_SET_ENTRY(hash_log),
492 GET_SET_ENTRY(hash_log),
493 GET_SET_ENTRY(chain_log),
493 GET_SET_ENTRY(chain_log),
494 GET_SET_ENTRY(search_log),
494 GET_SET_ENTRY(search_log),
495 GET_SET_ENTRY(min_match),
495 GET_SET_ENTRY(min_match),
496 GET_SET_ENTRY(target_length),
496 GET_SET_ENTRY(target_length),
497 GET_SET_ENTRY(compression_strategy),
497 GET_SET_ENTRY(compression_strategy),
498 GET_SET_ENTRY(write_content_size),
498 GET_SET_ENTRY(write_content_size),
499 GET_SET_ENTRY(write_checksum),
499 GET_SET_ENTRY(write_checksum),
500 GET_SET_ENTRY(write_dict_id),
500 GET_SET_ENTRY(write_dict_id),
501 GET_SET_ENTRY(threads),
501 GET_SET_ENTRY(threads),
502 GET_SET_ENTRY(job_size),
502 GET_SET_ENTRY(job_size),
503 GET_SET_ENTRY(overlap_log),
503 GET_SET_ENTRY(overlap_log),
504 /* TODO remove this deprecated attribute */
504 /* TODO remove this deprecated attribute */
505 { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
505 { "overlap_size_log", ZstdCompressionParameters_get_overlap_log, NULL, NULL, NULL },
506 GET_SET_ENTRY(force_max_window),
506 GET_SET_ENTRY(force_max_window),
507 GET_SET_ENTRY(enable_ldm),
507 GET_SET_ENTRY(enable_ldm),
508 GET_SET_ENTRY(ldm_hash_log),
508 GET_SET_ENTRY(ldm_hash_log),
509 GET_SET_ENTRY(ldm_min_match),
509 GET_SET_ENTRY(ldm_min_match),
510 GET_SET_ENTRY(ldm_bucket_size_log),
510 GET_SET_ENTRY(ldm_bucket_size_log),
511 GET_SET_ENTRY(ldm_hash_rate_log),
511 GET_SET_ENTRY(ldm_hash_rate_log),
512 /* TODO remove this deprecated attribute */
512 /* TODO remove this deprecated attribute */
513 { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
513 { "ldm_hash_every_log", ZstdCompressionParameters_get_ldm_hash_rate_log, NULL, NULL, NULL },
514 { NULL }
514 { NULL }
515 };
515 };
516
516
517 PyTypeObject ZstdCompressionParametersType = {
517 PyTypeObject ZstdCompressionParametersType = {
518 PyVarObject_HEAD_INIT(NULL, 0)
518 PyVarObject_HEAD_INIT(NULL, 0)
519 "ZstdCompressionParameters", /* tp_name */
519 "ZstdCompressionParameters", /* tp_name */
520 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
520 sizeof(ZstdCompressionParametersObject), /* tp_basicsize */
521 0, /* tp_itemsize */
521 0, /* tp_itemsize */
522 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
522 (destructor)ZstdCompressionParameters_dealloc, /* tp_dealloc */
523 0, /* tp_print */
523 0, /* tp_print */
524 0, /* tp_getattr */
524 0, /* tp_getattr */
525 0, /* tp_setattr */
525 0, /* tp_setattr */
526 0, /* tp_compare */
526 0, /* tp_compare */
527 0, /* tp_repr */
527 0, /* tp_repr */
528 0, /* tp_as_number */
528 0, /* tp_as_number */
529 0, /* tp_as_sequence */
529 0, /* tp_as_sequence */
530 0, /* tp_as_mapping */
530 0, /* tp_as_mapping */
531 0, /* tp_hash */
531 0, /* tp_hash */
532 0, /* tp_call */
532 0, /* tp_call */
533 0, /* tp_str */
533 0, /* tp_str */
534 0, /* tp_getattro */
534 0, /* tp_getattro */
535 0, /* tp_setattro */
535 0, /* tp_setattro */
536 0, /* tp_as_buffer */
536 0, /* tp_as_buffer */
537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
537 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
538 ZstdCompressionParameters__doc__, /* tp_doc */
538 ZstdCompressionParameters__doc__, /* tp_doc */
539 0, /* tp_traverse */
539 0, /* tp_traverse */
540 0, /* tp_clear */
540 0, /* tp_clear */
541 0, /* tp_richcompare */
541 0, /* tp_richcompare */
542 0, /* tp_weaklistoffset */
542 0, /* tp_weaklistoffset */
543 0, /* tp_iter */
543 0, /* tp_iter */
544 0, /* tp_iternext */
544 0, /* tp_iternext */
545 ZstdCompressionParameters_methods, /* tp_methods */
545 ZstdCompressionParameters_methods, /* tp_methods */
546 0, /* tp_members */
546 0, /* tp_members */
547 ZstdCompressionParameters_getset, /* tp_getset */
547 ZstdCompressionParameters_getset, /* tp_getset */
548 0, /* tp_base */
548 0, /* tp_base */
549 0, /* tp_dict */
549 0, /* tp_dict */
550 0, /* tp_descr_get */
550 0, /* tp_descr_get */
551 0, /* tp_descr_set */
551 0, /* tp_descr_set */
552 0, /* tp_dictoffset */
552 0, /* tp_dictoffset */
553 (initproc)ZstdCompressionParameters_init, /* tp_init */
553 (initproc)ZstdCompressionParameters_init, /* tp_init */
554 0, /* tp_alloc */
554 0, /* tp_alloc */
555 PyType_GenericNew, /* tp_new */
555 PyType_GenericNew, /* tp_new */
556 };
556 };
557
557
558 void compressionparams_module_init(PyObject* mod) {
558 void compressionparams_module_init(PyObject* mod) {
559 Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
559 Py_TYPE(&ZstdCompressionParametersType) = &PyType_Type;
560 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
560 if (PyType_Ready(&ZstdCompressionParametersType) < 0) {
561 return;
561 return;
562 }
562 }
563
563
564 Py_INCREF(&ZstdCompressionParametersType);
564 Py_INCREF(&ZstdCompressionParametersType);
565 PyModule_AddObject(mod, "ZstdCompressionParameters",
565 PyModule_AddObject(mod, "ZstdCompressionParameters",
566 (PyObject*)&ZstdCompressionParametersType);
566 (PyObject*)&ZstdCompressionParametersType);
567
567
568 /* TODO remove deprecated alias. */
568 /* TODO remove deprecated alias. */
569 Py_INCREF(&ZstdCompressionParametersType);
569 Py_INCREF(&ZstdCompressionParametersType);
570 PyModule_AddObject(mod, "CompressionParameters",
570 PyModule_AddObject(mod, "CompressionParameters",
571 (PyObject*)&ZstdCompressionParametersType);
571 (PyObject*)&ZstdCompressionParametersType);
572 }
572 }
@@ -1,779 +1,781 b''
1 /**
1 /**
2 * Copyright (c) 2017-present, Gregory Szorc
2 * Copyright (c) 2017-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #include "python-zstandard.h"
9 #include "python-zstandard.h"
10
10
11 extern PyObject* ZstdError;
11 extern PyObject* ZstdError;
12
12
13 static void set_unsupported_operation(void) {
13 static void set_unsupported_operation(void) {
14 PyObject* iomod;
14 PyObject* iomod;
15 PyObject* exc;
15 PyObject* exc;
16
16
17 iomod = PyImport_ImportModule("io");
17 iomod = PyImport_ImportModule("io");
18 if (NULL == iomod) {
18 if (NULL == iomod) {
19 return;
19 return;
20 }
20 }
21
21
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
22 exc = PyObject_GetAttrString(iomod, "UnsupportedOperation");
23 if (NULL == exc) {
23 if (NULL == exc) {
24 Py_DECREF(iomod);
24 Py_DECREF(iomod);
25 return;
25 return;
26 }
26 }
27
27
28 PyErr_SetNone(exc);
28 PyErr_SetNone(exc);
29 Py_DECREF(exc);
29 Py_DECREF(exc);
30 Py_DECREF(iomod);
30 Py_DECREF(iomod);
31 }
31 }
32
32
33 static void reader_dealloc(ZstdDecompressionReader* self) {
33 static void reader_dealloc(ZstdDecompressionReader* self) {
34 Py_XDECREF(self->decompressor);
34 Py_XDECREF(self->decompressor);
35 Py_XDECREF(self->reader);
35 Py_XDECREF(self->reader);
36
36
37 if (self->buffer.buf) {
37 if (self->buffer.buf) {
38 PyBuffer_Release(&self->buffer);
38 PyBuffer_Release(&self->buffer);
39 }
39 }
40
40
41 PyObject_Del(self);
41 PyObject_Del(self);
42 }
42 }
43
43
44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
44 static ZstdDecompressionReader* reader_enter(ZstdDecompressionReader* self) {
45 if (self->entered) {
45 if (self->entered) {
46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
46 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 return NULL;
47 return NULL;
48 }
48 }
49
49
50 self->entered = 1;
50 self->entered = 1;
51
51
52 Py_INCREF(self);
52 Py_INCREF(self);
53 return self;
53 return self;
54 }
54 }
55
55
56 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
56 static PyObject* reader_exit(ZstdDecompressionReader* self, PyObject* args) {
57 PyObject* exc_type;
57 PyObject* exc_type;
58 PyObject* exc_value;
58 PyObject* exc_value;
59 PyObject* exc_tb;
59 PyObject* exc_tb;
60
60
61 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
61 if (!PyArg_ParseTuple(args, "OOO:__exit__", &exc_type, &exc_value, &exc_tb)) {
62 return NULL;
62 return NULL;
63 }
63 }
64
64
65 self->entered = 0;
65 self->entered = 0;
66 self->closed = 1;
66 self->closed = 1;
67
67
68 /* Release resources. */
68 /* Release resources. */
69 Py_CLEAR(self->reader);
69 Py_CLEAR(self->reader);
70 if (self->buffer.buf) {
70 if (self->buffer.buf) {
71 PyBuffer_Release(&self->buffer);
71 PyBuffer_Release(&self->buffer);
72 memset(&self->buffer, 0, sizeof(self->buffer));
72 memset(&self->buffer, 0, sizeof(self->buffer));
73 }
73 }
74
74
75 Py_CLEAR(self->decompressor);
75 Py_CLEAR(self->decompressor);
76
76
77 Py_RETURN_FALSE;
77 Py_RETURN_FALSE;
78 }
78 }
79
79
80 static PyObject* reader_readable(PyObject* self) {
80 static PyObject* reader_readable(PyObject* self) {
81 Py_RETURN_TRUE;
81 Py_RETURN_TRUE;
82 }
82 }
83
83
84 static PyObject* reader_writable(PyObject* self) {
84 static PyObject* reader_writable(PyObject* self) {
85 Py_RETURN_FALSE;
85 Py_RETURN_FALSE;
86 }
86 }
87
87
88 static PyObject* reader_seekable(PyObject* self) {
88 static PyObject* reader_seekable(PyObject* self) {
89 Py_RETURN_TRUE;
89 Py_RETURN_TRUE;
90 }
90 }
91
91
92 static PyObject* reader_close(ZstdDecompressionReader* self) {
92 static PyObject* reader_close(ZstdDecompressionReader* self) {
93 self->closed = 1;
93 self->closed = 1;
94 Py_RETURN_NONE;
94 Py_RETURN_NONE;
95 }
95 }
96
96
97 static PyObject* reader_flush(PyObject* self) {
97 static PyObject* reader_flush(PyObject* self) {
98 Py_RETURN_NONE;
98 Py_RETURN_NONE;
99 }
99 }
100
100
101 static PyObject* reader_isatty(PyObject* self) {
101 static PyObject* reader_isatty(PyObject* self) {
102 Py_RETURN_FALSE;
102 Py_RETURN_FALSE;
103 }
103 }
104
104
105 /**
105 /**
106 * Read available input.
106 * Read available input.
107 *
107 *
108 * Returns 0 if no data was added to input.
108 * Returns 0 if no data was added to input.
109 * Returns 1 if new input data is available.
109 * Returns 1 if new input data is available.
110 * Returns -1 on error and sets a Python exception as a side-effect.
110 * Returns -1 on error and sets a Python exception as a side-effect.
111 */
111 */
112 int read_decompressor_input(ZstdDecompressionReader* self) {
112 int read_decompressor_input(ZstdDecompressionReader* self) {
113 if (self->finishedInput) {
113 if (self->finishedInput) {
114 return 0;
114 return 0;
115 }
115 }
116
116
117 if (self->input.pos != self->input.size) {
117 if (self->input.pos != self->input.size) {
118 return 0;
118 return 0;
119 }
119 }
120
120
121 if (self->reader) {
121 if (self->reader) {
122 Py_buffer buffer;
122 Py_buffer buffer;
123
123
124 assert(self->readResult == NULL);
124 assert(self->readResult == NULL);
125 self->readResult = PyObject_CallMethod(self->reader, "read",
125 self->readResult = PyObject_CallMethod(self->reader, "read",
126 "k", self->readSize);
126 "k", self->readSize);
127 if (NULL == self->readResult) {
127 if (NULL == self->readResult) {
128 return -1;
128 return -1;
129 }
129 }
130
130
131 memset(&buffer, 0, sizeof(buffer));
131 memset(&buffer, 0, sizeof(buffer));
132
132
133 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
133 if (0 != PyObject_GetBuffer(self->readResult, &buffer, PyBUF_CONTIG_RO)) {
134 return -1;
134 return -1;
135 }
135 }
136
136
137 /* EOF */
137 /* EOF */
138 if (0 == buffer.len) {
138 if (0 == buffer.len) {
139 self->finishedInput = 1;
139 self->finishedInput = 1;
140 Py_CLEAR(self->readResult);
140 Py_CLEAR(self->readResult);
141 }
141 }
142 else {
142 else {
143 self->input.src = buffer.buf;
143 self->input.src = buffer.buf;
144 self->input.size = buffer.len;
144 self->input.size = buffer.len;
145 self->input.pos = 0;
145 self->input.pos = 0;
146 }
146 }
147
147
148 PyBuffer_Release(&buffer);
148 PyBuffer_Release(&buffer);
149 }
149 }
150 else {
150 else {
151 assert(self->buffer.buf);
151 assert(self->buffer.buf);
152 /*
152 /*
153 * We should only get here once since expectation is we always
153 * We should only get here once since expectation is we always
154 * exhaust input buffer before reading again.
154 * exhaust input buffer before reading again.
155 */
155 */
156 assert(self->input.src == NULL);
156 assert(self->input.src == NULL);
157
157
158 self->input.src = self->buffer.buf;
158 self->input.src = self->buffer.buf;
159 self->input.size = self->buffer.len;
159 self->input.size = self->buffer.len;
160 self->input.pos = 0;
160 self->input.pos = 0;
161 }
161 }
162
162
163 return 1;
163 return 1;
164 }
164 }
165
165
166 /**
166 /**
167 * Decompresses available input into an output buffer.
167 * Decompresses available input into an output buffer.
168 *
168 *
169 * Returns 0 if we need more input.
169 * Returns 0 if we need more input.
170 * Returns 1 if output buffer should be emitted.
170 * Returns 1 if output buffer should be emitted.
171 * Returns -1 on error and sets a Python exception.
171 * Returns -1 on error and sets a Python exception.
172 */
172 */
173 int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
173 int decompress_input(ZstdDecompressionReader* self, ZSTD_outBuffer* output) {
174 size_t zresult;
174 size_t zresult;
175
175
176 if (self->input.pos >= self->input.size) {
176 if (self->input.pos >= self->input.size) {
177 return 0;
177 return 0;
178 }
178 }
179
179
180 Py_BEGIN_ALLOW_THREADS
180 Py_BEGIN_ALLOW_THREADS
181 zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
181 zresult = ZSTD_decompressStream(self->decompressor->dctx, output, &self->input);
182 Py_END_ALLOW_THREADS
182 Py_END_ALLOW_THREADS
183
183
184 /* Input exhausted. Clear our state tracking. */
184 /* Input exhausted. Clear our state tracking. */
185 if (self->input.pos == self->input.size) {
185 if (self->input.pos == self->input.size) {
186 memset(&self->input, 0, sizeof(self->input));
186 memset(&self->input, 0, sizeof(self->input));
187 Py_CLEAR(self->readResult);
187 Py_CLEAR(self->readResult);
188
188
189 if (self->buffer.buf) {
189 if (self->buffer.buf) {
190 self->finishedInput = 1;
190 self->finishedInput = 1;
191 }
191 }
192 }
192 }
193
193
194 if (ZSTD_isError(zresult)) {
194 if (ZSTD_isError(zresult)) {
195 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
195 PyErr_Format(ZstdError, "zstd decompress error: %s", ZSTD_getErrorName(zresult));
196 return -1;
196 return -1;
197 }
197 }
198
198
199 /* We fulfilled the full read request. Signal to emit. */
199 /* We fulfilled the full read request. Signal to emit. */
200 if (output->pos && output->pos == output->size) {
200 if (output->pos && output->pos == output->size) {
201 return 1;
201 return 1;
202 }
202 }
203 /* We're at the end of a frame and we aren't allowed to return data
203 /* We're at the end of a frame and we aren't allowed to return data
204 spanning frames. */
204 spanning frames. */
205 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
205 else if (output->pos && zresult == 0 && !self->readAcrossFrames) {
206 return 1;
206 return 1;
207 }
207 }
208
208
209 /* There is more room in the output. Signal to collect more data. */
209 /* There is more room in the output. Signal to collect more data. */
210 return 0;
210 return 0;
211 }
211 }
212
212
213 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
213 static PyObject* reader_read(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
214 static char* kwlist[] = {
214 static char* kwlist[] = {
215 "size",
215 "size",
216 NULL
216 NULL
217 };
217 };
218
218
219 Py_ssize_t size = -1;
219 Py_ssize_t size = -1;
220 PyObject* result = NULL;
220 PyObject* result = NULL;
221 char* resultBuffer;
221 char* resultBuffer;
222 Py_ssize_t resultSize;
222 Py_ssize_t resultSize;
223 ZSTD_outBuffer output;
223 ZSTD_outBuffer output;
224 int decompressResult, readResult;
224 int decompressResult, readResult;
225
225
226 if (self->closed) {
226 if (self->closed) {
227 PyErr_SetString(PyExc_ValueError, "stream is closed");
227 PyErr_SetString(PyExc_ValueError, "stream is closed");
228 return NULL;
228 return NULL;
229 }
229 }
230
230
231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
231 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
232 return NULL;
232 return NULL;
233 }
233 }
234
234
235 if (size < -1) {
235 if (size < -1) {
236 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
236 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
237 return NULL;
237 return NULL;
238 }
238 }
239
239
240 if (size == -1) {
240 if (size == -1) {
241 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
241 return PyObject_CallMethod((PyObject*)self, "readall", NULL);
242 }
242 }
243
243
244 if (self->finishedOutput || size == 0) {
244 if (self->finishedOutput || size == 0) {
245 return PyBytes_FromStringAndSize("", 0);
245 return PyBytes_FromStringAndSize("", 0);
246 }
246 }
247
247
248 result = PyBytes_FromStringAndSize(NULL, size);
248 result = PyBytes_FromStringAndSize(NULL, size);
249 if (NULL == result) {
249 if (NULL == result) {
250 return NULL;
250 return NULL;
251 }
251 }
252
252
253 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
253 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
254
254
255 output.dst = resultBuffer;
255 output.dst = resultBuffer;
256 output.size = resultSize;
256 output.size = resultSize;
257 output.pos = 0;
257 output.pos = 0;
258
258
259 readinput:
259 readinput:
260
260
261 decompressResult = decompress_input(self, &output);
261 decompressResult = decompress_input(self, &output);
262
262
263 if (-1 == decompressResult) {
263 if (-1 == decompressResult) {
264 Py_XDECREF(result);
264 Py_XDECREF(result);
265 return NULL;
265 return NULL;
266 }
266 }
267 else if (0 == decompressResult) { }
267 else if (0 == decompressResult) { }
268 else if (1 == decompressResult) {
268 else if (1 == decompressResult) {
269 self->bytesDecompressed += output.pos;
269 self->bytesDecompressed += output.pos;
270
270
271 if (output.pos != output.size) {
271 if (output.pos != output.size) {
272 if (safe_pybytes_resize(&result, output.pos)) {
272 if (safe_pybytes_resize(&result, output.pos)) {
273 Py_XDECREF(result);
273 Py_XDECREF(result);
274 return NULL;
274 return NULL;
275 }
275 }
276 }
276 }
277 return result;
277 return result;
278 }
278 }
279 else {
279 else {
280 assert(0);
280 assert(0);
281 }
281 }
282
282
283 readResult = read_decompressor_input(self);
283 readResult = read_decompressor_input(self);
284
284
285 if (-1 == readResult) {
285 if (-1 == readResult) {
286 Py_XDECREF(result);
286 Py_XDECREF(result);
287 return NULL;
287 return NULL;
288 }
288 }
289 else if (0 == readResult) {}
289 else if (0 == readResult) {}
290 else if (1 == readResult) {}
290 else if (1 == readResult) {}
291 else {
291 else {
292 assert(0);
292 assert(0);
293 }
293 }
294
294
295 if (self->input.size) {
295 if (self->input.size) {
296 goto readinput;
296 goto readinput;
297 }
297 }
298
298
299 /* EOF */
299 /* EOF */
300 self->bytesDecompressed += output.pos;
300 self->bytesDecompressed += output.pos;
301
301
302 if (safe_pybytes_resize(&result, output.pos)) {
302 if (safe_pybytes_resize(&result, output.pos)) {
303 Py_XDECREF(result);
303 Py_XDECREF(result);
304 return NULL;
304 return NULL;
305 }
305 }
306
306
307 return result;
307 return result;
308 }
308 }
309
309
310 static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
310 static PyObject* reader_read1(ZstdDecompressionReader* self, PyObject* args, PyObject* kwargs) {
311 static char* kwlist[] = {
311 static char* kwlist[] = {
312 "size",
312 "size",
313 NULL
313 NULL
314 };
314 };
315
315
316 Py_ssize_t size = -1;
316 Py_ssize_t size = -1;
317 PyObject* result = NULL;
317 PyObject* result = NULL;
318 char* resultBuffer;
318 char* resultBuffer;
319 Py_ssize_t resultSize;
319 Py_ssize_t resultSize;
320 ZSTD_outBuffer output;
320 ZSTD_outBuffer output;
321
321
322 if (self->closed) {
322 if (self->closed) {
323 PyErr_SetString(PyExc_ValueError, "stream is closed");
323 PyErr_SetString(PyExc_ValueError, "stream is closed");
324 return NULL;
324 return NULL;
325 }
325 }
326
326
327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
327 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|n", kwlist, &size)) {
328 return NULL;
328 return NULL;
329 }
329 }
330
330
331 if (size < -1) {
331 if (size < -1) {
332 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
332 PyErr_SetString(PyExc_ValueError, "cannot read negative amounts less than -1");
333 return NULL;
333 return NULL;
334 }
334 }
335
335
336 if (self->finishedOutput || size == 0) {
336 if (self->finishedOutput || size == 0) {
337 return PyBytes_FromStringAndSize("", 0);
337 return PyBytes_FromStringAndSize("", 0);
338 }
338 }
339
339
340 if (size == -1) {
340 if (size == -1) {
341 size = ZSTD_DStreamOutSize();
341 size = ZSTD_DStreamOutSize();
342 }
342 }
343
343
344 result = PyBytes_FromStringAndSize(NULL, size);
344 result = PyBytes_FromStringAndSize(NULL, size);
345 if (NULL == result) {
345 if (NULL == result) {
346 return NULL;
346 return NULL;
347 }
347 }
348
348
349 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
349 PyBytes_AsStringAndSize(result, &resultBuffer, &resultSize);
350
350
351 output.dst = resultBuffer;
351 output.dst = resultBuffer;
352 output.size = resultSize;
352 output.size = resultSize;
353 output.pos = 0;
353 output.pos = 0;
354
354
355 /* read1() is supposed to use at most 1 read() from the underlying stream.
355 /* read1() is supposed to use at most 1 read() from the underlying stream.
356 * However, we can't satisfy this requirement with decompression due to the
356 * However, we can't satisfy this requirement with decompression due to the
357 * nature of how decompression works. Our strategy is to read + decompress
357 * nature of how decompression works. Our strategy is to read + decompress
358 * until we get any output, at which point we return. This satisfies the
358 * until we get any output, at which point we return. This satisfies the
359 * intent of the read1() API to limit read operations.
359 * intent of the read1() API to limit read operations.
360 */
360 */
361 while (!self->finishedInput) {
361 while (!self->finishedInput) {
362 int readResult, decompressResult;
362 int readResult, decompressResult;
363
363
364 readResult = read_decompressor_input(self);
364 readResult = read_decompressor_input(self);
365 if (-1 == readResult) {
365 if (-1 == readResult) {
366 Py_XDECREF(result);
366 Py_XDECREF(result);
367 return NULL;
367 return NULL;
368 }
368 }
369 else if (0 == readResult || 1 == readResult) { }
369 else if (0 == readResult || 1 == readResult) { }
370 else {
370 else {
371 assert(0);
371 assert(0);
372 }
372 }
373
373
374 decompressResult = decompress_input(self, &output);
374 decompressResult = decompress_input(self, &output);
375
375
376 if (-1 == decompressResult) {
376 if (-1 == decompressResult) {
377 Py_XDECREF(result);
377 Py_XDECREF(result);
378 return NULL;
378 return NULL;
379 }
379 }
380 else if (0 == decompressResult || 1 == decompressResult) { }
380 else if (0 == decompressResult || 1 == decompressResult) { }
381 else {
381 else {
382 assert(0);
382 assert(0);
383 }
383 }
384
384
385 if (output.pos) {
385 if (output.pos) {
386 break;
386 break;
387 }
387 }
388 }
388 }
389
389
390 self->bytesDecompressed += output.pos;
390 self->bytesDecompressed += output.pos;
391 if (safe_pybytes_resize(&result, output.pos)) {
391 if (safe_pybytes_resize(&result, output.pos)) {
392 Py_XDECREF(result);
392 Py_XDECREF(result);
393 return NULL;
393 return NULL;
394 }
394 }
395
395
396 return result;
396 return result;
397 }
397 }
398
398
399 static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
399 static PyObject* reader_readinto(ZstdDecompressionReader* self, PyObject* args) {
400 Py_buffer dest;
400 Py_buffer dest;
401 ZSTD_outBuffer output;
401 ZSTD_outBuffer output;
402 int decompressResult, readResult;
402 int decompressResult, readResult;
403 PyObject* result = NULL;
403 PyObject* result = NULL;
404
404
405 if (self->closed) {
405 if (self->closed) {
406 PyErr_SetString(PyExc_ValueError, "stream is closed");
406 PyErr_SetString(PyExc_ValueError, "stream is closed");
407 return NULL;
407 return NULL;
408 }
408 }
409
409
410 if (self->finishedOutput) {
410 if (self->finishedOutput) {
411 return PyLong_FromLong(0);
411 return PyLong_FromLong(0);
412 }
412 }
413
413
414 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
414 if (!PyArg_ParseTuple(args, "w*:readinto", &dest)) {
415 return NULL;
415 return NULL;
416 }
416 }
417
417
418 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
418 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
419 PyErr_SetString(PyExc_ValueError,
419 PyErr_SetString(PyExc_ValueError,
420 "destination buffer should be contiguous and have at most one dimension");
420 "destination buffer should be contiguous and have at most one dimension");
421 goto finally;
421 goto finally;
422 }
422 }
423
423
424 output.dst = dest.buf;
424 output.dst = dest.buf;
425 output.size = dest.len;
425 output.size = dest.len;
426 output.pos = 0;
426 output.pos = 0;
427
427
428 readinput:
428 readinput:
429
429
430 decompressResult = decompress_input(self, &output);
430 decompressResult = decompress_input(self, &output);
431
431
432 if (-1 == decompressResult) {
432 if (-1 == decompressResult) {
433 goto finally;
433 goto finally;
434 }
434 }
435 else if (0 == decompressResult) { }
435 else if (0 == decompressResult) { }
436 else if (1 == decompressResult) {
436 else if (1 == decompressResult) {
437 self->bytesDecompressed += output.pos;
437 self->bytesDecompressed += output.pos;
438 result = PyLong_FromSize_t(output.pos);
438 result = PyLong_FromSize_t(output.pos);
439 goto finally;
439 goto finally;
440 }
440 }
441 else {
441 else {
442 assert(0);
442 assert(0);
443 }
443 }
444
444
445 readResult = read_decompressor_input(self);
445 readResult = read_decompressor_input(self);
446
446
447 if (-1 == readResult) {
447 if (-1 == readResult) {
448 goto finally;
448 goto finally;
449 }
449 }
450 else if (0 == readResult) {}
450 else if (0 == readResult) {}
451 else if (1 == readResult) {}
451 else if (1 == readResult) {}
452 else {
452 else {
453 assert(0);
453 assert(0);
454 }
454 }
455
455
456 if (self->input.size) {
456 if (self->input.size) {
457 goto readinput;
457 goto readinput;
458 }
458 }
459
459
460 /* EOF */
460 /* EOF */
461 self->bytesDecompressed += output.pos;
461 self->bytesDecompressed += output.pos;
462 result = PyLong_FromSize_t(output.pos);
462 result = PyLong_FromSize_t(output.pos);
463
463
464 finally:
464 finally:
465 PyBuffer_Release(&dest);
465 PyBuffer_Release(&dest);
466
466
467 return result;
467 return result;
468 }
468 }
469
469
470 static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
470 static PyObject* reader_readinto1(ZstdDecompressionReader* self, PyObject* args) {
471 Py_buffer dest;
471 Py_buffer dest;
472 ZSTD_outBuffer output;
472 ZSTD_outBuffer output;
473 PyObject* result = NULL;
473 PyObject* result = NULL;
474
474
475 if (self->closed) {
475 if (self->closed) {
476 PyErr_SetString(PyExc_ValueError, "stream is closed");
476 PyErr_SetString(PyExc_ValueError, "stream is closed");
477 return NULL;
477 return NULL;
478 }
478 }
479
479
480 if (self->finishedOutput) {
480 if (self->finishedOutput) {
481 return PyLong_FromLong(0);
481 return PyLong_FromLong(0);
482 }
482 }
483
483
484 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
484 if (!PyArg_ParseTuple(args, "w*:readinto1", &dest)) {
485 return NULL;
485 return NULL;
486 }
486 }
487
487
488 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
488 if (!PyBuffer_IsContiguous(&dest, 'C') || dest.ndim > 1) {
489 PyErr_SetString(PyExc_ValueError,
489 PyErr_SetString(PyExc_ValueError,
490 "destination buffer should be contiguous and have at most one dimension");
490 "destination buffer should be contiguous and have at most one dimension");
491 goto finally;
491 goto finally;
492 }
492 }
493
493
494 output.dst = dest.buf;
494 output.dst = dest.buf;
495 output.size = dest.len;
495 output.size = dest.len;
496 output.pos = 0;
496 output.pos = 0;
497
497
498 while (!self->finishedInput && !self->finishedOutput) {
498 while (!self->finishedInput && !self->finishedOutput) {
499 int decompressResult, readResult;
499 int decompressResult, readResult;
500
500
501 readResult = read_decompressor_input(self);
501 readResult = read_decompressor_input(self);
502
502
503 if (-1 == readResult) {
503 if (-1 == readResult) {
504 goto finally;
504 goto finally;
505 }
505 }
506 else if (0 == readResult || 1 == readResult) {}
506 else if (0 == readResult || 1 == readResult) {}
507 else {
507 else {
508 assert(0);
508 assert(0);
509 }
509 }
510
510
511 decompressResult = decompress_input(self, &output);
511 decompressResult = decompress_input(self, &output);
512
512
513 if (-1 == decompressResult) {
513 if (-1 == decompressResult) {
514 goto finally;
514 goto finally;
515 }
515 }
516 else if (0 == decompressResult || 1 == decompressResult) {}
516 else if (0 == decompressResult || 1 == decompressResult) {}
517 else {
517 else {
518 assert(0);
518 assert(0);
519 }
519 }
520
520
521 if (output.pos) {
521 if (output.pos) {
522 break;
522 break;
523 }
523 }
524 }
524 }
525
525
526 self->bytesDecompressed += output.pos;
526 self->bytesDecompressed += output.pos;
527 result = PyLong_FromSize_t(output.pos);
527 result = PyLong_FromSize_t(output.pos);
528
528
529 finally:
529 finally:
530 PyBuffer_Release(&dest);
530 PyBuffer_Release(&dest);
531
531
532 return result;
532 return result;
533 }
533 }
534
534
535 static PyObject* reader_readall(PyObject* self) {
535 static PyObject* reader_readall(PyObject* self) {
536 PyObject* chunks = NULL;
536 PyObject* chunks = NULL;
537 PyObject* empty = NULL;
537 PyObject* empty = NULL;
538 PyObject* result = NULL;
538 PyObject* result = NULL;
539
539
540 /* Our strategy is to collect chunks into a list then join all the
540 /* Our strategy is to collect chunks into a list then join all the
541 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
541 * chunks at the end. We could potentially use e.g. an io.BytesIO. But
542 * this feels simple enough to implement and avoids potentially expensive
542 * this feels simple enough to implement and avoids potentially expensive
543 * reallocations of large buffers.
543 * reallocations of large buffers.
544 */
544 */
545 chunks = PyList_New(0);
545 chunks = PyList_New(0);
546 if (NULL == chunks) {
546 if (NULL == chunks) {
547 return NULL;
547 return NULL;
548 }
548 }
549
549
550 while (1) {
550 while (1) {
551 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
551 PyObject* chunk = PyObject_CallMethod(self, "read", "i", 1048576);
552 if (NULL == chunk) {
552 if (NULL == chunk) {
553 Py_DECREF(chunks);
553 Py_DECREF(chunks);
554 return NULL;
554 return NULL;
555 }
555 }
556
556
557 if (!PyBytes_Size(chunk)) {
557 if (!PyBytes_Size(chunk)) {
558 Py_DECREF(chunk);
558 Py_DECREF(chunk);
559 break;
559 break;
560 }
560 }
561
561
562 if (PyList_Append(chunks, chunk)) {
562 if (PyList_Append(chunks, chunk)) {
563 Py_DECREF(chunk);
563 Py_DECREF(chunk);
564 Py_DECREF(chunks);
564 Py_DECREF(chunks);
565 return NULL;
565 return NULL;
566 }
566 }
567
567
568 Py_DECREF(chunk);
568 Py_DECREF(chunk);
569 }
569 }
570
570
571 empty = PyBytes_FromStringAndSize("", 0);
571 empty = PyBytes_FromStringAndSize("", 0);
572 if (NULL == empty) {
572 if (NULL == empty) {
573 Py_DECREF(chunks);
573 Py_DECREF(chunks);
574 return NULL;
574 return NULL;
575 }
575 }
576
576
577 result = PyObject_CallMethod(empty, "join", "O", chunks);
577 result = PyObject_CallMethod(empty, "join", "O", chunks);
578
578
579 Py_DECREF(empty);
579 Py_DECREF(empty);
580 Py_DECREF(chunks);
580 Py_DECREF(chunks);
581
581
582 return result;
582 return result;
583 }
583 }
584
584
585 static PyObject* reader_readline(PyObject* self) {
585 static PyObject* reader_readline(PyObject* self) {
586 set_unsupported_operation();
586 set_unsupported_operation();
587 return NULL;
587 return NULL;
588 }
588 }
589
589
590 static PyObject* reader_readlines(PyObject* self) {
590 static PyObject* reader_readlines(PyObject* self) {
591 set_unsupported_operation();
591 set_unsupported_operation();
592 return NULL;
592 return NULL;
593 }
593 }
594
594
595 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
595 static PyObject* reader_seek(ZstdDecompressionReader* self, PyObject* args) {
596 Py_ssize_t pos;
596 Py_ssize_t pos;
597 int whence = 0;
597 int whence = 0;
598 unsigned long long readAmount = 0;
598 unsigned long long readAmount = 0;
599 size_t defaultOutSize = ZSTD_DStreamOutSize();
599 size_t defaultOutSize = ZSTD_DStreamOutSize();
600
600
601 if (self->closed) {
601 if (self->closed) {
602 PyErr_SetString(PyExc_ValueError, "stream is closed");
602 PyErr_SetString(PyExc_ValueError, "stream is closed");
603 return NULL;
603 return NULL;
604 }
604 }
605
605
606 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
606 if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &whence)) {
607 return NULL;
607 return NULL;
608 }
608 }
609
609
610 if (whence == SEEK_SET) {
610 if (whence == SEEK_SET) {
611 if (pos < 0) {
611 if (pos < 0) {
612 PyErr_SetString(PyExc_ValueError,
612 PyErr_SetString(PyExc_ValueError,
613 "cannot seek to negative position with SEEK_SET");
613 "cannot seek to negative position with SEEK_SET");
614 return NULL;
614 return NULL;
615 }
615 }
616
616
617 if ((unsigned long long)pos < self->bytesDecompressed) {
617 if ((unsigned long long)pos < self->bytesDecompressed) {
618 PyErr_SetString(PyExc_ValueError,
618 PyErr_SetString(PyExc_ValueError,
619 "cannot seek zstd decompression stream backwards");
619 "cannot seek zstd decompression stream backwards");
620 return NULL;
620 return NULL;
621 }
621 }
622
622
623 readAmount = pos - self->bytesDecompressed;
623 readAmount = pos - self->bytesDecompressed;
624 }
624 }
625 else if (whence == SEEK_CUR) {
625 else if (whence == SEEK_CUR) {
626 if (pos < 0) {
626 if (pos < 0) {
627 PyErr_SetString(PyExc_ValueError,
627 PyErr_SetString(PyExc_ValueError,
628 "cannot seek zstd decompression stream backwards");
628 "cannot seek zstd decompression stream backwards");
629 return NULL;
629 return NULL;
630 }
630 }
631
631
632 readAmount = pos;
632 readAmount = pos;
633 }
633 }
634 else if (whence == SEEK_END) {
634 else if (whence == SEEK_END) {
635 /* We /could/ support this with pos==0. But let's not do that until someone
635 /* We /could/ support this with pos==0. But let's not do that until someone
636 needs it. */
636 needs it. */
637 PyErr_SetString(PyExc_ValueError,
637 PyErr_SetString(PyExc_ValueError,
638 "zstd decompression streams cannot be seeked with SEEK_END");
638 "zstd decompression streams cannot be seeked with SEEK_END");
639 return NULL;
639 return NULL;
640 }
640 }
641
641
642 /* It is a bit inefficient to do this via the Python API. But since there
642 /* It is a bit inefficient to do this via the Python API. But since there
643 is a bit of state tracking involved to read from this type, it is the
643 is a bit of state tracking involved to read from this type, it is the
644 easiest to implement. */
644 easiest to implement. */
645 while (readAmount) {
645 while (readAmount) {
646 Py_ssize_t readSize;
646 Py_ssize_t readSize;
647 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
647 PyObject* readResult = PyObject_CallMethod((PyObject*)self, "read", "K",
648 readAmount < defaultOutSize ? readAmount : defaultOutSize);
648 readAmount < defaultOutSize ? readAmount : defaultOutSize);
649
649
650 if (!readResult) {
650 if (!readResult) {
651 return NULL;
651 return NULL;
652 }
652 }
653
653
654 readSize = PyBytes_GET_SIZE(readResult);
654 readSize = PyBytes_GET_SIZE(readResult);
655
655
656 Py_CLEAR(readResult);
657
656 /* Empty read means EOF. */
658 /* Empty read means EOF. */
657 if (!readSize) {
659 if (!readSize) {
658 break;
660 break;
659 }
661 }
660
662
661 readAmount -= readSize;
663 readAmount -= readSize;
662 }
664 }
663
665
664 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
666 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
665 }
667 }
666
668
667 static PyObject* reader_tell(ZstdDecompressionReader* self) {
669 static PyObject* reader_tell(ZstdDecompressionReader* self) {
668 /* TODO should this raise OSError since stream isn't seekable? */
670 /* TODO should this raise OSError since stream isn't seekable? */
669 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
671 return PyLong_FromUnsignedLongLong(self->bytesDecompressed);
670 }
672 }
671
673
672 static PyObject* reader_write(PyObject* self, PyObject* args) {
674 static PyObject* reader_write(PyObject* self, PyObject* args) {
673 set_unsupported_operation();
675 set_unsupported_operation();
674 return NULL;
676 return NULL;
675 }
677 }
676
678
677 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
679 static PyObject* reader_writelines(PyObject* self, PyObject* args) {
678 set_unsupported_operation();
680 set_unsupported_operation();
679 return NULL;
681 return NULL;
680 }
682 }
681
683
682 static PyObject* reader_iter(PyObject* self) {
684 static PyObject* reader_iter(PyObject* self) {
683 set_unsupported_operation();
685 set_unsupported_operation();
684 return NULL;
686 return NULL;
685 }
687 }
686
688
687 static PyObject* reader_iternext(PyObject* self) {
689 static PyObject* reader_iternext(PyObject* self) {
688 set_unsupported_operation();
690 set_unsupported_operation();
689 return NULL;
691 return NULL;
690 }
692 }
691
693
692 static PyMethodDef reader_methods[] = {
694 static PyMethodDef reader_methods[] = {
693 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
695 { "__enter__", (PyCFunction)reader_enter, METH_NOARGS,
694 PyDoc_STR("Enter a compression context") },
696 PyDoc_STR("Enter a compression context") },
695 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
697 { "__exit__", (PyCFunction)reader_exit, METH_VARARGS,
696 PyDoc_STR("Exit a compression context") },
698 PyDoc_STR("Exit a compression context") },
697 { "close", (PyCFunction)reader_close, METH_NOARGS,
699 { "close", (PyCFunction)reader_close, METH_NOARGS,
698 PyDoc_STR("Close the stream so it cannot perform any more operations") },
700 PyDoc_STR("Close the stream so it cannot perform any more operations") },
699 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
701 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
700 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
702 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
701 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
703 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
702 PyDoc_STR("Returns True") },
704 PyDoc_STR("Returns True") },
703 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
705 { "read", (PyCFunction)reader_read, METH_VARARGS | METH_KEYWORDS,
704 PyDoc_STR("read compressed data") },
706 PyDoc_STR("read compressed data") },
705 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
707 { "read1", (PyCFunction)reader_read1, METH_VARARGS | METH_KEYWORDS,
706 PyDoc_STR("read compressed data") },
708 PyDoc_STR("read compressed data") },
707 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
709 { "readinto", (PyCFunction)reader_readinto, METH_VARARGS, NULL },
708 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
710 { "readinto1", (PyCFunction)reader_readinto1, METH_VARARGS, NULL },
709 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
711 { "readall", (PyCFunction)reader_readall, METH_NOARGS, PyDoc_STR("Not implemented") },
710 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
712 { "readline", (PyCFunction)reader_readline, METH_NOARGS, PyDoc_STR("Not implemented") },
711 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
713 { "readlines", (PyCFunction)reader_readlines, METH_NOARGS, PyDoc_STR("Not implemented") },
712 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
714 { "seek", (PyCFunction)reader_seek, METH_VARARGS, PyDoc_STR("Seek the stream") },
713 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
715 { "seekable", (PyCFunction)reader_seekable, METH_NOARGS,
714 PyDoc_STR("Returns True") },
716 PyDoc_STR("Returns True") },
715 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
717 { "tell", (PyCFunction)reader_tell, METH_NOARGS,
716 PyDoc_STR("Returns current number of bytes compressed") },
718 PyDoc_STR("Returns current number of bytes compressed") },
717 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
719 { "writable", (PyCFunction)reader_writable, METH_NOARGS,
718 PyDoc_STR("Returns False") },
720 PyDoc_STR("Returns False") },
719 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
721 { "write", (PyCFunction)reader_write, METH_VARARGS, PyDoc_STR("unsupported operation") },
720 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
722 { "writelines", (PyCFunction)reader_writelines, METH_VARARGS, PyDoc_STR("unsupported operation") },
721 { NULL, NULL }
723 { NULL, NULL }
722 };
724 };
723
725
724 static PyMemberDef reader_members[] = {
726 static PyMemberDef reader_members[] = {
725 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
727 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
726 READONLY, "whether stream is closed" },
728 READONLY, "whether stream is closed" },
727 { NULL }
729 { NULL }
728 };
730 };
729
731
730 PyTypeObject ZstdDecompressionReaderType = {
732 PyTypeObject ZstdDecompressionReaderType = {
731 PyVarObject_HEAD_INIT(NULL, 0)
733 PyVarObject_HEAD_INIT(NULL, 0)
732 "zstd.ZstdDecompressionReader", /* tp_name */
734 "zstd.ZstdDecompressionReader", /* tp_name */
733 sizeof(ZstdDecompressionReader), /* tp_basicsize */
735 sizeof(ZstdDecompressionReader), /* tp_basicsize */
734 0, /* tp_itemsize */
736 0, /* tp_itemsize */
735 (destructor)reader_dealloc, /* tp_dealloc */
737 (destructor)reader_dealloc, /* tp_dealloc */
736 0, /* tp_print */
738 0, /* tp_print */
737 0, /* tp_getattr */
739 0, /* tp_getattr */
738 0, /* tp_setattr */
740 0, /* tp_setattr */
739 0, /* tp_compare */
741 0, /* tp_compare */
740 0, /* tp_repr */
742 0, /* tp_repr */
741 0, /* tp_as_number */
743 0, /* tp_as_number */
742 0, /* tp_as_sequence */
744 0, /* tp_as_sequence */
743 0, /* tp_as_mapping */
745 0, /* tp_as_mapping */
744 0, /* tp_hash */
746 0, /* tp_hash */
745 0, /* tp_call */
747 0, /* tp_call */
746 0, /* tp_str */
748 0, /* tp_str */
747 0, /* tp_getattro */
749 0, /* tp_getattro */
748 0, /* tp_setattro */
750 0, /* tp_setattro */
749 0, /* tp_as_buffer */
751 0, /* tp_as_buffer */
750 Py_TPFLAGS_DEFAULT, /* tp_flags */
752 Py_TPFLAGS_DEFAULT, /* tp_flags */
751 0, /* tp_doc */
753 0, /* tp_doc */
752 0, /* tp_traverse */
754 0, /* tp_traverse */
753 0, /* tp_clear */
755 0, /* tp_clear */
754 0, /* tp_richcompare */
756 0, /* tp_richcompare */
755 0, /* tp_weaklistoffset */
757 0, /* tp_weaklistoffset */
756 reader_iter, /* tp_iter */
758 reader_iter, /* tp_iter */
757 reader_iternext, /* tp_iternext */
759 reader_iternext, /* tp_iternext */
758 reader_methods, /* tp_methods */
760 reader_methods, /* tp_methods */
759 reader_members, /* tp_members */
761 reader_members, /* tp_members */
760 0, /* tp_getset */
762 0, /* tp_getset */
761 0, /* tp_base */
763 0, /* tp_base */
762 0, /* tp_dict */
764 0, /* tp_dict */
763 0, /* tp_descr_get */
765 0, /* tp_descr_get */
764 0, /* tp_descr_set */
766 0, /* tp_descr_set */
765 0, /* tp_dictoffset */
767 0, /* tp_dictoffset */
766 0, /* tp_init */
768 0, /* tp_init */
767 0, /* tp_alloc */
769 0, /* tp_alloc */
768 PyType_GenericNew, /* tp_new */
770 PyType_GenericNew, /* tp_new */
769 };
771 };
770
772
771
773
772 void decompressionreader_module_init(PyObject* mod) {
774 void decompressionreader_module_init(PyObject* mod) {
773 /* TODO make reader a sub-class of io.RawIOBase */
775 /* TODO make reader a sub-class of io.RawIOBase */
774
776
775 Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
777 Py_TYPE(&ZstdDecompressionReaderType) = &PyType_Type;
776 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
778 if (PyType_Ready(&ZstdDecompressionReaderType) < 0) {
777 return;
779 return;
778 }
780 }
779 }
781 }
@@ -1,359 +1,359 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 #define PY_SSIZE_T_CLEAN
9 #define PY_SSIZE_T_CLEAN
10 #include <Python.h>
10 #include <Python.h>
11 #include "structmember.h"
11 #include "structmember.h"
12
12
13 #define ZSTD_STATIC_LINKING_ONLY
13 #define ZSTD_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
14 #define ZDICT_STATIC_LINKING_ONLY
15 #include <zstd.h>
15 #include <zstd.h>
16 #include <zdict.h>
16 #include <zdict.h>
17
17
18 /* Remember to change the string in zstandard/__init__ as well */
18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.11.0"
19 #define PYTHON_ZSTANDARD_VERSION "0.12.0"
20
20
21 typedef enum {
21 typedef enum {
22 compressorobj_flush_finish,
22 compressorobj_flush_finish,
23 compressorobj_flush_block,
23 compressorobj_flush_block,
24 } CompressorObj_Flush;
24 } CompressorObj_Flush;
25
25
26 /*
26 /*
27 Represents a ZstdCompressionParameters type.
27 Represents a ZstdCompressionParameters type.
28
28
29 This type holds all the low-level compression parameters that can be set.
29 This type holds all the low-level compression parameters that can be set.
30 */
30 */
31 typedef struct {
31 typedef struct {
32 PyObject_HEAD
32 PyObject_HEAD
33 ZSTD_CCtx_params* params;
33 ZSTD_CCtx_params* params;
34 } ZstdCompressionParametersObject;
34 } ZstdCompressionParametersObject;
35
35
36 extern PyTypeObject ZstdCompressionParametersType;
36 extern PyTypeObject ZstdCompressionParametersType;
37
37
38 /*
38 /*
39 Represents a FrameParameters type.
39 Represents a FrameParameters type.
40
40
41 This type is basically a wrapper around ZSTD_frameParams.
41 This type is basically a wrapper around ZSTD_frameParams.
42 */
42 */
43 typedef struct {
43 typedef struct {
44 PyObject_HEAD
44 PyObject_HEAD
45 unsigned long long frameContentSize;
45 unsigned long long frameContentSize;
46 unsigned long long windowSize;
46 unsigned long long windowSize;
47 unsigned dictID;
47 unsigned dictID;
48 char checksumFlag;
48 char checksumFlag;
49 } FrameParametersObject;
49 } FrameParametersObject;
50
50
51 extern PyTypeObject FrameParametersType;
51 extern PyTypeObject FrameParametersType;
52
52
53 /*
53 /*
54 Represents a ZstdCompressionDict type.
54 Represents a ZstdCompressionDict type.
55
55
56 Instances hold data used for a zstd compression dictionary.
56 Instances hold data used for a zstd compression dictionary.
57 */
57 */
58 typedef struct {
58 typedef struct {
59 PyObject_HEAD
59 PyObject_HEAD
60
60
61 /* Pointer to dictionary data. Owned by self. */
61 /* Pointer to dictionary data. Owned by self. */
62 void* dictData;
62 void* dictData;
63 /* Size of dictionary data. */
63 /* Size of dictionary data. */
64 size_t dictSize;
64 size_t dictSize;
65 ZSTD_dictContentType_e dictType;
65 ZSTD_dictContentType_e dictType;
66 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
66 /* k parameter for cover dictionaries. Only populated by train_cover_dict(). */
67 unsigned k;
67 unsigned k;
68 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
68 /* d parameter for cover dictionaries. Only populated by train_cover_dict(). */
69 unsigned d;
69 unsigned d;
70 /* Digested dictionary, suitable for reuse. */
70 /* Digested dictionary, suitable for reuse. */
71 ZSTD_CDict* cdict;
71 ZSTD_CDict* cdict;
72 ZSTD_DDict* ddict;
72 ZSTD_DDict* ddict;
73 } ZstdCompressionDict;
73 } ZstdCompressionDict;
74
74
75 extern PyTypeObject ZstdCompressionDictType;
75 extern PyTypeObject ZstdCompressionDictType;
76
76
77 /*
77 /*
78 Represents a ZstdCompressor type.
78 Represents a ZstdCompressor type.
79 */
79 */
80 typedef struct {
80 typedef struct {
81 PyObject_HEAD
81 PyObject_HEAD
82
82
83 /* Number of threads to use for operations. */
83 /* Number of threads to use for operations. */
84 unsigned int threads;
84 unsigned int threads;
85 /* Pointer to compression dictionary to use. NULL if not using dictionary
85 /* Pointer to compression dictionary to use. NULL if not using dictionary
86 compression. */
86 compression. */
87 ZstdCompressionDict* dict;
87 ZstdCompressionDict* dict;
88 /* Compression context to use. Populated during object construction. */
88 /* Compression context to use. Populated during object construction. */
89 ZSTD_CCtx* cctx;
89 ZSTD_CCtx* cctx;
90 /* Compression parameters in use. */
90 /* Compression parameters in use. */
91 ZSTD_CCtx_params* params;
91 ZSTD_CCtx_params* params;
92 } ZstdCompressor;
92 } ZstdCompressor;
93
93
94 extern PyTypeObject ZstdCompressorType;
94 extern PyTypeObject ZstdCompressorType;
95
95
96 typedef struct {
96 typedef struct {
97 PyObject_HEAD
97 PyObject_HEAD
98
98
99 ZstdCompressor* compressor;
99 ZstdCompressor* compressor;
100 ZSTD_outBuffer output;
100 ZSTD_outBuffer output;
101 int finished;
101 int finished;
102 } ZstdCompressionObj;
102 } ZstdCompressionObj;
103
103
104 extern PyTypeObject ZstdCompressionObjType;
104 extern PyTypeObject ZstdCompressionObjType;
105
105
106 typedef struct {
106 typedef struct {
107 PyObject_HEAD
107 PyObject_HEAD
108
108
109 ZstdCompressor* compressor;
109 ZstdCompressor* compressor;
110 PyObject* writer;
110 PyObject* writer;
111 ZSTD_outBuffer output;
111 ZSTD_outBuffer output;
112 size_t outSize;
112 size_t outSize;
113 int entered;
113 int entered;
114 int closed;
114 int closed;
115 int writeReturnRead;
115 int writeReturnRead;
116 unsigned long long bytesCompressed;
116 unsigned long long bytesCompressed;
117 } ZstdCompressionWriter;
117 } ZstdCompressionWriter;
118
118
119 extern PyTypeObject ZstdCompressionWriterType;
119 extern PyTypeObject ZstdCompressionWriterType;
120
120
121 typedef struct {
121 typedef struct {
122 PyObject_HEAD
122 PyObject_HEAD
123
123
124 ZstdCompressor* compressor;
124 ZstdCompressor* compressor;
125 PyObject* reader;
125 PyObject* reader;
126 Py_buffer buffer;
126 Py_buffer buffer;
127 Py_ssize_t bufferOffset;
127 Py_ssize_t bufferOffset;
128 size_t inSize;
128 size_t inSize;
129 size_t outSize;
129 size_t outSize;
130
130
131 ZSTD_inBuffer input;
131 ZSTD_inBuffer input;
132 ZSTD_outBuffer output;
132 ZSTD_outBuffer output;
133 int finishedOutput;
133 int finishedOutput;
134 int finishedInput;
134 int finishedInput;
135 PyObject* readResult;
135 PyObject* readResult;
136 } ZstdCompressorIterator;
136 } ZstdCompressorIterator;
137
137
138 extern PyTypeObject ZstdCompressorIteratorType;
138 extern PyTypeObject ZstdCompressorIteratorType;
139
139
140 typedef struct {
140 typedef struct {
141 PyObject_HEAD
141 PyObject_HEAD
142
142
143 ZstdCompressor* compressor;
143 ZstdCompressor* compressor;
144 PyObject* reader;
144 PyObject* reader;
145 Py_buffer buffer;
145 Py_buffer buffer;
146 size_t readSize;
146 size_t readSize;
147
147
148 int entered;
148 int entered;
149 int closed;
149 int closed;
150 unsigned long long bytesCompressed;
150 unsigned long long bytesCompressed;
151
151
152 ZSTD_inBuffer input;
152 ZSTD_inBuffer input;
153 ZSTD_outBuffer output;
153 ZSTD_outBuffer output;
154 int finishedInput;
154 int finishedInput;
155 int finishedOutput;
155 int finishedOutput;
156 PyObject* readResult;
156 PyObject* readResult;
157 } ZstdCompressionReader;
157 } ZstdCompressionReader;
158
158
159 extern PyTypeObject ZstdCompressionReaderType;
159 extern PyTypeObject ZstdCompressionReaderType;
160
160
161 typedef struct {
161 typedef struct {
162 PyObject_HEAD
162 PyObject_HEAD
163
163
164 ZstdCompressor* compressor;
164 ZstdCompressor* compressor;
165 ZSTD_inBuffer input;
165 ZSTD_inBuffer input;
166 ZSTD_outBuffer output;
166 ZSTD_outBuffer output;
167 Py_buffer inBuffer;
167 Py_buffer inBuffer;
168 int finished;
168 int finished;
169 size_t chunkSize;
169 size_t chunkSize;
170 } ZstdCompressionChunker;
170 } ZstdCompressionChunker;
171
171
172 extern PyTypeObject ZstdCompressionChunkerType;
172 extern PyTypeObject ZstdCompressionChunkerType;
173
173
174 typedef enum {
174 typedef enum {
175 compressionchunker_mode_normal,
175 compressionchunker_mode_normal,
176 compressionchunker_mode_flush,
176 compressionchunker_mode_flush,
177 compressionchunker_mode_finish,
177 compressionchunker_mode_finish,
178 } CompressionChunkerMode;
178 } CompressionChunkerMode;
179
179
180 typedef struct {
180 typedef struct {
181 PyObject_HEAD
181 PyObject_HEAD
182
182
183 ZstdCompressionChunker* chunker;
183 ZstdCompressionChunker* chunker;
184 CompressionChunkerMode mode;
184 CompressionChunkerMode mode;
185 } ZstdCompressionChunkerIterator;
185 } ZstdCompressionChunkerIterator;
186
186
187 extern PyTypeObject ZstdCompressionChunkerIteratorType;
187 extern PyTypeObject ZstdCompressionChunkerIteratorType;
188
188
189 typedef struct {
189 typedef struct {
190 PyObject_HEAD
190 PyObject_HEAD
191
191
192 ZSTD_DCtx* dctx;
192 ZSTD_DCtx* dctx;
193 ZstdCompressionDict* dict;
193 ZstdCompressionDict* dict;
194 size_t maxWindowSize;
194 size_t maxWindowSize;
195 ZSTD_format_e format;
195 ZSTD_format_e format;
196 } ZstdDecompressor;
196 } ZstdDecompressor;
197
197
198 extern PyTypeObject ZstdDecompressorType;
198 extern PyTypeObject ZstdDecompressorType;
199
199
200 typedef struct {
200 typedef struct {
201 PyObject_HEAD
201 PyObject_HEAD
202
202
203 ZstdDecompressor* decompressor;
203 ZstdDecompressor* decompressor;
204 size_t outSize;
204 size_t outSize;
205 int finished;
205 int finished;
206 } ZstdDecompressionObj;
206 } ZstdDecompressionObj;
207
207
208 extern PyTypeObject ZstdDecompressionObjType;
208 extern PyTypeObject ZstdDecompressionObjType;
209
209
210 typedef struct {
210 typedef struct {
211 PyObject_HEAD
211 PyObject_HEAD
212
212
213 /* Parent decompressor to which this object is associated. */
213 /* Parent decompressor to which this object is associated. */
214 ZstdDecompressor* decompressor;
214 ZstdDecompressor* decompressor;
215 /* Object to read() from (if reading from a stream). */
215 /* Object to read() from (if reading from a stream). */
216 PyObject* reader;
216 PyObject* reader;
217 /* Size for read() operations on reader. */
217 /* Size for read() operations on reader. */
218 size_t readSize;
218 size_t readSize;
219 /* Whether a read() can return data spanning multiple zstd frames. */
219 /* Whether a read() can return data spanning multiple zstd frames. */
220 int readAcrossFrames;
220 int readAcrossFrames;
221 /* Buffer to read from (if reading from a buffer). */
221 /* Buffer to read from (if reading from a buffer). */
222 Py_buffer buffer;
222 Py_buffer buffer;
223
223
224 /* Whether the context manager is active. */
224 /* Whether the context manager is active. */
225 int entered;
225 int entered;
226 /* Whether we've closed the stream. */
226 /* Whether we've closed the stream. */
227 int closed;
227 int closed;
228
228
229 /* Number of bytes decompressed and returned to user. */
229 /* Number of bytes decompressed and returned to user. */
230 unsigned long long bytesDecompressed;
230 unsigned long long bytesDecompressed;
231
231
232 /* Tracks data going into decompressor. */
232 /* Tracks data going into decompressor. */
233 ZSTD_inBuffer input;
233 ZSTD_inBuffer input;
234
234
235 /* Holds output from read() operation on reader. */
235 /* Holds output from read() operation on reader. */
236 PyObject* readResult;
236 PyObject* readResult;
237
237
238 /* Whether all input has been sent to the decompressor. */
238 /* Whether all input has been sent to the decompressor. */
239 int finishedInput;
239 int finishedInput;
240 /* Whether all output has been flushed from the decompressor. */
240 /* Whether all output has been flushed from the decompressor. */
241 int finishedOutput;
241 int finishedOutput;
242 } ZstdDecompressionReader;
242 } ZstdDecompressionReader;
243
243
244 extern PyTypeObject ZstdDecompressionReaderType;
244 extern PyTypeObject ZstdDecompressionReaderType;
245
245
246 typedef struct {
246 typedef struct {
247 PyObject_HEAD
247 PyObject_HEAD
248
248
249 ZstdDecompressor* decompressor;
249 ZstdDecompressor* decompressor;
250 PyObject* writer;
250 PyObject* writer;
251 size_t outSize;
251 size_t outSize;
252 int entered;
252 int entered;
253 int closed;
253 int closed;
254 int writeReturnRead;
254 int writeReturnRead;
255 } ZstdDecompressionWriter;
255 } ZstdDecompressionWriter;
256
256
257 extern PyTypeObject ZstdDecompressionWriterType;
257 extern PyTypeObject ZstdDecompressionWriterType;
258
258
259 typedef struct {
259 typedef struct {
260 PyObject_HEAD
260 PyObject_HEAD
261
261
262 ZstdDecompressor* decompressor;
262 ZstdDecompressor* decompressor;
263 PyObject* reader;
263 PyObject* reader;
264 Py_buffer buffer;
264 Py_buffer buffer;
265 Py_ssize_t bufferOffset;
265 Py_ssize_t bufferOffset;
266 size_t inSize;
266 size_t inSize;
267 size_t outSize;
267 size_t outSize;
268 size_t skipBytes;
268 size_t skipBytes;
269 ZSTD_inBuffer input;
269 ZSTD_inBuffer input;
270 ZSTD_outBuffer output;
270 ZSTD_outBuffer output;
271 Py_ssize_t readCount;
271 Py_ssize_t readCount;
272 int finishedInput;
272 int finishedInput;
273 int finishedOutput;
273 int finishedOutput;
274 } ZstdDecompressorIterator;
274 } ZstdDecompressorIterator;
275
275
276 extern PyTypeObject ZstdDecompressorIteratorType;
276 extern PyTypeObject ZstdDecompressorIteratorType;
277
277
278 typedef struct {
278 typedef struct {
279 int errored;
279 int errored;
280 PyObject* chunk;
280 PyObject* chunk;
281 } DecompressorIteratorResult;
281 } DecompressorIteratorResult;
282
282
283 typedef struct {
283 typedef struct {
284 /* The public API is that these are 64-bit unsigned integers. So these can't
284 /* The public API is that these are 64-bit unsigned integers. So these can't
285 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
285 * be size_t, even though values larger than SIZE_MAX or PY_SSIZE_T_MAX may
286 * be nonsensical for this platform. */
286 * be nonsensical for this platform. */
287 unsigned long long offset;
287 unsigned long long offset;
288 unsigned long long length;
288 unsigned long long length;
289 } BufferSegment;
289 } BufferSegment;
290
290
291 typedef struct {
291 typedef struct {
292 PyObject_HEAD
292 PyObject_HEAD
293
293
294 PyObject* parent;
294 PyObject* parent;
295 BufferSegment* segments;
295 BufferSegment* segments;
296 Py_ssize_t segmentCount;
296 Py_ssize_t segmentCount;
297 } ZstdBufferSegments;
297 } ZstdBufferSegments;
298
298
299 extern PyTypeObject ZstdBufferSegmentsType;
299 extern PyTypeObject ZstdBufferSegmentsType;
300
300
301 typedef struct {
301 typedef struct {
302 PyObject_HEAD
302 PyObject_HEAD
303
303
304 PyObject* parent;
304 PyObject* parent;
305 void* data;
305 void* data;
306 Py_ssize_t dataSize;
306 Py_ssize_t dataSize;
307 unsigned long long offset;
307 unsigned long long offset;
308 } ZstdBufferSegment;
308 } ZstdBufferSegment;
309
309
310 extern PyTypeObject ZstdBufferSegmentType;
310 extern PyTypeObject ZstdBufferSegmentType;
311
311
312 typedef struct {
312 typedef struct {
313 PyObject_HEAD
313 PyObject_HEAD
314
314
315 Py_buffer parent;
315 Py_buffer parent;
316 void* data;
316 void* data;
317 unsigned long long dataSize;
317 unsigned long long dataSize;
318 BufferSegment* segments;
318 BufferSegment* segments;
319 Py_ssize_t segmentCount;
319 Py_ssize_t segmentCount;
320 int useFree;
320 int useFree;
321 } ZstdBufferWithSegments;
321 } ZstdBufferWithSegments;
322
322
323 extern PyTypeObject ZstdBufferWithSegmentsType;
323 extern PyTypeObject ZstdBufferWithSegmentsType;
324
324
325 /**
325 /**
326 * An ordered collection of BufferWithSegments exposed as a squashed collection.
326 * An ordered collection of BufferWithSegments exposed as a squashed collection.
327 *
327 *
328 * This type provides a virtual view spanning multiple BufferWithSegments
328 * This type provides a virtual view spanning multiple BufferWithSegments
329 * instances. It allows multiple instances to be "chained" together and
329 * instances. It allows multiple instances to be "chained" together and
330 * exposed as a single collection. e.g. if there are 2 buffers holding
330 * exposed as a single collection. e.g. if there are 2 buffers holding
331 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
331 * 10 segments each, then o[14] will access the 5th segment in the 2nd buffer.
332 */
332 */
333 typedef struct {
333 typedef struct {
334 PyObject_HEAD
334 PyObject_HEAD
335
335
336 /* An array of buffers that should be exposed through this instance. */
336 /* An array of buffers that should be exposed through this instance. */
337 ZstdBufferWithSegments** buffers;
337 ZstdBufferWithSegments** buffers;
338 /* Number of elements in buffers array. */
338 /* Number of elements in buffers array. */
339 Py_ssize_t bufferCount;
339 Py_ssize_t bufferCount;
340 /* Array of first offset in each buffer instance. 0th entry corresponds
340 /* Array of first offset in each buffer instance. 0th entry corresponds
341 to number of elements in the 0th buffer. 1st entry corresponds to the
341 to number of elements in the 0th buffer. 1st entry corresponds to the
342 sum of elements in 0th and 1st buffers. */
342 sum of elements in 0th and 1st buffers. */
343 Py_ssize_t* firstElements;
343 Py_ssize_t* firstElements;
344 } ZstdBufferWithSegmentsCollection;
344 } ZstdBufferWithSegmentsCollection;
345
345
346 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
346 extern PyTypeObject ZstdBufferWithSegmentsCollectionType;
347
347
348 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
348 int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
349 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
349 int set_parameters(ZSTD_CCtx_params* params, ZstdCompressionParametersObject* obj);
350 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
350 int to_cparams(ZstdCompressionParametersObject* params, ZSTD_compressionParameters* cparams);
351 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
351 FrameParametersObject* get_frame_parameters(PyObject* self, PyObject* args, PyObject* kwargs);
352 int ensure_ddict(ZstdCompressionDict* dict);
352 int ensure_ddict(ZstdCompressionDict* dict);
353 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
353 int ensure_dctx(ZstdDecompressor* decompressor, int loadDict);
354 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
354 ZstdCompressionDict* train_dictionary(PyObject* self, PyObject* args, PyObject* kwargs);
355 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
355 ZstdBufferWithSegments* BufferWithSegments_FromMemory(void* data, unsigned long long dataSize, BufferSegment* segments, Py_ssize_t segmentsSize);
356 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
356 Py_ssize_t BufferWithSegmentsCollection_length(ZstdBufferWithSegmentsCollection*);
357 int cpu_count(void);
357 int cpu_count(void);
358 size_t roundpow2(size_t);
358 size_t roundpow2(size_t);
359 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
359 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size);
@@ -1,201 +1,207 b''
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import cffi
9 import cffi
10 import distutils.ccompiler
10 import distutils.ccompiler
11 import os
11 import os
12 import re
12 import re
13 import subprocess
13 import subprocess
14 import tempfile
14 import tempfile
15
15
16
16
17 HERE = os.path.abspath(os.path.dirname(__file__))
17 HERE = os.path.abspath(os.path.dirname(__file__))
18
18
19 SOURCES = ['zstd/%s' % p for p in (
19 SOURCES = ['zstd/%s' % p for p in (
20 'common/debug.c',
20 'common/debug.c',
21 'common/entropy_common.c',
21 'common/entropy_common.c',
22 'common/error_private.c',
22 'common/error_private.c',
23 'common/fse_decompress.c',
23 'common/fse_decompress.c',
24 'common/pool.c',
24 'common/pool.c',
25 'common/threading.c',
25 'common/threading.c',
26 'common/xxhash.c',
26 'common/xxhash.c',
27 'common/zstd_common.c',
27 'common/zstd_common.c',
28 'compress/fse_compress.c',
28 'compress/fse_compress.c',
29 'compress/hist.c',
29 'compress/hist.c',
30 'compress/huf_compress.c',
30 'compress/huf_compress.c',
31 'compress/zstd_compress.c',
31 'compress/zstd_compress.c',
32 'compress/zstd_compress_literals.c',
33 'compress/zstd_compress_sequences.c',
32 'compress/zstd_double_fast.c',
34 'compress/zstd_double_fast.c',
33 'compress/zstd_fast.c',
35 'compress/zstd_fast.c',
34 'compress/zstd_lazy.c',
36 'compress/zstd_lazy.c',
35 'compress/zstd_ldm.c',
37 'compress/zstd_ldm.c',
36 'compress/zstd_opt.c',
38 'compress/zstd_opt.c',
37 'compress/zstdmt_compress.c',
39 'compress/zstdmt_compress.c',
38 'decompress/huf_decompress.c',
40 'decompress/huf_decompress.c',
39 'decompress/zstd_ddict.c',
41 'decompress/zstd_ddict.c',
40 'decompress/zstd_decompress.c',
42 'decompress/zstd_decompress.c',
41 'decompress/zstd_decompress_block.c',
43 'decompress/zstd_decompress_block.c',
42 'dictBuilder/cover.c',
44 'dictBuilder/cover.c',
43 'dictBuilder/fastcover.c',
45 'dictBuilder/fastcover.c',
44 'dictBuilder/divsufsort.c',
46 'dictBuilder/divsufsort.c',
45 'dictBuilder/zdict.c',
47 'dictBuilder/zdict.c',
46 )]
48 )]
47
49
48 # Headers whose preprocessed output will be fed into cdef().
50 # Headers whose preprocessed output will be fed into cdef().
49 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
51 HEADERS = [os.path.join(HERE, 'zstd', *p) for p in (
50 ('zstd.h',),
52 ('zstd.h',),
51 ('dictBuilder', 'zdict.h'),
53 ('dictBuilder', 'zdict.h'),
52 )]
54 )]
53
55
54 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
56 INCLUDE_DIRS = [os.path.join(HERE, d) for d in (
55 'zstd',
57 'zstd',
56 'zstd/common',
58 'zstd/common',
57 'zstd/compress',
59 'zstd/compress',
58 'zstd/decompress',
60 'zstd/decompress',
59 'zstd/dictBuilder',
61 'zstd/dictBuilder',
60 )]
62 )]
61
63
62 # cffi can't parse some of the primitives in zstd.h. So we invoke the
64 # cffi can't parse some of the primitives in zstd.h. So we invoke the
63 # preprocessor and feed its output into cffi.
65 # preprocessor and feed its output into cffi.
64 compiler = distutils.ccompiler.new_compiler()
66 compiler = distutils.ccompiler.new_compiler()
65
67
66 # Needed for MSVC.
68 # Needed for MSVC.
67 if hasattr(compiler, 'initialize'):
69 if hasattr(compiler, 'initialize'):
68 compiler.initialize()
70 compiler.initialize()
69
71
70 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
72 # Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
71 # manually.
73 # manually.
72 if compiler.compiler_type == 'unix':
74 if compiler.compiler_type == 'unix':
73 args = list(compiler.executables['compiler'])
75 args = list(compiler.executables['compiler'])
74 args.extend([
76 args.extend([
75 '-E',
77 '-E',
76 '-DZSTD_STATIC_LINKING_ONLY',
78 '-DZSTD_STATIC_LINKING_ONLY',
77 '-DZDICT_STATIC_LINKING_ONLY',
79 '-DZDICT_STATIC_LINKING_ONLY',
78 ])
80 ])
79 elif compiler.compiler_type == 'msvc':
81 elif compiler.compiler_type == 'msvc':
80 args = [compiler.cc]
82 args = [compiler.cc]
81 args.extend([
83 args.extend([
82 '/EP',
84 '/EP',
83 '/DZSTD_STATIC_LINKING_ONLY',
85 '/DZSTD_STATIC_LINKING_ONLY',
84 '/DZDICT_STATIC_LINKING_ONLY',
86 '/DZDICT_STATIC_LINKING_ONLY',
85 ])
87 ])
86 else:
88 else:
87 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
89 raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
88
90
89 def preprocess(path):
91 def preprocess(path):
90 with open(path, 'rb') as fh:
92 with open(path, 'rb') as fh:
91 lines = []
93 lines = []
92 it = iter(fh)
94 it = iter(fh)
93
95
94 for l in it:
96 for l in it:
95 # zstd.h includes <stddef.h>, which is also included by cffi's
97 # zstd.h includes <stddef.h>, which is also included by cffi's
96 # boilerplate. This can lead to duplicate declarations. So we strip
98 # boilerplate. This can lead to duplicate declarations. So we strip
97 # this include from the preprocessor invocation.
99 # this include from the preprocessor invocation.
98 #
100 #
99 # The same things happens for including zstd.h, so give it the same
101 # The same things happens for including zstd.h, so give it the same
100 # treatment.
102 # treatment.
101 #
103 #
102 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
104 # We define ZSTD_STATIC_LINKING_ONLY, which is redundant with the inline
103 # #define in zstdmt_compress.h and results in a compiler warning. So drop
105 # #define in zstdmt_compress.h and results in a compiler warning. So drop
104 # the inline #define.
106 # the inline #define.
105 if l.startswith((b'#include <stddef.h>',
107 if l.startswith((b'#include <stddef.h>',
106 b'#include "zstd.h"',
108 b'#include "zstd.h"',
107 b'#define ZSTD_STATIC_LINKING_ONLY')):
109 b'#define ZSTD_STATIC_LINKING_ONLY')):
108 continue
110 continue
109
111
110 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
112 # ZSTDLIB_API may not be defined if we dropped zstd.h. It isn't
111 # important so just filter it out.
113 # important so just filter it out.
112 if l.startswith(b'ZSTDLIB_API'):
114 if l.startswith(b'ZSTDLIB_API'):
113 l = l[len(b'ZSTDLIB_API '):]
115 l = l[len(b'ZSTDLIB_API '):]
114
116
115 lines.append(l)
117 lines.append(l)
116
118
117 fd, input_file = tempfile.mkstemp(suffix='.h')
119 fd, input_file = tempfile.mkstemp(suffix='.h')
118 os.write(fd, b''.join(lines))
120 os.write(fd, b''.join(lines))
119 os.close(fd)
121 os.close(fd)
120
122
121 try:
123 try:
122 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE)
124 env = dict(os.environ)
125 if getattr(compiler, '_paths', None):
126 env['PATH'] = compiler._paths
127 process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE,
128 env=env)
123 output = process.communicate()[0]
129 output = process.communicate()[0]
124 ret = process.poll()
130 ret = process.poll()
125 if ret:
131 if ret:
126 raise Exception('preprocessor exited with error')
132 raise Exception('preprocessor exited with error')
127
133
128 return output
134 return output
129 finally:
135 finally:
130 os.unlink(input_file)
136 os.unlink(input_file)
131
137
132
138
133 def normalize_output(output):
139 def normalize_output(output):
134 lines = []
140 lines = []
135 for line in output.splitlines():
141 for line in output.splitlines():
136 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
142 # CFFI's parser doesn't like __attribute__ on UNIX compilers.
137 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
143 if line.startswith(b'__attribute__ ((visibility ("default"))) '):
138 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
144 line = line[len(b'__attribute__ ((visibility ("default"))) '):]
139
145
140 if line.startswith(b'__attribute__((deprecated('):
146 if line.startswith(b'__attribute__((deprecated('):
141 continue
147 continue
142 elif b'__declspec(deprecated(' in line:
148 elif b'__declspec(deprecated(' in line:
143 continue
149 continue
144
150
145 lines.append(line)
151 lines.append(line)
146
152
147 return b'\n'.join(lines)
153 return b'\n'.join(lines)
148
154
149
155
150 ffi = cffi.FFI()
156 ffi = cffi.FFI()
151 # zstd.h uses a possible undefined MIN(). Define it until
157 # zstd.h uses a possible undefined MIN(). Define it until
152 # https://github.com/facebook/zstd/issues/976 is fixed.
158 # https://github.com/facebook/zstd/issues/976 is fixed.
153 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
159 # *_DISABLE_DEPRECATE_WARNINGS prevents the compiler from emitting a warning
154 # when cffi uses the function. Since we statically link against zstd, even
160 # when cffi uses the function. Since we statically link against zstd, even
155 # if we use the deprecated functions it shouldn't be a huge problem.
161 # if we use the deprecated functions it shouldn't be a huge problem.
156 ffi.set_source('_zstd_cffi', '''
162 ffi.set_source('_zstd_cffi', '''
157 #define MIN(a,b) ((a)<(b) ? (a) : (b))
163 #define MIN(a,b) ((a)<(b) ? (a) : (b))
158 #define ZSTD_STATIC_LINKING_ONLY
164 #define ZSTD_STATIC_LINKING_ONLY
159 #include <zstd.h>
165 #include <zstd.h>
160 #define ZDICT_STATIC_LINKING_ONLY
166 #define ZDICT_STATIC_LINKING_ONLY
161 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
167 #define ZDICT_DISABLE_DEPRECATE_WARNINGS
162 #include <zdict.h>
168 #include <zdict.h>
163 ''', sources=SOURCES,
169 ''', sources=SOURCES,
164 include_dirs=INCLUDE_DIRS,
170 include_dirs=INCLUDE_DIRS,
165 extra_compile_args=['-DZSTD_MULTITHREAD'])
171 extra_compile_args=['-DZSTD_MULTITHREAD'])
166
172
167 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
173 DEFINE = re.compile(b'^\\#define ([a-zA-Z0-9_]+) ')
168
174
169 sources = []
175 sources = []
170
176
171 # Feed normalized preprocessor output for headers into the cdef parser.
177 # Feed normalized preprocessor output for headers into the cdef parser.
172 for header in HEADERS:
178 for header in HEADERS:
173 preprocessed = preprocess(header)
179 preprocessed = preprocess(header)
174 sources.append(normalize_output(preprocessed))
180 sources.append(normalize_output(preprocessed))
175
181
176 # #define's are effectively erased as part of going through preprocessor.
182 # #define's are effectively erased as part of going through preprocessor.
177 # So perform a manual pass to re-add those to the cdef source.
183 # So perform a manual pass to re-add those to the cdef source.
178 with open(header, 'rb') as fh:
184 with open(header, 'rb') as fh:
179 for line in fh:
185 for line in fh:
180 line = line.strip()
186 line = line.strip()
181 m = DEFINE.match(line)
187 m = DEFINE.match(line)
182 if not m:
188 if not m:
183 continue
189 continue
184
190
185 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
191 if m.group(1) == b'ZSTD_STATIC_LINKING_ONLY':
186 continue
192 continue
187
193
188 # The parser doesn't like some constants with complex values.
194 # The parser doesn't like some constants with complex values.
189 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
195 if m.group(1) in (b'ZSTD_LIB_VERSION', b'ZSTD_VERSION_STRING'):
190 continue
196 continue
191
197
192 # The ... is magic syntax by the cdef parser to resolve the
198 # The ... is magic syntax by the cdef parser to resolve the
193 # value at compile time.
199 # value at compile time.
194 sources.append(m.group(0) + b' ...')
200 sources.append(m.group(0) + b' ...')
195
201
196 cdeflines = b'\n'.join(sources).splitlines()
202 cdeflines = b'\n'.join(sources).splitlines()
197 cdeflines = [l for l in cdeflines if l.strip()]
203 cdeflines = [l for l in cdeflines if l.strip()]
198 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
204 ffi.cdef(b'\n'.join(cdeflines).decode('latin1'))
199
205
200 if __name__ == '__main__':
206 if __name__ == '__main__':
201 ffi.compile()
207 ffi.compile()
@@ -1,113 +1,112 b''
1 #!/usr/bin/env python
1 #!/usr/bin/env python
2 # Copyright (c) 2016-present, Gregory Szorc
2 # Copyright (c) 2016-present, Gregory Szorc
3 # All rights reserved.
3 # All rights reserved.
4 #
4 #
5 # This software may be modified and distributed under the terms
5 # This software may be modified and distributed under the terms
6 # of the BSD license. See the LICENSE file for details.
6 # of the BSD license. See the LICENSE file for details.
7
7
8 from __future__ import print_function
8 from __future__ import print_function
9
9
10 from distutils.version import LooseVersion
10 from distutils.version import LooseVersion
11 import os
11 import os
12 import sys
12 import sys
13 from setuptools import setup
13 from setuptools import setup
14
14
15 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
15 # Need change in 1.10 for ffi.from_buffer() to handle all buffer types
16 # (like memoryview).
16 # (like memoryview).
17 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
17 # Need feature in 1.11 for ffi.gc() to declare size of objects so we avoid
18 # garbage collection pitfalls.
18 # garbage collection pitfalls.
19 MINIMUM_CFFI_VERSION = '1.11'
19 MINIMUM_CFFI_VERSION = '1.11'
20
20
21 try:
21 try:
22 import cffi
22 import cffi
23
23
24 # PyPy (and possibly other distros) have CFFI distributed as part of
24 # PyPy (and possibly other distros) have CFFI distributed as part of
25 # them. The install_requires for CFFI below won't work. We need to sniff
25 # them. The install_requires for CFFI below won't work. We need to sniff
26 # out the CFFI version here and reject CFFI if it is too old.
26 # out the CFFI version here and reject CFFI if it is too old.
27 cffi_version = LooseVersion(cffi.__version__)
27 cffi_version = LooseVersion(cffi.__version__)
28 if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION):
28 if cffi_version < LooseVersion(MINIMUM_CFFI_VERSION):
29 print('CFFI 1.11 or newer required (%s found); '
29 print('CFFI 1.11 or newer required (%s found); '
30 'not building CFFI backend' % cffi_version,
30 'not building CFFI backend' % cffi_version,
31 file=sys.stderr)
31 file=sys.stderr)
32 cffi = None
32 cffi = None
33
33
34 except ImportError:
34 except ImportError:
35 cffi = None
35 cffi = None
36
36
37 import setup_zstd
37 import setup_zstd
38
38
39 SUPPORT_LEGACY = False
39 SUPPORT_LEGACY = False
40 SYSTEM_ZSTD = False
40 SYSTEM_ZSTD = False
41 WARNINGS_AS_ERRORS = False
41 WARNINGS_AS_ERRORS = False
42
42
43 if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''):
43 if os.environ.get('ZSTD_WARNINGS_AS_ERRORS', ''):
44 WARNINGS_AS_ERRORS = True
44 WARNINGS_AS_ERRORS = True
45
45
46 if '--legacy' in sys.argv:
46 if '--legacy' in sys.argv:
47 SUPPORT_LEGACY = True
47 SUPPORT_LEGACY = True
48 sys.argv.remove('--legacy')
48 sys.argv.remove('--legacy')
49
49
50 if '--system-zstd' in sys.argv:
50 if '--system-zstd' in sys.argv:
51 SYSTEM_ZSTD = True
51 SYSTEM_ZSTD = True
52 sys.argv.remove('--system-zstd')
52 sys.argv.remove('--system-zstd')
53
53
54 if '--warnings-as-errors' in sys.argv:
54 if '--warnings-as-errors' in sys.argv:
55 WARNINGS_AS_ERRORS = True
55 WARNINGS_AS_ERRORS = True
56 sys.argv.remove('--warning-as-errors')
56 sys.argv.remove('--warning-as-errors')
57
57
58 # Code for obtaining the Extension instance is in its own module to
58 # Code for obtaining the Extension instance is in its own module to
59 # facilitate reuse in other projects.
59 # facilitate reuse in other projects.
60 extensions = [
60 extensions = [
61 setup_zstd.get_c_extension(name='zstd',
61 setup_zstd.get_c_extension(name='zstd',
62 support_legacy=SUPPORT_LEGACY,
62 support_legacy=SUPPORT_LEGACY,
63 system_zstd=SYSTEM_ZSTD,
63 system_zstd=SYSTEM_ZSTD,
64 warnings_as_errors=WARNINGS_AS_ERRORS),
64 warnings_as_errors=WARNINGS_AS_ERRORS),
65 ]
65 ]
66
66
67 install_requires = []
67 install_requires = []
68
68
69 if cffi:
69 if cffi:
70 import make_cffi
70 import make_cffi
71 extensions.append(make_cffi.ffi.distutils_extension())
71 extensions.append(make_cffi.ffi.distutils_extension())
72 install_requires.append('cffi>=%s' % MINIMUM_CFFI_VERSION)
72 install_requires.append('cffi>=%s' % MINIMUM_CFFI_VERSION)
73
73
74 version = None
74 version = None
75
75
76 with open('c-ext/python-zstandard.h', 'r') as fh:
76 with open('c-ext/python-zstandard.h', 'r') as fh:
77 for line in fh:
77 for line in fh:
78 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
78 if not line.startswith('#define PYTHON_ZSTANDARD_VERSION'):
79 continue
79 continue
80
80
81 version = line.split()[2][1:-1]
81 version = line.split()[2][1:-1]
82 break
82 break
83
83
84 if not version:
84 if not version:
85 raise Exception('could not resolve package version; '
85 raise Exception('could not resolve package version; '
86 'this should never happen')
86 'this should never happen')
87
87
88 setup(
88 setup(
89 name='zstandard',
89 name='zstandard',
90 version=version,
90 version=version,
91 description='Zstandard bindings for Python',
91 description='Zstandard bindings for Python',
92 long_description=open('README.rst', 'r').read(),
92 long_description=open('README.rst', 'r').read(),
93 url='https://github.com/indygreg/python-zstandard',
93 url='https://github.com/indygreg/python-zstandard',
94 author='Gregory Szorc',
94 author='Gregory Szorc',
95 author_email='gregory.szorc@gmail.com',
95 author_email='gregory.szorc@gmail.com',
96 license='BSD',
96 license='BSD',
97 classifiers=[
97 classifiers=[
98 'Development Status :: 4 - Beta',
98 'Development Status :: 4 - Beta',
99 'Intended Audience :: Developers',
99 'Intended Audience :: Developers',
100 'License :: OSI Approved :: BSD License',
100 'License :: OSI Approved :: BSD License',
101 'Programming Language :: C',
101 'Programming Language :: C',
102 'Programming Language :: Python :: 2.7',
102 'Programming Language :: Python :: 2.7',
103 'Programming Language :: Python :: 3.4',
104 'Programming Language :: Python :: 3.5',
103 'Programming Language :: Python :: 3.5',
105 'Programming Language :: Python :: 3.6',
104 'Programming Language :: Python :: 3.6',
106 'Programming Language :: Python :: 3.7',
105 'Programming Language :: Python :: 3.7',
107 ],
106 ],
108 keywords='zstandard zstd compression',
107 keywords='zstandard zstd compression',
109 packages=['zstandard'],
108 packages=['zstandard'],
110 ext_modules=extensions,
109 ext_modules=extensions,
111 test_suite='tests',
110 test_suite='tests',
112 install_requires=install_requires,
111 install_requires=install_requires,
113 )
112 )
@@ -1,190 +1,192 b''
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 import distutils.ccompiler
7 import distutils.ccompiler
8 import os
8 import os
9
9
10 from distutils.extension import Extension
10 from distutils.extension import Extension
11
11
12
12
13 zstd_sources = ['zstd/%s' % p for p in (
13 zstd_sources = ['zstd/%s' % p for p in (
14 'common/debug.c',
14 'common/debug.c',
15 'common/entropy_common.c',
15 'common/entropy_common.c',
16 'common/error_private.c',
16 'common/error_private.c',
17 'common/fse_decompress.c',
17 'common/fse_decompress.c',
18 'common/pool.c',
18 'common/pool.c',
19 'common/threading.c',
19 'common/threading.c',
20 'common/xxhash.c',
20 'common/xxhash.c',
21 'common/zstd_common.c',
21 'common/zstd_common.c',
22 'compress/fse_compress.c',
22 'compress/fse_compress.c',
23 'compress/hist.c',
23 'compress/hist.c',
24 'compress/huf_compress.c',
24 'compress/huf_compress.c',
25 'compress/zstd_compress_literals.c',
26 'compress/zstd_compress_sequences.c',
25 'compress/zstd_compress.c',
27 'compress/zstd_compress.c',
26 'compress/zstd_double_fast.c',
28 'compress/zstd_double_fast.c',
27 'compress/zstd_fast.c',
29 'compress/zstd_fast.c',
28 'compress/zstd_lazy.c',
30 'compress/zstd_lazy.c',
29 'compress/zstd_ldm.c',
31 'compress/zstd_ldm.c',
30 'compress/zstd_opt.c',
32 'compress/zstd_opt.c',
31 'compress/zstdmt_compress.c',
33 'compress/zstdmt_compress.c',
32 'decompress/huf_decompress.c',
34 'decompress/huf_decompress.c',
33 'decompress/zstd_ddict.c',
35 'decompress/zstd_ddict.c',
34 'decompress/zstd_decompress.c',
36 'decompress/zstd_decompress.c',
35 'decompress/zstd_decompress_block.c',
37 'decompress/zstd_decompress_block.c',
36 'dictBuilder/cover.c',
38 'dictBuilder/cover.c',
37 'dictBuilder/divsufsort.c',
39 'dictBuilder/divsufsort.c',
38 'dictBuilder/fastcover.c',
40 'dictBuilder/fastcover.c',
39 'dictBuilder/zdict.c',
41 'dictBuilder/zdict.c',
40 )]
42 )]
41
43
42 zstd_sources_legacy = ['zstd/%s' % p for p in (
44 zstd_sources_legacy = ['zstd/%s' % p for p in (
43 'deprecated/zbuff_common.c',
45 'deprecated/zbuff_common.c',
44 'deprecated/zbuff_compress.c',
46 'deprecated/zbuff_compress.c',
45 'deprecated/zbuff_decompress.c',
47 'deprecated/zbuff_decompress.c',
46 'legacy/zstd_v01.c',
48 'legacy/zstd_v01.c',
47 'legacy/zstd_v02.c',
49 'legacy/zstd_v02.c',
48 'legacy/zstd_v03.c',
50 'legacy/zstd_v03.c',
49 'legacy/zstd_v04.c',
51 'legacy/zstd_v04.c',
50 'legacy/zstd_v05.c',
52 'legacy/zstd_v05.c',
51 'legacy/zstd_v06.c',
53 'legacy/zstd_v06.c',
52 'legacy/zstd_v07.c'
54 'legacy/zstd_v07.c'
53 )]
55 )]
54
56
55 zstd_includes = [
57 zstd_includes = [
56 'zstd',
58 'zstd',
57 'zstd/common',
59 'zstd/common',
58 'zstd/compress',
60 'zstd/compress',
59 'zstd/decompress',
61 'zstd/decompress',
60 'zstd/dictBuilder',
62 'zstd/dictBuilder',
61 ]
63 ]
62
64
63 zstd_includes_legacy = [
65 zstd_includes_legacy = [
64 'zstd/deprecated',
66 'zstd/deprecated',
65 'zstd/legacy',
67 'zstd/legacy',
66 ]
68 ]
67
69
68 ext_includes = [
70 ext_includes = [
69 'c-ext',
71 'c-ext',
70 'zstd/common',
72 'zstd/common',
71 ]
73 ]
72
74
73 ext_sources = [
75 ext_sources = [
74 'zstd/common/pool.c',
76 'zstd/common/pool.c',
75 'zstd/common/threading.c',
77 'zstd/common/threading.c',
76 'zstd.c',
78 'zstd.c',
77 'c-ext/bufferutil.c',
79 'c-ext/bufferutil.c',
78 'c-ext/compressiondict.c',
80 'c-ext/compressiondict.c',
79 'c-ext/compressobj.c',
81 'c-ext/compressobj.c',
80 'c-ext/compressor.c',
82 'c-ext/compressor.c',
81 'c-ext/compressoriterator.c',
83 'c-ext/compressoriterator.c',
82 'c-ext/compressionchunker.c',
84 'c-ext/compressionchunker.c',
83 'c-ext/compressionparams.c',
85 'c-ext/compressionparams.c',
84 'c-ext/compressionreader.c',
86 'c-ext/compressionreader.c',
85 'c-ext/compressionwriter.c',
87 'c-ext/compressionwriter.c',
86 'c-ext/constants.c',
88 'c-ext/constants.c',
87 'c-ext/decompressobj.c',
89 'c-ext/decompressobj.c',
88 'c-ext/decompressor.c',
90 'c-ext/decompressor.c',
89 'c-ext/decompressoriterator.c',
91 'c-ext/decompressoriterator.c',
90 'c-ext/decompressionreader.c',
92 'c-ext/decompressionreader.c',
91 'c-ext/decompressionwriter.c',
93 'c-ext/decompressionwriter.c',
92 'c-ext/frameparams.c',
94 'c-ext/frameparams.c',
93 ]
95 ]
94
96
95 zstd_depends = [
97 zstd_depends = [
96 'c-ext/python-zstandard.h',
98 'c-ext/python-zstandard.h',
97 ]
99 ]
98
100
99
101
100 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
102 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
101 warnings_as_errors=False, root=None):
103 warnings_as_errors=False, root=None):
102 """Obtain a distutils.extension.Extension for the C extension.
104 """Obtain a distutils.extension.Extension for the C extension.
103
105
104 ``support_legacy`` controls whether to compile in legacy zstd format support.
106 ``support_legacy`` controls whether to compile in legacy zstd format support.
105
107
106 ``system_zstd`` controls whether to compile against the system zstd library.
108 ``system_zstd`` controls whether to compile against the system zstd library.
107 For this to work, the system zstd library and headers must match what
109 For this to work, the system zstd library and headers must match what
108 python-zstandard is coded against exactly.
110 python-zstandard is coded against exactly.
109
111
110 ``name`` is the module name of the C extension to produce.
112 ``name`` is the module name of the C extension to produce.
111
113
112 ``warnings_as_errors`` controls whether compiler warnings are turned into
114 ``warnings_as_errors`` controls whether compiler warnings are turned into
113 compiler errors.
115 compiler errors.
114
116
115 ``root`` defines a root path that source should be computed as relative
117 ``root`` defines a root path that source should be computed as relative
116 to. This should be the directory with the main ``setup.py`` that is
118 to. This should be the directory with the main ``setup.py`` that is
117 being invoked. If not defined, paths will be relative to this file.
119 being invoked. If not defined, paths will be relative to this file.
118 """
120 """
119 actual_root = os.path.abspath(os.path.dirname(__file__))
121 actual_root = os.path.abspath(os.path.dirname(__file__))
120 root = root or actual_root
122 root = root or actual_root
121
123
122 sources = set([os.path.join(actual_root, p) for p in ext_sources])
124 sources = set([os.path.join(actual_root, p) for p in ext_sources])
123 if not system_zstd:
125 if not system_zstd:
124 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
126 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
125 if support_legacy:
127 if support_legacy:
126 sources.update([os.path.join(actual_root, p)
128 sources.update([os.path.join(actual_root, p)
127 for p in zstd_sources_legacy])
129 for p in zstd_sources_legacy])
128 sources = list(sources)
130 sources = list(sources)
129
131
130 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
132 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
131 if not system_zstd:
133 if not system_zstd:
132 include_dirs.update([os.path.join(actual_root, d)
134 include_dirs.update([os.path.join(actual_root, d)
133 for d in zstd_includes])
135 for d in zstd_includes])
134 if support_legacy:
136 if support_legacy:
135 include_dirs.update([os.path.join(actual_root, d)
137 include_dirs.update([os.path.join(actual_root, d)
136 for d in zstd_includes_legacy])
138 for d in zstd_includes_legacy])
137 include_dirs = list(include_dirs)
139 include_dirs = list(include_dirs)
138
140
139 depends = [os.path.join(actual_root, p) for p in zstd_depends]
141 depends = [os.path.join(actual_root, p) for p in zstd_depends]
140
142
141 compiler = distutils.ccompiler.new_compiler()
143 compiler = distutils.ccompiler.new_compiler()
142
144
143 # Needed for MSVC.
145 # Needed for MSVC.
144 if hasattr(compiler, 'initialize'):
146 if hasattr(compiler, 'initialize'):
145 compiler.initialize()
147 compiler.initialize()
146
148
147 if compiler.compiler_type == 'unix':
149 if compiler.compiler_type == 'unix':
148 compiler_type = 'unix'
150 compiler_type = 'unix'
149 elif compiler.compiler_type == 'msvc':
151 elif compiler.compiler_type == 'msvc':
150 compiler_type = 'msvc'
152 compiler_type = 'msvc'
151 elif compiler.compiler_type == 'mingw32':
153 elif compiler.compiler_type == 'mingw32':
152 compiler_type = 'mingw32'
154 compiler_type = 'mingw32'
153 else:
155 else:
154 raise Exception('unhandled compiler type: %s' %
156 raise Exception('unhandled compiler type: %s' %
155 compiler.compiler_type)
157 compiler.compiler_type)
156
158
157 extra_args = ['-DZSTD_MULTITHREAD']
159 extra_args = ['-DZSTD_MULTITHREAD']
158
160
159 if not system_zstd:
161 if not system_zstd:
160 extra_args.append('-DZSTDLIB_VISIBILITY=')
162 extra_args.append('-DZSTDLIB_VISIBILITY=')
161 extra_args.append('-DZDICTLIB_VISIBILITY=')
163 extra_args.append('-DZDICTLIB_VISIBILITY=')
162 extra_args.append('-DZSTDERRORLIB_VISIBILITY=')
164 extra_args.append('-DZSTDERRORLIB_VISIBILITY=')
163
165
164 if compiler_type == 'unix':
166 if compiler_type == 'unix':
165 extra_args.append('-fvisibility=hidden')
167 extra_args.append('-fvisibility=hidden')
166
168
167 if not system_zstd and support_legacy:
169 if not system_zstd and support_legacy:
168 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
170 extra_args.append('-DZSTD_LEGACY_SUPPORT=1')
169
171
170 if warnings_as_errors:
172 if warnings_as_errors:
171 if compiler_type in ('unix', 'mingw32'):
173 if compiler_type in ('unix', 'mingw32'):
172 extra_args.append('-Werror')
174 extra_args.append('-Werror')
173 elif compiler_type == 'msvc':
175 elif compiler_type == 'msvc':
174 extra_args.append('/WX')
176 extra_args.append('/WX')
175 else:
177 else:
176 assert False
178 assert False
177
179
178 libraries = ['zstd'] if system_zstd else []
180 libraries = ['zstd'] if system_zstd else []
179
181
180 # Python 3.7 doesn't like absolute paths. So normalize to relative.
182 # Python 3.7 doesn't like absolute paths. So normalize to relative.
181 sources = [os.path.relpath(p, root) for p in sources]
183 sources = [os.path.relpath(p, root) for p in sources]
182 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
184 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
183 depends = [os.path.relpath(p, root) for p in depends]
185 depends = [os.path.relpath(p, root) for p in depends]
184
186
185 # TODO compile with optimizations.
187 # TODO compile with optimizations.
186 return Extension(name, sources,
188 return Extension(name, sources,
187 include_dirs=include_dirs,
189 include_dirs=include_dirs,
188 depends=depends,
190 depends=depends,
189 extra_compile_args=extra_args,
191 extra_compile_args=extra_args,
190 libraries=libraries)
192 libraries=libraries)
@@ -1,1735 +1,1735 b''
1 import hashlib
1 import hashlib
2 import io
2 import io
3 import os
3 import os
4 import struct
4 import struct
5 import sys
5 import sys
6 import tarfile
6 import tarfile
7 import tempfile
7 import tempfile
8 import unittest
8 import unittest
9
9
10 import zstandard as zstd
10 import zstandard as zstd
11
11
12 from .common import (
12 from .common import (
13 make_cffi,
13 make_cffi,
14 NonClosingBytesIO,
14 NonClosingBytesIO,
15 OpCountingBytesIO,
15 OpCountingBytesIO,
16 )
16 )
17
17
18
18
19 if sys.version_info[0] >= 3:
19 if sys.version_info[0] >= 3:
20 next = lambda it: it.__next__()
20 next = lambda it: it.__next__()
21 else:
21 else:
22 next = lambda it: it.next()
22 next = lambda it: it.next()
23
23
24
24
25 def multithreaded_chunk_size(level, source_size=0):
25 def multithreaded_chunk_size(level, source_size=0):
26 params = zstd.ZstdCompressionParameters.from_level(level,
26 params = zstd.ZstdCompressionParameters.from_level(level,
27 source_size=source_size)
27 source_size=source_size)
28
28
29 return 1 << (params.window_log + 2)
29 return 1 << (params.window_log + 2)
30
30
31
31
32 @make_cffi
32 @make_cffi
33 class TestCompressor(unittest.TestCase):
33 class TestCompressor(unittest.TestCase):
34 def test_level_bounds(self):
34 def test_level_bounds(self):
35 with self.assertRaises(ValueError):
35 with self.assertRaises(ValueError):
36 zstd.ZstdCompressor(level=23)
36 zstd.ZstdCompressor(level=23)
37
37
38 def test_memory_size(self):
38 def test_memory_size(self):
39 cctx = zstd.ZstdCompressor(level=1)
39 cctx = zstd.ZstdCompressor(level=1)
40 self.assertGreater(cctx.memory_size(), 100)
40 self.assertGreater(cctx.memory_size(), 100)
41
41
42
42
43 @make_cffi
43 @make_cffi
44 class TestCompressor_compress(unittest.TestCase):
44 class TestCompressor_compress(unittest.TestCase):
45 def test_compress_empty(self):
45 def test_compress_empty(self):
46 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
46 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
47 result = cctx.compress(b'')
47 result = cctx.compress(b'')
48 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
48 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
49 params = zstd.get_frame_parameters(result)
49 params = zstd.get_frame_parameters(result)
50 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
50 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
51 self.assertEqual(params.window_size, 524288)
51 self.assertEqual(params.window_size, 524288)
52 self.assertEqual(params.dict_id, 0)
52 self.assertEqual(params.dict_id, 0)
53 self.assertFalse(params.has_checksum, 0)
53 self.assertFalse(params.has_checksum, 0)
54
54
55 cctx = zstd.ZstdCompressor()
55 cctx = zstd.ZstdCompressor()
56 result = cctx.compress(b'')
56 result = cctx.compress(b'')
57 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00')
57 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x20\x00\x01\x00\x00')
58 params = zstd.get_frame_parameters(result)
58 params = zstd.get_frame_parameters(result)
59 self.assertEqual(params.content_size, 0)
59 self.assertEqual(params.content_size, 0)
60
60
61 def test_input_types(self):
61 def test_input_types(self):
62 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
62 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
63 expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f'
63 expected = b'\x28\xb5\x2f\xfd\x00\x00\x19\x00\x00\x66\x6f\x6f'
64
64
65 mutable_array = bytearray(3)
65 mutable_array = bytearray(3)
66 mutable_array[:] = b'foo'
66 mutable_array[:] = b'foo'
67
67
68 sources = [
68 sources = [
69 memoryview(b'foo'),
69 memoryview(b'foo'),
70 bytearray(b'foo'),
70 bytearray(b'foo'),
71 mutable_array,
71 mutable_array,
72 ]
72 ]
73
73
74 for source in sources:
74 for source in sources:
75 self.assertEqual(cctx.compress(source), expected)
75 self.assertEqual(cctx.compress(source), expected)
76
76
77 def test_compress_large(self):
77 def test_compress_large(self):
78 chunks = []
78 chunks = []
79 for i in range(255):
79 for i in range(255):
80 chunks.append(struct.Struct('>B').pack(i) * 16384)
80 chunks.append(struct.Struct('>B').pack(i) * 16384)
81
81
82 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
82 cctx = zstd.ZstdCompressor(level=3, write_content_size=False)
83 result = cctx.compress(b''.join(chunks))
83 result = cctx.compress(b''.join(chunks))
84 self.assertEqual(len(result), 999)
84 self.assertEqual(len(result), 999)
85 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
85 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
86
86
87 # This matches the test for read_to_iter() below.
87 # This matches the test for read_to_iter() below.
88 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
88 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
89 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
89 result = cctx.compress(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE + b'o')
90 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
90 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x40\x54\x00\x00'
91 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
91 b'\x10\x66\x66\x01\x00\xfb\xff\x39\xc0'
92 b'\x02\x09\x00\x00\x6f')
92 b'\x02\x09\x00\x00\x6f')
93
93
94 def test_negative_level(self):
94 def test_negative_level(self):
95 cctx = zstd.ZstdCompressor(level=-4)
95 cctx = zstd.ZstdCompressor(level=-4)
96 result = cctx.compress(b'foo' * 256)
96 result = cctx.compress(b'foo' * 256)
97
97
98 def test_no_magic(self):
98 def test_no_magic(self):
99 params = zstd.ZstdCompressionParameters.from_level(
99 params = zstd.ZstdCompressionParameters.from_level(
100 1, format=zstd.FORMAT_ZSTD1)
100 1, format=zstd.FORMAT_ZSTD1)
101 cctx = zstd.ZstdCompressor(compression_params=params)
101 cctx = zstd.ZstdCompressor(compression_params=params)
102 magic = cctx.compress(b'foobar')
102 magic = cctx.compress(b'foobar')
103
103
104 params = zstd.ZstdCompressionParameters.from_level(
104 params = zstd.ZstdCompressionParameters.from_level(
105 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
105 1, format=zstd.FORMAT_ZSTD1_MAGICLESS)
106 cctx = zstd.ZstdCompressor(compression_params=params)
106 cctx = zstd.ZstdCompressor(compression_params=params)
107 no_magic = cctx.compress(b'foobar')
107 no_magic = cctx.compress(b'foobar')
108
108
109 self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd')
109 self.assertEqual(magic[0:4], b'\x28\xb5\x2f\xfd')
110 self.assertEqual(magic[4:], no_magic)
110 self.assertEqual(magic[4:], no_magic)
111
111
112 def test_write_checksum(self):
112 def test_write_checksum(self):
113 cctx = zstd.ZstdCompressor(level=1)
113 cctx = zstd.ZstdCompressor(level=1)
114 no_checksum = cctx.compress(b'foobar')
114 no_checksum = cctx.compress(b'foobar')
115 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
115 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
116 with_checksum = cctx.compress(b'foobar')
116 with_checksum = cctx.compress(b'foobar')
117
117
118 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
118 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
119
119
120 no_params = zstd.get_frame_parameters(no_checksum)
120 no_params = zstd.get_frame_parameters(no_checksum)
121 with_params = zstd.get_frame_parameters(with_checksum)
121 with_params = zstd.get_frame_parameters(with_checksum)
122
122
123 self.assertFalse(no_params.has_checksum)
123 self.assertFalse(no_params.has_checksum)
124 self.assertTrue(with_params.has_checksum)
124 self.assertTrue(with_params.has_checksum)
125
125
126 def test_write_content_size(self):
126 def test_write_content_size(self):
127 cctx = zstd.ZstdCompressor(level=1)
127 cctx = zstd.ZstdCompressor(level=1)
128 with_size = cctx.compress(b'foobar' * 256)
128 with_size = cctx.compress(b'foobar' * 256)
129 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
129 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
130 no_size = cctx.compress(b'foobar' * 256)
130 no_size = cctx.compress(b'foobar' * 256)
131
131
132 self.assertEqual(len(with_size), len(no_size) + 1)
132 self.assertEqual(len(with_size), len(no_size) + 1)
133
133
134 no_params = zstd.get_frame_parameters(no_size)
134 no_params = zstd.get_frame_parameters(no_size)
135 with_params = zstd.get_frame_parameters(with_size)
135 with_params = zstd.get_frame_parameters(with_size)
136 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
136 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
137 self.assertEqual(with_params.content_size, 1536)
137 self.assertEqual(with_params.content_size, 1536)
138
138
139 def test_no_dict_id(self):
139 def test_no_dict_id(self):
140 samples = []
140 samples = []
141 for i in range(128):
141 for i in range(128):
142 samples.append(b'foo' * 64)
142 samples.append(b'foo' * 64)
143 samples.append(b'bar' * 64)
143 samples.append(b'bar' * 64)
144 samples.append(b'foobar' * 64)
144 samples.append(b'foobar' * 64)
145
145
146 d = zstd.train_dictionary(1024, samples)
146 d = zstd.train_dictionary(1024, samples)
147
147
148 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
148 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
149 with_dict_id = cctx.compress(b'foobarfoobar')
149 with_dict_id = cctx.compress(b'foobarfoobar')
150
150
151 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
151 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
152 no_dict_id = cctx.compress(b'foobarfoobar')
152 no_dict_id = cctx.compress(b'foobarfoobar')
153
153
154 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
154 self.assertEqual(len(with_dict_id), len(no_dict_id) + 4)
155
155
156 no_params = zstd.get_frame_parameters(no_dict_id)
156 no_params = zstd.get_frame_parameters(no_dict_id)
157 with_params = zstd.get_frame_parameters(with_dict_id)
157 with_params = zstd.get_frame_parameters(with_dict_id)
158 self.assertEqual(no_params.dict_id, 0)
158 self.assertEqual(no_params.dict_id, 0)
159 self.assertEqual(with_params.dict_id, 1880053135)
159 self.assertEqual(with_params.dict_id, 1880053135)
160
160
161 def test_compress_dict_multiple(self):
161 def test_compress_dict_multiple(self):
162 samples = []
162 samples = []
163 for i in range(128):
163 for i in range(128):
164 samples.append(b'foo' * 64)
164 samples.append(b'foo' * 64)
165 samples.append(b'bar' * 64)
165 samples.append(b'bar' * 64)
166 samples.append(b'foobar' * 64)
166 samples.append(b'foobar' * 64)
167
167
168 d = zstd.train_dictionary(8192, samples)
168 d = zstd.train_dictionary(8192, samples)
169
169
170 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
170 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
171
171
172 for i in range(32):
172 for i in range(32):
173 cctx.compress(b'foo bar foobar foo bar foobar')
173 cctx.compress(b'foo bar foobar foo bar foobar')
174
174
175 def test_dict_precompute(self):
175 def test_dict_precompute(self):
176 samples = []
176 samples = []
177 for i in range(128):
177 for i in range(128):
178 samples.append(b'foo' * 64)
178 samples.append(b'foo' * 64)
179 samples.append(b'bar' * 64)
179 samples.append(b'bar' * 64)
180 samples.append(b'foobar' * 64)
180 samples.append(b'foobar' * 64)
181
181
182 d = zstd.train_dictionary(8192, samples)
182 d = zstd.train_dictionary(8192, samples)
183 d.precompute_compress(level=1)
183 d.precompute_compress(level=1)
184
184
185 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
185 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
186
186
187 for i in range(32):
187 for i in range(32):
188 cctx.compress(b'foo bar foobar foo bar foobar')
188 cctx.compress(b'foo bar foobar foo bar foobar')
189
189
190 def test_multithreaded(self):
190 def test_multithreaded(self):
191 chunk_size = multithreaded_chunk_size(1)
191 chunk_size = multithreaded_chunk_size(1)
192 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
192 source = b''.join([b'x' * chunk_size, b'y' * chunk_size])
193
193
194 cctx = zstd.ZstdCompressor(level=1, threads=2)
194 cctx = zstd.ZstdCompressor(level=1, threads=2)
195 compressed = cctx.compress(source)
195 compressed = cctx.compress(source)
196
196
197 params = zstd.get_frame_parameters(compressed)
197 params = zstd.get_frame_parameters(compressed)
198 self.assertEqual(params.content_size, chunk_size * 2)
198 self.assertEqual(params.content_size, chunk_size * 2)
199 self.assertEqual(params.dict_id, 0)
199 self.assertEqual(params.dict_id, 0)
200 self.assertFalse(params.has_checksum)
200 self.assertFalse(params.has_checksum)
201
201
202 dctx = zstd.ZstdDecompressor()
202 dctx = zstd.ZstdDecompressor()
203 self.assertEqual(dctx.decompress(compressed), source)
203 self.assertEqual(dctx.decompress(compressed), source)
204
204
205 def test_multithreaded_dict(self):
205 def test_multithreaded_dict(self):
206 samples = []
206 samples = []
207 for i in range(128):
207 for i in range(128):
208 samples.append(b'foo' * 64)
208 samples.append(b'foo' * 64)
209 samples.append(b'bar' * 64)
209 samples.append(b'bar' * 64)
210 samples.append(b'foobar' * 64)
210 samples.append(b'foobar' * 64)
211
211
212 d = zstd.train_dictionary(1024, samples)
212 d = zstd.train_dictionary(1024, samples)
213
213
214 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
214 cctx = zstd.ZstdCompressor(dict_data=d, threads=2)
215
215
216 result = cctx.compress(b'foo')
216 result = cctx.compress(b'foo')
217 params = zstd.get_frame_parameters(result);
217 params = zstd.get_frame_parameters(result);
218 self.assertEqual(params.content_size, 3);
218 self.assertEqual(params.content_size, 3);
219 self.assertEqual(params.dict_id, d.dict_id())
219 self.assertEqual(params.dict_id, d.dict_id())
220
220
221 self.assertEqual(result,
221 self.assertEqual(result,
222 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
222 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
223 b'\x66\x6f\x6f')
223 b'\x66\x6f\x6f')
224
224
225 def test_multithreaded_compression_params(self):
225 def test_multithreaded_compression_params(self):
226 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
226 params = zstd.ZstdCompressionParameters.from_level(0, threads=2)
227 cctx = zstd.ZstdCompressor(compression_params=params)
227 cctx = zstd.ZstdCompressor(compression_params=params)
228
228
229 result = cctx.compress(b'foo')
229 result = cctx.compress(b'foo')
230 params = zstd.get_frame_parameters(result);
230 params = zstd.get_frame_parameters(result);
231 self.assertEqual(params.content_size, 3);
231 self.assertEqual(params.content_size, 3);
232
232
233 self.assertEqual(result,
233 self.assertEqual(result,
234 b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f')
234 b'\x28\xb5\x2f\xfd\x20\x03\x19\x00\x00\x66\x6f\x6f')
235
235
236
236
237 @make_cffi
237 @make_cffi
238 class TestCompressor_compressobj(unittest.TestCase):
238 class TestCompressor_compressobj(unittest.TestCase):
239 def test_compressobj_empty(self):
239 def test_compressobj_empty(self):
240 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
240 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
241 cobj = cctx.compressobj()
241 cobj = cctx.compressobj()
242 self.assertEqual(cobj.compress(b''), b'')
242 self.assertEqual(cobj.compress(b''), b'')
243 self.assertEqual(cobj.flush(),
243 self.assertEqual(cobj.flush(),
244 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
244 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
245
245
246 def test_input_types(self):
246 def test_input_types(self):
247 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
247 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
248 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
248 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
249
249
250 mutable_array = bytearray(3)
250 mutable_array = bytearray(3)
251 mutable_array[:] = b'foo'
251 mutable_array[:] = b'foo'
252
252
253 sources = [
253 sources = [
254 memoryview(b'foo'),
254 memoryview(b'foo'),
255 bytearray(b'foo'),
255 bytearray(b'foo'),
256 mutable_array,
256 mutable_array,
257 ]
257 ]
258
258
259 for source in sources:
259 for source in sources:
260 cobj = cctx.compressobj()
260 cobj = cctx.compressobj()
261 self.assertEqual(cobj.compress(source), b'')
261 self.assertEqual(cobj.compress(source), b'')
262 self.assertEqual(cobj.flush(), expected)
262 self.assertEqual(cobj.flush(), expected)
263
263
264 def test_compressobj_large(self):
264 def test_compressobj_large(self):
265 chunks = []
265 chunks = []
266 for i in range(255):
266 for i in range(255):
267 chunks.append(struct.Struct('>B').pack(i) * 16384)
267 chunks.append(struct.Struct('>B').pack(i) * 16384)
268
268
269 cctx = zstd.ZstdCompressor(level=3)
269 cctx = zstd.ZstdCompressor(level=3)
270 cobj = cctx.compressobj()
270 cobj = cctx.compressobj()
271
271
272 result = cobj.compress(b''.join(chunks)) + cobj.flush()
272 result = cobj.compress(b''.join(chunks)) + cobj.flush()
273 self.assertEqual(len(result), 999)
273 self.assertEqual(len(result), 999)
274 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
274 self.assertEqual(result[0:4], b'\x28\xb5\x2f\xfd')
275
275
276 params = zstd.get_frame_parameters(result)
276 params = zstd.get_frame_parameters(result)
277 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
277 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
278 self.assertEqual(params.window_size, 2097152)
278 self.assertEqual(params.window_size, 2097152)
279 self.assertEqual(params.dict_id, 0)
279 self.assertEqual(params.dict_id, 0)
280 self.assertFalse(params.has_checksum)
280 self.assertFalse(params.has_checksum)
281
281
282 def test_write_checksum(self):
282 def test_write_checksum(self):
283 cctx = zstd.ZstdCompressor(level=1)
283 cctx = zstd.ZstdCompressor(level=1)
284 cobj = cctx.compressobj()
284 cobj = cctx.compressobj()
285 no_checksum = cobj.compress(b'foobar') + cobj.flush()
285 no_checksum = cobj.compress(b'foobar') + cobj.flush()
286 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
286 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
287 cobj = cctx.compressobj()
287 cobj = cctx.compressobj()
288 with_checksum = cobj.compress(b'foobar') + cobj.flush()
288 with_checksum = cobj.compress(b'foobar') + cobj.flush()
289
289
290 no_params = zstd.get_frame_parameters(no_checksum)
290 no_params = zstd.get_frame_parameters(no_checksum)
291 with_params = zstd.get_frame_parameters(with_checksum)
291 with_params = zstd.get_frame_parameters(with_checksum)
292 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
292 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
293 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
293 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
294 self.assertEqual(no_params.dict_id, 0)
294 self.assertEqual(no_params.dict_id, 0)
295 self.assertEqual(with_params.dict_id, 0)
295 self.assertEqual(with_params.dict_id, 0)
296 self.assertFalse(no_params.has_checksum)
296 self.assertFalse(no_params.has_checksum)
297 self.assertTrue(with_params.has_checksum)
297 self.assertTrue(with_params.has_checksum)
298
298
299 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
299 self.assertEqual(len(with_checksum), len(no_checksum) + 4)
300
300
301 def test_write_content_size(self):
301 def test_write_content_size(self):
302 cctx = zstd.ZstdCompressor(level=1)
302 cctx = zstd.ZstdCompressor(level=1)
303 cobj = cctx.compressobj(size=len(b'foobar' * 256))
303 cobj = cctx.compressobj(size=len(b'foobar' * 256))
304 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
304 with_size = cobj.compress(b'foobar' * 256) + cobj.flush()
305 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
305 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
306 cobj = cctx.compressobj(size=len(b'foobar' * 256))
306 cobj = cctx.compressobj(size=len(b'foobar' * 256))
307 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
307 no_size = cobj.compress(b'foobar' * 256) + cobj.flush()
308
308
309 no_params = zstd.get_frame_parameters(no_size)
309 no_params = zstd.get_frame_parameters(no_size)
310 with_params = zstd.get_frame_parameters(with_size)
310 with_params = zstd.get_frame_parameters(with_size)
311 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
311 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
312 self.assertEqual(with_params.content_size, 1536)
312 self.assertEqual(with_params.content_size, 1536)
313 self.assertEqual(no_params.dict_id, 0)
313 self.assertEqual(no_params.dict_id, 0)
314 self.assertEqual(with_params.dict_id, 0)
314 self.assertEqual(with_params.dict_id, 0)
315 self.assertFalse(no_params.has_checksum)
315 self.assertFalse(no_params.has_checksum)
316 self.assertFalse(with_params.has_checksum)
316 self.assertFalse(with_params.has_checksum)
317
317
318 self.assertEqual(len(with_size), len(no_size) + 1)
318 self.assertEqual(len(with_size), len(no_size) + 1)
319
319
320 def test_compress_after_finished(self):
320 def test_compress_after_finished(self):
321 cctx = zstd.ZstdCompressor()
321 cctx = zstd.ZstdCompressor()
322 cobj = cctx.compressobj()
322 cobj = cctx.compressobj()
323
323
324 cobj.compress(b'foo')
324 cobj.compress(b'foo')
325 cobj.flush()
325 cobj.flush()
326
326
327 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
327 with self.assertRaisesRegexp(zstd.ZstdError, r'cannot call compress\(\) after compressor'):
328 cobj.compress(b'foo')
328 cobj.compress(b'foo')
329
329
330 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
330 with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
331 cobj.flush()
331 cobj.flush()
332
332
333 def test_flush_block_repeated(self):
333 def test_flush_block_repeated(self):
334 cctx = zstd.ZstdCompressor(level=1)
334 cctx = zstd.ZstdCompressor(level=1)
335 cobj = cctx.compressobj()
335 cobj = cctx.compressobj()
336
336
337 self.assertEqual(cobj.compress(b'foo'), b'')
337 self.assertEqual(cobj.compress(b'foo'), b'')
338 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
338 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
339 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
339 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
340 self.assertEqual(cobj.compress(b'bar'), b'')
340 self.assertEqual(cobj.compress(b'bar'), b'')
341 # 3 byte header plus content.
341 # 3 byte header plus content.
342 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
342 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
343 b'\x18\x00\x00bar')
343 b'\x18\x00\x00bar')
344 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
344 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
345
345
346 def test_flush_empty_block(self):
346 def test_flush_empty_block(self):
347 cctx = zstd.ZstdCompressor(write_checksum=True)
347 cctx = zstd.ZstdCompressor(write_checksum=True)
348 cobj = cctx.compressobj()
348 cobj = cctx.compressobj()
349
349
350 cobj.compress(b'foobar')
350 cobj.compress(b'foobar')
351 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
351 cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
352 # No-op if no block is active (this is internal to zstd).
352 # No-op if no block is active (this is internal to zstd).
353 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
353 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
354
354
355 trailing = cobj.flush()
355 trailing = cobj.flush()
356 # 3 bytes block header + 4 bytes frame checksum
356 # 3 bytes block header + 4 bytes frame checksum
357 self.assertEqual(len(trailing), 7)
357 self.assertEqual(len(trailing), 7)
358 header = trailing[0:3]
358 header = trailing[0:3]
359 self.assertEqual(header, b'\x01\x00\x00')
359 self.assertEqual(header, b'\x01\x00\x00')
360
360
361 def test_multithreaded(self):
361 def test_multithreaded(self):
362 source = io.BytesIO()
362 source = io.BytesIO()
363 source.write(b'a' * 1048576)
363 source.write(b'a' * 1048576)
364 source.write(b'b' * 1048576)
364 source.write(b'b' * 1048576)
365 source.write(b'c' * 1048576)
365 source.write(b'c' * 1048576)
366 source.seek(0)
366 source.seek(0)
367
367
368 cctx = zstd.ZstdCompressor(level=1, threads=2)
368 cctx = zstd.ZstdCompressor(level=1, threads=2)
369 cobj = cctx.compressobj()
369 cobj = cctx.compressobj()
370
370
371 chunks = []
371 chunks = []
372 while True:
372 while True:
373 d = source.read(8192)
373 d = source.read(8192)
374 if not d:
374 if not d:
375 break
375 break
376
376
377 chunks.append(cobj.compress(d))
377 chunks.append(cobj.compress(d))
378
378
379 chunks.append(cobj.flush())
379 chunks.append(cobj.flush())
380
380
381 compressed = b''.join(chunks)
381 compressed = b''.join(chunks)
382
382
383 self.assertEqual(len(compressed), 295)
383 self.assertEqual(len(compressed), 295)
384
384
385 def test_frame_progression(self):
385 def test_frame_progression(self):
386 cctx = zstd.ZstdCompressor()
386 cctx = zstd.ZstdCompressor()
387
387
388 self.assertEqual(cctx.frame_progression(), (0, 0, 0))
388 self.assertEqual(cctx.frame_progression(), (0, 0, 0))
389
389
390 cobj = cctx.compressobj()
390 cobj = cctx.compressobj()
391
391
392 cobj.compress(b'foobar')
392 cobj.compress(b'foobar')
393 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
393 self.assertEqual(cctx.frame_progression(), (6, 0, 0))
394
394
395 cobj.flush()
395 cobj.flush()
396 self.assertEqual(cctx.frame_progression(), (6, 6, 15))
396 self.assertEqual(cctx.frame_progression(), (6, 6, 15))
397
397
398 def test_bad_size(self):
398 def test_bad_size(self):
399 cctx = zstd.ZstdCompressor()
399 cctx = zstd.ZstdCompressor()
400
400
401 cobj = cctx.compressobj(size=2)
401 cobj = cctx.compressobj(size=2)
402 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
402 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
403 cobj.compress(b'foo')
403 cobj.compress(b'foo')
404
404
405 # Try another operation on this instance.
405 # Try another operation on this instance.
406 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
406 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
407 cobj.compress(b'aa')
407 cobj.compress(b'aa')
408
408
409 # Try another operation on the compressor.
409 # Try another operation on the compressor.
410 cctx.compressobj(size=4)
410 cctx.compressobj(size=4)
411 cctx.compress(b'foobar')
411 cctx.compress(b'foobar')
412
412
413
413
414 @make_cffi
414 @make_cffi
415 class TestCompressor_copy_stream(unittest.TestCase):
415 class TestCompressor_copy_stream(unittest.TestCase):
416 def test_no_read(self):
416 def test_no_read(self):
417 source = object()
417 source = object()
418 dest = io.BytesIO()
418 dest = io.BytesIO()
419
419
420 cctx = zstd.ZstdCompressor()
420 cctx = zstd.ZstdCompressor()
421 with self.assertRaises(ValueError):
421 with self.assertRaises(ValueError):
422 cctx.copy_stream(source, dest)
422 cctx.copy_stream(source, dest)
423
423
424 def test_no_write(self):
424 def test_no_write(self):
425 source = io.BytesIO()
425 source = io.BytesIO()
426 dest = object()
426 dest = object()
427
427
428 cctx = zstd.ZstdCompressor()
428 cctx = zstd.ZstdCompressor()
429 with self.assertRaises(ValueError):
429 with self.assertRaises(ValueError):
430 cctx.copy_stream(source, dest)
430 cctx.copy_stream(source, dest)
431
431
432 def test_empty(self):
432 def test_empty(self):
433 source = io.BytesIO()
433 source = io.BytesIO()
434 dest = io.BytesIO()
434 dest = io.BytesIO()
435
435
436 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
436 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
437 r, w = cctx.copy_stream(source, dest)
437 r, w = cctx.copy_stream(source, dest)
438 self.assertEqual(int(r), 0)
438 self.assertEqual(int(r), 0)
439 self.assertEqual(w, 9)
439 self.assertEqual(w, 9)
440
440
441 self.assertEqual(dest.getvalue(),
441 self.assertEqual(dest.getvalue(),
442 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
442 b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
443
443
444 def test_large_data(self):
444 def test_large_data(self):
445 source = io.BytesIO()
445 source = io.BytesIO()
446 for i in range(255):
446 for i in range(255):
447 source.write(struct.Struct('>B').pack(i) * 16384)
447 source.write(struct.Struct('>B').pack(i) * 16384)
448 source.seek(0)
448 source.seek(0)
449
449
450 dest = io.BytesIO()
450 dest = io.BytesIO()
451 cctx = zstd.ZstdCompressor()
451 cctx = zstd.ZstdCompressor()
452 r, w = cctx.copy_stream(source, dest)
452 r, w = cctx.copy_stream(source, dest)
453
453
454 self.assertEqual(r, 255 * 16384)
454 self.assertEqual(r, 255 * 16384)
455 self.assertEqual(w, 999)
455 self.assertEqual(w, 999)
456
456
457 params = zstd.get_frame_parameters(dest.getvalue())
457 params = zstd.get_frame_parameters(dest.getvalue())
458 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
458 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
459 self.assertEqual(params.window_size, 2097152)
459 self.assertEqual(params.window_size, 2097152)
460 self.assertEqual(params.dict_id, 0)
460 self.assertEqual(params.dict_id, 0)
461 self.assertFalse(params.has_checksum)
461 self.assertFalse(params.has_checksum)
462
462
463 def test_write_checksum(self):
463 def test_write_checksum(self):
464 source = io.BytesIO(b'foobar')
464 source = io.BytesIO(b'foobar')
465 no_checksum = io.BytesIO()
465 no_checksum = io.BytesIO()
466
466
467 cctx = zstd.ZstdCompressor(level=1)
467 cctx = zstd.ZstdCompressor(level=1)
468 cctx.copy_stream(source, no_checksum)
468 cctx.copy_stream(source, no_checksum)
469
469
470 source.seek(0)
470 source.seek(0)
471 with_checksum = io.BytesIO()
471 with_checksum = io.BytesIO()
472 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
472 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
473 cctx.copy_stream(source, with_checksum)
473 cctx.copy_stream(source, with_checksum)
474
474
475 self.assertEqual(len(with_checksum.getvalue()),
475 self.assertEqual(len(with_checksum.getvalue()),
476 len(no_checksum.getvalue()) + 4)
476 len(no_checksum.getvalue()) + 4)
477
477
478 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
478 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
479 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
479 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
480 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
480 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
481 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
481 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
482 self.assertEqual(no_params.dict_id, 0)
482 self.assertEqual(no_params.dict_id, 0)
483 self.assertEqual(with_params.dict_id, 0)
483 self.assertEqual(with_params.dict_id, 0)
484 self.assertFalse(no_params.has_checksum)
484 self.assertFalse(no_params.has_checksum)
485 self.assertTrue(with_params.has_checksum)
485 self.assertTrue(with_params.has_checksum)
486
486
487 def test_write_content_size(self):
487 def test_write_content_size(self):
488 source = io.BytesIO(b'foobar' * 256)
488 source = io.BytesIO(b'foobar' * 256)
489 no_size = io.BytesIO()
489 no_size = io.BytesIO()
490
490
491 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
491 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
492 cctx.copy_stream(source, no_size)
492 cctx.copy_stream(source, no_size)
493
493
494 source.seek(0)
494 source.seek(0)
495 with_size = io.BytesIO()
495 with_size = io.BytesIO()
496 cctx = zstd.ZstdCompressor(level=1)
496 cctx = zstd.ZstdCompressor(level=1)
497 cctx.copy_stream(source, with_size)
497 cctx.copy_stream(source, with_size)
498
498
499 # Source content size is unknown, so no content size written.
499 # Source content size is unknown, so no content size written.
500 self.assertEqual(len(with_size.getvalue()),
500 self.assertEqual(len(with_size.getvalue()),
501 len(no_size.getvalue()))
501 len(no_size.getvalue()))
502
502
503 source.seek(0)
503 source.seek(0)
504 with_size = io.BytesIO()
504 with_size = io.BytesIO()
505 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
505 cctx.copy_stream(source, with_size, size=len(source.getvalue()))
506
506
507 # We specified source size, so content size header is present.
507 # We specified source size, so content size header is present.
508 self.assertEqual(len(with_size.getvalue()),
508 self.assertEqual(len(with_size.getvalue()),
509 len(no_size.getvalue()) + 1)
509 len(no_size.getvalue()) + 1)
510
510
511 no_params = zstd.get_frame_parameters(no_size.getvalue())
511 no_params = zstd.get_frame_parameters(no_size.getvalue())
512 with_params = zstd.get_frame_parameters(with_size.getvalue())
512 with_params = zstd.get_frame_parameters(with_size.getvalue())
513 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
513 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
514 self.assertEqual(with_params.content_size, 1536)
514 self.assertEqual(with_params.content_size, 1536)
515 self.assertEqual(no_params.dict_id, 0)
515 self.assertEqual(no_params.dict_id, 0)
516 self.assertEqual(with_params.dict_id, 0)
516 self.assertEqual(with_params.dict_id, 0)
517 self.assertFalse(no_params.has_checksum)
517 self.assertFalse(no_params.has_checksum)
518 self.assertFalse(with_params.has_checksum)
518 self.assertFalse(with_params.has_checksum)
519
519
520 def test_read_write_size(self):
520 def test_read_write_size(self):
521 source = OpCountingBytesIO(b'foobarfoobar')
521 source = OpCountingBytesIO(b'foobarfoobar')
522 dest = OpCountingBytesIO()
522 dest = OpCountingBytesIO()
523 cctx = zstd.ZstdCompressor()
523 cctx = zstd.ZstdCompressor()
524 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
524 r, w = cctx.copy_stream(source, dest, read_size=1, write_size=1)
525
525
526 self.assertEqual(r, len(source.getvalue()))
526 self.assertEqual(r, len(source.getvalue()))
527 self.assertEqual(w, 21)
527 self.assertEqual(w, 21)
528 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
528 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
529 self.assertEqual(dest._write_count, len(dest.getvalue()))
529 self.assertEqual(dest._write_count, len(dest.getvalue()))
530
530
531 def test_multithreaded(self):
531 def test_multithreaded(self):
532 source = io.BytesIO()
532 source = io.BytesIO()
533 source.write(b'a' * 1048576)
533 source.write(b'a' * 1048576)
534 source.write(b'b' * 1048576)
534 source.write(b'b' * 1048576)
535 source.write(b'c' * 1048576)
535 source.write(b'c' * 1048576)
536 source.seek(0)
536 source.seek(0)
537
537
538 dest = io.BytesIO()
538 dest = io.BytesIO()
539 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
539 cctx = zstd.ZstdCompressor(threads=2, write_content_size=False)
540 r, w = cctx.copy_stream(source, dest)
540 r, w = cctx.copy_stream(source, dest)
541 self.assertEqual(r, 3145728)
541 self.assertEqual(r, 3145728)
542 self.assertEqual(w, 295)
542 self.assertEqual(w, 295)
543
543
544 params = zstd.get_frame_parameters(dest.getvalue())
544 params = zstd.get_frame_parameters(dest.getvalue())
545 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
545 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
546 self.assertEqual(params.dict_id, 0)
546 self.assertEqual(params.dict_id, 0)
547 self.assertFalse(params.has_checksum)
547 self.assertFalse(params.has_checksum)
548
548
549 # Writing content size and checksum works.
549 # Writing content size and checksum works.
550 cctx = zstd.ZstdCompressor(threads=2, write_checksum=True)
550 cctx = zstd.ZstdCompressor(threads=2, write_checksum=True)
551 dest = io.BytesIO()
551 dest = io.BytesIO()
552 source.seek(0)
552 source.seek(0)
553 cctx.copy_stream(source, dest, size=len(source.getvalue()))
553 cctx.copy_stream(source, dest, size=len(source.getvalue()))
554
554
555 params = zstd.get_frame_parameters(dest.getvalue())
555 params = zstd.get_frame_parameters(dest.getvalue())
556 self.assertEqual(params.content_size, 3145728)
556 self.assertEqual(params.content_size, 3145728)
557 self.assertEqual(params.dict_id, 0)
557 self.assertEqual(params.dict_id, 0)
558 self.assertTrue(params.has_checksum)
558 self.assertTrue(params.has_checksum)
559
559
560 def test_bad_size(self):
560 def test_bad_size(self):
561 source = io.BytesIO()
561 source = io.BytesIO()
562 source.write(b'a' * 32768)
562 source.write(b'a' * 32768)
563 source.write(b'b' * 32768)
563 source.write(b'b' * 32768)
564 source.seek(0)
564 source.seek(0)
565
565
566 dest = io.BytesIO()
566 dest = io.BytesIO()
567
567
568 cctx = zstd.ZstdCompressor()
568 cctx = zstd.ZstdCompressor()
569
569
570 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
570 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
571 cctx.copy_stream(source, dest, size=42)
571 cctx.copy_stream(source, dest, size=42)
572
572
573 # Try another operation on this compressor.
573 # Try another operation on this compressor.
574 source.seek(0)
574 source.seek(0)
575 dest = io.BytesIO()
575 dest = io.BytesIO()
576 cctx.copy_stream(source, dest)
576 cctx.copy_stream(source, dest)
577
577
578
578
579 @make_cffi
579 @make_cffi
580 class TestCompressor_stream_reader(unittest.TestCase):
580 class TestCompressor_stream_reader(unittest.TestCase):
581 def test_context_manager(self):
581 def test_context_manager(self):
582 cctx = zstd.ZstdCompressor()
582 cctx = zstd.ZstdCompressor()
583
583
584 with cctx.stream_reader(b'foo') as reader:
584 with cctx.stream_reader(b'foo') as reader:
585 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
585 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
586 with reader as reader2:
586 with reader as reader2:
587 pass
587 pass
588
588
589 def test_no_context_manager(self):
589 def test_no_context_manager(self):
590 cctx = zstd.ZstdCompressor()
590 cctx = zstd.ZstdCompressor()
591
591
592 reader = cctx.stream_reader(b'foo')
592 reader = cctx.stream_reader(b'foo')
593 reader.read(4)
593 reader.read(4)
594 self.assertFalse(reader.closed)
594 self.assertFalse(reader.closed)
595
595
596 reader.close()
596 reader.close()
597 self.assertTrue(reader.closed)
597 self.assertTrue(reader.closed)
598 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
598 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
599 reader.read(1)
599 reader.read(1)
600
600
601 def test_not_implemented(self):
601 def test_not_implemented(self):
602 cctx = zstd.ZstdCompressor()
602 cctx = zstd.ZstdCompressor()
603
603
604 with cctx.stream_reader(b'foo' * 60) as reader:
604 with cctx.stream_reader(b'foo' * 60) as reader:
605 with self.assertRaises(io.UnsupportedOperation):
605 with self.assertRaises(io.UnsupportedOperation):
606 reader.readline()
606 reader.readline()
607
607
608 with self.assertRaises(io.UnsupportedOperation):
608 with self.assertRaises(io.UnsupportedOperation):
609 reader.readlines()
609 reader.readlines()
610
610
611 with self.assertRaises(io.UnsupportedOperation):
611 with self.assertRaises(io.UnsupportedOperation):
612 iter(reader)
612 iter(reader)
613
613
614 with self.assertRaises(io.UnsupportedOperation):
614 with self.assertRaises(io.UnsupportedOperation):
615 next(reader)
615 next(reader)
616
616
617 with self.assertRaises(OSError):
617 with self.assertRaises(OSError):
618 reader.writelines([])
618 reader.writelines([])
619
619
620 with self.assertRaises(OSError):
620 with self.assertRaises(OSError):
621 reader.write(b'foo')
621 reader.write(b'foo')
622
622
623 def test_constant_methods(self):
623 def test_constant_methods(self):
624 cctx = zstd.ZstdCompressor()
624 cctx = zstd.ZstdCompressor()
625
625
626 with cctx.stream_reader(b'boo') as reader:
626 with cctx.stream_reader(b'boo') as reader:
627 self.assertTrue(reader.readable())
627 self.assertTrue(reader.readable())
628 self.assertFalse(reader.writable())
628 self.assertFalse(reader.writable())
629 self.assertFalse(reader.seekable())
629 self.assertFalse(reader.seekable())
630 self.assertFalse(reader.isatty())
630 self.assertFalse(reader.isatty())
631 self.assertFalse(reader.closed)
631 self.assertFalse(reader.closed)
632 self.assertIsNone(reader.flush())
632 self.assertIsNone(reader.flush())
633 self.assertFalse(reader.closed)
633 self.assertFalse(reader.closed)
634
634
635 self.assertTrue(reader.closed)
635 self.assertTrue(reader.closed)
636
636
637 def test_read_closed(self):
637 def test_read_closed(self):
638 cctx = zstd.ZstdCompressor()
638 cctx = zstd.ZstdCompressor()
639
639
640 with cctx.stream_reader(b'foo' * 60) as reader:
640 with cctx.stream_reader(b'foo' * 60) as reader:
641 reader.close()
641 reader.close()
642 self.assertTrue(reader.closed)
642 self.assertTrue(reader.closed)
643 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
643 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
644 reader.read(10)
644 reader.read(10)
645
645
646 def test_read_sizes(self):
646 def test_read_sizes(self):
647 cctx = zstd.ZstdCompressor()
647 cctx = zstd.ZstdCompressor()
648 foo = cctx.compress(b'foo')
648 foo = cctx.compress(b'foo')
649
649
650 with cctx.stream_reader(b'foo') as reader:
650 with cctx.stream_reader(b'foo') as reader:
651 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
651 with self.assertRaisesRegexp(ValueError, 'cannot read negative amounts less than -1'):
652 reader.read(-2)
652 reader.read(-2)
653
653
654 self.assertEqual(reader.read(0), b'')
654 self.assertEqual(reader.read(0), b'')
655 self.assertEqual(reader.read(), foo)
655 self.assertEqual(reader.read(), foo)
656
656
657 def test_read_buffer(self):
657 def test_read_buffer(self):
658 cctx = zstd.ZstdCompressor()
658 cctx = zstd.ZstdCompressor()
659
659
660 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
660 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
661 frame = cctx.compress(source)
661 frame = cctx.compress(source)
662
662
663 with cctx.stream_reader(source) as reader:
663 with cctx.stream_reader(source) as reader:
664 self.assertEqual(reader.tell(), 0)
664 self.assertEqual(reader.tell(), 0)
665
665
666 # We should get entire frame in one read.
666 # We should get entire frame in one read.
667 result = reader.read(8192)
667 result = reader.read(8192)
668 self.assertEqual(result, frame)
668 self.assertEqual(result, frame)
669 self.assertEqual(reader.tell(), len(result))
669 self.assertEqual(reader.tell(), len(result))
670 self.assertEqual(reader.read(), b'')
670 self.assertEqual(reader.read(), b'')
671 self.assertEqual(reader.tell(), len(result))
671 self.assertEqual(reader.tell(), len(result))
672
672
673 def test_read_buffer_small_chunks(self):
673 def test_read_buffer_small_chunks(self):
674 cctx = zstd.ZstdCompressor()
674 cctx = zstd.ZstdCompressor()
675
675
676 source = b'foo' * 60
676 source = b'foo' * 60
677 chunks = []
677 chunks = []
678
678
679 with cctx.stream_reader(source) as reader:
679 with cctx.stream_reader(source) as reader:
680 self.assertEqual(reader.tell(), 0)
680 self.assertEqual(reader.tell(), 0)
681
681
682 while True:
682 while True:
683 chunk = reader.read(1)
683 chunk = reader.read(1)
684 if not chunk:
684 if not chunk:
685 break
685 break
686
686
687 chunks.append(chunk)
687 chunks.append(chunk)
688 self.assertEqual(reader.tell(), sum(map(len, chunks)))
688 self.assertEqual(reader.tell(), sum(map(len, chunks)))
689
689
690 self.assertEqual(b''.join(chunks), cctx.compress(source))
690 self.assertEqual(b''.join(chunks), cctx.compress(source))
691
691
692 def test_read_stream(self):
692 def test_read_stream(self):
693 cctx = zstd.ZstdCompressor()
693 cctx = zstd.ZstdCompressor()
694
694
695 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
695 source = b''.join([b'foo' * 60, b'bar' * 60, b'baz' * 60])
696 frame = cctx.compress(source)
696 frame = cctx.compress(source)
697
697
698 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
698 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
699 self.assertEqual(reader.tell(), 0)
699 self.assertEqual(reader.tell(), 0)
700
700
701 chunk = reader.read(8192)
701 chunk = reader.read(8192)
702 self.assertEqual(chunk, frame)
702 self.assertEqual(chunk, frame)
703 self.assertEqual(reader.tell(), len(chunk))
703 self.assertEqual(reader.tell(), len(chunk))
704 self.assertEqual(reader.read(), b'')
704 self.assertEqual(reader.read(), b'')
705 self.assertEqual(reader.tell(), len(chunk))
705 self.assertEqual(reader.tell(), len(chunk))
706
706
707 def test_read_stream_small_chunks(self):
707 def test_read_stream_small_chunks(self):
708 cctx = zstd.ZstdCompressor()
708 cctx = zstd.ZstdCompressor()
709
709
710 source = b'foo' * 60
710 source = b'foo' * 60
711 chunks = []
711 chunks = []
712
712
713 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
713 with cctx.stream_reader(io.BytesIO(source), size=len(source)) as reader:
714 self.assertEqual(reader.tell(), 0)
714 self.assertEqual(reader.tell(), 0)
715
715
716 while True:
716 while True:
717 chunk = reader.read(1)
717 chunk = reader.read(1)
718 if not chunk:
718 if not chunk:
719 break
719 break
720
720
721 chunks.append(chunk)
721 chunks.append(chunk)
722 self.assertEqual(reader.tell(), sum(map(len, chunks)))
722 self.assertEqual(reader.tell(), sum(map(len, chunks)))
723
723
724 self.assertEqual(b''.join(chunks), cctx.compress(source))
724 self.assertEqual(b''.join(chunks), cctx.compress(source))
725
725
726 def test_read_after_exit(self):
726 def test_read_after_exit(self):
727 cctx = zstd.ZstdCompressor()
727 cctx = zstd.ZstdCompressor()
728
728
729 with cctx.stream_reader(b'foo' * 60) as reader:
729 with cctx.stream_reader(b'foo' * 60) as reader:
730 while reader.read(8192):
730 while reader.read(8192):
731 pass
731 pass
732
732
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
734 reader.read(10)
734 reader.read(10)
735
735
736 def test_bad_size(self):
736 def test_bad_size(self):
737 cctx = zstd.ZstdCompressor()
737 cctx = zstd.ZstdCompressor()
738
738
739 source = io.BytesIO(b'foobar')
739 source = io.BytesIO(b'foobar')
740
740
741 with cctx.stream_reader(source, size=2) as reader:
741 with cctx.stream_reader(source, size=2) as reader:
742 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
742 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
743 reader.read(10)
743 reader.read(10)
744
744
745 # Try another compression operation.
745 # Try another compression operation.
746 with cctx.stream_reader(source, size=42):
746 with cctx.stream_reader(source, size=42):
747 pass
747 pass
748
748
749 def test_readall(self):
749 def test_readall(self):
750 cctx = zstd.ZstdCompressor()
750 cctx = zstd.ZstdCompressor()
751 frame = cctx.compress(b'foo' * 1024)
751 frame = cctx.compress(b'foo' * 1024)
752
752
753 reader = cctx.stream_reader(b'foo' * 1024)
753 reader = cctx.stream_reader(b'foo' * 1024)
754 self.assertEqual(reader.readall(), frame)
754 self.assertEqual(reader.readall(), frame)
755
755
756 def test_readinto(self):
756 def test_readinto(self):
757 cctx = zstd.ZstdCompressor()
757 cctx = zstd.ZstdCompressor()
758 foo = cctx.compress(b'foo')
758 foo = cctx.compress(b'foo')
759
759
760 reader = cctx.stream_reader(b'foo')
760 reader = cctx.stream_reader(b'foo')
761 with self.assertRaises(Exception):
761 with self.assertRaises(Exception):
762 reader.readinto(b'foobar')
762 reader.readinto(b'foobar')
763
763
764 # readinto() with sufficiently large destination.
764 # readinto() with sufficiently large destination.
765 b = bytearray(1024)
765 b = bytearray(1024)
766 reader = cctx.stream_reader(b'foo')
766 reader = cctx.stream_reader(b'foo')
767 self.assertEqual(reader.readinto(b), len(foo))
767 self.assertEqual(reader.readinto(b), len(foo))
768 self.assertEqual(b[0:len(foo)], foo)
768 self.assertEqual(b[0:len(foo)], foo)
769 self.assertEqual(reader.readinto(b), 0)
769 self.assertEqual(reader.readinto(b), 0)
770 self.assertEqual(b[0:len(foo)], foo)
770 self.assertEqual(b[0:len(foo)], foo)
771
771
772 # readinto() with small reads.
772 # readinto() with small reads.
773 b = bytearray(1024)
773 b = bytearray(1024)
774 reader = cctx.stream_reader(b'foo', read_size=1)
774 reader = cctx.stream_reader(b'foo', read_size=1)
775 self.assertEqual(reader.readinto(b), len(foo))
775 self.assertEqual(reader.readinto(b), len(foo))
776 self.assertEqual(b[0:len(foo)], foo)
776 self.assertEqual(b[0:len(foo)], foo)
777
777
778 # Too small destination buffer.
778 # Too small destination buffer.
779 b = bytearray(2)
779 b = bytearray(2)
780 reader = cctx.stream_reader(b'foo')
780 reader = cctx.stream_reader(b'foo')
781 self.assertEqual(reader.readinto(b), 2)
781 self.assertEqual(reader.readinto(b), 2)
782 self.assertEqual(b[:], foo[0:2])
782 self.assertEqual(b[:], foo[0:2])
783 self.assertEqual(reader.readinto(b), 2)
783 self.assertEqual(reader.readinto(b), 2)
784 self.assertEqual(b[:], foo[2:4])
784 self.assertEqual(b[:], foo[2:4])
785 self.assertEqual(reader.readinto(b), 2)
785 self.assertEqual(reader.readinto(b), 2)
786 self.assertEqual(b[:], foo[4:6])
786 self.assertEqual(b[:], foo[4:6])
787
787
788 def test_readinto1(self):
788 def test_readinto1(self):
789 cctx = zstd.ZstdCompressor()
789 cctx = zstd.ZstdCompressor()
790 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
790 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
791
791
792 reader = cctx.stream_reader(b'foo')
792 reader = cctx.stream_reader(b'foo')
793 with self.assertRaises(Exception):
793 with self.assertRaises(Exception):
794 reader.readinto1(b'foobar')
794 reader.readinto1(b'foobar')
795
795
796 b = bytearray(1024)
796 b = bytearray(1024)
797 source = OpCountingBytesIO(b'foo')
797 source = OpCountingBytesIO(b'foo')
798 reader = cctx.stream_reader(source)
798 reader = cctx.stream_reader(source)
799 self.assertEqual(reader.readinto1(b), len(foo))
799 self.assertEqual(reader.readinto1(b), len(foo))
800 self.assertEqual(b[0:len(foo)], foo)
800 self.assertEqual(b[0:len(foo)], foo)
801 self.assertEqual(source._read_count, 2)
801 self.assertEqual(source._read_count, 2)
802
802
803 # readinto1() with small reads.
803 # readinto1() with small reads.
804 b = bytearray(1024)
804 b = bytearray(1024)
805 source = OpCountingBytesIO(b'foo')
805 source = OpCountingBytesIO(b'foo')
806 reader = cctx.stream_reader(source, read_size=1)
806 reader = cctx.stream_reader(source, read_size=1)
807 self.assertEqual(reader.readinto1(b), len(foo))
807 self.assertEqual(reader.readinto1(b), len(foo))
808 self.assertEqual(b[0:len(foo)], foo)
808 self.assertEqual(b[0:len(foo)], foo)
809 self.assertEqual(source._read_count, 4)
809 self.assertEqual(source._read_count, 4)
810
810
811 def test_read1(self):
811 def test_read1(self):
812 cctx = zstd.ZstdCompressor()
812 cctx = zstd.ZstdCompressor()
813 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
813 foo = b''.join(cctx.read_to_iter(io.BytesIO(b'foo')))
814
814
815 b = OpCountingBytesIO(b'foo')
815 b = OpCountingBytesIO(b'foo')
816 reader = cctx.stream_reader(b)
816 reader = cctx.stream_reader(b)
817
817
818 self.assertEqual(reader.read1(), foo)
818 self.assertEqual(reader.read1(), foo)
819 self.assertEqual(b._read_count, 2)
819 self.assertEqual(b._read_count, 2)
820
820
821 b = OpCountingBytesIO(b'foo')
821 b = OpCountingBytesIO(b'foo')
822 reader = cctx.stream_reader(b)
822 reader = cctx.stream_reader(b)
823
823
824 self.assertEqual(reader.read1(0), b'')
824 self.assertEqual(reader.read1(0), b'')
825 self.assertEqual(reader.read1(2), foo[0:2])
825 self.assertEqual(reader.read1(2), foo[0:2])
826 self.assertEqual(b._read_count, 2)
826 self.assertEqual(b._read_count, 2)
827 self.assertEqual(reader.read1(2), foo[2:4])
827 self.assertEqual(reader.read1(2), foo[2:4])
828 self.assertEqual(reader.read1(1024), foo[4:])
828 self.assertEqual(reader.read1(1024), foo[4:])
829
829
830
830
831 @make_cffi
831 @make_cffi
832 class TestCompressor_stream_writer(unittest.TestCase):
832 class TestCompressor_stream_writer(unittest.TestCase):
833 def test_io_api(self):
833 def test_io_api(self):
834 buffer = io.BytesIO()
834 buffer = io.BytesIO()
835 cctx = zstd.ZstdCompressor()
835 cctx = zstd.ZstdCompressor()
836 writer = cctx.stream_writer(buffer)
836 writer = cctx.stream_writer(buffer)
837
837
838 self.assertFalse(writer.isatty())
838 self.assertFalse(writer.isatty())
839 self.assertFalse(writer.readable())
839 self.assertFalse(writer.readable())
840
840
841 with self.assertRaises(io.UnsupportedOperation):
841 with self.assertRaises(io.UnsupportedOperation):
842 writer.readline()
842 writer.readline()
843
843
844 with self.assertRaises(io.UnsupportedOperation):
844 with self.assertRaises(io.UnsupportedOperation):
845 writer.readline(42)
845 writer.readline(42)
846
846
847 with self.assertRaises(io.UnsupportedOperation):
847 with self.assertRaises(io.UnsupportedOperation):
848 writer.readline(size=42)
848 writer.readline(size=42)
849
849
850 with self.assertRaises(io.UnsupportedOperation):
850 with self.assertRaises(io.UnsupportedOperation):
851 writer.readlines()
851 writer.readlines()
852
852
853 with self.assertRaises(io.UnsupportedOperation):
853 with self.assertRaises(io.UnsupportedOperation):
854 writer.readlines(42)
854 writer.readlines(42)
855
855
856 with self.assertRaises(io.UnsupportedOperation):
856 with self.assertRaises(io.UnsupportedOperation):
857 writer.readlines(hint=42)
857 writer.readlines(hint=42)
858
858
859 with self.assertRaises(io.UnsupportedOperation):
859 with self.assertRaises(io.UnsupportedOperation):
860 writer.seek(0)
860 writer.seek(0)
861
861
862 with self.assertRaises(io.UnsupportedOperation):
862 with self.assertRaises(io.UnsupportedOperation):
863 writer.seek(10, os.SEEK_SET)
863 writer.seek(10, os.SEEK_SET)
864
864
865 self.assertFalse(writer.seekable())
865 self.assertFalse(writer.seekable())
866
866
867 with self.assertRaises(io.UnsupportedOperation):
867 with self.assertRaises(io.UnsupportedOperation):
868 writer.truncate()
868 writer.truncate()
869
869
870 with self.assertRaises(io.UnsupportedOperation):
870 with self.assertRaises(io.UnsupportedOperation):
871 writer.truncate(42)
871 writer.truncate(42)
872
872
873 with self.assertRaises(io.UnsupportedOperation):
873 with self.assertRaises(io.UnsupportedOperation):
874 writer.truncate(size=42)
874 writer.truncate(size=42)
875
875
876 self.assertTrue(writer.writable())
876 self.assertTrue(writer.writable())
877
877
878 with self.assertRaises(NotImplementedError):
878 with self.assertRaises(NotImplementedError):
879 writer.writelines([])
879 writer.writelines([])
880
880
881 with self.assertRaises(io.UnsupportedOperation):
881 with self.assertRaises(io.UnsupportedOperation):
882 writer.read()
882 writer.read()
883
883
884 with self.assertRaises(io.UnsupportedOperation):
884 with self.assertRaises(io.UnsupportedOperation):
885 writer.read(42)
885 writer.read(42)
886
886
887 with self.assertRaises(io.UnsupportedOperation):
887 with self.assertRaises(io.UnsupportedOperation):
888 writer.read(size=42)
888 writer.read(size=42)
889
889
890 with self.assertRaises(io.UnsupportedOperation):
890 with self.assertRaises(io.UnsupportedOperation):
891 writer.readall()
891 writer.readall()
892
892
893 with self.assertRaises(io.UnsupportedOperation):
893 with self.assertRaises(io.UnsupportedOperation):
894 writer.readinto(None)
894 writer.readinto(None)
895
895
896 with self.assertRaises(io.UnsupportedOperation):
896 with self.assertRaises(io.UnsupportedOperation):
897 writer.fileno()
897 writer.fileno()
898
898
899 self.assertFalse(writer.closed)
899 self.assertFalse(writer.closed)
900
900
901 def test_fileno_file(self):
901 def test_fileno_file(self):
902 with tempfile.TemporaryFile('wb') as tf:
902 with tempfile.TemporaryFile('wb') as tf:
903 cctx = zstd.ZstdCompressor()
903 cctx = zstd.ZstdCompressor()
904 writer = cctx.stream_writer(tf)
904 writer = cctx.stream_writer(tf)
905
905
906 self.assertEqual(writer.fileno(), tf.fileno())
906 self.assertEqual(writer.fileno(), tf.fileno())
907
907
908 def test_close(self):
908 def test_close(self):
909 buffer = NonClosingBytesIO()
909 buffer = NonClosingBytesIO()
910 cctx = zstd.ZstdCompressor(level=1)
910 cctx = zstd.ZstdCompressor(level=1)
911 writer = cctx.stream_writer(buffer)
911 writer = cctx.stream_writer(buffer)
912
912
913 writer.write(b'foo' * 1024)
913 writer.write(b'foo' * 1024)
914 self.assertFalse(writer.closed)
914 self.assertFalse(writer.closed)
915 self.assertFalse(buffer.closed)
915 self.assertFalse(buffer.closed)
916 writer.close()
916 writer.close()
917 self.assertTrue(writer.closed)
917 self.assertTrue(writer.closed)
918 self.assertTrue(buffer.closed)
918 self.assertTrue(buffer.closed)
919
919
920 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
920 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
921 writer.write(b'foo')
921 writer.write(b'foo')
922
922
923 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
923 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
924 writer.flush()
924 writer.flush()
925
925
926 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
926 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
927 with writer:
927 with writer:
928 pass
928 pass
929
929
930 self.assertEqual(buffer.getvalue(),
930 self.assertEqual(buffer.getvalue(),
931 b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f'
931 b'\x28\xb5\x2f\xfd\x00\x48\x55\x00\x00\x18\x66\x6f'
932 b'\x6f\x01\x00\xfa\xd3\x77\x43')
932 b'\x6f\x01\x00\xfa\xd3\x77\x43')
933
933
934 # Context manager exit should close stream.
934 # Context manager exit should close stream.
935 buffer = io.BytesIO()
935 buffer = io.BytesIO()
936 writer = cctx.stream_writer(buffer)
936 writer = cctx.stream_writer(buffer)
937
937
938 with writer:
938 with writer:
939 writer.write(b'foo')
939 writer.write(b'foo')
940
940
941 self.assertTrue(writer.closed)
941 self.assertTrue(writer.closed)
942
942
943 def test_empty(self):
943 def test_empty(self):
944 buffer = NonClosingBytesIO()
944 buffer = NonClosingBytesIO()
945 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
945 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
946 with cctx.stream_writer(buffer) as compressor:
946 with cctx.stream_writer(buffer) as compressor:
947 compressor.write(b'')
947 compressor.write(b'')
948
948
949 result = buffer.getvalue()
949 result = buffer.getvalue()
950 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
950 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
951
951
952 params = zstd.get_frame_parameters(result)
952 params = zstd.get_frame_parameters(result)
953 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
953 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
954 self.assertEqual(params.window_size, 524288)
954 self.assertEqual(params.window_size, 524288)
955 self.assertEqual(params.dict_id, 0)
955 self.assertEqual(params.dict_id, 0)
956 self.assertFalse(params.has_checksum)
956 self.assertFalse(params.has_checksum)
957
957
958 # Test without context manager.
958 # Test without context manager.
959 buffer = io.BytesIO()
959 buffer = io.BytesIO()
960 compressor = cctx.stream_writer(buffer)
960 compressor = cctx.stream_writer(buffer)
961 self.assertEqual(compressor.write(b''), 0)
961 self.assertEqual(compressor.write(b''), 0)
962 self.assertEqual(buffer.getvalue(), b'')
962 self.assertEqual(buffer.getvalue(), b'')
963 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9)
963 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 9)
964 result = buffer.getvalue()
964 result = buffer.getvalue()
965 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
965 self.assertEqual(result, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
966
966
967 params = zstd.get_frame_parameters(result)
967 params = zstd.get_frame_parameters(result)
968 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
968 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
969 self.assertEqual(params.window_size, 524288)
969 self.assertEqual(params.window_size, 524288)
970 self.assertEqual(params.dict_id, 0)
970 self.assertEqual(params.dict_id, 0)
971 self.assertFalse(params.has_checksum)
971 self.assertFalse(params.has_checksum)
972
972
973 # Test write_return_read=True
973 # Test write_return_read=True
974 compressor = cctx.stream_writer(buffer, write_return_read=True)
974 compressor = cctx.stream_writer(buffer, write_return_read=True)
975 self.assertEqual(compressor.write(b''), 0)
975 self.assertEqual(compressor.write(b''), 0)
976
976
977 def test_input_types(self):
977 def test_input_types(self):
978 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
978 expected = b'\x28\xb5\x2f\xfd\x00\x48\x19\x00\x00\x66\x6f\x6f'
979 cctx = zstd.ZstdCompressor(level=1)
979 cctx = zstd.ZstdCompressor(level=1)
980
980
981 mutable_array = bytearray(3)
981 mutable_array = bytearray(3)
982 mutable_array[:] = b'foo'
982 mutable_array[:] = b'foo'
983
983
984 sources = [
984 sources = [
985 memoryview(b'foo'),
985 memoryview(b'foo'),
986 bytearray(b'foo'),
986 bytearray(b'foo'),
987 mutable_array,
987 mutable_array,
988 ]
988 ]
989
989
990 for source in sources:
990 for source in sources:
991 buffer = NonClosingBytesIO()
991 buffer = NonClosingBytesIO()
992 with cctx.stream_writer(buffer) as compressor:
992 with cctx.stream_writer(buffer) as compressor:
993 compressor.write(source)
993 compressor.write(source)
994
994
995 self.assertEqual(buffer.getvalue(), expected)
995 self.assertEqual(buffer.getvalue(), expected)
996
996
997 compressor = cctx.stream_writer(buffer, write_return_read=True)
997 compressor = cctx.stream_writer(buffer, write_return_read=True)
998 self.assertEqual(compressor.write(source), len(source))
998 self.assertEqual(compressor.write(source), len(source))
999
999
1000 def test_multiple_compress(self):
1000 def test_multiple_compress(self):
1001 buffer = NonClosingBytesIO()
1001 buffer = NonClosingBytesIO()
1002 cctx = zstd.ZstdCompressor(level=5)
1002 cctx = zstd.ZstdCompressor(level=5)
1003 with cctx.stream_writer(buffer) as compressor:
1003 with cctx.stream_writer(buffer) as compressor:
1004 self.assertEqual(compressor.write(b'foo'), 0)
1004 self.assertEqual(compressor.write(b'foo'), 0)
1005 self.assertEqual(compressor.write(b'bar'), 0)
1005 self.assertEqual(compressor.write(b'bar'), 0)
1006 self.assertEqual(compressor.write(b'x' * 8192), 0)
1006 self.assertEqual(compressor.write(b'x' * 8192), 0)
1007
1007
1008 result = buffer.getvalue()
1008 result = buffer.getvalue()
1009 self.assertEqual(result,
1009 self.assertEqual(result,
1010 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1010 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1011 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1011 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1012
1012
1013 # Test without context manager.
1013 # Test without context manager.
1014 buffer = io.BytesIO()
1014 buffer = io.BytesIO()
1015 compressor = cctx.stream_writer(buffer)
1015 compressor = cctx.stream_writer(buffer)
1016 self.assertEqual(compressor.write(b'foo'), 0)
1016 self.assertEqual(compressor.write(b'foo'), 0)
1017 self.assertEqual(compressor.write(b'bar'), 0)
1017 self.assertEqual(compressor.write(b'bar'), 0)
1018 self.assertEqual(compressor.write(b'x' * 8192), 0)
1018 self.assertEqual(compressor.write(b'x' * 8192), 0)
1019 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1019 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1020 result = buffer.getvalue()
1020 result = buffer.getvalue()
1021 self.assertEqual(result,
1021 self.assertEqual(result,
1022 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1022 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x38\x66\x6f'
1023 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1023 b'\x6f\x62\x61\x72\x78\x01\x00\xfc\xdf\x03\x23')
1024
1024
1025 # Test with write_return_read=True.
1025 # Test with write_return_read=True.
1026 compressor = cctx.stream_writer(buffer, write_return_read=True)
1026 compressor = cctx.stream_writer(buffer, write_return_read=True)
1027 self.assertEqual(compressor.write(b'foo'), 3)
1027 self.assertEqual(compressor.write(b'foo'), 3)
1028 self.assertEqual(compressor.write(b'barbiz'), 6)
1028 self.assertEqual(compressor.write(b'barbiz'), 6)
1029 self.assertEqual(compressor.write(b'x' * 8192), 8192)
1029 self.assertEqual(compressor.write(b'x' * 8192), 8192)
1030
1030
1031 def test_dictionary(self):
1031 def test_dictionary(self):
1032 samples = []
1032 samples = []
1033 for i in range(128):
1033 for i in range(128):
1034 samples.append(b'foo' * 64)
1034 samples.append(b'foo' * 64)
1035 samples.append(b'bar' * 64)
1035 samples.append(b'bar' * 64)
1036 samples.append(b'foobar' * 64)
1036 samples.append(b'foobar' * 64)
1037
1037
1038 d = zstd.train_dictionary(8192, samples)
1038 d = zstd.train_dictionary(8192, samples)
1039
1039
1040 h = hashlib.sha1(d.as_bytes()).hexdigest()
1040 h = hashlib.sha1(d.as_bytes()).hexdigest()
1041 self.assertEqual(h, '88ca0d38332aff379d4ced166a51c280a7679aad')
1041 self.assertEqual(h, '7a2e59a876db958f74257141045af8f912e00d4e')
1042
1042
1043 buffer = NonClosingBytesIO()
1043 buffer = NonClosingBytesIO()
1044 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
1044 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
1045 with cctx.stream_writer(buffer) as compressor:
1045 with cctx.stream_writer(buffer) as compressor:
1046 self.assertEqual(compressor.write(b'foo'), 0)
1046 self.assertEqual(compressor.write(b'foo'), 0)
1047 self.assertEqual(compressor.write(b'bar'), 0)
1047 self.assertEqual(compressor.write(b'bar'), 0)
1048 self.assertEqual(compressor.write(b'foo' * 16384), 0)
1048 self.assertEqual(compressor.write(b'foo' * 16384), 0)
1049
1049
1050 compressed = buffer.getvalue()
1050 compressed = buffer.getvalue()
1051
1051
1052 params = zstd.get_frame_parameters(compressed)
1052 params = zstd.get_frame_parameters(compressed)
1053 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1053 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1054 self.assertEqual(params.window_size, 2097152)
1054 self.assertEqual(params.window_size, 2097152)
1055 self.assertEqual(params.dict_id, d.dict_id())
1055 self.assertEqual(params.dict_id, d.dict_id())
1056 self.assertFalse(params.has_checksum)
1056 self.assertFalse(params.has_checksum)
1057
1057
1058 h = hashlib.sha1(compressed).hexdigest()
1058 h = hashlib.sha1(compressed).hexdigest()
1059 self.assertEqual(h, '8703b4316f274d26697ea5dd480f29c08e85d940')
1059 self.assertEqual(h, '0a7c05635061f58039727cdbe76388c6f4cfef06')
1060
1060
1061 source = b'foo' + b'bar' + (b'foo' * 16384)
1061 source = b'foo' + b'bar' + (b'foo' * 16384)
1062
1062
1063 dctx = zstd.ZstdDecompressor(dict_data=d)
1063 dctx = zstd.ZstdDecompressor(dict_data=d)
1064
1064
1065 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
1065 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
1066 source)
1066 source)
1067
1067
1068 def test_compression_params(self):
1068 def test_compression_params(self):
1069 params = zstd.ZstdCompressionParameters(
1069 params = zstd.ZstdCompressionParameters(
1070 window_log=20,
1070 window_log=20,
1071 chain_log=6,
1071 chain_log=6,
1072 hash_log=12,
1072 hash_log=12,
1073 min_match=5,
1073 min_match=5,
1074 search_log=4,
1074 search_log=4,
1075 target_length=10,
1075 target_length=10,
1076 strategy=zstd.STRATEGY_FAST)
1076 strategy=zstd.STRATEGY_FAST)
1077
1077
1078 buffer = NonClosingBytesIO()
1078 buffer = NonClosingBytesIO()
1079 cctx = zstd.ZstdCompressor(compression_params=params)
1079 cctx = zstd.ZstdCompressor(compression_params=params)
1080 with cctx.stream_writer(buffer) as compressor:
1080 with cctx.stream_writer(buffer) as compressor:
1081 self.assertEqual(compressor.write(b'foo'), 0)
1081 self.assertEqual(compressor.write(b'foo'), 0)
1082 self.assertEqual(compressor.write(b'bar'), 0)
1082 self.assertEqual(compressor.write(b'bar'), 0)
1083 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
1083 self.assertEqual(compressor.write(b'foobar' * 16384), 0)
1084
1084
1085 compressed = buffer.getvalue()
1085 compressed = buffer.getvalue()
1086
1086
1087 params = zstd.get_frame_parameters(compressed)
1087 params = zstd.get_frame_parameters(compressed)
1088 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1088 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1089 self.assertEqual(params.window_size, 1048576)
1089 self.assertEqual(params.window_size, 1048576)
1090 self.assertEqual(params.dict_id, 0)
1090 self.assertEqual(params.dict_id, 0)
1091 self.assertFalse(params.has_checksum)
1091 self.assertFalse(params.has_checksum)
1092
1092
1093 h = hashlib.sha1(compressed).hexdigest()
1093 h = hashlib.sha1(compressed).hexdigest()
1094 self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef')
1094 self.assertEqual(h, 'dd4bb7d37c1a0235b38a2f6b462814376843ef0b')
1095
1095
1096 def test_write_checksum(self):
1096 def test_write_checksum(self):
1097 no_checksum = NonClosingBytesIO()
1097 no_checksum = NonClosingBytesIO()
1098 cctx = zstd.ZstdCompressor(level=1)
1098 cctx = zstd.ZstdCompressor(level=1)
1099 with cctx.stream_writer(no_checksum) as compressor:
1099 with cctx.stream_writer(no_checksum) as compressor:
1100 self.assertEqual(compressor.write(b'foobar'), 0)
1100 self.assertEqual(compressor.write(b'foobar'), 0)
1101
1101
1102 with_checksum = NonClosingBytesIO()
1102 with_checksum = NonClosingBytesIO()
1103 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
1103 cctx = zstd.ZstdCompressor(level=1, write_checksum=True)
1104 with cctx.stream_writer(with_checksum) as compressor:
1104 with cctx.stream_writer(with_checksum) as compressor:
1105 self.assertEqual(compressor.write(b'foobar'), 0)
1105 self.assertEqual(compressor.write(b'foobar'), 0)
1106
1106
1107 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
1107 no_params = zstd.get_frame_parameters(no_checksum.getvalue())
1108 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
1108 with_params = zstd.get_frame_parameters(with_checksum.getvalue())
1109 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1109 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1110 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1110 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1111 self.assertEqual(no_params.dict_id, 0)
1111 self.assertEqual(no_params.dict_id, 0)
1112 self.assertEqual(with_params.dict_id, 0)
1112 self.assertEqual(with_params.dict_id, 0)
1113 self.assertFalse(no_params.has_checksum)
1113 self.assertFalse(no_params.has_checksum)
1114 self.assertTrue(with_params.has_checksum)
1114 self.assertTrue(with_params.has_checksum)
1115
1115
1116 self.assertEqual(len(with_checksum.getvalue()),
1116 self.assertEqual(len(with_checksum.getvalue()),
1117 len(no_checksum.getvalue()) + 4)
1117 len(no_checksum.getvalue()) + 4)
1118
1118
1119 def test_write_content_size(self):
1119 def test_write_content_size(self):
1120 no_size = NonClosingBytesIO()
1120 no_size = NonClosingBytesIO()
1121 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1121 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1122 with cctx.stream_writer(no_size) as compressor:
1122 with cctx.stream_writer(no_size) as compressor:
1123 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1123 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1124
1124
1125 with_size = NonClosingBytesIO()
1125 with_size = NonClosingBytesIO()
1126 cctx = zstd.ZstdCompressor(level=1)
1126 cctx = zstd.ZstdCompressor(level=1)
1127 with cctx.stream_writer(with_size) as compressor:
1127 with cctx.stream_writer(with_size) as compressor:
1128 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1128 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1129
1129
1130 # Source size is not known in streaming mode, so header not
1130 # Source size is not known in streaming mode, so header not
1131 # written.
1131 # written.
1132 self.assertEqual(len(with_size.getvalue()),
1132 self.assertEqual(len(with_size.getvalue()),
1133 len(no_size.getvalue()))
1133 len(no_size.getvalue()))
1134
1134
1135 # Declaring size will write the header.
1135 # Declaring size will write the header.
1136 with_size = NonClosingBytesIO()
1136 with_size = NonClosingBytesIO()
1137 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
1137 with cctx.stream_writer(with_size, size=len(b'foobar' * 256)) as compressor:
1138 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1138 self.assertEqual(compressor.write(b'foobar' * 256), 0)
1139
1139
1140 no_params = zstd.get_frame_parameters(no_size.getvalue())
1140 no_params = zstd.get_frame_parameters(no_size.getvalue())
1141 with_params = zstd.get_frame_parameters(with_size.getvalue())
1141 with_params = zstd.get_frame_parameters(with_size.getvalue())
1142 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1142 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1143 self.assertEqual(with_params.content_size, 1536)
1143 self.assertEqual(with_params.content_size, 1536)
1144 self.assertEqual(no_params.dict_id, 0)
1144 self.assertEqual(no_params.dict_id, 0)
1145 self.assertEqual(with_params.dict_id, 0)
1145 self.assertEqual(with_params.dict_id, 0)
1146 self.assertFalse(no_params.has_checksum)
1146 self.assertFalse(no_params.has_checksum)
1147 self.assertFalse(with_params.has_checksum)
1147 self.assertFalse(with_params.has_checksum)
1148
1148
1149 self.assertEqual(len(with_size.getvalue()),
1149 self.assertEqual(len(with_size.getvalue()),
1150 len(no_size.getvalue()) + 1)
1150 len(no_size.getvalue()) + 1)
1151
1151
1152 def test_no_dict_id(self):
1152 def test_no_dict_id(self):
1153 samples = []
1153 samples = []
1154 for i in range(128):
1154 for i in range(128):
1155 samples.append(b'foo' * 64)
1155 samples.append(b'foo' * 64)
1156 samples.append(b'bar' * 64)
1156 samples.append(b'bar' * 64)
1157 samples.append(b'foobar' * 64)
1157 samples.append(b'foobar' * 64)
1158
1158
1159 d = zstd.train_dictionary(1024, samples)
1159 d = zstd.train_dictionary(1024, samples)
1160
1160
1161 with_dict_id = NonClosingBytesIO()
1161 with_dict_id = NonClosingBytesIO()
1162 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
1162 cctx = zstd.ZstdCompressor(level=1, dict_data=d)
1163 with cctx.stream_writer(with_dict_id) as compressor:
1163 with cctx.stream_writer(with_dict_id) as compressor:
1164 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1164 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1165
1165
1166 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
1166 self.assertEqual(with_dict_id.getvalue()[4:5], b'\x03')
1167
1167
1168 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
1168 cctx = zstd.ZstdCompressor(level=1, dict_data=d, write_dict_id=False)
1169 no_dict_id = NonClosingBytesIO()
1169 no_dict_id = NonClosingBytesIO()
1170 with cctx.stream_writer(no_dict_id) as compressor:
1170 with cctx.stream_writer(no_dict_id) as compressor:
1171 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1171 self.assertEqual(compressor.write(b'foobarfoobar'), 0)
1172
1172
1173 self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00')
1173 self.assertEqual(no_dict_id.getvalue()[4:5], b'\x00')
1174
1174
1175 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
1175 no_params = zstd.get_frame_parameters(no_dict_id.getvalue())
1176 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
1176 with_params = zstd.get_frame_parameters(with_dict_id.getvalue())
1177 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1177 self.assertEqual(no_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1178 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1178 self.assertEqual(with_params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1179 self.assertEqual(no_params.dict_id, 0)
1179 self.assertEqual(no_params.dict_id, 0)
1180 self.assertEqual(with_params.dict_id, d.dict_id())
1180 self.assertEqual(with_params.dict_id, d.dict_id())
1181 self.assertFalse(no_params.has_checksum)
1181 self.assertFalse(no_params.has_checksum)
1182 self.assertFalse(with_params.has_checksum)
1182 self.assertFalse(with_params.has_checksum)
1183
1183
1184 self.assertEqual(len(with_dict_id.getvalue()),
1184 self.assertEqual(len(with_dict_id.getvalue()),
1185 len(no_dict_id.getvalue()) + 4)
1185 len(no_dict_id.getvalue()) + 4)
1186
1186
1187 def test_memory_size(self):
1187 def test_memory_size(self):
1188 cctx = zstd.ZstdCompressor(level=3)
1188 cctx = zstd.ZstdCompressor(level=3)
1189 buffer = io.BytesIO()
1189 buffer = io.BytesIO()
1190 with cctx.stream_writer(buffer) as compressor:
1190 with cctx.stream_writer(buffer) as compressor:
1191 compressor.write(b'foo')
1191 compressor.write(b'foo')
1192 size = compressor.memory_size()
1192 size = compressor.memory_size()
1193
1193
1194 self.assertGreater(size, 100000)
1194 self.assertGreater(size, 100000)
1195
1195
1196 def test_write_size(self):
1196 def test_write_size(self):
1197 cctx = zstd.ZstdCompressor(level=3)
1197 cctx = zstd.ZstdCompressor(level=3)
1198 dest = OpCountingBytesIO()
1198 dest = OpCountingBytesIO()
1199 with cctx.stream_writer(dest, write_size=1) as compressor:
1199 with cctx.stream_writer(dest, write_size=1) as compressor:
1200 self.assertEqual(compressor.write(b'foo'), 0)
1200 self.assertEqual(compressor.write(b'foo'), 0)
1201 self.assertEqual(compressor.write(b'bar'), 0)
1201 self.assertEqual(compressor.write(b'bar'), 0)
1202 self.assertEqual(compressor.write(b'foobar'), 0)
1202 self.assertEqual(compressor.write(b'foobar'), 0)
1203
1203
1204 self.assertEqual(len(dest.getvalue()), dest._write_count)
1204 self.assertEqual(len(dest.getvalue()), dest._write_count)
1205
1205
1206 def test_flush_repeated(self):
1206 def test_flush_repeated(self):
1207 cctx = zstd.ZstdCompressor(level=3)
1207 cctx = zstd.ZstdCompressor(level=3)
1208 dest = OpCountingBytesIO()
1208 dest = OpCountingBytesIO()
1209 with cctx.stream_writer(dest) as compressor:
1209 with cctx.stream_writer(dest) as compressor:
1210 self.assertEqual(compressor.write(b'foo'), 0)
1210 self.assertEqual(compressor.write(b'foo'), 0)
1211 self.assertEqual(dest._write_count, 0)
1211 self.assertEqual(dest._write_count, 0)
1212 self.assertEqual(compressor.flush(), 12)
1212 self.assertEqual(compressor.flush(), 12)
1213 self.assertEqual(dest._write_count, 1)
1213 self.assertEqual(dest._write_count, 1)
1214 self.assertEqual(compressor.write(b'bar'), 0)
1214 self.assertEqual(compressor.write(b'bar'), 0)
1215 self.assertEqual(dest._write_count, 1)
1215 self.assertEqual(dest._write_count, 1)
1216 self.assertEqual(compressor.flush(), 6)
1216 self.assertEqual(compressor.flush(), 6)
1217 self.assertEqual(dest._write_count, 2)
1217 self.assertEqual(dest._write_count, 2)
1218 self.assertEqual(compressor.write(b'baz'), 0)
1218 self.assertEqual(compressor.write(b'baz'), 0)
1219
1219
1220 self.assertEqual(dest._write_count, 3)
1220 self.assertEqual(dest._write_count, 3)
1221
1221
1222 def test_flush_empty_block(self):
1222 def test_flush_empty_block(self):
1223 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
1223 cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
1224 dest = OpCountingBytesIO()
1224 dest = OpCountingBytesIO()
1225 with cctx.stream_writer(dest) as compressor:
1225 with cctx.stream_writer(dest) as compressor:
1226 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1226 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1227 count = dest._write_count
1227 count = dest._write_count
1228 offset = dest.tell()
1228 offset = dest.tell()
1229 self.assertEqual(compressor.flush(), 23)
1229 self.assertEqual(compressor.flush(), 23)
1230 self.assertGreater(dest._write_count, count)
1230 self.assertGreater(dest._write_count, count)
1231 self.assertGreater(dest.tell(), offset)
1231 self.assertGreater(dest.tell(), offset)
1232 offset = dest.tell()
1232 offset = dest.tell()
1233 # Ending the write here should cause an empty block to be written
1233 # Ending the write here should cause an empty block to be written
1234 # to denote end of frame.
1234 # to denote end of frame.
1235
1235
1236 trailing = dest.getvalue()[offset:]
1236 trailing = dest.getvalue()[offset:]
1237 # 3 bytes block header + 4 bytes frame checksum
1237 # 3 bytes block header + 4 bytes frame checksum
1238 self.assertEqual(len(trailing), 7)
1238 self.assertEqual(len(trailing), 7)
1239
1239
1240 header = trailing[0:3]
1240 header = trailing[0:3]
1241 self.assertEqual(header, b'\x01\x00\x00')
1241 self.assertEqual(header, b'\x01\x00\x00')
1242
1242
1243 def test_flush_frame(self):
1243 def test_flush_frame(self):
1244 cctx = zstd.ZstdCompressor(level=3)
1244 cctx = zstd.ZstdCompressor(level=3)
1245 dest = OpCountingBytesIO()
1245 dest = OpCountingBytesIO()
1246
1246
1247 with cctx.stream_writer(dest) as compressor:
1247 with cctx.stream_writer(dest) as compressor:
1248 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1248 self.assertEqual(compressor.write(b'foobar' * 8192), 0)
1249 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1249 self.assertEqual(compressor.flush(zstd.FLUSH_FRAME), 23)
1250 compressor.write(b'biz' * 16384)
1250 compressor.write(b'biz' * 16384)
1251
1251
1252 self.assertEqual(dest.getvalue(),
1252 self.assertEqual(dest.getvalue(),
1253 # Frame 1.
1253 # Frame 1.
1254 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f'
1254 b'\x28\xb5\x2f\xfd\x00\x58\x75\x00\x00\x30\x66\x6f\x6f'
1255 b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08'
1255 b'\x62\x61\x72\x01\x00\xf7\xbf\xe8\xa5\x08'
1256 # Frame 2.
1256 # Frame 2.
1257 b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a'
1257 b'\x28\xb5\x2f\xfd\x00\x58\x5d\x00\x00\x18\x62\x69\x7a'
1258 b'\x01\x00\xfa\x3f\x75\x37\x04')
1258 b'\x01\x00\xfa\x3f\x75\x37\x04')
1259
1259
1260 def test_bad_flush_mode(self):
1260 def test_bad_flush_mode(self):
1261 cctx = zstd.ZstdCompressor()
1261 cctx = zstd.ZstdCompressor()
1262 dest = io.BytesIO()
1262 dest = io.BytesIO()
1263 with cctx.stream_writer(dest) as compressor:
1263 with cctx.stream_writer(dest) as compressor:
1264 with self.assertRaisesRegexp(ValueError, 'unknown flush_mode: 42'):
1264 with self.assertRaisesRegexp(ValueError, 'unknown flush_mode: 42'):
1265 compressor.flush(flush_mode=42)
1265 compressor.flush(flush_mode=42)
1266
1266
1267 def test_multithreaded(self):
1267 def test_multithreaded(self):
1268 dest = NonClosingBytesIO()
1268 dest = NonClosingBytesIO()
1269 cctx = zstd.ZstdCompressor(threads=2)
1269 cctx = zstd.ZstdCompressor(threads=2)
1270 with cctx.stream_writer(dest) as compressor:
1270 with cctx.stream_writer(dest) as compressor:
1271 compressor.write(b'a' * 1048576)
1271 compressor.write(b'a' * 1048576)
1272 compressor.write(b'b' * 1048576)
1272 compressor.write(b'b' * 1048576)
1273 compressor.write(b'c' * 1048576)
1273 compressor.write(b'c' * 1048576)
1274
1274
1275 self.assertEqual(len(dest.getvalue()), 295)
1275 self.assertEqual(len(dest.getvalue()), 295)
1276
1276
1277 def test_tell(self):
1277 def test_tell(self):
1278 dest = io.BytesIO()
1278 dest = io.BytesIO()
1279 cctx = zstd.ZstdCompressor()
1279 cctx = zstd.ZstdCompressor()
1280 with cctx.stream_writer(dest) as compressor:
1280 with cctx.stream_writer(dest) as compressor:
1281 self.assertEqual(compressor.tell(), 0)
1281 self.assertEqual(compressor.tell(), 0)
1282
1282
1283 for i in range(256):
1283 for i in range(256):
1284 compressor.write(b'foo' * (i + 1))
1284 compressor.write(b'foo' * (i + 1))
1285 self.assertEqual(compressor.tell(), dest.tell())
1285 self.assertEqual(compressor.tell(), dest.tell())
1286
1286
1287 def test_bad_size(self):
1287 def test_bad_size(self):
1288 cctx = zstd.ZstdCompressor()
1288 cctx = zstd.ZstdCompressor()
1289
1289
1290 dest = io.BytesIO()
1290 dest = io.BytesIO()
1291
1291
1292 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1292 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1293 with cctx.stream_writer(dest, size=2) as compressor:
1293 with cctx.stream_writer(dest, size=2) as compressor:
1294 compressor.write(b'foo')
1294 compressor.write(b'foo')
1295
1295
1296 # Test another operation.
1296 # Test another operation.
1297 with cctx.stream_writer(dest, size=42):
1297 with cctx.stream_writer(dest, size=42):
1298 pass
1298 pass
1299
1299
1300 def test_tarfile_compat(self):
1300 def test_tarfile_compat(self):
1301 dest = NonClosingBytesIO()
1301 dest = NonClosingBytesIO()
1302 cctx = zstd.ZstdCompressor()
1302 cctx = zstd.ZstdCompressor()
1303 with cctx.stream_writer(dest) as compressor:
1303 with cctx.stream_writer(dest) as compressor:
1304 with tarfile.open('tf', mode='w|', fileobj=compressor) as tf:
1304 with tarfile.open('tf', mode='w|', fileobj=compressor) as tf:
1305 tf.add(__file__, 'test_compressor.py')
1305 tf.add(__file__, 'test_compressor.py')
1306
1306
1307 dest = io.BytesIO(dest.getvalue())
1307 dest = io.BytesIO(dest.getvalue())
1308
1308
1309 dctx = zstd.ZstdDecompressor()
1309 dctx = zstd.ZstdDecompressor()
1310 with dctx.stream_reader(dest) as reader:
1310 with dctx.stream_reader(dest) as reader:
1311 with tarfile.open(mode='r|', fileobj=reader) as tf:
1311 with tarfile.open(mode='r|', fileobj=reader) as tf:
1312 for member in tf:
1312 for member in tf:
1313 self.assertEqual(member.name, 'test_compressor.py')
1313 self.assertEqual(member.name, 'test_compressor.py')
1314
1314
1315
1315
1316 @make_cffi
1316 @make_cffi
1317 class TestCompressor_read_to_iter(unittest.TestCase):
1317 class TestCompressor_read_to_iter(unittest.TestCase):
1318 def test_type_validation(self):
1318 def test_type_validation(self):
1319 cctx = zstd.ZstdCompressor()
1319 cctx = zstd.ZstdCompressor()
1320
1320
1321 # Object with read() works.
1321 # Object with read() works.
1322 for chunk in cctx.read_to_iter(io.BytesIO()):
1322 for chunk in cctx.read_to_iter(io.BytesIO()):
1323 pass
1323 pass
1324
1324
1325 # Buffer protocol works.
1325 # Buffer protocol works.
1326 for chunk in cctx.read_to_iter(b'foobar'):
1326 for chunk in cctx.read_to_iter(b'foobar'):
1327 pass
1327 pass
1328
1328
1329 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1329 with self.assertRaisesRegexp(ValueError, 'must pass an object with a read'):
1330 for chunk in cctx.read_to_iter(True):
1330 for chunk in cctx.read_to_iter(True):
1331 pass
1331 pass
1332
1332
1333 def test_read_empty(self):
1333 def test_read_empty(self):
1334 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1334 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1335
1335
1336 source = io.BytesIO()
1336 source = io.BytesIO()
1337 it = cctx.read_to_iter(source)
1337 it = cctx.read_to_iter(source)
1338 chunks = list(it)
1338 chunks = list(it)
1339 self.assertEqual(len(chunks), 1)
1339 self.assertEqual(len(chunks), 1)
1340 compressed = b''.join(chunks)
1340 compressed = b''.join(chunks)
1341 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
1341 self.assertEqual(compressed, b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
1342
1342
1343 # And again with the buffer protocol.
1343 # And again with the buffer protocol.
1344 it = cctx.read_to_iter(b'')
1344 it = cctx.read_to_iter(b'')
1345 chunks = list(it)
1345 chunks = list(it)
1346 self.assertEqual(len(chunks), 1)
1346 self.assertEqual(len(chunks), 1)
1347 compressed2 = b''.join(chunks)
1347 compressed2 = b''.join(chunks)
1348 self.assertEqual(compressed2, compressed)
1348 self.assertEqual(compressed2, compressed)
1349
1349
1350 def test_read_large(self):
1350 def test_read_large(self):
1351 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1351 cctx = zstd.ZstdCompressor(level=1, write_content_size=False)
1352
1352
1353 source = io.BytesIO()
1353 source = io.BytesIO()
1354 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1354 source.write(b'f' * zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1355 source.write(b'o')
1355 source.write(b'o')
1356 source.seek(0)
1356 source.seek(0)
1357
1357
1358 # Creating an iterator should not perform any compression until
1358 # Creating an iterator should not perform any compression until
1359 # first read.
1359 # first read.
1360 it = cctx.read_to_iter(source, size=len(source.getvalue()))
1360 it = cctx.read_to_iter(source, size=len(source.getvalue()))
1361 self.assertEqual(source.tell(), 0)
1361 self.assertEqual(source.tell(), 0)
1362
1362
1363 # We should have exactly 2 output chunks.
1363 # We should have exactly 2 output chunks.
1364 chunks = []
1364 chunks = []
1365 chunk = next(it)
1365 chunk = next(it)
1366 self.assertIsNotNone(chunk)
1366 self.assertIsNotNone(chunk)
1367 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1367 self.assertEqual(source.tell(), zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE)
1368 chunks.append(chunk)
1368 chunks.append(chunk)
1369 chunk = next(it)
1369 chunk = next(it)
1370 self.assertIsNotNone(chunk)
1370 self.assertIsNotNone(chunk)
1371 chunks.append(chunk)
1371 chunks.append(chunk)
1372
1372
1373 self.assertEqual(source.tell(), len(source.getvalue()))
1373 self.assertEqual(source.tell(), len(source.getvalue()))
1374
1374
1375 with self.assertRaises(StopIteration):
1375 with self.assertRaises(StopIteration):
1376 next(it)
1376 next(it)
1377
1377
1378 # And again for good measure.
1378 # And again for good measure.
1379 with self.assertRaises(StopIteration):
1379 with self.assertRaises(StopIteration):
1380 next(it)
1380 next(it)
1381
1381
1382 # We should get the same output as the one-shot compression mechanism.
1382 # We should get the same output as the one-shot compression mechanism.
1383 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1383 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1384
1384
1385 params = zstd.get_frame_parameters(b''.join(chunks))
1385 params = zstd.get_frame_parameters(b''.join(chunks))
1386 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1386 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1387 self.assertEqual(params.window_size, 262144)
1387 self.assertEqual(params.window_size, 262144)
1388 self.assertEqual(params.dict_id, 0)
1388 self.assertEqual(params.dict_id, 0)
1389 self.assertFalse(params.has_checksum)
1389 self.assertFalse(params.has_checksum)
1390
1390
1391 # Now check the buffer protocol.
1391 # Now check the buffer protocol.
1392 it = cctx.read_to_iter(source.getvalue())
1392 it = cctx.read_to_iter(source.getvalue())
1393 chunks = list(it)
1393 chunks = list(it)
1394 self.assertEqual(len(chunks), 2)
1394 self.assertEqual(len(chunks), 2)
1395
1395
1396 params = zstd.get_frame_parameters(b''.join(chunks))
1396 params = zstd.get_frame_parameters(b''.join(chunks))
1397 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1397 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
1398 #self.assertEqual(params.window_size, 262144)
1398 #self.assertEqual(params.window_size, 262144)
1399 self.assertEqual(params.dict_id, 0)
1399 self.assertEqual(params.dict_id, 0)
1400 self.assertFalse(params.has_checksum)
1400 self.assertFalse(params.has_checksum)
1401
1401
1402 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1402 self.assertEqual(b''.join(chunks), cctx.compress(source.getvalue()))
1403
1403
1404 def test_read_write_size(self):
1404 def test_read_write_size(self):
1405 source = OpCountingBytesIO(b'foobarfoobar')
1405 source = OpCountingBytesIO(b'foobarfoobar')
1406 cctx = zstd.ZstdCompressor(level=3)
1406 cctx = zstd.ZstdCompressor(level=3)
1407 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
1407 for chunk in cctx.read_to_iter(source, read_size=1, write_size=1):
1408 self.assertEqual(len(chunk), 1)
1408 self.assertEqual(len(chunk), 1)
1409
1409
1410 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
1410 self.assertEqual(source._read_count, len(source.getvalue()) + 1)
1411
1411
1412 def test_multithreaded(self):
1412 def test_multithreaded(self):
1413 source = io.BytesIO()
1413 source = io.BytesIO()
1414 source.write(b'a' * 1048576)
1414 source.write(b'a' * 1048576)
1415 source.write(b'b' * 1048576)
1415 source.write(b'b' * 1048576)
1416 source.write(b'c' * 1048576)
1416 source.write(b'c' * 1048576)
1417 source.seek(0)
1417 source.seek(0)
1418
1418
1419 cctx = zstd.ZstdCompressor(threads=2)
1419 cctx = zstd.ZstdCompressor(threads=2)
1420
1420
1421 compressed = b''.join(cctx.read_to_iter(source))
1421 compressed = b''.join(cctx.read_to_iter(source))
1422 self.assertEqual(len(compressed), 295)
1422 self.assertEqual(len(compressed), 295)
1423
1423
1424 def test_bad_size(self):
1424 def test_bad_size(self):
1425 cctx = zstd.ZstdCompressor()
1425 cctx = zstd.ZstdCompressor()
1426
1426
1427 source = io.BytesIO(b'a' * 42)
1427 source = io.BytesIO(b'a' * 42)
1428
1428
1429 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1429 with self.assertRaisesRegexp(zstd.ZstdError, 'Src size is incorrect'):
1430 b''.join(cctx.read_to_iter(source, size=2))
1430 b''.join(cctx.read_to_iter(source, size=2))
1431
1431
1432 # Test another operation on errored compressor.
1432 # Test another operation on errored compressor.
1433 b''.join(cctx.read_to_iter(source))
1433 b''.join(cctx.read_to_iter(source))
1434
1434
1435
1435
1436 @make_cffi
1436 @make_cffi
1437 class TestCompressor_chunker(unittest.TestCase):
1437 class TestCompressor_chunker(unittest.TestCase):
1438 def test_empty(self):
1438 def test_empty(self):
1439 cctx = zstd.ZstdCompressor(write_content_size=False)
1439 cctx = zstd.ZstdCompressor(write_content_size=False)
1440 chunker = cctx.chunker()
1440 chunker = cctx.chunker()
1441
1441
1442 it = chunker.compress(b'')
1442 it = chunker.compress(b'')
1443
1443
1444 with self.assertRaises(StopIteration):
1444 with self.assertRaises(StopIteration):
1445 next(it)
1445 next(it)
1446
1446
1447 it = chunker.finish()
1447 it = chunker.finish()
1448
1448
1449 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00')
1449 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x58\x01\x00\x00')
1450
1450
1451 with self.assertRaises(StopIteration):
1451 with self.assertRaises(StopIteration):
1452 next(it)
1452 next(it)
1453
1453
1454 def test_simple_input(self):
1454 def test_simple_input(self):
1455 cctx = zstd.ZstdCompressor()
1455 cctx = zstd.ZstdCompressor()
1456 chunker = cctx.chunker()
1456 chunker = cctx.chunker()
1457
1457
1458 it = chunker.compress(b'foobar')
1458 it = chunker.compress(b'foobar')
1459
1459
1460 with self.assertRaises(StopIteration):
1460 with self.assertRaises(StopIteration):
1461 next(it)
1461 next(it)
1462
1462
1463 it = chunker.compress(b'baz' * 30)
1463 it = chunker.compress(b'baz' * 30)
1464
1464
1465 with self.assertRaises(StopIteration):
1465 with self.assertRaises(StopIteration):
1466 next(it)
1466 next(it)
1467
1467
1468 it = chunker.finish()
1468 it = chunker.finish()
1469
1469
1470 self.assertEqual(next(it),
1470 self.assertEqual(next(it),
1471 b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f'
1471 b'\x28\xb5\x2f\xfd\x00\x58\x7d\x00\x00\x48\x66\x6f'
1472 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1472 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1473
1473
1474 with self.assertRaises(StopIteration):
1474 with self.assertRaises(StopIteration):
1475 next(it)
1475 next(it)
1476
1476
1477 def test_input_size(self):
1477 def test_input_size(self):
1478 cctx = zstd.ZstdCompressor()
1478 cctx = zstd.ZstdCompressor()
1479 chunker = cctx.chunker(size=1024)
1479 chunker = cctx.chunker(size=1024)
1480
1480
1481 it = chunker.compress(b'x' * 1000)
1481 it = chunker.compress(b'x' * 1000)
1482
1482
1483 with self.assertRaises(StopIteration):
1483 with self.assertRaises(StopIteration):
1484 next(it)
1484 next(it)
1485
1485
1486 it = chunker.compress(b'y' * 24)
1486 it = chunker.compress(b'y' * 24)
1487
1487
1488 with self.assertRaises(StopIteration):
1488 with self.assertRaises(StopIteration):
1489 next(it)
1489 next(it)
1490
1490
1491 chunks = list(chunker.finish())
1491 chunks = list(chunker.finish())
1492
1492
1493 self.assertEqual(chunks, [
1493 self.assertEqual(chunks, [
1494 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1494 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1495 b'\xa0\x16\xe3\x2b\x80\x05'
1495 b'\xa0\x16\xe3\x2b\x80\x05'
1496 ])
1496 ])
1497
1497
1498 dctx = zstd.ZstdDecompressor()
1498 dctx = zstd.ZstdDecompressor()
1499
1499
1500 self.assertEqual(dctx.decompress(b''.join(chunks)),
1500 self.assertEqual(dctx.decompress(b''.join(chunks)),
1501 (b'x' * 1000) + (b'y' * 24))
1501 (b'x' * 1000) + (b'y' * 24))
1502
1502
1503 def test_small_chunk_size(self):
1503 def test_small_chunk_size(self):
1504 cctx = zstd.ZstdCompressor()
1504 cctx = zstd.ZstdCompressor()
1505 chunker = cctx.chunker(chunk_size=1)
1505 chunker = cctx.chunker(chunk_size=1)
1506
1506
1507 chunks = list(chunker.compress(b'foo' * 1024))
1507 chunks = list(chunker.compress(b'foo' * 1024))
1508 self.assertEqual(chunks, [])
1508 self.assertEqual(chunks, [])
1509
1509
1510 chunks = list(chunker.finish())
1510 chunks = list(chunker.finish())
1511 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1511 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1512
1512
1513 self.assertEqual(
1513 self.assertEqual(
1514 b''.join(chunks),
1514 b''.join(chunks),
1515 b'\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1515 b'\x28\xb5\x2f\xfd\x00\x58\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1516 b'\xfa\xd3\x77\x43')
1516 b'\xfa\xd3\x77\x43')
1517
1517
1518 dctx = zstd.ZstdDecompressor()
1518 dctx = zstd.ZstdDecompressor()
1519 self.assertEqual(dctx.decompress(b''.join(chunks),
1519 self.assertEqual(dctx.decompress(b''.join(chunks),
1520 max_output_size=10000),
1520 max_output_size=10000),
1521 b'foo' * 1024)
1521 b'foo' * 1024)
1522
1522
1523 def test_input_types(self):
1523 def test_input_types(self):
1524 cctx = zstd.ZstdCompressor()
1524 cctx = zstd.ZstdCompressor()
1525
1525
1526 mutable_array = bytearray(3)
1526 mutable_array = bytearray(3)
1527 mutable_array[:] = b'foo'
1527 mutable_array[:] = b'foo'
1528
1528
1529 sources = [
1529 sources = [
1530 memoryview(b'foo'),
1530 memoryview(b'foo'),
1531 bytearray(b'foo'),
1531 bytearray(b'foo'),
1532 mutable_array,
1532 mutable_array,
1533 ]
1533 ]
1534
1534
1535 for source in sources:
1535 for source in sources:
1536 chunker = cctx.chunker()
1536 chunker = cctx.chunker()
1537
1537
1538 self.assertEqual(list(chunker.compress(source)), [])
1538 self.assertEqual(list(chunker.compress(source)), [])
1539 self.assertEqual(list(chunker.finish()), [
1539 self.assertEqual(list(chunker.finish()), [
1540 b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f'
1540 b'\x28\xb5\x2f\xfd\x00\x58\x19\x00\x00\x66\x6f\x6f'
1541 ])
1541 ])
1542
1542
1543 def test_flush(self):
1543 def test_flush(self):
1544 cctx = zstd.ZstdCompressor()
1544 cctx = zstd.ZstdCompressor()
1545 chunker = cctx.chunker()
1545 chunker = cctx.chunker()
1546
1546
1547 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1547 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1548 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1548 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1549
1549
1550 chunks1 = list(chunker.flush())
1550 chunks1 = list(chunker.flush())
1551
1551
1552 self.assertEqual(chunks1, [
1552 self.assertEqual(chunks1, [
1553 b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1553 b'\x28\xb5\x2f\xfd\x00\x58\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1554 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1554 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1555 ])
1555 ])
1556
1556
1557 self.assertEqual(list(chunker.flush()), [])
1557 self.assertEqual(list(chunker.flush()), [])
1558 self.assertEqual(list(chunker.flush()), [])
1558 self.assertEqual(list(chunker.flush()), [])
1559
1559
1560 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1560 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1561
1561
1562 chunks2 = list(chunker.flush())
1562 chunks2 = list(chunker.flush())
1563 self.assertEqual(len(chunks2), 1)
1563 self.assertEqual(len(chunks2), 1)
1564
1564
1565 chunks3 = list(chunker.finish())
1565 chunks3 = list(chunker.finish())
1566 self.assertEqual(len(chunks2), 1)
1566 self.assertEqual(len(chunks2), 1)
1567
1567
1568 dctx = zstd.ZstdDecompressor()
1568 dctx = zstd.ZstdDecompressor()
1569
1569
1570 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1570 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1571 max_output_size=10000),
1571 max_output_size=10000),
1572 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1572 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1573
1573
1574 def test_compress_after_finish(self):
1574 def test_compress_after_finish(self):
1575 cctx = zstd.ZstdCompressor()
1575 cctx = zstd.ZstdCompressor()
1576 chunker = cctx.chunker()
1576 chunker = cctx.chunker()
1577
1577
1578 list(chunker.compress(b'foo'))
1578 list(chunker.compress(b'foo'))
1579 list(chunker.finish())
1579 list(chunker.finish())
1580
1580
1581 with self.assertRaisesRegexp(
1581 with self.assertRaisesRegexp(
1582 zstd.ZstdError,
1582 zstd.ZstdError,
1583 r'cannot call compress\(\) after compression finished'):
1583 r'cannot call compress\(\) after compression finished'):
1584 list(chunker.compress(b'foo'))
1584 list(chunker.compress(b'foo'))
1585
1585
1586 def test_flush_after_finish(self):
1586 def test_flush_after_finish(self):
1587 cctx = zstd.ZstdCompressor()
1587 cctx = zstd.ZstdCompressor()
1588 chunker = cctx.chunker()
1588 chunker = cctx.chunker()
1589
1589
1590 list(chunker.compress(b'foo'))
1590 list(chunker.compress(b'foo'))
1591 list(chunker.finish())
1591 list(chunker.finish())
1592
1592
1593 with self.assertRaisesRegexp(
1593 with self.assertRaisesRegexp(
1594 zstd.ZstdError,
1594 zstd.ZstdError,
1595 r'cannot call flush\(\) after compression finished'):
1595 r'cannot call flush\(\) after compression finished'):
1596 list(chunker.flush())
1596 list(chunker.flush())
1597
1597
1598 def test_finish_after_finish(self):
1598 def test_finish_after_finish(self):
1599 cctx = zstd.ZstdCompressor()
1599 cctx = zstd.ZstdCompressor()
1600 chunker = cctx.chunker()
1600 chunker = cctx.chunker()
1601
1601
1602 list(chunker.compress(b'foo'))
1602 list(chunker.compress(b'foo'))
1603 list(chunker.finish())
1603 list(chunker.finish())
1604
1604
1605 with self.assertRaisesRegexp(
1605 with self.assertRaisesRegexp(
1606 zstd.ZstdError,
1606 zstd.ZstdError,
1607 r'cannot call finish\(\) after compression finished'):
1607 r'cannot call finish\(\) after compression finished'):
1608 list(chunker.finish())
1608 list(chunker.finish())
1609
1609
1610
1610
1611 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1611 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1612 def test_invalid_inputs(self):
1612 def test_invalid_inputs(self):
1613 cctx = zstd.ZstdCompressor()
1613 cctx = zstd.ZstdCompressor()
1614
1614
1615 if not hasattr(cctx, 'multi_compress_to_buffer'):
1615 if not hasattr(cctx, 'multi_compress_to_buffer'):
1616 self.skipTest('multi_compress_to_buffer not available')
1616 self.skipTest('multi_compress_to_buffer not available')
1617
1617
1618 with self.assertRaises(TypeError):
1618 with self.assertRaises(TypeError):
1619 cctx.multi_compress_to_buffer(True)
1619 cctx.multi_compress_to_buffer(True)
1620
1620
1621 with self.assertRaises(TypeError):
1621 with self.assertRaises(TypeError):
1622 cctx.multi_compress_to_buffer((1, 2))
1622 cctx.multi_compress_to_buffer((1, 2))
1623
1623
1624 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1624 with self.assertRaisesRegexp(TypeError, 'item 0 not a bytes like object'):
1625 cctx.multi_compress_to_buffer([u'foo'])
1625 cctx.multi_compress_to_buffer([u'foo'])
1626
1626
1627 def test_empty_input(self):
1627 def test_empty_input(self):
1628 cctx = zstd.ZstdCompressor()
1628 cctx = zstd.ZstdCompressor()
1629
1629
1630 if not hasattr(cctx, 'multi_compress_to_buffer'):
1630 if not hasattr(cctx, 'multi_compress_to_buffer'):
1631 self.skipTest('multi_compress_to_buffer not available')
1631 self.skipTest('multi_compress_to_buffer not available')
1632
1632
1633 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1633 with self.assertRaisesRegexp(ValueError, 'no source elements found'):
1634 cctx.multi_compress_to_buffer([])
1634 cctx.multi_compress_to_buffer([])
1635
1635
1636 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
1636 with self.assertRaisesRegexp(ValueError, 'source elements are empty'):
1637 cctx.multi_compress_to_buffer([b'', b'', b''])
1637 cctx.multi_compress_to_buffer([b'', b'', b''])
1638
1638
1639 def test_list_input(self):
1639 def test_list_input(self):
1640 cctx = zstd.ZstdCompressor(write_checksum=True)
1640 cctx = zstd.ZstdCompressor(write_checksum=True)
1641
1641
1642 if not hasattr(cctx, 'multi_compress_to_buffer'):
1642 if not hasattr(cctx, 'multi_compress_to_buffer'):
1643 self.skipTest('multi_compress_to_buffer not available')
1643 self.skipTest('multi_compress_to_buffer not available')
1644
1644
1645 original = [b'foo' * 12, b'bar' * 6]
1645 original = [b'foo' * 12, b'bar' * 6]
1646 frames = [cctx.compress(c) for c in original]
1646 frames = [cctx.compress(c) for c in original]
1647 b = cctx.multi_compress_to_buffer(original)
1647 b = cctx.multi_compress_to_buffer(original)
1648
1648
1649 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
1649 self.assertIsInstance(b, zstd.BufferWithSegmentsCollection)
1650
1650
1651 self.assertEqual(len(b), 2)
1651 self.assertEqual(len(b), 2)
1652 self.assertEqual(b.size(), 44)
1652 self.assertEqual(b.size(), 44)
1653
1653
1654 self.assertEqual(b[0].tobytes(), frames[0])
1654 self.assertEqual(b[0].tobytes(), frames[0])
1655 self.assertEqual(b[1].tobytes(), frames[1])
1655 self.assertEqual(b[1].tobytes(), frames[1])
1656
1656
1657 def test_buffer_with_segments_input(self):
1657 def test_buffer_with_segments_input(self):
1658 cctx = zstd.ZstdCompressor(write_checksum=True)
1658 cctx = zstd.ZstdCompressor(write_checksum=True)
1659
1659
1660 if not hasattr(cctx, 'multi_compress_to_buffer'):
1660 if not hasattr(cctx, 'multi_compress_to_buffer'):
1661 self.skipTest('multi_compress_to_buffer not available')
1661 self.skipTest('multi_compress_to_buffer not available')
1662
1662
1663 original = [b'foo' * 4, b'bar' * 6]
1663 original = [b'foo' * 4, b'bar' * 6]
1664 frames = [cctx.compress(c) for c in original]
1664 frames = [cctx.compress(c) for c in original]
1665
1665
1666 offsets = struct.pack('=QQQQ', 0, len(original[0]),
1666 offsets = struct.pack('=QQQQ', 0, len(original[0]),
1667 len(original[0]), len(original[1]))
1667 len(original[0]), len(original[1]))
1668 segments = zstd.BufferWithSegments(b''.join(original), offsets)
1668 segments = zstd.BufferWithSegments(b''.join(original), offsets)
1669
1669
1670 result = cctx.multi_compress_to_buffer(segments)
1670 result = cctx.multi_compress_to_buffer(segments)
1671
1671
1672 self.assertEqual(len(result), 2)
1672 self.assertEqual(len(result), 2)
1673 self.assertEqual(result.size(), 47)
1673 self.assertEqual(result.size(), 47)
1674
1674
1675 self.assertEqual(result[0].tobytes(), frames[0])
1675 self.assertEqual(result[0].tobytes(), frames[0])
1676 self.assertEqual(result[1].tobytes(), frames[1])
1676 self.assertEqual(result[1].tobytes(), frames[1])
1677
1677
1678 def test_buffer_with_segments_collection_input(self):
1678 def test_buffer_with_segments_collection_input(self):
1679 cctx = zstd.ZstdCompressor(write_checksum=True)
1679 cctx = zstd.ZstdCompressor(write_checksum=True)
1680
1680
1681 if not hasattr(cctx, 'multi_compress_to_buffer'):
1681 if not hasattr(cctx, 'multi_compress_to_buffer'):
1682 self.skipTest('multi_compress_to_buffer not available')
1682 self.skipTest('multi_compress_to_buffer not available')
1683
1683
1684 original = [
1684 original = [
1685 b'foo1',
1685 b'foo1',
1686 b'foo2' * 2,
1686 b'foo2' * 2,
1687 b'foo3' * 3,
1687 b'foo3' * 3,
1688 b'foo4' * 4,
1688 b'foo4' * 4,
1689 b'foo5' * 5,
1689 b'foo5' * 5,
1690 ]
1690 ]
1691
1691
1692 frames = [cctx.compress(c) for c in original]
1692 frames = [cctx.compress(c) for c in original]
1693
1693
1694 b = b''.join([original[0], original[1]])
1694 b = b''.join([original[0], original[1]])
1695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1695 b1 = zstd.BufferWithSegments(b, struct.pack('=QQQQ',
1696 0, len(original[0]),
1696 0, len(original[0]),
1697 len(original[0]), len(original[1])))
1697 len(original[0]), len(original[1])))
1698 b = b''.join([original[2], original[3], original[4]])
1698 b = b''.join([original[2], original[3], original[4]])
1699 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1699 b2 = zstd.BufferWithSegments(b, struct.pack('=QQQQQQ',
1700 0, len(original[2]),
1700 0, len(original[2]),
1701 len(original[2]), len(original[3]),
1701 len(original[2]), len(original[3]),
1702 len(original[2]) + len(original[3]), len(original[4])))
1702 len(original[2]) + len(original[3]), len(original[4])))
1703
1703
1704 c = zstd.BufferWithSegmentsCollection(b1, b2)
1704 c = zstd.BufferWithSegmentsCollection(b1, b2)
1705
1705
1706 result = cctx.multi_compress_to_buffer(c)
1706 result = cctx.multi_compress_to_buffer(c)
1707
1707
1708 self.assertEqual(len(result), len(frames))
1708 self.assertEqual(len(result), len(frames))
1709
1709
1710 for i, frame in enumerate(frames):
1710 for i, frame in enumerate(frames):
1711 self.assertEqual(result[i].tobytes(), frame)
1711 self.assertEqual(result[i].tobytes(), frame)
1712
1712
1713 def test_multiple_threads(self):
1713 def test_multiple_threads(self):
1714 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
1714 # threads argument will cause multi-threaded ZSTD APIs to be used, which will
1715 # make output different.
1715 # make output different.
1716 refcctx = zstd.ZstdCompressor(write_checksum=True)
1716 refcctx = zstd.ZstdCompressor(write_checksum=True)
1717 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
1717 reference = [refcctx.compress(b'x' * 64), refcctx.compress(b'y' * 64)]
1718
1718
1719 cctx = zstd.ZstdCompressor(write_checksum=True)
1719 cctx = zstd.ZstdCompressor(write_checksum=True)
1720
1720
1721 if not hasattr(cctx, 'multi_compress_to_buffer'):
1721 if not hasattr(cctx, 'multi_compress_to_buffer'):
1722 self.skipTest('multi_compress_to_buffer not available')
1722 self.skipTest('multi_compress_to_buffer not available')
1723
1723
1724 frames = []
1724 frames = []
1725 frames.extend(b'x' * 64 for i in range(256))
1725 frames.extend(b'x' * 64 for i in range(256))
1726 frames.extend(b'y' * 64 for i in range(256))
1726 frames.extend(b'y' * 64 for i in range(256))
1727
1727
1728 result = cctx.multi_compress_to_buffer(frames, threads=-1)
1728 result = cctx.multi_compress_to_buffer(frames, threads=-1)
1729
1729
1730 self.assertEqual(len(result), 512)
1730 self.assertEqual(len(result), 512)
1731 for i in range(512):
1731 for i in range(512):
1732 if i < 256:
1732 if i < 256:
1733 self.assertEqual(result[i].tobytes(), reference[0])
1733 self.assertEqual(result[i].tobytes(), reference[0])
1734 else:
1734 else:
1735 self.assertEqual(result[i].tobytes(), reference[1])
1735 self.assertEqual(result[i].tobytes(), reference[1])
@@ -1,228 +1,228 b''
1 import sys
1 import sys
2 import unittest
2 import unittest
3
3
4 import zstandard as zstd
4 import zstandard as zstd
5
5
6 from . common import (
6 from . common import (
7 make_cffi,
7 make_cffi,
8 )
8 )
9
9
10
10
11 @make_cffi
11 @make_cffi
12 class TestCompressionParameters(unittest.TestCase):
12 class TestCompressionParameters(unittest.TestCase):
13 def test_bounds(self):
13 def test_bounds(self):
14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN,
14 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MIN,
15 chain_log=zstd.CHAINLOG_MIN,
15 chain_log=zstd.CHAINLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN,
16 hash_log=zstd.HASHLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN,
17 search_log=zstd.SEARCHLOG_MIN,
18 min_match=zstd.MINMATCH_MIN + 1,
18 min_match=zstd.MINMATCH_MIN + 1,
19 target_length=zstd.TARGETLENGTH_MIN,
19 target_length=zstd.TARGETLENGTH_MIN,
20 strategy=zstd.STRATEGY_FAST)
20 strategy=zstd.STRATEGY_FAST)
21
21
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
22 zstd.ZstdCompressionParameters(window_log=zstd.WINDOWLOG_MAX,
23 chain_log=zstd.CHAINLOG_MAX,
23 chain_log=zstd.CHAINLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
26 min_match=zstd.MINMATCH_MAX - 1,
26 min_match=zstd.MINMATCH_MAX - 1,
27 target_length=zstd.TARGETLENGTH_MAX,
27 target_length=zstd.TARGETLENGTH_MAX,
28 strategy=zstd.STRATEGY_BTULTRA2)
28 strategy=zstd.STRATEGY_BTULTRA2)
29
29
30 def test_from_level(self):
30 def test_from_level(self):
31 p = zstd.ZstdCompressionParameters.from_level(1)
31 p = zstd.ZstdCompressionParameters.from_level(1)
32 self.assertIsInstance(p, zstd.CompressionParameters)
32 self.assertIsInstance(p, zstd.CompressionParameters)
33
33
34 self.assertEqual(p.window_log, 19)
34 self.assertEqual(p.window_log, 19)
35
35
36 p = zstd.ZstdCompressionParameters.from_level(-4)
36 p = zstd.ZstdCompressionParameters.from_level(-4)
37 self.assertEqual(p.window_log, 19)
37 self.assertEqual(p.window_log, 19)
38
38
39 def test_members(self):
39 def test_members(self):
40 p = zstd.ZstdCompressionParameters(window_log=10,
40 p = zstd.ZstdCompressionParameters(window_log=10,
41 chain_log=6,
41 chain_log=6,
42 hash_log=7,
42 hash_log=7,
43 search_log=4,
43 search_log=4,
44 min_match=5,
44 min_match=5,
45 target_length=8,
45 target_length=8,
46 strategy=1)
46 strategy=1)
47 self.assertEqual(p.window_log, 10)
47 self.assertEqual(p.window_log, 10)
48 self.assertEqual(p.chain_log, 6)
48 self.assertEqual(p.chain_log, 6)
49 self.assertEqual(p.hash_log, 7)
49 self.assertEqual(p.hash_log, 7)
50 self.assertEqual(p.search_log, 4)
50 self.assertEqual(p.search_log, 4)
51 self.assertEqual(p.min_match, 5)
51 self.assertEqual(p.min_match, 5)
52 self.assertEqual(p.target_length, 8)
52 self.assertEqual(p.target_length, 8)
53 self.assertEqual(p.compression_strategy, 1)
53 self.assertEqual(p.compression_strategy, 1)
54
54
55 p = zstd.ZstdCompressionParameters(compression_level=2)
55 p = zstd.ZstdCompressionParameters(compression_level=2)
56 self.assertEqual(p.compression_level, 2)
56 self.assertEqual(p.compression_level, 2)
57
57
58 p = zstd.ZstdCompressionParameters(threads=4)
58 p = zstd.ZstdCompressionParameters(threads=4)
59 self.assertEqual(p.threads, 4)
59 self.assertEqual(p.threads, 4)
60
60
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
61 p = zstd.ZstdCompressionParameters(threads=2, job_size=1048576,
62 overlap_log=6)
62 overlap_log=6)
63 self.assertEqual(p.threads, 2)
63 self.assertEqual(p.threads, 2)
64 self.assertEqual(p.job_size, 1048576)
64 self.assertEqual(p.job_size, 1048576)
65 self.assertEqual(p.overlap_log, 6)
65 self.assertEqual(p.overlap_log, 6)
66 self.assertEqual(p.overlap_size_log, 6)
66 self.assertEqual(p.overlap_size_log, 6)
67
67
68 p = zstd.ZstdCompressionParameters(compression_level=-1)
68 p = zstd.ZstdCompressionParameters(compression_level=-1)
69 self.assertEqual(p.compression_level, -1)
69 self.assertEqual(p.compression_level, -1)
70
70
71 p = zstd.ZstdCompressionParameters(compression_level=-2)
71 p = zstd.ZstdCompressionParameters(compression_level=-2)
72 self.assertEqual(p.compression_level, -2)
72 self.assertEqual(p.compression_level, -2)
73
73
74 p = zstd.ZstdCompressionParameters(force_max_window=True)
74 p = zstd.ZstdCompressionParameters(force_max_window=True)
75 self.assertEqual(p.force_max_window, 1)
75 self.assertEqual(p.force_max_window, 1)
76
76
77 p = zstd.ZstdCompressionParameters(enable_ldm=True)
77 p = zstd.ZstdCompressionParameters(enable_ldm=True)
78 self.assertEqual(p.enable_ldm, 1)
78 self.assertEqual(p.enable_ldm, 1)
79
79
80 p = zstd.ZstdCompressionParameters(ldm_hash_log=7)
80 p = zstd.ZstdCompressionParameters(ldm_hash_log=7)
81 self.assertEqual(p.ldm_hash_log, 7)
81 self.assertEqual(p.ldm_hash_log, 7)
82
82
83 p = zstd.ZstdCompressionParameters(ldm_min_match=6)
83 p = zstd.ZstdCompressionParameters(ldm_min_match=6)
84 self.assertEqual(p.ldm_min_match, 6)
84 self.assertEqual(p.ldm_min_match, 6)
85
85
86 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
86 p = zstd.ZstdCompressionParameters(ldm_bucket_size_log=7)
87 self.assertEqual(p.ldm_bucket_size_log, 7)
87 self.assertEqual(p.ldm_bucket_size_log, 7)
88
88
89 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
89 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
90 self.assertEqual(p.ldm_hash_every_log, 8)
90 self.assertEqual(p.ldm_hash_every_log, 8)
91 self.assertEqual(p.ldm_hash_rate_log, 8)
91 self.assertEqual(p.ldm_hash_rate_log, 8)
92
92
93 def test_estimated_compression_context_size(self):
93 def test_estimated_compression_context_size(self):
94 p = zstd.ZstdCompressionParameters(window_log=20,
94 p = zstd.ZstdCompressionParameters(window_log=20,
95 chain_log=16,
95 chain_log=16,
96 hash_log=17,
96 hash_log=17,
97 search_log=1,
97 search_log=1,
98 min_match=5,
98 min_match=5,
99 target_length=16,
99 target_length=16,
100 strategy=zstd.STRATEGY_DFAST)
100 strategy=zstd.STRATEGY_DFAST)
101
101
102 # 32-bit has slightly different values from 64-bit.
102 # 32-bit has slightly different values from 64-bit.
103 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294072,
103 self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144,
104 delta=250)
104 delta=250)
105
105
106 def test_strategy(self):
106 def test_strategy(self):
107 with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'):
107 with self.assertRaisesRegexp(ValueError, 'cannot specify both compression_strategy'):
108 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
108 zstd.ZstdCompressionParameters(strategy=0, compression_strategy=0)
109
109
110 p = zstd.ZstdCompressionParameters(strategy=2)
110 p = zstd.ZstdCompressionParameters(strategy=2)
111 self.assertEqual(p.compression_strategy, 2)
111 self.assertEqual(p.compression_strategy, 2)
112
112
113 p = zstd.ZstdCompressionParameters(strategy=3)
113 p = zstd.ZstdCompressionParameters(strategy=3)
114 self.assertEqual(p.compression_strategy, 3)
114 self.assertEqual(p.compression_strategy, 3)
115
115
116 def test_ldm_hash_rate_log(self):
116 def test_ldm_hash_rate_log(self):
117 with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'):
117 with self.assertRaisesRegexp(ValueError, 'cannot specify both ldm_hash_rate_log'):
118 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
118 zstd.ZstdCompressionParameters(ldm_hash_rate_log=8, ldm_hash_every_log=4)
119
119
120 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
120 p = zstd.ZstdCompressionParameters(ldm_hash_rate_log=8)
121 self.assertEqual(p.ldm_hash_every_log, 8)
121 self.assertEqual(p.ldm_hash_every_log, 8)
122
122
123 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
123 p = zstd.ZstdCompressionParameters(ldm_hash_every_log=16)
124 self.assertEqual(p.ldm_hash_every_log, 16)
124 self.assertEqual(p.ldm_hash_every_log, 16)
125
125
126 def test_overlap_log(self):
126 def test_overlap_log(self):
127 with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'):
127 with self.assertRaisesRegexp(ValueError, 'cannot specify both overlap_log'):
128 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
128 zstd.ZstdCompressionParameters(overlap_log=1, overlap_size_log=9)
129
129
130 p = zstd.ZstdCompressionParameters(overlap_log=2)
130 p = zstd.ZstdCompressionParameters(overlap_log=2)
131 self.assertEqual(p.overlap_log, 2)
131 self.assertEqual(p.overlap_log, 2)
132 self.assertEqual(p.overlap_size_log, 2)
132 self.assertEqual(p.overlap_size_log, 2)
133
133
134 p = zstd.ZstdCompressionParameters(overlap_size_log=4)
134 p = zstd.ZstdCompressionParameters(overlap_size_log=4)
135 self.assertEqual(p.overlap_log, 4)
135 self.assertEqual(p.overlap_log, 4)
136 self.assertEqual(p.overlap_size_log, 4)
136 self.assertEqual(p.overlap_size_log, 4)
137
137
138
138
139 @make_cffi
139 @make_cffi
140 class TestFrameParameters(unittest.TestCase):
140 class TestFrameParameters(unittest.TestCase):
141 def test_invalid_type(self):
141 def test_invalid_type(self):
142 with self.assertRaises(TypeError):
142 with self.assertRaises(TypeError):
143 zstd.get_frame_parameters(None)
143 zstd.get_frame_parameters(None)
144
144
145 # Python 3 doesn't appear to convert unicode to Py_buffer.
145 # Python 3 doesn't appear to convert unicode to Py_buffer.
146 if sys.version_info[0] >= 3:
146 if sys.version_info[0] >= 3:
147 with self.assertRaises(TypeError):
147 with self.assertRaises(TypeError):
148 zstd.get_frame_parameters(u'foobarbaz')
148 zstd.get_frame_parameters(u'foobarbaz')
149 else:
149 else:
150 # CPython will convert unicode to Py_buffer. But CFFI won't.
150 # CPython will convert unicode to Py_buffer. But CFFI won't.
151 if zstd.backend == 'cffi':
151 if zstd.backend == 'cffi':
152 with self.assertRaises(TypeError):
152 with self.assertRaises(TypeError):
153 zstd.get_frame_parameters(u'foobarbaz')
153 zstd.get_frame_parameters(u'foobarbaz')
154 else:
154 else:
155 with self.assertRaises(zstd.ZstdError):
155 with self.assertRaises(zstd.ZstdError):
156 zstd.get_frame_parameters(u'foobarbaz')
156 zstd.get_frame_parameters(u'foobarbaz')
157
157
158 def test_invalid_input_sizes(self):
158 def test_invalid_input_sizes(self):
159 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
159 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
160 zstd.get_frame_parameters(b'')
160 zstd.get_frame_parameters(b'')
161
161
162 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
162 with self.assertRaisesRegexp(zstd.ZstdError, 'not enough data for frame'):
163 zstd.get_frame_parameters(zstd.FRAME_HEADER)
163 zstd.get_frame_parameters(zstd.FRAME_HEADER)
164
164
165 def test_invalid_frame(self):
165 def test_invalid_frame(self):
166 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
166 with self.assertRaisesRegexp(zstd.ZstdError, 'Unknown frame descriptor'):
167 zstd.get_frame_parameters(b'foobarbaz')
167 zstd.get_frame_parameters(b'foobarbaz')
168
168
169 def test_attributes(self):
169 def test_attributes(self):
170 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
170 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x00')
171 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
171 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
172 self.assertEqual(params.window_size, 1024)
172 self.assertEqual(params.window_size, 1024)
173 self.assertEqual(params.dict_id, 0)
173 self.assertEqual(params.dict_id, 0)
174 self.assertFalse(params.has_checksum)
174 self.assertFalse(params.has_checksum)
175
175
176 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
176 # Lowest 2 bits indicate a dictionary and length. Here, the dict id is 1 byte.
177 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
177 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x01\x00\xff')
178 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
178 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
179 self.assertEqual(params.window_size, 1024)
179 self.assertEqual(params.window_size, 1024)
180 self.assertEqual(params.dict_id, 255)
180 self.assertEqual(params.dict_id, 255)
181 self.assertFalse(params.has_checksum)
181 self.assertFalse(params.has_checksum)
182
182
183 # Lowest 3rd bit indicates if checksum is present.
183 # Lowest 3rd bit indicates if checksum is present.
184 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
184 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x04\x00')
185 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
185 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
186 self.assertEqual(params.window_size, 1024)
186 self.assertEqual(params.window_size, 1024)
187 self.assertEqual(params.dict_id, 0)
187 self.assertEqual(params.dict_id, 0)
188 self.assertTrue(params.has_checksum)
188 self.assertTrue(params.has_checksum)
189
189
190 # Upper 2 bits indicate content size.
190 # Upper 2 bits indicate content size.
191 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
191 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x40\x00\xff\x00')
192 self.assertEqual(params.content_size, 511)
192 self.assertEqual(params.content_size, 511)
193 self.assertEqual(params.window_size, 1024)
193 self.assertEqual(params.window_size, 1024)
194 self.assertEqual(params.dict_id, 0)
194 self.assertEqual(params.dict_id, 0)
195 self.assertFalse(params.has_checksum)
195 self.assertFalse(params.has_checksum)
196
196
197 # Window descriptor is 2nd byte after frame header.
197 # Window descriptor is 2nd byte after frame header.
198 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
198 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x00\x40')
199 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
199 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
200 self.assertEqual(params.window_size, 262144)
200 self.assertEqual(params.window_size, 262144)
201 self.assertEqual(params.dict_id, 0)
201 self.assertEqual(params.dict_id, 0)
202 self.assertFalse(params.has_checksum)
202 self.assertFalse(params.has_checksum)
203
203
204 # Set multiple things.
204 # Set multiple things.
205 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
205 params = zstd.get_frame_parameters(zstd.FRAME_HEADER + b'\x45\x40\x0f\x10\x00')
206 self.assertEqual(params.content_size, 272)
206 self.assertEqual(params.content_size, 272)
207 self.assertEqual(params.window_size, 262144)
207 self.assertEqual(params.window_size, 262144)
208 self.assertEqual(params.dict_id, 15)
208 self.assertEqual(params.dict_id, 15)
209 self.assertTrue(params.has_checksum)
209 self.assertTrue(params.has_checksum)
210
210
211 def test_input_types(self):
211 def test_input_types(self):
212 v = zstd.FRAME_HEADER + b'\x00\x00'
212 v = zstd.FRAME_HEADER + b'\x00\x00'
213
213
214 mutable_array = bytearray(len(v))
214 mutable_array = bytearray(len(v))
215 mutable_array[:] = v
215 mutable_array[:] = v
216
216
217 sources = [
217 sources = [
218 memoryview(v),
218 memoryview(v),
219 bytearray(v),
219 bytearray(v),
220 mutable_array,
220 mutable_array,
221 ]
221 ]
222
222
223 for source in sources:
223 for source in sources:
224 params = zstd.get_frame_parameters(source)
224 params = zstd.get_frame_parameters(source)
225 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
225 self.assertEqual(params.content_size, zstd.CONTENTSIZE_UNKNOWN)
226 self.assertEqual(params.window_size, 1024)
226 self.assertEqual(params.window_size, 1024)
227 self.assertEqual(params.dict_id, 0)
227 self.assertEqual(params.dict_id, 0)
228 self.assertFalse(params.has_checksum)
228 self.assertFalse(params.has_checksum)
@@ -1,69 +1,69 b''
1 from __future__ import unicode_literals
1 from __future__ import unicode_literals
2
2
3 import unittest
3 import unittest
4
4
5 import zstandard as zstd
5 import zstandard as zstd
6
6
7 from . common import (
7 from . common import (
8 make_cffi,
8 make_cffi,
9 )
9 )
10
10
11
11
12 @make_cffi
12 @make_cffi
13 class TestModuleAttributes(unittest.TestCase):
13 class TestModuleAttributes(unittest.TestCase):
14 def test_version(self):
14 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 8))
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 3))
16
16
17 self.assertEqual(zstd.__version__, '0.11.0')
17 self.assertEqual(zstd.__version__, '0.12.0')
18
18
19 def test_constants(self):
19 def test_constants(self):
20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
21 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
21 self.assertEqual(zstd.FRAME_HEADER, b'\x28\xb5\x2f\xfd')
22
22
23 def test_hasattr(self):
23 def test_hasattr(self):
24 attrs = (
24 attrs = (
25 'CONTENTSIZE_UNKNOWN',
25 'CONTENTSIZE_UNKNOWN',
26 'CONTENTSIZE_ERROR',
26 'CONTENTSIZE_ERROR',
27 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
27 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
28 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
28 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
31 'MAGIC_NUMBER',
31 'MAGIC_NUMBER',
32 'FLUSH_BLOCK',
32 'FLUSH_BLOCK',
33 'FLUSH_FRAME',
33 'FLUSH_FRAME',
34 'BLOCKSIZELOG_MAX',
34 'BLOCKSIZELOG_MAX',
35 'BLOCKSIZE_MAX',
35 'BLOCKSIZE_MAX',
36 'WINDOWLOG_MIN',
36 'WINDOWLOG_MIN',
37 'WINDOWLOG_MAX',
37 'WINDOWLOG_MAX',
38 'CHAINLOG_MIN',
38 'CHAINLOG_MIN',
39 'CHAINLOG_MAX',
39 'CHAINLOG_MAX',
40 'HASHLOG_MIN',
40 'HASHLOG_MIN',
41 'HASHLOG_MAX',
41 'HASHLOG_MAX',
42 'HASHLOG3_MAX',
42 'HASHLOG3_MAX',
43 'MINMATCH_MIN',
43 'MINMATCH_MIN',
44 'MINMATCH_MAX',
44 'MINMATCH_MAX',
45 'SEARCHLOG_MIN',
45 'SEARCHLOG_MIN',
46 'SEARCHLOG_MAX',
46 'SEARCHLOG_MAX',
47 'SEARCHLENGTH_MIN',
47 'SEARCHLENGTH_MIN',
48 'SEARCHLENGTH_MAX',
48 'SEARCHLENGTH_MAX',
49 'TARGETLENGTH_MIN',
49 'TARGETLENGTH_MIN',
50 'TARGETLENGTH_MAX',
50 'TARGETLENGTH_MAX',
51 'LDM_MINMATCH_MIN',
51 'LDM_MINMATCH_MIN',
52 'LDM_MINMATCH_MAX',
52 'LDM_MINMATCH_MAX',
53 'LDM_BUCKETSIZELOG_MAX',
53 'LDM_BUCKETSIZELOG_MAX',
54 'STRATEGY_FAST',
54 'STRATEGY_FAST',
55 'STRATEGY_DFAST',
55 'STRATEGY_DFAST',
56 'STRATEGY_GREEDY',
56 'STRATEGY_GREEDY',
57 'STRATEGY_LAZY',
57 'STRATEGY_LAZY',
58 'STRATEGY_LAZY2',
58 'STRATEGY_LAZY2',
59 'STRATEGY_BTLAZY2',
59 'STRATEGY_BTLAZY2',
60 'STRATEGY_BTOPT',
60 'STRATEGY_BTOPT',
61 'STRATEGY_BTULTRA',
61 'STRATEGY_BTULTRA',
62 'STRATEGY_BTULTRA2',
62 'STRATEGY_BTULTRA2',
63 'DICT_TYPE_AUTO',
63 'DICT_TYPE_AUTO',
64 'DICT_TYPE_RAWCONTENT',
64 'DICT_TYPE_RAWCONTENT',
65 'DICT_TYPE_FULLDICT',
65 'DICT_TYPE_FULLDICT',
66 )
66 )
67
67
68 for a in attrs:
68 for a in attrs:
69 self.assertTrue(hasattr(zstd, a), a)
69 self.assertTrue(hasattr(zstd, a), a)
@@ -1,88 +1,89 b''
1 import struct
1 import struct
2 import sys
2 import sys
3 import unittest
3 import unittest
4
4
5 import zstandard as zstd
5 import zstandard as zstd
6
6
7 from . common import (
7 from . common import (
8 generate_samples,
8 generate_samples,
9 make_cffi,
9 make_cffi,
10 random_input_data,
10 )
11 )
11
12
12 if sys.version_info[0] >= 3:
13 if sys.version_info[0] >= 3:
13 int_type = int
14 int_type = int
14 else:
15 else:
15 int_type = long
16 int_type = long
16
17
17
18
18 @make_cffi
19 @make_cffi
19 class TestTrainDictionary(unittest.TestCase):
20 class TestTrainDictionary(unittest.TestCase):
20 def test_no_args(self):
21 def test_no_args(self):
21 with self.assertRaises(TypeError):
22 with self.assertRaises(TypeError):
22 zstd.train_dictionary()
23 zstd.train_dictionary()
23
24
24 def test_bad_args(self):
25 def test_bad_args(self):
25 with self.assertRaises(TypeError):
26 with self.assertRaises(TypeError):
26 zstd.train_dictionary(8192, u'foo')
27 zstd.train_dictionary(8192, u'foo')
27
28
28 with self.assertRaises(ValueError):
29 with self.assertRaises(ValueError):
29 zstd.train_dictionary(8192, [u'foo'])
30 zstd.train_dictionary(8192, [u'foo'])
30
31
31 def test_no_params(self):
32 def test_no_params(self):
32 d = zstd.train_dictionary(8192, generate_samples())
33 d = zstd.train_dictionary(8192, random_input_data())
33 self.assertIsInstance(d.dict_id(), int_type)
34 self.assertIsInstance(d.dict_id(), int_type)
34
35
35 # The dictionary ID may be different across platforms.
36 # The dictionary ID may be different across platforms.
36 expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
37 expected = b'\x37\xa4\x30\xec' + struct.pack('<I', d.dict_id())
37
38
38 data = d.as_bytes()
39 data = d.as_bytes()
39 self.assertEqual(data[0:8], expected)
40 self.assertEqual(data[0:8], expected)
40
41
41 def test_basic(self):
42 def test_basic(self):
42 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
43 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
43 self.assertIsInstance(d.dict_id(), int_type)
44 self.assertIsInstance(d.dict_id(), int_type)
44
45
45 data = d.as_bytes()
46 data = d.as_bytes()
46 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
47 self.assertEqual(data[0:4], b'\x37\xa4\x30\xec')
47
48
48 self.assertEqual(d.k, 64)
49 self.assertEqual(d.k, 64)
49 self.assertEqual(d.d, 16)
50 self.assertEqual(d.d, 16)
50
51
51 def test_set_dict_id(self):
52 def test_set_dict_id(self):
52 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
53 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16,
53 dict_id=42)
54 dict_id=42)
54 self.assertEqual(d.dict_id(), 42)
55 self.assertEqual(d.dict_id(), 42)
55
56
56 def test_optimize(self):
57 def test_optimize(self):
57 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
58 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
58 d=16)
59 d=16)
59
60
60 # This varies by platform.
61 # This varies by platform.
61 self.assertIn(d.k, (50, 2000))
62 self.assertIn(d.k, (50, 2000))
62 self.assertEqual(d.d, 16)
63 self.assertEqual(d.d, 16)
63
64
64 @make_cffi
65 @make_cffi
65 class TestCompressionDict(unittest.TestCase):
66 class TestCompressionDict(unittest.TestCase):
66 def test_bad_mode(self):
67 def test_bad_mode(self):
67 with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
68 with self.assertRaisesRegexp(ValueError, 'invalid dictionary load mode'):
68 zstd.ZstdCompressionDict(b'foo', dict_type=42)
69 zstd.ZstdCompressionDict(b'foo', dict_type=42)
69
70
70 def test_bad_precompute_compress(self):
71 def test_bad_precompute_compress(self):
71 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
72 d = zstd.train_dictionary(8192, generate_samples(), k=64, d=16)
72
73
73 with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
74 with self.assertRaisesRegexp(ValueError, 'must specify one of level or '):
74 d.precompute_compress()
75 d.precompute_compress()
75
76
76 with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
77 with self.assertRaisesRegexp(ValueError, 'must only specify one of level or '):
77 d.precompute_compress(level=3,
78 d.precompute_compress(level=3,
78 compression_params=zstd.CompressionParameters())
79 compression_params=zstd.CompressionParameters())
79
80
80 def test_precompute_compress_rawcontent(self):
81 def test_precompute_compress_rawcontent(self):
81 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
82 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
82 dict_type=zstd.DICT_TYPE_RAWCONTENT)
83 dict_type=zstd.DICT_TYPE_RAWCONTENT)
83 d.precompute_compress(level=1)
84 d.precompute_compress(level=1)
84
85
85 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
86 d = zstd.ZstdCompressionDict(b'dictcontent' * 64,
86 dict_type=zstd.DICT_TYPE_FULLDICT)
87 dict_type=zstd.DICT_TYPE_FULLDICT)
87 with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
88 with self.assertRaisesRegexp(zstd.ZstdError, 'unable to precompute dictionary'):
88 d.precompute_compress(level=1)
89 d.precompute_compress(level=1)
@@ -1,65 +1,65 b''
1 # Copyright (c) 2017-present, Gregory Szorc
1 # Copyright (c) 2017-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 """Python interface to the Zstandard (zstd) compression library."""
7 """Python interface to the Zstandard (zstd) compression library."""
8
8
9 from __future__ import absolute_import, unicode_literals
9 from __future__ import absolute_import, unicode_literals
10
10
11 # This module serves 2 roles:
11 # This module serves 2 roles:
12 #
12 #
13 # 1) Export the C or CFFI "backend" through a central module.
13 # 1) Export the C or CFFI "backend" through a central module.
14 # 2) Implement additional functionality built on top of C or CFFI backend.
14 # 2) Implement additional functionality built on top of C or CFFI backend.
15
15
16 import os
16 import os
17 import platform
17 import platform
18
18
19 # Some Python implementations don't support C extensions. That's why we have
19 # Some Python implementations don't support C extensions. That's why we have
20 # a CFFI implementation in the first place. The code here import one of our
20 # a CFFI implementation in the first place. The code here import one of our
21 # "backends" then re-exports the symbols from this module. For convenience,
21 # "backends" then re-exports the symbols from this module. For convenience,
22 # we support falling back to the CFFI backend if the C extension can't be
22 # we support falling back to the CFFI backend if the C extension can't be
23 # imported. But for performance reasons, we only do this on unknown Python
23 # imported. But for performance reasons, we only do this on unknown Python
24 # implementation. Notably, for CPython we require the C extension by default.
24 # implementation. Notably, for CPython we require the C extension by default.
25 # Because someone will inevitably want special behavior, the behavior is
25 # Because someone will inevitably want special behavior, the behavior is
26 # configurable via an environment variable. A potentially better way to handle
26 # configurable via an environment variable. A potentially better way to handle
27 # this is to import a special ``__importpolicy__`` module or something
27 # this is to import a special ``__importpolicy__`` module or something
28 # defining a variable and `setup.py` could write the file with whatever
28 # defining a variable and `setup.py` could write the file with whatever
29 # policy was specified at build time. Until someone needs it, we go with
29 # policy was specified at build time. Until someone needs it, we go with
30 # the hacky but simple environment variable approach.
30 # the hacky but simple environment variable approach.
31 _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')
31 _module_policy = os.environ.get('PYTHON_ZSTANDARD_IMPORT_POLICY', 'default')
32
32
33 if _module_policy == 'default':
33 if _module_policy == 'default':
34 if platform.python_implementation() in ('CPython',):
34 if platform.python_implementation() in ('CPython',):
35 from zstd import *
35 from zstd import *
36 backend = 'cext'
36 backend = 'cext'
37 elif platform.python_implementation() in ('PyPy',):
37 elif platform.python_implementation() in ('PyPy',):
38 from .cffi import *
38 from .cffi import *
39 backend = 'cffi'
39 backend = 'cffi'
40 else:
40 else:
41 try:
41 try:
42 from zstd import *
42 from zstd import *
43 backend = 'cext'
43 backend = 'cext'
44 except ImportError:
44 except ImportError:
45 from .cffi import *
45 from .cffi import *
46 backend = 'cffi'
46 backend = 'cffi'
47 elif _module_policy == 'cffi_fallback':
47 elif _module_policy == 'cffi_fallback':
48 try:
48 try:
49 from zstd import *
49 from zstd import *
50 backend = 'cext'
50 backend = 'cext'
51 except ImportError:
51 except ImportError:
52 from .cffi import *
52 from .cffi import *
53 backend = 'cffi'
53 backend = 'cffi'
54 elif _module_policy == 'cext':
54 elif _module_policy == 'cext':
55 from zstd import *
55 from zstd import *
56 backend = 'cext'
56 backend = 'cext'
57 elif _module_policy == 'cffi':
57 elif _module_policy == 'cffi':
58 from .cffi import *
58 from .cffi import *
59 backend = 'cffi'
59 backend = 'cffi'
60 else:
60 else:
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 'cext, or cffi' % _module_policy)
62 'cext, or cffi' % _module_policy)
63
63
64 # Keep this in sync with python-zstandard.h.
64 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.11.0'
65 __version__ = '0.12.0'
@@ -1,2515 +1,2515 b''
1 # Copyright (c) 2016-present, Gregory Szorc
1 # Copyright (c) 2016-present, Gregory Szorc
2 # All rights reserved.
2 # All rights reserved.
3 #
3 #
4 # This software may be modified and distributed under the terms
4 # This software may be modified and distributed under the terms
5 # of the BSD license. See the LICENSE file for details.
5 # of the BSD license. See the LICENSE file for details.
6
6
7 """Python interface to the Zstandard (zstd) compression library."""
7 """Python interface to the Zstandard (zstd) compression library."""
8
8
9 from __future__ import absolute_import, unicode_literals
9 from __future__ import absolute_import, unicode_literals
10
10
11 # This should match what the C extension exports.
11 # This should match what the C extension exports.
12 __all__ = [
12 __all__ = [
13 #'BufferSegment',
13 #'BufferSegment',
14 #'BufferSegments',
14 #'BufferSegments',
15 #'BufferWithSegments',
15 #'BufferWithSegments',
16 #'BufferWithSegmentsCollection',
16 #'BufferWithSegmentsCollection',
17 'CompressionParameters',
17 'CompressionParameters',
18 'ZstdCompressionDict',
18 'ZstdCompressionDict',
19 'ZstdCompressionParameters',
19 'ZstdCompressionParameters',
20 'ZstdCompressor',
20 'ZstdCompressor',
21 'ZstdError',
21 'ZstdError',
22 'ZstdDecompressor',
22 'ZstdDecompressor',
23 'FrameParameters',
23 'FrameParameters',
24 'estimate_decompression_context_size',
24 'estimate_decompression_context_size',
25 'frame_content_size',
25 'frame_content_size',
26 'frame_header_size',
26 'frame_header_size',
27 'get_frame_parameters',
27 'get_frame_parameters',
28 'train_dictionary',
28 'train_dictionary',
29
29
30 # Constants.
30 # Constants.
31 'FLUSH_BLOCK',
31 'FLUSH_BLOCK',
32 'FLUSH_FRAME',
32 'FLUSH_FRAME',
33 'COMPRESSOBJ_FLUSH_FINISH',
33 'COMPRESSOBJ_FLUSH_FINISH',
34 'COMPRESSOBJ_FLUSH_BLOCK',
34 'COMPRESSOBJ_FLUSH_BLOCK',
35 'ZSTD_VERSION',
35 'ZSTD_VERSION',
36 'FRAME_HEADER',
36 'FRAME_HEADER',
37 'CONTENTSIZE_UNKNOWN',
37 'CONTENTSIZE_UNKNOWN',
38 'CONTENTSIZE_ERROR',
38 'CONTENTSIZE_ERROR',
39 'MAX_COMPRESSION_LEVEL',
39 'MAX_COMPRESSION_LEVEL',
40 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
40 'COMPRESSION_RECOMMENDED_INPUT_SIZE',
41 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
41 'COMPRESSION_RECOMMENDED_OUTPUT_SIZE',
42 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
42 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
43 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
43 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
44 'MAGIC_NUMBER',
44 'MAGIC_NUMBER',
45 'BLOCKSIZELOG_MAX',
45 'BLOCKSIZELOG_MAX',
46 'BLOCKSIZE_MAX',
46 'BLOCKSIZE_MAX',
47 'WINDOWLOG_MIN',
47 'WINDOWLOG_MIN',
48 'WINDOWLOG_MAX',
48 'WINDOWLOG_MAX',
49 'CHAINLOG_MIN',
49 'CHAINLOG_MIN',
50 'CHAINLOG_MAX',
50 'CHAINLOG_MAX',
51 'HASHLOG_MIN',
51 'HASHLOG_MIN',
52 'HASHLOG_MAX',
52 'HASHLOG_MAX',
53 'HASHLOG3_MAX',
53 'HASHLOG3_MAX',
54 'MINMATCH_MIN',
54 'MINMATCH_MIN',
55 'MINMATCH_MAX',
55 'MINMATCH_MAX',
56 'SEARCHLOG_MIN',
56 'SEARCHLOG_MIN',
57 'SEARCHLOG_MAX',
57 'SEARCHLOG_MAX',
58 'SEARCHLENGTH_MIN',
58 'SEARCHLENGTH_MIN',
59 'SEARCHLENGTH_MAX',
59 'SEARCHLENGTH_MAX',
60 'TARGETLENGTH_MIN',
60 'TARGETLENGTH_MIN',
61 'TARGETLENGTH_MAX',
61 'TARGETLENGTH_MAX',
62 'LDM_MINMATCH_MIN',
62 'LDM_MINMATCH_MIN',
63 'LDM_MINMATCH_MAX',
63 'LDM_MINMATCH_MAX',
64 'LDM_BUCKETSIZELOG_MAX',
64 'LDM_BUCKETSIZELOG_MAX',
65 'STRATEGY_FAST',
65 'STRATEGY_FAST',
66 'STRATEGY_DFAST',
66 'STRATEGY_DFAST',
67 'STRATEGY_GREEDY',
67 'STRATEGY_GREEDY',
68 'STRATEGY_LAZY',
68 'STRATEGY_LAZY',
69 'STRATEGY_LAZY2',
69 'STRATEGY_LAZY2',
70 'STRATEGY_BTLAZY2',
70 'STRATEGY_BTLAZY2',
71 'STRATEGY_BTOPT',
71 'STRATEGY_BTOPT',
72 'STRATEGY_BTULTRA',
72 'STRATEGY_BTULTRA',
73 'STRATEGY_BTULTRA2',
73 'STRATEGY_BTULTRA2',
74 'DICT_TYPE_AUTO',
74 'DICT_TYPE_AUTO',
75 'DICT_TYPE_RAWCONTENT',
75 'DICT_TYPE_RAWCONTENT',
76 'DICT_TYPE_FULLDICT',
76 'DICT_TYPE_FULLDICT',
77 'FORMAT_ZSTD1',
77 'FORMAT_ZSTD1',
78 'FORMAT_ZSTD1_MAGICLESS',
78 'FORMAT_ZSTD1_MAGICLESS',
79 ]
79 ]
80
80
81 import io
81 import io
82 import os
82 import os
83 import sys
83 import sys
84
84
85 from _zstd_cffi import (
85 from _zstd_cffi import (
86 ffi,
86 ffi,
87 lib,
87 lib,
88 )
88 )
89
89
90 if sys.version_info[0] == 2:
90 if sys.version_info[0] == 2:
91 bytes_type = str
91 bytes_type = str
92 int_type = long
92 int_type = long
93 else:
93 else:
94 bytes_type = bytes
94 bytes_type = bytes
95 int_type = int
95 int_type = int
96
96
97
97
98 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
98 COMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_CStreamInSize()
99 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
99 COMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_CStreamOutSize()
100 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
100 DECOMPRESSION_RECOMMENDED_INPUT_SIZE = lib.ZSTD_DStreamInSize()
101 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
101 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE = lib.ZSTD_DStreamOutSize()
102
102
103 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
103 new_nonzero = ffi.new_allocator(should_clear_after_alloc=False)
104
104
105
105
106 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
106 MAX_COMPRESSION_LEVEL = lib.ZSTD_maxCLevel()
107 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
107 MAGIC_NUMBER = lib.ZSTD_MAGICNUMBER
108 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
108 FRAME_HEADER = b'\x28\xb5\x2f\xfd'
109 CONTENTSIZE_UNKNOWN = lib.ZSTD_CONTENTSIZE_UNKNOWN
109 CONTENTSIZE_UNKNOWN = lib.ZSTD_CONTENTSIZE_UNKNOWN
110 CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR
110 CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR
111 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
111 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
112
112
113 BLOCKSIZELOG_MAX = lib.ZSTD_BLOCKSIZELOG_MAX
113 BLOCKSIZELOG_MAX = lib.ZSTD_BLOCKSIZELOG_MAX
114 BLOCKSIZE_MAX = lib.ZSTD_BLOCKSIZE_MAX
114 BLOCKSIZE_MAX = lib.ZSTD_BLOCKSIZE_MAX
115 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
115 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
116 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
116 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
117 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
117 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
118 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
118 CHAINLOG_MAX = lib.ZSTD_CHAINLOG_MAX
119 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
119 HASHLOG_MIN = lib.ZSTD_HASHLOG_MIN
120 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
120 HASHLOG_MAX = lib.ZSTD_HASHLOG_MAX
121 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
121 HASHLOG3_MAX = lib.ZSTD_HASHLOG3_MAX
122 MINMATCH_MIN = lib.ZSTD_MINMATCH_MIN
122 MINMATCH_MIN = lib.ZSTD_MINMATCH_MIN
123 MINMATCH_MAX = lib.ZSTD_MINMATCH_MAX
123 MINMATCH_MAX = lib.ZSTD_MINMATCH_MAX
124 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
124 SEARCHLOG_MIN = lib.ZSTD_SEARCHLOG_MIN
125 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
125 SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX
126 SEARCHLENGTH_MIN = lib.ZSTD_MINMATCH_MIN
126 SEARCHLENGTH_MIN = lib.ZSTD_MINMATCH_MIN
127 SEARCHLENGTH_MAX = lib.ZSTD_MINMATCH_MAX
127 SEARCHLENGTH_MAX = lib.ZSTD_MINMATCH_MAX
128 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
128 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
129 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
129 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
130 LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
130 LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
131 LDM_MINMATCH_MAX = lib.ZSTD_LDM_MINMATCH_MAX
131 LDM_MINMATCH_MAX = lib.ZSTD_LDM_MINMATCH_MAX
132 LDM_BUCKETSIZELOG_MAX = lib.ZSTD_LDM_BUCKETSIZELOG_MAX
132 LDM_BUCKETSIZELOG_MAX = lib.ZSTD_LDM_BUCKETSIZELOG_MAX
133
133
134 STRATEGY_FAST = lib.ZSTD_fast
134 STRATEGY_FAST = lib.ZSTD_fast
135 STRATEGY_DFAST = lib.ZSTD_dfast
135 STRATEGY_DFAST = lib.ZSTD_dfast
136 STRATEGY_GREEDY = lib.ZSTD_greedy
136 STRATEGY_GREEDY = lib.ZSTD_greedy
137 STRATEGY_LAZY = lib.ZSTD_lazy
137 STRATEGY_LAZY = lib.ZSTD_lazy
138 STRATEGY_LAZY2 = lib.ZSTD_lazy2
138 STRATEGY_LAZY2 = lib.ZSTD_lazy2
139 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
139 STRATEGY_BTLAZY2 = lib.ZSTD_btlazy2
140 STRATEGY_BTOPT = lib.ZSTD_btopt
140 STRATEGY_BTOPT = lib.ZSTD_btopt
141 STRATEGY_BTULTRA = lib.ZSTD_btultra
141 STRATEGY_BTULTRA = lib.ZSTD_btultra
142 STRATEGY_BTULTRA2 = lib.ZSTD_btultra2
142 STRATEGY_BTULTRA2 = lib.ZSTD_btultra2
143
143
144 DICT_TYPE_AUTO = lib.ZSTD_dct_auto
144 DICT_TYPE_AUTO = lib.ZSTD_dct_auto
145 DICT_TYPE_RAWCONTENT = lib.ZSTD_dct_rawContent
145 DICT_TYPE_RAWCONTENT = lib.ZSTD_dct_rawContent
146 DICT_TYPE_FULLDICT = lib.ZSTD_dct_fullDict
146 DICT_TYPE_FULLDICT = lib.ZSTD_dct_fullDict
147
147
148 FORMAT_ZSTD1 = lib.ZSTD_f_zstd1
148 FORMAT_ZSTD1 = lib.ZSTD_f_zstd1
149 FORMAT_ZSTD1_MAGICLESS = lib.ZSTD_f_zstd1_magicless
149 FORMAT_ZSTD1_MAGICLESS = lib.ZSTD_f_zstd1_magicless
150
150
151 FLUSH_BLOCK = 0
151 FLUSH_BLOCK = 0
152 FLUSH_FRAME = 1
152 FLUSH_FRAME = 1
153
153
154 COMPRESSOBJ_FLUSH_FINISH = 0
154 COMPRESSOBJ_FLUSH_FINISH = 0
155 COMPRESSOBJ_FLUSH_BLOCK = 1
155 COMPRESSOBJ_FLUSH_BLOCK = 1
156
156
157
157
158 def _cpu_count():
158 def _cpu_count():
159 # os.cpu_count() was introducd in Python 3.4.
159 # os.cpu_count() was introducd in Python 3.4.
160 try:
160 try:
161 return os.cpu_count() or 0
161 return os.cpu_count() or 0
162 except AttributeError:
162 except AttributeError:
163 pass
163 pass
164
164
165 # Linux.
165 # Linux.
166 try:
166 try:
167 if sys.version_info[0] == 2:
167 if sys.version_info[0] == 2:
168 return os.sysconf(b'SC_NPROCESSORS_ONLN')
168 return os.sysconf(b'SC_NPROCESSORS_ONLN')
169 else:
169 else:
170 return os.sysconf(u'SC_NPROCESSORS_ONLN')
170 return os.sysconf(u'SC_NPROCESSORS_ONLN')
171 except (AttributeError, ValueError):
171 except (AttributeError, ValueError):
172 pass
172 pass
173
173
174 # TODO implement on other platforms.
174 # TODO implement on other platforms.
175 return 0
175 return 0
176
176
177
177
178 class ZstdError(Exception):
178 class ZstdError(Exception):
179 pass
179 pass
180
180
181
181
182 def _zstd_error(zresult):
182 def _zstd_error(zresult):
183 # Resolves to bytes on Python 2 and 3. We use the string for formatting
183 # Resolves to bytes on Python 2 and 3. We use the string for formatting
184 # into error messages, which will be literal unicode. So convert it to
184 # into error messages, which will be literal unicode. So convert it to
185 # unicode.
185 # unicode.
186 return ffi.string(lib.ZSTD_getErrorName(zresult)).decode('utf-8')
186 return ffi.string(lib.ZSTD_getErrorName(zresult)).decode('utf-8')
187
187
188 def _make_cctx_params(params):
188 def _make_cctx_params(params):
189 res = lib.ZSTD_createCCtxParams()
189 res = lib.ZSTD_createCCtxParams()
190 if res == ffi.NULL:
190 if res == ffi.NULL:
191 raise MemoryError()
191 raise MemoryError()
192
192
193 res = ffi.gc(res, lib.ZSTD_freeCCtxParams)
193 res = ffi.gc(res, lib.ZSTD_freeCCtxParams)
194
194
195 attrs = [
195 attrs = [
196 (lib.ZSTD_c_format, params.format),
196 (lib.ZSTD_c_format, params.format),
197 (lib.ZSTD_c_compressionLevel, params.compression_level),
197 (lib.ZSTD_c_compressionLevel, params.compression_level),
198 (lib.ZSTD_c_windowLog, params.window_log),
198 (lib.ZSTD_c_windowLog, params.window_log),
199 (lib.ZSTD_c_hashLog, params.hash_log),
199 (lib.ZSTD_c_hashLog, params.hash_log),
200 (lib.ZSTD_c_chainLog, params.chain_log),
200 (lib.ZSTD_c_chainLog, params.chain_log),
201 (lib.ZSTD_c_searchLog, params.search_log),
201 (lib.ZSTD_c_searchLog, params.search_log),
202 (lib.ZSTD_c_minMatch, params.min_match),
202 (lib.ZSTD_c_minMatch, params.min_match),
203 (lib.ZSTD_c_targetLength, params.target_length),
203 (lib.ZSTD_c_targetLength, params.target_length),
204 (lib.ZSTD_c_strategy, params.compression_strategy),
204 (lib.ZSTD_c_strategy, params.compression_strategy),
205 (lib.ZSTD_c_contentSizeFlag, params.write_content_size),
205 (lib.ZSTD_c_contentSizeFlag, params.write_content_size),
206 (lib.ZSTD_c_checksumFlag, params.write_checksum),
206 (lib.ZSTD_c_checksumFlag, params.write_checksum),
207 (lib.ZSTD_c_dictIDFlag, params.write_dict_id),
207 (lib.ZSTD_c_dictIDFlag, params.write_dict_id),
208 (lib.ZSTD_c_nbWorkers, params.threads),
208 (lib.ZSTD_c_nbWorkers, params.threads),
209 (lib.ZSTD_c_jobSize, params.job_size),
209 (lib.ZSTD_c_jobSize, params.job_size),
210 (lib.ZSTD_c_overlapLog, params.overlap_log),
210 (lib.ZSTD_c_overlapLog, params.overlap_log),
211 (lib.ZSTD_c_forceMaxWindow, params.force_max_window),
211 (lib.ZSTD_c_forceMaxWindow, params.force_max_window),
212 (lib.ZSTD_c_enableLongDistanceMatching, params.enable_ldm),
212 (lib.ZSTD_c_enableLongDistanceMatching, params.enable_ldm),
213 (lib.ZSTD_c_ldmHashLog, params.ldm_hash_log),
213 (lib.ZSTD_c_ldmHashLog, params.ldm_hash_log),
214 (lib.ZSTD_c_ldmMinMatch, params.ldm_min_match),
214 (lib.ZSTD_c_ldmMinMatch, params.ldm_min_match),
215 (lib.ZSTD_c_ldmBucketSizeLog, params.ldm_bucket_size_log),
215 (lib.ZSTD_c_ldmBucketSizeLog, params.ldm_bucket_size_log),
216 (lib.ZSTD_c_ldmHashRateLog, params.ldm_hash_rate_log),
216 (lib.ZSTD_c_ldmHashRateLog, params.ldm_hash_rate_log),
217 ]
217 ]
218
218
219 for param, value in attrs:
219 for param, value in attrs:
220 _set_compression_parameter(res, param, value)
220 _set_compression_parameter(res, param, value)
221
221
222 return res
222 return res
223
223
224 class ZstdCompressionParameters(object):
224 class ZstdCompressionParameters(object):
225 @staticmethod
225 @staticmethod
226 def from_level(level, source_size=0, dict_size=0, **kwargs):
226 def from_level(level, source_size=0, dict_size=0, **kwargs):
227 params = lib.ZSTD_getCParams(level, source_size, dict_size)
227 params = lib.ZSTD_getCParams(level, source_size, dict_size)
228
228
229 args = {
229 args = {
230 'window_log': 'windowLog',
230 'window_log': 'windowLog',
231 'chain_log': 'chainLog',
231 'chain_log': 'chainLog',
232 'hash_log': 'hashLog',
232 'hash_log': 'hashLog',
233 'search_log': 'searchLog',
233 'search_log': 'searchLog',
234 'min_match': 'minMatch',
234 'min_match': 'minMatch',
235 'target_length': 'targetLength',
235 'target_length': 'targetLength',
236 'compression_strategy': 'strategy',
236 'compression_strategy': 'strategy',
237 }
237 }
238
238
239 for arg, attr in args.items():
239 for arg, attr in args.items():
240 if arg not in kwargs:
240 if arg not in kwargs:
241 kwargs[arg] = getattr(params, attr)
241 kwargs[arg] = getattr(params, attr)
242
242
243 return ZstdCompressionParameters(**kwargs)
243 return ZstdCompressionParameters(**kwargs)
244
244
245 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
245 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
246 chain_log=0, search_log=0, min_match=0, target_length=0,
246 chain_log=0, search_log=0, min_match=0, target_length=0,
247 strategy=-1, compression_strategy=-1,
247 strategy=-1, compression_strategy=-1,
248 write_content_size=1, write_checksum=0,
248 write_content_size=1, write_checksum=0,
249 write_dict_id=0, job_size=0, overlap_log=-1,
249 write_dict_id=0, job_size=0, overlap_log=-1,
250 overlap_size_log=-1, force_max_window=0, enable_ldm=0,
250 overlap_size_log=-1, force_max_window=0, enable_ldm=0,
251 ldm_hash_log=0, ldm_min_match=0, ldm_bucket_size_log=0,
251 ldm_hash_log=0, ldm_min_match=0, ldm_bucket_size_log=0,
252 ldm_hash_rate_log=-1, ldm_hash_every_log=-1, threads=0):
252 ldm_hash_rate_log=-1, ldm_hash_every_log=-1, threads=0):
253
253
254 params = lib.ZSTD_createCCtxParams()
254 params = lib.ZSTD_createCCtxParams()
255 if params == ffi.NULL:
255 if params == ffi.NULL:
256 raise MemoryError()
256 raise MemoryError()
257
257
258 params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
258 params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
259
259
260 self._params = params
260 self._params = params
261
261
262 if threads < 0:
262 if threads < 0:
263 threads = _cpu_count()
263 threads = _cpu_count()
264
264
265 # We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
265 # We need to set ZSTD_c_nbWorkers before ZSTD_c_jobSize and ZSTD_c_overlapLog
266 # because setting ZSTD_c_nbWorkers resets the other parameters.
266 # because setting ZSTD_c_nbWorkers resets the other parameters.
267 _set_compression_parameter(params, lib.ZSTD_c_nbWorkers, threads)
267 _set_compression_parameter(params, lib.ZSTD_c_nbWorkers, threads)
268
268
269 _set_compression_parameter(params, lib.ZSTD_c_format, format)
269 _set_compression_parameter(params, lib.ZSTD_c_format, format)
270 _set_compression_parameter(params, lib.ZSTD_c_compressionLevel, compression_level)
270 _set_compression_parameter(params, lib.ZSTD_c_compressionLevel, compression_level)
271 _set_compression_parameter(params, lib.ZSTD_c_windowLog, window_log)
271 _set_compression_parameter(params, lib.ZSTD_c_windowLog, window_log)
272 _set_compression_parameter(params, lib.ZSTD_c_hashLog, hash_log)
272 _set_compression_parameter(params, lib.ZSTD_c_hashLog, hash_log)
273 _set_compression_parameter(params, lib.ZSTD_c_chainLog, chain_log)
273 _set_compression_parameter(params, lib.ZSTD_c_chainLog, chain_log)
274 _set_compression_parameter(params, lib.ZSTD_c_searchLog, search_log)
274 _set_compression_parameter(params, lib.ZSTD_c_searchLog, search_log)
275 _set_compression_parameter(params, lib.ZSTD_c_minMatch, min_match)
275 _set_compression_parameter(params, lib.ZSTD_c_minMatch, min_match)
276 _set_compression_parameter(params, lib.ZSTD_c_targetLength, target_length)
276 _set_compression_parameter(params, lib.ZSTD_c_targetLength, target_length)
277
277
278 if strategy != -1 and compression_strategy != -1:
278 if strategy != -1 and compression_strategy != -1:
279 raise ValueError('cannot specify both compression_strategy and strategy')
279 raise ValueError('cannot specify both compression_strategy and strategy')
280
280
281 if compression_strategy != -1:
281 if compression_strategy != -1:
282 strategy = compression_strategy
282 strategy = compression_strategy
283 elif strategy == -1:
283 elif strategy == -1:
284 strategy = 0
284 strategy = 0
285
285
286 _set_compression_parameter(params, lib.ZSTD_c_strategy, strategy)
286 _set_compression_parameter(params, lib.ZSTD_c_strategy, strategy)
287 _set_compression_parameter(params, lib.ZSTD_c_contentSizeFlag, write_content_size)
287 _set_compression_parameter(params, lib.ZSTD_c_contentSizeFlag, write_content_size)
288 _set_compression_parameter(params, lib.ZSTD_c_checksumFlag, write_checksum)
288 _set_compression_parameter(params, lib.ZSTD_c_checksumFlag, write_checksum)
289 _set_compression_parameter(params, lib.ZSTD_c_dictIDFlag, write_dict_id)
289 _set_compression_parameter(params, lib.ZSTD_c_dictIDFlag, write_dict_id)
290 _set_compression_parameter(params, lib.ZSTD_c_jobSize, job_size)
290 _set_compression_parameter(params, lib.ZSTD_c_jobSize, job_size)
291
291
292 if overlap_log != -1 and overlap_size_log != -1:
292 if overlap_log != -1 and overlap_size_log != -1:
293 raise ValueError('cannot specify both overlap_log and overlap_size_log')
293 raise ValueError('cannot specify both overlap_log and overlap_size_log')
294
294
295 if overlap_size_log != -1:
295 if overlap_size_log != -1:
296 overlap_log = overlap_size_log
296 overlap_log = overlap_size_log
297 elif overlap_log == -1:
297 elif overlap_log == -1:
298 overlap_log = 0
298 overlap_log = 0
299
299
300 _set_compression_parameter(params, lib.ZSTD_c_overlapLog, overlap_log)
300 _set_compression_parameter(params, lib.ZSTD_c_overlapLog, overlap_log)
301 _set_compression_parameter(params, lib.ZSTD_c_forceMaxWindow, force_max_window)
301 _set_compression_parameter(params, lib.ZSTD_c_forceMaxWindow, force_max_window)
302 _set_compression_parameter(params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm)
302 _set_compression_parameter(params, lib.ZSTD_c_enableLongDistanceMatching, enable_ldm)
303 _set_compression_parameter(params, lib.ZSTD_c_ldmHashLog, ldm_hash_log)
303 _set_compression_parameter(params, lib.ZSTD_c_ldmHashLog, ldm_hash_log)
304 _set_compression_parameter(params, lib.ZSTD_c_ldmMinMatch, ldm_min_match)
304 _set_compression_parameter(params, lib.ZSTD_c_ldmMinMatch, ldm_min_match)
305 _set_compression_parameter(params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log)
305 _set_compression_parameter(params, lib.ZSTD_c_ldmBucketSizeLog, ldm_bucket_size_log)
306
306
307 if ldm_hash_rate_log != -1 and ldm_hash_every_log != -1:
307 if ldm_hash_rate_log != -1 and ldm_hash_every_log != -1:
308 raise ValueError('cannot specify both ldm_hash_rate_log and ldm_hash_every_log')
308 raise ValueError('cannot specify both ldm_hash_rate_log and ldm_hash_every_log')
309
309
310 if ldm_hash_every_log != -1:
310 if ldm_hash_every_log != -1:
311 ldm_hash_rate_log = ldm_hash_every_log
311 ldm_hash_rate_log = ldm_hash_every_log
312 elif ldm_hash_rate_log == -1:
312 elif ldm_hash_rate_log == -1:
313 ldm_hash_rate_log = 0
313 ldm_hash_rate_log = 0
314
314
315 _set_compression_parameter(params, lib.ZSTD_c_ldmHashRateLog, ldm_hash_rate_log)
315 _set_compression_parameter(params, lib.ZSTD_c_ldmHashRateLog, ldm_hash_rate_log)
316
316
317 @property
317 @property
318 def format(self):
318 def format(self):
319 return _get_compression_parameter(self._params, lib.ZSTD_c_format)
319 return _get_compression_parameter(self._params, lib.ZSTD_c_format)
320
320
321 @property
321 @property
322 def compression_level(self):
322 def compression_level(self):
323 return _get_compression_parameter(self._params, lib.ZSTD_c_compressionLevel)
323 return _get_compression_parameter(self._params, lib.ZSTD_c_compressionLevel)
324
324
325 @property
325 @property
326 def window_log(self):
326 def window_log(self):
327 return _get_compression_parameter(self._params, lib.ZSTD_c_windowLog)
327 return _get_compression_parameter(self._params, lib.ZSTD_c_windowLog)
328
328
329 @property
329 @property
330 def hash_log(self):
330 def hash_log(self):
331 return _get_compression_parameter(self._params, lib.ZSTD_c_hashLog)
331 return _get_compression_parameter(self._params, lib.ZSTD_c_hashLog)
332
332
333 @property
333 @property
334 def chain_log(self):
334 def chain_log(self):
335 return _get_compression_parameter(self._params, lib.ZSTD_c_chainLog)
335 return _get_compression_parameter(self._params, lib.ZSTD_c_chainLog)
336
336
337 @property
337 @property
338 def search_log(self):
338 def search_log(self):
339 return _get_compression_parameter(self._params, lib.ZSTD_c_searchLog)
339 return _get_compression_parameter(self._params, lib.ZSTD_c_searchLog)
340
340
341 @property
341 @property
342 def min_match(self):
342 def min_match(self):
343 return _get_compression_parameter(self._params, lib.ZSTD_c_minMatch)
343 return _get_compression_parameter(self._params, lib.ZSTD_c_minMatch)
344
344
345 @property
345 @property
346 def target_length(self):
346 def target_length(self):
347 return _get_compression_parameter(self._params, lib.ZSTD_c_targetLength)
347 return _get_compression_parameter(self._params, lib.ZSTD_c_targetLength)
348
348
349 @property
349 @property
350 def compression_strategy(self):
350 def compression_strategy(self):
351 return _get_compression_parameter(self._params, lib.ZSTD_c_strategy)
351 return _get_compression_parameter(self._params, lib.ZSTD_c_strategy)
352
352
353 @property
353 @property
354 def write_content_size(self):
354 def write_content_size(self):
355 return _get_compression_parameter(self._params, lib.ZSTD_c_contentSizeFlag)
355 return _get_compression_parameter(self._params, lib.ZSTD_c_contentSizeFlag)
356
356
357 @property
357 @property
358 def write_checksum(self):
358 def write_checksum(self):
359 return _get_compression_parameter(self._params, lib.ZSTD_c_checksumFlag)
359 return _get_compression_parameter(self._params, lib.ZSTD_c_checksumFlag)
360
360
361 @property
361 @property
362 def write_dict_id(self):
362 def write_dict_id(self):
363 return _get_compression_parameter(self._params, lib.ZSTD_c_dictIDFlag)
363 return _get_compression_parameter(self._params, lib.ZSTD_c_dictIDFlag)
364
364
365 @property
365 @property
366 def job_size(self):
366 def job_size(self):
367 return _get_compression_parameter(self._params, lib.ZSTD_c_jobSize)
367 return _get_compression_parameter(self._params, lib.ZSTD_c_jobSize)
368
368
369 @property
369 @property
370 def overlap_log(self):
370 def overlap_log(self):
371 return _get_compression_parameter(self._params, lib.ZSTD_c_overlapLog)
371 return _get_compression_parameter(self._params, lib.ZSTD_c_overlapLog)
372
372
373 @property
373 @property
374 def overlap_size_log(self):
374 def overlap_size_log(self):
375 return self.overlap_log
375 return self.overlap_log
376
376
377 @property
377 @property
378 def force_max_window(self):
378 def force_max_window(self):
379 return _get_compression_parameter(self._params, lib.ZSTD_c_forceMaxWindow)
379 return _get_compression_parameter(self._params, lib.ZSTD_c_forceMaxWindow)
380
380
381 @property
381 @property
382 def enable_ldm(self):
382 def enable_ldm(self):
383 return _get_compression_parameter(self._params, lib.ZSTD_c_enableLongDistanceMatching)
383 return _get_compression_parameter(self._params, lib.ZSTD_c_enableLongDistanceMatching)
384
384
385 @property
385 @property
386 def ldm_hash_log(self):
386 def ldm_hash_log(self):
387 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmHashLog)
387 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmHashLog)
388
388
389 @property
389 @property
390 def ldm_min_match(self):
390 def ldm_min_match(self):
391 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmMinMatch)
391 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmMinMatch)
392
392
393 @property
393 @property
394 def ldm_bucket_size_log(self):
394 def ldm_bucket_size_log(self):
395 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmBucketSizeLog)
395 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmBucketSizeLog)
396
396
397 @property
397 @property
398 def ldm_hash_rate_log(self):
398 def ldm_hash_rate_log(self):
399 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmHashRateLog)
399 return _get_compression_parameter(self._params, lib.ZSTD_c_ldmHashRateLog)
400
400
401 @property
401 @property
402 def ldm_hash_every_log(self):
402 def ldm_hash_every_log(self):
403 return self.ldm_hash_rate_log
403 return self.ldm_hash_rate_log
404
404
405 @property
405 @property
406 def threads(self):
406 def threads(self):
407 return _get_compression_parameter(self._params, lib.ZSTD_c_nbWorkers)
407 return _get_compression_parameter(self._params, lib.ZSTD_c_nbWorkers)
408
408
409 def estimated_compression_context_size(self):
409 def estimated_compression_context_size(self):
410 return lib.ZSTD_estimateCCtxSize_usingCCtxParams(self._params)
410 return lib.ZSTD_estimateCCtxSize_usingCCtxParams(self._params)
411
411
412 CompressionParameters = ZstdCompressionParameters
412 CompressionParameters = ZstdCompressionParameters
413
413
414 def estimate_decompression_context_size():
414 def estimate_decompression_context_size():
415 return lib.ZSTD_estimateDCtxSize()
415 return lib.ZSTD_estimateDCtxSize()
416
416
417
417
418 def _set_compression_parameter(params, param, value):
418 def _set_compression_parameter(params, param, value):
419 zresult = lib.ZSTD_CCtxParam_setParameter(params, param, value)
419 zresult = lib.ZSTD_CCtxParams_setParameter(params, param, value)
420 if lib.ZSTD_isError(zresult):
420 if lib.ZSTD_isError(zresult):
421 raise ZstdError('unable to set compression context parameter: %s' %
421 raise ZstdError('unable to set compression context parameter: %s' %
422 _zstd_error(zresult))
422 _zstd_error(zresult))
423
423
424
424
425 def _get_compression_parameter(params, param):
425 def _get_compression_parameter(params, param):
426 result = ffi.new('int *')
426 result = ffi.new('int *')
427
427
428 zresult = lib.ZSTD_CCtxParam_getParameter(params, param, result)
428 zresult = lib.ZSTD_CCtxParams_getParameter(params, param, result)
429 if lib.ZSTD_isError(zresult):
429 if lib.ZSTD_isError(zresult):
430 raise ZstdError('unable to get compression context parameter: %s' %
430 raise ZstdError('unable to get compression context parameter: %s' %
431 _zstd_error(zresult))
431 _zstd_error(zresult))
432
432
433 return result[0]
433 return result[0]
434
434
435
435
436 class ZstdCompressionWriter(object):
436 class ZstdCompressionWriter(object):
437 def __init__(self, compressor, writer, source_size, write_size,
437 def __init__(self, compressor, writer, source_size, write_size,
438 write_return_read):
438 write_return_read):
439 self._compressor = compressor
439 self._compressor = compressor
440 self._writer = writer
440 self._writer = writer
441 self._write_size = write_size
441 self._write_size = write_size
442 self._write_return_read = bool(write_return_read)
442 self._write_return_read = bool(write_return_read)
443 self._entered = False
443 self._entered = False
444 self._closed = False
444 self._closed = False
445 self._bytes_compressed = 0
445 self._bytes_compressed = 0
446
446
447 self._dst_buffer = ffi.new('char[]', write_size)
447 self._dst_buffer = ffi.new('char[]', write_size)
448 self._out_buffer = ffi.new('ZSTD_outBuffer *')
448 self._out_buffer = ffi.new('ZSTD_outBuffer *')
449 self._out_buffer.dst = self._dst_buffer
449 self._out_buffer.dst = self._dst_buffer
450 self._out_buffer.size = len(self._dst_buffer)
450 self._out_buffer.size = len(self._dst_buffer)
451 self._out_buffer.pos = 0
451 self._out_buffer.pos = 0
452
452
453 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx,
453 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(compressor._cctx,
454 source_size)
454 source_size)
455 if lib.ZSTD_isError(zresult):
455 if lib.ZSTD_isError(zresult):
456 raise ZstdError('error setting source size: %s' %
456 raise ZstdError('error setting source size: %s' %
457 _zstd_error(zresult))
457 _zstd_error(zresult))
458
458
459 def __enter__(self):
459 def __enter__(self):
460 if self._closed:
460 if self._closed:
461 raise ValueError('stream is closed')
461 raise ValueError('stream is closed')
462
462
463 if self._entered:
463 if self._entered:
464 raise ZstdError('cannot __enter__ multiple times')
464 raise ZstdError('cannot __enter__ multiple times')
465
465
466 self._entered = True
466 self._entered = True
467 return self
467 return self
468
468
469 def __exit__(self, exc_type, exc_value, exc_tb):
469 def __exit__(self, exc_type, exc_value, exc_tb):
470 self._entered = False
470 self._entered = False
471
471
472 if not exc_type and not exc_value and not exc_tb:
472 if not exc_type and not exc_value and not exc_tb:
473 self.close()
473 self.close()
474
474
475 self._compressor = None
475 self._compressor = None
476
476
477 return False
477 return False
478
478
479 def memory_size(self):
479 def memory_size(self):
480 return lib.ZSTD_sizeof_CCtx(self._compressor._cctx)
480 return lib.ZSTD_sizeof_CCtx(self._compressor._cctx)
481
481
482 def fileno(self):
482 def fileno(self):
483 f = getattr(self._writer, 'fileno', None)
483 f = getattr(self._writer, 'fileno', None)
484 if f:
484 if f:
485 return f()
485 return f()
486 else:
486 else:
487 raise OSError('fileno not available on underlying writer')
487 raise OSError('fileno not available on underlying writer')
488
488
489 def close(self):
489 def close(self):
490 if self._closed:
490 if self._closed:
491 return
491 return
492
492
493 try:
493 try:
494 self.flush(FLUSH_FRAME)
494 self.flush(FLUSH_FRAME)
495 finally:
495 finally:
496 self._closed = True
496 self._closed = True
497
497
498 # Call close() on underlying stream as well.
498 # Call close() on underlying stream as well.
499 f = getattr(self._writer, 'close', None)
499 f = getattr(self._writer, 'close', None)
500 if f:
500 if f:
501 f()
501 f()
502
502
503 @property
503 @property
504 def closed(self):
504 def closed(self):
505 return self._closed
505 return self._closed
506
506
507 def isatty(self):
507 def isatty(self):
508 return False
508 return False
509
509
510 def readable(self):
510 def readable(self):
511 return False
511 return False
512
512
513 def readline(self, size=-1):
513 def readline(self, size=-1):
514 raise io.UnsupportedOperation()
514 raise io.UnsupportedOperation()
515
515
516 def readlines(self, hint=-1):
516 def readlines(self, hint=-1):
517 raise io.UnsupportedOperation()
517 raise io.UnsupportedOperation()
518
518
519 def seek(self, offset, whence=None):
519 def seek(self, offset, whence=None):
520 raise io.UnsupportedOperation()
520 raise io.UnsupportedOperation()
521
521
522 def seekable(self):
522 def seekable(self):
523 return False
523 return False
524
524
525 def truncate(self, size=None):
525 def truncate(self, size=None):
526 raise io.UnsupportedOperation()
526 raise io.UnsupportedOperation()
527
527
528 def writable(self):
528 def writable(self):
529 return True
529 return True
530
530
531 def writelines(self, lines):
531 def writelines(self, lines):
532 raise NotImplementedError('writelines() is not yet implemented')
532 raise NotImplementedError('writelines() is not yet implemented')
533
533
534 def read(self, size=-1):
534 def read(self, size=-1):
535 raise io.UnsupportedOperation()
535 raise io.UnsupportedOperation()
536
536
537 def readall(self):
537 def readall(self):
538 raise io.UnsupportedOperation()
538 raise io.UnsupportedOperation()
539
539
540 def readinto(self, b):
540 def readinto(self, b):
541 raise io.UnsupportedOperation()
541 raise io.UnsupportedOperation()
542
542
543 def write(self, data):
543 def write(self, data):
544 if self._closed:
544 if self._closed:
545 raise ValueError('stream is closed')
545 raise ValueError('stream is closed')
546
546
547 total_write = 0
547 total_write = 0
548
548
549 data_buffer = ffi.from_buffer(data)
549 data_buffer = ffi.from_buffer(data)
550
550
551 in_buffer = ffi.new('ZSTD_inBuffer *')
551 in_buffer = ffi.new('ZSTD_inBuffer *')
552 in_buffer.src = data_buffer
552 in_buffer.src = data_buffer
553 in_buffer.size = len(data_buffer)
553 in_buffer.size = len(data_buffer)
554 in_buffer.pos = 0
554 in_buffer.pos = 0
555
555
556 out_buffer = self._out_buffer
556 out_buffer = self._out_buffer
557 out_buffer.pos = 0
557 out_buffer.pos = 0
558
558
559 while in_buffer.pos < in_buffer.size:
559 while in_buffer.pos < in_buffer.size:
560 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
560 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
561 out_buffer, in_buffer,
561 out_buffer, in_buffer,
562 lib.ZSTD_e_continue)
562 lib.ZSTD_e_continue)
563 if lib.ZSTD_isError(zresult):
563 if lib.ZSTD_isError(zresult):
564 raise ZstdError('zstd compress error: %s' %
564 raise ZstdError('zstd compress error: %s' %
565 _zstd_error(zresult))
565 _zstd_error(zresult))
566
566
567 if out_buffer.pos:
567 if out_buffer.pos:
568 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
568 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
569 total_write += out_buffer.pos
569 total_write += out_buffer.pos
570 self._bytes_compressed += out_buffer.pos
570 self._bytes_compressed += out_buffer.pos
571 out_buffer.pos = 0
571 out_buffer.pos = 0
572
572
573 if self._write_return_read:
573 if self._write_return_read:
574 return in_buffer.pos
574 return in_buffer.pos
575 else:
575 else:
576 return total_write
576 return total_write
577
577
578 def flush(self, flush_mode=FLUSH_BLOCK):
578 def flush(self, flush_mode=FLUSH_BLOCK):
579 if flush_mode == FLUSH_BLOCK:
579 if flush_mode == FLUSH_BLOCK:
580 flush = lib.ZSTD_e_flush
580 flush = lib.ZSTD_e_flush
581 elif flush_mode == FLUSH_FRAME:
581 elif flush_mode == FLUSH_FRAME:
582 flush = lib.ZSTD_e_end
582 flush = lib.ZSTD_e_end
583 else:
583 else:
584 raise ValueError('unknown flush_mode: %r' % flush_mode)
584 raise ValueError('unknown flush_mode: %r' % flush_mode)
585
585
586 if self._closed:
586 if self._closed:
587 raise ValueError('stream is closed')
587 raise ValueError('stream is closed')
588
588
589 total_write = 0
589 total_write = 0
590
590
591 out_buffer = self._out_buffer
591 out_buffer = self._out_buffer
592 out_buffer.pos = 0
592 out_buffer.pos = 0
593
593
594 in_buffer = ffi.new('ZSTD_inBuffer *')
594 in_buffer = ffi.new('ZSTD_inBuffer *')
595 in_buffer.src = ffi.NULL
595 in_buffer.src = ffi.NULL
596 in_buffer.size = 0
596 in_buffer.size = 0
597 in_buffer.pos = 0
597 in_buffer.pos = 0
598
598
599 while True:
599 while True:
600 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
600 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
601 out_buffer, in_buffer,
601 out_buffer, in_buffer,
602 flush)
602 flush)
603 if lib.ZSTD_isError(zresult):
603 if lib.ZSTD_isError(zresult):
604 raise ZstdError('zstd compress error: %s' %
604 raise ZstdError('zstd compress error: %s' %
605 _zstd_error(zresult))
605 _zstd_error(zresult))
606
606
607 if out_buffer.pos:
607 if out_buffer.pos:
608 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
608 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
609 total_write += out_buffer.pos
609 total_write += out_buffer.pos
610 self._bytes_compressed += out_buffer.pos
610 self._bytes_compressed += out_buffer.pos
611 out_buffer.pos = 0
611 out_buffer.pos = 0
612
612
613 if not zresult:
613 if not zresult:
614 break
614 break
615
615
616 return total_write
616 return total_write
617
617
618 def tell(self):
618 def tell(self):
619 return self._bytes_compressed
619 return self._bytes_compressed
620
620
621
621
622 class ZstdCompressionObj(object):
622 class ZstdCompressionObj(object):
623 def compress(self, data):
623 def compress(self, data):
624 if self._finished:
624 if self._finished:
625 raise ZstdError('cannot call compress() after compressor finished')
625 raise ZstdError('cannot call compress() after compressor finished')
626
626
627 data_buffer = ffi.from_buffer(data)
627 data_buffer = ffi.from_buffer(data)
628 source = ffi.new('ZSTD_inBuffer *')
628 source = ffi.new('ZSTD_inBuffer *')
629 source.src = data_buffer
629 source.src = data_buffer
630 source.size = len(data_buffer)
630 source.size = len(data_buffer)
631 source.pos = 0
631 source.pos = 0
632
632
633 chunks = []
633 chunks = []
634
634
635 while source.pos < len(data):
635 while source.pos < len(data):
636 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
636 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
637 self._out,
637 self._out,
638 source,
638 source,
639 lib.ZSTD_e_continue)
639 lib.ZSTD_e_continue)
640 if lib.ZSTD_isError(zresult):
640 if lib.ZSTD_isError(zresult):
641 raise ZstdError('zstd compress error: %s' %
641 raise ZstdError('zstd compress error: %s' %
642 _zstd_error(zresult))
642 _zstd_error(zresult))
643
643
644 if self._out.pos:
644 if self._out.pos:
645 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
645 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
646 self._out.pos = 0
646 self._out.pos = 0
647
647
648 return b''.join(chunks)
648 return b''.join(chunks)
649
649
650 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
650 def flush(self, flush_mode=COMPRESSOBJ_FLUSH_FINISH):
651 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
651 if flush_mode not in (COMPRESSOBJ_FLUSH_FINISH, COMPRESSOBJ_FLUSH_BLOCK):
652 raise ValueError('flush mode not recognized')
652 raise ValueError('flush mode not recognized')
653
653
654 if self._finished:
654 if self._finished:
655 raise ZstdError('compressor object already finished')
655 raise ZstdError('compressor object already finished')
656
656
657 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
657 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
658 z_flush_mode = lib.ZSTD_e_flush
658 z_flush_mode = lib.ZSTD_e_flush
659 elif flush_mode == COMPRESSOBJ_FLUSH_FINISH:
659 elif flush_mode == COMPRESSOBJ_FLUSH_FINISH:
660 z_flush_mode = lib.ZSTD_e_end
660 z_flush_mode = lib.ZSTD_e_end
661 self._finished = True
661 self._finished = True
662 else:
662 else:
663 raise ZstdError('unhandled flush mode')
663 raise ZstdError('unhandled flush mode')
664
664
665 assert self._out.pos == 0
665 assert self._out.pos == 0
666
666
667 in_buffer = ffi.new('ZSTD_inBuffer *')
667 in_buffer = ffi.new('ZSTD_inBuffer *')
668 in_buffer.src = ffi.NULL
668 in_buffer.src = ffi.NULL
669 in_buffer.size = 0
669 in_buffer.size = 0
670 in_buffer.pos = 0
670 in_buffer.pos = 0
671
671
672 chunks = []
672 chunks = []
673
673
674 while True:
674 while True:
675 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
675 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
676 self._out,
676 self._out,
677 in_buffer,
677 in_buffer,
678 z_flush_mode)
678 z_flush_mode)
679 if lib.ZSTD_isError(zresult):
679 if lib.ZSTD_isError(zresult):
680 raise ZstdError('error ending compression stream: %s' %
680 raise ZstdError('error ending compression stream: %s' %
681 _zstd_error(zresult))
681 _zstd_error(zresult))
682
682
683 if self._out.pos:
683 if self._out.pos:
684 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
684 chunks.append(ffi.buffer(self._out.dst, self._out.pos)[:])
685 self._out.pos = 0
685 self._out.pos = 0
686
686
687 if not zresult:
687 if not zresult:
688 break
688 break
689
689
690 return b''.join(chunks)
690 return b''.join(chunks)
691
691
692
692
693 class ZstdCompressionChunker(object):
693 class ZstdCompressionChunker(object):
694 def __init__(self, compressor, chunk_size):
694 def __init__(self, compressor, chunk_size):
695 self._compressor = compressor
695 self._compressor = compressor
696 self._out = ffi.new('ZSTD_outBuffer *')
696 self._out = ffi.new('ZSTD_outBuffer *')
697 self._dst_buffer = ffi.new('char[]', chunk_size)
697 self._dst_buffer = ffi.new('char[]', chunk_size)
698 self._out.dst = self._dst_buffer
698 self._out.dst = self._dst_buffer
699 self._out.size = chunk_size
699 self._out.size = chunk_size
700 self._out.pos = 0
700 self._out.pos = 0
701
701
702 self._in = ffi.new('ZSTD_inBuffer *')
702 self._in = ffi.new('ZSTD_inBuffer *')
703 self._in.src = ffi.NULL
703 self._in.src = ffi.NULL
704 self._in.size = 0
704 self._in.size = 0
705 self._in.pos = 0
705 self._in.pos = 0
706 self._finished = False
706 self._finished = False
707
707
708 def compress(self, data):
708 def compress(self, data):
709 if self._finished:
709 if self._finished:
710 raise ZstdError('cannot call compress() after compression finished')
710 raise ZstdError('cannot call compress() after compression finished')
711
711
712 if self._in.src != ffi.NULL:
712 if self._in.src != ffi.NULL:
713 raise ZstdError('cannot perform operation before consuming output '
713 raise ZstdError('cannot perform operation before consuming output '
714 'from previous operation')
714 'from previous operation')
715
715
716 data_buffer = ffi.from_buffer(data)
716 data_buffer = ffi.from_buffer(data)
717
717
718 if not len(data_buffer):
718 if not len(data_buffer):
719 return
719 return
720
720
721 self._in.src = data_buffer
721 self._in.src = data_buffer
722 self._in.size = len(data_buffer)
722 self._in.size = len(data_buffer)
723 self._in.pos = 0
723 self._in.pos = 0
724
724
725 while self._in.pos < self._in.size:
725 while self._in.pos < self._in.size:
726 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
726 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
727 self._out,
727 self._out,
728 self._in,
728 self._in,
729 lib.ZSTD_e_continue)
729 lib.ZSTD_e_continue)
730
730
731 if self._in.pos == self._in.size:
731 if self._in.pos == self._in.size:
732 self._in.src = ffi.NULL
732 self._in.src = ffi.NULL
733 self._in.size = 0
733 self._in.size = 0
734 self._in.pos = 0
734 self._in.pos = 0
735
735
736 if lib.ZSTD_isError(zresult):
736 if lib.ZSTD_isError(zresult):
737 raise ZstdError('zstd compress error: %s' %
737 raise ZstdError('zstd compress error: %s' %
738 _zstd_error(zresult))
738 _zstd_error(zresult))
739
739
740 if self._out.pos == self._out.size:
740 if self._out.pos == self._out.size:
741 yield ffi.buffer(self._out.dst, self._out.pos)[:]
741 yield ffi.buffer(self._out.dst, self._out.pos)[:]
742 self._out.pos = 0
742 self._out.pos = 0
743
743
744 def flush(self):
744 def flush(self):
745 if self._finished:
745 if self._finished:
746 raise ZstdError('cannot call flush() after compression finished')
746 raise ZstdError('cannot call flush() after compression finished')
747
747
748 if self._in.src != ffi.NULL:
748 if self._in.src != ffi.NULL:
749 raise ZstdError('cannot call flush() before consuming output from '
749 raise ZstdError('cannot call flush() before consuming output from '
750 'previous operation')
750 'previous operation')
751
751
752 while True:
752 while True:
753 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
753 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
754 self._out, self._in,
754 self._out, self._in,
755 lib.ZSTD_e_flush)
755 lib.ZSTD_e_flush)
756 if lib.ZSTD_isError(zresult):
756 if lib.ZSTD_isError(zresult):
757 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
757 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
758
758
759 if self._out.pos:
759 if self._out.pos:
760 yield ffi.buffer(self._out.dst, self._out.pos)[:]
760 yield ffi.buffer(self._out.dst, self._out.pos)[:]
761 self._out.pos = 0
761 self._out.pos = 0
762
762
763 if not zresult:
763 if not zresult:
764 return
764 return
765
765
766 def finish(self):
766 def finish(self):
767 if self._finished:
767 if self._finished:
768 raise ZstdError('cannot call finish() after compression finished')
768 raise ZstdError('cannot call finish() after compression finished')
769
769
770 if self._in.src != ffi.NULL:
770 if self._in.src != ffi.NULL:
771 raise ZstdError('cannot call finish() before consuming output from '
771 raise ZstdError('cannot call finish() before consuming output from '
772 'previous operation')
772 'previous operation')
773
773
774 while True:
774 while True:
775 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
775 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
776 self._out, self._in,
776 self._out, self._in,
777 lib.ZSTD_e_end)
777 lib.ZSTD_e_end)
778 if lib.ZSTD_isError(zresult):
778 if lib.ZSTD_isError(zresult):
779 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
779 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
780
780
781 if self._out.pos:
781 if self._out.pos:
782 yield ffi.buffer(self._out.dst, self._out.pos)[:]
782 yield ffi.buffer(self._out.dst, self._out.pos)[:]
783 self._out.pos = 0
783 self._out.pos = 0
784
784
785 if not zresult:
785 if not zresult:
786 self._finished = True
786 self._finished = True
787 return
787 return
788
788
789
789
790 class ZstdCompressionReader(object):
790 class ZstdCompressionReader(object):
791 def __init__(self, compressor, source, read_size):
791 def __init__(self, compressor, source, read_size):
792 self._compressor = compressor
792 self._compressor = compressor
793 self._source = source
793 self._source = source
794 self._read_size = read_size
794 self._read_size = read_size
795 self._entered = False
795 self._entered = False
796 self._closed = False
796 self._closed = False
797 self._bytes_compressed = 0
797 self._bytes_compressed = 0
798 self._finished_input = False
798 self._finished_input = False
799 self._finished_output = False
799 self._finished_output = False
800
800
801 self._in_buffer = ffi.new('ZSTD_inBuffer *')
801 self._in_buffer = ffi.new('ZSTD_inBuffer *')
802 # Holds a ref so backing bytes in self._in_buffer stay alive.
802 # Holds a ref so backing bytes in self._in_buffer stay alive.
803 self._source_buffer = None
803 self._source_buffer = None
804
804
805 def __enter__(self):
805 def __enter__(self):
806 if self._entered:
806 if self._entered:
807 raise ValueError('cannot __enter__ multiple times')
807 raise ValueError('cannot __enter__ multiple times')
808
808
809 self._entered = True
809 self._entered = True
810 return self
810 return self
811
811
812 def __exit__(self, exc_type, exc_value, exc_tb):
812 def __exit__(self, exc_type, exc_value, exc_tb):
813 self._entered = False
813 self._entered = False
814 self._closed = True
814 self._closed = True
815 self._source = None
815 self._source = None
816 self._compressor = None
816 self._compressor = None
817
817
818 return False
818 return False
819
819
820 def readable(self):
820 def readable(self):
821 return True
821 return True
822
822
823 def writable(self):
823 def writable(self):
824 return False
824 return False
825
825
826 def seekable(self):
826 def seekable(self):
827 return False
827 return False
828
828
829 def readline(self):
829 def readline(self):
830 raise io.UnsupportedOperation()
830 raise io.UnsupportedOperation()
831
831
832 def readlines(self):
832 def readlines(self):
833 raise io.UnsupportedOperation()
833 raise io.UnsupportedOperation()
834
834
835 def write(self, data):
835 def write(self, data):
836 raise OSError('stream is not writable')
836 raise OSError('stream is not writable')
837
837
838 def writelines(self, ignored):
838 def writelines(self, ignored):
839 raise OSError('stream is not writable')
839 raise OSError('stream is not writable')
840
840
841 def isatty(self):
841 def isatty(self):
842 return False
842 return False
843
843
844 def flush(self):
844 def flush(self):
845 return None
845 return None
846
846
847 def close(self):
847 def close(self):
848 self._closed = True
848 self._closed = True
849 return None
849 return None
850
850
851 @property
851 @property
852 def closed(self):
852 def closed(self):
853 return self._closed
853 return self._closed
854
854
855 def tell(self):
855 def tell(self):
856 return self._bytes_compressed
856 return self._bytes_compressed
857
857
858 def readall(self):
858 def readall(self):
859 chunks = []
859 chunks = []
860
860
861 while True:
861 while True:
862 chunk = self.read(1048576)
862 chunk = self.read(1048576)
863 if not chunk:
863 if not chunk:
864 break
864 break
865
865
866 chunks.append(chunk)
866 chunks.append(chunk)
867
867
868 return b''.join(chunks)
868 return b''.join(chunks)
869
869
870 def __iter__(self):
870 def __iter__(self):
871 raise io.UnsupportedOperation()
871 raise io.UnsupportedOperation()
872
872
873 def __next__(self):
873 def __next__(self):
874 raise io.UnsupportedOperation()
874 raise io.UnsupportedOperation()
875
875
876 next = __next__
876 next = __next__
877
877
878 def _read_input(self):
878 def _read_input(self):
879 if self._finished_input:
879 if self._finished_input:
880 return
880 return
881
881
882 if hasattr(self._source, 'read'):
882 if hasattr(self._source, 'read'):
883 data = self._source.read(self._read_size)
883 data = self._source.read(self._read_size)
884
884
885 if not data:
885 if not data:
886 self._finished_input = True
886 self._finished_input = True
887 return
887 return
888
888
889 self._source_buffer = ffi.from_buffer(data)
889 self._source_buffer = ffi.from_buffer(data)
890 self._in_buffer.src = self._source_buffer
890 self._in_buffer.src = self._source_buffer
891 self._in_buffer.size = len(self._source_buffer)
891 self._in_buffer.size = len(self._source_buffer)
892 self._in_buffer.pos = 0
892 self._in_buffer.pos = 0
893 else:
893 else:
894 self._source_buffer = ffi.from_buffer(self._source)
894 self._source_buffer = ffi.from_buffer(self._source)
895 self._in_buffer.src = self._source_buffer
895 self._in_buffer.src = self._source_buffer
896 self._in_buffer.size = len(self._source_buffer)
896 self._in_buffer.size = len(self._source_buffer)
897 self._in_buffer.pos = 0
897 self._in_buffer.pos = 0
898
898
899 def _compress_into_buffer(self, out_buffer):
899 def _compress_into_buffer(self, out_buffer):
900 if self._in_buffer.pos >= self._in_buffer.size:
900 if self._in_buffer.pos >= self._in_buffer.size:
901 return
901 return
902
902
903 old_pos = out_buffer.pos
903 old_pos = out_buffer.pos
904
904
905 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
905 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
906 out_buffer, self._in_buffer,
906 out_buffer, self._in_buffer,
907 lib.ZSTD_e_continue)
907 lib.ZSTD_e_continue)
908
908
909 self._bytes_compressed += out_buffer.pos - old_pos
909 self._bytes_compressed += out_buffer.pos - old_pos
910
910
911 if self._in_buffer.pos == self._in_buffer.size:
911 if self._in_buffer.pos == self._in_buffer.size:
912 self._in_buffer.src = ffi.NULL
912 self._in_buffer.src = ffi.NULL
913 self._in_buffer.pos = 0
913 self._in_buffer.pos = 0
914 self._in_buffer.size = 0
914 self._in_buffer.size = 0
915 self._source_buffer = None
915 self._source_buffer = None
916
916
917 if not hasattr(self._source, 'read'):
917 if not hasattr(self._source, 'read'):
918 self._finished_input = True
918 self._finished_input = True
919
919
920 if lib.ZSTD_isError(zresult):
920 if lib.ZSTD_isError(zresult):
921 raise ZstdError('zstd compress error: %s',
921 raise ZstdError('zstd compress error: %s',
922 _zstd_error(zresult))
922 _zstd_error(zresult))
923
923
924 return out_buffer.pos and out_buffer.pos == out_buffer.size
924 return out_buffer.pos and out_buffer.pos == out_buffer.size
925
925
926 def read(self, size=-1):
926 def read(self, size=-1):
927 if self._closed:
927 if self._closed:
928 raise ValueError('stream is closed')
928 raise ValueError('stream is closed')
929
929
930 if size < -1:
930 if size < -1:
931 raise ValueError('cannot read negative amounts less than -1')
931 raise ValueError('cannot read negative amounts less than -1')
932
932
933 if size == -1:
933 if size == -1:
934 return self.readall()
934 return self.readall()
935
935
936 if self._finished_output or size == 0:
936 if self._finished_output or size == 0:
937 return b''
937 return b''
938
938
939 # Need a dedicated ref to dest buffer otherwise it gets collected.
939 # Need a dedicated ref to dest buffer otherwise it gets collected.
940 dst_buffer = ffi.new('char[]', size)
940 dst_buffer = ffi.new('char[]', size)
941 out_buffer = ffi.new('ZSTD_outBuffer *')
941 out_buffer = ffi.new('ZSTD_outBuffer *')
942 out_buffer.dst = dst_buffer
942 out_buffer.dst = dst_buffer
943 out_buffer.size = size
943 out_buffer.size = size
944 out_buffer.pos = 0
944 out_buffer.pos = 0
945
945
946 if self._compress_into_buffer(out_buffer):
946 if self._compress_into_buffer(out_buffer):
947 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
947 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
948
948
949 while not self._finished_input:
949 while not self._finished_input:
950 self._read_input()
950 self._read_input()
951
951
952 if self._compress_into_buffer(out_buffer):
952 if self._compress_into_buffer(out_buffer):
953 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
953 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
954
954
955 # EOF
955 # EOF
956 old_pos = out_buffer.pos
956 old_pos = out_buffer.pos
957
957
958 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
958 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
959 out_buffer, self._in_buffer,
959 out_buffer, self._in_buffer,
960 lib.ZSTD_e_end)
960 lib.ZSTD_e_end)
961
961
962 self._bytes_compressed += out_buffer.pos - old_pos
962 self._bytes_compressed += out_buffer.pos - old_pos
963
963
964 if lib.ZSTD_isError(zresult):
964 if lib.ZSTD_isError(zresult):
965 raise ZstdError('error ending compression stream: %s',
965 raise ZstdError('error ending compression stream: %s',
966 _zstd_error(zresult))
966 _zstd_error(zresult))
967
967
968 if zresult == 0:
968 if zresult == 0:
969 self._finished_output = True
969 self._finished_output = True
970
970
971 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
971 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
972
972
973 def read1(self, size=-1):
973 def read1(self, size=-1):
974 if self._closed:
974 if self._closed:
975 raise ValueError('stream is closed')
975 raise ValueError('stream is closed')
976
976
977 if size < -1:
977 if size < -1:
978 raise ValueError('cannot read negative amounts less than -1')
978 raise ValueError('cannot read negative amounts less than -1')
979
979
980 if self._finished_output or size == 0:
980 if self._finished_output or size == 0:
981 return b''
981 return b''
982
982
983 # -1 returns arbitrary number of bytes.
983 # -1 returns arbitrary number of bytes.
984 if size == -1:
984 if size == -1:
985 size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
985 size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
986
986
987 dst_buffer = ffi.new('char[]', size)
987 dst_buffer = ffi.new('char[]', size)
988 out_buffer = ffi.new('ZSTD_outBuffer *')
988 out_buffer = ffi.new('ZSTD_outBuffer *')
989 out_buffer.dst = dst_buffer
989 out_buffer.dst = dst_buffer
990 out_buffer.size = size
990 out_buffer.size = size
991 out_buffer.pos = 0
991 out_buffer.pos = 0
992
992
993 # read1() dictates that we can perform at most 1 call to the
993 # read1() dictates that we can perform at most 1 call to the
994 # underlying stream to get input. However, we can't satisfy this
994 # underlying stream to get input. However, we can't satisfy this
995 # restriction with compression because not all input generates output.
995 # restriction with compression because not all input generates output.
996 # It is possible to perform a block flush in order to ensure output.
996 # It is possible to perform a block flush in order to ensure output.
997 # But this may not be desirable behavior. So we allow multiple read()
997 # But this may not be desirable behavior. So we allow multiple read()
998 # to the underlying stream. But unlike read(), we stop once we have
998 # to the underlying stream. But unlike read(), we stop once we have
999 # any output.
999 # any output.
1000
1000
1001 self._compress_into_buffer(out_buffer)
1001 self._compress_into_buffer(out_buffer)
1002 if out_buffer.pos:
1002 if out_buffer.pos:
1003 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1003 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1004
1004
1005 while not self._finished_input:
1005 while not self._finished_input:
1006 self._read_input()
1006 self._read_input()
1007
1007
1008 # If we've filled the output buffer, return immediately.
1008 # If we've filled the output buffer, return immediately.
1009 if self._compress_into_buffer(out_buffer):
1009 if self._compress_into_buffer(out_buffer):
1010 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1010 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1011
1011
1012 # If we've populated the output buffer and we're not at EOF,
1012 # If we've populated the output buffer and we're not at EOF,
1013 # also return, as we've satisfied the read1() limits.
1013 # also return, as we've satisfied the read1() limits.
1014 if out_buffer.pos and not self._finished_input:
1014 if out_buffer.pos and not self._finished_input:
1015 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1015 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1016
1016
1017 # Else if we're at EOS and we have room left in the buffer,
1017 # Else if we're at EOS and we have room left in the buffer,
1018 # fall through to below and try to add more data to the output.
1018 # fall through to below and try to add more data to the output.
1019
1019
1020 # EOF.
1020 # EOF.
1021 old_pos = out_buffer.pos
1021 old_pos = out_buffer.pos
1022
1022
1023 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1023 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1024 out_buffer, self._in_buffer,
1024 out_buffer, self._in_buffer,
1025 lib.ZSTD_e_end)
1025 lib.ZSTD_e_end)
1026
1026
1027 self._bytes_compressed += out_buffer.pos - old_pos
1027 self._bytes_compressed += out_buffer.pos - old_pos
1028
1028
1029 if lib.ZSTD_isError(zresult):
1029 if lib.ZSTD_isError(zresult):
1030 raise ZstdError('error ending compression stream: %s' %
1030 raise ZstdError('error ending compression stream: %s' %
1031 _zstd_error(zresult))
1031 _zstd_error(zresult))
1032
1032
1033 if zresult == 0:
1033 if zresult == 0:
1034 self._finished_output = True
1034 self._finished_output = True
1035
1035
1036 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1036 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1037
1037
1038 def readinto(self, b):
1038 def readinto(self, b):
1039 if self._closed:
1039 if self._closed:
1040 raise ValueError('stream is closed')
1040 raise ValueError('stream is closed')
1041
1041
1042 if self._finished_output:
1042 if self._finished_output:
1043 return 0
1043 return 0
1044
1044
1045 # TODO use writable=True once we require CFFI >= 1.12.
1045 # TODO use writable=True once we require CFFI >= 1.12.
1046 dest_buffer = ffi.from_buffer(b)
1046 dest_buffer = ffi.from_buffer(b)
1047 ffi.memmove(b, b'', 0)
1047 ffi.memmove(b, b'', 0)
1048 out_buffer = ffi.new('ZSTD_outBuffer *')
1048 out_buffer = ffi.new('ZSTD_outBuffer *')
1049 out_buffer.dst = dest_buffer
1049 out_buffer.dst = dest_buffer
1050 out_buffer.size = len(dest_buffer)
1050 out_buffer.size = len(dest_buffer)
1051 out_buffer.pos = 0
1051 out_buffer.pos = 0
1052
1052
1053 if self._compress_into_buffer(out_buffer):
1053 if self._compress_into_buffer(out_buffer):
1054 return out_buffer.pos
1054 return out_buffer.pos
1055
1055
1056 while not self._finished_input:
1056 while not self._finished_input:
1057 self._read_input()
1057 self._read_input()
1058 if self._compress_into_buffer(out_buffer):
1058 if self._compress_into_buffer(out_buffer):
1059 return out_buffer.pos
1059 return out_buffer.pos
1060
1060
1061 # EOF.
1061 # EOF.
1062 old_pos = out_buffer.pos
1062 old_pos = out_buffer.pos
1063 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1063 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1064 out_buffer, self._in_buffer,
1064 out_buffer, self._in_buffer,
1065 lib.ZSTD_e_end)
1065 lib.ZSTD_e_end)
1066
1066
1067 self._bytes_compressed += out_buffer.pos - old_pos
1067 self._bytes_compressed += out_buffer.pos - old_pos
1068
1068
1069 if lib.ZSTD_isError(zresult):
1069 if lib.ZSTD_isError(zresult):
1070 raise ZstdError('error ending compression stream: %s',
1070 raise ZstdError('error ending compression stream: %s',
1071 _zstd_error(zresult))
1071 _zstd_error(zresult))
1072
1072
1073 if zresult == 0:
1073 if zresult == 0:
1074 self._finished_output = True
1074 self._finished_output = True
1075
1075
1076 return out_buffer.pos
1076 return out_buffer.pos
1077
1077
1078 def readinto1(self, b):
1078 def readinto1(self, b):
1079 if self._closed:
1079 if self._closed:
1080 raise ValueError('stream is closed')
1080 raise ValueError('stream is closed')
1081
1081
1082 if self._finished_output:
1082 if self._finished_output:
1083 return 0
1083 return 0
1084
1084
1085 # TODO use writable=True once we require CFFI >= 1.12.
1085 # TODO use writable=True once we require CFFI >= 1.12.
1086 dest_buffer = ffi.from_buffer(b)
1086 dest_buffer = ffi.from_buffer(b)
1087 ffi.memmove(b, b'', 0)
1087 ffi.memmove(b, b'', 0)
1088
1088
1089 out_buffer = ffi.new('ZSTD_outBuffer *')
1089 out_buffer = ffi.new('ZSTD_outBuffer *')
1090 out_buffer.dst = dest_buffer
1090 out_buffer.dst = dest_buffer
1091 out_buffer.size = len(dest_buffer)
1091 out_buffer.size = len(dest_buffer)
1092 out_buffer.pos = 0
1092 out_buffer.pos = 0
1093
1093
1094 self._compress_into_buffer(out_buffer)
1094 self._compress_into_buffer(out_buffer)
1095 if out_buffer.pos:
1095 if out_buffer.pos:
1096 return out_buffer.pos
1096 return out_buffer.pos
1097
1097
1098 while not self._finished_input:
1098 while not self._finished_input:
1099 self._read_input()
1099 self._read_input()
1100
1100
1101 if self._compress_into_buffer(out_buffer):
1101 if self._compress_into_buffer(out_buffer):
1102 return out_buffer.pos
1102 return out_buffer.pos
1103
1103
1104 if out_buffer.pos and not self._finished_input:
1104 if out_buffer.pos and not self._finished_input:
1105 return out_buffer.pos
1105 return out_buffer.pos
1106
1106
1107 # EOF.
1107 # EOF.
1108 old_pos = out_buffer.pos
1108 old_pos = out_buffer.pos
1109
1109
1110 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1110 zresult = lib.ZSTD_compressStream2(self._compressor._cctx,
1111 out_buffer, self._in_buffer,
1111 out_buffer, self._in_buffer,
1112 lib.ZSTD_e_end)
1112 lib.ZSTD_e_end)
1113
1113
1114 self._bytes_compressed += out_buffer.pos - old_pos
1114 self._bytes_compressed += out_buffer.pos - old_pos
1115
1115
1116 if lib.ZSTD_isError(zresult):
1116 if lib.ZSTD_isError(zresult):
1117 raise ZstdError('error ending compression stream: %s' %
1117 raise ZstdError('error ending compression stream: %s' %
1118 _zstd_error(zresult))
1118 _zstd_error(zresult))
1119
1119
1120 if zresult == 0:
1120 if zresult == 0:
1121 self._finished_output = True
1121 self._finished_output = True
1122
1122
1123 return out_buffer.pos
1123 return out_buffer.pos
1124
1124
1125
1125
1126 class ZstdCompressor(object):
1126 class ZstdCompressor(object):
1127 def __init__(self, level=3, dict_data=None, compression_params=None,
1127 def __init__(self, level=3, dict_data=None, compression_params=None,
1128 write_checksum=None, write_content_size=None,
1128 write_checksum=None, write_content_size=None,
1129 write_dict_id=None, threads=0):
1129 write_dict_id=None, threads=0):
1130 if level > lib.ZSTD_maxCLevel():
1130 if level > lib.ZSTD_maxCLevel():
1131 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
1131 raise ValueError('level must be less than %d' % lib.ZSTD_maxCLevel())
1132
1132
1133 if threads < 0:
1133 if threads < 0:
1134 threads = _cpu_count()
1134 threads = _cpu_count()
1135
1135
1136 if compression_params and write_checksum is not None:
1136 if compression_params and write_checksum is not None:
1137 raise ValueError('cannot define compression_params and '
1137 raise ValueError('cannot define compression_params and '
1138 'write_checksum')
1138 'write_checksum')
1139
1139
1140 if compression_params and write_content_size is not None:
1140 if compression_params and write_content_size is not None:
1141 raise ValueError('cannot define compression_params and '
1141 raise ValueError('cannot define compression_params and '
1142 'write_content_size')
1142 'write_content_size')
1143
1143
1144 if compression_params and write_dict_id is not None:
1144 if compression_params and write_dict_id is not None:
1145 raise ValueError('cannot define compression_params and '
1145 raise ValueError('cannot define compression_params and '
1146 'write_dict_id')
1146 'write_dict_id')
1147
1147
1148 if compression_params and threads:
1148 if compression_params and threads:
1149 raise ValueError('cannot define compression_params and threads')
1149 raise ValueError('cannot define compression_params and threads')
1150
1150
1151 if compression_params:
1151 if compression_params:
1152 self._params = _make_cctx_params(compression_params)
1152 self._params = _make_cctx_params(compression_params)
1153 else:
1153 else:
1154 if write_dict_id is None:
1154 if write_dict_id is None:
1155 write_dict_id = True
1155 write_dict_id = True
1156
1156
1157 params = lib.ZSTD_createCCtxParams()
1157 params = lib.ZSTD_createCCtxParams()
1158 if params == ffi.NULL:
1158 if params == ffi.NULL:
1159 raise MemoryError()
1159 raise MemoryError()
1160
1160
1161 self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
1161 self._params = ffi.gc(params, lib.ZSTD_freeCCtxParams)
1162
1162
1163 _set_compression_parameter(self._params,
1163 _set_compression_parameter(self._params,
1164 lib.ZSTD_c_compressionLevel,
1164 lib.ZSTD_c_compressionLevel,
1165 level)
1165 level)
1166
1166
1167 _set_compression_parameter(
1167 _set_compression_parameter(
1168 self._params,
1168 self._params,
1169 lib.ZSTD_c_contentSizeFlag,
1169 lib.ZSTD_c_contentSizeFlag,
1170 write_content_size if write_content_size is not None else 1)
1170 write_content_size if write_content_size is not None else 1)
1171
1171
1172 _set_compression_parameter(self._params,
1172 _set_compression_parameter(self._params,
1173 lib.ZSTD_c_checksumFlag,
1173 lib.ZSTD_c_checksumFlag,
1174 1 if write_checksum else 0)
1174 1 if write_checksum else 0)
1175
1175
1176 _set_compression_parameter(self._params,
1176 _set_compression_parameter(self._params,
1177 lib.ZSTD_c_dictIDFlag,
1177 lib.ZSTD_c_dictIDFlag,
1178 1 if write_dict_id else 0)
1178 1 if write_dict_id else 0)
1179
1179
1180 if threads:
1180 if threads:
1181 _set_compression_parameter(self._params,
1181 _set_compression_parameter(self._params,
1182 lib.ZSTD_c_nbWorkers,
1182 lib.ZSTD_c_nbWorkers,
1183 threads)
1183 threads)
1184
1184
1185 cctx = lib.ZSTD_createCCtx()
1185 cctx = lib.ZSTD_createCCtx()
1186 if cctx == ffi.NULL:
1186 if cctx == ffi.NULL:
1187 raise MemoryError()
1187 raise MemoryError()
1188
1188
1189 self._cctx = cctx
1189 self._cctx = cctx
1190 self._dict_data = dict_data
1190 self._dict_data = dict_data
1191
1191
1192 # We defer setting up garbage collection until after calling
1192 # We defer setting up garbage collection until after calling
1193 # _setup_cctx() to ensure the memory size estimate is more accurate.
1193 # _setup_cctx() to ensure the memory size estimate is more accurate.
1194 try:
1194 try:
1195 self._setup_cctx()
1195 self._setup_cctx()
1196 finally:
1196 finally:
1197 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx,
1197 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx,
1198 size=lib.ZSTD_sizeof_CCtx(cctx))
1198 size=lib.ZSTD_sizeof_CCtx(cctx))
1199
1199
1200 def _setup_cctx(self):
1200 def _setup_cctx(self):
1201 zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx,
1201 zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx,
1202 self._params)
1202 self._params)
1203 if lib.ZSTD_isError(zresult):
1203 if lib.ZSTD_isError(zresult):
1204 raise ZstdError('could not set compression parameters: %s' %
1204 raise ZstdError('could not set compression parameters: %s' %
1205 _zstd_error(zresult))
1205 _zstd_error(zresult))
1206
1206
1207 dict_data = self._dict_data
1207 dict_data = self._dict_data
1208
1208
1209 if dict_data:
1209 if dict_data:
1210 if dict_data._cdict:
1210 if dict_data._cdict:
1211 zresult = lib.ZSTD_CCtx_refCDict(self._cctx, dict_data._cdict)
1211 zresult = lib.ZSTD_CCtx_refCDict(self._cctx, dict_data._cdict)
1212 else:
1212 else:
1213 zresult = lib.ZSTD_CCtx_loadDictionary_advanced(
1213 zresult = lib.ZSTD_CCtx_loadDictionary_advanced(
1214 self._cctx, dict_data.as_bytes(), len(dict_data),
1214 self._cctx, dict_data.as_bytes(), len(dict_data),
1215 lib.ZSTD_dlm_byRef, dict_data._dict_type)
1215 lib.ZSTD_dlm_byRef, dict_data._dict_type)
1216
1216
1217 if lib.ZSTD_isError(zresult):
1217 if lib.ZSTD_isError(zresult):
1218 raise ZstdError('could not load compression dictionary: %s' %
1218 raise ZstdError('could not load compression dictionary: %s' %
1219 _zstd_error(zresult))
1219 _zstd_error(zresult))
1220
1220
1221 def memory_size(self):
1221 def memory_size(self):
1222 return lib.ZSTD_sizeof_CCtx(self._cctx)
1222 return lib.ZSTD_sizeof_CCtx(self._cctx)
1223
1223
1224 def compress(self, data):
1224 def compress(self, data):
1225 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1225 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1226
1226
1227 data_buffer = ffi.from_buffer(data)
1227 data_buffer = ffi.from_buffer(data)
1228
1228
1229 dest_size = lib.ZSTD_compressBound(len(data_buffer))
1229 dest_size = lib.ZSTD_compressBound(len(data_buffer))
1230 out = new_nonzero('char[]', dest_size)
1230 out = new_nonzero('char[]', dest_size)
1231
1231
1232 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, len(data_buffer))
1232 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, len(data_buffer))
1233 if lib.ZSTD_isError(zresult):
1233 if lib.ZSTD_isError(zresult):
1234 raise ZstdError('error setting source size: %s' %
1234 raise ZstdError('error setting source size: %s' %
1235 _zstd_error(zresult))
1235 _zstd_error(zresult))
1236
1236
1237 out_buffer = ffi.new('ZSTD_outBuffer *')
1237 out_buffer = ffi.new('ZSTD_outBuffer *')
1238 in_buffer = ffi.new('ZSTD_inBuffer *')
1238 in_buffer = ffi.new('ZSTD_inBuffer *')
1239
1239
1240 out_buffer.dst = out
1240 out_buffer.dst = out
1241 out_buffer.size = dest_size
1241 out_buffer.size = dest_size
1242 out_buffer.pos = 0
1242 out_buffer.pos = 0
1243
1243
1244 in_buffer.src = data_buffer
1244 in_buffer.src = data_buffer
1245 in_buffer.size = len(data_buffer)
1245 in_buffer.size = len(data_buffer)
1246 in_buffer.pos = 0
1246 in_buffer.pos = 0
1247
1247
1248 zresult = lib.ZSTD_compressStream2(self._cctx,
1248 zresult = lib.ZSTD_compressStream2(self._cctx,
1249 out_buffer,
1249 out_buffer,
1250 in_buffer,
1250 in_buffer,
1251 lib.ZSTD_e_end)
1251 lib.ZSTD_e_end)
1252
1252
1253 if lib.ZSTD_isError(zresult):
1253 if lib.ZSTD_isError(zresult):
1254 raise ZstdError('cannot compress: %s' %
1254 raise ZstdError('cannot compress: %s' %
1255 _zstd_error(zresult))
1255 _zstd_error(zresult))
1256 elif zresult:
1256 elif zresult:
1257 raise ZstdError('unexpected partial frame flush')
1257 raise ZstdError('unexpected partial frame flush')
1258
1258
1259 return ffi.buffer(out, out_buffer.pos)[:]
1259 return ffi.buffer(out, out_buffer.pos)[:]
1260
1260
1261 def compressobj(self, size=-1):
1261 def compressobj(self, size=-1):
1262 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1262 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1263
1263
1264 if size < 0:
1264 if size < 0:
1265 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1265 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1266
1266
1267 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1267 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1268 if lib.ZSTD_isError(zresult):
1268 if lib.ZSTD_isError(zresult):
1269 raise ZstdError('error setting source size: %s' %
1269 raise ZstdError('error setting source size: %s' %
1270 _zstd_error(zresult))
1270 _zstd_error(zresult))
1271
1271
1272 cobj = ZstdCompressionObj()
1272 cobj = ZstdCompressionObj()
1273 cobj._out = ffi.new('ZSTD_outBuffer *')
1273 cobj._out = ffi.new('ZSTD_outBuffer *')
1274 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1274 cobj._dst_buffer = ffi.new('char[]', COMPRESSION_RECOMMENDED_OUTPUT_SIZE)
1275 cobj._out.dst = cobj._dst_buffer
1275 cobj._out.dst = cobj._dst_buffer
1276 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1276 cobj._out.size = COMPRESSION_RECOMMENDED_OUTPUT_SIZE
1277 cobj._out.pos = 0
1277 cobj._out.pos = 0
1278 cobj._compressor = self
1278 cobj._compressor = self
1279 cobj._finished = False
1279 cobj._finished = False
1280
1280
1281 return cobj
1281 return cobj
1282
1282
1283 def chunker(self, size=-1, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1283 def chunker(self, size=-1, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1284 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1284 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1285
1285
1286 if size < 0:
1286 if size < 0:
1287 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1287 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1288
1288
1289 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1289 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1290 if lib.ZSTD_isError(zresult):
1290 if lib.ZSTD_isError(zresult):
1291 raise ZstdError('error setting source size: %s' %
1291 raise ZstdError('error setting source size: %s' %
1292 _zstd_error(zresult))
1292 _zstd_error(zresult))
1293
1293
1294 return ZstdCompressionChunker(self, chunk_size=chunk_size)
1294 return ZstdCompressionChunker(self, chunk_size=chunk_size)
1295
1295
1296 def copy_stream(self, ifh, ofh, size=-1,
1296 def copy_stream(self, ifh, ofh, size=-1,
1297 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1297 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1298 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1298 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1299
1299
1300 if not hasattr(ifh, 'read'):
1300 if not hasattr(ifh, 'read'):
1301 raise ValueError('first argument must have a read() method')
1301 raise ValueError('first argument must have a read() method')
1302 if not hasattr(ofh, 'write'):
1302 if not hasattr(ofh, 'write'):
1303 raise ValueError('second argument must have a write() method')
1303 raise ValueError('second argument must have a write() method')
1304
1304
1305 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1305 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1306
1306
1307 if size < 0:
1307 if size < 0:
1308 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1308 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1309
1309
1310 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1310 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1311 if lib.ZSTD_isError(zresult):
1311 if lib.ZSTD_isError(zresult):
1312 raise ZstdError('error setting source size: %s' %
1312 raise ZstdError('error setting source size: %s' %
1313 _zstd_error(zresult))
1313 _zstd_error(zresult))
1314
1314
1315 in_buffer = ffi.new('ZSTD_inBuffer *')
1315 in_buffer = ffi.new('ZSTD_inBuffer *')
1316 out_buffer = ffi.new('ZSTD_outBuffer *')
1316 out_buffer = ffi.new('ZSTD_outBuffer *')
1317
1317
1318 dst_buffer = ffi.new('char[]', write_size)
1318 dst_buffer = ffi.new('char[]', write_size)
1319 out_buffer.dst = dst_buffer
1319 out_buffer.dst = dst_buffer
1320 out_buffer.size = write_size
1320 out_buffer.size = write_size
1321 out_buffer.pos = 0
1321 out_buffer.pos = 0
1322
1322
1323 total_read, total_write = 0, 0
1323 total_read, total_write = 0, 0
1324
1324
1325 while True:
1325 while True:
1326 data = ifh.read(read_size)
1326 data = ifh.read(read_size)
1327 if not data:
1327 if not data:
1328 break
1328 break
1329
1329
1330 data_buffer = ffi.from_buffer(data)
1330 data_buffer = ffi.from_buffer(data)
1331 total_read += len(data_buffer)
1331 total_read += len(data_buffer)
1332 in_buffer.src = data_buffer
1332 in_buffer.src = data_buffer
1333 in_buffer.size = len(data_buffer)
1333 in_buffer.size = len(data_buffer)
1334 in_buffer.pos = 0
1334 in_buffer.pos = 0
1335
1335
1336 while in_buffer.pos < in_buffer.size:
1336 while in_buffer.pos < in_buffer.size:
1337 zresult = lib.ZSTD_compressStream2(self._cctx,
1337 zresult = lib.ZSTD_compressStream2(self._cctx,
1338 out_buffer,
1338 out_buffer,
1339 in_buffer,
1339 in_buffer,
1340 lib.ZSTD_e_continue)
1340 lib.ZSTD_e_continue)
1341 if lib.ZSTD_isError(zresult):
1341 if lib.ZSTD_isError(zresult):
1342 raise ZstdError('zstd compress error: %s' %
1342 raise ZstdError('zstd compress error: %s' %
1343 _zstd_error(zresult))
1343 _zstd_error(zresult))
1344
1344
1345 if out_buffer.pos:
1345 if out_buffer.pos:
1346 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1346 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1347 total_write += out_buffer.pos
1347 total_write += out_buffer.pos
1348 out_buffer.pos = 0
1348 out_buffer.pos = 0
1349
1349
1350 # We've finished reading. Flush the compressor.
1350 # We've finished reading. Flush the compressor.
1351 while True:
1351 while True:
1352 zresult = lib.ZSTD_compressStream2(self._cctx,
1352 zresult = lib.ZSTD_compressStream2(self._cctx,
1353 out_buffer,
1353 out_buffer,
1354 in_buffer,
1354 in_buffer,
1355 lib.ZSTD_e_end)
1355 lib.ZSTD_e_end)
1356 if lib.ZSTD_isError(zresult):
1356 if lib.ZSTD_isError(zresult):
1357 raise ZstdError('error ending compression stream: %s' %
1357 raise ZstdError('error ending compression stream: %s' %
1358 _zstd_error(zresult))
1358 _zstd_error(zresult))
1359
1359
1360 if out_buffer.pos:
1360 if out_buffer.pos:
1361 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1361 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
1362 total_write += out_buffer.pos
1362 total_write += out_buffer.pos
1363 out_buffer.pos = 0
1363 out_buffer.pos = 0
1364
1364
1365 if zresult == 0:
1365 if zresult == 0:
1366 break
1366 break
1367
1367
1368 return total_read, total_write
1368 return total_read, total_write
1369
1369
1370 def stream_reader(self, source, size=-1,
1370 def stream_reader(self, source, size=-1,
1371 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
1371 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
1372 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1372 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1373
1373
1374 try:
1374 try:
1375 size = len(source)
1375 size = len(source)
1376 except Exception:
1376 except Exception:
1377 pass
1377 pass
1378
1378
1379 if size < 0:
1379 if size < 0:
1380 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1380 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1381
1381
1382 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1382 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1383 if lib.ZSTD_isError(zresult):
1383 if lib.ZSTD_isError(zresult):
1384 raise ZstdError('error setting source size: %s' %
1384 raise ZstdError('error setting source size: %s' %
1385 _zstd_error(zresult))
1385 _zstd_error(zresult))
1386
1386
1387 return ZstdCompressionReader(self, source, read_size)
1387 return ZstdCompressionReader(self, source, read_size)
1388
1388
1389 def stream_writer(self, writer, size=-1,
1389 def stream_writer(self, writer, size=-1,
1390 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1390 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE,
1391 write_return_read=False):
1391 write_return_read=False):
1392
1392
1393 if not hasattr(writer, 'write'):
1393 if not hasattr(writer, 'write'):
1394 raise ValueError('must pass an object with a write() method')
1394 raise ValueError('must pass an object with a write() method')
1395
1395
1396 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1396 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1397
1397
1398 if size < 0:
1398 if size < 0:
1399 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1399 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1400
1400
1401 return ZstdCompressionWriter(self, writer, size, write_size,
1401 return ZstdCompressionWriter(self, writer, size, write_size,
1402 write_return_read)
1402 write_return_read)
1403
1403
1404 write_to = stream_writer
1404 write_to = stream_writer
1405
1405
1406 def read_to_iter(self, reader, size=-1,
1406 def read_to_iter(self, reader, size=-1,
1407 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1407 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
1408 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1408 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1409 if hasattr(reader, 'read'):
1409 if hasattr(reader, 'read'):
1410 have_read = True
1410 have_read = True
1411 elif hasattr(reader, '__getitem__'):
1411 elif hasattr(reader, '__getitem__'):
1412 have_read = False
1412 have_read = False
1413 buffer_offset = 0
1413 buffer_offset = 0
1414 size = len(reader)
1414 size = len(reader)
1415 else:
1415 else:
1416 raise ValueError('must pass an object with a read() method or '
1416 raise ValueError('must pass an object with a read() method or '
1417 'conforms to buffer protocol')
1417 'conforms to buffer protocol')
1418
1418
1419 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1419 lib.ZSTD_CCtx_reset(self._cctx, lib.ZSTD_reset_session_only)
1420
1420
1421 if size < 0:
1421 if size < 0:
1422 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1422 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1423
1423
1424 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1424 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1425 if lib.ZSTD_isError(zresult):
1425 if lib.ZSTD_isError(zresult):
1426 raise ZstdError('error setting source size: %s' %
1426 raise ZstdError('error setting source size: %s' %
1427 _zstd_error(zresult))
1427 _zstd_error(zresult))
1428
1428
1429 in_buffer = ffi.new('ZSTD_inBuffer *')
1429 in_buffer = ffi.new('ZSTD_inBuffer *')
1430 out_buffer = ffi.new('ZSTD_outBuffer *')
1430 out_buffer = ffi.new('ZSTD_outBuffer *')
1431
1431
1432 in_buffer.src = ffi.NULL
1432 in_buffer.src = ffi.NULL
1433 in_buffer.size = 0
1433 in_buffer.size = 0
1434 in_buffer.pos = 0
1434 in_buffer.pos = 0
1435
1435
1436 dst_buffer = ffi.new('char[]', write_size)
1436 dst_buffer = ffi.new('char[]', write_size)
1437 out_buffer.dst = dst_buffer
1437 out_buffer.dst = dst_buffer
1438 out_buffer.size = write_size
1438 out_buffer.size = write_size
1439 out_buffer.pos = 0
1439 out_buffer.pos = 0
1440
1440
1441 while True:
1441 while True:
1442 # We should never have output data sitting around after a previous
1442 # We should never have output data sitting around after a previous
1443 # iteration.
1443 # iteration.
1444 assert out_buffer.pos == 0
1444 assert out_buffer.pos == 0
1445
1445
1446 # Collect input data.
1446 # Collect input data.
1447 if have_read:
1447 if have_read:
1448 read_result = reader.read(read_size)
1448 read_result = reader.read(read_size)
1449 else:
1449 else:
1450 remaining = len(reader) - buffer_offset
1450 remaining = len(reader) - buffer_offset
1451 slice_size = min(remaining, read_size)
1451 slice_size = min(remaining, read_size)
1452 read_result = reader[buffer_offset:buffer_offset + slice_size]
1452 read_result = reader[buffer_offset:buffer_offset + slice_size]
1453 buffer_offset += slice_size
1453 buffer_offset += slice_size
1454
1454
1455 # No new input data. Break out of the read loop.
1455 # No new input data. Break out of the read loop.
1456 if not read_result:
1456 if not read_result:
1457 break
1457 break
1458
1458
1459 # Feed all read data into the compressor and emit output until
1459 # Feed all read data into the compressor and emit output until
1460 # exhausted.
1460 # exhausted.
1461 read_buffer = ffi.from_buffer(read_result)
1461 read_buffer = ffi.from_buffer(read_result)
1462 in_buffer.src = read_buffer
1462 in_buffer.src = read_buffer
1463 in_buffer.size = len(read_buffer)
1463 in_buffer.size = len(read_buffer)
1464 in_buffer.pos = 0
1464 in_buffer.pos = 0
1465
1465
1466 while in_buffer.pos < in_buffer.size:
1466 while in_buffer.pos < in_buffer.size:
1467 zresult = lib.ZSTD_compressStream2(self._cctx, out_buffer, in_buffer,
1467 zresult = lib.ZSTD_compressStream2(self._cctx, out_buffer, in_buffer,
1468 lib.ZSTD_e_continue)
1468 lib.ZSTD_e_continue)
1469 if lib.ZSTD_isError(zresult):
1469 if lib.ZSTD_isError(zresult):
1470 raise ZstdError('zstd compress error: %s' %
1470 raise ZstdError('zstd compress error: %s' %
1471 _zstd_error(zresult))
1471 _zstd_error(zresult))
1472
1472
1473 if out_buffer.pos:
1473 if out_buffer.pos:
1474 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1474 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1475 out_buffer.pos = 0
1475 out_buffer.pos = 0
1476 yield data
1476 yield data
1477
1477
1478 assert out_buffer.pos == 0
1478 assert out_buffer.pos == 0
1479
1479
1480 # And repeat the loop to collect more data.
1480 # And repeat the loop to collect more data.
1481 continue
1481 continue
1482
1482
1483 # If we get here, input is exhausted. End the stream and emit what
1483 # If we get here, input is exhausted. End the stream and emit what
1484 # remains.
1484 # remains.
1485 while True:
1485 while True:
1486 assert out_buffer.pos == 0
1486 assert out_buffer.pos == 0
1487 zresult = lib.ZSTD_compressStream2(self._cctx,
1487 zresult = lib.ZSTD_compressStream2(self._cctx,
1488 out_buffer,
1488 out_buffer,
1489 in_buffer,
1489 in_buffer,
1490 lib.ZSTD_e_end)
1490 lib.ZSTD_e_end)
1491 if lib.ZSTD_isError(zresult):
1491 if lib.ZSTD_isError(zresult):
1492 raise ZstdError('error ending compression stream: %s' %
1492 raise ZstdError('error ending compression stream: %s' %
1493 _zstd_error(zresult))
1493 _zstd_error(zresult))
1494
1494
1495 if out_buffer.pos:
1495 if out_buffer.pos:
1496 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1496 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1497 out_buffer.pos = 0
1497 out_buffer.pos = 0
1498 yield data
1498 yield data
1499
1499
1500 if zresult == 0:
1500 if zresult == 0:
1501 break
1501 break
1502
1502
1503 read_from = read_to_iter
1503 read_from = read_to_iter
1504
1504
1505 def frame_progression(self):
1505 def frame_progression(self):
1506 progression = lib.ZSTD_getFrameProgression(self._cctx)
1506 progression = lib.ZSTD_getFrameProgression(self._cctx)
1507
1507
1508 return progression.ingested, progression.consumed, progression.produced
1508 return progression.ingested, progression.consumed, progression.produced
1509
1509
1510
1510
1511 class FrameParameters(object):
1511 class FrameParameters(object):
1512 def __init__(self, fparams):
1512 def __init__(self, fparams):
1513 self.content_size = fparams.frameContentSize
1513 self.content_size = fparams.frameContentSize
1514 self.window_size = fparams.windowSize
1514 self.window_size = fparams.windowSize
1515 self.dict_id = fparams.dictID
1515 self.dict_id = fparams.dictID
1516 self.has_checksum = bool(fparams.checksumFlag)
1516 self.has_checksum = bool(fparams.checksumFlag)
1517
1517
1518
1518
1519 def frame_content_size(data):
1519 def frame_content_size(data):
1520 data_buffer = ffi.from_buffer(data)
1520 data_buffer = ffi.from_buffer(data)
1521
1521
1522 size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer))
1522 size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer))
1523
1523
1524 if size == lib.ZSTD_CONTENTSIZE_ERROR:
1524 if size == lib.ZSTD_CONTENTSIZE_ERROR:
1525 raise ZstdError('error when determining content size')
1525 raise ZstdError('error when determining content size')
1526 elif size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
1526 elif size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
1527 return -1
1527 return -1
1528 else:
1528 else:
1529 return size
1529 return size
1530
1530
1531
1531
1532 def frame_header_size(data):
1532 def frame_header_size(data):
1533 data_buffer = ffi.from_buffer(data)
1533 data_buffer = ffi.from_buffer(data)
1534
1534
1535 zresult = lib.ZSTD_frameHeaderSize(data_buffer, len(data_buffer))
1535 zresult = lib.ZSTD_frameHeaderSize(data_buffer, len(data_buffer))
1536 if lib.ZSTD_isError(zresult):
1536 if lib.ZSTD_isError(zresult):
1537 raise ZstdError('could not determine frame header size: %s' %
1537 raise ZstdError('could not determine frame header size: %s' %
1538 _zstd_error(zresult))
1538 _zstd_error(zresult))
1539
1539
1540 return zresult
1540 return zresult
1541
1541
1542
1542
1543 def get_frame_parameters(data):
1543 def get_frame_parameters(data):
1544 params = ffi.new('ZSTD_frameHeader *')
1544 params = ffi.new('ZSTD_frameHeader *')
1545
1545
1546 data_buffer = ffi.from_buffer(data)
1546 data_buffer = ffi.from_buffer(data)
1547 zresult = lib.ZSTD_getFrameHeader(params, data_buffer, len(data_buffer))
1547 zresult = lib.ZSTD_getFrameHeader(params, data_buffer, len(data_buffer))
1548 if lib.ZSTD_isError(zresult):
1548 if lib.ZSTD_isError(zresult):
1549 raise ZstdError('cannot get frame parameters: %s' %
1549 raise ZstdError('cannot get frame parameters: %s' %
1550 _zstd_error(zresult))
1550 _zstd_error(zresult))
1551
1551
1552 if zresult:
1552 if zresult:
1553 raise ZstdError('not enough data for frame parameters; need %d bytes' %
1553 raise ZstdError('not enough data for frame parameters; need %d bytes' %
1554 zresult)
1554 zresult)
1555
1555
1556 return FrameParameters(params[0])
1556 return FrameParameters(params[0])
1557
1557
1558
1558
1559 class ZstdCompressionDict(object):
1559 class ZstdCompressionDict(object):
1560 def __init__(self, data, dict_type=DICT_TYPE_AUTO, k=0, d=0):
1560 def __init__(self, data, dict_type=DICT_TYPE_AUTO, k=0, d=0):
1561 assert isinstance(data, bytes_type)
1561 assert isinstance(data, bytes_type)
1562 self._data = data
1562 self._data = data
1563 self.k = k
1563 self.k = k
1564 self.d = d
1564 self.d = d
1565
1565
1566 if dict_type not in (DICT_TYPE_AUTO, DICT_TYPE_RAWCONTENT,
1566 if dict_type not in (DICT_TYPE_AUTO, DICT_TYPE_RAWCONTENT,
1567 DICT_TYPE_FULLDICT):
1567 DICT_TYPE_FULLDICT):
1568 raise ValueError('invalid dictionary load mode: %d; must use '
1568 raise ValueError('invalid dictionary load mode: %d; must use '
1569 'DICT_TYPE_* constants')
1569 'DICT_TYPE_* constants')
1570
1570
1571 self._dict_type = dict_type
1571 self._dict_type = dict_type
1572 self._cdict = None
1572 self._cdict = None
1573
1573
1574 def __len__(self):
1574 def __len__(self):
1575 return len(self._data)
1575 return len(self._data)
1576
1576
1577 def dict_id(self):
1577 def dict_id(self):
1578 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
1578 return int_type(lib.ZDICT_getDictID(self._data, len(self._data)))
1579
1579
1580 def as_bytes(self):
1580 def as_bytes(self):
1581 return self._data
1581 return self._data
1582
1582
1583 def precompute_compress(self, level=0, compression_params=None):
1583 def precompute_compress(self, level=0, compression_params=None):
1584 if level and compression_params:
1584 if level and compression_params:
1585 raise ValueError('must only specify one of level or '
1585 raise ValueError('must only specify one of level or '
1586 'compression_params')
1586 'compression_params')
1587
1587
1588 if not level and not compression_params:
1588 if not level and not compression_params:
1589 raise ValueError('must specify one of level or compression_params')
1589 raise ValueError('must specify one of level or compression_params')
1590
1590
1591 if level:
1591 if level:
1592 cparams = lib.ZSTD_getCParams(level, 0, len(self._data))
1592 cparams = lib.ZSTD_getCParams(level, 0, len(self._data))
1593 else:
1593 else:
1594 cparams = ffi.new('ZSTD_compressionParameters')
1594 cparams = ffi.new('ZSTD_compressionParameters')
1595 cparams.chainLog = compression_params.chain_log
1595 cparams.chainLog = compression_params.chain_log
1596 cparams.hashLog = compression_params.hash_log
1596 cparams.hashLog = compression_params.hash_log
1597 cparams.minMatch = compression_params.min_match
1597 cparams.minMatch = compression_params.min_match
1598 cparams.searchLog = compression_params.search_log
1598 cparams.searchLog = compression_params.search_log
1599 cparams.strategy = compression_params.compression_strategy
1599 cparams.strategy = compression_params.compression_strategy
1600 cparams.targetLength = compression_params.target_length
1600 cparams.targetLength = compression_params.target_length
1601 cparams.windowLog = compression_params.window_log
1601 cparams.windowLog = compression_params.window_log
1602
1602
1603 cdict = lib.ZSTD_createCDict_advanced(self._data, len(self._data),
1603 cdict = lib.ZSTD_createCDict_advanced(self._data, len(self._data),
1604 lib.ZSTD_dlm_byRef,
1604 lib.ZSTD_dlm_byRef,
1605 self._dict_type,
1605 self._dict_type,
1606 cparams,
1606 cparams,
1607 lib.ZSTD_defaultCMem)
1607 lib.ZSTD_defaultCMem)
1608 if cdict == ffi.NULL:
1608 if cdict == ffi.NULL:
1609 raise ZstdError('unable to precompute dictionary')
1609 raise ZstdError('unable to precompute dictionary')
1610
1610
1611 self._cdict = ffi.gc(cdict, lib.ZSTD_freeCDict,
1611 self._cdict = ffi.gc(cdict, lib.ZSTD_freeCDict,
1612 size=lib.ZSTD_sizeof_CDict(cdict))
1612 size=lib.ZSTD_sizeof_CDict(cdict))
1613
1613
1614 @property
1614 @property
1615 def _ddict(self):
1615 def _ddict(self):
1616 ddict = lib.ZSTD_createDDict_advanced(self._data, len(self._data),
1616 ddict = lib.ZSTD_createDDict_advanced(self._data, len(self._data),
1617 lib.ZSTD_dlm_byRef,
1617 lib.ZSTD_dlm_byRef,
1618 self._dict_type,
1618 self._dict_type,
1619 lib.ZSTD_defaultCMem)
1619 lib.ZSTD_defaultCMem)
1620
1620
1621 if ddict == ffi.NULL:
1621 if ddict == ffi.NULL:
1622 raise ZstdError('could not create decompression dict')
1622 raise ZstdError('could not create decompression dict')
1623
1623
1624 ddict = ffi.gc(ddict, lib.ZSTD_freeDDict,
1624 ddict = ffi.gc(ddict, lib.ZSTD_freeDDict,
1625 size=lib.ZSTD_sizeof_DDict(ddict))
1625 size=lib.ZSTD_sizeof_DDict(ddict))
1626 self.__dict__['_ddict'] = ddict
1626 self.__dict__['_ddict'] = ddict
1627
1627
1628 return ddict
1628 return ddict
1629
1629
1630 def train_dictionary(dict_size, samples, k=0, d=0, notifications=0, dict_id=0,
1630 def train_dictionary(dict_size, samples, k=0, d=0, notifications=0, dict_id=0,
1631 level=0, steps=0, threads=0):
1631 level=0, steps=0, threads=0):
1632 if not isinstance(samples, list):
1632 if not isinstance(samples, list):
1633 raise TypeError('samples must be a list')
1633 raise TypeError('samples must be a list')
1634
1634
1635 if threads < 0:
1635 if threads < 0:
1636 threads = _cpu_count()
1636 threads = _cpu_count()
1637
1637
1638 total_size = sum(map(len, samples))
1638 total_size = sum(map(len, samples))
1639
1639
1640 samples_buffer = new_nonzero('char[]', total_size)
1640 samples_buffer = new_nonzero('char[]', total_size)
1641 sample_sizes = new_nonzero('size_t[]', len(samples))
1641 sample_sizes = new_nonzero('size_t[]', len(samples))
1642
1642
1643 offset = 0
1643 offset = 0
1644 for i, sample in enumerate(samples):
1644 for i, sample in enumerate(samples):
1645 if not isinstance(sample, bytes_type):
1645 if not isinstance(sample, bytes_type):
1646 raise ValueError('samples must be bytes')
1646 raise ValueError('samples must be bytes')
1647
1647
1648 l = len(sample)
1648 l = len(sample)
1649 ffi.memmove(samples_buffer + offset, sample, l)
1649 ffi.memmove(samples_buffer + offset, sample, l)
1650 offset += l
1650 offset += l
1651 sample_sizes[i] = l
1651 sample_sizes[i] = l
1652
1652
1653 dict_data = new_nonzero('char[]', dict_size)
1653 dict_data = new_nonzero('char[]', dict_size)
1654
1654
1655 dparams = ffi.new('ZDICT_cover_params_t *')[0]
1655 dparams = ffi.new('ZDICT_cover_params_t *')[0]
1656 dparams.k = k
1656 dparams.k = k
1657 dparams.d = d
1657 dparams.d = d
1658 dparams.steps = steps
1658 dparams.steps = steps
1659 dparams.nbThreads = threads
1659 dparams.nbThreads = threads
1660 dparams.zParams.notificationLevel = notifications
1660 dparams.zParams.notificationLevel = notifications
1661 dparams.zParams.dictID = dict_id
1661 dparams.zParams.dictID = dict_id
1662 dparams.zParams.compressionLevel = level
1662 dparams.zParams.compressionLevel = level
1663
1663
1664 if (not dparams.k and not dparams.d and not dparams.steps
1664 if (not dparams.k and not dparams.d and not dparams.steps
1665 and not dparams.nbThreads and not dparams.zParams.notificationLevel
1665 and not dparams.nbThreads and not dparams.zParams.notificationLevel
1666 and not dparams.zParams.dictID
1666 and not dparams.zParams.dictID
1667 and not dparams.zParams.compressionLevel):
1667 and not dparams.zParams.compressionLevel):
1668 zresult = lib.ZDICT_trainFromBuffer(
1668 zresult = lib.ZDICT_trainFromBuffer(
1669 ffi.addressof(dict_data), dict_size,
1669 ffi.addressof(dict_data), dict_size,
1670 ffi.addressof(samples_buffer),
1670 ffi.addressof(samples_buffer),
1671 ffi.addressof(sample_sizes, 0), len(samples))
1671 ffi.addressof(sample_sizes, 0), len(samples))
1672 elif dparams.steps or dparams.nbThreads:
1672 elif dparams.steps or dparams.nbThreads:
1673 zresult = lib.ZDICT_optimizeTrainFromBuffer_cover(
1673 zresult = lib.ZDICT_optimizeTrainFromBuffer_cover(
1674 ffi.addressof(dict_data), dict_size,
1674 ffi.addressof(dict_data), dict_size,
1675 ffi.addressof(samples_buffer),
1675 ffi.addressof(samples_buffer),
1676 ffi.addressof(sample_sizes, 0), len(samples),
1676 ffi.addressof(sample_sizes, 0), len(samples),
1677 ffi.addressof(dparams))
1677 ffi.addressof(dparams))
1678 else:
1678 else:
1679 zresult = lib.ZDICT_trainFromBuffer_cover(
1679 zresult = lib.ZDICT_trainFromBuffer_cover(
1680 ffi.addressof(dict_data), dict_size,
1680 ffi.addressof(dict_data), dict_size,
1681 ffi.addressof(samples_buffer),
1681 ffi.addressof(samples_buffer),
1682 ffi.addressof(sample_sizes, 0), len(samples),
1682 ffi.addressof(sample_sizes, 0), len(samples),
1683 dparams)
1683 dparams)
1684
1684
1685 if lib.ZDICT_isError(zresult):
1685 if lib.ZDICT_isError(zresult):
1686 msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode('utf-8')
1686 msg = ffi.string(lib.ZDICT_getErrorName(zresult)).decode('utf-8')
1687 raise ZstdError('cannot train dict: %s' % msg)
1687 raise ZstdError('cannot train dict: %s' % msg)
1688
1688
1689 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:],
1689 return ZstdCompressionDict(ffi.buffer(dict_data, zresult)[:],
1690 dict_type=DICT_TYPE_FULLDICT,
1690 dict_type=DICT_TYPE_FULLDICT,
1691 k=dparams.k, d=dparams.d)
1691 k=dparams.k, d=dparams.d)
1692
1692
1693
1693
1694 class ZstdDecompressionObj(object):
1694 class ZstdDecompressionObj(object):
1695 def __init__(self, decompressor, write_size):
1695 def __init__(self, decompressor, write_size):
1696 self._decompressor = decompressor
1696 self._decompressor = decompressor
1697 self._write_size = write_size
1697 self._write_size = write_size
1698 self._finished = False
1698 self._finished = False
1699
1699
1700 def decompress(self, data):
1700 def decompress(self, data):
1701 if self._finished:
1701 if self._finished:
1702 raise ZstdError('cannot use a decompressobj multiple times')
1702 raise ZstdError('cannot use a decompressobj multiple times')
1703
1703
1704 in_buffer = ffi.new('ZSTD_inBuffer *')
1704 in_buffer = ffi.new('ZSTD_inBuffer *')
1705 out_buffer = ffi.new('ZSTD_outBuffer *')
1705 out_buffer = ffi.new('ZSTD_outBuffer *')
1706
1706
1707 data_buffer = ffi.from_buffer(data)
1707 data_buffer = ffi.from_buffer(data)
1708
1708
1709 if len(data_buffer) == 0:
1709 if len(data_buffer) == 0:
1710 return b''
1710 return b''
1711
1711
1712 in_buffer.src = data_buffer
1712 in_buffer.src = data_buffer
1713 in_buffer.size = len(data_buffer)
1713 in_buffer.size = len(data_buffer)
1714 in_buffer.pos = 0
1714 in_buffer.pos = 0
1715
1715
1716 dst_buffer = ffi.new('char[]', self._write_size)
1716 dst_buffer = ffi.new('char[]', self._write_size)
1717 out_buffer.dst = dst_buffer
1717 out_buffer.dst = dst_buffer
1718 out_buffer.size = len(dst_buffer)
1718 out_buffer.size = len(dst_buffer)
1719 out_buffer.pos = 0
1719 out_buffer.pos = 0
1720
1720
1721 chunks = []
1721 chunks = []
1722
1722
1723 while True:
1723 while True:
1724 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1724 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1725 out_buffer, in_buffer)
1725 out_buffer, in_buffer)
1726 if lib.ZSTD_isError(zresult):
1726 if lib.ZSTD_isError(zresult):
1727 raise ZstdError('zstd decompressor error: %s' %
1727 raise ZstdError('zstd decompressor error: %s' %
1728 _zstd_error(zresult))
1728 _zstd_error(zresult))
1729
1729
1730 if zresult == 0:
1730 if zresult == 0:
1731 self._finished = True
1731 self._finished = True
1732 self._decompressor = None
1732 self._decompressor = None
1733
1733
1734 if out_buffer.pos:
1734 if out_buffer.pos:
1735 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
1735 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
1736
1736
1737 if (zresult == 0 or
1737 if (zresult == 0 or
1738 (in_buffer.pos == in_buffer.size and out_buffer.pos == 0)):
1738 (in_buffer.pos == in_buffer.size and out_buffer.pos == 0)):
1739 break
1739 break
1740
1740
1741 out_buffer.pos = 0
1741 out_buffer.pos = 0
1742
1742
1743 return b''.join(chunks)
1743 return b''.join(chunks)
1744
1744
1745 def flush(self, length=0):
1745 def flush(self, length=0):
1746 pass
1746 pass
1747
1747
1748
1748
1749 class ZstdDecompressionReader(object):
1749 class ZstdDecompressionReader(object):
1750 def __init__(self, decompressor, source, read_size, read_across_frames):
1750 def __init__(self, decompressor, source, read_size, read_across_frames):
1751 self._decompressor = decompressor
1751 self._decompressor = decompressor
1752 self._source = source
1752 self._source = source
1753 self._read_size = read_size
1753 self._read_size = read_size
1754 self._read_across_frames = bool(read_across_frames)
1754 self._read_across_frames = bool(read_across_frames)
1755 self._entered = False
1755 self._entered = False
1756 self._closed = False
1756 self._closed = False
1757 self._bytes_decompressed = 0
1757 self._bytes_decompressed = 0
1758 self._finished_input = False
1758 self._finished_input = False
1759 self._finished_output = False
1759 self._finished_output = False
1760 self._in_buffer = ffi.new('ZSTD_inBuffer *')
1760 self._in_buffer = ffi.new('ZSTD_inBuffer *')
1761 # Holds a ref to self._in_buffer.src.
1761 # Holds a ref to self._in_buffer.src.
1762 self._source_buffer = None
1762 self._source_buffer = None
1763
1763
1764 def __enter__(self):
1764 def __enter__(self):
1765 if self._entered:
1765 if self._entered:
1766 raise ValueError('cannot __enter__ multiple times')
1766 raise ValueError('cannot __enter__ multiple times')
1767
1767
1768 self._entered = True
1768 self._entered = True
1769 return self
1769 return self
1770
1770
1771 def __exit__(self, exc_type, exc_value, exc_tb):
1771 def __exit__(self, exc_type, exc_value, exc_tb):
1772 self._entered = False
1772 self._entered = False
1773 self._closed = True
1773 self._closed = True
1774 self._source = None
1774 self._source = None
1775 self._decompressor = None
1775 self._decompressor = None
1776
1776
1777 return False
1777 return False
1778
1778
1779 def readable(self):
1779 def readable(self):
1780 return True
1780 return True
1781
1781
1782 def writable(self):
1782 def writable(self):
1783 return False
1783 return False
1784
1784
1785 def seekable(self):
1785 def seekable(self):
1786 return True
1786 return True
1787
1787
1788 def readline(self):
1788 def readline(self):
1789 raise io.UnsupportedOperation()
1789 raise io.UnsupportedOperation()
1790
1790
1791 def readlines(self):
1791 def readlines(self):
1792 raise io.UnsupportedOperation()
1792 raise io.UnsupportedOperation()
1793
1793
1794 def write(self, data):
1794 def write(self, data):
1795 raise io.UnsupportedOperation()
1795 raise io.UnsupportedOperation()
1796
1796
1797 def writelines(self, lines):
1797 def writelines(self, lines):
1798 raise io.UnsupportedOperation()
1798 raise io.UnsupportedOperation()
1799
1799
1800 def isatty(self):
1800 def isatty(self):
1801 return False
1801 return False
1802
1802
1803 def flush(self):
1803 def flush(self):
1804 return None
1804 return None
1805
1805
1806 def close(self):
1806 def close(self):
1807 self._closed = True
1807 self._closed = True
1808 return None
1808 return None
1809
1809
1810 @property
1810 @property
1811 def closed(self):
1811 def closed(self):
1812 return self._closed
1812 return self._closed
1813
1813
1814 def tell(self):
1814 def tell(self):
1815 return self._bytes_decompressed
1815 return self._bytes_decompressed
1816
1816
1817 def readall(self):
1817 def readall(self):
1818 chunks = []
1818 chunks = []
1819
1819
1820 while True:
1820 while True:
1821 chunk = self.read(1048576)
1821 chunk = self.read(1048576)
1822 if not chunk:
1822 if not chunk:
1823 break
1823 break
1824
1824
1825 chunks.append(chunk)
1825 chunks.append(chunk)
1826
1826
1827 return b''.join(chunks)
1827 return b''.join(chunks)
1828
1828
1829 def __iter__(self):
1829 def __iter__(self):
1830 raise io.UnsupportedOperation()
1830 raise io.UnsupportedOperation()
1831
1831
1832 def __next__(self):
1832 def __next__(self):
1833 raise io.UnsupportedOperation()
1833 raise io.UnsupportedOperation()
1834
1834
1835 next = __next__
1835 next = __next__
1836
1836
1837 def _read_input(self):
1837 def _read_input(self):
1838 # We have data left over in the input buffer. Use it.
1838 # We have data left over in the input buffer. Use it.
1839 if self._in_buffer.pos < self._in_buffer.size:
1839 if self._in_buffer.pos < self._in_buffer.size:
1840 return
1840 return
1841
1841
1842 # All input data exhausted. Nothing to do.
1842 # All input data exhausted. Nothing to do.
1843 if self._finished_input:
1843 if self._finished_input:
1844 return
1844 return
1845
1845
1846 # Else populate the input buffer from our source.
1846 # Else populate the input buffer from our source.
1847 if hasattr(self._source, 'read'):
1847 if hasattr(self._source, 'read'):
1848 data = self._source.read(self._read_size)
1848 data = self._source.read(self._read_size)
1849
1849
1850 if not data:
1850 if not data:
1851 self._finished_input = True
1851 self._finished_input = True
1852 return
1852 return
1853
1853
1854 self._source_buffer = ffi.from_buffer(data)
1854 self._source_buffer = ffi.from_buffer(data)
1855 self._in_buffer.src = self._source_buffer
1855 self._in_buffer.src = self._source_buffer
1856 self._in_buffer.size = len(self._source_buffer)
1856 self._in_buffer.size = len(self._source_buffer)
1857 self._in_buffer.pos = 0
1857 self._in_buffer.pos = 0
1858 else:
1858 else:
1859 self._source_buffer = ffi.from_buffer(self._source)
1859 self._source_buffer = ffi.from_buffer(self._source)
1860 self._in_buffer.src = self._source_buffer
1860 self._in_buffer.src = self._source_buffer
1861 self._in_buffer.size = len(self._source_buffer)
1861 self._in_buffer.size = len(self._source_buffer)
1862 self._in_buffer.pos = 0
1862 self._in_buffer.pos = 0
1863
1863
1864 def _decompress_into_buffer(self, out_buffer):
1864 def _decompress_into_buffer(self, out_buffer):
1865 """Decompress available input into an output buffer.
1865 """Decompress available input into an output buffer.
1866
1866
1867 Returns True if data in output buffer should be emitted.
1867 Returns True if data in output buffer should be emitted.
1868 """
1868 """
1869 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1869 zresult = lib.ZSTD_decompressStream(self._decompressor._dctx,
1870 out_buffer, self._in_buffer)
1870 out_buffer, self._in_buffer)
1871
1871
1872 if self._in_buffer.pos == self._in_buffer.size:
1872 if self._in_buffer.pos == self._in_buffer.size:
1873 self._in_buffer.src = ffi.NULL
1873 self._in_buffer.src = ffi.NULL
1874 self._in_buffer.pos = 0
1874 self._in_buffer.pos = 0
1875 self._in_buffer.size = 0
1875 self._in_buffer.size = 0
1876 self._source_buffer = None
1876 self._source_buffer = None
1877
1877
1878 if not hasattr(self._source, 'read'):
1878 if not hasattr(self._source, 'read'):
1879 self._finished_input = True
1879 self._finished_input = True
1880
1880
1881 if lib.ZSTD_isError(zresult):
1881 if lib.ZSTD_isError(zresult):
1882 raise ZstdError('zstd decompress error: %s' %
1882 raise ZstdError('zstd decompress error: %s' %
1883 _zstd_error(zresult))
1883 _zstd_error(zresult))
1884
1884
1885 # Emit data if there is data AND either:
1885 # Emit data if there is data AND either:
1886 # a) output buffer is full (read amount is satisfied)
1886 # a) output buffer is full (read amount is satisfied)
1887 # b) we're at end of a frame and not in frame spanning mode
1887 # b) we're at end of a frame and not in frame spanning mode
1888 return (out_buffer.pos and
1888 return (out_buffer.pos and
1889 (out_buffer.pos == out_buffer.size or
1889 (out_buffer.pos == out_buffer.size or
1890 zresult == 0 and not self._read_across_frames))
1890 zresult == 0 and not self._read_across_frames))
1891
1891
1892 def read(self, size=-1):
1892 def read(self, size=-1):
1893 if self._closed:
1893 if self._closed:
1894 raise ValueError('stream is closed')
1894 raise ValueError('stream is closed')
1895
1895
1896 if size < -1:
1896 if size < -1:
1897 raise ValueError('cannot read negative amounts less than -1')
1897 raise ValueError('cannot read negative amounts less than -1')
1898
1898
1899 if size == -1:
1899 if size == -1:
1900 # This is recursive. But it gets the job done.
1900 # This is recursive. But it gets the job done.
1901 return self.readall()
1901 return self.readall()
1902
1902
1903 if self._finished_output or size == 0:
1903 if self._finished_output or size == 0:
1904 return b''
1904 return b''
1905
1905
1906 # We /could/ call into readinto() here. But that introduces more
1906 # We /could/ call into readinto() here. But that introduces more
1907 # overhead.
1907 # overhead.
1908 dst_buffer = ffi.new('char[]', size)
1908 dst_buffer = ffi.new('char[]', size)
1909 out_buffer = ffi.new('ZSTD_outBuffer *')
1909 out_buffer = ffi.new('ZSTD_outBuffer *')
1910 out_buffer.dst = dst_buffer
1910 out_buffer.dst = dst_buffer
1911 out_buffer.size = size
1911 out_buffer.size = size
1912 out_buffer.pos = 0
1912 out_buffer.pos = 0
1913
1913
1914 self._read_input()
1914 self._read_input()
1915 if self._decompress_into_buffer(out_buffer):
1915 if self._decompress_into_buffer(out_buffer):
1916 self._bytes_decompressed += out_buffer.pos
1916 self._bytes_decompressed += out_buffer.pos
1917 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1917 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1918
1918
1919 while not self._finished_input:
1919 while not self._finished_input:
1920 self._read_input()
1920 self._read_input()
1921 if self._decompress_into_buffer(out_buffer):
1921 if self._decompress_into_buffer(out_buffer):
1922 self._bytes_decompressed += out_buffer.pos
1922 self._bytes_decompressed += out_buffer.pos
1923 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1923 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1924
1924
1925 self._bytes_decompressed += out_buffer.pos
1925 self._bytes_decompressed += out_buffer.pos
1926 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1926 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1927
1927
1928 def readinto(self, b):
1928 def readinto(self, b):
1929 if self._closed:
1929 if self._closed:
1930 raise ValueError('stream is closed')
1930 raise ValueError('stream is closed')
1931
1931
1932 if self._finished_output:
1932 if self._finished_output:
1933 return 0
1933 return 0
1934
1934
1935 # TODO use writable=True once we require CFFI >= 1.12.
1935 # TODO use writable=True once we require CFFI >= 1.12.
1936 dest_buffer = ffi.from_buffer(b)
1936 dest_buffer = ffi.from_buffer(b)
1937 ffi.memmove(b, b'', 0)
1937 ffi.memmove(b, b'', 0)
1938 out_buffer = ffi.new('ZSTD_outBuffer *')
1938 out_buffer = ffi.new('ZSTD_outBuffer *')
1939 out_buffer.dst = dest_buffer
1939 out_buffer.dst = dest_buffer
1940 out_buffer.size = len(dest_buffer)
1940 out_buffer.size = len(dest_buffer)
1941 out_buffer.pos = 0
1941 out_buffer.pos = 0
1942
1942
1943 self._read_input()
1943 self._read_input()
1944 if self._decompress_into_buffer(out_buffer):
1944 if self._decompress_into_buffer(out_buffer):
1945 self._bytes_decompressed += out_buffer.pos
1945 self._bytes_decompressed += out_buffer.pos
1946 return out_buffer.pos
1946 return out_buffer.pos
1947
1947
1948 while not self._finished_input:
1948 while not self._finished_input:
1949 self._read_input()
1949 self._read_input()
1950 if self._decompress_into_buffer(out_buffer):
1950 if self._decompress_into_buffer(out_buffer):
1951 self._bytes_decompressed += out_buffer.pos
1951 self._bytes_decompressed += out_buffer.pos
1952 return out_buffer.pos
1952 return out_buffer.pos
1953
1953
1954 self._bytes_decompressed += out_buffer.pos
1954 self._bytes_decompressed += out_buffer.pos
1955 return out_buffer.pos
1955 return out_buffer.pos
1956
1956
1957 def read1(self, size=-1):
1957 def read1(self, size=-1):
1958 if self._closed:
1958 if self._closed:
1959 raise ValueError('stream is closed')
1959 raise ValueError('stream is closed')
1960
1960
1961 if size < -1:
1961 if size < -1:
1962 raise ValueError('cannot read negative amounts less than -1')
1962 raise ValueError('cannot read negative amounts less than -1')
1963
1963
1964 if self._finished_output or size == 0:
1964 if self._finished_output or size == 0:
1965 return b''
1965 return b''
1966
1966
1967 # -1 returns arbitrary number of bytes.
1967 # -1 returns arbitrary number of bytes.
1968 if size == -1:
1968 if size == -1:
1969 size = DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1969 size = DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE
1970
1970
1971 dst_buffer = ffi.new('char[]', size)
1971 dst_buffer = ffi.new('char[]', size)
1972 out_buffer = ffi.new('ZSTD_outBuffer *')
1972 out_buffer = ffi.new('ZSTD_outBuffer *')
1973 out_buffer.dst = dst_buffer
1973 out_buffer.dst = dst_buffer
1974 out_buffer.size = size
1974 out_buffer.size = size
1975 out_buffer.pos = 0
1975 out_buffer.pos = 0
1976
1976
1977 # read1() dictates that we can perform at most 1 call to underlying
1977 # read1() dictates that we can perform at most 1 call to underlying
1978 # stream to get input. However, we can't satisfy this restriction with
1978 # stream to get input. However, we can't satisfy this restriction with
1979 # decompression because not all input generates output. So we allow
1979 # decompression because not all input generates output. So we allow
1980 # multiple read(). But unlike read(), we stop once we have any output.
1980 # multiple read(). But unlike read(), we stop once we have any output.
1981 while not self._finished_input:
1981 while not self._finished_input:
1982 self._read_input()
1982 self._read_input()
1983 self._decompress_into_buffer(out_buffer)
1983 self._decompress_into_buffer(out_buffer)
1984
1984
1985 if out_buffer.pos:
1985 if out_buffer.pos:
1986 break
1986 break
1987
1987
1988 self._bytes_decompressed += out_buffer.pos
1988 self._bytes_decompressed += out_buffer.pos
1989 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1989 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1990
1990
1991 def readinto1(self, b):
1991 def readinto1(self, b):
1992 if self._closed:
1992 if self._closed:
1993 raise ValueError('stream is closed')
1993 raise ValueError('stream is closed')
1994
1994
1995 if self._finished_output:
1995 if self._finished_output:
1996 return 0
1996 return 0
1997
1997
1998 # TODO use writable=True once we require CFFI >= 1.12.
1998 # TODO use writable=True once we require CFFI >= 1.12.
1999 dest_buffer = ffi.from_buffer(b)
1999 dest_buffer = ffi.from_buffer(b)
2000 ffi.memmove(b, b'', 0)
2000 ffi.memmove(b, b'', 0)
2001
2001
2002 out_buffer = ffi.new('ZSTD_outBuffer *')
2002 out_buffer = ffi.new('ZSTD_outBuffer *')
2003 out_buffer.dst = dest_buffer
2003 out_buffer.dst = dest_buffer
2004 out_buffer.size = len(dest_buffer)
2004 out_buffer.size = len(dest_buffer)
2005 out_buffer.pos = 0
2005 out_buffer.pos = 0
2006
2006
2007 while not self._finished_input and not self._finished_output:
2007 while not self._finished_input and not self._finished_output:
2008 self._read_input()
2008 self._read_input()
2009 self._decompress_into_buffer(out_buffer)
2009 self._decompress_into_buffer(out_buffer)
2010
2010
2011 if out_buffer.pos:
2011 if out_buffer.pos:
2012 break
2012 break
2013
2013
2014 self._bytes_decompressed += out_buffer.pos
2014 self._bytes_decompressed += out_buffer.pos
2015 return out_buffer.pos
2015 return out_buffer.pos
2016
2016
2017 def seek(self, pos, whence=os.SEEK_SET):
2017 def seek(self, pos, whence=os.SEEK_SET):
2018 if self._closed:
2018 if self._closed:
2019 raise ValueError('stream is closed')
2019 raise ValueError('stream is closed')
2020
2020
2021 read_amount = 0
2021 read_amount = 0
2022
2022
2023 if whence == os.SEEK_SET:
2023 if whence == os.SEEK_SET:
2024 if pos < 0:
2024 if pos < 0:
2025 raise ValueError('cannot seek to negative position with SEEK_SET')
2025 raise ValueError('cannot seek to negative position with SEEK_SET')
2026
2026
2027 if pos < self._bytes_decompressed:
2027 if pos < self._bytes_decompressed:
2028 raise ValueError('cannot seek zstd decompression stream '
2028 raise ValueError('cannot seek zstd decompression stream '
2029 'backwards')
2029 'backwards')
2030
2030
2031 read_amount = pos - self._bytes_decompressed
2031 read_amount = pos - self._bytes_decompressed
2032
2032
2033 elif whence == os.SEEK_CUR:
2033 elif whence == os.SEEK_CUR:
2034 if pos < 0:
2034 if pos < 0:
2035 raise ValueError('cannot seek zstd decompression stream '
2035 raise ValueError('cannot seek zstd decompression stream '
2036 'backwards')
2036 'backwards')
2037
2037
2038 read_amount = pos
2038 read_amount = pos
2039 elif whence == os.SEEK_END:
2039 elif whence == os.SEEK_END:
2040 raise ValueError('zstd decompression streams cannot be seeked '
2040 raise ValueError('zstd decompression streams cannot be seeked '
2041 'with SEEK_END')
2041 'with SEEK_END')
2042
2042
2043 while read_amount:
2043 while read_amount:
2044 result = self.read(min(read_amount,
2044 result = self.read(min(read_amount,
2045 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE))
2045 DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE))
2046
2046
2047 if not result:
2047 if not result:
2048 break
2048 break
2049
2049
2050 read_amount -= len(result)
2050 read_amount -= len(result)
2051
2051
2052 return self._bytes_decompressed
2052 return self._bytes_decompressed
2053
2053
2054 class ZstdDecompressionWriter(object):
2054 class ZstdDecompressionWriter(object):
2055 def __init__(self, decompressor, writer, write_size, write_return_read):
2055 def __init__(self, decompressor, writer, write_size, write_return_read):
2056 decompressor._ensure_dctx()
2056 decompressor._ensure_dctx()
2057
2057
2058 self._decompressor = decompressor
2058 self._decompressor = decompressor
2059 self._writer = writer
2059 self._writer = writer
2060 self._write_size = write_size
2060 self._write_size = write_size
2061 self._write_return_read = bool(write_return_read)
2061 self._write_return_read = bool(write_return_read)
2062 self._entered = False
2062 self._entered = False
2063 self._closed = False
2063 self._closed = False
2064
2064
2065 def __enter__(self):
2065 def __enter__(self):
2066 if self._closed:
2066 if self._closed:
2067 raise ValueError('stream is closed')
2067 raise ValueError('stream is closed')
2068
2068
2069 if self._entered:
2069 if self._entered:
2070 raise ZstdError('cannot __enter__ multiple times')
2070 raise ZstdError('cannot __enter__ multiple times')
2071
2071
2072 self._entered = True
2072 self._entered = True
2073
2073
2074 return self
2074 return self
2075
2075
2076 def __exit__(self, exc_type, exc_value, exc_tb):
2076 def __exit__(self, exc_type, exc_value, exc_tb):
2077 self._entered = False
2077 self._entered = False
2078 self.close()
2078 self.close()
2079
2079
2080 def memory_size(self):
2080 def memory_size(self):
2081 return lib.ZSTD_sizeof_DCtx(self._decompressor._dctx)
2081 return lib.ZSTD_sizeof_DCtx(self._decompressor._dctx)
2082
2082
2083 def close(self):
2083 def close(self):
2084 if self._closed:
2084 if self._closed:
2085 return
2085 return
2086
2086
2087 try:
2087 try:
2088 self.flush()
2088 self.flush()
2089 finally:
2089 finally:
2090 self._closed = True
2090 self._closed = True
2091
2091
2092 f = getattr(self._writer, 'close', None)
2092 f = getattr(self._writer, 'close', None)
2093 if f:
2093 if f:
2094 f()
2094 f()
2095
2095
2096 @property
2096 @property
2097 def closed(self):
2097 def closed(self):
2098 return self._closed
2098 return self._closed
2099
2099
2100 def fileno(self):
2100 def fileno(self):
2101 f = getattr(self._writer, 'fileno', None)
2101 f = getattr(self._writer, 'fileno', None)
2102 if f:
2102 if f:
2103 return f()
2103 return f()
2104 else:
2104 else:
2105 raise OSError('fileno not available on underlying writer')
2105 raise OSError('fileno not available on underlying writer')
2106
2106
2107 def flush(self):
2107 def flush(self):
2108 if self._closed:
2108 if self._closed:
2109 raise ValueError('stream is closed')
2109 raise ValueError('stream is closed')
2110
2110
2111 f = getattr(self._writer, 'flush', None)
2111 f = getattr(self._writer, 'flush', None)
2112 if f:
2112 if f:
2113 return f()
2113 return f()
2114
2114
2115 def isatty(self):
2115 def isatty(self):
2116 return False
2116 return False
2117
2117
2118 def readable(self):
2118 def readable(self):
2119 return False
2119 return False
2120
2120
2121 def readline(self, size=-1):
2121 def readline(self, size=-1):
2122 raise io.UnsupportedOperation()
2122 raise io.UnsupportedOperation()
2123
2123
2124 def readlines(self, hint=-1):
2124 def readlines(self, hint=-1):
2125 raise io.UnsupportedOperation()
2125 raise io.UnsupportedOperation()
2126
2126
2127 def seek(self, offset, whence=None):
2127 def seek(self, offset, whence=None):
2128 raise io.UnsupportedOperation()
2128 raise io.UnsupportedOperation()
2129
2129
2130 def seekable(self):
2130 def seekable(self):
2131 return False
2131 return False
2132
2132
2133 def tell(self):
2133 def tell(self):
2134 raise io.UnsupportedOperation()
2134 raise io.UnsupportedOperation()
2135
2135
2136 def truncate(self, size=None):
2136 def truncate(self, size=None):
2137 raise io.UnsupportedOperation()
2137 raise io.UnsupportedOperation()
2138
2138
2139 def writable(self):
2139 def writable(self):
2140 return True
2140 return True
2141
2141
2142 def writelines(self, lines):
2142 def writelines(self, lines):
2143 raise io.UnsupportedOperation()
2143 raise io.UnsupportedOperation()
2144
2144
2145 def read(self, size=-1):
2145 def read(self, size=-1):
2146 raise io.UnsupportedOperation()
2146 raise io.UnsupportedOperation()
2147
2147
2148 def readall(self):
2148 def readall(self):
2149 raise io.UnsupportedOperation()
2149 raise io.UnsupportedOperation()
2150
2150
2151 def readinto(self, b):
2151 def readinto(self, b):
2152 raise io.UnsupportedOperation()
2152 raise io.UnsupportedOperation()
2153
2153
2154 def write(self, data):
2154 def write(self, data):
2155 if self._closed:
2155 if self._closed:
2156 raise ValueError('stream is closed')
2156 raise ValueError('stream is closed')
2157
2157
2158 total_write = 0
2158 total_write = 0
2159
2159
2160 in_buffer = ffi.new('ZSTD_inBuffer *')
2160 in_buffer = ffi.new('ZSTD_inBuffer *')
2161 out_buffer = ffi.new('ZSTD_outBuffer *')
2161 out_buffer = ffi.new('ZSTD_outBuffer *')
2162
2162
2163 data_buffer = ffi.from_buffer(data)
2163 data_buffer = ffi.from_buffer(data)
2164 in_buffer.src = data_buffer
2164 in_buffer.src = data_buffer
2165 in_buffer.size = len(data_buffer)
2165 in_buffer.size = len(data_buffer)
2166 in_buffer.pos = 0
2166 in_buffer.pos = 0
2167
2167
2168 dst_buffer = ffi.new('char[]', self._write_size)
2168 dst_buffer = ffi.new('char[]', self._write_size)
2169 out_buffer.dst = dst_buffer
2169 out_buffer.dst = dst_buffer
2170 out_buffer.size = len(dst_buffer)
2170 out_buffer.size = len(dst_buffer)
2171 out_buffer.pos = 0
2171 out_buffer.pos = 0
2172
2172
2173 dctx = self._decompressor._dctx
2173 dctx = self._decompressor._dctx
2174
2174
2175 while in_buffer.pos < in_buffer.size:
2175 while in_buffer.pos < in_buffer.size:
2176 zresult = lib.ZSTD_decompressStream(dctx, out_buffer, in_buffer)
2176 zresult = lib.ZSTD_decompressStream(dctx, out_buffer, in_buffer)
2177 if lib.ZSTD_isError(zresult):
2177 if lib.ZSTD_isError(zresult):
2178 raise ZstdError('zstd decompress error: %s' %
2178 raise ZstdError('zstd decompress error: %s' %
2179 _zstd_error(zresult))
2179 _zstd_error(zresult))
2180
2180
2181 if out_buffer.pos:
2181 if out_buffer.pos:
2182 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
2182 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
2183 total_write += out_buffer.pos
2183 total_write += out_buffer.pos
2184 out_buffer.pos = 0
2184 out_buffer.pos = 0
2185
2185
2186 if self._write_return_read:
2186 if self._write_return_read:
2187 return in_buffer.pos
2187 return in_buffer.pos
2188 else:
2188 else:
2189 return total_write
2189 return total_write
2190
2190
2191
2191
2192 class ZstdDecompressor(object):
2192 class ZstdDecompressor(object):
2193 def __init__(self, dict_data=None, max_window_size=0, format=FORMAT_ZSTD1):
2193 def __init__(self, dict_data=None, max_window_size=0, format=FORMAT_ZSTD1):
2194 self._dict_data = dict_data
2194 self._dict_data = dict_data
2195 self._max_window_size = max_window_size
2195 self._max_window_size = max_window_size
2196 self._format = format
2196 self._format = format
2197
2197
2198 dctx = lib.ZSTD_createDCtx()
2198 dctx = lib.ZSTD_createDCtx()
2199 if dctx == ffi.NULL:
2199 if dctx == ffi.NULL:
2200 raise MemoryError()
2200 raise MemoryError()
2201
2201
2202 self._dctx = dctx
2202 self._dctx = dctx
2203
2203
2204 # Defer setting up garbage collection until full state is loaded so
2204 # Defer setting up garbage collection until full state is loaded so
2205 # the memory size is more accurate.
2205 # the memory size is more accurate.
2206 try:
2206 try:
2207 self._ensure_dctx()
2207 self._ensure_dctx()
2208 finally:
2208 finally:
2209 self._dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx,
2209 self._dctx = ffi.gc(dctx, lib.ZSTD_freeDCtx,
2210 size=lib.ZSTD_sizeof_DCtx(dctx))
2210 size=lib.ZSTD_sizeof_DCtx(dctx))
2211
2211
2212 def memory_size(self):
2212 def memory_size(self):
2213 return lib.ZSTD_sizeof_DCtx(self._dctx)
2213 return lib.ZSTD_sizeof_DCtx(self._dctx)
2214
2214
2215 def decompress(self, data, max_output_size=0):
2215 def decompress(self, data, max_output_size=0):
2216 self._ensure_dctx()
2216 self._ensure_dctx()
2217
2217
2218 data_buffer = ffi.from_buffer(data)
2218 data_buffer = ffi.from_buffer(data)
2219
2219
2220 output_size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer))
2220 output_size = lib.ZSTD_getFrameContentSize(data_buffer, len(data_buffer))
2221
2221
2222 if output_size == lib.ZSTD_CONTENTSIZE_ERROR:
2222 if output_size == lib.ZSTD_CONTENTSIZE_ERROR:
2223 raise ZstdError('error determining content size from frame header')
2223 raise ZstdError('error determining content size from frame header')
2224 elif output_size == 0:
2224 elif output_size == 0:
2225 return b''
2225 return b''
2226 elif output_size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2226 elif output_size == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2227 if not max_output_size:
2227 if not max_output_size:
2228 raise ZstdError('could not determine content size in frame header')
2228 raise ZstdError('could not determine content size in frame header')
2229
2229
2230 result_buffer = ffi.new('char[]', max_output_size)
2230 result_buffer = ffi.new('char[]', max_output_size)
2231 result_size = max_output_size
2231 result_size = max_output_size
2232 output_size = 0
2232 output_size = 0
2233 else:
2233 else:
2234 result_buffer = ffi.new('char[]', output_size)
2234 result_buffer = ffi.new('char[]', output_size)
2235 result_size = output_size
2235 result_size = output_size
2236
2236
2237 out_buffer = ffi.new('ZSTD_outBuffer *')
2237 out_buffer = ffi.new('ZSTD_outBuffer *')
2238 out_buffer.dst = result_buffer
2238 out_buffer.dst = result_buffer
2239 out_buffer.size = result_size
2239 out_buffer.size = result_size
2240 out_buffer.pos = 0
2240 out_buffer.pos = 0
2241
2241
2242 in_buffer = ffi.new('ZSTD_inBuffer *')
2242 in_buffer = ffi.new('ZSTD_inBuffer *')
2243 in_buffer.src = data_buffer
2243 in_buffer.src = data_buffer
2244 in_buffer.size = len(data_buffer)
2244 in_buffer.size = len(data_buffer)
2245 in_buffer.pos = 0
2245 in_buffer.pos = 0
2246
2246
2247 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2247 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2248 if lib.ZSTD_isError(zresult):
2248 if lib.ZSTD_isError(zresult):
2249 raise ZstdError('decompression error: %s' %
2249 raise ZstdError('decompression error: %s' %
2250 _zstd_error(zresult))
2250 _zstd_error(zresult))
2251 elif zresult:
2251 elif zresult:
2252 raise ZstdError('decompression error: did not decompress full frame')
2252 raise ZstdError('decompression error: did not decompress full frame')
2253 elif output_size and out_buffer.pos != output_size:
2253 elif output_size and out_buffer.pos != output_size:
2254 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
2254 raise ZstdError('decompression error: decompressed %d bytes; expected %d' %
2255 (zresult, output_size))
2255 (zresult, output_size))
2256
2256
2257 return ffi.buffer(result_buffer, out_buffer.pos)[:]
2257 return ffi.buffer(result_buffer, out_buffer.pos)[:]
2258
2258
2259 def stream_reader(self, source, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2259 def stream_reader(self, source, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2260 read_across_frames=False):
2260 read_across_frames=False):
2261 self._ensure_dctx()
2261 self._ensure_dctx()
2262 return ZstdDecompressionReader(self, source, read_size, read_across_frames)
2262 return ZstdDecompressionReader(self, source, read_size, read_across_frames)
2263
2263
2264 def decompressobj(self, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2264 def decompressobj(self, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2265 if write_size < 1:
2265 if write_size < 1:
2266 raise ValueError('write_size must be positive')
2266 raise ValueError('write_size must be positive')
2267
2267
2268 self._ensure_dctx()
2268 self._ensure_dctx()
2269 return ZstdDecompressionObj(self, write_size=write_size)
2269 return ZstdDecompressionObj(self, write_size=write_size)
2270
2270
2271 def read_to_iter(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2271 def read_to_iter(self, reader, read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2272 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2272 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2273 skip_bytes=0):
2273 skip_bytes=0):
2274 if skip_bytes >= read_size:
2274 if skip_bytes >= read_size:
2275 raise ValueError('skip_bytes must be smaller than read_size')
2275 raise ValueError('skip_bytes must be smaller than read_size')
2276
2276
2277 if hasattr(reader, 'read'):
2277 if hasattr(reader, 'read'):
2278 have_read = True
2278 have_read = True
2279 elif hasattr(reader, '__getitem__'):
2279 elif hasattr(reader, '__getitem__'):
2280 have_read = False
2280 have_read = False
2281 buffer_offset = 0
2281 buffer_offset = 0
2282 size = len(reader)
2282 size = len(reader)
2283 else:
2283 else:
2284 raise ValueError('must pass an object with a read() method or '
2284 raise ValueError('must pass an object with a read() method or '
2285 'conforms to buffer protocol')
2285 'conforms to buffer protocol')
2286
2286
2287 if skip_bytes:
2287 if skip_bytes:
2288 if have_read:
2288 if have_read:
2289 reader.read(skip_bytes)
2289 reader.read(skip_bytes)
2290 else:
2290 else:
2291 if skip_bytes > size:
2291 if skip_bytes > size:
2292 raise ValueError('skip_bytes larger than first input chunk')
2292 raise ValueError('skip_bytes larger than first input chunk')
2293
2293
2294 buffer_offset = skip_bytes
2294 buffer_offset = skip_bytes
2295
2295
2296 self._ensure_dctx()
2296 self._ensure_dctx()
2297
2297
2298 in_buffer = ffi.new('ZSTD_inBuffer *')
2298 in_buffer = ffi.new('ZSTD_inBuffer *')
2299 out_buffer = ffi.new('ZSTD_outBuffer *')
2299 out_buffer = ffi.new('ZSTD_outBuffer *')
2300
2300
2301 dst_buffer = ffi.new('char[]', write_size)
2301 dst_buffer = ffi.new('char[]', write_size)
2302 out_buffer.dst = dst_buffer
2302 out_buffer.dst = dst_buffer
2303 out_buffer.size = len(dst_buffer)
2303 out_buffer.size = len(dst_buffer)
2304 out_buffer.pos = 0
2304 out_buffer.pos = 0
2305
2305
2306 while True:
2306 while True:
2307 assert out_buffer.pos == 0
2307 assert out_buffer.pos == 0
2308
2308
2309 if have_read:
2309 if have_read:
2310 read_result = reader.read(read_size)
2310 read_result = reader.read(read_size)
2311 else:
2311 else:
2312 remaining = size - buffer_offset
2312 remaining = size - buffer_offset
2313 slice_size = min(remaining, read_size)
2313 slice_size = min(remaining, read_size)
2314 read_result = reader[buffer_offset:buffer_offset + slice_size]
2314 read_result = reader[buffer_offset:buffer_offset + slice_size]
2315 buffer_offset += slice_size
2315 buffer_offset += slice_size
2316
2316
2317 # No new input. Break out of read loop.
2317 # No new input. Break out of read loop.
2318 if not read_result:
2318 if not read_result:
2319 break
2319 break
2320
2320
2321 # Feed all read data into decompressor and emit output until
2321 # Feed all read data into decompressor and emit output until
2322 # exhausted.
2322 # exhausted.
2323 read_buffer = ffi.from_buffer(read_result)
2323 read_buffer = ffi.from_buffer(read_result)
2324 in_buffer.src = read_buffer
2324 in_buffer.src = read_buffer
2325 in_buffer.size = len(read_buffer)
2325 in_buffer.size = len(read_buffer)
2326 in_buffer.pos = 0
2326 in_buffer.pos = 0
2327
2327
2328 while in_buffer.pos < in_buffer.size:
2328 while in_buffer.pos < in_buffer.size:
2329 assert out_buffer.pos == 0
2329 assert out_buffer.pos == 0
2330
2330
2331 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2331 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2332 if lib.ZSTD_isError(zresult):
2332 if lib.ZSTD_isError(zresult):
2333 raise ZstdError('zstd decompress error: %s' %
2333 raise ZstdError('zstd decompress error: %s' %
2334 _zstd_error(zresult))
2334 _zstd_error(zresult))
2335
2335
2336 if out_buffer.pos:
2336 if out_buffer.pos:
2337 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
2337 data = ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
2338 out_buffer.pos = 0
2338 out_buffer.pos = 0
2339 yield data
2339 yield data
2340
2340
2341 if zresult == 0:
2341 if zresult == 0:
2342 return
2342 return
2343
2343
2344 # Repeat loop to collect more input data.
2344 # Repeat loop to collect more input data.
2345 continue
2345 continue
2346
2346
2347 # If we get here, input is exhausted.
2347 # If we get here, input is exhausted.
2348
2348
2349 read_from = read_to_iter
2349 read_from = read_to_iter
2350
2350
2351 def stream_writer(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2351 def stream_writer(self, writer, write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE,
2352 write_return_read=False):
2352 write_return_read=False):
2353 if not hasattr(writer, 'write'):
2353 if not hasattr(writer, 'write'):
2354 raise ValueError('must pass an object with a write() method')
2354 raise ValueError('must pass an object with a write() method')
2355
2355
2356 return ZstdDecompressionWriter(self, writer, write_size,
2356 return ZstdDecompressionWriter(self, writer, write_size,
2357 write_return_read)
2357 write_return_read)
2358
2358
2359 write_to = stream_writer
2359 write_to = stream_writer
2360
2360
2361 def copy_stream(self, ifh, ofh,
2361 def copy_stream(self, ifh, ofh,
2362 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2362 read_size=DECOMPRESSION_RECOMMENDED_INPUT_SIZE,
2363 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2363 write_size=DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE):
2364 if not hasattr(ifh, 'read'):
2364 if not hasattr(ifh, 'read'):
2365 raise ValueError('first argument must have a read() method')
2365 raise ValueError('first argument must have a read() method')
2366 if not hasattr(ofh, 'write'):
2366 if not hasattr(ofh, 'write'):
2367 raise ValueError('second argument must have a write() method')
2367 raise ValueError('second argument must have a write() method')
2368
2368
2369 self._ensure_dctx()
2369 self._ensure_dctx()
2370
2370
2371 in_buffer = ffi.new('ZSTD_inBuffer *')
2371 in_buffer = ffi.new('ZSTD_inBuffer *')
2372 out_buffer = ffi.new('ZSTD_outBuffer *')
2372 out_buffer = ffi.new('ZSTD_outBuffer *')
2373
2373
2374 dst_buffer = ffi.new('char[]', write_size)
2374 dst_buffer = ffi.new('char[]', write_size)
2375 out_buffer.dst = dst_buffer
2375 out_buffer.dst = dst_buffer
2376 out_buffer.size = write_size
2376 out_buffer.size = write_size
2377 out_buffer.pos = 0
2377 out_buffer.pos = 0
2378
2378
2379 total_read, total_write = 0, 0
2379 total_read, total_write = 0, 0
2380
2380
2381 # Read all available input.
2381 # Read all available input.
2382 while True:
2382 while True:
2383 data = ifh.read(read_size)
2383 data = ifh.read(read_size)
2384 if not data:
2384 if not data:
2385 break
2385 break
2386
2386
2387 data_buffer = ffi.from_buffer(data)
2387 data_buffer = ffi.from_buffer(data)
2388 total_read += len(data_buffer)
2388 total_read += len(data_buffer)
2389 in_buffer.src = data_buffer
2389 in_buffer.src = data_buffer
2390 in_buffer.size = len(data_buffer)
2390 in_buffer.size = len(data_buffer)
2391 in_buffer.pos = 0
2391 in_buffer.pos = 0
2392
2392
2393 # Flush all read data to output.
2393 # Flush all read data to output.
2394 while in_buffer.pos < in_buffer.size:
2394 while in_buffer.pos < in_buffer.size:
2395 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2395 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2396 if lib.ZSTD_isError(zresult):
2396 if lib.ZSTD_isError(zresult):
2397 raise ZstdError('zstd decompressor error: %s' %
2397 raise ZstdError('zstd decompressor error: %s' %
2398 _zstd_error(zresult))
2398 _zstd_error(zresult))
2399
2399
2400 if out_buffer.pos:
2400 if out_buffer.pos:
2401 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
2401 ofh.write(ffi.buffer(out_buffer.dst, out_buffer.pos))
2402 total_write += out_buffer.pos
2402 total_write += out_buffer.pos
2403 out_buffer.pos = 0
2403 out_buffer.pos = 0
2404
2404
2405 # Continue loop to keep reading.
2405 # Continue loop to keep reading.
2406
2406
2407 return total_read, total_write
2407 return total_read, total_write
2408
2408
2409 def decompress_content_dict_chain(self, frames):
2409 def decompress_content_dict_chain(self, frames):
2410 if not isinstance(frames, list):
2410 if not isinstance(frames, list):
2411 raise TypeError('argument must be a list')
2411 raise TypeError('argument must be a list')
2412
2412
2413 if not frames:
2413 if not frames:
2414 raise ValueError('empty input chain')
2414 raise ValueError('empty input chain')
2415
2415
2416 # First chunk should not be using a dictionary. We handle it specially.
2416 # First chunk should not be using a dictionary. We handle it specially.
2417 chunk = frames[0]
2417 chunk = frames[0]
2418 if not isinstance(chunk, bytes_type):
2418 if not isinstance(chunk, bytes_type):
2419 raise ValueError('chunk 0 must be bytes')
2419 raise ValueError('chunk 0 must be bytes')
2420
2420
2421 # All chunks should be zstd frames and should have content size set.
2421 # All chunks should be zstd frames and should have content size set.
2422 chunk_buffer = ffi.from_buffer(chunk)
2422 chunk_buffer = ffi.from_buffer(chunk)
2423 params = ffi.new('ZSTD_frameHeader *')
2423 params = ffi.new('ZSTD_frameHeader *')
2424 zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer))
2424 zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer))
2425 if lib.ZSTD_isError(zresult):
2425 if lib.ZSTD_isError(zresult):
2426 raise ValueError('chunk 0 is not a valid zstd frame')
2426 raise ValueError('chunk 0 is not a valid zstd frame')
2427 elif zresult:
2427 elif zresult:
2428 raise ValueError('chunk 0 is too small to contain a zstd frame')
2428 raise ValueError('chunk 0 is too small to contain a zstd frame')
2429
2429
2430 if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2430 if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2431 raise ValueError('chunk 0 missing content size in frame')
2431 raise ValueError('chunk 0 missing content size in frame')
2432
2432
2433 self._ensure_dctx(load_dict=False)
2433 self._ensure_dctx(load_dict=False)
2434
2434
2435 last_buffer = ffi.new('char[]', params.frameContentSize)
2435 last_buffer = ffi.new('char[]', params.frameContentSize)
2436
2436
2437 out_buffer = ffi.new('ZSTD_outBuffer *')
2437 out_buffer = ffi.new('ZSTD_outBuffer *')
2438 out_buffer.dst = last_buffer
2438 out_buffer.dst = last_buffer
2439 out_buffer.size = len(last_buffer)
2439 out_buffer.size = len(last_buffer)
2440 out_buffer.pos = 0
2440 out_buffer.pos = 0
2441
2441
2442 in_buffer = ffi.new('ZSTD_inBuffer *')
2442 in_buffer = ffi.new('ZSTD_inBuffer *')
2443 in_buffer.src = chunk_buffer
2443 in_buffer.src = chunk_buffer
2444 in_buffer.size = len(chunk_buffer)
2444 in_buffer.size = len(chunk_buffer)
2445 in_buffer.pos = 0
2445 in_buffer.pos = 0
2446
2446
2447 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2447 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2448 if lib.ZSTD_isError(zresult):
2448 if lib.ZSTD_isError(zresult):
2449 raise ZstdError('could not decompress chunk 0: %s' %
2449 raise ZstdError('could not decompress chunk 0: %s' %
2450 _zstd_error(zresult))
2450 _zstd_error(zresult))
2451 elif zresult:
2451 elif zresult:
2452 raise ZstdError('chunk 0 did not decompress full frame')
2452 raise ZstdError('chunk 0 did not decompress full frame')
2453
2453
2454 # Special case of chain length of 1
2454 # Special case of chain length of 1
2455 if len(frames) == 1:
2455 if len(frames) == 1:
2456 return ffi.buffer(last_buffer, len(last_buffer))[:]
2456 return ffi.buffer(last_buffer, len(last_buffer))[:]
2457
2457
2458 i = 1
2458 i = 1
2459 while i < len(frames):
2459 while i < len(frames):
2460 chunk = frames[i]
2460 chunk = frames[i]
2461 if not isinstance(chunk, bytes_type):
2461 if not isinstance(chunk, bytes_type):
2462 raise ValueError('chunk %d must be bytes' % i)
2462 raise ValueError('chunk %d must be bytes' % i)
2463
2463
2464 chunk_buffer = ffi.from_buffer(chunk)
2464 chunk_buffer = ffi.from_buffer(chunk)
2465 zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer))
2465 zresult = lib.ZSTD_getFrameHeader(params, chunk_buffer, len(chunk_buffer))
2466 if lib.ZSTD_isError(zresult):
2466 if lib.ZSTD_isError(zresult):
2467 raise ValueError('chunk %d is not a valid zstd frame' % i)
2467 raise ValueError('chunk %d is not a valid zstd frame' % i)
2468 elif zresult:
2468 elif zresult:
2469 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
2469 raise ValueError('chunk %d is too small to contain a zstd frame' % i)
2470
2470
2471 if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2471 if params.frameContentSize == lib.ZSTD_CONTENTSIZE_UNKNOWN:
2472 raise ValueError('chunk %d missing content size in frame' % i)
2472 raise ValueError('chunk %d missing content size in frame' % i)
2473
2473
2474 dest_buffer = ffi.new('char[]', params.frameContentSize)
2474 dest_buffer = ffi.new('char[]', params.frameContentSize)
2475
2475
2476 out_buffer.dst = dest_buffer
2476 out_buffer.dst = dest_buffer
2477 out_buffer.size = len(dest_buffer)
2477 out_buffer.size = len(dest_buffer)
2478 out_buffer.pos = 0
2478 out_buffer.pos = 0
2479
2479
2480 in_buffer.src = chunk_buffer
2480 in_buffer.src = chunk_buffer
2481 in_buffer.size = len(chunk_buffer)
2481 in_buffer.size = len(chunk_buffer)
2482 in_buffer.pos = 0
2482 in_buffer.pos = 0
2483
2483
2484 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2484 zresult = lib.ZSTD_decompressStream(self._dctx, out_buffer, in_buffer)
2485 if lib.ZSTD_isError(zresult):
2485 if lib.ZSTD_isError(zresult):
2486 raise ZstdError('could not decompress chunk %d: %s' %
2486 raise ZstdError('could not decompress chunk %d: %s' %
2487 _zstd_error(zresult))
2487 _zstd_error(zresult))
2488 elif zresult:
2488 elif zresult:
2489 raise ZstdError('chunk %d did not decompress full frame' % i)
2489 raise ZstdError('chunk %d did not decompress full frame' % i)
2490
2490
2491 last_buffer = dest_buffer
2491 last_buffer = dest_buffer
2492 i += 1
2492 i += 1
2493
2493
2494 return ffi.buffer(last_buffer, len(last_buffer))[:]
2494 return ffi.buffer(last_buffer, len(last_buffer))[:]
2495
2495
2496 def _ensure_dctx(self, load_dict=True):
2496 def _ensure_dctx(self, load_dict=True):
2497 lib.ZSTD_DCtx_reset(self._dctx, lib.ZSTD_reset_session_only)
2497 lib.ZSTD_DCtx_reset(self._dctx, lib.ZSTD_reset_session_only)
2498
2498
2499 if self._max_window_size:
2499 if self._max_window_size:
2500 zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx,
2500 zresult = lib.ZSTD_DCtx_setMaxWindowSize(self._dctx,
2501 self._max_window_size)
2501 self._max_window_size)
2502 if lib.ZSTD_isError(zresult):
2502 if lib.ZSTD_isError(zresult):
2503 raise ZstdError('unable to set max window size: %s' %
2503 raise ZstdError('unable to set max window size: %s' %
2504 _zstd_error(zresult))
2504 _zstd_error(zresult))
2505
2505
2506 zresult = lib.ZSTD_DCtx_setFormat(self._dctx, self._format)
2506 zresult = lib.ZSTD_DCtx_setFormat(self._dctx, self._format)
2507 if lib.ZSTD_isError(zresult):
2507 if lib.ZSTD_isError(zresult):
2508 raise ZstdError('unable to set decoding format: %s' %
2508 raise ZstdError('unable to set decoding format: %s' %
2509 _zstd_error(zresult))
2509 _zstd_error(zresult))
2510
2510
2511 if self._dict_data and load_dict:
2511 if self._dict_data and load_dict:
2512 zresult = lib.ZSTD_DCtx_refDDict(self._dctx, self._dict_data._ddict)
2512 zresult = lib.ZSTD_DCtx_refDDict(self._dctx, self._dict_data._ddict)
2513 if lib.ZSTD_isError(zresult):
2513 if lib.ZSTD_isError(zresult):
2514 raise ZstdError('unable to reference prepared dictionary: %s' %
2514 raise ZstdError('unable to reference prepared dictionary: %s' %
2515 _zstd_error(zresult))
2515 _zstd_error(zresult))
@@ -1,344 +1,344 b''
1 /**
1 /**
2 * Copyright (c) 2016-present, Gregory Szorc
2 * Copyright (c) 2016-present, Gregory Szorc
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This software may be modified and distributed under the terms
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
6 * of the BSD license. See the LICENSE file for details.
7 */
7 */
8
8
9 /* A Python C extension for Zstandard. */
9 /* A Python C extension for Zstandard. */
10
10
11 #if defined(_WIN32)
11 #if defined(_WIN32)
12 #define WIN32_LEAN_AND_MEAN
12 #define WIN32_LEAN_AND_MEAN
13 #include <Windows.h>
13 #include <Windows.h>
14 #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
14 #elif defined(__APPLE__) || defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
15 #include <sys/types.h>
15 #include <sys/types.h>
16 #include <sys/sysctl.h>
16 #include <sys/sysctl.h>
17 #endif
17 #endif
18
18
19 #include "python-zstandard.h"
19 #include "python-zstandard.h"
20
20
21 PyObject *ZstdError;
21 PyObject *ZstdError;
22
22
23 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
23 PyDoc_STRVAR(estimate_decompression_context_size__doc__,
24 "estimate_decompression_context_size()\n"
24 "estimate_decompression_context_size()\n"
25 "\n"
25 "\n"
26 "Estimate the amount of memory allocated to a decompression context.\n"
26 "Estimate the amount of memory allocated to a decompression context.\n"
27 );
27 );
28
28
29 static PyObject* estimate_decompression_context_size(PyObject* self) {
29 static PyObject* estimate_decompression_context_size(PyObject* self) {
30 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
30 return PyLong_FromSize_t(ZSTD_estimateDCtxSize());
31 }
31 }
32
32
33 PyDoc_STRVAR(frame_content_size__doc__,
33 PyDoc_STRVAR(frame_content_size__doc__,
34 "frame_content_size(data)\n"
34 "frame_content_size(data)\n"
35 "\n"
35 "\n"
36 "Obtain the decompressed size of a frame."
36 "Obtain the decompressed size of a frame."
37 );
37 );
38
38
39 static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
39 static PyObject* frame_content_size(PyObject* self, PyObject* args, PyObject* kwargs) {
40 static char* kwlist[] = {
40 static char* kwlist[] = {
41 "source",
41 "source",
42 NULL
42 NULL
43 };
43 };
44
44
45 Py_buffer source;
45 Py_buffer source;
46 PyObject* result = NULL;
46 PyObject* result = NULL;
47 unsigned long long size;
47 unsigned long long size;
48
48
49 #if PY_MAJOR_VERSION >= 3
49 #if PY_MAJOR_VERSION >= 3
50 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
50 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_content_size",
51 #else
51 #else
52 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size",
52 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_content_size",
53 #endif
53 #endif
54 kwlist, &source)) {
54 kwlist, &source)) {
55 return NULL;
55 return NULL;
56 }
56 }
57
57
58 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
58 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
59 PyErr_SetString(PyExc_ValueError,
59 PyErr_SetString(PyExc_ValueError,
60 "data buffer should be contiguous and have at most one dimension");
60 "data buffer should be contiguous and have at most one dimension");
61 goto finally;
61 goto finally;
62 }
62 }
63
63
64 size = ZSTD_getFrameContentSize(source.buf, source.len);
64 size = ZSTD_getFrameContentSize(source.buf, source.len);
65
65
66 if (size == ZSTD_CONTENTSIZE_ERROR) {
66 if (size == ZSTD_CONTENTSIZE_ERROR) {
67 PyErr_SetString(ZstdError, "error when determining content size");
67 PyErr_SetString(ZstdError, "error when determining content size");
68 }
68 }
69 else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
69 else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
70 result = PyLong_FromLong(-1);
70 result = PyLong_FromLong(-1);
71 }
71 }
72 else {
72 else {
73 result = PyLong_FromUnsignedLongLong(size);
73 result = PyLong_FromUnsignedLongLong(size);
74 }
74 }
75
75
76 finally:
76 finally:
77 PyBuffer_Release(&source);
77 PyBuffer_Release(&source);
78
78
79 return result;
79 return result;
80 }
80 }
81
81
82 PyDoc_STRVAR(frame_header_size__doc__,
82 PyDoc_STRVAR(frame_header_size__doc__,
83 "frame_header_size(data)\n"
83 "frame_header_size(data)\n"
84 "\n"
84 "\n"
85 "Obtain the size of a frame header.\n"
85 "Obtain the size of a frame header.\n"
86 );
86 );
87
87
88 static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
88 static PyObject* frame_header_size(PyObject* self, PyObject* args, PyObject* kwargs) {
89 static char* kwlist[] = {
89 static char* kwlist[] = {
90 "source",
90 "source",
91 NULL
91 NULL
92 };
92 };
93
93
94 Py_buffer source;
94 Py_buffer source;
95 PyObject* result = NULL;
95 PyObject* result = NULL;
96 size_t zresult;
96 size_t zresult;
97
97
98 #if PY_MAJOR_VERSION >= 3
98 #if PY_MAJOR_VERSION >= 3
99 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
99 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:frame_header_size",
100 #else
100 #else
101 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size",
101 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:frame_header_size",
102 #endif
102 #endif
103 kwlist, &source)) {
103 kwlist, &source)) {
104 return NULL;
104 return NULL;
105 }
105 }
106
106
107 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
107 if (!PyBuffer_IsContiguous(&source, 'C') || source.ndim > 1) {
108 PyErr_SetString(PyExc_ValueError,
108 PyErr_SetString(PyExc_ValueError,
109 "data buffer should be contiguous and have at most one dimension");
109 "data buffer should be contiguous and have at most one dimension");
110 goto finally;
110 goto finally;
111 }
111 }
112
112
113 zresult = ZSTD_frameHeaderSize(source.buf, source.len);
113 zresult = ZSTD_frameHeaderSize(source.buf, source.len);
114 if (ZSTD_isError(zresult)) {
114 if (ZSTD_isError(zresult)) {
115 PyErr_Format(ZstdError, "could not determine frame header size: %s",
115 PyErr_Format(ZstdError, "could not determine frame header size: %s",
116 ZSTD_getErrorName(zresult));
116 ZSTD_getErrorName(zresult));
117 }
117 }
118 else {
118 else {
119 result = PyLong_FromSize_t(zresult);
119 result = PyLong_FromSize_t(zresult);
120 }
120 }
121
121
122 finally:
122 finally:
123
123
124 PyBuffer_Release(&source);
124 PyBuffer_Release(&source);
125
125
126 return result;
126 return result;
127 }
127 }
128
128
129 PyDoc_STRVAR(get_frame_parameters__doc__,
129 PyDoc_STRVAR(get_frame_parameters__doc__,
130 "get_frame_parameters(data)\n"
130 "get_frame_parameters(data)\n"
131 "\n"
131 "\n"
132 "Obtains a ``FrameParameters`` instance by parsing data.\n");
132 "Obtains a ``FrameParameters`` instance by parsing data.\n");
133
133
134 PyDoc_STRVAR(train_dictionary__doc__,
134 PyDoc_STRVAR(train_dictionary__doc__,
135 "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
135 "train_dictionary(dict_size, samples, k=None, d=None, steps=None,\n"
136 " threads=None,notifications=0, dict_id=0, level=0)\n"
136 " threads=None,notifications=0, dict_id=0, level=0)\n"
137 "\n"
137 "\n"
138 "Train a dictionary from sample data using the COVER algorithm.\n"
138 "Train a dictionary from sample data using the COVER algorithm.\n"
139 "\n"
139 "\n"
140 "A compression dictionary of size ``dict_size`` will be created from the\n"
140 "A compression dictionary of size ``dict_size`` will be created from the\n"
141 "iterable of ``samples``. The raw dictionary bytes will be returned.\n"
141 "iterable of ``samples``. The raw dictionary bytes will be returned.\n"
142 "\n"
142 "\n"
143 "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
143 "The COVER algorithm has 2 parameters: ``k`` and ``d``. These control the\n"
144 "*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
144 "*segment size* and *dmer size*. A reasonable range for ``k`` is\n"
145 "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
145 "``[16, 2048+]``. A reasonable range for ``d`` is ``[6, 16]``.\n"
146 "``d`` must be less than or equal to ``k``.\n"
146 "``d`` must be less than or equal to ``k``.\n"
147 "\n"
147 "\n"
148 "``steps`` can be specified to control the number of steps through potential\n"
148 "``steps`` can be specified to control the number of steps through potential\n"
149 "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
149 "values of ``k`` and ``d`` to try. ``k`` and ``d`` will only be varied if\n"
150 "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
150 "those arguments are not defined. i.e. if ``d`` is ``8``, then only ``k``\n"
151 "will be varied in this mode.\n"
151 "will be varied in this mode.\n"
152 "\n"
152 "\n"
153 "``threads`` can specify how many threads to use to test various ``k`` and\n"
153 "``threads`` can specify how many threads to use to test various ``k`` and\n"
154 "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
154 "``d`` values. ``-1`` will use as many threads as available CPUs. By default,\n"
155 "a single thread is used.\n"
155 "a single thread is used.\n"
156 "\n"
156 "\n"
157 "When ``k`` and ``d`` are not defined, default values are used and the\n"
157 "When ``k`` and ``d`` are not defined, default values are used and the\n"
158 "algorithm will perform multiple iterations - or steps - to try to find\n"
158 "algorithm will perform multiple iterations - or steps - to try to find\n"
159 "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
159 "ideal parameters. If both ``k`` and ``d`` are specified, then those values\n"
160 "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
160 "will be used. ``steps`` or ``threads`` triggers optimization mode to test\n"
161 "multiple ``k`` and ``d`` variations.\n"
161 "multiple ``k`` and ``d`` variations.\n"
162 );
162 );
163
163
164 static char zstd_doc[] = "Interface to zstandard";
164 static char zstd_doc[] = "Interface to zstandard";
165
165
166 static PyMethodDef zstd_methods[] = {
166 static PyMethodDef zstd_methods[] = {
167 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
167 { "estimate_decompression_context_size", (PyCFunction)estimate_decompression_context_size,
168 METH_NOARGS, estimate_decompression_context_size__doc__ },
168 METH_NOARGS, estimate_decompression_context_size__doc__ },
169 { "frame_content_size", (PyCFunction)frame_content_size,
169 { "frame_content_size", (PyCFunction)frame_content_size,
170 METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
170 METH_VARARGS | METH_KEYWORDS, frame_content_size__doc__ },
171 { "frame_header_size", (PyCFunction)frame_header_size,
171 { "frame_header_size", (PyCFunction)frame_header_size,
172 METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
172 METH_VARARGS | METH_KEYWORDS, frame_header_size__doc__ },
173 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
173 { "get_frame_parameters", (PyCFunction)get_frame_parameters,
174 METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
174 METH_VARARGS | METH_KEYWORDS, get_frame_parameters__doc__ },
175 { "train_dictionary", (PyCFunction)train_dictionary,
175 { "train_dictionary", (PyCFunction)train_dictionary,
176 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
176 METH_VARARGS | METH_KEYWORDS, train_dictionary__doc__ },
177 { NULL, NULL }
177 { NULL, NULL }
178 };
178 };
179
179
180 void bufferutil_module_init(PyObject* mod);
180 void bufferutil_module_init(PyObject* mod);
181 void compressobj_module_init(PyObject* mod);
181 void compressobj_module_init(PyObject* mod);
182 void compressor_module_init(PyObject* mod);
182 void compressor_module_init(PyObject* mod);
183 void compressionparams_module_init(PyObject* mod);
183 void compressionparams_module_init(PyObject* mod);
184 void constants_module_init(PyObject* mod);
184 void constants_module_init(PyObject* mod);
185 void compressionchunker_module_init(PyObject* mod);
185 void compressionchunker_module_init(PyObject* mod);
186 void compressiondict_module_init(PyObject* mod);
186 void compressiondict_module_init(PyObject* mod);
187 void compressionreader_module_init(PyObject* mod);
187 void compressionreader_module_init(PyObject* mod);
188 void compressionwriter_module_init(PyObject* mod);
188 void compressionwriter_module_init(PyObject* mod);
189 void compressoriterator_module_init(PyObject* mod);
189 void compressoriterator_module_init(PyObject* mod);
190 void decompressor_module_init(PyObject* mod);
190 void decompressor_module_init(PyObject* mod);
191 void decompressobj_module_init(PyObject* mod);
191 void decompressobj_module_init(PyObject* mod);
192 void decompressionreader_module_init(PyObject *mod);
192 void decompressionreader_module_init(PyObject *mod);
193 void decompressionwriter_module_init(PyObject* mod);
193 void decompressionwriter_module_init(PyObject* mod);
194 void decompressoriterator_module_init(PyObject* mod);
194 void decompressoriterator_module_init(PyObject* mod);
195 void frameparams_module_init(PyObject* mod);
195 void frameparams_module_init(PyObject* mod);
196
196
197 void zstd_module_init(PyObject* m) {
197 void zstd_module_init(PyObject* m) {
198 /* python-zstandard relies on unstable zstd C API features. This means
198 /* python-zstandard relies on unstable zstd C API features. This means
199 that changes in zstd may break expectations in python-zstandard.
199 that changes in zstd may break expectations in python-zstandard.
200
200
201 python-zstandard is distributed with a copy of the zstd sources.
201 python-zstandard is distributed with a copy of the zstd sources.
202 python-zstandard is only guaranteed to work with the bundled version
202 python-zstandard is only guaranteed to work with the bundled version
203 of zstd.
203 of zstd.
204
204
205 However, downstream redistributors or packagers may unbundle zstd
205 However, downstream redistributors or packagers may unbundle zstd
206 from python-zstandard. This can result in a mismatch between zstd
206 from python-zstandard. This can result in a mismatch between zstd
207 versions and API semantics. This essentially "voids the warranty"
207 versions and API semantics. This essentially "voids the warranty"
208 of python-zstandard and may cause undefined behavior.
208 of python-zstandard and may cause undefined behavior.
209
209
210 We detect this mismatch here and refuse to load the module if this
210 We detect this mismatch here and refuse to load the module if this
211 scenario is detected.
211 scenario is detected.
212 */
212 */
213 if (ZSTD_VERSION_NUMBER != 10308 || ZSTD_versionNumber() != 10308) {
213 if (ZSTD_VERSION_NUMBER != 10403 || ZSTD_versionNumber() != 10403) {
214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
215 return;
215 return;
216 }
216 }
217
217
218 bufferutil_module_init(m);
218 bufferutil_module_init(m);
219 compressionparams_module_init(m);
219 compressionparams_module_init(m);
220 compressiondict_module_init(m);
220 compressiondict_module_init(m);
221 compressobj_module_init(m);
221 compressobj_module_init(m);
222 compressor_module_init(m);
222 compressor_module_init(m);
223 compressionchunker_module_init(m);
223 compressionchunker_module_init(m);
224 compressionreader_module_init(m);
224 compressionreader_module_init(m);
225 compressionwriter_module_init(m);
225 compressionwriter_module_init(m);
226 compressoriterator_module_init(m);
226 compressoriterator_module_init(m);
227 constants_module_init(m);
227 constants_module_init(m);
228 decompressor_module_init(m);
228 decompressor_module_init(m);
229 decompressobj_module_init(m);
229 decompressobj_module_init(m);
230 decompressionreader_module_init(m);
230 decompressionreader_module_init(m);
231 decompressionwriter_module_init(m);
231 decompressionwriter_module_init(m);
232 decompressoriterator_module_init(m);
232 decompressoriterator_module_init(m);
233 frameparams_module_init(m);
233 frameparams_module_init(m);
234 }
234 }
235
235
236 #if defined(__GNUC__) && (__GNUC__ >= 4)
236 #if defined(__GNUC__) && (__GNUC__ >= 4)
237 # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
237 # define PYTHON_ZSTD_VISIBILITY __attribute__ ((visibility ("default")))
238 #else
238 #else
239 # define PYTHON_ZSTD_VISIBILITY
239 # define PYTHON_ZSTD_VISIBILITY
240 #endif
240 #endif
241
241
242 #if PY_MAJOR_VERSION >= 3
242 #if PY_MAJOR_VERSION >= 3
243 static struct PyModuleDef zstd_module = {
243 static struct PyModuleDef zstd_module = {
244 PyModuleDef_HEAD_INIT,
244 PyModuleDef_HEAD_INIT,
245 "zstd",
245 "zstd",
246 zstd_doc,
246 zstd_doc,
247 -1,
247 -1,
248 zstd_methods
248 zstd_methods
249 };
249 };
250
250
251 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
251 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC PyInit_zstd(void) {
252 PyObject *m = PyModule_Create(&zstd_module);
252 PyObject *m = PyModule_Create(&zstd_module);
253 if (m) {
253 if (m) {
254 zstd_module_init(m);
254 zstd_module_init(m);
255 if (PyErr_Occurred()) {
255 if (PyErr_Occurred()) {
256 Py_DECREF(m);
256 Py_DECREF(m);
257 m = NULL;
257 m = NULL;
258 }
258 }
259 }
259 }
260 return m;
260 return m;
261 }
261 }
262 #else
262 #else
263 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
263 PYTHON_ZSTD_VISIBILITY PyMODINIT_FUNC initzstd(void) {
264 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
264 PyObject *m = Py_InitModule3("zstd", zstd_methods, zstd_doc);
265 if (m) {
265 if (m) {
266 zstd_module_init(m);
266 zstd_module_init(m);
267 }
267 }
268 }
268 }
269 #endif
269 #endif
270
270
271 /* Attempt to resolve the number of CPUs in the system. */
271 /* Attempt to resolve the number of CPUs in the system. */
272 int cpu_count() {
272 int cpu_count() {
273 int count = 0;
273 int count = 0;
274
274
275 #if defined(_WIN32)
275 #if defined(_WIN32)
276 SYSTEM_INFO si;
276 SYSTEM_INFO si;
277 si.dwNumberOfProcessors = 0;
277 si.dwNumberOfProcessors = 0;
278 GetSystemInfo(&si);
278 GetSystemInfo(&si);
279 count = si.dwNumberOfProcessors;
279 count = si.dwNumberOfProcessors;
280 #elif defined(__APPLE__)
280 #elif defined(__APPLE__)
281 int num;
281 int num;
282 size_t size = sizeof(int);
282 size_t size = sizeof(int);
283
283
284 if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
284 if (0 == sysctlbyname("hw.logicalcpu", &num, &size, NULL, 0)) {
285 count = num;
285 count = num;
286 }
286 }
287 #elif defined(__linux__)
287 #elif defined(__linux__)
288 count = sysconf(_SC_NPROCESSORS_ONLN);
288 count = sysconf(_SC_NPROCESSORS_ONLN);
289 #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
289 #elif defined(__OpenBSD__) || defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__)
290 int mib[2];
290 int mib[2];
291 size_t len = sizeof(count);
291 size_t len = sizeof(count);
292 mib[0] = CTL_HW;
292 mib[0] = CTL_HW;
293 mib[1] = HW_NCPU;
293 mib[1] = HW_NCPU;
294 if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
294 if (0 != sysctl(mib, 2, &count, &len, NULL, 0)) {
295 count = 0;
295 count = 0;
296 }
296 }
297 #elif defined(__hpux)
297 #elif defined(__hpux)
298 count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
298 count = mpctl(MPC_GETNUMSPUS, NULL, NULL);
299 #endif
299 #endif
300
300
301 return count;
301 return count;
302 }
302 }
303
303
304 size_t roundpow2(size_t i) {
304 size_t roundpow2(size_t i) {
305 i--;
305 i--;
306 i |= i >> 1;
306 i |= i >> 1;
307 i |= i >> 2;
307 i |= i >> 2;
308 i |= i >> 4;
308 i |= i >> 4;
309 i |= i >> 8;
309 i |= i >> 8;
310 i |= i >> 16;
310 i |= i >> 16;
311 i++;
311 i++;
312
312
313 return i;
313 return i;
314 }
314 }
315
315
316 /* Safer version of _PyBytes_Resize().
316 /* Safer version of _PyBytes_Resize().
317 *
317 *
318 * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
318 * _PyBytes_Resize() only works if the refcount is 1. In some scenarios,
319 * we can get an object with a refcount > 1, even if it was just created
319 * we can get an object with a refcount > 1, even if it was just created
320 * with PyBytes_FromStringAndSize()! That's because (at least) CPython
320 * with PyBytes_FromStringAndSize()! That's because (at least) CPython
321 * pre-allocates PyBytes instances of size 1 for every possible byte value.
321 * pre-allocates PyBytes instances of size 1 for every possible byte value.
322 *
322 *
323 * If non-0 is returned, obj may or may not be NULL.
323 * If non-0 is returned, obj may or may not be NULL.
324 */
324 */
325 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
325 int safe_pybytes_resize(PyObject** obj, Py_ssize_t size) {
326 PyObject* tmp;
326 PyObject* tmp;
327
327
328 if ((*obj)->ob_refcnt == 1) {
328 if ((*obj)->ob_refcnt == 1) {
329 return _PyBytes_Resize(obj, size);
329 return _PyBytes_Resize(obj, size);
330 }
330 }
331
331
332 tmp = PyBytes_FromStringAndSize(NULL, size);
332 tmp = PyBytes_FromStringAndSize(NULL, size);
333 if (!tmp) {
333 if (!tmp) {
334 return -1;
334 return -1;
335 }
335 }
336
336
337 memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj),
337 memcpy(PyBytes_AS_STRING(tmp), PyBytes_AS_STRING(*obj),
338 PyBytes_GET_SIZE(*obj));
338 PyBytes_GET_SIZE(*obj));
339
339
340 Py_DECREF(*obj);
340 Py_DECREF(*obj);
341 *obj = tmp;
341 *obj = tmp;
342
342
343 return 0;
343 return 0;
344 } No newline at end of file
344 }
@@ -1,455 +1,459 b''
1 /* ******************************************************************
1 /* ******************************************************************
2 bitstream
2 bitstream
3 Part of FSE library
3 Part of FSE library
4 Copyright (C) 2013-present, Yann Collet.
4 Copyright (C) 2013-present, Yann Collet.
5
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
7
8 Redistribution and use in source and binary forms, with or without
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
9 modification, are permitted provided that the following conditions are
10 met:
10 met:
11
11
12 * Redistributions of source code must retain the above copyright
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
16 in the documentation and/or other materials provided with the
17 distribution.
17 distribution.
18
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
30
31 You can contact the author at :
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
33 ****************************************************************** */
34 #ifndef BITSTREAM_H_MODULE
34 #ifndef BITSTREAM_H_MODULE
35 #define BITSTREAM_H_MODULE
35 #define BITSTREAM_H_MODULE
36
36
37 #if defined (__cplusplus)
37 #if defined (__cplusplus)
38 extern "C" {
38 extern "C" {
39 #endif
39 #endif
40
40
41 /*
41 /*
42 * This API consists of small unitary functions, which must be inlined for best performance.
42 * This API consists of small unitary functions, which must be inlined for best performance.
43 * Since link-time-optimization is not available for all compilers,
43 * Since link-time-optimization is not available for all compilers,
44 * these functions are defined into a .h to be included.
44 * these functions are defined into a .h to be included.
45 */
45 */
46
46
47 /*-****************************************
47 /*-****************************************
48 * Dependencies
48 * Dependencies
49 ******************************************/
49 ******************************************/
50 #include "mem.h" /* unaligned access routines */
50 #include "mem.h" /* unaligned access routines */
51 #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
51 #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52 #include "error_private.h" /* error codes and messages */
52 #include "error_private.h" /* error codes and messages */
53
53
54
54
55 /*=========================================
55 /*=========================================
56 * Target specific
56 * Target specific
57 =========================================*/
57 =========================================*/
58 #if defined(__BMI__) && defined(__GNUC__)
58 #if defined(__BMI__) && defined(__GNUC__)
59 # include <immintrin.h> /* support for bextr (experimental) */
59 # include <immintrin.h> /* support for bextr (experimental) */
60 #elif defined(__ICCARM__)
61 # include <intrinsics.h>
60 #endif
62 #endif
61
63
62 #define STREAM_ACCUMULATOR_MIN_32 25
64 #define STREAM_ACCUMULATOR_MIN_32 25
63 #define STREAM_ACCUMULATOR_MIN_64 57
65 #define STREAM_ACCUMULATOR_MIN_64 57
64 #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
66 #define STREAM_ACCUMULATOR_MIN ((U32)(MEM_32bits() ? STREAM_ACCUMULATOR_MIN_32 : STREAM_ACCUMULATOR_MIN_64))
65
67
66
68
67 /*-******************************************
69 /*-******************************************
68 * bitStream encoding API (write forward)
70 * bitStream encoding API (write forward)
69 ********************************************/
71 ********************************************/
70 /* bitStream can mix input from multiple sources.
72 /* bitStream can mix input from multiple sources.
71 * A critical property of these streams is that they encode and decode in **reverse** direction.
73 * A critical property of these streams is that they encode and decode in **reverse** direction.
72 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
74 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
73 */
75 */
74 typedef struct {
76 typedef struct {
75 size_t bitContainer;
77 size_t bitContainer;
76 unsigned bitPos;
78 unsigned bitPos;
77 char* startPtr;
79 char* startPtr;
78 char* ptr;
80 char* ptr;
79 char* endPtr;
81 char* endPtr;
80 } BIT_CStream_t;
82 } BIT_CStream_t;
81
83
82 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
84 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
83 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
85 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
84 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
86 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
85 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
87 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
86
88
87 /* Start with initCStream, providing the size of buffer to write into.
89 /* Start with initCStream, providing the size of buffer to write into.
88 * bitStream will never write outside of this buffer.
90 * bitStream will never write outside of this buffer.
89 * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
91 * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
90 *
92 *
91 * bits are first added to a local register.
93 * bits are first added to a local register.
92 * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
94 * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
93 * Writing data into memory is an explicit operation, performed by the flushBits function.
95 * Writing data into memory is an explicit operation, performed by the flushBits function.
94 * Hence keep track how many bits are potentially stored into local register to avoid register overflow.
96 * Hence keep track how many bits are potentially stored into local register to avoid register overflow.
95 * After a flushBits, a maximum of 7 bits might still be stored into local register.
97 * After a flushBits, a maximum of 7 bits might still be stored into local register.
96 *
98 *
97 * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
99 * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
98 *
100 *
99 * Last operation is to close the bitStream.
101 * Last operation is to close the bitStream.
100 * The function returns the final size of CStream in bytes.
102 * The function returns the final size of CStream in bytes.
101 * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
103 * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
102 */
104 */
103
105
104
106
105 /*-********************************************
107 /*-********************************************
106 * bitStream decoding API (read backward)
108 * bitStream decoding API (read backward)
107 **********************************************/
109 **********************************************/
108 typedef struct {
110 typedef struct {
109 size_t bitContainer;
111 size_t bitContainer;
110 unsigned bitsConsumed;
112 unsigned bitsConsumed;
111 const char* ptr;
113 const char* ptr;
112 const char* start;
114 const char* start;
113 const char* limitPtr;
115 const char* limitPtr;
114 } BIT_DStream_t;
116 } BIT_DStream_t;
115
117
116 typedef enum { BIT_DStream_unfinished = 0,
118 typedef enum { BIT_DStream_unfinished = 0,
117 BIT_DStream_endOfBuffer = 1,
119 BIT_DStream_endOfBuffer = 1,
118 BIT_DStream_completed = 2,
120 BIT_DStream_completed = 2,
119 BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
121 BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
120 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
122 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
121
123
122 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
124 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
123 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
125 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
124 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
126 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
125 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
127 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
126
128
127
129
128 /* Start by invoking BIT_initDStream().
130 /* Start by invoking BIT_initDStream().
129 * A chunk of the bitStream is then stored into a local register.
131 * A chunk of the bitStream is then stored into a local register.
130 * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
132 * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
131 * You can then retrieve bitFields stored into the local register, **in reverse order**.
133 * You can then retrieve bitFields stored into the local register, **in reverse order**.
132 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
134 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
133 * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
135 * A reload guarantee a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
134 * Otherwise, it can be less than that, so proceed accordingly.
136 * Otherwise, it can be less than that, so proceed accordingly.
135 * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
137 * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
136 */
138 */
137
139
138
140
139 /*-****************************************
141 /*-****************************************
140 * unsafe API
142 * unsafe API
141 ******************************************/
143 ******************************************/
142 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
144 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
143 /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
145 /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
144
146
145 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
147 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
146 /* unsafe version; does not check buffer overflow */
148 /* unsafe version; does not check buffer overflow */
147
149
148 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
150 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
149 /* faster, but works only if nbBits >= 1 */
151 /* faster, but works only if nbBits >= 1 */
150
152
151
153
152
154
153 /*-**************************************************************
155 /*-**************************************************************
154 * Internal functions
156 * Internal functions
155 ****************************************************************/
157 ****************************************************************/
156 MEM_STATIC unsigned BIT_highbit32 (U32 val)
158 MEM_STATIC unsigned BIT_highbit32 (U32 val)
157 {
159 {
158 assert(val != 0);
160 assert(val != 0);
159 {
161 {
160 # if defined(_MSC_VER) /* Visual */
162 # if defined(_MSC_VER) /* Visual */
161 unsigned long r=0;
163 unsigned long r=0;
162 _BitScanReverse ( &r, val );
164 _BitScanReverse ( &r, val );
163 return (unsigned) r;
165 return (unsigned) r;
164 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
166 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
165 return 31 - __builtin_clz (val);
167 return 31 - __builtin_clz (val);
168 # elif defined(__ICCARM__) /* IAR Intrinsic */
169 return 31 - __CLZ(val);
166 # else /* Software version */
170 # else /* Software version */
167 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
171 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29,
168 11, 14, 16, 18, 22, 25, 3, 30,
172 11, 14, 16, 18, 22, 25, 3, 30,
169 8, 12, 20, 28, 15, 17, 24, 7,
173 8, 12, 20, 28, 15, 17, 24, 7,
170 19, 27, 23, 6, 26, 5, 4, 31 };
174 19, 27, 23, 6, 26, 5, 4, 31 };
171 U32 v = val;
175 U32 v = val;
172 v |= v >> 1;
176 v |= v >> 1;
173 v |= v >> 2;
177 v |= v >> 2;
174 v |= v >> 4;
178 v |= v >> 4;
175 v |= v >> 8;
179 v |= v >> 8;
176 v |= v >> 16;
180 v |= v >> 16;
177 return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
181 return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
178 # endif
182 # endif
179 }
183 }
180 }
184 }
181
185
182 /*===== Local Constants =====*/
186 /*===== Local Constants =====*/
183 static const unsigned BIT_mask[] = {
187 static const unsigned BIT_mask[] = {
184 0, 1, 3, 7, 0xF, 0x1F,
188 0, 1, 3, 7, 0xF, 0x1F,
185 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
189 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF,
186 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
190 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF,
187 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
191 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF,
188 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
192 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF, 0x7FFFFFF, 0xFFFFFFF, 0x1FFFFFFF,
189 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
193 0x3FFFFFFF, 0x7FFFFFFF}; /* up to 31 bits */
190 #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
194 #define BIT_MASK_SIZE (sizeof(BIT_mask) / sizeof(BIT_mask[0]))
191
195
192 /*-**************************************************************
196 /*-**************************************************************
193 * bitStream encoding
197 * bitStream encoding
194 ****************************************************************/
198 ****************************************************************/
195 /*! BIT_initCStream() :
199 /*! BIT_initCStream() :
196 * `dstCapacity` must be > sizeof(size_t)
200 * `dstCapacity` must be > sizeof(size_t)
197 * @return : 0 if success,
201 * @return : 0 if success,
198 * otherwise an error code (can be tested using ERR_isError()) */
202 * otherwise an error code (can be tested using ERR_isError()) */
199 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
203 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC,
200 void* startPtr, size_t dstCapacity)
204 void* startPtr, size_t dstCapacity)
201 {
205 {
202 bitC->bitContainer = 0;
206 bitC->bitContainer = 0;
203 bitC->bitPos = 0;
207 bitC->bitPos = 0;
204 bitC->startPtr = (char*)startPtr;
208 bitC->startPtr = (char*)startPtr;
205 bitC->ptr = bitC->startPtr;
209 bitC->ptr = bitC->startPtr;
206 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
210 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer);
207 if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
211 if (dstCapacity <= sizeof(bitC->bitContainer)) return ERROR(dstSize_tooSmall);
208 return 0;
212 return 0;
209 }
213 }
210
214
211 /*! BIT_addBits() :
215 /*! BIT_addBits() :
212 * can add up to 31 bits into `bitC`.
216 * can add up to 31 bits into `bitC`.
213 * Note : does not check for register overflow ! */
217 * Note : does not check for register overflow ! */
214 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
218 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC,
215 size_t value, unsigned nbBits)
219 size_t value, unsigned nbBits)
216 {
220 {
217 MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
221 MEM_STATIC_ASSERT(BIT_MASK_SIZE == 32);
218 assert(nbBits < BIT_MASK_SIZE);
222 assert(nbBits < BIT_MASK_SIZE);
219 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
223 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
220 bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
224 bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
221 bitC->bitPos += nbBits;
225 bitC->bitPos += nbBits;
222 }
226 }
223
227
224 /*! BIT_addBitsFast() :
228 /*! BIT_addBitsFast() :
225 * works only if `value` is _clean_,
229 * works only if `value` is _clean_,
226 * meaning all high bits above nbBits are 0 */
230 * meaning all high bits above nbBits are 0 */
227 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
231 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
228 size_t value, unsigned nbBits)
232 size_t value, unsigned nbBits)
229 {
233 {
230 assert((value>>nbBits) == 0);
234 assert((value>>nbBits) == 0);
231 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
235 assert(nbBits + bitC->bitPos < sizeof(bitC->bitContainer) * 8);
232 bitC->bitContainer |= value << bitC->bitPos;
236 bitC->bitContainer |= value << bitC->bitPos;
233 bitC->bitPos += nbBits;
237 bitC->bitPos += nbBits;
234 }
238 }
235
239
236 /*! BIT_flushBitsFast() :
240 /*! BIT_flushBitsFast() :
237 * assumption : bitContainer has not overflowed
241 * assumption : bitContainer has not overflowed
238 * unsafe version; does not check buffer overflow */
242 * unsafe version; does not check buffer overflow */
239 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
243 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
240 {
244 {
241 size_t const nbBytes = bitC->bitPos >> 3;
245 size_t const nbBytes = bitC->bitPos >> 3;
242 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
246 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
243 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
247 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
244 bitC->ptr += nbBytes;
248 bitC->ptr += nbBytes;
245 assert(bitC->ptr <= bitC->endPtr);
249 assert(bitC->ptr <= bitC->endPtr);
246 bitC->bitPos &= 7;
250 bitC->bitPos &= 7;
247 bitC->bitContainer >>= nbBytes*8;
251 bitC->bitContainer >>= nbBytes*8;
248 }
252 }
249
253
250 /*! BIT_flushBits() :
254 /*! BIT_flushBits() :
251 * assumption : bitContainer has not overflowed
255 * assumption : bitContainer has not overflowed
252 * safe version; check for buffer overflow, and prevents it.
256 * safe version; check for buffer overflow, and prevents it.
253 * note : does not signal buffer overflow.
257 * note : does not signal buffer overflow.
254 * overflow will be revealed later on using BIT_closeCStream() */
258 * overflow will be revealed later on using BIT_closeCStream() */
255 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
259 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
256 {
260 {
257 size_t const nbBytes = bitC->bitPos >> 3;
261 size_t const nbBytes = bitC->bitPos >> 3;
258 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
262 assert(bitC->bitPos < sizeof(bitC->bitContainer) * 8);
259 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
263 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
260 bitC->ptr += nbBytes;
264 bitC->ptr += nbBytes;
261 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
265 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
262 bitC->bitPos &= 7;
266 bitC->bitPos &= 7;
263 bitC->bitContainer >>= nbBytes*8;
267 bitC->bitContainer >>= nbBytes*8;
264 }
268 }
265
269
266 /*! BIT_closeCStream() :
270 /*! BIT_closeCStream() :
267 * @return : size of CStream, in bytes,
271 * @return : size of CStream, in bytes,
268 * or 0 if it could not fit into dstBuffer */
272 * or 0 if it could not fit into dstBuffer */
269 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
273 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
270 {
274 {
271 BIT_addBitsFast(bitC, 1, 1); /* endMark */
275 BIT_addBitsFast(bitC, 1, 1); /* endMark */
272 BIT_flushBits(bitC);
276 BIT_flushBits(bitC);
273 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
277 if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
274 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
278 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
275 }
279 }
276
280
277
281
278 /*-********************************************************
282 /*-********************************************************
279 * bitStream decoding
283 * bitStream decoding
280 **********************************************************/
284 **********************************************************/
281 /*! BIT_initDStream() :
285 /*! BIT_initDStream() :
282 * Initialize a BIT_DStream_t.
286 * Initialize a BIT_DStream_t.
283 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
287 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
284 * `srcSize` must be the *exact* size of the bitStream, in bytes.
288 * `srcSize` must be the *exact* size of the bitStream, in bytes.
285 * @return : size of stream (== srcSize), or an errorCode if a problem is detected
289 * @return : size of stream (== srcSize), or an errorCode if a problem is detected
286 */
290 */
287 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
291 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
288 {
292 {
289 if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
293 if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
290
294
291 bitD->start = (const char*)srcBuffer;
295 bitD->start = (const char*)srcBuffer;
292 bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
296 bitD->limitPtr = bitD->start + sizeof(bitD->bitContainer);
293
297
294 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
298 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
295 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
299 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
296 bitD->bitContainer = MEM_readLEST(bitD->ptr);
300 bitD->bitContainer = MEM_readLEST(bitD->ptr);
297 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
301 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
298 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
302 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0; /* ensures bitsConsumed is always set */
299 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
303 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
300 } else {
304 } else {
301 bitD->ptr = bitD->start;
305 bitD->ptr = bitD->start;
302 bitD->bitContainer = *(const BYTE*)(bitD->start);
306 bitD->bitContainer = *(const BYTE*)(bitD->start);
303 switch(srcSize)
307 switch(srcSize)
304 {
308 {
305 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
309 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
306 /* fall-through */
310 /* fall-through */
307
311
308 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
312 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
309 /* fall-through */
313 /* fall-through */
310
314
311 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
315 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
312 /* fall-through */
316 /* fall-through */
313
317
314 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
318 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
315 /* fall-through */
319 /* fall-through */
316
320
317 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
321 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
318 /* fall-through */
322 /* fall-through */
319
323
320 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
324 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
321 /* fall-through */
325 /* fall-through */
322
326
323 default: break;
327 default: break;
324 }
328 }
325 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
329 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
326 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
330 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
327 if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
331 if (lastByte == 0) return ERROR(corruption_detected); /* endMark not present */
328 }
332 }
329 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
333 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
330 }
334 }
331
335
332 return srcSize;
336 return srcSize;
333 }
337 }
334
338
335 MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
339 MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
336 {
340 {
337 return bitContainer >> start;
341 return bitContainer >> start;
338 }
342 }
339
343
340 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
344 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
341 {
345 {
342 U32 const regMask = sizeof(bitContainer)*8 - 1;
346 U32 const regMask = sizeof(bitContainer)*8 - 1;
343 /* if start > regMask, bitstream is corrupted, and result is undefined */
347 /* if start > regMask, bitstream is corrupted, and result is undefined */
344 assert(nbBits < BIT_MASK_SIZE);
348 assert(nbBits < BIT_MASK_SIZE);
345 return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
349 return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
346 }
350 }
347
351
348 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
352 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
349 {
353 {
350 assert(nbBits < BIT_MASK_SIZE);
354 assert(nbBits < BIT_MASK_SIZE);
351 return bitContainer & BIT_mask[nbBits];
355 return bitContainer & BIT_mask[nbBits];
352 }
356 }
353
357
354 /*! BIT_lookBits() :
358 /*! BIT_lookBits() :
355 * Provides next n bits from local register.
359 * Provides next n bits from local register.
356 * local register is not modified.
360 * local register is not modified.
357 * On 32-bits, maxNbBits==24.
361 * On 32-bits, maxNbBits==24.
358 * On 64-bits, maxNbBits==56.
362 * On 64-bits, maxNbBits==56.
359 * @return : value extracted */
363 * @return : value extracted */
360 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
364 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
361 {
365 {
362 /* arbitrate between double-shift and shift+mask */
366 /* arbitrate between double-shift and shift+mask */
363 #if 1
367 #if 1
364 /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
368 /* if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8,
365 * bitstream is likely corrupted, and result is undefined */
369 * bitstream is likely corrupted, and result is undefined */
366 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
370 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
367 #else
371 #else
368 /* this code path is slower on my os-x laptop */
372 /* this code path is slower on my os-x laptop */
369 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
373 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
370 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
374 return ((bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> 1) >> ((regMask-nbBits) & regMask);
371 #endif
375 #endif
372 }
376 }
373
377
374 /*! BIT_lookBitsFast() :
378 /*! BIT_lookBitsFast() :
375 * unsafe version; only works if nbBits >= 1 */
379 * unsafe version; only works if nbBits >= 1 */
376 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
380 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
377 {
381 {
378 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
382 U32 const regMask = sizeof(bitD->bitContainer)*8 - 1;
379 assert(nbBits >= 1);
383 assert(nbBits >= 1);
380 return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
384 return (bitD->bitContainer << (bitD->bitsConsumed & regMask)) >> (((regMask+1)-nbBits) & regMask);
381 }
385 }
382
386
383 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
387 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
384 {
388 {
385 bitD->bitsConsumed += nbBits;
389 bitD->bitsConsumed += nbBits;
386 }
390 }
387
391
388 /*! BIT_readBits() :
392 /*! BIT_readBits() :
389 * Read (consume) next n bits from local register and update.
393 * Read (consume) next n bits from local register and update.
390 * Pay attention to not read more than nbBits contained into local register.
394 * Pay attention to not read more than nbBits contained into local register.
391 * @return : extracted value. */
395 * @return : extracted value. */
392 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
396 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits)
393 {
397 {
394 size_t const value = BIT_lookBits(bitD, nbBits);
398 size_t const value = BIT_lookBits(bitD, nbBits);
395 BIT_skipBits(bitD, nbBits);
399 BIT_skipBits(bitD, nbBits);
396 return value;
400 return value;
397 }
401 }
398
402
399 /*! BIT_readBitsFast() :
403 /*! BIT_readBitsFast() :
400 * unsafe version; only works only if nbBits >= 1 */
404 * unsafe version; only works only if nbBits >= 1 */
401 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
405 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits)
402 {
406 {
403 size_t const value = BIT_lookBitsFast(bitD, nbBits);
407 size_t const value = BIT_lookBitsFast(bitD, nbBits);
404 assert(nbBits >= 1);
408 assert(nbBits >= 1);
405 BIT_skipBits(bitD, nbBits);
409 BIT_skipBits(bitD, nbBits);
406 return value;
410 return value;
407 }
411 }
408
412
409 /*! BIT_reloadDStream() :
413 /*! BIT_reloadDStream() :
410 * Refill `bitD` from buffer previously set in BIT_initDStream() .
414 * Refill `bitD` from buffer previously set in BIT_initDStream() .
411 * This function is safe, it guarantees it will not read beyond src buffer.
415 * This function is safe, it guarantees it will not read beyond src buffer.
412 * @return : status of `BIT_DStream_t` internal register.
416 * @return : status of `BIT_DStream_t` internal register.
413 * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
417 * when status == BIT_DStream_unfinished, internal register is filled with at least 25 or 57 bits */
414 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
418 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
415 {
419 {
416 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
420 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* overflow detected, like end of stream */
417 return BIT_DStream_overflow;
421 return BIT_DStream_overflow;
418
422
419 if (bitD->ptr >= bitD->limitPtr) {
423 if (bitD->ptr >= bitD->limitPtr) {
420 bitD->ptr -= bitD->bitsConsumed >> 3;
424 bitD->ptr -= bitD->bitsConsumed >> 3;
421 bitD->bitsConsumed &= 7;
425 bitD->bitsConsumed &= 7;
422 bitD->bitContainer = MEM_readLEST(bitD->ptr);
426 bitD->bitContainer = MEM_readLEST(bitD->ptr);
423 return BIT_DStream_unfinished;
427 return BIT_DStream_unfinished;
424 }
428 }
425 if (bitD->ptr == bitD->start) {
429 if (bitD->ptr == bitD->start) {
426 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
430 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
427 return BIT_DStream_completed;
431 return BIT_DStream_completed;
428 }
432 }
429 /* start < ptr < limitPtr */
433 /* start < ptr < limitPtr */
430 { U32 nbBytes = bitD->bitsConsumed >> 3;
434 { U32 nbBytes = bitD->bitsConsumed >> 3;
431 BIT_DStream_status result = BIT_DStream_unfinished;
435 BIT_DStream_status result = BIT_DStream_unfinished;
432 if (bitD->ptr - nbBytes < bitD->start) {
436 if (bitD->ptr - nbBytes < bitD->start) {
433 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
437 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
434 result = BIT_DStream_endOfBuffer;
438 result = BIT_DStream_endOfBuffer;
435 }
439 }
436 bitD->ptr -= nbBytes;
440 bitD->ptr -= nbBytes;
437 bitD->bitsConsumed -= nbBytes*8;
441 bitD->bitsConsumed -= nbBytes*8;
438 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
442 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD->bitContainer), otherwise bitD->ptr == bitD->start */
439 return result;
443 return result;
440 }
444 }
441 }
445 }
442
446
443 /*! BIT_endOfDStream() :
447 /*! BIT_endOfDStream() :
444 * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
448 * @return : 1 if DStream has _exactly_ reached its end (all bits consumed).
445 */
449 */
446 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
450 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
447 {
451 {
448 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
452 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
449 }
453 }
450
454
451 #if defined (__cplusplus)
455 #if defined (__cplusplus)
452 }
456 }
453 #endif
457 #endif
454
458
455 #endif /* BITSTREAM_H_MODULE */
459 #endif /* BITSTREAM_H_MODULE */
@@ -1,140 +1,147 b''
1 /*
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under both the BSD-style license (found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 * You may select, at your option, one of the above-listed licenses.
9 */
9 */
10
10
11 #ifndef ZSTD_COMPILER_H
11 #ifndef ZSTD_COMPILER_H
12 #define ZSTD_COMPILER_H
12 #define ZSTD_COMPILER_H
13
13
14 /*-*******************************************************
14 /*-*******************************************************
15 * Compiler specifics
15 * Compiler specifics
16 *********************************************************/
16 *********************************************************/
17 /* force inlining */
17 /* force inlining */
18
18
19 #if !defined(ZSTD_NO_INLINE)
19 #if !defined(ZSTD_NO_INLINE)
20 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
20 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
21 # define INLINE_KEYWORD inline
21 # define INLINE_KEYWORD inline
22 #else
22 #else
23 # define INLINE_KEYWORD
23 # define INLINE_KEYWORD
24 #endif
24 #endif
25
25
26 #if defined(__GNUC__)
26 #if defined(__GNUC__) || defined(__ICCARM__)
27 # define FORCE_INLINE_ATTR __attribute__((always_inline))
27 # define FORCE_INLINE_ATTR __attribute__((always_inline))
28 #elif defined(_MSC_VER)
28 #elif defined(_MSC_VER)
29 # define FORCE_INLINE_ATTR __forceinline
29 # define FORCE_INLINE_ATTR __forceinline
30 #else
30 #else
31 # define FORCE_INLINE_ATTR
31 # define FORCE_INLINE_ATTR
32 #endif
32 #endif
33
33
34 #else
34 #else
35
35
36 #define INLINE_KEYWORD
36 #define INLINE_KEYWORD
37 #define FORCE_INLINE_ATTR
37 #define FORCE_INLINE_ATTR
38
38
39 #endif
39 #endif
40
40
41 /**
41 /**
42 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
42 * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant
43 * parameters. They must be inlined for the compiler to elimininate the constant
43 * parameters. They must be inlined for the compiler to eliminate the constant
44 * branches.
44 * branches.
45 */
45 */
46 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
46 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
47 /**
47 /**
48 * HINT_INLINE is used to help the compiler generate better code. It is *not*
48 * HINT_INLINE is used to help the compiler generate better code. It is *not*
49 * used for "templates", so it can be tweaked based on the compilers
49 * used for "templates", so it can be tweaked based on the compilers
50 * performance.
50 * performance.
51 *
51 *
52 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
52 * gcc-4.8 and gcc-4.9 have been shown to benefit from leaving off the
53 * always_inline attribute.
53 * always_inline attribute.
54 *
54 *
55 * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
55 * clang up to 5.0.0 (trunk) benefit tremendously from the always_inline
56 * attribute.
56 * attribute.
57 */
57 */
58 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
58 #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ >= 8 && __GNUC__ < 5
59 # define HINT_INLINE static INLINE_KEYWORD
59 # define HINT_INLINE static INLINE_KEYWORD
60 #else
60 #else
61 # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
61 # define HINT_INLINE static INLINE_KEYWORD FORCE_INLINE_ATTR
62 #endif
62 #endif
63
63
64 /* force no inlining */
64 /* force no inlining */
65 #ifdef _MSC_VER
65 #ifdef _MSC_VER
66 # define FORCE_NOINLINE static __declspec(noinline)
66 # define FORCE_NOINLINE static __declspec(noinline)
67 #else
67 #else
68 # ifdef __GNUC__
68 # if defined(__GNUC__) || defined(__ICCARM__)
69 # define FORCE_NOINLINE static __attribute__((__noinline__))
69 # define FORCE_NOINLINE static __attribute__((__noinline__))
70 # else
70 # else
71 # define FORCE_NOINLINE static
71 # define FORCE_NOINLINE static
72 # endif
72 # endif
73 #endif
73 #endif
74
74
75 /* target attribute */
75 /* target attribute */
76 #ifndef __has_attribute
76 #ifndef __has_attribute
77 #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
77 #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */
78 #endif
78 #endif
79 #if defined(__GNUC__)
79 #if defined(__GNUC__) || defined(__ICCARM__)
80 # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
80 # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
81 #else
81 #else
82 # define TARGET_ATTRIBUTE(target)
82 # define TARGET_ATTRIBUTE(target)
83 #endif
83 #endif
84
84
85 /* Enable runtime BMI2 dispatch based on the CPU.
85 /* Enable runtime BMI2 dispatch based on the CPU.
86 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
86 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
87 */
87 */
88 #ifndef DYNAMIC_BMI2
88 #ifndef DYNAMIC_BMI2
89 #if ((defined(__clang__) && __has_attribute(__target__)) \
89 #if ((defined(__clang__) && __has_attribute(__target__)) \
90 || (defined(__GNUC__) \
90 || (defined(__GNUC__) \
91 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
91 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
92 && (defined(__x86_64__) || defined(_M_X86)) \
92 && (defined(__x86_64__) || defined(_M_X86)) \
93 && !defined(__BMI2__)
93 && !defined(__BMI2__)
94 # define DYNAMIC_BMI2 1
94 # define DYNAMIC_BMI2 1
95 #else
95 #else
96 # define DYNAMIC_BMI2 0
96 # define DYNAMIC_BMI2 0
97 #endif
97 #endif
98 #endif
98 #endif
99
99
100 /* prefetch
100 /* prefetch
101 * can be disabled, by declaring NO_PREFETCH build macro */
101 * can be disabled, by declaring NO_PREFETCH build macro */
102 #if defined(NO_PREFETCH)
102 #if defined(NO_PREFETCH)
103 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
103 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
104 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
104 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
105 #else
105 #else
106 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
106 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
107 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
107 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
108 # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
108 # define PREFETCH_L1(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
109 # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
109 # define PREFETCH_L2(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
110 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
110 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
111 # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
111 # define PREFETCH_L1(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
112 # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
112 # define PREFETCH_L2(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
113 # else
113 # else
114 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
114 # define PREFETCH_L1(ptr) (void)(ptr) /* disabled */
115 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
115 # define PREFETCH_L2(ptr) (void)(ptr) /* disabled */
116 # endif
116 # endif
117 #endif /* NO_PREFETCH */
117 #endif /* NO_PREFETCH */
118
118
119 #define CACHELINE_SIZE 64
119 #define CACHELINE_SIZE 64
120
120
121 #define PREFETCH_AREA(p, s) { \
121 #define PREFETCH_AREA(p, s) { \
122 const char* const _ptr = (const char*)(p); \
122 const char* const _ptr = (const char*)(p); \
123 size_t const _size = (size_t)(s); \
123 size_t const _size = (size_t)(s); \
124 size_t _pos; \
124 size_t _pos; \
125 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
125 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
126 PREFETCH_L2(_ptr + _pos); \
126 PREFETCH_L2(_ptr + _pos); \
127 } \
127 } \
128 }
128 }
129
129
130 /* vectorization */
131 #if !defined(__clang__) && defined(__GNUC__)
132 # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
133 #else
134 # define DONT_VECTORIZE
135 #endif
136
130 /* disable warnings */
137 /* disable warnings */
131 #ifdef _MSC_VER /* Visual Studio */
138 #ifdef _MSC_VER /* Visual Studio */
132 # include <intrin.h> /* For Visual 2005 */
139 # include <intrin.h> /* For Visual 2005 */
133 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
140 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
134 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
141 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
135 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
142 # pragma warning(disable : 4204) /* disable: C4204: non-constant aggregate initializer */
136 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
143 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
137 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
144 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
138 #endif
145 #endif
139
146
140 #endif /* ZSTD_COMPILER_H */
147 #endif /* ZSTD_COMPILER_H */
@@ -1,708 +1,708 b''
1 /* ******************************************************************
1 /* ******************************************************************
2 FSE : Finite State Entropy codec
2 FSE : Finite State Entropy codec
3 Public Prototypes declaration
3 Public Prototypes declaration
4 Copyright (C) 2013-2016, Yann Collet.
4 Copyright (C) 2013-2016, Yann Collet.
5
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
7
8 Redistribution and use in source and binary forms, with or without
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
9 modification, are permitted provided that the following conditions are
10 met:
10 met:
11
11
12 * Redistributions of source code must retain the above copyright
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
16 in the documentation and/or other materials provided with the
17 distribution.
17 distribution.
18
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
30
31 You can contact the author at :
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
33 ****************************************************************** */
34
34
35 #if defined (__cplusplus)
35 #if defined (__cplusplus)
36 extern "C" {
36 extern "C" {
37 #endif
37 #endif
38
38
39 #ifndef FSE_H
39 #ifndef FSE_H
40 #define FSE_H
40 #define FSE_H
41
41
42
42
43 /*-*****************************************
43 /*-*****************************************
44 * Dependencies
44 * Dependencies
45 ******************************************/
45 ******************************************/
46 #include <stddef.h> /* size_t, ptrdiff_t */
46 #include <stddef.h> /* size_t, ptrdiff_t */
47
47
48
48
49 /*-*****************************************
49 /*-*****************************************
50 * FSE_PUBLIC_API : control library symbols visibility
50 * FSE_PUBLIC_API : control library symbols visibility
51 ******************************************/
51 ******************************************/
52 #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
52 #if defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) && defined(__GNUC__) && (__GNUC__ >= 4)
53 # define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
53 # define FSE_PUBLIC_API __attribute__ ((visibility ("default")))
54 #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
54 #elif defined(FSE_DLL_EXPORT) && (FSE_DLL_EXPORT==1) /* Visual expected */
55 # define FSE_PUBLIC_API __declspec(dllexport)
55 # define FSE_PUBLIC_API __declspec(dllexport)
56 #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
56 #elif defined(FSE_DLL_IMPORT) && (FSE_DLL_IMPORT==1)
57 # define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
57 # define FSE_PUBLIC_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/
58 #else
58 #else
59 # define FSE_PUBLIC_API
59 # define FSE_PUBLIC_API
60 #endif
60 #endif
61
61
62 /*------ Version ------*/
62 /*------ Version ------*/
63 #define FSE_VERSION_MAJOR 0
63 #define FSE_VERSION_MAJOR 0
64 #define FSE_VERSION_MINOR 9
64 #define FSE_VERSION_MINOR 9
65 #define FSE_VERSION_RELEASE 0
65 #define FSE_VERSION_RELEASE 0
66
66
67 #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
67 #define FSE_LIB_VERSION FSE_VERSION_MAJOR.FSE_VERSION_MINOR.FSE_VERSION_RELEASE
68 #define FSE_QUOTE(str) #str
68 #define FSE_QUOTE(str) #str
69 #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
69 #define FSE_EXPAND_AND_QUOTE(str) FSE_QUOTE(str)
70 #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
70 #define FSE_VERSION_STRING FSE_EXPAND_AND_QUOTE(FSE_LIB_VERSION)
71
71
72 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
72 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
73 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
73 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
74
74
75
75
76 /*-****************************************
76 /*-****************************************
77 * FSE simple functions
77 * FSE simple functions
78 ******************************************/
78 ******************************************/
79 /*! FSE_compress() :
79 /*! FSE_compress() :
80 Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
80 Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
81 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
81 'dst' buffer must be already allocated. Compression runs faster is dstCapacity >= FSE_compressBound(srcSize).
82 @return : size of compressed data (<= dstCapacity).
82 @return : size of compressed data (<= dstCapacity).
83 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
83 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
84 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
84 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
85 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
85 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
86 */
86 */
87 FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
87 FSE_PUBLIC_API size_t FSE_compress(void* dst, size_t dstCapacity,
88 const void* src, size_t srcSize);
88 const void* src, size_t srcSize);
89
89
90 /*! FSE_decompress():
90 /*! FSE_decompress():
91 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
91 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
92 into already allocated destination buffer 'dst', of size 'dstCapacity'.
92 into already allocated destination buffer 'dst', of size 'dstCapacity'.
93 @return : size of regenerated data (<= maxDstSize),
93 @return : size of regenerated data (<= maxDstSize),
94 or an error code, which can be tested using FSE_isError() .
94 or an error code, which can be tested using FSE_isError() .
95
95
96 ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
96 ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
97 Why ? : making this distinction requires a header.
97 Why ? : making this distinction requires a header.
98 Header management is intentionally delegated to the user layer, which can better manage special cases.
98 Header management is intentionally delegated to the user layer, which can better manage special cases.
99 */
99 */
100 FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
100 FSE_PUBLIC_API size_t FSE_decompress(void* dst, size_t dstCapacity,
101 const void* cSrc, size_t cSrcSize);
101 const void* cSrc, size_t cSrcSize);
102
102
103
103
104 /*-*****************************************
104 /*-*****************************************
105 * Tool functions
105 * Tool functions
106 ******************************************/
106 ******************************************/
107 FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
107 FSE_PUBLIC_API size_t FSE_compressBound(size_t size); /* maximum compressed size */
108
108
109 /* Error Management */
109 /* Error Management */
110 FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
110 FSE_PUBLIC_API unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
111 FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
111 FSE_PUBLIC_API const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
112
112
113
113
114 /*-*****************************************
114 /*-*****************************************
115 * FSE advanced functions
115 * FSE advanced functions
116 ******************************************/
116 ******************************************/
117 /*! FSE_compress2() :
117 /*! FSE_compress2() :
118 Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
118 Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
119 Both parameters can be defined as '0' to mean : use default value
119 Both parameters can be defined as '0' to mean : use default value
120 @return : size of compressed data
120 @return : size of compressed data
121 Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
121 Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
122 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
122 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
123 if FSE_isError(return), it's an error code.
123 if FSE_isError(return), it's an error code.
124 */
124 */
125 FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
125 FSE_PUBLIC_API size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
126
126
127
127
128 /*-*****************************************
128 /*-*****************************************
129 * FSE detailed API
129 * FSE detailed API
130 ******************************************/
130 ******************************************/
131 /*!
131 /*!
132 FSE_compress() does the following:
132 FSE_compress() does the following:
133 1. count symbol occurrence from source[] into table count[] (see hist.h)
133 1. count symbol occurrence from source[] into table count[] (see hist.h)
134 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
134 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
135 3. save normalized counters to memory buffer using writeNCount()
135 3. save normalized counters to memory buffer using writeNCount()
136 4. build encoding table 'CTable' from normalized counters
136 4. build encoding table 'CTable' from normalized counters
137 5. encode the data stream using encoding table 'CTable'
137 5. encode the data stream using encoding table 'CTable'
138
138
139 FSE_decompress() does the following:
139 FSE_decompress() does the following:
140 1. read normalized counters with readNCount()
140 1. read normalized counters with readNCount()
141 2. build decoding table 'DTable' from normalized counters
141 2. build decoding table 'DTable' from normalized counters
142 3. decode the data stream using decoding table 'DTable'
142 3. decode the data stream using decoding table 'DTable'
143
143
144 The following API allows targeting specific sub-functions for advanced tasks.
144 The following API allows targeting specific sub-functions for advanced tasks.
145 For example, it's possible to compress several blocks using the same 'CTable',
145 For example, it's possible to compress several blocks using the same 'CTable',
146 or to save and provide normalized distribution using external method.
146 or to save and provide normalized distribution using external method.
147 */
147 */
148
148
149 /* *** COMPRESSION *** */
149 /* *** COMPRESSION *** */
150
150
151 /*! FSE_optimalTableLog():
151 /*! FSE_optimalTableLog():
152 dynamically downsize 'tableLog' when conditions are met.
152 dynamically downsize 'tableLog' when conditions are met.
153 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
153 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
154 @return : recommended tableLog (necessarily <= 'maxTableLog') */
154 @return : recommended tableLog (necessarily <= 'maxTableLog') */
155 FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
155 FSE_PUBLIC_API unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
156
156
157 /*! FSE_normalizeCount():
157 /*! FSE_normalizeCount():
158 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
158 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
159 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
159 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
160 @return : tableLog,
160 @return : tableLog,
161 or an errorCode, which can be tested using FSE_isError() */
161 or an errorCode, which can be tested using FSE_isError() */
162 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
162 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
163 const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
163 const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
164
164
165 /*! FSE_NCountWriteBound():
165 /*! FSE_NCountWriteBound():
166 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
166 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
167 Typically useful for allocation purpose. */
167 Typically useful for allocation purpose. */
168 FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
168 FSE_PUBLIC_API size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
169
169
170 /*! FSE_writeNCount():
170 /*! FSE_writeNCount():
171 Compactly save 'normalizedCounter' into 'buffer'.
171 Compactly save 'normalizedCounter' into 'buffer'.
172 @return : size of the compressed table,
172 @return : size of the compressed table,
173 or an errorCode, which can be tested using FSE_isError(). */
173 or an errorCode, which can be tested using FSE_isError(). */
174 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
174 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
175 const short* normalizedCounter,
175 const short* normalizedCounter,
176 unsigned maxSymbolValue, unsigned tableLog);
176 unsigned maxSymbolValue, unsigned tableLog);
177
177
178 /*! Constructor and Destructor of FSE_CTable.
178 /*! Constructor and Destructor of FSE_CTable.
179 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
179 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
180 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
180 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
181 FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
181 FSE_PUBLIC_API FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog);
182 FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
182 FSE_PUBLIC_API void FSE_freeCTable (FSE_CTable* ct);
183
183
184 /*! FSE_buildCTable():
184 /*! FSE_buildCTable():
185 Builds `ct`, which must be already allocated, using FSE_createCTable().
185 Builds `ct`, which must be already allocated, using FSE_createCTable().
186 @return : 0, or an errorCode, which can be tested using FSE_isError() */
186 @return : 0, or an errorCode, which can be tested using FSE_isError() */
187 FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
187 FSE_PUBLIC_API size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
188
188
189 /*! FSE_compress_usingCTable():
189 /*! FSE_compress_usingCTable():
190 Compress `src` using `ct` into `dst` which must be already allocated.
190 Compress `src` using `ct` into `dst` which must be already allocated.
191 @return : size of compressed data (<= `dstCapacity`),
191 @return : size of compressed data (<= `dstCapacity`),
192 or 0 if compressed data could not fit into `dst`,
192 or 0 if compressed data could not fit into `dst`,
193 or an errorCode, which can be tested using FSE_isError() */
193 or an errorCode, which can be tested using FSE_isError() */
194 FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
194 FSE_PUBLIC_API size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
195
195
196 /*!
196 /*!
197 Tutorial :
197 Tutorial :
198 ----------
198 ----------
199 The first step is to count all symbols. FSE_count() does this job very fast.
199 The first step is to count all symbols. FSE_count() does this job very fast.
200 Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
200 Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
201 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
201 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
202 maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
202 maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
203 FSE_count() will return the number of occurrence of the most frequent symbol.
203 FSE_count() will return the number of occurrence of the most frequent symbol.
204 This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
204 This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
205 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
205 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
206
206
207 The next step is to normalize the frequencies.
207 The next step is to normalize the frequencies.
208 FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
208 FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
209 It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
209 It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
210 You can use 'tableLog'==0 to mean "use default tableLog value".
210 You can use 'tableLog'==0 to mean "use default tableLog value".
211 If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
211 If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
212 which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
212 which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
213
213
214 The result of FSE_normalizeCount() will be saved into a table,
214 The result of FSE_normalizeCount() will be saved into a table,
215 called 'normalizedCounter', which is a table of signed short.
215 called 'normalizedCounter', which is a table of signed short.
216 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
216 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
217 The return value is tableLog if everything proceeded as expected.
217 The return value is tableLog if everything proceeded as expected.
218 It is 0 if there is a single symbol within distribution.
218 It is 0 if there is a single symbol within distribution.
219 If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
219 If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
220
220
221 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
221 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
222 'buffer' must be already allocated.
222 'buffer' must be already allocated.
223 For guaranteed success, buffer size must be at least FSE_headerBound().
223 For guaranteed success, buffer size must be at least FSE_headerBound().
224 The result of the function is the number of bytes written into 'buffer'.
224 The result of the function is the number of bytes written into 'buffer'.
225 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
225 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
226
226
227 'normalizedCounter' can then be used to create the compression table 'CTable'.
227 'normalizedCounter' can then be used to create the compression table 'CTable'.
228 The space required by 'CTable' must be already allocated, using FSE_createCTable().
228 The space required by 'CTable' must be already allocated, using FSE_createCTable().
229 You can then use FSE_buildCTable() to fill 'CTable'.
229 You can then use FSE_buildCTable() to fill 'CTable'.
230 If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
230 If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
231
231
232 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
232 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
233 Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
233 Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
234 The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
234 The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
235 If it returns '0', compressed data could not fit into 'dst'.
235 If it returns '0', compressed data could not fit into 'dst'.
236 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
236 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
237 */
237 */
238
238
239
239
240 /* *** DECOMPRESSION *** */
240 /* *** DECOMPRESSION *** */
241
241
242 /*! FSE_readNCount():
242 /*! FSE_readNCount():
243 Read compactly saved 'normalizedCounter' from 'rBuffer'.
243 Read compactly saved 'normalizedCounter' from 'rBuffer'.
244 @return : size read from 'rBuffer',
244 @return : size read from 'rBuffer',
245 or an errorCode, which can be tested using FSE_isError().
245 or an errorCode, which can be tested using FSE_isError().
246 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
246 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
247 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
247 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
248 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
248 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
249 const void* rBuffer, size_t rBuffSize);
249 const void* rBuffer, size_t rBuffSize);
250
250
251 /*! Constructor and Destructor of FSE_DTable.
251 /*! Constructor and Destructor of FSE_DTable.
252 Note that its size depends on 'tableLog' */
252 Note that its size depends on 'tableLog' */
253 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
253 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
254 FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
254 FSE_PUBLIC_API FSE_DTable* FSE_createDTable(unsigned tableLog);
255 FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
255 FSE_PUBLIC_API void FSE_freeDTable(FSE_DTable* dt);
256
256
257 /*! FSE_buildDTable():
257 /*! FSE_buildDTable():
258 Builds 'dt', which must be already allocated, using FSE_createDTable().
258 Builds 'dt', which must be already allocated, using FSE_createDTable().
259 return : 0, or an errorCode, which can be tested using FSE_isError() */
259 return : 0, or an errorCode, which can be tested using FSE_isError() */
260 FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
260 FSE_PUBLIC_API size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
261
261
262 /*! FSE_decompress_usingDTable():
262 /*! FSE_decompress_usingDTable():
263 Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
263 Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
264 into `dst` which must be already allocated.
264 into `dst` which must be already allocated.
265 @return : size of regenerated data (necessarily <= `dstCapacity`),
265 @return : size of regenerated data (necessarily <= `dstCapacity`),
266 or an errorCode, which can be tested using FSE_isError() */
266 or an errorCode, which can be tested using FSE_isError() */
267 FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
267 FSE_PUBLIC_API size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
268
268
269 /*!
269 /*!
270 Tutorial :
270 Tutorial :
271 ----------
271 ----------
272 (Note : these functions only decompress FSE-compressed blocks.
272 (Note : these functions only decompress FSE-compressed blocks.
273 If block is uncompressed, use memcpy() instead
273 If block is uncompressed, use memcpy() instead
274 If block is a single repeated byte, use memset() instead )
274 If block is a single repeated byte, use memset() instead )
275
275
276 The first step is to obtain the normalized frequencies of symbols.
276 The first step is to obtain the normalized frequencies of symbols.
277 This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
277 This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
278 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
278 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
279 In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
279 In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
280 or size the table to handle worst case situations (typically 256).
280 or size the table to handle worst case situations (typically 256).
281 FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
281 FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
282 The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
282 The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
283 Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
283 Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
284 If there is an error, the function will return an error code, which can be tested using FSE_isError().
284 If there is an error, the function will return an error code, which can be tested using FSE_isError().
285
285
286 The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
286 The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
287 This is performed by the function FSE_buildDTable().
287 This is performed by the function FSE_buildDTable().
288 The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
288 The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
289 If there is an error, the function will return an error code, which can be tested using FSE_isError().
289 If there is an error, the function will return an error code, which can be tested using FSE_isError().
290
290
291 `FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
291 `FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
292 `cSrcSize` must be strictly correct, otherwise decompression will fail.
292 `cSrcSize` must be strictly correct, otherwise decompression will fail.
293 FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
293 FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
294 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
294 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
295 */
295 */
296
296
297 #endif /* FSE_H */
297 #endif /* FSE_H */
298
298
299 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
299 #if defined(FSE_STATIC_LINKING_ONLY) && !defined(FSE_H_FSE_STATIC_LINKING_ONLY)
300 #define FSE_H_FSE_STATIC_LINKING_ONLY
300 #define FSE_H_FSE_STATIC_LINKING_ONLY
301
301
302 /* *** Dependency *** */
302 /* *** Dependency *** */
303 #include "bitstream.h"
303 #include "bitstream.h"
304
304
305
305
306 /* *****************************************
306 /* *****************************************
307 * Static allocation
307 * Static allocation
308 *******************************************/
308 *******************************************/
309 /* FSE buffer bounds */
309 /* FSE buffer bounds */
310 #define FSE_NCOUNTBOUND 512
310 #define FSE_NCOUNTBOUND 512
311 #define FSE_BLOCKBOUND(size) (size + (size>>7))
311 #define FSE_BLOCKBOUND(size) (size + (size>>7))
312 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
312 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
313
313
314 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
314 /* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
315 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
315 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
316 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
316 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
317
317
318 /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
318 /* or use the size to malloc() space directly. Pay attention to alignment restrictions though */
319 #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
319 #define FSE_CTABLE_SIZE(maxTableLog, maxSymbolValue) (FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) * sizeof(FSE_CTable))
320 #define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
320 #define FSE_DTABLE_SIZE(maxTableLog) (FSE_DTABLE_SIZE_U32(maxTableLog) * sizeof(FSE_DTable))
321
321
322
322
323 /* *****************************************
323 /* *****************************************
324 * FSE advanced API
324 * FSE advanced API
325 ***************************************** */
325 ***************************************** */
326
326
327 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
327 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
328 /**< same as FSE_optimalTableLog(), which used `minus==2` */
328 /**< same as FSE_optimalTableLog(), which used `minus==2` */
329
329
330 /* FSE_compress_wksp() :
330 /* FSE_compress_wksp() :
331 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
331 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
332 * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
332 * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
333 */
333 */
334 #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
334 #define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue) ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + ((maxTableLog > 12) ? (1 << (maxTableLog - 2)) : 1024) )
335 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
335 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
336
336
337 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
337 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
338 /**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
338 /**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
339
339
340 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
340 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
341 /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
341 /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
342
342
343 /* FSE_buildCTable_wksp() :
343 /* FSE_buildCTable_wksp() :
344 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
344 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
345 * `wkspSize` must be >= `(1<<tableLog)`.
345 * `wkspSize` must be >= `(1<<tableLog)`.
346 */
346 */
347 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
347 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
348
348
349 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
349 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
350 /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
350 /**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
351
351
352 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
352 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
353 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
353 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
354
354
355 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
355 size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
356 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
356 /**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
357
357
358 typedef enum {
358 typedef enum {
359 FSE_repeat_none, /**< Cannot use the previous table */
359 FSE_repeat_none, /**< Cannot use the previous table */
360 FSE_repeat_check, /**< Can use the previous table but it must be checked */
360 FSE_repeat_check, /**< Can use the previous table but it must be checked */
361 FSE_repeat_valid /**< Can use the previous table and it is asumed to be valid */
361 FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */
362 } FSE_repeat;
362 } FSE_repeat;
363
363
364 /* *****************************************
364 /* *****************************************
365 * FSE symbol compression API
365 * FSE symbol compression API
366 *******************************************/
366 *******************************************/
367 /*!
367 /*!
368 This API consists of small unitary functions, which highly benefit from being inlined.
368 This API consists of small unitary functions, which highly benefit from being inlined.
369 Hence their body are included in next section.
369 Hence their body are included in next section.
370 */
370 */
371 typedef struct {
371 typedef struct {
372 ptrdiff_t value;
372 ptrdiff_t value;
373 const void* stateTable;
373 const void* stateTable;
374 const void* symbolTT;
374 const void* symbolTT;
375 unsigned stateLog;
375 unsigned stateLog;
376 } FSE_CState_t;
376 } FSE_CState_t;
377
377
378 static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
378 static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
379
379
380 static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
380 static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
381
381
382 static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
382 static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
383
383
384 /**<
384 /**<
385 These functions are inner components of FSE_compress_usingCTable().
385 These functions are inner components of FSE_compress_usingCTable().
386 They allow the creation of custom streams, mixing multiple tables and bit sources.
386 They allow the creation of custom streams, mixing multiple tables and bit sources.
387
387
388 A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
388 A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
389 So the first symbol you will encode is the last you will decode, like a LIFO stack.
389 So the first symbol you will encode is the last you will decode, like a LIFO stack.
390
390
391 You will need a few variables to track your CStream. They are :
391 You will need a few variables to track your CStream. They are :
392
392
393 FSE_CTable ct; // Provided by FSE_buildCTable()
393 FSE_CTable ct; // Provided by FSE_buildCTable()
394 BIT_CStream_t bitStream; // bitStream tracking structure
394 BIT_CStream_t bitStream; // bitStream tracking structure
395 FSE_CState_t state; // State tracking structure (can have several)
395 FSE_CState_t state; // State tracking structure (can have several)
396
396
397
397
398 The first thing to do is to init bitStream and state.
398 The first thing to do is to init bitStream and state.
399 size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
399 size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
400 FSE_initCState(&state, ct);
400 FSE_initCState(&state, ct);
401
401
402 Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
402 Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
403 You can then encode your input data, byte after byte.
403 You can then encode your input data, byte after byte.
404 FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
404 FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
405 Remember decoding will be done in reverse direction.
405 Remember decoding will be done in reverse direction.
406 FSE_encodeByte(&bitStream, &state, symbol);
406 FSE_encodeByte(&bitStream, &state, symbol);
407
407
408 At any time, you can also add any bit sequence.
408 At any time, you can also add any bit sequence.
409 Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
409 Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
410 BIT_addBits(&bitStream, bitField, nbBits);
410 BIT_addBits(&bitStream, bitField, nbBits);
411
411
412 The above methods don't commit data to memory, they just store it into local register, for speed.
412 The above methods don't commit data to memory, they just store it into local register, for speed.
413 Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
413 Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
414 Writing data to memory is a manual operation, performed by the flushBits function.
414 Writing data to memory is a manual operation, performed by the flushBits function.
415 BIT_flushBits(&bitStream);
415 BIT_flushBits(&bitStream);
416
416
417 Your last FSE encoding operation shall be to flush your last state value(s).
417 Your last FSE encoding operation shall be to flush your last state value(s).
418 FSE_flushState(&bitStream, &state);
418 FSE_flushState(&bitStream, &state);
419
419
420 Finally, you must close the bitStream.
420 Finally, you must close the bitStream.
421 The function returns the size of CStream in bytes.
421 The function returns the size of CStream in bytes.
422 If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
422 If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
423 If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
423 If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
424 size_t size = BIT_closeCStream(&bitStream);
424 size_t size = BIT_closeCStream(&bitStream);
425 */
425 */
426
426
427
427
428 /* *****************************************
428 /* *****************************************
429 * FSE symbol decompression API
429 * FSE symbol decompression API
430 *******************************************/
430 *******************************************/
431 typedef struct {
431 typedef struct {
432 size_t state;
432 size_t state;
433 const void* table; /* precise table may vary, depending on U16 */
433 const void* table; /* precise table may vary, depending on U16 */
434 } FSE_DState_t;
434 } FSE_DState_t;
435
435
436
436
437 static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
437 static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
438
438
439 static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
439 static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
440
440
441 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
441 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
442
442
443 /**<
443 /**<
444 Let's now decompose FSE_decompress_usingDTable() into its unitary components.
444 Let's now decompose FSE_decompress_usingDTable() into its unitary components.
445 You will decode FSE-encoded symbols from the bitStream,
445 You will decode FSE-encoded symbols from the bitStream,
446 and also any other bitFields you put in, **in reverse order**.
446 and also any other bitFields you put in, **in reverse order**.
447
447
448 You will need a few variables to track your bitStream. They are :
448 You will need a few variables to track your bitStream. They are :
449
449
450 BIT_DStream_t DStream; // Stream context
450 BIT_DStream_t DStream; // Stream context
451 FSE_DState_t DState; // State context. Multiple ones are possible
451 FSE_DState_t DState; // State context. Multiple ones are possible
452 FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
452 FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
453
453
454 The first thing to do is to init the bitStream.
454 The first thing to do is to init the bitStream.
455 errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
455 errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
456
456
457 You should then retrieve your initial state(s)
457 You should then retrieve your initial state(s)
458 (in reverse flushing order if you have several ones) :
458 (in reverse flushing order if you have several ones) :
459 errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
459 errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
460
460
461 You can then decode your data, symbol after symbol.
461 You can then decode your data, symbol after symbol.
462 For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
462 For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
463 Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
463 Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
464 unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
464 unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
465
465
466 You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
466 You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
467 Note : maximum allowed nbBits is 25, for 32-bits compatibility
467 Note : maximum allowed nbBits is 25, for 32-bits compatibility
468 size_t bitField = BIT_readBits(&DStream, nbBits);
468 size_t bitField = BIT_readBits(&DStream, nbBits);
469
469
470 All above operations only read from local register (which size depends on size_t).
470 All above operations only read from local register (which size depends on size_t).
471 Refueling the register from memory is manually performed by the reload method.
471 Refueling the register from memory is manually performed by the reload method.
472 endSignal = FSE_reloadDStream(&DStream);
472 endSignal = FSE_reloadDStream(&DStream);
473
473
474 BIT_reloadDStream() result tells if there is still some more data to read from DStream.
474 BIT_reloadDStream() result tells if there is still some more data to read from DStream.
475 BIT_DStream_unfinished : there is still some data left into the DStream.
475 BIT_DStream_unfinished : there is still some data left into the DStream.
476 BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
476 BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
477 BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
477 BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
478 BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
478 BIT_DStream_tooFar : Dstream went too far. Decompression result is corrupted.
479
479
480 When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
480 When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
481 to properly detect the exact end of stream.
481 to properly detect the exact end of stream.
482 After each decoded symbol, check if DStream is fully consumed using this simple test :
482 After each decoded symbol, check if DStream is fully consumed using this simple test :
483 BIT_reloadDStream(&DStream) >= BIT_DStream_completed
483 BIT_reloadDStream(&DStream) >= BIT_DStream_completed
484
484
485 When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
485 When it's done, verify decompression is fully completed, by checking both DStream and the relevant states.
486 Checking if DStream has reached its end is performed by :
486 Checking if DStream has reached its end is performed by :
487 BIT_endOfDStream(&DStream);
487 BIT_endOfDStream(&DStream);
488 Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
488 Check also the states. There might be some symbols left there, if some high probability ones (>50%) are possible.
489 FSE_endOfDState(&DState);
489 FSE_endOfDState(&DState);
490 */
490 */
491
491
492
492
493 /* *****************************************
493 /* *****************************************
494 * FSE unsafe API
494 * FSE unsafe API
495 *******************************************/
495 *******************************************/
496 static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
496 static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
497 /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
497 /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
498
498
499
499
500 /* *****************************************
500 /* *****************************************
501 * Implementation of inlined functions
501 * Implementation of inlined functions
502 *******************************************/
502 *******************************************/
503 typedef struct {
503 typedef struct {
504 int deltaFindState;
504 int deltaFindState;
505 U32 deltaNbBits;
505 U32 deltaNbBits;
506 } FSE_symbolCompressionTransform; /* total 8 bytes */
506 } FSE_symbolCompressionTransform; /* total 8 bytes */
507
507
508 MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
508 MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
509 {
509 {
510 const void* ptr = ct;
510 const void* ptr = ct;
511 const U16* u16ptr = (const U16*) ptr;
511 const U16* u16ptr = (const U16*) ptr;
512 const U32 tableLog = MEM_read16(ptr);
512 const U32 tableLog = MEM_read16(ptr);
513 statePtr->value = (ptrdiff_t)1<<tableLog;
513 statePtr->value = (ptrdiff_t)1<<tableLog;
514 statePtr->stateTable = u16ptr+2;
514 statePtr->stateTable = u16ptr+2;
515 statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
515 statePtr->symbolTT = ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1);
516 statePtr->stateLog = tableLog;
516 statePtr->stateLog = tableLog;
517 }
517 }
518
518
519
519
520 /*! FSE_initCState2() :
520 /*! FSE_initCState2() :
521 * Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
521 * Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
522 * uses the smallest state value possible, saving the cost of this symbol */
522 * uses the smallest state value possible, saving the cost of this symbol */
523 MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
523 MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
524 {
524 {
525 FSE_initCState(statePtr, ct);
525 FSE_initCState(statePtr, ct);
526 { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
526 { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
527 const U16* stateTable = (const U16*)(statePtr->stateTable);
527 const U16* stateTable = (const U16*)(statePtr->stateTable);
528 U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
528 U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
529 statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
529 statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
530 statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
530 statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
531 }
531 }
532 }
532 }
533
533
534 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
534 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, unsigned symbol)
535 {
535 {
536 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
536 FSE_symbolCompressionTransform const symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
537 const U16* const stateTable = (const U16*)(statePtr->stateTable);
537 const U16* const stateTable = (const U16*)(statePtr->stateTable);
538 U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
538 U32 const nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
539 BIT_addBits(bitC, statePtr->value, nbBitsOut);
539 BIT_addBits(bitC, statePtr->value, nbBitsOut);
540 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
540 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
541 }
541 }
542
542
543 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
543 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
544 {
544 {
545 BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
545 BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
546 BIT_flushBits(bitC);
546 BIT_flushBits(bitC);
547 }
547 }
548
548
549
549
550 /* FSE_getMaxNbBits() :
550 /* FSE_getMaxNbBits() :
551 * Approximate maximum cost of a symbol, in bits.
551 * Approximate maximum cost of a symbol, in bits.
552 * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
552 * Fractional get rounded up (i.e : a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
553 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
553 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
554 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
554 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
555 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
555 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
556 {
556 {
557 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
557 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
558 return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
558 return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
559 }
559 }
560
560
561 /* FSE_bitCost() :
561 /* FSE_bitCost() :
562 * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
562 * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
563 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
563 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
564 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
564 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
565 MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
565 MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
566 {
566 {
567 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
567 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
568 U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
568 U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
569 U32 const threshold = (minNbBits+1) << 16;
569 U32 const threshold = (minNbBits+1) << 16;
570 assert(tableLog < 16);
570 assert(tableLog < 16);
571 assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
571 assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
572 { U32 const tableSize = 1 << tableLog;
572 { U32 const tableSize = 1 << tableLog;
573 U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
573 U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
574 U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
574 U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
575 U32 const bitMultiplier = 1 << accuracyLog;
575 U32 const bitMultiplier = 1 << accuracyLog;
576 assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
576 assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
577 assert(normalizedDeltaFromThreshold <= bitMultiplier);
577 assert(normalizedDeltaFromThreshold <= bitMultiplier);
578 return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
578 return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
579 }
579 }
580 }
580 }
581
581
582
582
583 /* ====== Decompression ====== */
583 /* ====== Decompression ====== */
584
584
585 typedef struct {
585 typedef struct {
586 U16 tableLog;
586 U16 tableLog;
587 U16 fastMode;
587 U16 fastMode;
588 } FSE_DTableHeader; /* sizeof U32 */
588 } FSE_DTableHeader; /* sizeof U32 */
589
589
590 typedef struct
590 typedef struct
591 {
591 {
592 unsigned short newState;
592 unsigned short newState;
593 unsigned char symbol;
593 unsigned char symbol;
594 unsigned char nbBits;
594 unsigned char nbBits;
595 } FSE_decode_t; /* size == U32 */
595 } FSE_decode_t; /* size == U32 */
596
596
597 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
597 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
598 {
598 {
599 const void* ptr = dt;
599 const void* ptr = dt;
600 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
600 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
601 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
601 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
602 BIT_reloadDStream(bitD);
602 BIT_reloadDStream(bitD);
603 DStatePtr->table = dt + 1;
603 DStatePtr->table = dt + 1;
604 }
604 }
605
605
606 MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
606 MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
607 {
607 {
608 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
608 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
609 return DInfo.symbol;
609 return DInfo.symbol;
610 }
610 }
611
611
612 MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
612 MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
613 {
613 {
614 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
614 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
615 U32 const nbBits = DInfo.nbBits;
615 U32 const nbBits = DInfo.nbBits;
616 size_t const lowBits = BIT_readBits(bitD, nbBits);
616 size_t const lowBits = BIT_readBits(bitD, nbBits);
617 DStatePtr->state = DInfo.newState + lowBits;
617 DStatePtr->state = DInfo.newState + lowBits;
618 }
618 }
619
619
620 MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
620 MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
621 {
621 {
622 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
622 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
623 U32 const nbBits = DInfo.nbBits;
623 U32 const nbBits = DInfo.nbBits;
624 BYTE const symbol = DInfo.symbol;
624 BYTE const symbol = DInfo.symbol;
625 size_t const lowBits = BIT_readBits(bitD, nbBits);
625 size_t const lowBits = BIT_readBits(bitD, nbBits);
626
626
627 DStatePtr->state = DInfo.newState + lowBits;
627 DStatePtr->state = DInfo.newState + lowBits;
628 return symbol;
628 return symbol;
629 }
629 }
630
630
631 /*! FSE_decodeSymbolFast() :
631 /*! FSE_decodeSymbolFast() :
632 unsafe, only works if no symbol has a probability > 50% */
632 unsafe, only works if no symbol has a probability > 50% */
633 MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
633 MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
634 {
634 {
635 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
635 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
636 U32 const nbBits = DInfo.nbBits;
636 U32 const nbBits = DInfo.nbBits;
637 BYTE const symbol = DInfo.symbol;
637 BYTE const symbol = DInfo.symbol;
638 size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
638 size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
639
639
640 DStatePtr->state = DInfo.newState + lowBits;
640 DStatePtr->state = DInfo.newState + lowBits;
641 return symbol;
641 return symbol;
642 }
642 }
643
643
644 MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
644 MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
645 {
645 {
646 return DStatePtr->state == 0;
646 return DStatePtr->state == 0;
647 }
647 }
648
648
649
649
650
650
651 #ifndef FSE_COMMONDEFS_ONLY
651 #ifndef FSE_COMMONDEFS_ONLY
652
652
653 /* **************************************************************
653 /* **************************************************************
654 * Tuning parameters
654 * Tuning parameters
655 ****************************************************************/
655 ****************************************************************/
656 /*!MEMORY_USAGE :
656 /*!MEMORY_USAGE :
657 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
657 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
658 * Increasing memory usage improves compression ratio
658 * Increasing memory usage improves compression ratio
659 * Reduced memory usage can improve speed, due to cache effect
659 * Reduced memory usage can improve speed, due to cache effect
660 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
660 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
661 #ifndef FSE_MAX_MEMORY_USAGE
661 #ifndef FSE_MAX_MEMORY_USAGE
662 # define FSE_MAX_MEMORY_USAGE 14
662 # define FSE_MAX_MEMORY_USAGE 14
663 #endif
663 #endif
664 #ifndef FSE_DEFAULT_MEMORY_USAGE
664 #ifndef FSE_DEFAULT_MEMORY_USAGE
665 # define FSE_DEFAULT_MEMORY_USAGE 13
665 # define FSE_DEFAULT_MEMORY_USAGE 13
666 #endif
666 #endif
667
667
668 /*!FSE_MAX_SYMBOL_VALUE :
668 /*!FSE_MAX_SYMBOL_VALUE :
669 * Maximum symbol value authorized.
669 * Maximum symbol value authorized.
670 * Required for proper stack allocation */
670 * Required for proper stack allocation */
671 #ifndef FSE_MAX_SYMBOL_VALUE
671 #ifndef FSE_MAX_SYMBOL_VALUE
672 # define FSE_MAX_SYMBOL_VALUE 255
672 # define FSE_MAX_SYMBOL_VALUE 255
673 #endif
673 #endif
674
674
675 /* **************************************************************
675 /* **************************************************************
676 * template functions type & suffix
676 * template functions type & suffix
677 ****************************************************************/
677 ****************************************************************/
678 #define FSE_FUNCTION_TYPE BYTE
678 #define FSE_FUNCTION_TYPE BYTE
679 #define FSE_FUNCTION_EXTENSION
679 #define FSE_FUNCTION_EXTENSION
680 #define FSE_DECODE_TYPE FSE_decode_t
680 #define FSE_DECODE_TYPE FSE_decode_t
681
681
682
682
683 #endif /* !FSE_COMMONDEFS_ONLY */
683 #endif /* !FSE_COMMONDEFS_ONLY */
684
684
685
685
686 /* ***************************************************************
686 /* ***************************************************************
687 * Constants
687 * Constants
688 *****************************************************************/
688 *****************************************************************/
689 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
689 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
690 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
690 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
691 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
691 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
692 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
692 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
693 #define FSE_MIN_TABLELOG 5
693 #define FSE_MIN_TABLELOG 5
694
694
695 #define FSE_TABLELOG_ABSOLUTE_MAX 15
695 #define FSE_TABLELOG_ABSOLUTE_MAX 15
696 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
696 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
697 # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
697 # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
698 #endif
698 #endif
699
699
700 #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
700 #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
701
701
702
702
703 #endif /* FSE_STATIC_LINKING_ONLY */
703 #endif /* FSE_STATIC_LINKING_ONLY */
704
704
705
705
706 #if defined (__cplusplus)
706 #if defined (__cplusplus)
707 }
707 }
708 #endif
708 #endif
@@ -1,380 +1,380 b''
1 /*
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under both the BSD-style license (found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 * You may select, at your option, one of the above-listed licenses.
9 */
9 */
10
10
11 #ifndef MEM_H_MODULE
11 #ifndef MEM_H_MODULE
12 #define MEM_H_MODULE
12 #define MEM_H_MODULE
13
13
14 #if defined (__cplusplus)
14 #if defined (__cplusplus)
15 extern "C" {
15 extern "C" {
16 #endif
16 #endif
17
17
18 /*-****************************************
18 /*-****************************************
19 * Dependencies
19 * Dependencies
20 ******************************************/
20 ******************************************/
21 #include <stddef.h> /* size_t, ptrdiff_t */
21 #include <stddef.h> /* size_t, ptrdiff_t */
22 #include <string.h> /* memcpy */
22 #include <string.h> /* memcpy */
23
23
24
24
25 /*-****************************************
25 /*-****************************************
26 * Compiler specifics
26 * Compiler specifics
27 ******************************************/
27 ******************************************/
28 #if defined(_MSC_VER) /* Visual Studio */
28 #if defined(_MSC_VER) /* Visual Studio */
29 # include <stdlib.h> /* _byteswap_ulong */
29 # include <stdlib.h> /* _byteswap_ulong */
30 # include <intrin.h> /* _byteswap_* */
30 # include <intrin.h> /* _byteswap_* */
31 #endif
31 #endif
32 #if defined(__GNUC__)
32 #if defined(__GNUC__)
33 # define MEM_STATIC static __inline __attribute__((unused))
33 # define MEM_STATIC static __inline __attribute__((unused))
34 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
34 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
35 # define MEM_STATIC static inline
35 # define MEM_STATIC static inline
36 #elif defined(_MSC_VER)
36 #elif defined(_MSC_VER)
37 # define MEM_STATIC static __inline
37 # define MEM_STATIC static __inline
38 #else
38 #else
39 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
39 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
40 #endif
40 #endif
41
41
42 #ifndef __has_builtin
42 #ifndef __has_builtin
43 # define __has_builtin(x) 0 /* compat. with non-clang compilers */
43 # define __has_builtin(x) 0 /* compat. with non-clang compilers */
44 #endif
44 #endif
45
45
46 /* code only tested on 32 and 64 bits systems */
46 /* code only tested on 32 and 64 bits systems */
47 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
47 #define MEM_STATIC_ASSERT(c) { enum { MEM_static_assert = 1/(int)(!!(c)) }; }
48 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
48 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
49
49
50
50
51 /*-**************************************************************
51 /*-**************************************************************
52 * Basic Types
52 * Basic Types
53 *****************************************************************/
53 *****************************************************************/
54 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
54 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
55 # include <stdint.h>
55 # include <stdint.h>
56 typedef uint8_t BYTE;
56 typedef uint8_t BYTE;
57 typedef uint16_t U16;
57 typedef uint16_t U16;
58 typedef int16_t S16;
58 typedef int16_t S16;
59 typedef uint32_t U32;
59 typedef uint32_t U32;
60 typedef int32_t S32;
60 typedef int32_t S32;
61 typedef uint64_t U64;
61 typedef uint64_t U64;
62 typedef int64_t S64;
62 typedef int64_t S64;
63 #else
63 #else
64 # include <limits.h>
64 # include <limits.h>
65 #if CHAR_BIT != 8
65 #if CHAR_BIT != 8
66 # error "this implementation requires char to be exactly 8-bit type"
66 # error "this implementation requires char to be exactly 8-bit type"
67 #endif
67 #endif
68 typedef unsigned char BYTE;
68 typedef unsigned char BYTE;
69 #if USHRT_MAX != 65535
69 #if USHRT_MAX != 65535
70 # error "this implementation requires short to be exactly 16-bit type"
70 # error "this implementation requires short to be exactly 16-bit type"
71 #endif
71 #endif
72 typedef unsigned short U16;
72 typedef unsigned short U16;
73 typedef signed short S16;
73 typedef signed short S16;
74 #if UINT_MAX != 4294967295
74 #if UINT_MAX != 4294967295
75 # error "this implementation requires int to be exactly 32-bit type"
75 # error "this implementation requires int to be exactly 32-bit type"
76 #endif
76 #endif
77 typedef unsigned int U32;
77 typedef unsigned int U32;
78 typedef signed int S32;
78 typedef signed int S32;
79 /* note : there are no limits defined for long long type in C90.
79 /* note : there are no limits defined for long long type in C90.
80 * limits exist in C99, however, in such case, <stdint.h> is preferred */
80 * limits exist in C99, however, in such case, <stdint.h> is preferred */
81 typedef unsigned long long U64;
81 typedef unsigned long long U64;
82 typedef signed long long S64;
82 typedef signed long long S64;
83 #endif
83 #endif
84
84
85
85
86 /*-**************************************************************
86 /*-**************************************************************
87 * Memory I/O
87 * Memory I/O
88 *****************************************************************/
88 *****************************************************************/
89 /* MEM_FORCE_MEMORY_ACCESS :
89 /* MEM_FORCE_MEMORY_ACCESS :
90 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
90 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
91 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
91 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
92 * The below switch allow to select different access method for improved performance.
92 * The below switch allow to select different access method for improved performance.
93 * Method 0 (default) : use `memcpy()`. Safe and portable.
93 * Method 0 (default) : use `memcpy()`. Safe and portable.
94 * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
94 * Method 1 : `__packed` statement. It depends on compiler extension (i.e., not portable).
95 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
95 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
96 * Method 2 : direct access. This method is portable but violate C standard.
96 * Method 2 : direct access. This method is portable but violate C standard.
97 * It can generate buggy code on targets depending on alignment.
97 * It can generate buggy code on targets depending on alignment.
98 * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
98 * In some circumstances, it's the only known way to get the most performance (i.e. GCC + ARMv6)
99 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
99 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
100 * Prefer these methods in priority order (0 > 1 > 2)
100 * Prefer these methods in priority order (0 > 1 > 2)
101 */
101 */
102 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
102 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
103 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
103 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
104 # define MEM_FORCE_MEMORY_ACCESS 2
104 # define MEM_FORCE_MEMORY_ACCESS 2
105 # elif defined(__INTEL_COMPILER) || defined(__GNUC__)
105 # elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__)
106 # define MEM_FORCE_MEMORY_ACCESS 1
106 # define MEM_FORCE_MEMORY_ACCESS 1
107 # endif
107 # endif
108 #endif
108 #endif
109
109
110 MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
110 MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
111 MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
111 MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
112
112
113 MEM_STATIC unsigned MEM_isLittleEndian(void)
113 MEM_STATIC unsigned MEM_isLittleEndian(void)
114 {
114 {
115 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
115 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
116 return one.c[0];
116 return one.c[0];
117 }
117 }
118
118
119 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
119 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
120
120
121 /* violates C standard, by lying on structure alignment.
121 /* violates C standard, by lying on structure alignment.
122 Only use if no other choice to achieve best performance on target platform */
122 Only use if no other choice to achieve best performance on target platform */
123 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
123 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
124 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
124 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
125 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
125 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
126 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
126 MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
127
127
128 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
128 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
129 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
129 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
130 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
130 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
131
131
132 #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
132 #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
133
133
134 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
134 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
135 /* currently only defined for gcc and icc */
135 /* currently only defined for gcc and icc */
136 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
136 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
137 __pragma( pack(push, 1) )
137 __pragma( pack(push, 1) )
138 typedef struct { U16 v; } unalign16;
138 typedef struct { U16 v; } unalign16;
139 typedef struct { U32 v; } unalign32;
139 typedef struct { U32 v; } unalign32;
140 typedef struct { U64 v; } unalign64;
140 typedef struct { U64 v; } unalign64;
141 typedef struct { size_t v; } unalignArch;
141 typedef struct { size_t v; } unalignArch;
142 __pragma( pack(pop) )
142 __pragma( pack(pop) )
143 #else
143 #else
144 typedef struct { U16 v; } __attribute__((packed)) unalign16;
144 typedef struct { U16 v; } __attribute__((packed)) unalign16;
145 typedef struct { U32 v; } __attribute__((packed)) unalign32;
145 typedef struct { U32 v; } __attribute__((packed)) unalign32;
146 typedef struct { U64 v; } __attribute__((packed)) unalign64;
146 typedef struct { U64 v; } __attribute__((packed)) unalign64;
147 typedef struct { size_t v; } __attribute__((packed)) unalignArch;
147 typedef struct { size_t v; } __attribute__((packed)) unalignArch;
148 #endif
148 #endif
149
149
150 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
150 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign16*)ptr)->v; }
151 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
151 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign32*)ptr)->v; }
152 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
152 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign64*)ptr)->v; }
153 MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
153 MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalignArch*)ptr)->v; }
154
154
155 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
155 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign16*)memPtr)->v = value; }
156 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
156 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign32*)memPtr)->v = value; }
157 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
157 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign64*)memPtr)->v = value; }
158
158
159 #else
159 #else
160
160
161 /* default method, safe and standard.
161 /* default method, safe and standard.
162 can sometimes prove slower */
162 can sometimes prove slower */
163
163
164 MEM_STATIC U16 MEM_read16(const void* memPtr)
164 MEM_STATIC U16 MEM_read16(const void* memPtr)
165 {
165 {
166 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
166 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
167 }
167 }
168
168
169 MEM_STATIC U32 MEM_read32(const void* memPtr)
169 MEM_STATIC U32 MEM_read32(const void* memPtr)
170 {
170 {
171 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
171 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
172 }
172 }
173
173
174 MEM_STATIC U64 MEM_read64(const void* memPtr)
174 MEM_STATIC U64 MEM_read64(const void* memPtr)
175 {
175 {
176 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
176 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
177 }
177 }
178
178
179 MEM_STATIC size_t MEM_readST(const void* memPtr)
179 MEM_STATIC size_t MEM_readST(const void* memPtr)
180 {
180 {
181 size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
181 size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
182 }
182 }
183
183
184 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
184 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
185 {
185 {
186 memcpy(memPtr, &value, sizeof(value));
186 memcpy(memPtr, &value, sizeof(value));
187 }
187 }
188
188
189 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
189 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
190 {
190 {
191 memcpy(memPtr, &value, sizeof(value));
191 memcpy(memPtr, &value, sizeof(value));
192 }
192 }
193
193
194 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
194 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
195 {
195 {
196 memcpy(memPtr, &value, sizeof(value));
196 memcpy(memPtr, &value, sizeof(value));
197 }
197 }
198
198
199 #endif /* MEM_FORCE_MEMORY_ACCESS */
199 #endif /* MEM_FORCE_MEMORY_ACCESS */
200
200
201 MEM_STATIC U32 MEM_swap32(U32 in)
201 MEM_STATIC U32 MEM_swap32(U32 in)
202 {
202 {
203 #if defined(_MSC_VER) /* Visual Studio */
203 #if defined(_MSC_VER) /* Visual Studio */
204 return _byteswap_ulong(in);
204 return _byteswap_ulong(in);
205 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
205 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
206 || (defined(__clang__) && __has_builtin(__builtin_bswap32))
206 || (defined(__clang__) && __has_builtin(__builtin_bswap32))
207 return __builtin_bswap32(in);
207 return __builtin_bswap32(in);
208 #else
208 #else
209 return ((in << 24) & 0xff000000 ) |
209 return ((in << 24) & 0xff000000 ) |
210 ((in << 8) & 0x00ff0000 ) |
210 ((in << 8) & 0x00ff0000 ) |
211 ((in >> 8) & 0x0000ff00 ) |
211 ((in >> 8) & 0x0000ff00 ) |
212 ((in >> 24) & 0x000000ff );
212 ((in >> 24) & 0x000000ff );
213 #endif
213 #endif
214 }
214 }
215
215
216 MEM_STATIC U64 MEM_swap64(U64 in)
216 MEM_STATIC U64 MEM_swap64(U64 in)
217 {
217 {
218 #if defined(_MSC_VER) /* Visual Studio */
218 #if defined(_MSC_VER) /* Visual Studio */
219 return _byteswap_uint64(in);
219 return _byteswap_uint64(in);
220 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
220 #elif (defined (__GNUC__) && (__GNUC__ * 100 + __GNUC_MINOR__ >= 403)) \
221 || (defined(__clang__) && __has_builtin(__builtin_bswap64))
221 || (defined(__clang__) && __has_builtin(__builtin_bswap64))
222 return __builtin_bswap64(in);
222 return __builtin_bswap64(in);
223 #else
223 #else
224 return ((in << 56) & 0xff00000000000000ULL) |
224 return ((in << 56) & 0xff00000000000000ULL) |
225 ((in << 40) & 0x00ff000000000000ULL) |
225 ((in << 40) & 0x00ff000000000000ULL) |
226 ((in << 24) & 0x0000ff0000000000ULL) |
226 ((in << 24) & 0x0000ff0000000000ULL) |
227 ((in << 8) & 0x000000ff00000000ULL) |
227 ((in << 8) & 0x000000ff00000000ULL) |
228 ((in >> 8) & 0x00000000ff000000ULL) |
228 ((in >> 8) & 0x00000000ff000000ULL) |
229 ((in >> 24) & 0x0000000000ff0000ULL) |
229 ((in >> 24) & 0x0000000000ff0000ULL) |
230 ((in >> 40) & 0x000000000000ff00ULL) |
230 ((in >> 40) & 0x000000000000ff00ULL) |
231 ((in >> 56) & 0x00000000000000ffULL);
231 ((in >> 56) & 0x00000000000000ffULL);
232 #endif
232 #endif
233 }
233 }
234
234
235 MEM_STATIC size_t MEM_swapST(size_t in)
235 MEM_STATIC size_t MEM_swapST(size_t in)
236 {
236 {
237 if (MEM_32bits())
237 if (MEM_32bits())
238 return (size_t)MEM_swap32((U32)in);
238 return (size_t)MEM_swap32((U32)in);
239 else
239 else
240 return (size_t)MEM_swap64((U64)in);
240 return (size_t)MEM_swap64((U64)in);
241 }
241 }
242
242
243 /*=== Little endian r/w ===*/
243 /*=== Little endian r/w ===*/
244
244
245 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
245 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
246 {
246 {
247 if (MEM_isLittleEndian())
247 if (MEM_isLittleEndian())
248 return MEM_read16(memPtr);
248 return MEM_read16(memPtr);
249 else {
249 else {
250 const BYTE* p = (const BYTE*)memPtr;
250 const BYTE* p = (const BYTE*)memPtr;
251 return (U16)(p[0] + (p[1]<<8));
251 return (U16)(p[0] + (p[1]<<8));
252 }
252 }
253 }
253 }
254
254
255 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
255 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
256 {
256 {
257 if (MEM_isLittleEndian()) {
257 if (MEM_isLittleEndian()) {
258 MEM_write16(memPtr, val);
258 MEM_write16(memPtr, val);
259 } else {
259 } else {
260 BYTE* p = (BYTE*)memPtr;
260 BYTE* p = (BYTE*)memPtr;
261 p[0] = (BYTE)val;
261 p[0] = (BYTE)val;
262 p[1] = (BYTE)(val>>8);
262 p[1] = (BYTE)(val>>8);
263 }
263 }
264 }
264 }
265
265
266 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
266 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
267 {
267 {
268 return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
268 return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
269 }
269 }
270
270
271 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
271 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
272 {
272 {
273 MEM_writeLE16(memPtr, (U16)val);
273 MEM_writeLE16(memPtr, (U16)val);
274 ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
274 ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
275 }
275 }
276
276
277 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
277 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
278 {
278 {
279 if (MEM_isLittleEndian())
279 if (MEM_isLittleEndian())
280 return MEM_read32(memPtr);
280 return MEM_read32(memPtr);
281 else
281 else
282 return MEM_swap32(MEM_read32(memPtr));
282 return MEM_swap32(MEM_read32(memPtr));
283 }
283 }
284
284
285 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
285 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
286 {
286 {
287 if (MEM_isLittleEndian())
287 if (MEM_isLittleEndian())
288 MEM_write32(memPtr, val32);
288 MEM_write32(memPtr, val32);
289 else
289 else
290 MEM_write32(memPtr, MEM_swap32(val32));
290 MEM_write32(memPtr, MEM_swap32(val32));
291 }
291 }
292
292
293 MEM_STATIC U64 MEM_readLE64(const void* memPtr)
293 MEM_STATIC U64 MEM_readLE64(const void* memPtr)
294 {
294 {
295 if (MEM_isLittleEndian())
295 if (MEM_isLittleEndian())
296 return MEM_read64(memPtr);
296 return MEM_read64(memPtr);
297 else
297 else
298 return MEM_swap64(MEM_read64(memPtr));
298 return MEM_swap64(MEM_read64(memPtr));
299 }
299 }
300
300
301 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
301 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
302 {
302 {
303 if (MEM_isLittleEndian())
303 if (MEM_isLittleEndian())
304 MEM_write64(memPtr, val64);
304 MEM_write64(memPtr, val64);
305 else
305 else
306 MEM_write64(memPtr, MEM_swap64(val64));
306 MEM_write64(memPtr, MEM_swap64(val64));
307 }
307 }
308
308
309 MEM_STATIC size_t MEM_readLEST(const void* memPtr)
309 MEM_STATIC size_t MEM_readLEST(const void* memPtr)
310 {
310 {
311 if (MEM_32bits())
311 if (MEM_32bits())
312 return (size_t)MEM_readLE32(memPtr);
312 return (size_t)MEM_readLE32(memPtr);
313 else
313 else
314 return (size_t)MEM_readLE64(memPtr);
314 return (size_t)MEM_readLE64(memPtr);
315 }
315 }
316
316
317 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
317 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
318 {
318 {
319 if (MEM_32bits())
319 if (MEM_32bits())
320 MEM_writeLE32(memPtr, (U32)val);
320 MEM_writeLE32(memPtr, (U32)val);
321 else
321 else
322 MEM_writeLE64(memPtr, (U64)val);
322 MEM_writeLE64(memPtr, (U64)val);
323 }
323 }
324
324
325 /*=== Big endian r/w ===*/
325 /*=== Big endian r/w ===*/
326
326
327 MEM_STATIC U32 MEM_readBE32(const void* memPtr)
327 MEM_STATIC U32 MEM_readBE32(const void* memPtr)
328 {
328 {
329 if (MEM_isLittleEndian())
329 if (MEM_isLittleEndian())
330 return MEM_swap32(MEM_read32(memPtr));
330 return MEM_swap32(MEM_read32(memPtr));
331 else
331 else
332 return MEM_read32(memPtr);
332 return MEM_read32(memPtr);
333 }
333 }
334
334
335 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
335 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
336 {
336 {
337 if (MEM_isLittleEndian())
337 if (MEM_isLittleEndian())
338 MEM_write32(memPtr, MEM_swap32(val32));
338 MEM_write32(memPtr, MEM_swap32(val32));
339 else
339 else
340 MEM_write32(memPtr, val32);
340 MEM_write32(memPtr, val32);
341 }
341 }
342
342
343 MEM_STATIC U64 MEM_readBE64(const void* memPtr)
343 MEM_STATIC U64 MEM_readBE64(const void* memPtr)
344 {
344 {
345 if (MEM_isLittleEndian())
345 if (MEM_isLittleEndian())
346 return MEM_swap64(MEM_read64(memPtr));
346 return MEM_swap64(MEM_read64(memPtr));
347 else
347 else
348 return MEM_read64(memPtr);
348 return MEM_read64(memPtr);
349 }
349 }
350
350
351 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
351 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
352 {
352 {
353 if (MEM_isLittleEndian())
353 if (MEM_isLittleEndian())
354 MEM_write64(memPtr, MEM_swap64(val64));
354 MEM_write64(memPtr, MEM_swap64(val64));
355 else
355 else
356 MEM_write64(memPtr, val64);
356 MEM_write64(memPtr, val64);
357 }
357 }
358
358
359 MEM_STATIC size_t MEM_readBEST(const void* memPtr)
359 MEM_STATIC size_t MEM_readBEST(const void* memPtr)
360 {
360 {
361 if (MEM_32bits())
361 if (MEM_32bits())
362 return (size_t)MEM_readBE32(memPtr);
362 return (size_t)MEM_readBE32(memPtr);
363 else
363 else
364 return (size_t)MEM_readBE64(memPtr);
364 return (size_t)MEM_readBE64(memPtr);
365 }
365 }
366
366
367 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
367 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
368 {
368 {
369 if (MEM_32bits())
369 if (MEM_32bits())
370 MEM_writeBE32(memPtr, (U32)val);
370 MEM_writeBE32(memPtr, (U32)val);
371 else
371 else
372 MEM_writeBE64(memPtr, (U64)val);
372 MEM_writeBE64(memPtr, (U64)val);
373 }
373 }
374
374
375
375
376 #if defined (__cplusplus)
376 #if defined (__cplusplus)
377 }
377 }
378 #endif
378 #endif
379
379
380 #endif /* MEM_H_MODULE */
380 #endif /* MEM_H_MODULE */
@@ -1,75 +1,75 b''
1 /**
1 /**
2 * Copyright (c) 2016 Tino Reichardt
2 * Copyright (c) 2016 Tino Reichardt
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under both the BSD-style license (found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
7 * in the COPYING file in the root directory of this source tree).
8 *
8 *
9 * You can contact the author at:
9 * You can contact the author at:
10 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
10 * - zstdmt source repository: https://github.com/mcmilk/zstdmt
11 */
11 */
12
12
13 /**
13 /**
14 * This file will hold wrapper for systems, which do not support pthreads
14 * This file will hold wrapper for systems, which do not support pthreads
15 */
15 */
16
16
17 /* create fake symbol to avoid empty trnaslation unit warning */
17 /* create fake symbol to avoid empty translation unit warning */
18 int g_ZSTD_threading_useles_symbol;
18 int g_ZSTD_threading_useless_symbol;
19
19
20 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
20 #if defined(ZSTD_MULTITHREAD) && defined(_WIN32)
21
21
22 /**
22 /**
23 * Windows minimalist Pthread Wrapper, based on :
23 * Windows minimalist Pthread Wrapper, based on :
24 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
24 * http://www.cse.wustl.edu/~schmidt/win32-cv-1.html
25 */
25 */
26
26
27
27
28 /* === Dependencies === */
28 /* === Dependencies === */
29 #include <process.h>
29 #include <process.h>
30 #include <errno.h>
30 #include <errno.h>
31 #include "threading.h"
31 #include "threading.h"
32
32
33
33
34 /* === Implementation === */
34 /* === Implementation === */
35
35
36 static unsigned __stdcall worker(void *arg)
36 static unsigned __stdcall worker(void *arg)
37 {
37 {
38 ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
38 ZSTD_pthread_t* const thread = (ZSTD_pthread_t*) arg;
39 thread->arg = thread->start_routine(thread->arg);
39 thread->arg = thread->start_routine(thread->arg);
40 return 0;
40 return 0;
41 }
41 }
42
42
43 int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
43 int ZSTD_pthread_create(ZSTD_pthread_t* thread, const void* unused,
44 void* (*start_routine) (void*), void* arg)
44 void* (*start_routine) (void*), void* arg)
45 {
45 {
46 (void)unused;
46 (void)unused;
47 thread->arg = arg;
47 thread->arg = arg;
48 thread->start_routine = start_routine;
48 thread->start_routine = start_routine;
49 thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
49 thread->handle = (HANDLE) _beginthreadex(NULL, 0, worker, thread, 0, NULL);
50
50
51 if (!thread->handle)
51 if (!thread->handle)
52 return errno;
52 return errno;
53 else
53 else
54 return 0;
54 return 0;
55 }
55 }
56
56
57 int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
57 int ZSTD_pthread_join(ZSTD_pthread_t thread, void **value_ptr)
58 {
58 {
59 DWORD result;
59 DWORD result;
60
60
61 if (!thread.handle) return 0;
61 if (!thread.handle) return 0;
62
62
63 result = WaitForSingleObject(thread.handle, INFINITE);
63 result = WaitForSingleObject(thread.handle, INFINITE);
64 switch (result) {
64 switch (result) {
65 case WAIT_OBJECT_0:
65 case WAIT_OBJECT_0:
66 if (value_ptr) *value_ptr = thread.arg;
66 if (value_ptr) *value_ptr = thread.arg;
67 return 0;
67 return 0;
68 case WAIT_ABANDONED:
68 case WAIT_ABANDONED:
69 return EINVAL;
69 return EINVAL;
70 default:
70 default:
71 return GetLastError();
71 return GetLastError();
72 }
72 }
73 }
73 }
74
74
75 #endif /* ZSTD_MULTITHREAD */
75 #endif /* ZSTD_MULTITHREAD */
@@ -1,876 +1,882 b''
1 /*
1 /*
2 * xxHash - Fast Hash algorithm
2 * xxHash - Fast Hash algorithm
3 * Copyright (C) 2012-2016, Yann Collet
3 * Copyright (C) 2012-2016, Yann Collet
4 *
4 *
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
6 *
7 * Redistribution and use in source and binary forms, with or without
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
8 * modification, are permitted provided that the following conditions are
9 * met:
9 * met:
10 *
10 *
11 * * Redistributions of source code must retain the above copyright
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
15 * in the documentation and/or other materials provided with the
16 * distribution.
16 * distribution.
17 *
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
29 *
30 * You can contact the author at :
30 * You can contact the author at :
31 * - xxHash homepage: http://www.xxhash.com
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
33 */
34
34
35
35
36 /* *************************************
36 /* *************************************
37 * Tuning parameters
37 * Tuning parameters
38 ***************************************/
38 ***************************************/
39 /*!XXH_FORCE_MEMORY_ACCESS :
39 /*!XXH_FORCE_MEMORY_ACCESS :
40 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
40 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
41 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
41 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
42 * The below switch allow to select different access method for improved performance.
42 * The below switch allow to select different access method for improved performance.
43 * Method 0 (default) : use `memcpy()`. Safe and portable.
43 * Method 0 (default) : use `memcpy()`. Safe and portable.
44 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
44 * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
45 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
45 * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
46 * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
46 * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
47 * It can generate buggy code on targets which do not support unaligned memory accesses.
47 * It can generate buggy code on targets which do not support unaligned memory accesses.
48 * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
48 * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
49 * See http://stackoverflow.com/a/32095106/646947 for details.
49 * See http://stackoverflow.com/a/32095106/646947 for details.
50 * Prefer these methods in priority order (0 > 1 > 2)
50 * Prefer these methods in priority order (0 > 1 > 2)
51 */
51 */
52 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
52 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
53 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
53 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
54 # define XXH_FORCE_MEMORY_ACCESS 2
54 # define XXH_FORCE_MEMORY_ACCESS 2
55 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
55 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
56 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
56 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \
57 defined(__ICCARM__)
57 # define XXH_FORCE_MEMORY_ACCESS 1
58 # define XXH_FORCE_MEMORY_ACCESS 1
58 # endif
59 # endif
59 #endif
60 #endif
60
61
61 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
62 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
62 * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
63 * If the input pointer is a null pointer, xxHash default behavior is to trigger a memory access error, since it is a bad pointer.
63 * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
64 * When this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
64 * By default, this option is disabled. To enable it, uncomment below define :
65 * By default, this option is disabled. To enable it, uncomment below define :
65 */
66 */
66 /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
67 /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
67
68
68 /*!XXH_FORCE_NATIVE_FORMAT :
69 /*!XXH_FORCE_NATIVE_FORMAT :
69 * By default, xxHash library provides endian-independant Hash values, based on little-endian convention.
70 * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
70 * Results are therefore identical for little-endian and big-endian CPU.
71 * Results are therefore identical for little-endian and big-endian CPU.
71 * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
72 * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
72 * Should endian-independance be of no importance for your application, you may set the #define below to 1,
73 * Should endian-independence be of no importance for your application, you may set the #define below to 1,
73 * to improve speed for Big-endian CPU.
74 * to improve speed for Big-endian CPU.
74 * This option has no impact on Little_Endian CPU.
75 * This option has no impact on Little_Endian CPU.
75 */
76 */
76 #ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
77 #ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
77 # define XXH_FORCE_NATIVE_FORMAT 0
78 # define XXH_FORCE_NATIVE_FORMAT 0
78 #endif
79 #endif
79
80
80 /*!XXH_FORCE_ALIGN_CHECK :
81 /*!XXH_FORCE_ALIGN_CHECK :
81 * This is a minor performance trick, only useful with lots of very small keys.
82 * This is a minor performance trick, only useful with lots of very small keys.
82 * It means : check for aligned/unaligned input.
83 * It means : check for aligned/unaligned input.
83 * The check costs one initial branch per hash; set to 0 when the input data
84 * The check costs one initial branch per hash; set to 0 when the input data
84 * is guaranteed to be aligned.
85 * is guaranteed to be aligned.
85 */
86 */
86 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
87 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
87 # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
88 # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
88 # define XXH_FORCE_ALIGN_CHECK 0
89 # define XXH_FORCE_ALIGN_CHECK 0
89 # else
90 # else
90 # define XXH_FORCE_ALIGN_CHECK 1
91 # define XXH_FORCE_ALIGN_CHECK 1
91 # endif
92 # endif
92 #endif
93 #endif
93
94
94
95
95 /* *************************************
96 /* *************************************
96 * Includes & Memory related functions
97 * Includes & Memory related functions
97 ***************************************/
98 ***************************************/
98 /* Modify the local functions below should you wish to use some other memory routines */
99 /* Modify the local functions below should you wish to use some other memory routines */
99 /* for malloc(), free() */
100 /* for malloc(), free() */
100 #include <stdlib.h>
101 #include <stdlib.h>
101 #include <stddef.h> /* size_t */
102 #include <stddef.h> /* size_t */
102 static void* XXH_malloc(size_t s) { return malloc(s); }
103 static void* XXH_malloc(size_t s) { return malloc(s); }
103 static void XXH_free (void* p) { free(p); }
104 static void XXH_free (void* p) { free(p); }
104 /* for memcpy() */
105 /* for memcpy() */
105 #include <string.h>
106 #include <string.h>
106 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
107 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
107
108
108 #ifndef XXH_STATIC_LINKING_ONLY
109 #ifndef XXH_STATIC_LINKING_ONLY
109 # define XXH_STATIC_LINKING_ONLY
110 # define XXH_STATIC_LINKING_ONLY
110 #endif
111 #endif
111 #include "xxhash.h"
112 #include "xxhash.h"
112
113
113
114
114 /* *************************************
115 /* *************************************
115 * Compiler Specific Options
116 * Compiler Specific Options
116 ***************************************/
117 ***************************************/
117 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
118 #if defined (__GNUC__) || defined(__cplusplus) || defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
118 # define INLINE_KEYWORD inline
119 # define INLINE_KEYWORD inline
119 #else
120 #else
120 # define INLINE_KEYWORD
121 # define INLINE_KEYWORD
121 #endif
122 #endif
122
123
123 #if defined(__GNUC__)
124 #if defined(__GNUC__) || defined(__ICCARM__)
124 # define FORCE_INLINE_ATTR __attribute__((always_inline))
125 # define FORCE_INLINE_ATTR __attribute__((always_inline))
125 #elif defined(_MSC_VER)
126 #elif defined(_MSC_VER)
126 # define FORCE_INLINE_ATTR __forceinline
127 # define FORCE_INLINE_ATTR __forceinline
127 #else
128 #else
128 # define FORCE_INLINE_ATTR
129 # define FORCE_INLINE_ATTR
129 #endif
130 #endif
130
131
131 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
132 #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR
132
133
133
134
134 #ifdef _MSC_VER
135 #ifdef _MSC_VER
135 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
136 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
136 #endif
137 #endif
137
138
138
139
139 /* *************************************
140 /* *************************************
140 * Basic Types
141 * Basic Types
141 ***************************************/
142 ***************************************/
142 #ifndef MEM_MODULE
143 #ifndef MEM_MODULE
143 # define MEM_MODULE
144 # define MEM_MODULE
144 # if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
145 # if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
145 # include <stdint.h>
146 # include <stdint.h>
146 typedef uint8_t BYTE;
147 typedef uint8_t BYTE;
147 typedef uint16_t U16;
148 typedef uint16_t U16;
148 typedef uint32_t U32;
149 typedef uint32_t U32;
149 typedef int32_t S32;
150 typedef int32_t S32;
150 typedef uint64_t U64;
151 typedef uint64_t U64;
151 # else
152 # else
152 typedef unsigned char BYTE;
153 typedef unsigned char BYTE;
153 typedef unsigned short U16;
154 typedef unsigned short U16;
154 typedef unsigned int U32;
155 typedef unsigned int U32;
155 typedef signed int S32;
156 typedef signed int S32;
156 typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
157 typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
157 # endif
158 # endif
158 #endif
159 #endif
159
160
160
161
161 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
162 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
162
163
163 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
164 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
164 static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
165 static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
165 static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
166 static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
166
167
167 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
168 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
168
169
169 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
170 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
170 /* currently only defined for gcc and icc */
171 /* currently only defined for gcc and icc */
171 typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
172 typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
172
173
173 static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
174 static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
174 static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
175 static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
175
176
176 #else
177 #else
177
178
178 /* portable and safe solution. Generally efficient.
179 /* portable and safe solution. Generally efficient.
179 * see : http://stackoverflow.com/a/32095106/646947
180 * see : http://stackoverflow.com/a/32095106/646947
180 */
181 */
181
182
182 static U32 XXH_read32(const void* memPtr)
183 static U32 XXH_read32(const void* memPtr)
183 {
184 {
184 U32 val;
185 U32 val;
185 memcpy(&val, memPtr, sizeof(val));
186 memcpy(&val, memPtr, sizeof(val));
186 return val;
187 return val;
187 }
188 }
188
189
189 static U64 XXH_read64(const void* memPtr)
190 static U64 XXH_read64(const void* memPtr)
190 {
191 {
191 U64 val;
192 U64 val;
192 memcpy(&val, memPtr, sizeof(val));
193 memcpy(&val, memPtr, sizeof(val));
193 return val;
194 return val;
194 }
195 }
195
196
196 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
197 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
197
198
198
199
199 /* ****************************************
200 /* ****************************************
200 * Compiler-specific Functions and Macros
201 * Compiler-specific Functions and Macros
201 ******************************************/
202 ******************************************/
202 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
203 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
203
204
204 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
205 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
205 #if defined(_MSC_VER)
206 #if defined(_MSC_VER)
206 # define XXH_rotl32(x,r) _rotl(x,r)
207 # define XXH_rotl32(x,r) _rotl(x,r)
207 # define XXH_rotl64(x,r) _rotl64(x,r)
208 # define XXH_rotl64(x,r) _rotl64(x,r)
208 #else
209 #else
210 #if defined(__ICCARM__)
211 # include <intrinsics.h>
212 # define XXH_rotl32(x,r) __ROR(x,(32 - r))
213 #else
209 # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
214 # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
215 #endif
210 # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
216 # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
211 #endif
217 #endif
212
218
213 #if defined(_MSC_VER) /* Visual Studio */
219 #if defined(_MSC_VER) /* Visual Studio */
214 # define XXH_swap32 _byteswap_ulong
220 # define XXH_swap32 _byteswap_ulong
215 # define XXH_swap64 _byteswap_uint64
221 # define XXH_swap64 _byteswap_uint64
216 #elif GCC_VERSION >= 403
222 #elif GCC_VERSION >= 403
217 # define XXH_swap32 __builtin_bswap32
223 # define XXH_swap32 __builtin_bswap32
218 # define XXH_swap64 __builtin_bswap64
224 # define XXH_swap64 __builtin_bswap64
219 #else
225 #else
220 static U32 XXH_swap32 (U32 x)
226 static U32 XXH_swap32 (U32 x)
221 {
227 {
222 return ((x << 24) & 0xff000000 ) |
228 return ((x << 24) & 0xff000000 ) |
223 ((x << 8) & 0x00ff0000 ) |
229 ((x << 8) & 0x00ff0000 ) |
224 ((x >> 8) & 0x0000ff00 ) |
230 ((x >> 8) & 0x0000ff00 ) |
225 ((x >> 24) & 0x000000ff );
231 ((x >> 24) & 0x000000ff );
226 }
232 }
227 static U64 XXH_swap64 (U64 x)
233 static U64 XXH_swap64 (U64 x)
228 {
234 {
229 return ((x << 56) & 0xff00000000000000ULL) |
235 return ((x << 56) & 0xff00000000000000ULL) |
230 ((x << 40) & 0x00ff000000000000ULL) |
236 ((x << 40) & 0x00ff000000000000ULL) |
231 ((x << 24) & 0x0000ff0000000000ULL) |
237 ((x << 24) & 0x0000ff0000000000ULL) |
232 ((x << 8) & 0x000000ff00000000ULL) |
238 ((x << 8) & 0x000000ff00000000ULL) |
233 ((x >> 8) & 0x00000000ff000000ULL) |
239 ((x >> 8) & 0x00000000ff000000ULL) |
234 ((x >> 24) & 0x0000000000ff0000ULL) |
240 ((x >> 24) & 0x0000000000ff0000ULL) |
235 ((x >> 40) & 0x000000000000ff00ULL) |
241 ((x >> 40) & 0x000000000000ff00ULL) |
236 ((x >> 56) & 0x00000000000000ffULL);
242 ((x >> 56) & 0x00000000000000ffULL);
237 }
243 }
238 #endif
244 #endif
239
245
240
246
241 /* *************************************
247 /* *************************************
242 * Architecture Macros
248 * Architecture Macros
243 ***************************************/
249 ***************************************/
244 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
250 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
245
251
246 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
252 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
247 #ifndef XXH_CPU_LITTLE_ENDIAN
253 #ifndef XXH_CPU_LITTLE_ENDIAN
248 static const int g_one = 1;
254 static const int g_one = 1;
249 # define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
255 # define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
250 #endif
256 #endif
251
257
252
258
253 /* ***************************
259 /* ***************************
254 * Memory reads
260 * Memory reads
255 *****************************/
261 *****************************/
256 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
262 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
257
263
258 FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
264 FORCE_INLINE_TEMPLATE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
259 {
265 {
260 if (align==XXH_unaligned)
266 if (align==XXH_unaligned)
261 return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
267 return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
262 else
268 else
263 return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
269 return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
264 }
270 }
265
271
266 FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
272 FORCE_INLINE_TEMPLATE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
267 {
273 {
268 return XXH_readLE32_align(ptr, endian, XXH_unaligned);
274 return XXH_readLE32_align(ptr, endian, XXH_unaligned);
269 }
275 }
270
276
271 static U32 XXH_readBE32(const void* ptr)
277 static U32 XXH_readBE32(const void* ptr)
272 {
278 {
273 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
279 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
274 }
280 }
275
281
276 FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
282 FORCE_INLINE_TEMPLATE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
277 {
283 {
278 if (align==XXH_unaligned)
284 if (align==XXH_unaligned)
279 return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
285 return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
280 else
286 else
281 return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
287 return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
282 }
288 }
283
289
284 FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
290 FORCE_INLINE_TEMPLATE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
285 {
291 {
286 return XXH_readLE64_align(ptr, endian, XXH_unaligned);
292 return XXH_readLE64_align(ptr, endian, XXH_unaligned);
287 }
293 }
288
294
289 static U64 XXH_readBE64(const void* ptr)
295 static U64 XXH_readBE64(const void* ptr)
290 {
296 {
291 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
297 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
292 }
298 }
293
299
294
300
295 /* *************************************
301 /* *************************************
296 * Macros
302 * Macros
297 ***************************************/
303 ***************************************/
298 #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
304 #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
299
305
300
306
301 /* *************************************
307 /* *************************************
302 * Constants
308 * Constants
303 ***************************************/
309 ***************************************/
304 static const U32 PRIME32_1 = 2654435761U;
310 static const U32 PRIME32_1 = 2654435761U;
305 static const U32 PRIME32_2 = 2246822519U;
311 static const U32 PRIME32_2 = 2246822519U;
306 static const U32 PRIME32_3 = 3266489917U;
312 static const U32 PRIME32_3 = 3266489917U;
307 static const U32 PRIME32_4 = 668265263U;
313 static const U32 PRIME32_4 = 668265263U;
308 static const U32 PRIME32_5 = 374761393U;
314 static const U32 PRIME32_5 = 374761393U;
309
315
310 static const U64 PRIME64_1 = 11400714785074694791ULL;
316 static const U64 PRIME64_1 = 11400714785074694791ULL;
311 static const U64 PRIME64_2 = 14029467366897019727ULL;
317 static const U64 PRIME64_2 = 14029467366897019727ULL;
312 static const U64 PRIME64_3 = 1609587929392839161ULL;
318 static const U64 PRIME64_3 = 1609587929392839161ULL;
313 static const U64 PRIME64_4 = 9650029242287828579ULL;
319 static const U64 PRIME64_4 = 9650029242287828579ULL;
314 static const U64 PRIME64_5 = 2870177450012600261ULL;
320 static const U64 PRIME64_5 = 2870177450012600261ULL;
315
321
316 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
322 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
317
323
318
324
319 /* **************************
325 /* **************************
320 * Utils
326 * Utils
321 ****************************/
327 ****************************/
322 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
328 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
323 {
329 {
324 memcpy(dstState, srcState, sizeof(*dstState));
330 memcpy(dstState, srcState, sizeof(*dstState));
325 }
331 }
326
332
327 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
333 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
328 {
334 {
329 memcpy(dstState, srcState, sizeof(*dstState));
335 memcpy(dstState, srcState, sizeof(*dstState));
330 }
336 }
331
337
332
338
333 /* ***************************
339 /* ***************************
334 * Simple Hash Functions
340 * Simple Hash Functions
335 *****************************/
341 *****************************/
336
342
337 static U32 XXH32_round(U32 seed, U32 input)
343 static U32 XXH32_round(U32 seed, U32 input)
338 {
344 {
339 seed += input * PRIME32_2;
345 seed += input * PRIME32_2;
340 seed = XXH_rotl32(seed, 13);
346 seed = XXH_rotl32(seed, 13);
341 seed *= PRIME32_1;
347 seed *= PRIME32_1;
342 return seed;
348 return seed;
343 }
349 }
344
350
345 FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
351 FORCE_INLINE_TEMPLATE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
346 {
352 {
347 const BYTE* p = (const BYTE*)input;
353 const BYTE* p = (const BYTE*)input;
348 const BYTE* bEnd = p + len;
354 const BYTE* bEnd = p + len;
349 U32 h32;
355 U32 h32;
350 #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
356 #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
351
357
352 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
358 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
353 if (p==NULL) {
359 if (p==NULL) {
354 len=0;
360 len=0;
355 bEnd=p=(const BYTE*)(size_t)16;
361 bEnd=p=(const BYTE*)(size_t)16;
356 }
362 }
357 #endif
363 #endif
358
364
359 if (len>=16) {
365 if (len>=16) {
360 const BYTE* const limit = bEnd - 16;
366 const BYTE* const limit = bEnd - 16;
361 U32 v1 = seed + PRIME32_1 + PRIME32_2;
367 U32 v1 = seed + PRIME32_1 + PRIME32_2;
362 U32 v2 = seed + PRIME32_2;
368 U32 v2 = seed + PRIME32_2;
363 U32 v3 = seed + 0;
369 U32 v3 = seed + 0;
364 U32 v4 = seed - PRIME32_1;
370 U32 v4 = seed - PRIME32_1;
365
371
366 do {
372 do {
367 v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
373 v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
368 v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
374 v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
369 v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
375 v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
370 v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
376 v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
371 } while (p<=limit);
377 } while (p<=limit);
372
378
373 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
379 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
374 } else {
380 } else {
375 h32 = seed + PRIME32_5;
381 h32 = seed + PRIME32_5;
376 }
382 }
377
383
378 h32 += (U32) len;
384 h32 += (U32) len;
379
385
380 while (p+4<=bEnd) {
386 while (p+4<=bEnd) {
381 h32 += XXH_get32bits(p) * PRIME32_3;
387 h32 += XXH_get32bits(p) * PRIME32_3;
382 h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
388 h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
383 p+=4;
389 p+=4;
384 }
390 }
385
391
386 while (p<bEnd) {
392 while (p<bEnd) {
387 h32 += (*p) * PRIME32_5;
393 h32 += (*p) * PRIME32_5;
388 h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
394 h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
389 p++;
395 p++;
390 }
396 }
391
397
392 h32 ^= h32 >> 15;
398 h32 ^= h32 >> 15;
393 h32 *= PRIME32_2;
399 h32 *= PRIME32_2;
394 h32 ^= h32 >> 13;
400 h32 ^= h32 >> 13;
395 h32 *= PRIME32_3;
401 h32 *= PRIME32_3;
396 h32 ^= h32 >> 16;
402 h32 ^= h32 >> 16;
397
403
398 return h32;
404 return h32;
399 }
405 }
400
406
401
407
402 XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
408 XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
403 {
409 {
404 #if 0
410 #if 0
405 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
411 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
406 XXH32_CREATESTATE_STATIC(state);
412 XXH32_CREATESTATE_STATIC(state);
407 XXH32_reset(state, seed);
413 XXH32_reset(state, seed);
408 XXH32_update(state, input, len);
414 XXH32_update(state, input, len);
409 return XXH32_digest(state);
415 return XXH32_digest(state);
410 #else
416 #else
411 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
417 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
412
418
413 if (XXH_FORCE_ALIGN_CHECK) {
419 if (XXH_FORCE_ALIGN_CHECK) {
414 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
420 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
415 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
421 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
416 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
422 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
417 else
423 else
418 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
424 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
419 } }
425 } }
420
426
421 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
427 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
422 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
428 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
423 else
429 else
424 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
430 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
425 #endif
431 #endif
426 }
432 }
427
433
428
434
429 static U64 XXH64_round(U64 acc, U64 input)
435 static U64 XXH64_round(U64 acc, U64 input)
430 {
436 {
431 acc += input * PRIME64_2;
437 acc += input * PRIME64_2;
432 acc = XXH_rotl64(acc, 31);
438 acc = XXH_rotl64(acc, 31);
433 acc *= PRIME64_1;
439 acc *= PRIME64_1;
434 return acc;
440 return acc;
435 }
441 }
436
442
437 static U64 XXH64_mergeRound(U64 acc, U64 val)
443 static U64 XXH64_mergeRound(U64 acc, U64 val)
438 {
444 {
439 val = XXH64_round(0, val);
445 val = XXH64_round(0, val);
440 acc ^= val;
446 acc ^= val;
441 acc = acc * PRIME64_1 + PRIME64_4;
447 acc = acc * PRIME64_1 + PRIME64_4;
442 return acc;
448 return acc;
443 }
449 }
444
450
445 FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
451 FORCE_INLINE_TEMPLATE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
446 {
452 {
447 const BYTE* p = (const BYTE*)input;
453 const BYTE* p = (const BYTE*)input;
448 const BYTE* const bEnd = p + len;
454 const BYTE* const bEnd = p + len;
449 U64 h64;
455 U64 h64;
450 #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
456 #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
451
457
452 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
458 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
453 if (p==NULL) {
459 if (p==NULL) {
454 len=0;
460 len=0;
455 bEnd=p=(const BYTE*)(size_t)32;
461 bEnd=p=(const BYTE*)(size_t)32;
456 }
462 }
457 #endif
463 #endif
458
464
459 if (len>=32) {
465 if (len>=32) {
460 const BYTE* const limit = bEnd - 32;
466 const BYTE* const limit = bEnd - 32;
461 U64 v1 = seed + PRIME64_1 + PRIME64_2;
467 U64 v1 = seed + PRIME64_1 + PRIME64_2;
462 U64 v2 = seed + PRIME64_2;
468 U64 v2 = seed + PRIME64_2;
463 U64 v3 = seed + 0;
469 U64 v3 = seed + 0;
464 U64 v4 = seed - PRIME64_1;
470 U64 v4 = seed - PRIME64_1;
465
471
466 do {
472 do {
467 v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
473 v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
468 v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
474 v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
469 v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
475 v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
470 v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
476 v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
471 } while (p<=limit);
477 } while (p<=limit);
472
478
473 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
479 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
474 h64 = XXH64_mergeRound(h64, v1);
480 h64 = XXH64_mergeRound(h64, v1);
475 h64 = XXH64_mergeRound(h64, v2);
481 h64 = XXH64_mergeRound(h64, v2);
476 h64 = XXH64_mergeRound(h64, v3);
482 h64 = XXH64_mergeRound(h64, v3);
477 h64 = XXH64_mergeRound(h64, v4);
483 h64 = XXH64_mergeRound(h64, v4);
478
484
479 } else {
485 } else {
480 h64 = seed + PRIME64_5;
486 h64 = seed + PRIME64_5;
481 }
487 }
482
488
483 h64 += (U64) len;
489 h64 += (U64) len;
484
490
485 while (p+8<=bEnd) {
491 while (p+8<=bEnd) {
486 U64 const k1 = XXH64_round(0, XXH_get64bits(p));
492 U64 const k1 = XXH64_round(0, XXH_get64bits(p));
487 h64 ^= k1;
493 h64 ^= k1;
488 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
494 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
489 p+=8;
495 p+=8;
490 }
496 }
491
497
492 if (p+4<=bEnd) {
498 if (p+4<=bEnd) {
493 h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
499 h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
494 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
500 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
495 p+=4;
501 p+=4;
496 }
502 }
497
503
498 while (p<bEnd) {
504 while (p<bEnd) {
499 h64 ^= (*p) * PRIME64_5;
505 h64 ^= (*p) * PRIME64_5;
500 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
506 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
501 p++;
507 p++;
502 }
508 }
503
509
504 h64 ^= h64 >> 33;
510 h64 ^= h64 >> 33;
505 h64 *= PRIME64_2;
511 h64 *= PRIME64_2;
506 h64 ^= h64 >> 29;
512 h64 ^= h64 >> 29;
507 h64 *= PRIME64_3;
513 h64 *= PRIME64_3;
508 h64 ^= h64 >> 32;
514 h64 ^= h64 >> 32;
509
515
510 return h64;
516 return h64;
511 }
517 }
512
518
513
519
514 XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
520 XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
515 {
521 {
516 #if 0
522 #if 0
517 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
523 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
518 XXH64_CREATESTATE_STATIC(state);
524 XXH64_CREATESTATE_STATIC(state);
519 XXH64_reset(state, seed);
525 XXH64_reset(state, seed);
520 XXH64_update(state, input, len);
526 XXH64_update(state, input, len);
521 return XXH64_digest(state);
527 return XXH64_digest(state);
522 #else
528 #else
523 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
529 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
524
530
525 if (XXH_FORCE_ALIGN_CHECK) {
531 if (XXH_FORCE_ALIGN_CHECK) {
526 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
532 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
527 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
533 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
528 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
534 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
529 else
535 else
530 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
536 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
531 } }
537 } }
532
538
533 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
539 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
534 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
540 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
535 else
541 else
536 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
542 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
537 #endif
543 #endif
538 }
544 }
539
545
540
546
541 /* **************************************************
547 /* **************************************************
542 * Advanced Hash Functions
548 * Advanced Hash Functions
543 ****************************************************/
549 ****************************************************/
544
550
545 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
551 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
546 {
552 {
547 return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
553 return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
548 }
554 }
549 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
555 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
550 {
556 {
551 XXH_free(statePtr);
557 XXH_free(statePtr);
552 return XXH_OK;
558 return XXH_OK;
553 }
559 }
554
560
555 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
561 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
556 {
562 {
557 return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
563 return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
558 }
564 }
559 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
565 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
560 {
566 {
561 XXH_free(statePtr);
567 XXH_free(statePtr);
562 return XXH_OK;
568 return XXH_OK;
563 }
569 }
564
570
565
571
566 /*** Hash feed ***/
572 /*** Hash feed ***/
567
573
568 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
574 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
569 {
575 {
570 XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
576 XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
571 memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
577 memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
572 state.v1 = seed + PRIME32_1 + PRIME32_2;
578 state.v1 = seed + PRIME32_1 + PRIME32_2;
573 state.v2 = seed + PRIME32_2;
579 state.v2 = seed + PRIME32_2;
574 state.v3 = seed + 0;
580 state.v3 = seed + 0;
575 state.v4 = seed - PRIME32_1;
581 state.v4 = seed - PRIME32_1;
576 memcpy(statePtr, &state, sizeof(state));
582 memcpy(statePtr, &state, sizeof(state));
577 return XXH_OK;
583 return XXH_OK;
578 }
584 }
579
585
580
586
581 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
587 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
582 {
588 {
583 XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
589 XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
584 memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
590 memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
585 state.v1 = seed + PRIME64_1 + PRIME64_2;
591 state.v1 = seed + PRIME64_1 + PRIME64_2;
586 state.v2 = seed + PRIME64_2;
592 state.v2 = seed + PRIME64_2;
587 state.v3 = seed + 0;
593 state.v3 = seed + 0;
588 state.v4 = seed - PRIME64_1;
594 state.v4 = seed - PRIME64_1;
589 memcpy(statePtr, &state, sizeof(state));
595 memcpy(statePtr, &state, sizeof(state));
590 return XXH_OK;
596 return XXH_OK;
591 }
597 }
592
598
593
599
594 FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
600 FORCE_INLINE_TEMPLATE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
595 {
601 {
596 const BYTE* p = (const BYTE*)input;
602 const BYTE* p = (const BYTE*)input;
597 const BYTE* const bEnd = p + len;
603 const BYTE* const bEnd = p + len;
598
604
599 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
605 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
600 if (input==NULL) return XXH_ERROR;
606 if (input==NULL) return XXH_ERROR;
601 #endif
607 #endif
602
608
603 state->total_len_32 += (unsigned)len;
609 state->total_len_32 += (unsigned)len;
604 state->large_len |= (len>=16) | (state->total_len_32>=16);
610 state->large_len |= (len>=16) | (state->total_len_32>=16);
605
611
606 if (state->memsize + len < 16) { /* fill in tmp buffer */
612 if (state->memsize + len < 16) { /* fill in tmp buffer */
607 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
613 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
608 state->memsize += (unsigned)len;
614 state->memsize += (unsigned)len;
609 return XXH_OK;
615 return XXH_OK;
610 }
616 }
611
617
612 if (state->memsize) { /* some data left from previous update */
618 if (state->memsize) { /* some data left from previous update */
613 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
619 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
614 { const U32* p32 = state->mem32;
620 { const U32* p32 = state->mem32;
615 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
621 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
616 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
622 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
617 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
623 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
618 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
624 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
619 }
625 }
620 p += 16-state->memsize;
626 p += 16-state->memsize;
621 state->memsize = 0;
627 state->memsize = 0;
622 }
628 }
623
629
624 if (p <= bEnd-16) {
630 if (p <= bEnd-16) {
625 const BYTE* const limit = bEnd - 16;
631 const BYTE* const limit = bEnd - 16;
626 U32 v1 = state->v1;
632 U32 v1 = state->v1;
627 U32 v2 = state->v2;
633 U32 v2 = state->v2;
628 U32 v3 = state->v3;
634 U32 v3 = state->v3;
629 U32 v4 = state->v4;
635 U32 v4 = state->v4;
630
636
631 do {
637 do {
632 v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
638 v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
633 v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
639 v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
634 v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
640 v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
635 v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
641 v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
636 } while (p<=limit);
642 } while (p<=limit);
637
643
638 state->v1 = v1;
644 state->v1 = v1;
639 state->v2 = v2;
645 state->v2 = v2;
640 state->v3 = v3;
646 state->v3 = v3;
641 state->v4 = v4;
647 state->v4 = v4;
642 }
648 }
643
649
644 if (p < bEnd) {
650 if (p < bEnd) {
645 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
651 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
646 state->memsize = (unsigned)(bEnd-p);
652 state->memsize = (unsigned)(bEnd-p);
647 }
653 }
648
654
649 return XXH_OK;
655 return XXH_OK;
650 }
656 }
651
657
652 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
658 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
653 {
659 {
654 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
660 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
655
661
656 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
662 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
657 return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
663 return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
658 else
664 else
659 return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
665 return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
660 }
666 }
661
667
662
668
663
669
664 FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
670 FORCE_INLINE_TEMPLATE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
665 {
671 {
666 const BYTE * p = (const BYTE*)state->mem32;
672 const BYTE * p = (const BYTE*)state->mem32;
667 const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
673 const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
668 U32 h32;
674 U32 h32;
669
675
670 if (state->large_len) {
676 if (state->large_len) {
671 h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
677 h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
672 } else {
678 } else {
673 h32 = state->v3 /* == seed */ + PRIME32_5;
679 h32 = state->v3 /* == seed */ + PRIME32_5;
674 }
680 }
675
681
676 h32 += state->total_len_32;
682 h32 += state->total_len_32;
677
683
678 while (p+4<=bEnd) {
684 while (p+4<=bEnd) {
679 h32 += XXH_readLE32(p, endian) * PRIME32_3;
685 h32 += XXH_readLE32(p, endian) * PRIME32_3;
680 h32 = XXH_rotl32(h32, 17) * PRIME32_4;
686 h32 = XXH_rotl32(h32, 17) * PRIME32_4;
681 p+=4;
687 p+=4;
682 }
688 }
683
689
684 while (p<bEnd) {
690 while (p<bEnd) {
685 h32 += (*p) * PRIME32_5;
691 h32 += (*p) * PRIME32_5;
686 h32 = XXH_rotl32(h32, 11) * PRIME32_1;
692 h32 = XXH_rotl32(h32, 11) * PRIME32_1;
687 p++;
693 p++;
688 }
694 }
689
695
690 h32 ^= h32 >> 15;
696 h32 ^= h32 >> 15;
691 h32 *= PRIME32_2;
697 h32 *= PRIME32_2;
692 h32 ^= h32 >> 13;
698 h32 ^= h32 >> 13;
693 h32 *= PRIME32_3;
699 h32 *= PRIME32_3;
694 h32 ^= h32 >> 16;
700 h32 ^= h32 >> 16;
695
701
696 return h32;
702 return h32;
697 }
703 }
698
704
699
705
700 XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
706 XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
701 {
707 {
702 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
708 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
703
709
704 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
710 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
705 return XXH32_digest_endian(state_in, XXH_littleEndian);
711 return XXH32_digest_endian(state_in, XXH_littleEndian);
706 else
712 else
707 return XXH32_digest_endian(state_in, XXH_bigEndian);
713 return XXH32_digest_endian(state_in, XXH_bigEndian);
708 }
714 }
709
715
710
716
711
717
712 /* **** XXH64 **** */
718 /* **** XXH64 **** */
713
719
714 FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
720 FORCE_INLINE_TEMPLATE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
715 {
721 {
716 const BYTE* p = (const BYTE*)input;
722 const BYTE* p = (const BYTE*)input;
717 const BYTE* const bEnd = p + len;
723 const BYTE* const bEnd = p + len;
718
724
719 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
725 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
720 if (input==NULL) return XXH_ERROR;
726 if (input==NULL) return XXH_ERROR;
721 #endif
727 #endif
722
728
723 state->total_len += len;
729 state->total_len += len;
724
730
725 if (state->memsize + len < 32) { /* fill in tmp buffer */
731 if (state->memsize + len < 32) { /* fill in tmp buffer */
726 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
732 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
727 state->memsize += (U32)len;
733 state->memsize += (U32)len;
728 return XXH_OK;
734 return XXH_OK;
729 }
735 }
730
736
731 if (state->memsize) { /* tmp buffer is full */
737 if (state->memsize) { /* tmp buffer is full */
732 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
738 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
733 state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
739 state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
734 state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
740 state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
735 state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
741 state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
736 state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
742 state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
737 p += 32-state->memsize;
743 p += 32-state->memsize;
738 state->memsize = 0;
744 state->memsize = 0;
739 }
745 }
740
746
741 if (p+32 <= bEnd) {
747 if (p+32 <= bEnd) {
742 const BYTE* const limit = bEnd - 32;
748 const BYTE* const limit = bEnd - 32;
743 U64 v1 = state->v1;
749 U64 v1 = state->v1;
744 U64 v2 = state->v2;
750 U64 v2 = state->v2;
745 U64 v3 = state->v3;
751 U64 v3 = state->v3;
746 U64 v4 = state->v4;
752 U64 v4 = state->v4;
747
753
748 do {
754 do {
749 v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
755 v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
750 v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
756 v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
751 v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
757 v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
752 v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
758 v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
753 } while (p<=limit);
759 } while (p<=limit);
754
760
755 state->v1 = v1;
761 state->v1 = v1;
756 state->v2 = v2;
762 state->v2 = v2;
757 state->v3 = v3;
763 state->v3 = v3;
758 state->v4 = v4;
764 state->v4 = v4;
759 }
765 }
760
766
761 if (p < bEnd) {
767 if (p < bEnd) {
762 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
768 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
763 state->memsize = (unsigned)(bEnd-p);
769 state->memsize = (unsigned)(bEnd-p);
764 }
770 }
765
771
766 return XXH_OK;
772 return XXH_OK;
767 }
773 }
768
774
769 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
775 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
770 {
776 {
771 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
777 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
772
778
773 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
779 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
774 return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
780 return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
775 else
781 else
776 return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
782 return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
777 }
783 }
778
784
779
785
780
786
781 FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
787 FORCE_INLINE_TEMPLATE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
782 {
788 {
783 const BYTE * p = (const BYTE*)state->mem64;
789 const BYTE * p = (const BYTE*)state->mem64;
784 const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
790 const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
785 U64 h64;
791 U64 h64;
786
792
787 if (state->total_len >= 32) {
793 if (state->total_len >= 32) {
788 U64 const v1 = state->v1;
794 U64 const v1 = state->v1;
789 U64 const v2 = state->v2;
795 U64 const v2 = state->v2;
790 U64 const v3 = state->v3;
796 U64 const v3 = state->v3;
791 U64 const v4 = state->v4;
797 U64 const v4 = state->v4;
792
798
793 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
799 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
794 h64 = XXH64_mergeRound(h64, v1);
800 h64 = XXH64_mergeRound(h64, v1);
795 h64 = XXH64_mergeRound(h64, v2);
801 h64 = XXH64_mergeRound(h64, v2);
796 h64 = XXH64_mergeRound(h64, v3);
802 h64 = XXH64_mergeRound(h64, v3);
797 h64 = XXH64_mergeRound(h64, v4);
803 h64 = XXH64_mergeRound(h64, v4);
798 } else {
804 } else {
799 h64 = state->v3 + PRIME64_5;
805 h64 = state->v3 + PRIME64_5;
800 }
806 }
801
807
802 h64 += (U64) state->total_len;
808 h64 += (U64) state->total_len;
803
809
804 while (p+8<=bEnd) {
810 while (p+8<=bEnd) {
805 U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
811 U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
806 h64 ^= k1;
812 h64 ^= k1;
807 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
813 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
808 p+=8;
814 p+=8;
809 }
815 }
810
816
811 if (p+4<=bEnd) {
817 if (p+4<=bEnd) {
812 h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
818 h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
813 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
819 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
814 p+=4;
820 p+=4;
815 }
821 }
816
822
817 while (p<bEnd) {
823 while (p<bEnd) {
818 h64 ^= (*p) * PRIME64_5;
824 h64 ^= (*p) * PRIME64_5;
819 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
825 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
820 p++;
826 p++;
821 }
827 }
822
828
823 h64 ^= h64 >> 33;
829 h64 ^= h64 >> 33;
824 h64 *= PRIME64_2;
830 h64 *= PRIME64_2;
825 h64 ^= h64 >> 29;
831 h64 ^= h64 >> 29;
826 h64 *= PRIME64_3;
832 h64 *= PRIME64_3;
827 h64 ^= h64 >> 32;
833 h64 ^= h64 >> 32;
828
834
829 return h64;
835 return h64;
830 }
836 }
831
837
832
838
833 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
839 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
834 {
840 {
835 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
841 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
836
842
837 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
843 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
838 return XXH64_digest_endian(state_in, XXH_littleEndian);
844 return XXH64_digest_endian(state_in, XXH_littleEndian);
839 else
845 else
840 return XXH64_digest_endian(state_in, XXH_bigEndian);
846 return XXH64_digest_endian(state_in, XXH_bigEndian);
841 }
847 }
842
848
843
849
844 /* **************************
850 /* **************************
845 * Canonical representation
851 * Canonical representation
846 ****************************/
852 ****************************/
847
853
848 /*! Default XXH result types are basic unsigned 32 and 64 bits.
854 /*! Default XXH result types are basic unsigned 32 and 64 bits.
849 * The canonical representation follows human-readable write convention, aka big-endian (large digits first).
855 * The canonical representation follows human-readable write convention, aka big-endian (large digits first).
850 * These functions allow transformation of hash result into and from its canonical format.
856 * These functions allow transformation of hash result into and from its canonical format.
851 * This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
857 * This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
852 */
858 */
853
859
854 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
860 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
855 {
861 {
856 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
862 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
857 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
863 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
858 memcpy(dst, &hash, sizeof(*dst));
864 memcpy(dst, &hash, sizeof(*dst));
859 }
865 }
860
866
861 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
867 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
862 {
868 {
863 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
869 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
864 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
870 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
865 memcpy(dst, &hash, sizeof(*dst));
871 memcpy(dst, &hash, sizeof(*dst));
866 }
872 }
867
873
868 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
874 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
869 {
875 {
870 return XXH_readBE32(src);
876 return XXH_readBE32(src);
871 }
877 }
872
878
873 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
879 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
874 {
880 {
875 return XXH_readBE64(src);
881 return XXH_readBE64(src);
876 }
882 }
@@ -1,266 +1,373 b''
1 /*
1 /*
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
3 * All rights reserved.
4 *
4 *
5 * This source code is licensed under both the BSD-style license (found in the
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
8 * You may select, at your option, one of the above-listed licenses.
9 */
9 */
10
10
11 #ifndef ZSTD_CCOMMON_H_MODULE
11 #ifndef ZSTD_CCOMMON_H_MODULE
12 #define ZSTD_CCOMMON_H_MODULE
12 #define ZSTD_CCOMMON_H_MODULE
13
13
14 /* this module contains definitions which must be identical
14 /* this module contains definitions which must be identical
15 * across compression, decompression and dictBuilder.
15 * across compression, decompression and dictBuilder.
16 * It also contains a few functions useful to at least 2 of them
16 * It also contains a few functions useful to at least 2 of them
17 * and which benefit from being inlined */
17 * and which benefit from being inlined */
18
18
19 /*-*************************************
19 /*-*************************************
20 * Dependencies
20 * Dependencies
21 ***************************************/
21 ***************************************/
22 #include "compiler.h"
22 #include "compiler.h"
23 #include "mem.h"
23 #include "mem.h"
24 #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
24 #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
25 #include "error_private.h"
25 #include "error_private.h"
26 #define ZSTD_STATIC_LINKING_ONLY
26 #define ZSTD_STATIC_LINKING_ONLY
27 #include "zstd.h"
27 #include "zstd.h"
28 #define FSE_STATIC_LINKING_ONLY
28 #define FSE_STATIC_LINKING_ONLY
29 #include "fse.h"
29 #include "fse.h"
30 #define HUF_STATIC_LINKING_ONLY
30 #define HUF_STATIC_LINKING_ONLY
31 #include "huf.h"
31 #include "huf.h"
32 #ifndef XXH_STATIC_LINKING_ONLY
32 #ifndef XXH_STATIC_LINKING_ONLY
33 # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
33 # define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
34 #endif
34 #endif
35 #include "xxhash.h" /* XXH_reset, update, digest */
35 #include "xxhash.h" /* XXH_reset, update, digest */
36
36
37
38 #if defined (__cplusplus)
37 #if defined (__cplusplus)
39 extern "C" {
38 extern "C" {
40 #endif
39 #endif
41
40
42 /* ---- static assert (debug) --- */
41 /* ---- static assert (debug) --- */
43 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
42 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
44 #define ZSTD_isError ERR_isError /* for inlining */
43 #define ZSTD_isError ERR_isError /* for inlining */
45 #define FSE_isError ERR_isError
44 #define FSE_isError ERR_isError
46 #define HUF_isError ERR_isError
45 #define HUF_isError ERR_isError
47
46
48
47
49 /*-*************************************
48 /*-*************************************
50 * shared macros
49 * shared macros
51 ***************************************/
50 ***************************************/
52 #undef MIN
51 #undef MIN
53 #undef MAX
52 #undef MAX
54 #define MIN(a,b) ((a)<(b) ? (a) : (b))
53 #define MIN(a,b) ((a)<(b) ? (a) : (b))
55 #define MAX(a,b) ((a)>(b) ? (a) : (b))
54 #define MAX(a,b) ((a)>(b) ? (a) : (b))
56 #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
55
57 #define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */
56 /**
57 * Return the specified error if the condition evaluates to true.
58 *
59 * In debug modes, prints additional information.
60 * In order to do that (particularly, printing the conditional that failed),
61 * this can't just wrap RETURN_ERROR().
62 */
63 #define RETURN_ERROR_IF(cond, err, ...) \
64 if (cond) { \
65 RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
66 RAWLOG(3, ": " __VA_ARGS__); \
67 RAWLOG(3, "\n"); \
68 return ERROR(err); \
69 }
70
71 /**
72 * Unconditionally return the specified error.
73 *
74 * In debug modes, prints additional information.
75 */
76 #define RETURN_ERROR(err, ...) \
77 do { \
78 RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
79 RAWLOG(3, ": " __VA_ARGS__); \
80 RAWLOG(3, "\n"); \
81 return ERROR(err); \
82 } while(0);
83
84 /**
85 * If the provided expression evaluates to an error code, returns that error code.
86 *
87 * In debug modes, prints additional information.
88 */
89 #define FORWARD_IF_ERROR(err, ...) \
90 do { \
91 size_t const err_code = (err); \
92 if (ERR_isError(err_code)) { \
93 RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
94 RAWLOG(3, ": " __VA_ARGS__); \
95 RAWLOG(3, "\n"); \
96 return err_code; \
97 } \
98 } while(0);
58
99
59
100
60 /*-*************************************
101 /*-*************************************
61 * Common constants
102 * Common constants
62 ***************************************/
103 ***************************************/
63 #define ZSTD_OPT_NUM (1<<12)
104 #define ZSTD_OPT_NUM (1<<12)
64
105
65 #define ZSTD_REP_NUM 3 /* number of repcodes */
106 #define ZSTD_REP_NUM 3 /* number of repcodes */
66 #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
107 #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
67 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
108 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
68
109
69 #define KB *(1 <<10)
110 #define KB *(1 <<10)
70 #define MB *(1 <<20)
111 #define MB *(1 <<20)
71 #define GB *(1U<<30)
112 #define GB *(1U<<30)
72
113
73 #define BIT7 128
114 #define BIT7 128
74 #define BIT6 64
115 #define BIT6 64
75 #define BIT5 32
116 #define BIT5 32
76 #define BIT4 16
117 #define BIT4 16
77 #define BIT1 2
118 #define BIT1 2
78 #define BIT0 1
119 #define BIT0 1
79
120
80 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
121 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
81 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
122 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
82 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
123 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
83
124
84 #define ZSTD_FRAMEIDSIZE 4 /* magic number size */
125 #define ZSTD_FRAMEIDSIZE 4 /* magic number size */
85
126
86 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
127 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
87 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
128 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
88 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
129 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
89
130
90 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
131 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
91 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
132 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
92
133
93 #define HufLog 12
134 #define HufLog 12
94 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
135 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
95
136
96 #define LONGNBSEQ 0x7F00
137 #define LONGNBSEQ 0x7F00
97
138
98 #define MINMATCH 3
139 #define MINMATCH 3
99
140
100 #define Litbits 8
141 #define Litbits 8
101 #define MaxLit ((1<<Litbits) - 1)
142 #define MaxLit ((1<<Litbits) - 1)
102 #define MaxML 52
143 #define MaxML 52
103 #define MaxLL 35
144 #define MaxLL 35
104 #define DefaultMaxOff 28
145 #define DefaultMaxOff 28
105 #define MaxOff 31
146 #define MaxOff 31
106 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
147 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
107 #define MLFSELog 9
148 #define MLFSELog 9
108 #define LLFSELog 9
149 #define LLFSELog 9
109 #define OffFSELog 8
150 #define OffFSELog 8
110 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
151 #define MaxFSELog MAX(MAX(MLFSELog, LLFSELog), OffFSELog)
111
152
112 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
153 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
113 0, 0, 0, 0, 0, 0, 0, 0,
154 0, 0, 0, 0, 0, 0, 0, 0,
114 1, 1, 1, 1, 2, 2, 3, 3,
155 1, 1, 1, 1, 2, 2, 3, 3,
115 4, 6, 7, 8, 9,10,11,12,
156 4, 6, 7, 8, 9,10,11,12,
116 13,14,15,16 };
157 13,14,15,16 };
117 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
158 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2,
118 2, 2, 2, 2, 2, 1, 1, 1,
159 2, 2, 2, 2, 2, 1, 1, 1,
119 2, 2, 2, 2, 2, 2, 2, 2,
160 2, 2, 2, 2, 2, 2, 2, 2,
120 2, 3, 2, 1, 1, 1, 1, 1,
161 2, 3, 2, 1, 1, 1, 1, 1,
121 -1,-1,-1,-1 };
162 -1,-1,-1,-1 };
122 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
163 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
123 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
164 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
124
165
125 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
166 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0,
126 0, 0, 0, 0, 0, 0, 0, 0,
167 0, 0, 0, 0, 0, 0, 0, 0,
127 0, 0, 0, 0, 0, 0, 0, 0,
168 0, 0, 0, 0, 0, 0, 0, 0,
128 0, 0, 0, 0, 0, 0, 0, 0,
169 0, 0, 0, 0, 0, 0, 0, 0,
129 1, 1, 1, 1, 2, 2, 3, 3,
170 1, 1, 1, 1, 2, 2, 3, 3,
130 4, 4, 5, 7, 8, 9,10,11,
171 4, 4, 5, 7, 8, 9,10,11,
131 12,13,14,15,16 };
172 12,13,14,15,16 };
132 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
173 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2,
133 2, 1, 1, 1, 1, 1, 1, 1,
174 2, 1, 1, 1, 1, 1, 1, 1,
134 1, 1, 1, 1, 1, 1, 1, 1,
175 1, 1, 1, 1, 1, 1, 1, 1,
135 1, 1, 1, 1, 1, 1, 1, 1,
176 1, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1,
177 1, 1, 1, 1, 1, 1, 1, 1,
137 1, 1, 1, 1, 1, 1,-1,-1,
178 1, 1, 1, 1, 1, 1,-1,-1,
138 -1,-1,-1,-1,-1 };
179 -1,-1,-1,-1,-1 };
139 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
180 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
140 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
181 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
141
182
142 static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
183 static const S16 OF_defaultNorm[DefaultMaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2,
143 2, 1, 1, 1, 1, 1, 1, 1,
184 2, 1, 1, 1, 1, 1, 1, 1,
144 1, 1, 1, 1, 1, 1, 1, 1,
185 1, 1, 1, 1, 1, 1, 1, 1,
145 -1,-1,-1,-1,-1 };
186 -1,-1,-1,-1,-1 };
146 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
187 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
147 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
188 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
148
189
149
190
150 /*-*******************************************
191 /*-*******************************************
151 * Shared functions to include for inlining
192 * Shared functions to include for inlining
152 *********************************************/
193 *********************************************/
153 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
194 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
195
154 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
196 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
197 static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); }
198 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
199
200 #define WILDCOPY_OVERLENGTH 8
201 #define VECLEN 16
202
203 typedef enum {
204 ZSTD_no_overlap,
205 ZSTD_overlap_src_before_dst,
206 /* ZSTD_overlap_dst_before_src, */
207 } ZSTD_overlap_e;
155
208
156 /*! ZSTD_wildcopy() :
209 /*! ZSTD_wildcopy() :
157 * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
210 * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */
158 #define WILDCOPY_OVERLENGTH 8
211 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
159 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length)
212 void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
160 {
213 {
214 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
161 const BYTE* ip = (const BYTE*)src;
215 const BYTE* ip = (const BYTE*)src;
162 BYTE* op = (BYTE*)dst;
216 BYTE* op = (BYTE*)dst;
163 BYTE* const oend = op + length;
217 BYTE* const oend = op + length;
164 do
218
165 COPY8(op, ip)
219 assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
166 while (op < oend);
220 if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) {
221 do
222 COPY8(op, ip)
223 while (op < oend);
224 }
225 else {
226 if ((length & 8) == 0)
227 COPY8(op, ip);
228 do {
229 COPY16(op, ip);
230 }
231 while (op < oend);
232 }
233 }
234
235 /*! ZSTD_wildcopy_16min() :
236 * same semantics as ZSTD_wilcopy() except guaranteed to be able to copy 16 bytes at the start */
237 MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE
238 void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype)
239 {
240 ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
241 const BYTE* ip = (const BYTE*)src;
242 BYTE* op = (BYTE*)dst;
243 BYTE* const oend = op + length;
244
245 assert(length >= 8);
246 assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8));
247
248 if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) {
249 do
250 COPY8(op, ip)
251 while (op < oend);
252 }
253 else {
254 if ((length & 8) == 0)
255 COPY8(op, ip);
256 do {
257 COPY16(op, ip);
258 }
259 while (op < oend);
260 }
167 }
261 }
168
262
169 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
263 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */
170 {
264 {
171 const BYTE* ip = (const BYTE*)src;
265 const BYTE* ip = (const BYTE*)src;
172 BYTE* op = (BYTE*)dst;
266 BYTE* op = (BYTE*)dst;
173 BYTE* const oend = (BYTE*)dstEnd;
267 BYTE* const oend = (BYTE*)dstEnd;
174 do
268 do
175 COPY8(op, ip)
269 COPY8(op, ip)
176 while (op < oend);
270 while (op < oend);
177 }
271 }
178
272
179
273
180 /*-*******************************************
274 /*-*******************************************
181 * Private declarations
275 * Private declarations
182 *********************************************/
276 *********************************************/
183 typedef struct seqDef_s {
277 typedef struct seqDef_s {
184 U32 offset;
278 U32 offset;
185 U16 litLength;
279 U16 litLength;
186 U16 matchLength;
280 U16 matchLength;
187 } seqDef;
281 } seqDef;
188
282
189 typedef struct {
283 typedef struct {
190 seqDef* sequencesStart;
284 seqDef* sequencesStart;
191 seqDef* sequences;
285 seqDef* sequences;
192 BYTE* litStart;
286 BYTE* litStart;
193 BYTE* lit;
287 BYTE* lit;
194 BYTE* llCode;
288 BYTE* llCode;
195 BYTE* mlCode;
289 BYTE* mlCode;
196 BYTE* ofCode;
290 BYTE* ofCode;
197 size_t maxNbSeq;
291 size_t maxNbSeq;
198 size_t maxNbLit;
292 size_t maxNbLit;
199 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
293 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
200 U32 longLengthPos;
294 U32 longLengthPos;
201 } seqStore_t;
295 } seqStore_t;
202
296
297 /**
298 * Contains the compressed frame size and an upper-bound for the decompressed frame size.
299 * Note: before using `compressedSize`, check for errors using ZSTD_isError().
300 * similarly, before using `decompressedBound`, check for errors using:
301 * `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
302 */
303 typedef struct {
304 size_t compressedSize;
305 unsigned long long decompressedBound;
306 } ZSTD_frameSizeInfo; /* decompress & legacy */
307
203 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
308 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */
204 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
309 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */
205
310
206 /* custom memory allocation functions */
311 /* custom memory allocation functions */
207 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
312 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
208 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
313 void* ZSTD_calloc(size_t size, ZSTD_customMem customMem);
209 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
314 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
210
315
211
316
212 MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
317 MEM_STATIC U32 ZSTD_highbit32(U32 val) /* compress, dictBuilder, decodeCorpus */
213 {
318 {
214 assert(val != 0);
319 assert(val != 0);
215 {
320 {
216 # if defined(_MSC_VER) /* Visual */
321 # if defined(_MSC_VER) /* Visual */
217 unsigned long r=0;
322 unsigned long r=0;
218 _BitScanReverse(&r, val);
323 _BitScanReverse(&r, val);
219 return (unsigned)r;
324 return (unsigned)r;
220 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
325 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
221 return 31 - __builtin_clz(val);
326 return 31 - __builtin_clz(val);
327 # elif defined(__ICCARM__) /* IAR Intrinsic */
328 return 31 - __CLZ(val);
222 # else /* Software version */
329 # else /* Software version */
223 static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
330 static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
224 U32 v = val;
331 U32 v = val;
225 v |= v >> 1;
332 v |= v >> 1;
226 v |= v >> 2;
333 v |= v >> 2;
227 v |= v >> 4;
334 v |= v >> 4;
228 v |= v >> 8;
335 v |= v >> 8;
229 v |= v >> 16;
336 v |= v >> 16;
230 return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
337 return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
231 # endif
338 # endif
232 }
339 }
233 }
340 }
234
341
235
342
236 /* ZSTD_invalidateRepCodes() :
343 /* ZSTD_invalidateRepCodes() :
237 * ensures next compression will not use repcodes from previous block.
344 * ensures next compression will not use repcodes from previous block.
238 * Note : only works with regular variant;
345 * Note : only works with regular variant;
239 * do not use with extDict variant ! */
346 * do not use with extDict variant ! */
240 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
347 void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx); /* zstdmt, adaptive_compression (shouldn't get this definition from here) */
241
348
242
349
243 typedef struct {
350 typedef struct {
244 blockType_e blockType;
351 blockType_e blockType;
245 U32 lastBlock;
352 U32 lastBlock;
246 U32 origSize;
353 U32 origSize;
247 } blockProperties_t; /* declared here for decompress and fullbench */
354 } blockProperties_t; /* declared here for decompress and fullbench */
248
355
249 /*! ZSTD_getcBlockSize() :
356 /*! ZSTD_getcBlockSize() :
250 * Provides the size of compressed block from block header `src` */
357 * Provides the size of compressed block from block header `src` */
251 /* Used by: decompress, fullbench (does not get its definition from here) */
358 /* Used by: decompress, fullbench (does not get its definition from here) */
252 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
359 size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
253 blockProperties_t* bpPtr);
360 blockProperties_t* bpPtr);
254
361
255 /*! ZSTD_decodeSeqHeaders() :
362 /*! ZSTD_decodeSeqHeaders() :
256 * decode sequence header from src */
363 * decode sequence header from src */
257 /* Used by: decompress, fullbench (does not get its definition from here) */
364 /* Used by: decompress, fullbench (does not get its definition from here) */
258 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
365 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
259 const void* src, size_t srcSize);
366 const void* src, size_t srcSize);
260
367
261
368
262 #if defined (__cplusplus)
369 #if defined (__cplusplus)
263 }
370 }
264 #endif
371 #endif
265
372
266 #endif /* ZSTD_CCOMMON_H_MODULE */
373 #endif /* ZSTD_CCOMMON_H_MODULE */
@@ -1,721 +1,721 b''
1 /* ******************************************************************
1 /* ******************************************************************
2 FSE : Finite State Entropy encoder
2 FSE : Finite State Entropy encoder
3 Copyright (C) 2013-present, Yann Collet.
3 Copyright (C) 2013-present, Yann Collet.
4
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
6
7 Redistribution and use in source and binary forms, with or without
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
8 modification, are permitted provided that the following conditions are
9 met:
9 met:
10
10
11 * Redistributions of source code must retain the above copyright
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
15 in the documentation and/or other materials provided with the
16 distribution.
16 distribution.
17
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
29
30 You can contact the author at :
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
33 ****************************************************************** */
34
34
35 /* **************************************************************
35 /* **************************************************************
36 * Includes
36 * Includes
37 ****************************************************************/
37 ****************************************************************/
38 #include <stdlib.h> /* malloc, free, qsort */
38 #include <stdlib.h> /* malloc, free, qsort */
39 #include <string.h> /* memcpy, memset */
39 #include <string.h> /* memcpy, memset */
40 #include "compiler.h"
40 #include "compiler.h"
41 #include "mem.h" /* U32, U16, etc. */
41 #include "mem.h" /* U32, U16, etc. */
42 #include "debug.h" /* assert, DEBUGLOG */
42 #include "debug.h" /* assert, DEBUGLOG */
43 #include "hist.h" /* HIST_count_wksp */
43 #include "hist.h" /* HIST_count_wksp */
44 #include "bitstream.h"
44 #include "bitstream.h"
45 #define FSE_STATIC_LINKING_ONLY
45 #define FSE_STATIC_LINKING_ONLY
46 #include "fse.h"
46 #include "fse.h"
47 #include "error_private.h"
47 #include "error_private.h"
48
48
49
49
50 /* **************************************************************
50 /* **************************************************************
51 * Error Management
51 * Error Management
52 ****************************************************************/
52 ****************************************************************/
53 #define FSE_isError ERR_isError
53 #define FSE_isError ERR_isError
54
54
55
55
56 /* **************************************************************
56 /* **************************************************************
57 * Templates
57 * Templates
58 ****************************************************************/
58 ****************************************************************/
59 /*
59 /*
60 designed to be included
60 designed to be included
61 for type-specific functions (template emulation in C)
61 for type-specific functions (template emulation in C)
62 Objective is to write these functions only once, for improved maintenance
62 Objective is to write these functions only once, for improved maintenance
63 */
63 */
64
64
65 /* safety checks */
65 /* safety checks */
66 #ifndef FSE_FUNCTION_EXTENSION
66 #ifndef FSE_FUNCTION_EXTENSION
67 # error "FSE_FUNCTION_EXTENSION must be defined"
67 # error "FSE_FUNCTION_EXTENSION must be defined"
68 #endif
68 #endif
69 #ifndef FSE_FUNCTION_TYPE
69 #ifndef FSE_FUNCTION_TYPE
70 # error "FSE_FUNCTION_TYPE must be defined"
70 # error "FSE_FUNCTION_TYPE must be defined"
71 #endif
71 #endif
72
72
73 /* Function names */
73 /* Function names */
74 #define FSE_CAT(X,Y) X##Y
74 #define FSE_CAT(X,Y) X##Y
75 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
75 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
76 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
76 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
77
77
78
78
79 /* Function templates */
79 /* Function templates */
80
80
81 /* FSE_buildCTable_wksp() :
81 /* FSE_buildCTable_wksp() :
82 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
82 * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
83 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
83 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
84 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
84 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
85 */
85 */
86 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
86 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
87 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
87 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
88 void* workSpace, size_t wkspSize)
88 void* workSpace, size_t wkspSize)
89 {
89 {
90 U32 const tableSize = 1 << tableLog;
90 U32 const tableSize = 1 << tableLog;
91 U32 const tableMask = tableSize - 1;
91 U32 const tableMask = tableSize - 1;
92 void* const ptr = ct;
92 void* const ptr = ct;
93 U16* const tableU16 = ( (U16*) ptr) + 2;
93 U16* const tableU16 = ( (U16*) ptr) + 2;
94 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
94 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
95 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
95 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
96 U32 const step = FSE_TABLESTEP(tableSize);
96 U32 const step = FSE_TABLESTEP(tableSize);
97 U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
97 U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
98
98
99 FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
99 FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace;
100 U32 highThreshold = tableSize-1;
100 U32 highThreshold = tableSize-1;
101
101
102 /* CTable header */
102 /* CTable header */
103 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
103 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
104 tableU16[-2] = (U16) tableLog;
104 tableU16[-2] = (U16) tableLog;
105 tableU16[-1] = (U16) maxSymbolValue;
105 tableU16[-1] = (U16) maxSymbolValue;
106 assert(tableLog < 16); /* required for threshold strategy to work */
106 assert(tableLog < 16); /* required for threshold strategy to work */
107
107
108 /* For explanations on how to distribute symbol values over the table :
108 /* For explanations on how to distribute symbol values over the table :
109 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
109 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
110
110
111 #ifdef __clang_analyzer__
111 #ifdef __clang_analyzer__
112 memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
112 memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
113 #endif
113 #endif
114
114
115 /* symbol start positions */
115 /* symbol start positions */
116 { U32 u;
116 { U32 u;
117 cumul[0] = 0;
117 cumul[0] = 0;
118 for (u=1; u <= maxSymbolValue+1; u++) {
118 for (u=1; u <= maxSymbolValue+1; u++) {
119 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
119 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
120 cumul[u] = cumul[u-1] + 1;
120 cumul[u] = cumul[u-1] + 1;
121 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
121 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
122 } else {
122 } else {
123 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
123 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
124 } }
124 } }
125 cumul[maxSymbolValue+1] = tableSize+1;
125 cumul[maxSymbolValue+1] = tableSize+1;
126 }
126 }
127
127
128 /* Spread symbols */
128 /* Spread symbols */
129 { U32 position = 0;
129 { U32 position = 0;
130 U32 symbol;
130 U32 symbol;
131 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
131 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
132 int nbOccurences;
132 int nbOccurrences;
133 int const freq = normalizedCounter[symbol];
133 int const freq = normalizedCounter[symbol];
134 for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
134 for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
135 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
135 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
136 position = (position + step) & tableMask;
136 position = (position + step) & tableMask;
137 while (position > highThreshold)
137 while (position > highThreshold)
138 position = (position + step) & tableMask; /* Low proba area */
138 position = (position + step) & tableMask; /* Low proba area */
139 } }
139 } }
140
140
141 assert(position==0); /* Must have initialized all positions */
141 assert(position==0); /* Must have initialized all positions */
142 }
142 }
143
143
144 /* Build table */
144 /* Build table */
145 { U32 u; for (u=0; u<tableSize; u++) {
145 { U32 u; for (u=0; u<tableSize; u++) {
146 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
146 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : static analyzer may not understand tableSymbol is properly initialized */
147 tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
147 tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
148 } }
148 } }
149
149
150 /* Build Symbol Transformation Table */
150 /* Build Symbol Transformation Table */
151 { unsigned total = 0;
151 { unsigned total = 0;
152 unsigned s;
152 unsigned s;
153 for (s=0; s<=maxSymbolValue; s++) {
153 for (s=0; s<=maxSymbolValue; s++) {
154 switch (normalizedCounter[s])
154 switch (normalizedCounter[s])
155 {
155 {
156 case 0:
156 case 0:
157 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
157 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
158 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
158 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
159 break;
159 break;
160
160
161 case -1:
161 case -1:
162 case 1:
162 case 1:
163 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
163 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
164 symbolTT[s].deltaFindState = total - 1;
164 symbolTT[s].deltaFindState = total - 1;
165 total ++;
165 total ++;
166 break;
166 break;
167 default :
167 default :
168 {
168 {
169 U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
169 U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
170 U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
170 U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
171 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
171 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
172 symbolTT[s].deltaFindState = total - normalizedCounter[s];
172 symbolTT[s].deltaFindState = total - normalizedCounter[s];
173 total += normalizedCounter[s];
173 total += normalizedCounter[s];
174 } } } }
174 } } } }
175
175
176 #if 0 /* debug : symbol costs */
176 #if 0 /* debug : symbol costs */
177 DEBUGLOG(5, "\n --- table statistics : ");
177 DEBUGLOG(5, "\n --- table statistics : ");
178 { U32 symbol;
178 { U32 symbol;
179 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
179 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
180 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
180 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
181 symbol, normalizedCounter[symbol],
181 symbol, normalizedCounter[symbol],
182 FSE_getMaxNbBits(symbolTT, symbol),
182 FSE_getMaxNbBits(symbolTT, symbol),
183 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
183 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
184 }
184 }
185 }
185 }
186 #endif
186 #endif
187
187
188 return 0;
188 return 0;
189 }
189 }
190
190
191
191
192 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
192 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
193 {
193 {
194 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
194 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */
195 return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
195 return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol));
196 }
196 }
197
197
198
198
199
199
200 #ifndef FSE_COMMONDEFS_ONLY
200 #ifndef FSE_COMMONDEFS_ONLY
201
201
202
202
203 /*-**************************************************************
203 /*-**************************************************************
204 * FSE NCount encoding
204 * FSE NCount encoding
205 ****************************************************************/
205 ****************************************************************/
206 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
206 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
207 {
207 {
208 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
208 size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
209 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
209 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
210 }
210 }
211
211
212 static size_t
212 static size_t
213 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
213 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
214 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
214 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
215 unsigned writeIsSafe)
215 unsigned writeIsSafe)
216 {
216 {
217 BYTE* const ostart = (BYTE*) header;
217 BYTE* const ostart = (BYTE*) header;
218 BYTE* out = ostart;
218 BYTE* out = ostart;
219 BYTE* const oend = ostart + headerBufferSize;
219 BYTE* const oend = ostart + headerBufferSize;
220 int nbBits;
220 int nbBits;
221 const int tableSize = 1 << tableLog;
221 const int tableSize = 1 << tableLog;
222 int remaining;
222 int remaining;
223 int threshold;
223 int threshold;
224 U32 bitStream = 0;
224 U32 bitStream = 0;
225 int bitCount = 0;
225 int bitCount = 0;
226 unsigned symbol = 0;
226 unsigned symbol = 0;
227 unsigned const alphabetSize = maxSymbolValue + 1;
227 unsigned const alphabetSize = maxSymbolValue + 1;
228 int previousIs0 = 0;
228 int previousIs0 = 0;
229
229
230 /* Table Size */
230 /* Table Size */
231 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
231 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
232 bitCount += 4;
232 bitCount += 4;
233
233
234 /* Init */
234 /* Init */
235 remaining = tableSize+1; /* +1 for extra accuracy */
235 remaining = tableSize+1; /* +1 for extra accuracy */
236 threshold = tableSize;
236 threshold = tableSize;
237 nbBits = tableLog+1;
237 nbBits = tableLog+1;
238
238
239 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
239 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
240 if (previousIs0) {
240 if (previousIs0) {
241 unsigned start = symbol;
241 unsigned start = symbol;
242 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
242 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
243 if (symbol == alphabetSize) break; /* incorrect distribution */
243 if (symbol == alphabetSize) break; /* incorrect distribution */
244 while (symbol >= start+24) {
244 while (symbol >= start+24) {
245 start+=24;
245 start+=24;
246 bitStream += 0xFFFFU << bitCount;
246 bitStream += 0xFFFFU << bitCount;
247 if ((!writeIsSafe) && (out > oend-2))
247 if ((!writeIsSafe) && (out > oend-2))
248 return ERROR(dstSize_tooSmall); /* Buffer overflow */
248 return ERROR(dstSize_tooSmall); /* Buffer overflow */
249 out[0] = (BYTE) bitStream;
249 out[0] = (BYTE) bitStream;
250 out[1] = (BYTE)(bitStream>>8);
250 out[1] = (BYTE)(bitStream>>8);
251 out+=2;
251 out+=2;
252 bitStream>>=16;
252 bitStream>>=16;
253 }
253 }
254 while (symbol >= start+3) {
254 while (symbol >= start+3) {
255 start+=3;
255 start+=3;
256 bitStream += 3 << bitCount;
256 bitStream += 3 << bitCount;
257 bitCount += 2;
257 bitCount += 2;
258 }
258 }
259 bitStream += (symbol-start) << bitCount;
259 bitStream += (symbol-start) << bitCount;
260 bitCount += 2;
260 bitCount += 2;
261 if (bitCount>16) {
261 if (bitCount>16) {
262 if ((!writeIsSafe) && (out > oend - 2))
262 if ((!writeIsSafe) && (out > oend - 2))
263 return ERROR(dstSize_tooSmall); /* Buffer overflow */
263 return ERROR(dstSize_tooSmall); /* Buffer overflow */
264 out[0] = (BYTE)bitStream;
264 out[0] = (BYTE)bitStream;
265 out[1] = (BYTE)(bitStream>>8);
265 out[1] = (BYTE)(bitStream>>8);
266 out += 2;
266 out += 2;
267 bitStream >>= 16;
267 bitStream >>= 16;
268 bitCount -= 16;
268 bitCount -= 16;
269 } }
269 } }
270 { int count = normalizedCounter[symbol++];
270 { int count = normalizedCounter[symbol++];
271 int const max = (2*threshold-1) - remaining;
271 int const max = (2*threshold-1) - remaining;
272 remaining -= count < 0 ? -count : count;
272 remaining -= count < 0 ? -count : count;
273 count++; /* +1 for extra accuracy */
273 count++; /* +1 for extra accuracy */
274 if (count>=threshold)
274 if (count>=threshold)
275 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
275 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
276 bitStream += count << bitCount;
276 bitStream += count << bitCount;
277 bitCount += nbBits;
277 bitCount += nbBits;
278 bitCount -= (count<max);
278 bitCount -= (count<max);
279 previousIs0 = (count==1);
279 previousIs0 = (count==1);
280 if (remaining<1) return ERROR(GENERIC);
280 if (remaining<1) return ERROR(GENERIC);
281 while (remaining<threshold) { nbBits--; threshold>>=1; }
281 while (remaining<threshold) { nbBits--; threshold>>=1; }
282 }
282 }
283 if (bitCount>16) {
283 if (bitCount>16) {
284 if ((!writeIsSafe) && (out > oend - 2))
284 if ((!writeIsSafe) && (out > oend - 2))
285 return ERROR(dstSize_tooSmall); /* Buffer overflow */
285 return ERROR(dstSize_tooSmall); /* Buffer overflow */
286 out[0] = (BYTE)bitStream;
286 out[0] = (BYTE)bitStream;
287 out[1] = (BYTE)(bitStream>>8);
287 out[1] = (BYTE)(bitStream>>8);
288 out += 2;
288 out += 2;
289 bitStream >>= 16;
289 bitStream >>= 16;
290 bitCount -= 16;
290 bitCount -= 16;
291 } }
291 } }
292
292
293 if (remaining != 1)
293 if (remaining != 1)
294 return ERROR(GENERIC); /* incorrect normalized distribution */
294 return ERROR(GENERIC); /* incorrect normalized distribution */
295 assert(symbol <= alphabetSize);
295 assert(symbol <= alphabetSize);
296
296
297 /* flush remaining bitStream */
297 /* flush remaining bitStream */
298 if ((!writeIsSafe) && (out > oend - 2))
298 if ((!writeIsSafe) && (out > oend - 2))
299 return ERROR(dstSize_tooSmall); /* Buffer overflow */
299 return ERROR(dstSize_tooSmall); /* Buffer overflow */
300 out[0] = (BYTE)bitStream;
300 out[0] = (BYTE)bitStream;
301 out[1] = (BYTE)(bitStream>>8);
301 out[1] = (BYTE)(bitStream>>8);
302 out+= (bitCount+7) /8;
302 out+= (bitCount+7) /8;
303
303
304 return (out-ostart);
304 return (out-ostart);
305 }
305 }
306
306
307
307
308 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
308 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
309 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
309 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
310 {
310 {
311 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
311 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
312 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
312 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
313
313
314 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
314 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
315 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
315 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
316
316
317 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
317 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
318 }
318 }
319
319
320
320
321 /*-**************************************************************
321 /*-**************************************************************
322 * FSE Compression Code
322 * FSE Compression Code
323 ****************************************************************/
323 ****************************************************************/
324
324
325 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
325 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
326 {
326 {
327 size_t size;
327 size_t size;
328 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
328 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
329 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
329 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
330 return (FSE_CTable*)malloc(size);
330 return (FSE_CTable*)malloc(size);
331 }
331 }
332
332
333 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
333 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
334
334
335 /* provides the minimum logSize to safely represent a distribution */
335 /* provides the minimum logSize to safely represent a distribution */
336 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
336 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
337 {
337 {
338 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
338 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
339 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
339 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
340 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
340 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
341 assert(srcSize > 1); /* Not supported, RLE should be used instead */
341 assert(srcSize > 1); /* Not supported, RLE should be used instead */
342 return minBits;
342 return minBits;
343 }
343 }
344
344
345 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
345 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
346 {
346 {
347 U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
347 U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
348 U32 tableLog = maxTableLog;
348 U32 tableLog = maxTableLog;
349 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
349 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
350 assert(srcSize > 1); /* Not supported, RLE should be used instead */
350 assert(srcSize > 1); /* Not supported, RLE should be used instead */
351 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
351 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
352 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
352 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
353 if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
353 if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
354 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
354 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
355 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
355 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
356 return tableLog;
356 return tableLog;
357 }
357 }
358
358
359 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
359 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
360 {
360 {
361 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
361 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
362 }
362 }
363
363
364
364
365 /* Secondary normalization method.
365 /* Secondary normalization method.
366 To be used when primary method fails. */
366 To be used when primary method fails. */
367
367
368 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
368 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
369 {
369 {
370 short const NOT_YET_ASSIGNED = -2;
370 short const NOT_YET_ASSIGNED = -2;
371 U32 s;
371 U32 s;
372 U32 distributed = 0;
372 U32 distributed = 0;
373 U32 ToDistribute;
373 U32 ToDistribute;
374
374
375 /* Init */
375 /* Init */
376 U32 const lowThreshold = (U32)(total >> tableLog);
376 U32 const lowThreshold = (U32)(total >> tableLog);
377 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
377 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
378
378
379 for (s=0; s<=maxSymbolValue; s++) {
379 for (s=0; s<=maxSymbolValue; s++) {
380 if (count[s] == 0) {
380 if (count[s] == 0) {
381 norm[s]=0;
381 norm[s]=0;
382 continue;
382 continue;
383 }
383 }
384 if (count[s] <= lowThreshold) {
384 if (count[s] <= lowThreshold) {
385 norm[s] = -1;
385 norm[s] = -1;
386 distributed++;
386 distributed++;
387 total -= count[s];
387 total -= count[s];
388 continue;
388 continue;
389 }
389 }
390 if (count[s] <= lowOne) {
390 if (count[s] <= lowOne) {
391 norm[s] = 1;
391 norm[s] = 1;
392 distributed++;
392 distributed++;
393 total -= count[s];
393 total -= count[s];
394 continue;
394 continue;
395 }
395 }
396
396
397 norm[s]=NOT_YET_ASSIGNED;
397 norm[s]=NOT_YET_ASSIGNED;
398 }
398 }
399 ToDistribute = (1 << tableLog) - distributed;
399 ToDistribute = (1 << tableLog) - distributed;
400
400
401 if (ToDistribute == 0)
401 if (ToDistribute == 0)
402 return 0;
402 return 0;
403
403
404 if ((total / ToDistribute) > lowOne) {
404 if ((total / ToDistribute) > lowOne) {
405 /* risk of rounding to zero */
405 /* risk of rounding to zero */
406 lowOne = (U32)((total * 3) / (ToDistribute * 2));
406 lowOne = (U32)((total * 3) / (ToDistribute * 2));
407 for (s=0; s<=maxSymbolValue; s++) {
407 for (s=0; s<=maxSymbolValue; s++) {
408 if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
408 if ((norm[s] == NOT_YET_ASSIGNED) && (count[s] <= lowOne)) {
409 norm[s] = 1;
409 norm[s] = 1;
410 distributed++;
410 distributed++;
411 total -= count[s];
411 total -= count[s];
412 continue;
412 continue;
413 } }
413 } }
414 ToDistribute = (1 << tableLog) - distributed;
414 ToDistribute = (1 << tableLog) - distributed;
415 }
415 }
416
416
417 if (distributed == maxSymbolValue+1) {
417 if (distributed == maxSymbolValue+1) {
418 /* all values are pretty poor;
418 /* all values are pretty poor;
419 probably incompressible data (should have already been detected);
419 probably incompressible data (should have already been detected);
420 find max, then give all remaining points to max */
420 find max, then give all remaining points to max */
421 U32 maxV = 0, maxC = 0;
421 U32 maxV = 0, maxC = 0;
422 for (s=0; s<=maxSymbolValue; s++)
422 for (s=0; s<=maxSymbolValue; s++)
423 if (count[s] > maxC) { maxV=s; maxC=count[s]; }
423 if (count[s] > maxC) { maxV=s; maxC=count[s]; }
424 norm[maxV] += (short)ToDistribute;
424 norm[maxV] += (short)ToDistribute;
425 return 0;
425 return 0;
426 }
426 }
427
427
428 if (total == 0) {
428 if (total == 0) {
429 /* all of the symbols were low enough for the lowOne or lowThreshold */
429 /* all of the symbols were low enough for the lowOne or lowThreshold */
430 for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
430 for (s=0; ToDistribute > 0; s = (s+1)%(maxSymbolValue+1))
431 if (norm[s] > 0) { ToDistribute--; norm[s]++; }
431 if (norm[s] > 0) { ToDistribute--; norm[s]++; }
432 return 0;
432 return 0;
433 }
433 }
434
434
435 { U64 const vStepLog = 62 - tableLog;
435 { U64 const vStepLog = 62 - tableLog;
436 U64 const mid = (1ULL << (vStepLog-1)) - 1;
436 U64 const mid = (1ULL << (vStepLog-1)) - 1;
437 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
437 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
438 U64 tmpTotal = mid;
438 U64 tmpTotal = mid;
439 for (s=0; s<=maxSymbolValue; s++) {
439 for (s=0; s<=maxSymbolValue; s++) {
440 if (norm[s]==NOT_YET_ASSIGNED) {
440 if (norm[s]==NOT_YET_ASSIGNED) {
441 U64 const end = tmpTotal + (count[s] * rStep);
441 U64 const end = tmpTotal + (count[s] * rStep);
442 U32 const sStart = (U32)(tmpTotal >> vStepLog);
442 U32 const sStart = (U32)(tmpTotal >> vStepLog);
443 U32 const sEnd = (U32)(end >> vStepLog);
443 U32 const sEnd = (U32)(end >> vStepLog);
444 U32 const weight = sEnd - sStart;
444 U32 const weight = sEnd - sStart;
445 if (weight < 1)
445 if (weight < 1)
446 return ERROR(GENERIC);
446 return ERROR(GENERIC);
447 norm[s] = (short)weight;
447 norm[s] = (short)weight;
448 tmpTotal = end;
448 tmpTotal = end;
449 } } }
449 } } }
450
450
451 return 0;
451 return 0;
452 }
452 }
453
453
454
454
455 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
455 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
456 const unsigned* count, size_t total,
456 const unsigned* count, size_t total,
457 unsigned maxSymbolValue)
457 unsigned maxSymbolValue)
458 {
458 {
459 /* Sanity checks */
459 /* Sanity checks */
460 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
460 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
461 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
461 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
462 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
462 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
463 if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
463 if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
464
464
465 { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
465 { static U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
466 U64 const scale = 62 - tableLog;
466 U64 const scale = 62 - tableLog;
467 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
467 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
468 U64 const vStep = 1ULL<<(scale-20);
468 U64 const vStep = 1ULL<<(scale-20);
469 int stillToDistribute = 1<<tableLog;
469 int stillToDistribute = 1<<tableLog;
470 unsigned s;
470 unsigned s;
471 unsigned largest=0;
471 unsigned largest=0;
472 short largestP=0;
472 short largestP=0;
473 U32 lowThreshold = (U32)(total >> tableLog);
473 U32 lowThreshold = (U32)(total >> tableLog);
474
474
475 for (s=0; s<=maxSymbolValue; s++) {
475 for (s=0; s<=maxSymbolValue; s++) {
476 if (count[s] == total) return 0; /* rle special case */
476 if (count[s] == total) return 0; /* rle special case */
477 if (count[s] == 0) { normalizedCounter[s]=0; continue; }
477 if (count[s] == 0) { normalizedCounter[s]=0; continue; }
478 if (count[s] <= lowThreshold) {
478 if (count[s] <= lowThreshold) {
479 normalizedCounter[s] = -1;
479 normalizedCounter[s] = -1;
480 stillToDistribute--;
480 stillToDistribute--;
481 } else {
481 } else {
482 short proba = (short)((count[s]*step) >> scale);
482 short proba = (short)((count[s]*step) >> scale);
483 if (proba<8) {
483 if (proba<8) {
484 U64 restToBeat = vStep * rtbTable[proba];
484 U64 restToBeat = vStep * rtbTable[proba];
485 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
485 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
486 }
486 }
487 if (proba > largestP) { largestP=proba; largest=s; }
487 if (proba > largestP) { largestP=proba; largest=s; }
488 normalizedCounter[s] = proba;
488 normalizedCounter[s] = proba;
489 stillToDistribute -= proba;
489 stillToDistribute -= proba;
490 } }
490 } }
491 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
491 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
492 /* corner case, need another normalization method */
492 /* corner case, need another normalization method */
493 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
493 size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
494 if (FSE_isError(errorCode)) return errorCode;
494 if (FSE_isError(errorCode)) return errorCode;
495 }
495 }
496 else normalizedCounter[largest] += (short)stillToDistribute;
496 else normalizedCounter[largest] += (short)stillToDistribute;
497 }
497 }
498
498
499 #if 0
499 #if 0
500 { /* Print Table (debug) */
500 { /* Print Table (debug) */
501 U32 s;
501 U32 s;
502 U32 nTotal = 0;
502 U32 nTotal = 0;
503 for (s=0; s<=maxSymbolValue; s++)
503 for (s=0; s<=maxSymbolValue; s++)
504 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
504 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
505 for (s=0; s<=maxSymbolValue; s++)
505 for (s=0; s<=maxSymbolValue; s++)
506 nTotal += abs(normalizedCounter[s]);
506 nTotal += abs(normalizedCounter[s]);
507 if (nTotal != (1U<<tableLog))
507 if (nTotal != (1U<<tableLog))
508 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
508 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
509 getchar();
509 getchar();
510 }
510 }
511 #endif
511 #endif
512
512
513 return tableLog;
513 return tableLog;
514 }
514 }
515
515
516
516
517 /* fake FSE_CTable, for raw (uncompressed) input */
517 /* fake FSE_CTable, for raw (uncompressed) input */
518 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
518 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
519 {
519 {
520 const unsigned tableSize = 1 << nbBits;
520 const unsigned tableSize = 1 << nbBits;
521 const unsigned tableMask = tableSize - 1;
521 const unsigned tableMask = tableSize - 1;
522 const unsigned maxSymbolValue = tableMask;
522 const unsigned maxSymbolValue = tableMask;
523 void* const ptr = ct;
523 void* const ptr = ct;
524 U16* const tableU16 = ( (U16*) ptr) + 2;
524 U16* const tableU16 = ( (U16*) ptr) + 2;
525 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
525 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
526 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
526 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
527 unsigned s;
527 unsigned s;
528
528
529 /* Sanity checks */
529 /* Sanity checks */
530 if (nbBits < 1) return ERROR(GENERIC); /* min size */
530 if (nbBits < 1) return ERROR(GENERIC); /* min size */
531
531
532 /* header */
532 /* header */
533 tableU16[-2] = (U16) nbBits;
533 tableU16[-2] = (U16) nbBits;
534 tableU16[-1] = (U16) maxSymbolValue;
534 tableU16[-1] = (U16) maxSymbolValue;
535
535
536 /* Build table */
536 /* Build table */
537 for (s=0; s<tableSize; s++)
537 for (s=0; s<tableSize; s++)
538 tableU16[s] = (U16)(tableSize + s);
538 tableU16[s] = (U16)(tableSize + s);
539
539
540 /* Build Symbol Transformation Table */
540 /* Build Symbol Transformation Table */
541 { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
541 { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
542 for (s=0; s<=maxSymbolValue; s++) {
542 for (s=0; s<=maxSymbolValue; s++) {
543 symbolTT[s].deltaNbBits = deltaNbBits;
543 symbolTT[s].deltaNbBits = deltaNbBits;
544 symbolTT[s].deltaFindState = s-1;
544 symbolTT[s].deltaFindState = s-1;
545 } }
545 } }
546
546
547 return 0;
547 return 0;
548 }
548 }
549
549
550 /* fake FSE_CTable, for rle input (always same symbol) */
550 /* fake FSE_CTable, for rle input (always same symbol) */
551 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
551 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
552 {
552 {
553 void* ptr = ct;
553 void* ptr = ct;
554 U16* tableU16 = ( (U16*) ptr) + 2;
554 U16* tableU16 = ( (U16*) ptr) + 2;
555 void* FSCTptr = (U32*)ptr + 2;
555 void* FSCTptr = (U32*)ptr + 2;
556 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
556 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
557
557
558 /* header */
558 /* header */
559 tableU16[-2] = (U16) 0;
559 tableU16[-2] = (U16) 0;
560 tableU16[-1] = (U16) symbolValue;
560 tableU16[-1] = (U16) symbolValue;
561
561
562 /* Build table */
562 /* Build table */
563 tableU16[0] = 0;
563 tableU16[0] = 0;
564 tableU16[1] = 0; /* just in case */
564 tableU16[1] = 0; /* just in case */
565
565
566 /* Build Symbol Transformation Table */
566 /* Build Symbol Transformation Table */
567 symbolTT[symbolValue].deltaNbBits = 0;
567 symbolTT[symbolValue].deltaNbBits = 0;
568 symbolTT[symbolValue].deltaFindState = 0;
568 symbolTT[symbolValue].deltaFindState = 0;
569
569
570 return 0;
570 return 0;
571 }
571 }
572
572
573
573
574 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
574 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
575 const void* src, size_t srcSize,
575 const void* src, size_t srcSize,
576 const FSE_CTable* ct, const unsigned fast)
576 const FSE_CTable* ct, const unsigned fast)
577 {
577 {
578 const BYTE* const istart = (const BYTE*) src;
578 const BYTE* const istart = (const BYTE*) src;
579 const BYTE* const iend = istart + srcSize;
579 const BYTE* const iend = istart + srcSize;
580 const BYTE* ip=iend;
580 const BYTE* ip=iend;
581
581
582 BIT_CStream_t bitC;
582 BIT_CStream_t bitC;
583 FSE_CState_t CState1, CState2;
583 FSE_CState_t CState1, CState2;
584
584
585 /* init */
585 /* init */
586 if (srcSize <= 2) return 0;
586 if (srcSize <= 2) return 0;
587 { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
587 { size_t const initError = BIT_initCStream(&bitC, dst, dstSize);
588 if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
588 if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ }
589
589
590 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
590 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
591
591
592 if (srcSize & 1) {
592 if (srcSize & 1) {
593 FSE_initCState2(&CState1, ct, *--ip);
593 FSE_initCState2(&CState1, ct, *--ip);
594 FSE_initCState2(&CState2, ct, *--ip);
594 FSE_initCState2(&CState2, ct, *--ip);
595 FSE_encodeSymbol(&bitC, &CState1, *--ip);
595 FSE_encodeSymbol(&bitC, &CState1, *--ip);
596 FSE_FLUSHBITS(&bitC);
596 FSE_FLUSHBITS(&bitC);
597 } else {
597 } else {
598 FSE_initCState2(&CState2, ct, *--ip);
598 FSE_initCState2(&CState2, ct, *--ip);
599 FSE_initCState2(&CState1, ct, *--ip);
599 FSE_initCState2(&CState1, ct, *--ip);
600 }
600 }
601
601
602 /* join to mod 4 */
602 /* join to mod 4 */
603 srcSize -= 2;
603 srcSize -= 2;
604 if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
604 if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
605 FSE_encodeSymbol(&bitC, &CState2, *--ip);
605 FSE_encodeSymbol(&bitC, &CState2, *--ip);
606 FSE_encodeSymbol(&bitC, &CState1, *--ip);
606 FSE_encodeSymbol(&bitC, &CState1, *--ip);
607 FSE_FLUSHBITS(&bitC);
607 FSE_FLUSHBITS(&bitC);
608 }
608 }
609
609
610 /* 2 or 4 encoding per loop */
610 /* 2 or 4 encoding per loop */
611 while ( ip>istart ) {
611 while ( ip>istart ) {
612
612
613 FSE_encodeSymbol(&bitC, &CState2, *--ip);
613 FSE_encodeSymbol(&bitC, &CState2, *--ip);
614
614
615 if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
615 if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
616 FSE_FLUSHBITS(&bitC);
616 FSE_FLUSHBITS(&bitC);
617
617
618 FSE_encodeSymbol(&bitC, &CState1, *--ip);
618 FSE_encodeSymbol(&bitC, &CState1, *--ip);
619
619
620 if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */
620 if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */
621 FSE_encodeSymbol(&bitC, &CState2, *--ip);
621 FSE_encodeSymbol(&bitC, &CState2, *--ip);
622 FSE_encodeSymbol(&bitC, &CState1, *--ip);
622 FSE_encodeSymbol(&bitC, &CState1, *--ip);
623 }
623 }
624
624
625 FSE_FLUSHBITS(&bitC);
625 FSE_FLUSHBITS(&bitC);
626 }
626 }
627
627
628 FSE_flushCState(&bitC, &CState2);
628 FSE_flushCState(&bitC, &CState2);
629 FSE_flushCState(&bitC, &CState1);
629 FSE_flushCState(&bitC, &CState1);
630 return BIT_closeCStream(&bitC);
630 return BIT_closeCStream(&bitC);
631 }
631 }
632
632
633 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
633 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
634 const void* src, size_t srcSize,
634 const void* src, size_t srcSize,
635 const FSE_CTable* ct)
635 const FSE_CTable* ct)
636 {
636 {
637 unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
637 unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
638
638
639 if (fast)
639 if (fast)
640 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
640 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
641 else
641 else
642 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
642 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
643 }
643 }
644
644
645
645
646 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
646 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
647
647
648 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
648 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
649 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
649 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
650
650
651 /* FSE_compress_wksp() :
651 /* FSE_compress_wksp() :
652 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
652 * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
653 * `wkspSize` size must be `(1<<tableLog)`.
653 * `wkspSize` size must be `(1<<tableLog)`.
654 */
654 */
655 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
655 size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
656 {
656 {
657 BYTE* const ostart = (BYTE*) dst;
657 BYTE* const ostart = (BYTE*) dst;
658 BYTE* op = ostart;
658 BYTE* op = ostart;
659 BYTE* const oend = ostart + dstSize;
659 BYTE* const oend = ostart + dstSize;
660
660
661 unsigned count[FSE_MAX_SYMBOL_VALUE+1];
661 unsigned count[FSE_MAX_SYMBOL_VALUE+1];
662 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
662 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
663 FSE_CTable* CTable = (FSE_CTable*)workSpace;
663 FSE_CTable* CTable = (FSE_CTable*)workSpace;
664 size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
664 size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue);
665 void* scratchBuffer = (void*)(CTable + CTableSize);
665 void* scratchBuffer = (void*)(CTable + CTableSize);
666 size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
666 size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable));
667
667
668 /* init conditions */
668 /* init conditions */
669 if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
669 if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge);
670 if (srcSize <= 1) return 0; /* Not compressible */
670 if (srcSize <= 1) return 0; /* Not compressible */
671 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
671 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
673
673
674 /* Scan input and build symbol stats */
674 /* Scan input and build symbol stats */
675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, scratchBuffer, scratchBufferSize) );
676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
679 }
679 }
680
680
681 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
681 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
682 CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
682 CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) );
683
683
684 /* Write table description header */
684 /* Write table description header */
685 { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
685 { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) );
686 op += nc_err;
686 op += nc_err;
687 }
687 }
688
688
689 /* Compress */
689 /* Compress */
690 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
690 CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) );
691 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
691 { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) );
692 if (cSize == 0) return 0; /* not enough space for compressed data */
692 if (cSize == 0) return 0; /* not enough space for compressed data */
693 op += cSize;
693 op += cSize;
694 }
694 }
695
695
696 /* check compressibility */
696 /* check compressibility */
697 if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
697 if ( (size_t)(op-ostart) >= srcSize-1 ) return 0;
698
698
699 return op-ostart;
699 return op-ostart;
700 }
700 }
701
701
702 typedef struct {
702 typedef struct {
703 FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
703 FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
704 BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
704 BYTE scratchBuffer[1 << FSE_MAX_TABLELOG];
705 } fseWkspMax_t;
705 } fseWkspMax_t;
706
706
707 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
707 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
708 {
708 {
709 fseWkspMax_t scratchBuffer;
709 fseWkspMax_t scratchBuffer;
710 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
710 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here means scratchBuffer is not large enough */
711 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
711 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
712 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
712 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
713 }
713 }
714
714
715 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
715 size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
716 {
716 {
717 return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
717 return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
718 }
718 }
719
719
720
720
721 #endif /* FSE_COMMONDEFS_ONLY */
721 #endif /* FSE_COMMONDEFS_ONLY */
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
1 NO CONTENT: modified file
NO CONTENT: modified file
The requested commit or file is too big and content was truncated. Show full diff
General Comments 0
You need to be logged in to leave comments. Login now