@@ -1,2 +1,5 @@
+graft c-ext
 graft zstd
 include make_cffi.py
+include setup_zstd.py
+include zstd.c
@@ -1,6 +1,33 @@
 Version History
 ===============
 
+0.6.0 (released 2017-01-14)
+---------------------------
+
+* Support for legacy zstd protocols (build time opt in feature).
+* Automation improvements to test against Python 3.6, latest versions
+  of Tox, more deterministic AppVeyor behavior.
+* CFFI "parser" improved to use a compiler preprocessor instead of rewriting
+  source code manually.
+* Vendored version of zstd updated to 1.1.2.
+* Documentation improvements.
+* Introduce a bench.py script for performing (crude) benchmarks.
+* ZSTD_CCtx instances are now reused across multiple compress() operations.
+* ZstdCompressor.write_to() now has a flush() method.
+* ZstdCompressor.compressobj()'s flush() method now accepts an argument to
+  flush a block (as opposed to ending the stream).
+* Disallow compress(b'') when writing content sizes by default (issue #11).
+
+0.5.2 (released 2016-11-12)
+---------------------------
+
+* more packaging fixes for source distribution
+
+0.5.1 (released 2016-11-12)
+---------------------------
+
+* setup_zstd.py is included in the source distribution
+
 0.5.0 (released 2016-11-10)
 ---------------------------
 
@@ -2,13 +2,17 @@
 python-zstandard
 ================
 
-This project provides
-`Zstandard <http://www.zstd.net>`_ compression library.
+This project provides Python bindings for interfacing with the
+`Zstandard <http://www.zstd.net>`_ compression library. A C extension
+and CFFI interface is provided.
 
 The primary goal of the extension is to provide a Pythonic interface to
 the underlying C API. This means exposing most of the features and flexibility
 of the C API while not sacrificing usability or safety that Python provides.
 
+The canonical home for this project is
+https://github.com/indygreg/python-zstandard.
+
 | |ci-status| |win-ci-status|
 
 State of Project
@@ -205,14 +209,32 @@ write_dict_id
    Defaults to True. The dictionary ID is only written if a dictionary
    is being used.
 
+Unless specified otherwise, assume that no two methods of ``ZstdCompressor``
+instances can be called from multiple Python threads simultaneously. In other
+words, assume instances are not thread safe unless stated otherwise.
+
 Simple API
 ^^^^^^^^^^
 
 ``compress(data)`` compresses and returns data as a one-shot operation.::
 
-   cctx = zstd.ZsdCompressor()
+   cctx = zstd.ZstdCompressor()
    compressed = cctx.compress(b'data to compress')
 
+Unless ``compression_params`` or ``dict_data`` are passed to the
+``ZstdCompressor``, each invocation of ``compress()`` will calculate the
+optimal compression parameters for the configured compression ``level`` and
+input data size (some parameters are fine-tuned for small input sizes).
+
+If a compression dictionary is being used, the compression parameters
+determined from the first input's size will be reused for subsequent
+operations.
+
+There is currently a deficiency in zstd's C APIs that makes it difficult
+to round trip empty inputs when ``write_content_size=True``. Attempting
+this will raise a ``ValueError`` unless ``allow_empty=True`` is passed
+to ``compress()``.
+
 Streaming Input API
 ^^^^^^^^^^^^^^^^^^^
 
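The empty-input behavior added above can be exercised directly. A minimal
sketch, assuming the extension is importable as ``zstd`` (as in this
project's own test suite)::

   import zstd

   cctx = zstd.ZstdCompressor(write_content_size=True)
   try:
       cctx.compress(b'')            # raises ValueError by default
   except ValueError:
       pass
   compressed = cctx.compress(b'', allow_empty=True)   # explicit opt-in
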
@@ -226,7 +248,7 @@ data into a compressor.::
        ...
 
 The argument to ``write_to()`` must have a ``write(data)`` method. As
-compressed data is available, ``write()`` will be called with the com
+compressed data is available, ``write()`` will be called with the compressed
 data as its argument. Many common Python types implement ``write()``, including
 open file handles and ``io.BytesIO``.
 
@@ -234,6 +256,10 @@ open file handles and ``io.BytesIO``.
 It **must** be used as a context manager. That object's ``write(data)`` method
 is used to feed data into the compressor.
 
+A ``flush()`` method can be called to evict whatever data remains within the
+compressor's internal state into the output object. This may result in 0 or
+more ``write()`` calls to the output object.
+
 If the size of the data being fed to this streaming compressor is known,
 you can declare it before compression begins::
 
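A short sketch of the new streaming-writer ``flush()``, assuming
``io.BytesIO`` as the sink::

   import io
   import zstd

   cctx = zstd.ZstdCompressor(level=3)
   dest = io.BytesIO()
   with cctx.write_to(dest) as compressor:
       compressor.write(b'chunk 1')
       compressor.flush()            # may trigger zero or more write() calls
       compressor.write(b'chunk 2')
   frame = dest.getvalue()
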
@@ -279,6 +305,10 @@ or by fetching a slice of data from the 
 the buffer protocol is being used). The returned iterator consists of chunks
 of compressed data.
 
+If reading from the source via ``read()``, ``read()`` will be called until
+it raises or returns an empty bytes (``b''``). It is perfectly valid for
+the source to deliver fewer bytes than were requested by ``read(size)``.
+
 Like ``write_to()``, ``read_from()`` also accepts a ``size`` argument
 declaring the size of the input stream::
 
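Since the ``read()`` contract above tolerates short reads, any file-like
object works as a source. A sketch using an in-memory source::

   import io
   import zstd

   cctx = zstd.ZstdCompressor()
   source = io.BytesIO(b'data to compress' * 1024)
   chunks = [chunk for chunk in cctx.read_from(source, read_size=16384)]
   compressed = b''.join(chunks)
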
@@ -293,6 +323,10 @@ the ideal size of output chunks::
    for chunk in cctx.read_from(fh, read_size=16384, write_size=8192):
        pass
 
+Unlike ``write_to()``, ``read_from()`` does not give direct control over the
+sizes of chunks fed into the compressor. Instead, chunk sizes will be whatever
+the object being read from delivers. These will often be of a uniform size.
+
 Stream Copying API
 ^^^^^^^^^^^^^^^^^^
 
@@ -334,9 +368,15 @@ The purpose of ``compressobj()`` is to p
 with ``zlib.compressobj`` and ``bz2.BZ2Compressor``. This allows callers to
 swap in different compressor objects while using the same API.
 
-Once ``flush()`` is called, the compressor will no longer accept new data
-to ``compress()``. ``flush()`` **must** be called to end the compression
-context. If not called, the returned data may be incomplete.
+``flush()`` accepts an optional argument indicating how to end the stream.
+``zstd.COMPRESSOBJ_FLUSH_FINISH`` (the default) ends the compression stream.
+Once this type of flush is performed, ``compress()`` and ``flush()`` can
+no longer be called. This type of flush **must** be called to end the
+compression context. If not called, returned data may be incomplete.
+
+A ``zstd.COMPRESSOBJ_FLUSH_BLOCK`` argument to ``flush()`` will flush a
+zstd block. Flushes of this type can be performed multiple times. The next
+call to ``compress()`` will begin a new zstd block.
 
 Here is how this API should be used::
 
@@ -346,6 +386,15 @@ Here is how this API should be used::
    data = cobj.compress(b'raw input 1')
    data = cobj.flush()
 
+Or to flush blocks::
+
+   cctx = zstd.ZstdCompressor()
+   cobj = cctx.compressobj()
+   data = cobj.compress(b'chunk in first block')
+   data = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
+   data = cobj.compress(b'chunk in second block')
+   data = cobj.flush()
+
 For best performance results, keep input chunks under 256KB. This avoids
 extra allocations for a large output object.
 
@@ -371,6 +420,10 @@ dict_data
 
 The interface of this class is very similar to ``ZstdCompressor`` (by design).
 
+Unless specified otherwise, assume that no two methods of ``ZstdDecompressor``
+instances can be called from multiple Python threads simultaneously. In other
+words, assume instances are not thread safe unless stated otherwise.
+
 Simple API
 ^^^^^^^^^^
 
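A round-trip sketch for the decompressor API referenced here (one instance
per thread, per the thread-safety note above)::

   import zstd

   cctx = zstd.ZstdCompressor(write_content_size=True)
   frame = cctx.compress(b'payload')

   dctx = zstd.ZstdDecompressor()
   assert dctx.decompress(frame) == b'payload'
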
@@ -65,14 +65,14 @@ ZstdCompressionDict* train_dictionary(Py
 
    /* Now that we know the total size of the raw simples, we can allocate
       a buffer for the raw data */
-   sampleBuffer = malloc(samplesSize);
+   sampleBuffer = PyMem_Malloc(samplesSize);
    if (!sampleBuffer) {
        PyErr_NoMemory();
        return NULL;
    }
-   sampleSizes = malloc(samplesLen * sizeof(size_t));
+   sampleSizes = PyMem_Malloc(samplesLen * sizeof(size_t));
    if (!sampleSizes) {
-       free(sampleBuffer);
+       PyMem_Free(sampleBuffer);
        PyErr_NoMemory();
        return NULL;
    }
@@ -87,10 +87,10 @@ ZstdCompressionDict* train_dictionary(Py
        sampleOffset = (char*)sampleOffset + sampleSize;
    }
 
-   dict = malloc(capacity);
+   dict = PyMem_Malloc(capacity);
    if (!dict) {
-       free(sampleSizes);
-       free(sampleBuffer);
+       PyMem_Free(sampleSizes);
+       PyMem_Free(sampleBuffer);
        PyErr_NoMemory();
        return NULL;
    }
@@ -100,9 +100,9 @@ ZstdCompressionDict* train_dictionary(Py
        zparams);
    if (ZDICT_isError(zresult)) {
        PyErr_Format(ZstdError, "Cannot train dict: %s", ZDICT_getErrorName(zresult));
-       free(dict);
-       free(sampleSizes);
-       free(sampleBuffer);
+       PyMem_Free(dict);
+       PyMem_Free(sampleSizes);
+       PyMem_Free(sampleBuffer);
        return NULL;
    }
 
@@ -140,7 +140,7 @@ static int ZstdCompressionDict_init(Zstd
        return -1;
    }
 
-   self->dictData = malloc(sourceSize);
+   self->dictData = PyMem_Malloc(sourceSize);
    if (!self->dictData) {
        PyErr_NoMemory();
        return -1;
@@ -154,7 +154,7 @@ static int ZstdCompressionDict_init(Zstd
 
 static void ZstdCompressionDict_dealloc(ZstdCompressionDict* self) {
    if (self->dictData) {
-       free(self->dictData);
+       PyMem_Free(self->dictData);
        self->dictData = NULL;
    }
 
@@ -61,7 +61,7 @@ static PyObject* ZstdCompressionWriter_e
    if (self->cstream && exc_type == Py_None && exc_value == Py_None &&
        exc_tb == Py_None) {
 
-       output.dst = malloc(self->outSize);
+       output.dst = PyMem_Malloc(self->outSize);
        if (!output.dst) {
            return PyErr_NoMemory();
        }
@@ -73,7 +73,7 @@ static PyObject* ZstdCompressionWriter_e
        if (ZSTD_isError(zresult)) {
            PyErr_Format(ZstdError, "error ending compression stream: %s",
                ZSTD_getErrorName(zresult));
-           free(output.dst);
+           PyMem_Free(output.dst);
            return NULL;
        }
 
@@ -94,7 +94,7 @@ static PyObject* ZstdCompressionWriter_e
            output.pos = 0;
        }
 
-       free(output.dst);
+       PyMem_Free(output.dst);
        ZSTD_freeCStream(self->cstream);
        self->cstream = NULL;
    }
@@ -133,7 +133,7 @@ static PyObject* ZstdCompressionWriter_w
        return NULL;
    }
 
-   output.dst = malloc(self->outSize);
+   output.dst = PyMem_Malloc(self->outSize);
    if (!output.dst) {
        return PyErr_NoMemory();
    }
@@ -150,7 +150,7 @@ static PyObject* ZstdCompressionWriter_w
    Py_END_ALLOW_THREADS
 
    if (ZSTD_isError(zresult)) {
-       free(output.dst);
+       PyMem_Free(output.dst);
        PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
        return NULL;
    }
@@ -168,12 +168,63 @@ static PyObject* ZstdCompressionWriter_w
        output.pos = 0;
    }
 
-   free(output.dst);
+   PyMem_Free(output.dst);
 
    /* TODO return bytes written */
    Py_RETURN_NONE;
+}
+
+static PyObject* ZstdCompressionWriter_flush(ZstdCompressionWriter* self, PyObject* args) {
+   size_t zresult;
+   ZSTD_outBuffer output;
+   PyObject* res;
+
+   if (!self->entered) {
+       PyErr_SetString(ZstdError, "flush must be called from an active context manager");
+       return NULL;
 }
 
+   output.dst = PyMem_Malloc(self->outSize);
+   if (!output.dst) {
+       return PyErr_NoMemory();
+   }
+   output.size = self->outSize;
+   output.pos = 0;
+
+   while (1) {
+       Py_BEGIN_ALLOW_THREADS
+       zresult = ZSTD_flushStream(self->cstream, &output);
+       Py_END_ALLOW_THREADS
+
+       if (ZSTD_isError(zresult)) {
+           PyMem_Free(output.dst);
+           PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+           return NULL;
+       }
+
+       if (!output.pos) {
+           break;
+       }
+
+       /* Copy data from output buffer to writer. */
+       if (output.pos) {
+#if PY_MAJOR_VERSION >= 3
+           res = PyObject_CallMethod(self->writer, "write", "y#",
+#else
+           res = PyObject_CallMethod(self->writer, "write", "s#",
+#endif
+               output.dst, output.pos);
+           Py_XDECREF(res);
+       }
+       output.pos = 0;
+   }
+
+   PyMem_Free(output.dst);
+
+   /* TODO return bytes written */
+   Py_RETURN_NONE;
+}
+
 static PyMethodDef ZstdCompressionWriter_methods[] = {
    { "__enter__", (PyCFunction)ZstdCompressionWriter_enter, METH_NOARGS,
        PyDoc_STR("Enter a compression context.") },
@@ -183,6 +234,8 @@ static PyMethodDef ZstdCompressionWriter
        PyDoc_STR("Obtain the memory size of the underlying compressor") },
    { "write", (PyCFunction)ZstdCompressionWriter_write, METH_VARARGS,
        PyDoc_STR("Compress data") },
+   { "flush", (PyCFunction)ZstdCompressionWriter_flush, METH_NOARGS,
+       PyDoc_STR("Flush data and finish a zstd frame") },
    { NULL, NULL }
 };
 
@@ -36,8 +36,8 @@ static PyObject* ZstdCompressionObj_comp
    PyObject* result = NULL;
    Py_ssize_t resultSize = 0;
 
-   if (self->flushed) {
-       PyErr_SetString(ZstdError, "cannot call compress() after flush()");
+   if (self->finished) {
+       PyErr_SetString(ZstdError, "cannot call compress() after compressor finished");
        return NULL;
    }
 
@@ -92,17 +92,62 @@ static PyObject* ZstdCompressionObj_comp
        }
    }
 
-static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self) {
+static PyObject* ZstdCompressionObj_flush(ZstdCompressionObj* self, PyObject* args) {
+   int flushMode = compressorobj_flush_finish;
    size_t zresult;
    PyObject* result = NULL;
    Py_ssize_t resultSize = 0;
 
-   if (self->flushed) {
-       PyErr_SetString(ZstdError, "flush() already called");
+   if (!PyArg_ParseTuple(args, "|i", &flushMode)) {
+       return NULL;
+   }
+
+   if (flushMode != compressorobj_flush_finish && flushMode != compressorobj_flush_block) {
+       PyErr_SetString(PyExc_ValueError, "flush mode not recognized");
+       return NULL;
+   }
+
+   if (self->finished) {
+       PyErr_SetString(ZstdError, "compressor object already finished");
        return NULL;
    }
 
-   self->flushed = 1;
+   assert(self->output.pos == 0);
+
+   if (flushMode == compressorobj_flush_block) {
+       /* The output buffer is of size ZSTD_CStreamOutSize(), which is
+          guaranteed to hold a full block. */
+       Py_BEGIN_ALLOW_THREADS
+       zresult = ZSTD_flushStream(self->cstream, &self->output);
+       Py_END_ALLOW_THREADS
+
+       if (ZSTD_isError(zresult)) {
+           PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
+           return NULL;
+       }
+
+       /* Output buffer is guaranteed to hold full block. */
+       assert(zresult == 0);
+
+       if (self->output.pos) {
+           result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
+           if (!result) {
+               return NULL;
+           }
+       }
+
+       self->output.pos = 0;
+
+       if (result) {
+           return result;
+       }
+       else {
+           return PyBytes_FromString("");
+       }
+   }
+
+   assert(flushMode == compressorobj_flush_finish);
+   self->finished = 1;
 
    while (1) {
        zresult = ZSTD_endStream(self->cstream, &self->output);
@@ -151,7 +196,7 @@ static PyObject* ZstdCompressionObj_flus
 static PyMethodDef ZstdCompressionObj_methods[] = {
    { "compress", (PyCFunction)ZstdCompressionObj_compress, METH_VARARGS,
        PyDoc_STR("compress data") },
-   { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_NOARGS,
+   { "flush", (PyCFunction)ZstdCompressionObj_flush, METH_VARARGS,
        PyDoc_STR("finish compression operation") },
    { NULL, NULL }
 };
@@ -10,6 +10,23 @@
 
 extern PyObject* ZstdError;
 
+int populate_cdict(ZstdCompressor* compressor, void* dictData, size_t dictSize, ZSTD_parameters* zparams) {
+   ZSTD_customMem zmem;
+   assert(!compressor->cdict);
+   Py_BEGIN_ALLOW_THREADS
+   memset(&zmem, 0, sizeof(zmem));
+   compressor->cdict = ZSTD_createCDict_advanced(compressor->dict->dictData,
+       compressor->dict->dictSize, *zparams, zmem);
+   Py_END_ALLOW_THREADS
+
+   if (!compressor->cdict) {
+       PyErr_SetString(ZstdError, "could not create compression dictionary");
+       return 1;
+   }
+
+   return 0;
+}
+
 /**
  * Initialize a zstd CStream from a ZstdCompressor instance.
  *
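``populate_cdict()`` caches the processed dictionary on the compressor, so
repeated dictionary compression pays the ``ZSTD_createCDict_advanced()``
cost only once. From Python, a sketch (the training corpus here is
hypothetical)::

   import zstd

   samples = [b'sample data %d' % i for i in range(128)]
   dict_data = zstd.train_dictionary(8192, samples)

   cctx = zstd.ZstdCompressor(dict_data=dict_data)
   first = cctx.compress(b'sample data 1')    # builds and caches the ZSTD_CDict
   second = cctx.compress(b'sample data 2')   # reuses the cached ZSTD_CDict
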
@@ -57,7 +74,6 @@ ZSTD_CStream* CStream_from_ZstdCompresso
    return cstream;
 }
 
-
 PyDoc_STRVAR(ZstdCompressor__doc__,
 "ZstdCompressor(level=None, dict_data=None, compression_params=None)\n"
 "\n"
@@ -107,6 +123,7 @@ static int ZstdCompressor_init(ZstdCompr
    PyObject* writeContentSize = NULL;
    PyObject* writeDictID = NULL;
 
+   self->cctx = NULL;
    self->dict = NULL;
    self->cparams = NULL;
    self->cdict = NULL;
@@ -129,6 +146,14 @@ static int ZstdCompressor_init(ZstdCompr
        return -1;
    }
 
+   /* We create a ZSTD_CCtx for reuse among multiple operations to reduce the
+      overhead of each compression operation. */
+   self->cctx = ZSTD_createCCtx();
+   if (!self->cctx) {
+       PyErr_NoMemory();
+       return -1;
+   }
+
    self->compressionLevel = level;
 
    if (dict) {
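The per-instance ``ZSTD_CCtx`` means one ``ZstdCompressor`` can be reused
cheaply across many operations. A sketch::

   import zstd

   cctx = zstd.ZstdCompressor(level=3)
   # Each call reuses the compressor's ZSTD_CCtx instead of allocating a new one.
   frames = [cctx.compress(chunk) for chunk in (b'first', b'second', b'third')]
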
@@ -165,6 +190,11 @@ static void ZstdCompressor_dealloc(ZstdC
        self->cdict = NULL;
    }
 
+   if (self->cctx) {
+       ZSTD_freeCCtx(self->cctx);
+       self->cctx = NULL;
+   }
+
    PyObject_Del(self);
 }
 
@@ -339,7 +369,7 @@ finally:
 }
 
 PyDoc_STRVAR(ZstdCompressor_compress__doc__,
-"compress(data)\n"
+"compress(data, allow_empty=False)\n"
 "\n"
 "Compress data in a single operation.\n"
 "\n"
@@ -350,24 +380,41 @@ PyDoc_STRVAR(ZstdCompressor_compress__do
 "streaming based APIs is preferred for larger values.\n"
 );
 
-static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args) {
+static PyObject* ZstdCompressor_compress(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
+   static char* kwlist[] = {
+       "data",
+       "allow_empty",
+       NULL
+   };
+
    const char* source;
    Py_ssize_t sourceSize;
+   PyObject* allowEmpty = NULL;
    size_t destSize;
-   ZSTD_CCtx* cctx;
    PyObject* output;
    char* dest;
    void* dictData = NULL;
    size_t dictSize = 0;
    size_t zresult;
    ZSTD_parameters zparams;
-   ZSTD_customMem zmem;
 
 #if PY_MAJOR_VERSION >= 3
-   if (!PyArg_ParseTuple(args, "y#", &source, &sourceSize)) {
+   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|O",
 #else
-   if (!PyArg_ParseTuple(args, "s#", &source, &sourceSize)) {
+   if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|O",
 #endif
+       kwlist, &source, &sourceSize, &allowEmpty)) {
+       return NULL;
+   }
+
+   /* Limitation in zstd C API doesn't let decompression side distinguish
+      between content size of 0 and unknown content size. This can make round
+      tripping via Python difficult. Until this is fixed, require a flag
+      to fire the footgun.
+      https://github.com/indygreg/python-zstandard/issues/11 */
+   if (0 == sourceSize && self->fparams.contentSizeFlag
+       && (!allowEmpty || PyObject_Not(allowEmpty))) {
+       PyErr_SetString(PyExc_ValueError, "cannot write empty inputs when writing content sizes");
        return NULL;
    }
 
@@ -379,13 +426,6 @@ static PyObject* ZstdCompressor_compress
 
    dest = PyBytes_AsString(output);
 
-   cctx = ZSTD_createCCtx();
-   if (!cctx) {
-       Py_DECREF(output);
-       PyErr_SetString(ZstdError, "could not create CCtx");
-       return NULL;
-   }
-
    if (self->dict) {
        dictData = self->dict->dictData;
        dictSize = self->dict->dictSize;
@@ -406,23 +446,16 @@ static PyObject* ZstdCompressor_compress
    /* The raw dict data has to be processed before it can be used. Since this
       adds overhead - especially if multiple dictionary compression operations
      are performed on the same ZstdCompressor instance - we create a
-      ZSTD_CDict once and reuse it for all operations. */
+      ZSTD_CDict once and reuse it for all operations.
 
-   /* TODO the zparams (which can be derived from the source data size) used
-      on first invocation are effectively reused for subsequent operations. This
-      may not be appropriate if input sizes vary significantly and could affect
-      chosen compression parameters.
-      https://github.com/facebook/zstd/issues/358 tracks this issue. */
+      Note: the compression parameters used for the first invocation (possibly
+      derived from the source size) will be reused on all subsequent invocations.
+      https://github.com/facebook/zstd/issues/358 contains more info. We could
+      potentially add an argument somewhere to control this behavior.
+   */
    if (dictData && !self->cdict) {
-       Py_BEGIN_ALLOW_THREADS
-       memset(&zmem, 0, sizeof(zmem));
-       self->cdict = ZSTD_createCDict_advanced(dictData, dictSize, zparams, zmem);
-       Py_END_ALLOW_THREADS
-
-       if (!self->cdict) {
+       if (populate_cdict(self, dictData, dictSize, &zparams)) {
            Py_DECREF(output);
-           ZSTD_freeCCtx(cctx);
-           PyErr_SetString(ZstdError, "could not create compression dictionary");
            return NULL;
        }
    }
@@ -432,17 +465,15 @@ static PyObject* ZstdCompressor_compress
       size. This means the argument to ZstdCompressor to control frame
       parameters is honored. */
    if (self->cdict) {
-       zresult = ZSTD_compress_usingCDict(cctx, dest, destSize,
+       zresult = ZSTD_compress_usingCDict(self->cctx, dest, destSize,
            source, sourceSize, self->cdict);
    }
    else {
-       zresult = ZSTD_compress_advanced(cctx, dest, destSize,
+       zresult = ZSTD_compress_advanced(self->cctx, dest, destSize,
            source, sourceSize, dictData, dictSize, zparams);
    }
    Py_END_ALLOW_THREADS
 
-   ZSTD_freeCCtx(cctx);
-
    if (ZSTD_isError(zresult)) {
        PyErr_Format(ZstdError, "cannot compress: %s", ZSTD_getErrorName(zresult));
        Py_CLEAR(output);
@@ -500,7 +531,7 @@ static ZstdCompressionObj* ZstdCompresso
    result->compressor = self;
    Py_INCREF(result->compressor);
 
-   result->flushed = 0;
+   result->finished = 0;
 
    return result;
 }
@@ -691,8 +722,8 @@ static ZstdCompressionWriter* ZstdCompre
 }
 
 static PyMethodDef ZstdCompressor_methods[] = {
-   { "compress", (PyCFunction)ZstdCompressor_compress, METH_VARARGS,
-       ZstdCompressor_compress__doc__ },
+   { "compress", (PyCFunction)ZstdCompressor_compress,
+       METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
    { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
        METH_VARARGS | METH_KEYWORDS, ZstdCompressionObj__doc__ },
    { "copy_stream", (PyCFunction)ZstdCompressor_copy_stream,
@@ -33,6 +33,9 @@ void constants_module_init(PyObject* mod
    ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
    PyModule_AddObject(mod, "ZstdError", ZstdError);
 
+   PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_FINISH", compressorobj_flush_finish);
+   PyModule_AddIntConstant(mod, "COMPRESSOBJ_FLUSH_BLOCK", compressorobj_flush_block);
+
    /* For now, the version is a simple tuple instead of a dedicated type. */
    zstdVersion = PyTuple_New(3);
    PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
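These constants are what ``compressobj().flush()`` accepts. A sketch::

   import zstd

   cobj = zstd.ZstdCompressor().compressobj()
   cobj.compress(b'data')
   partial = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)    # stream stays open
   trailer = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)   # default; ends the stream
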
@@ -85,7 +85,7 @@ static PyObject* ZstdDecompressionWriter
        return NULL;
    }
 
-   output.dst = malloc(self->outSize);
+   output.dst = PyMem_Malloc(self->outSize);
    if (!output.dst) {
        return PyErr_NoMemory();
    }
@@ -102,7 +102,7 @@ static PyObject* ZstdDecompressionWriter
    Py_END_ALLOW_THREADS
 
    if (ZSTD_isError(zresult)) {
-       free(output.dst);
+       PyMem_Free(output.dst);
        PyErr_Format(ZstdError, "zstd decompress error: %s",
            ZSTD_getErrorName(zresult));
        return NULL;
@@ -120,7 +120,7 @@ static PyObject* ZstdDecompressionWriter
        }
    }
 
-   free(output.dst);
+   PyMem_Free(output.dst);
 
    /* TODO return bytes written */
    Py_RETURN_NONE;
@@ -15,7 +15,12 @@
 #include "zstd.h"
 #include "zdict.h"
 
-#define PYTHON_ZSTANDARD_VERSION "0.5.0"
+#define PYTHON_ZSTANDARD_VERSION "0.6.0"
+
+typedef enum {
+   compressorobj_flush_finish,
+   compressorobj_flush_block,
+} CompressorObj_Flush;
 
 typedef struct {
    PyObject_HEAD
@@ -54,6 +59,7 @@ typedef struct {
 
    int compressionLevel;
    ZstdCompressionDict* dict;
+   ZSTD_CCtx* cctx;
    ZSTD_CDict* cdict;
    CompressionParametersObject* cparams;
    ZSTD_frameParameters fparams;
@@ -67,7 +73,7 @@ typedef struct {
    ZstdCompressor* compressor;
    ZSTD_CStream* cstream;
    ZSTD_outBuffer output;
-   int flushed;
+   int finished;
 } ZstdCompressionObj;
 
 extern PyTypeObject ZstdCompressionObjType;
@@ -7,7 +7,10 @@
 from __future__ import absolute_import
 
 import cffi
+import distutils.ccompiler
 import os
+import subprocess
+import tempfile
 
 
 HERE = os.path.abspath(os.path.dirname(__file__))
@@ -20,10 +23,8 @@ SOURCES = ['zstd/%s' % p for p in (
     'common/zstd_common.c',
     'compress/fse_compress.c',
     'compress/huf_compress.c',
-    'compress/zbuff_compress.c',
     'compress/zstd_compress.c',
     'decompress/huf_decompress.c',
-    'decompress/zbuff_decompress.c',
     'decompress/zstd_decompress.c',
     'dictBuilder/divsufsort.c',
     'dictBuilder/zdict.c',
@@ -37,74 +38,71 @@ INCLUDE_DIRS = [os.path.join(HERE, d) fo
     'zstd/dictBuilder',
 )]
 
+# cffi can't parse some of the primitives in zstd.h. So we invoke the
+# preprocessor and feed its output into cffi.
+compiler = distutils.ccompiler.new_compiler()
+
+# Needed for MSVC.
+if hasattr(compiler, 'initialize'):
+    compiler.initialize()
+
+# Distutils doesn't set compiler.preprocessor, so invoke the preprocessor
+# manually.
+if compiler.compiler_type == 'unix':
+    args = list(compiler.executables['compiler'])
+    args.extend([
+        '-E',
+        '-DZSTD_STATIC_LINKING_ONLY',
+    ])
+elif compiler.compiler_type == 'msvc':
+    args = [compiler.cc]
+    args.extend([
+        '/EP',
+        '/DZSTD_STATIC_LINKING_ONLY',
+    ])
+else:
+    raise Exception('unsupported compiler type: %s' % compiler.compiler_type)
+
+# zstd.h includes <stddef.h>, which is also included by cffi's boilerplate.
+# This can lead to duplicate declarations. So we strip this include from the
+# preprocessor invocation.
+
 with open(os.path.join(HERE, 'zstd', 'zstd.h'), 'rb') as fh:
-    zstd_h = fh.read()
+    lines = [l for l in fh if not l.startswith(b'#include <stddef.h>')]
+
+fd, input_file = tempfile.mkstemp(suffix='.h')
+os.write(fd, b''.join(lines))
+os.close(fd)
+
+args.append(input_file)
+
+try:
+    process = subprocess.Popen(args, stdout=subprocess.PIPE)
+    output = process.communicate()[0]
+    ret = process.poll()
+    if ret:
+        raise Exception('preprocessor exited with error')
+finally:
+    os.unlink(input_file)
+
+def normalize_output():
+    lines = []
+    for line in output.splitlines():
+        # CFFI's parser doesn't like __attribute__ on UNIX compilers.
+        if line.startswith(b'__attribute__ ((visibility ("default"))) '):
+            line = line[len(b'__attribute__ ((visibility ("default"))) '):]
+
+        lines.append(line)
+
+    return b'\n'.join(lines)
 
 ffi = cffi.FFI()
 ffi.set_source('_zstd_cffi', '''
-/* needed for typedefs like U32 references in zstd.h */
-#include "mem.h"
 #define ZSTD_STATIC_LINKING_ONLY
 #include "zstd.h"
-''',
-    sources=SOURCES, include_dirs=INCLUDE_DIRS)
-
-# Rather than define the API definitions from zstd.h inline, munge the
-# source in a way that cdef() will accept.
-lines = zstd_h.splitlines()
-lines = [l.rstrip() for l in lines if l.strip()]
-
-# Strip preprocessor directives - they aren't important for our needs.
-lines = [l for l in lines
-         if not l.startswith((b'#if', b'#else', b'#endif', b'#include'))]
-
-# Remove extern C block
-lines = [l for l in lines if l not in (b'extern "C" {', b'}')]
-
-# The version #defines don't parse and aren't necessary. Strip them.
-lines = [l for l in lines if not l.startswith((
-    b'#define ZSTD_H_235446',
-    b'#define ZSTD_LIB_VERSION',
-    b'#define ZSTD_QUOTE',
-    b'#define ZSTD_EXPAND_AND_QUOTE',
-    b'#define ZSTD_VERSION_STRING',
-    b'#define ZSTD_VERSION_NUMBER'))]
+''', sources=SOURCES, include_dirs=INCLUDE_DIRS)
 
-# The C parser also doesn't like some constant defines referencing
-# other constants.
-# TODO we pick the 64-bit constants here. We should assert somewhere
-# we're compiling for 64-bit.
-def fix_constants(l):
-    if l.startswith(b'#define ZSTD_WINDOWLOG_MAX '):
-        return b'#define ZSTD_WINDOWLOG_MAX 27'
-    elif l.startswith(b'#define ZSTD_CHAINLOG_MAX '):
-        return b'#define ZSTD_CHAINLOG_MAX 28'
-    elif l.startswith(b'#define ZSTD_HASHLOG_MAX '):
-        return b'#define ZSTD_HASHLOG_MAX 27'
-    elif l.startswith(b'#define ZSTD_CHAINLOG_MAX '):
-        return b'#define ZSTD_CHAINLOG_MAX 28'
-    elif l.startswith(b'#define ZSTD_CHAINLOG_MIN '):
-        return b'#define ZSTD_CHAINLOG_MIN 6'
-    elif l.startswith(b'#define ZSTD_SEARCHLOG_MAX '):
-        return b'#define ZSTD_SEARCHLOG_MAX 26'
-    elif l.startswith(b'#define ZSTD_BLOCKSIZE_ABSOLUTEMAX '):
-        return b'#define ZSTD_BLOCKSIZE_ABSOLUTEMAX 131072'
-    else:
-        return l
-lines = map(fix_constants, lines)
-
-# ZSTDLIB_API isn't handled correctly. Strip it.
-lines = [l for l in lines if not l.startswith(b'# define ZSTDLIB_API')]
-def strip_api(l):
-    if l.startswith(b'ZSTDLIB_API '):
-        return l[len(b'ZSTDLIB_API '):]
-    else:
-        return l
-lines = map(strip_api, lines)
-
-source = b'\n'.join(lines)
-ffi.cdef(source.decode('latin1'))
-
+ffi.cdef(normalize_output().decode('latin1'))
 
 if __name__ == '__main__':
     ffi.compile()
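Regenerating the CFFI backend remains a direct invocation of the script. A
sketch of verifying the result, assuming cffi's out-of-line API mode exposes
the compiled module as ``_zstd_cffi`` with ``lib``/``ffi`` attributes::

   # python make_cffi.py   # runs ffi.compile() and produces _zstd_cffi
   import _zstd_cffi

   print(_zstd_cffi.lib.ZSTD_versionNumber())
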
@@ -5,6 +5,7 @@
 # This software may be modified and distributed under the terms
 # of the BSD license. See the LICENSE file for details.
 
+import sys
 from setuptools import setup
 
 try:
@@ -14,9 +15,15 @@ except ImportError:
 
 import setup_zstd
 
+SUPPORT_LEGACY = False
+
+if "--legacy" in sys.argv:
+    SUPPORT_LEGACY = True
+    sys.argv.remove("--legacy")
+
 # Code for obtaining the Extension instance is in its own module to
 # facilitate reuse in other projects.
-extensions = [setup_zstd.get_c_extension()]
+extensions = [setup_zstd.get_c_extension(SUPPORT_LEGACY, 'zstd')]
 
 if cffi:
     import make_cffi
@@ -16,15 +16,24 @@ zstd_sources = ['zstd/%s' % p for p in (
     'common/zstd_common.c',
     'compress/fse_compress.c',
     'compress/huf_compress.c',
-    'compress/zbuff_compress.c',
     'compress/zstd_compress.c',
     'decompress/huf_decompress.c',
-    'decompress/zbuff_decompress.c',
     'decompress/zstd_decompress.c',
     'dictBuilder/divsufsort.c',
     'dictBuilder/zdict.c',
 )]
 
+zstd_sources_legacy = ['zstd/%s' % p for p in (
+    'deprecated/zbuff_compress.c',
+    'deprecated/zbuff_decompress.c',
+    'legacy/zstd_v01.c',
+    'legacy/zstd_v02.c',
+    'legacy/zstd_v03.c',
+    'legacy/zstd_v04.c',
+    'legacy/zstd_v05.c',
+    'legacy/zstd_v06.c',
+    'legacy/zstd_v07.c'
+)]
 
 zstd_includes = [
     'c-ext',
@@ -35,6 +44,11 @@ zstd_includes = [
     'zstd/dictBuilder',
 ]
 
+zstd_includes_legacy = [
+    'zstd/deprecated',
+    'zstd/legacy',
+]
+
 ext_sources = [
     'zstd.c',
     'c-ext/compressiondict.c',
@@ -51,14 +65,27 @@ ext_sources = [
     'c-ext/dictparams.c',
 ]
 
+zstd_depends = [
+    'c-ext/python-zstandard.h',
+]
 
-def get_c_extension(name='zstd'):
+
+def get_c_extension(support_legacy=False, name='zstd'):
     """Obtain a distutils.extension.Extension for the C extension."""
     root = os.path.abspath(os.path.dirname(__file__))
 
     sources = [os.path.join(root, p) for p in zstd_sources + ext_sources]
+    if support_legacy:
+        sources.extend([os.path.join(root, p) for p in zstd_sources_legacy])
+
     include_dirs = [os.path.join(root, d) for d in zstd_includes]
+    if support_legacy:
+        include_dirs.extend([os.path.join(root, d) for d in zstd_includes_legacy])
+
+    depends = [os.path.join(root, p) for p in zstd_depends]
 
     # TODO compile with optimizations.
     return Extension(name, sources,
-                     include_dirs=include_dirs)
+                     include_dirs=include_dirs,
+                     depends=depends,
+                     extra_compile_args=["-DZSTD_LEGACY_SUPPORT=1"] if support_legacy else [])
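A sketch of consuming the new signature from another project's ``setup.py``
(mirroring what this repository's own ``setup.py`` does; the project name is
hypothetical)::

   from setuptools import setup

   import setup_zstd

   extensions = [setup_zstd.get_c_extension(support_legacy=True, name='zstd')]

   setup(name='example', ext_modules=extensions)
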
@@ -41,6 +41,14 @@ class TestCompressor_compress(unittest.T
         self.assertEqual(cctx.compress(b''),
                          b'\x28\xb5\x2f\xfd\x00\x48\x01\x00\x00')
 
+        # TODO should be temporary until https://github.com/facebook/zstd/issues/506
+        # is fixed.
+        cctx = zstd.ZstdCompressor(write_content_size=True)
+        with self.assertRaises(ValueError):
+            cctx.compress(b'')
+
+        cctx.compress(b'', allow_empty=True)
+
     def test_compress_large(self):
         chunks = []
         for i in range(255):
@@ -139,19 +147,45 @@ class TestCompressor_compressobj(unittes
 
         self.assertEqual(len(with_size), len(no_size) + 1)
 
-    def test_compress_after_flush(self):
+    def test_compress_after_finished(self):
         cctx = zstd.ZstdCompressor()
         cobj = cctx.compressobj()
 
         cobj.compress(b'foo')
         cobj.flush()
 
-        with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after flush\(\)'):
+        with self.assertRaisesRegexp(zstd.ZstdError, 'cannot call compress\(\) after compressor'):
            cobj.compress(b'foo')
 
-        with self.assertRaisesRegexp(zstd.ZstdError, 'flush\(\) already called'):
+        with self.assertRaisesRegexp(zstd.ZstdError, 'compressor object already finished'):
            cobj.flush()
 
+    def test_flush_block_repeated(self):
+        cctx = zstd.ZstdCompressor(level=1)
+        cobj = cctx.compressobj()
+
+        self.assertEqual(cobj.compress(b'foo'), b'')
+        self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
+                         b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
+        self.assertEqual(cobj.compress(b'bar'), b'')
+        # 3 byte header plus content.
+        self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
+
+    def test_flush_empty_block(self):
+        cctx = zstd.ZstdCompressor(write_checksum=True)
+        cobj = cctx.compressobj()
+
+        cobj.compress(b'foobar')
+        cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
+        # No-op if no block is active (this is internal to zstd).
+        self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK), b'')
+
+        trailing = cobj.flush()
+        # 3 bytes block header + 4 bytes frame checksum
+        self.assertEqual(len(trailing), 7)
+        header = trailing[0:3]
+        self.assertEqual(header, b'\x01\x00\x00')
+
 
 class TestCompressor_copy_stream(unittest.TestCase):
     def test_no_read(self):
@@ -384,6 +418,43 @@ class TestCompressor_write_to(unittest.T
 
         self.assertEqual(len(dest.getvalue()), dest._write_count)
 
+    def test_flush_repeated(self):
+        cctx = zstd.ZstdCompressor(level=3)
+        dest = OpCountingBytesIO()
+        with cctx.write_to(dest) as compressor:
+            compressor.write(b'foo')
+            self.assertEqual(dest._write_count, 0)
+            compressor.flush()
+            self.assertEqual(dest._write_count, 1)
+            compressor.write(b'bar')
+            self.assertEqual(dest._write_count, 1)
+            compressor.flush()
+            self.assertEqual(dest._write_count, 2)
+            compressor.write(b'baz')
+
+        self.assertEqual(dest._write_count, 3)
+
+    def test_flush_empty_block(self):
+        cctx = zstd.ZstdCompressor(level=3, write_checksum=True)
+        dest = OpCountingBytesIO()
+        with cctx.write_to(dest) as compressor:
+            compressor.write(b'foobar' * 8192)
+            count = dest._write_count
+            offset = dest.tell()
+            compressor.flush()
+            self.assertGreater(dest._write_count, count)
+            self.assertGreater(dest.tell(), offset)
+            offset = dest.tell()
+            # Ending the write here should cause an empty block to be written
+            # to denote end of frame.
+
+        trailing = dest.getvalue()[offset:]
+        # 3 bytes block header + 4 bytes frame checksum
+        self.assertEqual(len(trailing), 7)
+
+        header = trailing[0:3]
+        self.assertEqual(header, b'\x01\x00\x00')
+
 
 class TestCompressor_read_from(unittest.TestCase):
     def test_type_validation(self):
@@ -9,7 +9,7 @@ import zstd
 
 class TestModuleAttributes(unittest.TestCase):
     def test_version(self):
-        self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 1))
+        self.assertEqual(zstd.ZSTD_VERSION, (1, 1, 2))
 
     def test_constants(self):
         self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -72,6 +72,26 @@ void decompressionwriter_module_init(PyO
 void decompressoriterator_module_init(PyObject* mod);
 
 void zstd_module_init(PyObject* m) {
+   /* python-zstandard relies on unstable zstd C API features. This means
+      that changes in zstd may break expectations in python-zstandard.
+
+      python-zstandard is distributed with a copy of the zstd sources.
+      python-zstandard is only guaranteed to work with the bundled version
+      of zstd.
+
+      However, downstream redistributors or packagers may unbundle zstd
+      from python-zstandard. This can result in a mismatch between zstd
+      versions and API semantics. This essentially "voids the warranty"
+      of python-zstandard and may cause undefined behavior.
+
+      We detect this mismatch here and refuse to load the module if this
+      scenario is detected.
+   */
+   if (ZSTD_VERSION_NUMBER != 10102 || ZSTD_versionNumber() != 10102) {
+       PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
+       return;
+   }
+
    compressionparams_module_init(m);
    dictparams_module_init(m);
    compressiondict_module_init(m);
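When this guard passes, the module-level version attributes agree with the
bundled zstd. A sketch::

   import zstd

   # An ImportError at import time would indicate a zstd library mismatch.
   assert zstd.ZSTD_VERSION == (1, 1, 2)
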
@@ -99,6 +119,10 @@ PyMODINIT_FUNC PyInit_zstd(void) {
    PyObject *m = PyModule_Create(&zstd_module);
    if (m) {
        zstd_module_init(m);
+       if (PyErr_Occurred()) {
+           Py_DECREF(m);
+           m = NULL;
+       }
    }
    return m;
 }
@@ -266,7 +266,7 @@ MEM_STATIC size_t BIT_initDStream(BIT_DS
        bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
        bitD->bitContainer = MEM_readLEST(bitD->ptr);
        { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
-          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+          bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;  /* ensures bitsConsumed is always set */
          if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
    } else {
        bitD->start = (const char*)srcBuffer;
@@ -298,7 +298,7 @@ MEM_STATIC size_t BIT_getUpperBits(size_
 
 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
 {
-#if defined(__BMI__) && defined(__GNUC__)   /* experimental */
+#if defined(__BMI__) && defined(__GNUC__) && __GNUC__*1000+__GNUC_MINOR__ >= 4008   /* experimental */
 # if defined(__x86_64__)
    if (sizeof(bitContainer)==8)
        return _bextr_u64(bitContainer, start, nbBits);
@@ -367,10 +367,10 @@ MEM_STATIC size_t BIT_readBitsFast(BIT_D
 }
 
 /*! BIT_reloadDStream() :
-*   Refill `BIT_DStream_t` from buffer previously set in BIT_initDStream() .
+*   Refill `bitD` from buffer previously set in BIT_initDStream() .
 *   This function is safe, it guarantees it will not read beyond src buffer.
 *   @return : status of `BIT_DStream_t` internal register.
-            if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
+            if status == BIT_DStream_unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
 {
    if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8))  /* should not happen => corruption detected */
@@ -159,6 +159,7 @@ size_t FSE_readNCount (short* normalized
 /*! HUF_readStats() :
    Read compact Huffman tree, saved by HUF_writeCTable().
    `huffWeight` is destination buffer.
+   `rankStats` is assumed to be a table of at least HUF_TABLELOG_MAX U32.
    @return : size read from `src` , or an error Code .
    Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
 */
@@ -187,16 +188,17 @@ size_t HUF_readStats(BYTE* huffWeight, s
                huffWeight[n+1] = ip[n/2] & 15;
    }   }   }
    else  {   /* header compressed with FSE (normal case) */
+       FSE_DTable fseWorkspace[FSE_DTABLE_SIZE_U32(6)];  /* 6 is max possible tableLog for HUF header (maybe even 5, to be tested) */
        if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
-       oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize);   /* max (hwSize-1) values decoded, as last one is implied */
+       oSize = FSE_decompress_wksp(huffWeight, hwSize-1, ip+1, iSize, fseWorkspace, 6);   /* max (hwSize-1) values decoded, as last one is implied */
        if (FSE_isError(oSize)) return oSize;
    }
 
    /* collect weight stats */
-   memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
+   memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
    weightTotal = 0;
    {   U32 n; for (n=0; n<oSize; n++) {
-           if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+           if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
            rankStats[huffWeight[n]]++;
            weightTotal += (1 << huffWeight[n]) >> 1;
    }   }
@@ -204,7 +206,7 @@ size_t HUF_readStats(BYTE* huffWeight, s
 
    /* get last non-null symbol weight (implied, total must be 2^n) */
    {   U32 const tableLog = BIT_highbit32(weightTotal) + 1;
-       if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
+       if (tableLog > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
        *tableLogPtr = tableLog;
        /* determine last weight */
        {   U32 const total = 1 << tableLog;
@@ -286,7 +286,7 @@ If there is an error, the function will 
 #define FSE_BLOCKBOUND(size) (size + (size>>7))
 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size))   /* Macro version, useful for static allocation */
 
-/* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
+/* It is possible to statically allocate FSE CTable/DTable as a table of FSE_CTable/FSE_DTable using below macros */
 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue)   (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
 #define FSE_DTABLE_SIZE_U32(maxTableLog)                   (1 + (1<<maxTableLog))
 
@@ -294,37 +294,72 @@ If there is an error, the function will 
 /* *****************************************
 *  FSE advanced API
 *******************************************/
+/* FSE_count_wksp() :
+ * Same as FSE_count(), but using an externally provided scratch buffer.
+ * `workSpace` size must be table of >= `1024` unsigned
+ */
+size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
+                      const void* source, size_t sourceSize, unsigned* workSpace);
+
+/** FSE_countFast() :
+ *  same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
+ */
 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
-/**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */
+
+/* FSE_countFast_wksp() :
+ * Same as FSE_countFast(), but using an externally provided scratch buffer.
+ * `workSpace` must be a table of minimum `1024` unsigned
+ */
+size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
+
+/*! FSE_count_simple
+ * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
+ * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
+ */
+size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
+
+
 
 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
 /**< same as FSE_optimalTableLog(), which used `minus==2` */
 
+/* FSE_compress_wksp() :
+ * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`).
+ * FSE_WKSP_SIZE_U32() provides the minimum size required for `workSpace` as a table of FSE_CTable.
+ */
+#define FSE_WKSP_SIZE_U32(maxTableLog, maxSymbolValue)   ( FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) + (1<<((maxTableLog>2)?(maxTableLog-2):0)) )
+size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
-/**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
+/**< build a fake FSE_CTable, designed for a flat distribution, where each symbol uses nbBits */
 
 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
 
+/* FSE_buildCTable_wksp() :
+ * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
+ * `wkspSize` must be >= `(1<<tableLog)`.
+ */
+size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
+
 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
-/**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
+/**< build a fake FSE_DTable, designed to read a flat distribution where each symbol uses nbBits */
 
 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
 
+size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog);
+/**< same as FSE_decompress(), using an externally allocated `workSpace` produced with `FSE_DTABLE_SIZE_U32(maxLog)` */
+
 
 /* *****************************************
 *  FSE symbol compression API
 *******************************************/
 /*!
    This API consists of small unitary functions, which highly benefit from being inlined.
-   You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
-   Visual seems to do it automatically.
-   For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
-   If none of these solutions is applicable, include "fse.c" directly.
+   Hence their body are included in next section.
 */
-typedef struct
-{
+typedef struct {
    ptrdiff_t value;
    const void* stateTable;
    const void* symbolTT;
@@ -384,8 +419,7 b' If there is an error, it returns an erro' | |||
|
384 | 419 | /* ***************************************** |
|
385 | 420 | * FSE symbol decompression API |
|
386 | 421 | *******************************************/ |
|
387 | typedef struct | |
|
388 | { | |
|
422 | typedef struct { | |
|
389 | 423 | size_t state; |
|
390 | 424 | const void* table; /* precise table may vary, depending on U16 */ |
|
391 | 425 | } FSE_DState_t; |
@@ -76,12 +76,6 b'' | |||
|
76 | 76 | |
|
77 | 77 | |
|
78 | 78 | /* ************************************************************** |
|
79 | * Complex types | |
|
80 | ****************************************************************/ | |
|
81 | typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; | |
|
82 | ||
|
83 | ||
|
84 | /* ************************************************************** | |
|
85 | 79 | * Templates |
|
86 | 80 | ****************************************************************/ |
|
87 | 81 | /* |
@@ -300,28 +294,34 b' size_t FSE_decompress_usingDTable(void* ' | |||
|
300 | 294 | } |
|
301 | 295 | |
|
302 | 296 | |
|
303 | size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize) | 

297 | size_t FSE_decompress_wksp(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, FSE_DTable* workSpace, unsigned maxLog) | |
|
304 | 298 | { |
|
305 | 299 | const BYTE* const istart = (const BYTE*)cSrc; |
|
306 | 300 | const BYTE* ip = istart; |
|
307 | 301 | short counting[FSE_MAX_SYMBOL_VALUE+1]; |
|
308 | DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ | |
|
309 | 302 | unsigned tableLog; |
|
310 | 303 | unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE; |
|
311 | 304 | |
|
312 | if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */ | |
|
305 | /* normal FSE decoding mode */ | |
|
306 | size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); | |
|
307 | if (FSE_isError(NCountLength)) return NCountLength; | |
|
308 | //if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size; supposed to be already checked in NCountLength, only remaining case : NCountLength==cSrcSize */ | |
|
309 | if (tableLog > maxLog) return ERROR(tableLog_tooLarge); | |
|
310 | ip += NCountLength; | |
|
311 | cSrcSize -= NCountLength; | |
|
312 | ||
|
313 | CHECK_F( FSE_buildDTable (workSpace, counting, maxSymbolValue, tableLog) ); | |
|
313 | 314 | |
|
314 | /* normal FSE decoding mode */ | |
|
315 | { size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize); | |
|
316 | if (FSE_isError(NCountLength)) return NCountLength; | |
|
317 | if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */ | |
|
318 | ip += NCountLength; | |
|
319 | cSrcSize -= NCountLength; | |
|
320 | } | |
|
315 | return FSE_decompress_usingDTable (dst, dstCapacity, ip, cSrcSize, workSpace); /* always return, even if it is an error code */ | |
|
316 | } | |
|
317 | ||
|
321 | 318 | |
|
322 | CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) ); | |
|
319 | typedef FSE_DTable DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)]; | |
|
323 | 320 | |
|
324 | return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); /* always return, even if it is an error code */ | |
|
321 | size_t FSE_decompress(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize) | |
|
322 | { | |
|
323 | DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */ | |
|
324 | return FSE_decompress_wksp(dst, dstCapacity, cSrc, cSrcSize, dt, FSE_MAX_TABLELOG); | |
|
325 | 325 | } |
|
326 | 326 | |
|
327 | 327 |
@@ -62,21 +62,19 b' size_t HUF_compress(void* dst, size_t ds' | |||
|
62 | 62 | HUF_decompress() : |
|
63 | 63 | Decompress HUF data from buffer 'cSrc', of size 'cSrcSize', |
|
64 | 64 | into already allocated buffer 'dst', of minimum size 'dstSize'. |
|
65 | `dstSize` : **must** be the ***exact*** size of original (uncompressed) data. | 

65 | `originalSize` : **must** be the ***exact*** size of original (uncompressed) data. | |
|
66 | 66 | Note : in contrast with FSE, HUF_decompress can regenerate |
|
67 | 67 | RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data, |
|
68 | 68 | because it knows size to regenerate. |
|
69 | @return : size of regenerated data (== dstSize), | 

69 | @return : size of regenerated data (== originalSize), | |
|
70 | 70 | or an error code, which can be tested using HUF_isError() |
|
71 | 71 | */ |
|
72 | size_t HUF_decompress(void* dst, size_t dstSize, | 

72 | size_t HUF_decompress(void* dst, size_t originalSize, | |
|
73 | 73 | const void* cSrc, size_t cSrcSize); |
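
Since the compressed stream carries no size field, the caller must supply the exact figure; a sketch, assuming the application stored `originalSize` next to the blob:

    size_t const r = HUF_decompress(dst, originalSize, cSrc, cSrcSize);
    if (HUF_isError(r)) { /* corrupt input, or wrong originalSize */ }
    /* on success, r == originalSize */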
|
74 | 74 | |
|
75 | 75 | |
|
76 | /* **************************************** | |
|
77 | * Tool functions | |
|
78 | ******************************************/ | |
|
79 | #define HUF_BLOCKSIZE_MAX (128 * 1024) | |
|
76 | /* *** Tool functions *** */ | |
|
77 | #define HUF_BLOCKSIZE_MAX (128 * 1024) /**< maximum input size for a single block compressed with HUF_compress */ | |
|
80 | 78 | size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */ |
|
81 | 79 | |
|
82 | 80 | /* Error Management */ |
@@ -84,12 +82,18 b' unsigned HUF_isError(size_t code); ' | |||
|
84 | 82 | const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */ |
|
85 | 83 | |
|
86 | 84 | |
|
87 | /* *** Advanced function *** */ | |
|
85 | /* *** Advanced function *** */ | |
|
88 | 86 | |
|
89 | 87 | /** HUF_compress2() : |
|
90 | * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */ | 

88 | * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` . | |
|
89 | * `tableLog` must be `<= HUF_TABLELOG_MAX` . */ | |
|
91 | 90 | size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); |
|
92 | 91 | |
|
92 | /** HUF_compress4X_wksp() : | |
|
93 | * Same as HUF_compress2(), but uses externally allocated `workSpace`, which must be a table of >= 1024 unsigned */ | |
|
94 | size_t HUF_compress4X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */ | |
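
A calling sketch for the workspace variant, mirroring what HUF_compress2() now does internally (255/11 are the usual maxSymbolValue/tableLog defaults; buffers are placeholders):

    unsigned wksp[1024];   /* the documented minimum */
    size_t const cSize = HUF_compress4X_wksp(dst, dstCapacity, src, srcSize,
                                             255, 11, wksp, sizeof(wksp));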
|
95 | ||
|
96 | ||
|
93 | 97 | |
|
94 | 98 | #ifdef HUF_STATIC_LINKING_ONLY |
|
95 | 99 | |
@@ -98,7 +102,7 b' size_t HUF_compress2 (void* dst, size_t ' | |||
|
98 | 102 | |
|
99 | 103 | |
|
100 | 104 | /* *** Constants *** */ |
|
101 | #define HUF_TABLELOG_ABSOLUTEMAX 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ | 

105 | #define HUF_TABLELOG_ABSOLUTEMAX 15 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */ | |
|
102 | 106 | #define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */ |
|
103 | 107 | #define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */ |
|
104 | 108 | #define HUF_SYMBOLVALUE_MAX 255 |
@@ -125,9 +129,9 b' size_t HUF_compress2 (void* dst, size_t ' | |||
|
125 | 129 | typedef U32 HUF_DTable; |
|
126 | 130 | #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog))) |
|
127 | 131 | #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \ |
|
128 | HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) } | |
|
132 | HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) } | |
|
129 | 133 | #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \ |
|
130 | HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) } | |
|
134 | HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) } | |
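
The initializer seeds the table's header cell with its size-log; a declaration sketch for the single-symbol (X2) variant, paired with the matching decoder:

    /* needs: #define HUF_STATIC_LINKING_ONLY before #include "huf.h" */
    HUF_CREATE_STATIC_DTABLEX2(dtable, HUF_TABLELOG_MAX);   /* static, no allocation */
    size_t const r = HUF_decompress4X2_DCtx(dtable, dst, dstSize, cSrc, cSrcSize);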
|
131 | 135 | |
|
132 | 136 | |
|
133 | 137 | /* **************************************** |
@@ -141,10 +145,6 b' size_t HUF_decompress4X_hufOnly(HUF_DTab' | |||
|
141 | 145 | size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ |
|
142 | 146 | size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ |
|
143 | 147 | |
|
144 | size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); | |
|
145 | size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |
|
146 | size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |
|
147 | ||
|
148 | 148 | |
|
149 | 149 | /* **************************************** |
|
150 | 150 | * HUF detailed API |
@@ -169,6 +169,12 b' size_t HUF_writeCTable (void* dst, size_' | |||
|
169 | 169 | size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); |
|
170 | 170 | |
|
171 | 171 | |
|
172 | /** HUF_buildCTable_wksp() : | |
|
173 | * Same as HUF_buildCTable(), but using externally allocated scratch buffer. | |
|
174 | * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. | |
|
175 | */ | |
|
176 | size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize); | |
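
A sketch of the new entry point (1024 unsigned = 4 KB, which is exactly the nodeElt table the implementation lays out inside it; `tree` and `count` are placeholders from earlier steps):

    unsigned nodeWksp[1024];   /* 4-byte aligned by virtue of its type */
    size_t const maxNbBits = HUF_buildCTable_wksp(tree, count, maxSymbolValue,
                                                  0 /* 0 = default depth limit */,
                                                  nodeWksp, sizeof(nodeWksp));
    if (HUF_isError(maxNbBits)) { /* workspace too small, or maxSymbolValue out of range */ }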
|
177 | ||
|
172 | 178 | /*! HUF_readStats() : |
|
173 | 179 | Read compact Huffman tree, saved by HUF_writeCTable(). |
|
174 | 180 | `huffWeight` is destination buffer. |
@@ -208,16 +214,20 b' size_t HUF_decompress4X4_usingDTable(voi' | |||
|
208 | 214 | /* single stream variants */ |
|
209 | 215 | |
|
210 | 216 | size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog); |
|
217 | size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize); /**< `workSpace` must be a table of at least 1024 unsigned */ | |
|
211 | 218 | size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable); |
|
212 | 219 | |
|
213 | 220 | size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */ |
|
214 | 221 | size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */ |
|
215 | 222 | |
|
216 | size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); | 

223 | size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); | |
|
224 | size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */ | |
|
225 | size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */ | |
|
226 | ||
|
227 | size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */ | 
|
217 | 228 | size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); |
|
218 | 229 | size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); |
|
219 | 230 | |
|
220 | ||
|
221 | 231 | #endif /* HUF_STATIC_LINKING_ONLY */ |
|
222 | 232 | |
|
223 | 233 |
@@ -55,14 +55,16 b' MEM_STATIC void MEM_check(void) { MEM_ST' | |||
|
55 | 55 | typedef int32_t S32; |
|
56 | 56 | typedef uint64_t U64; |
|
57 | 57 | typedef int64_t S64; |
|
58 | typedef intptr_t iPtrDiff; | |
|
58 | 59 | #else |
|
59 | typedef unsigned char BYTE; | 

60 | typedef unsigned char BYTE; | |
|
60 | 61 | typedef unsigned short U16; |
|
61 | 62 | typedef signed short S16; |
|
62 | 63 | typedef unsigned int U32; |
|
63 | 64 | typedef signed int S32; |
|
64 | 65 | typedef unsigned long long U64; |
|
65 | 66 | typedef signed long long S64; |
|
67 | typedef ptrdiff_t iPtrDiff; | |
|
66 | 68 | #endif |
|
67 | 69 | |
|
68 | 70 |
@@ -16,7 +16,6 b'' | |||
|
16 | 16 | #include "error_private.h" |
|
17 | 17 | #define ZSTD_STATIC_LINKING_ONLY |
|
18 | 18 | #include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */ |
|
19 | #include "zbuff.h" /* declaration of ZBUFF_isError, ZBUFF_getErrorName */ | |
|
20 | 19 | |
|
21 | 20 | |
|
22 | 21 | /*-**************************************** |
@@ -44,16 +43,11 b' ZSTD_ErrorCode ZSTD_getErrorCode(size_t ' | |||
|
44 | 43 | * provides error code string from enum */ |
|
45 | 44 | const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); } |
|
46 | 45 | |
|
47 | ||
|
48 | /* ************************************************************** | |
|
49 | * ZBUFF Error Management | |
|
50 | ****************************************************************/ | |
|
46 | /* --- ZBUFF Error Management (deprecated) --- */ | |
|
51 | 47 | unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); } |
|
52 | ||
|
53 | 48 | const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); } |
|
54 | 49 | |
|
55 | 50 | |
|
56 | ||
|
57 | 51 | /*=************************************************************** |
|
58 | 52 | * Custom allocator |
|
59 | 53 | ****************************************************************/ |
@@ -147,7 +147,7 b' static void ZSTD_copy8(void* dst, const ' | |||
|
147 | 147 | /*! ZSTD_wildcopy() : |
|
148 | 148 | * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */ |
|
149 | 149 | #define WILDCOPY_OVERLENGTH 8 |
|
150 | MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length) | 

150 | MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length) | |
|
151 | 151 | { |
|
152 | 152 | const BYTE* ip = (const BYTE*)src; |
|
153 | 153 | BYTE* op = (BYTE*)dst; |
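
The signature change makes `length` signed (ptrdiff_t instead of size_t); the caller contract is otherwise unchanged and worth restating with a sketch:

    /* dst must have at least length + WILDCOPY_OVERLENGTH writable bytes */
    ZSTD_wildcopy(op, ip, (ptrdiff_t)length);
    op += length;   /* bytes written past `length` are scratch and must be ignored */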
@@ -222,6 +222,7 b' typedef struct {' | |||
|
222 | 222 | U32 log2litSum; |
|
223 | 223 | U32 log2offCodeSum; |
|
224 | 224 | U32 factor; |
|
225 | U32 staticPrices; | |
|
225 | 226 | U32 cachedPrice; |
|
226 | 227 | U32 cachedLitLength; |
|
227 | 228 | const BYTE* cachedLiterals; |
@@ -234,7 +235,9 b' int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);' | |||
|
234 | 235 | /* custom memory allocation functions */ |
|
235 | 236 | void* ZSTD_defaultAllocFunction(void* opaque, size_t size); |
|
236 | 237 | void ZSTD_defaultFreeFunction(void* opaque, void* address); |
|
238 | #ifndef ZSTD_DLL_IMPORT | |
|
237 | 239 | static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL }; |
|
240 | #endif | |
|
238 | 241 | void* ZSTD_malloc(size_t size, ZSTD_customMem customMem); |
|
239 | 242 | void ZSTD_free(void* ptr, ZSTD_customMem customMem); |
|
240 | 243 |
@@ -71,12 +71,6 b'' | |||
|
71 | 71 | |
|
72 | 72 | |
|
73 | 73 | /* ************************************************************** |
|
74 | * Complex types | |
|
75 | ****************************************************************/ | |
|
76 | typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; | |
|
77 | ||
|
78 | ||
|
79 | /* ************************************************************** | |
|
80 | 74 | * Templates |
|
81 | 75 | ****************************************************************/ |
|
82 | 76 | /* |
@@ -100,7 +94,13 b' typedef U32 CTable_max_t[FSE_CTABLE_SIZE' | |||
|
100 | 94 | |
|
101 | 95 | |
|
102 | 96 | /* Function templates */ |
|
103 | size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) | |
|
97 | ||
|
98 | /* FSE_buildCTable_wksp() : | |
|
99 | * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`). | |
|
100 | * wkspSize should be sized to handle the worst case, which is `(1<<max_tableLog) * sizeof(FSE_FUNCTION_TYPE)` | 
|
101 | * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements | |
|
102 | */ | |
|
103 | size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) | |
|
104 | 104 | { |
|
105 | 105 | U32 const tableSize = 1 << tableLog; |
|
106 | 106 | U32 const tableMask = tableSize - 1; |
@@ -111,10 +111,11 b' size_t FSE_buildCTable(FSE_CTable* ct, c' | |||
|
111 | 111 | U32 const step = FSE_TABLESTEP(tableSize); |
|
112 | 112 | U32 cumul[FSE_MAX_SYMBOL_VALUE+2]; |
|
113 | 113 | |
|
114 | FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if static analyzer complain about it */ | |
|
114 | FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)workSpace; | |
|
115 | 115 | U32 highThreshold = tableSize-1; |
|
116 | 116 | |
|
117 | 117 | /* CTable header */ |
|
118 | if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge); | |
|
118 | 119 | tableU16[-2] = (U16) tableLog; |
|
119 | 120 | tableU16[-1] = (U16) maxSymbolValue; |
|
120 | 121 | |
@@ -181,6 +182,13 b' size_t FSE_buildCTable(FSE_CTable* ct, c' | |||
|
181 | 182 | } |
|
182 | 183 | |
|
183 | 184 | |
|
185 | size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog) | |
|
186 | { | |
|
187 | FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if a static analyzer complains about it */ | 
|
188 | return FSE_buildCTable_wksp(ct, normalizedCounter, maxSymbolValue, tableLog, tableSymbol, sizeof(tableSymbol)); | |
|
189 | } | |
|
190 | ||
|
191 | ||
|
184 | 192 | |
|
185 | 193 | #ifndef FSE_COMMONDEFS_ONLY |
|
186 | 194 | |
@@ -189,7 +197,7 b' size_t FSE_buildCTable(FSE_CTable* ct, c' | |||
|
189 | 197 | ****************************************************************/ |
|
190 | 198 | size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog) |
|
191 | 199 | { |
|
192 | size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; | |
|
200 | size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3; | |
|
193 | 201 | return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */ |
|
194 | 202 | } |
|
195 | 203 | |
@@ -300,21 +308,20 b' size_t FSE_writeNCount (void* buffer, si' | |||
|
300 | 308 | * Counting histogram |
|
301 | 309 | ****************************************************************/ |
|
302 | 310 | /*! FSE_count_simple |
|
303 | This function just counts byte values within `src`, | 

304 | and store the histogram into table `count`. | 

305 | It doesn't use any additional memory. But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. | 

311 | This function counts byte values within `src`, and store the histogram into table `count`. | |
|
312 | It doesn't use any additional memory. | |
|
313 | But this function is unsafe : it doesn't check that all values within `src` can fit into `count`. | |
|
306 | 314 | For this reason, prefer using a table `count` with 256 elements. |
|
307 | 315 | @return : count of most numerous element |
|
308 | 316 | */ |
|
309 | static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, | 

310 | const void* src, size_t srcSize) | 

317 | size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, | |
|
318 | const void* src, size_t srcSize) | |
|
311 | 319 | { |
|
312 | 320 | const BYTE* ip = (const BYTE*)src; |
|
313 | 321 | const BYTE* const end = ip + srcSize; |
|
314 | 322 | unsigned maxSymbolValue = *maxSymbolValuePtr; |
|
315 | 323 | unsigned max=0; |
|
316 | 324 | |
|
317 | ||
|
318 | 325 | memset(count, 0, (maxSymbolValue+1)*sizeof(*count)); |
|
319 | 326 | if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; } |
|
320 | 327 | |
@@ -329,20 +336,24 b' static size_t FSE_count_simple(unsigned*' | |||
|
329 | 336 | } |
|
330 | 337 | |
|
331 | 338 | |
|
332 | static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr, | |
|
339 | /* FSE_count_parallel_wksp() : | |
|
340 | * Same as FSE_count_parallel(), but using an externally provided scratch buffer. | |
|
341 | * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)` */ | 
|
342 | static size_t FSE_count_parallel_wksp( | |
|
343 | unsigned* count, unsigned* maxSymbolValuePtr, | |
|
333 | 344 | const void* source, size_t sourceSize, |
|
334 | unsigned checkMax) | |
|
345 | unsigned checkMax, unsigned* const workSpace) | |
|
335 | 346 | { |
|
336 | 347 | const BYTE* ip = (const BYTE*)source; |
|
337 | 348 | const BYTE* const iend = ip+sourceSize; |
|
338 | 349 | unsigned maxSymbolValue = *maxSymbolValuePtr; |
|
339 | 350 | unsigned max=0; |
|
340 | ||
|
351 | U32* const Counting1 = workSpace; | |
|
352 | U32* const Counting2 = Counting1 + 256; | |
|
353 | U32* const Counting3 = Counting2 + 256; | |
|
354 | U32* const Counting4 = Counting3 + 256; | |
|
341 | 355 | |
|
342 | U32 Counting1[256] = { 0 }; | |
|
343 | U32 Counting2[256] = { 0 }; | |
|
344 | U32 Counting3[256] = { 0 }; | |
|
345 | U32 Counting4[256] = { 0 }; | |
|
356 | memset(Counting1, 0, 4*256*sizeof(unsigned)); | |
|
346 | 357 | |
|
347 | 358 | /* safety checks */ |
|
348 | 359 | if (!sourceSize) { |
@@ -388,31 +399,51 b' static size_t FSE_count_parallel(unsigne' | |||
|
388 | 399 | if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall); |
|
389 | 400 | } } |
|
390 | 401 | |
|
391 | { U32 s; for (s=0; s<=maxSymbolValue; s++) { | |
|
392 | count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; | |
|
393 | if (count[s] > max) max = count[s]; | |
|
394 | }} | |
|
402 | { U32 s; for (s=0; s<=maxSymbolValue; s++) { | |
|
403 | count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s]; | |
|
404 | if (count[s] > max) max = count[s]; | |
|
405 | } } | |
|
395 | 406 | |
|
396 | 407 | while (!count[maxSymbolValue]) maxSymbolValue--; |
|
397 | 408 | *maxSymbolValuePtr = maxSymbolValue; |
|
398 | 409 | return (size_t)max; |
|
399 | 410 | } |
|
400 | 411 | |
|
412 | /* FSE_countFast_wksp() : | |
|
413 | * Same as FSE_countFast(), but using an externally provided scratch buffer. | |
|
414 | * `workSpace` size must be table of >= `1024` unsigned */ | |
|
415 | size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, | |
|
416 | const void* source, size_t sourceSize, unsigned* workSpace) | |
|
417 | { | |
|
418 | if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); | |
|
419 | return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace); | |
|
420 | } | |
|
421 | ||
|
401 | 422 | /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */ |
|
402 | 423 | size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, |
|
403 | 424 | const void* source, size_t sourceSize) |
|
404 | 425 | { |
|
405 | if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize); | |
|
406 | return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0); | 

426 | unsigned tmpCounters[1024]; | |
|
427 | return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters); | |
|
428 | } | |
|
429 | ||
|
430 | /* FSE_count_wksp() : | |
|
431 | * Same as FSE_count(), but using an externally provided scratch buffer. | |
|
432 | * `workSpace` size must be table of >= `1024` unsigned */ | |
|
433 | size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr, | |
|
434 | const void* source, size_t sourceSize, unsigned* workSpace) | |
|
435 | { | |
|
436 | if (*maxSymbolValuePtr < 255) | |
|
437 | return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace); | |
|
438 | *maxSymbolValuePtr = 255; | |
|
439 | return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace); | |
|
407 | 440 | } |
|
408 | 441 | |
|
409 | 442 | size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, |
|
410 | const void* source, size_t sourceSize) | 

443 | const void* src, size_t srcSize) | |
|
411 | 444 | { |
|
412 | if (*maxSymbolValuePtr <255) | |
|
413 | return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1); | 

414 | *maxSymbolValuePtr = 255; | |
|
415 | return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize); | |
|
445 | unsigned tmpCounters[1024]; | |
|
446 | return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters); | |
|
416 | 447 | } |
|
417 | 448 | |
|
418 | 449 | |
@@ -428,14 +459,10 b' size_t FSE_count(unsigned* count, unsign' | |||
|
428 | 459 | `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable |
|
429 | 460 | Allocation is manual (C standard does not support variable-size structures). |
|
430 | 461 | */ |
|
431 | ||
|
432 | 462 | size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog) |
|
433 | 463 | { |
|
434 | size_t size; | |
|
435 | FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */ | |
|
436 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); | |
|
437 | size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); | |
|
438 | return size; | |
|
464 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); | |
|
465 | return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32); | |
|
439 | 466 | } |
|
440 | 467 | |
|
441 | 468 | FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog) |
@@ -486,7 +513,7 b' static size_t FSE_normalizeM2(short* nor' | |||
|
486 | 513 | U32 ToDistribute; |
|
487 | 514 | |
|
488 | 515 | /* Init */ |
|
489 | U32 lowThreshold = (U32)(total >> tableLog); | |
|
516 | U32 const lowThreshold = (U32)(total >> tableLog); | |
|
490 | 517 | U32 lowOne = (U32)((total * 3) >> (tableLog + 1)); |
|
491 | 518 | |
|
492 | 519 | for (s=0; s<=maxSymbolValue; s++) { |
@@ -534,17 +561,16 b' static size_t FSE_normalizeM2(short* nor' | |||
|
534 | 561 | return 0; |
|
535 | 562 | } |
|
536 | 563 | |
|
537 | { | |
|
538 | U64 const vStepLog = 62 - tableLog; | |
|
564 | { U64 const vStepLog = 62 - tableLog; | |
|
539 | 565 | U64 const mid = (1ULL << (vStepLog-1)) - 1; |
|
540 | 566 | U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */ |
|
541 | 567 | U64 tmpTotal = mid; |
|
542 | 568 | for (s=0; s<=maxSymbolValue; s++) { |
|
543 | 569 | if (norm[s]==-2) { |
|
544 | U64 end = tmpTotal + (count[s] * rStep); | |
|
545 | U32 sStart = (U32)(tmpTotal >> vStepLog); | |
|
546 | U32 sEnd = (U32)(end >> vStepLog); | |
|
547 | U32 weight = sEnd - sStart; | |
|
570 | U64 const end = tmpTotal + (count[s] * rStep); | |
|
571 | U32 const sStart = (U32)(tmpTotal >> vStepLog); | |
|
572 | U32 const sEnd = (U32)(end >> vStepLog); | |
|
573 | U32 const weight = sEnd - sStart; | |
|
548 | 574 | if (weight < 1) |
|
549 | 575 | return ERROR(GENERIC); |
|
550 | 576 | norm[s] = (short)weight; |
@@ -566,7 +592,6 b' size_t FSE_normalizeCount (short* normal' | |||
|
566 | 592 | if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */ |
|
567 | 593 | |
|
568 | 594 | { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 }; |
|
569 | ||
|
570 | 595 | U64 const scale = 62 - tableLog; |
|
571 | 596 | U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */ |
|
572 | 597 | U64 const vStep = 1ULL<<(scale-20); |
@@ -594,7 +619,7 b' size_t FSE_normalizeCount (short* normal' | |||
|
594 | 619 | } } |
|
595 | 620 | if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) { |
|
596 | 621 | /* corner case, need another normalization method */ |
|
597 | size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); | |
|
622 | size_t const errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue); | |
|
598 | 623 | if (FSE_isError(errorCode)) return errorCode; |
|
599 | 624 | } |
|
600 | 625 | else normalizedCounter[largest] += (short)stillToDistribute; |
@@ -643,17 +668,15 b' size_t FSE_buildCTable_raw (FSE_CTable* ' | |||
|
643 | 668 | |
|
644 | 669 | /* Build Symbol Transformation Table */ |
|
645 | 670 | { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits); |
|
646 | ||
|
647 | 671 | for (s=0; s<=maxSymbolValue; s++) { |
|
648 | 672 | symbolTT[s].deltaNbBits = deltaNbBits; |
|
649 | 673 | symbolTT[s].deltaFindState = s-1; |
|
650 | 674 | } } |
|
651 | 675 | |
|
652 | ||
|
653 | 676 | return 0; |
|
654 | 677 | } |
|
655 | 678 | |
|
656 | /* fake FSE_CTable, for rle (100% always same symbol) input */ | 

679 | /* fake FSE_CTable, for rle input (always same symbol) */ | |
|
657 | 680 | size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue) |
|
658 | 681 | { |
|
659 | 682 | void* ptr = ct; |
@@ -685,14 +708,13 b' static size_t FSE_compress_usingCTable_g' | |||
|
685 | 708 | const BYTE* const iend = istart + srcSize; |
|
686 | 709 | const BYTE* ip=iend; |
|
687 | 710 | |
|
688 | ||
|
689 | 711 | BIT_CStream_t bitC; |
|
690 | 712 | FSE_CState_t CState1, CState2; |
|
691 | 713 | |
|
692 | 714 | /* init */ |
|
693 | 715 | if (srcSize <= 2) return 0; |
|
694 | { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize); | 

695 | if (FSE_isError(errorCode)) return 0; } | |
|
716 | { size_t const initError = BIT_initCStream(&bitC, dst, dstSize); | |
|
717 | if (FSE_isError(initError)) return 0; /* not enough space available to write a bitstream */ } | |
|
696 | 718 | |
|
697 | 719 | #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s)) |
|
698 | 720 | |
@@ -715,7 +737,7 b' static size_t FSE_compress_usingCTable_g' | |||
|
715 | 737 | } |
|
716 | 738 | |
|
717 | 739 | /* 2 or 4 encoding per loop */ |
|
718 | while ( ip>istart ) { | 

740 | while ( ip>istart ) { | |
|
719 | 741 | |
|
720 | 742 | FSE_encodeSymbol(&bitC, &CState2, *--ip); |
|
721 | 743 | |
@@ -741,7 +763,7 b' size_t FSE_compress_usingCTable (void* d' | |||
|
741 | 763 | const void* src, size_t srcSize, |
|
742 | 764 | const FSE_CTable* ct) |
|
743 | 765 | { |
|
744 | const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); | 

766 | unsigned const fast = (dstSize >= FSE_BLOCKBOUND(srcSize)); | |
|
745 | 767 | |
|
746 | 768 | if (fast) |
|
747 | 769 | return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1); |
@@ -752,58 +774,76 b' size_t FSE_compress_usingCTable (void* d' | |||
|
752 | 774 | |
|
753 | 775 | size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); } |
|
754 | 776 | |
|
755 | size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) | |
|
777 | #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f | |
|
778 | #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } | |
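
These helpers encode an early-return-on-error pattern; note that the error branch returns the re-evaluated expression `f`, so they should only wrap calls that are safe to repeat. Expanded by hand, one of the call sites below reads:

    /* CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog)) becomes: */
    size_t const nc_err = FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog);
    if (ERR_isError(nc_err)) return FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog);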
|
779 | ||
|
780 | /* FSE_compress_wksp() : | |
|
781 | * Same as FSE_compress2(), but using an externally allocated scratch buffer (`workSpace`). | |
|
782 | * `wkspSize` size must be `(1<<tableLog)`. | |
|
783 | */ | |
|
784 | size_t FSE_compress_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize) | |
|
756 | 785 | { |
|
757 | const BYTE* const istart = (const BYTE*) src; | |
|
758 | const BYTE* ip = istart; | |
|
759 | ||
|
760 | 786 | BYTE* const ostart = (BYTE*) dst; |
|
761 | 787 | BYTE* op = ostart; |
|
762 | 788 | BYTE* const oend = ostart + dstSize; |
|
763 | 789 | |
|
764 | 790 | U32 count[FSE_MAX_SYMBOL_VALUE+1]; |
|
765 | 791 | S16 norm[FSE_MAX_SYMBOL_VALUE+1]; |
|
766 | CTable_max_t ct; | |
|
767 | size_t errorCode; | |
|
792 | FSE_CTable* CTable = (FSE_CTable*)workSpace; | |
|
793 | size_t const CTableSize = FSE_CTABLE_SIZE_U32(tableLog, maxSymbolValue); | |
|
794 | void* scratchBuffer = (void*)(CTable + CTableSize); | |
|
795 | size_t const scratchBufferSize = wkspSize - (CTableSize * sizeof(FSE_CTable)); | |
|
768 | 796 | |
|
769 | 797 | /* init conditions */ |
|
770 | if (srcSize <= 1) return 0; /* Uncompressible */ | |
|
798 | if (wkspSize < FSE_WKSP_SIZE_U32(tableLog, maxSymbolValue)) return ERROR(tableLog_tooLarge); | |
|
799 | if (srcSize <= 1) return 0; /* Not compressible */ | |
|
771 | 800 | if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE; |
|
772 | 801 | if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG; |
|
773 | 802 | |
|
774 | 803 | /* Scan input and build symbol stats */ |
|
775 | errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize); | 

776 | if (FSE_isError(errorCode)) return errorCode; | |
|
777 | if (errorCode == srcSize) return 1; | |
|
778 | if (errorCode == 1) return 0; /* each symbol only present once */ | |
|
779 | if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ | |
|
804 | { CHECK_V_F(maxCount, FSE_count(count, &maxSymbolValue, src, srcSize) ); | |
|
805 | if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */ | |
|
806 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ | |
|
807 | if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */ | |
|
808 | } | |
|
780 | 809 | |
|
781 | 810 | tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue); |
|
782 | errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue); | 

783 | if (FSE_isError(errorCode)) return errorCode; | |
|
811 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue) ); | |
|
784 | 812 | |
|
785 | 813 | /* Write table description header */ |
|
786 | errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog); | 

787 | if (FSE_isError(errorCode)) return errorCode; | |
|
788 | op += errorCode; | |
|
814 | { CHECK_V_F(nc_err, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); | |
|
815 | op += nc_err; | |
|
816 | } | |
|
789 | 817 | |
|
790 | 818 | /* Compress */ |
|
791 | errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog); | 

792 | if (FSE_isError(errorCode)) return errorCode; | |
|
793 | errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct); | |
|
794 | if (errorCode == 0) return 0; /* not enough space for compressed data */ | |
|
795 | op += errorCode; | |
|
819 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, scratchBufferSize) ); | |
|
820 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, src, srcSize, CTable) ); | |
|
821 | if (cSize == 0) return 0; /* not enough space for compressed data */ | |
|
822 | op += cSize; | |
|
823 | } | |
|
796 | 824 | |
|
797 | 825 | /* check compressibility */ |
|
798 | if ( (size_t)(op-ostart) >= srcSize-1 ) | |
|
799 | return 0; | |
|
826 | if ( (size_t)(op-ostart) >= srcSize-1 ) return 0; | |
|
800 | 827 | |
|
801 | 828 | return op-ostart; |
|
802 | 829 | } |
|
803 | 830 | |
|
804 | size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize) | |
|
831 | typedef struct { | |
|
832 | FSE_CTable CTable_max[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)]; | |
|
833 | BYTE scratchBuffer[1 << FSE_MAX_TABLELOG]; | |
|
834 | } fseWkspMax_t; | |
|
835 | ||
|
836 | size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog) | |
|
805 | 837 | { |
|
806 | return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); | |
|
838 | fseWkspMax_t scratchBuffer; | |
|
839 | FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* a compilation failure here means scratchBuffer is not large enough */ | 
|
840 | if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); | |
|
841 | return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer)); | |
|
842 | } | |
|
843 | ||
|
844 | size_t FSE_compress (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |
|
845 | { | |
|
846 | return FSE_compress2(dst, dstCapacity, src, srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG); | |
|
807 | 847 | } |
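
Taken together with FSE_decompress() from the earlier hunk, the one-shot entry points round-trip as follows (a sketch with placeholder buffers, not part of the diff):

    size_t const cSize = FSE_compress(cBuf, FSE_compressBound(srcSize), src, srcSize);
    if (FSE_isError(cSize)) { /* error */ }
    else if (cSize <= 1) { /* 0: not compressible, 1: RLE -- store the block raw/RLE instead */ }
    else {
        size_t const dSize = FSE_decompress(dBuf, srcSize, cBuf, cSize);
        /* on success, dSize == srcSize */
    }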
|
808 | 848 | |
|
809 | 849 |
@@ -56,6 +56,8 b'' | |||
|
56 | 56 | * Error Management |
|
57 | 57 | ****************************************************************/ |
|
58 | 58 | #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */ |
|
59 | #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return f | |
|
60 | #define CHECK_F(f) { CHECK_V_F(_var_err__, f); } | |
|
59 | 61 | |
|
60 | 62 | |
|
61 | 63 | /* ************************************************************** |
@@ -70,31 +72,73 b' unsigned HUF_optimalTableLog(unsigned ma' | |||
|
70 | 72 | /* ******************************************************* |
|
71 | 73 | * HUF : Huffman block compression |
|
72 | 74 | *********************************************************/ |
|
75 | /* HUF_compressWeights() : | |
|
76 | * Same as FSE_compress(), but dedicated to huff0's weights compression. | |
|
77 | * The use case needs much less stack memory. | |
|
78 | * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX. | |
|
79 | */ | |
|
80 | #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6 | |
|
81 | size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize) | |
|
82 | { | |
|
83 | BYTE* const ostart = (BYTE*) dst; | |
|
84 | BYTE* op = ostart; | |
|
85 | BYTE* const oend = ostart + dstSize; | |
|
86 | ||
|
87 | U32 maxSymbolValue = HUF_TABLELOG_MAX; | |
|
88 | U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER; | |
|
89 | ||
|
90 | FSE_CTable CTable[FSE_CTABLE_SIZE_U32(MAX_FSE_TABLELOG_FOR_HUFF_HEADER, HUF_TABLELOG_MAX)]; | |
|
91 | BYTE scratchBuffer[1<<MAX_FSE_TABLELOG_FOR_HUFF_HEADER]; | |
|
92 | ||
|
93 | U32 count[HUF_TABLELOG_MAX+1]; | |
|
94 | S16 norm[HUF_TABLELOG_MAX+1]; | |
|
95 | ||
|
96 | /* init conditions */ | |
|
97 | if (wtSize <= 1) return 0; /* Not compressible */ | |
|
98 | ||
|
99 | /* Scan input and build symbol stats */ | |
|
100 | { CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) ); | |
|
101 | if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */ | |
|
102 | if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */ | |
|
103 | } | |
|
104 | ||
|
105 | tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue); | |
|
106 | CHECK_F( FSE_normalizeCount(norm, tableLog, count, wtSize, maxSymbolValue) ); | |
|
107 | ||
|
108 | /* Write table description header */ | |
|
109 | { CHECK_V_F(hSize, FSE_writeNCount(op, oend-op, norm, maxSymbolValue, tableLog) ); | |
|
110 | op += hSize; | |
|
111 | } | |
|
112 | ||
|
113 | /* Compress */ | |
|
114 | CHECK_F( FSE_buildCTable_wksp(CTable, norm, maxSymbolValue, tableLog, scratchBuffer, sizeof(scratchBuffer)) ); | |
|
115 | { CHECK_V_F(cSize, FSE_compress_usingCTable(op, oend - op, weightTable, wtSize, CTable) ); | |
|
116 | if (cSize == 0) return 0; /* not enough space for compressed data */ | |
|
117 | op += cSize; | |
|
118 | } | |
|
119 | ||
|
120 | return op-ostart; | |
|
121 | } | |
|
122 | ||
|
123 | ||
|
73 | 124 | struct HUF_CElt_s { |
|
74 | 125 | U16 val; |
|
75 | 126 | BYTE nbBits; |
|
76 | 127 | }; /* typedef'd to HUF_CElt within "huf.h" */ |
|
77 | 128 | |
|
78 | typedef struct nodeElt_s { | |
|
79 | U32 count; | |
|
80 | U16 parent; | |
|
81 | BYTE byte; | |
|
82 | BYTE nbBits; | |
|
83 | } nodeElt; | |
|
84 | ||
|
85 | 129 | /*! HUF_writeCTable() : |
|
86 | 130 | `CTable` : huffman tree to save, using huf representation. |
|
87 | 131 | @return : size of saved CTable */ |
|
88 | 132 | size_t HUF_writeCTable (void* dst, size_t maxDstSize, |
|
89 | 133 | const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog) |
|
90 | 134 | { |
|
91 | BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; | |
|
135 | BYTE bitsToWeight[HUF_TABLELOG_MAX + 1]; /* precomputed conversion table */ | |
|
92 | 136 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX]; |
|
93 | 137 | BYTE* op = (BYTE*)dst; |
|
94 | 138 | U32 n; |
|
95 | 139 | |
|
96 | 140 | /* check conditions */ |
|
97 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); | 

141 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(maxSymbolValue_tooLarge); | |
|
98 | 142 | |
|
99 | 143 | /* convert to weight */ |
|
100 | 144 | bitsToWeight[0] = 0; |
@@ -103,38 +147,33 b' size_t HUF_writeCTable (void* dst, size_' | |||
|
103 | 147 | for (n=0; n<maxSymbolValue; n++) |
|
104 | 148 | huffWeight[n] = bitsToWeight[CTable[n].nbBits]; |
|
105 | 149 | |
|
106 | { size_t const size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue); | |
|
107 | if (FSE_isError(size)) return size; | |
|
108 | if ((size>1) & (size < maxSymbolValue/2)) { /* FSE compressed */ | 

109 | op[0] = (BYTE)size; | 

110 | return size+1; | 

111 | } | |
|
112 | } | |
|
150 | /* attempt weights compression by FSE */ | |
|
151 | { CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, huffWeight, maxSymbolValue) ); | |
|
152 | if ((hSize>1) & (hSize < maxSymbolValue/2)) { /* FSE compressed */ | |
|
153 | op[0] = (BYTE)hSize; | |
|
154 | return hSize+1; | |
|
155 | } } | |
|
113 | 156 | |
|
114 | /* raw values */ | |
|
115 | if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen */ | |
|
157 | /* write raw values as 4-bits (max : 15) */ | |
|
158 | if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen : likely means source cannot be compressed */ | |
|
116 | 159 | if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */ |
|
117 | 160 | op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1)); |
|
118 | huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */ | |
|
161 | huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause msan issue in final combination */ | |
|
119 | 162 | for (n=0; n<maxSymbolValue; n+=2) |
|
120 | 163 | op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]); |
|
121 | 164 | return ((maxSymbolValue+1)/2) + 1; |
|
122 | ||
|
123 | 165 | } |
|
124 | 166 | |
|
125 | 167 | |
|
126 | 168 | size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize) |
|
127 | 169 | { |
|
128 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; | |
|
170 | BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1]; /* init not required, even though some static analyzer may complain */ | |
|
129 | 171 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */ |
|
130 | 172 | U32 tableLog = 0; |
|
131 | size_t readSize; | |
|
132 | 173 | U32 nbSymbols = 0; |
|
133 | /*memset(huffWeight, 0, sizeof(huffWeight));*/ /* is not necessary, even though some analyzer complain ... */ | |
|
134 | 174 | |
|
135 | 175 | /* get symbol weights */ |
|
136 | readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize); | 

137 | if (HUF_isError(readSize)) return readSize; | |
|
176 | CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize)); | |
|
138 | 177 | |
|
139 | 178 | /* check result */ |
|
140 | 179 | if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); |
@@ -174,6 +213,13 b' size_t HUF_readCTable (HUF_CElt* CTable,' | |||
|
174 | 213 | } |
|
175 | 214 | |
|
176 | 215 | |
|
216 | typedef struct nodeElt_s { | |
|
217 | U32 count; | |
|
218 | U16 parent; | |
|
219 | BYTE byte; | |
|
220 | BYTE nbBits; | |
|
221 | } nodeElt; | |
|
222 | ||
|
177 | 223 | static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits) |
|
178 | 224 | { |
|
179 | 225 | const U32 largestBits = huffNode[lastNonNull].nbBits; |
@@ -279,20 +325,26 b' static void HUF_sort(nodeElt* huffNode, ' | |||
|
279 | 325 | } |
|
280 | 326 | |
|
281 | 327 | |
|
328 | /** HUF_buildCTable_wksp() : | |
|
329 | * Same as HUF_buildCTable(), but using externally allocated scratch buffer. | |
|
330 | * `workSpace` must be aligned on 4-bytes boundaries, and be at least as large as a table of 1024 unsigned. | |
|
331 | */ | |
|
282 | 332 | #define STARTNODE (HUF_SYMBOLVALUE_MAX+1) |
|
283 | size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) | |
|
333 | typedef nodeElt huffNodeTable[2*HUF_SYMBOLVALUE_MAX+1 +1]; | |
|
334 | size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize) | |
|
284 | 335 | { |
|
285 | nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1]; | |
|
286 | nodeElt* huffNode = huffNode0 + 1; | 

336 | nodeElt* const huffNode0 = (nodeElt*)workSpace; | |
|
337 | nodeElt* const huffNode = huffNode0+1; | |
|
287 | 338 | U32 n, nonNullRank; |
|
288 | 339 | int lowS, lowN; |
|
289 | 340 | U16 nodeNb = STARTNODE; |
|
290 | 341 | U32 nodeRoot; |
|
291 | 342 | |
|
292 | 343 | /* safety checks */ |
|
344 | if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); /* workSpace is not large enough */ | |
|
293 | 345 | if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT; |
|
294 | 346 | if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC); |
|
295 | memset(huffNode0, 0, sizeof(huffNode0)); | 

347 | memset(huffNode0, 0, sizeof(huffNodeTable)); | |
|
296 | 348 | |
|
297 | 349 | /* sort, decreasing order */ |
|
298 | 350 | HUF_sort(huffNode, count, maxSymbolValue); |
@@ -305,7 +357,7 b' size_t HUF_buildCTable (HUF_CElt* tree, ' | |||
|
305 | 357 | huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb; |
|
306 | 358 | nodeNb++; lowS-=2; |
|
307 | 359 | for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30); |
|
308 | huffNode0[0].count = (U32)(1U<<31); | |
|
360 | huffNode0[0].count = (U32)(1U<<31); /* fake entry, strong barrier */ | |
|
309 | 361 | |
|
310 | 362 | /* create parents */ |
|
311 | 363 | while (nodeNb <= nodeRoot) { |
@@ -348,6 +400,15 b' size_t HUF_buildCTable (HUF_CElt* tree, ' | |||
|
348 | 400 | return maxNbBits; |
|
349 | 401 | } |
|
350 | 402 | |
|
403 | /** HUF_buildCTable() : | |
|
404 | * Note : count is used before tree is written, so they can safely overlap | |
|
405 | */ | |
|
406 | size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits) | |
|
407 | { | |
|
408 | huffNodeTable nodeTable; | |
|
409 | return HUF_buildCTable_wksp(tree, count, maxSymbolValue, maxNbBits, nodeTable, sizeof(nodeTable)); | |
|
410 | } | |
|
411 | ||
|
351 | 412 | static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable) |
|
352 | 413 | { |
|
353 | 414 | BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits); |
@@ -375,8 +436,8 b' size_t HUF_compress1X_usingCTable(void* ' | |||
|
375 | 436 | |
|
376 | 437 | /* init */ |
|
377 | 438 | if (dstSize < 8) return 0; /* not enough space to compress */ |
|
378 | { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op); | 

379 | if (HUF_isError(errorCode)) return 0; } | 

439 | { size_t const initErr = BIT_initCStream(&bitC, op, oend-op); | |
|
440 | if (HUF_isError(initErr)) return 0; } | |
|
380 | 441 | |
|
381 | 442 | n = srcSize & ~3; /* join to mod 4 */ |
|
382 | 443 | switch (srcSize & 3) |
@@ -419,32 +480,28 b' size_t HUF_compress4X_usingCTable(void* ' | |||
|
419 | 480 | if (srcSize < 12) return 0; /* no saving possible : too small input */ |
|
420 | 481 | op += 6; /* jumpTable */ |
|
421 | 482 | |
|
422 | { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); | 

423 | if (HUF_isError(cSize)) return cSize; | |
|
483 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); | |
|
424 | 484 | if (cSize==0) return 0; |
|
425 | 485 | MEM_writeLE16(ostart, (U16)cSize); |
|
426 | 486 | op += cSize; |
|
427 | 487 | } |
|
428 | 488 | |
|
429 | 489 | ip += segmentSize; |
|
430 | { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); | 

431 | if (HUF_isError(cSize)) return cSize; | |
|
490 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); | |
|
432 | 491 | if (cSize==0) return 0; |
|
433 | 492 | MEM_writeLE16(ostart+2, (U16)cSize); |
|
434 | 493 | op += cSize; |
|
435 | 494 | } |
|
436 | 495 | |
|
437 | 496 | ip += segmentSize; |
|
438 | { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable); | 

439 | if (HUF_isError(cSize)) return cSize; | |
|
497 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable) ); | |
|
440 | 498 | if (cSize==0) return 0; |
|
441 | 499 | MEM_writeLE16(ostart+4, (U16)cSize); |
|
442 | 500 | op += cSize; |
|
443 | 501 | } |
|
444 | 502 | |
|
445 | 503 | ip += segmentSize; |
|
446 | { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable); | 

447 | if (HUF_isError(cSize)) return cSize; | |
|
504 | { CHECK_V_F(cSize, HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable) ); | |
|
448 | 505 | if (cSize==0) return 0; |
|
449 | 506 | op += cSize; |
|
450 | 507 | } |
@@ -453,20 +510,25 b' size_t HUF_compress4X_usingCTable(void* ' | |||
|
453 | 510 | } |
|
454 | 511 | |
|
455 | 512 | |
|
513 | /* `workSpace` must be a table of at least 1024 unsigned */ | 
|
456 | 514 | static size_t HUF_compress_internal ( |
|
457 | 515 | void* dst, size_t dstSize, |
|
458 | 516 | const void* src, size_t srcSize, |
|
459 | 517 | unsigned maxSymbolValue, unsigned huffLog, |
|
460 | unsigned singleStream) | 

518 | unsigned singleStream, | |
|
519 | void* workSpace, size_t wkspSize) | |
|
461 | 520 | { |
|
462 | 521 | BYTE* const ostart = (BYTE*)dst; |
|
463 | 522 | BYTE* const oend = ostart + dstSize; |
|
464 | 523 | BYTE* op = ostart; |
|
465 | 524 | |
|
466 | U32 count[HUF_SYMBOLVALUE_MAX+1]; | |
|
467 | HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1]; | 

525 | union { | |
|
526 | U32 count[HUF_SYMBOLVALUE_MAX+1]; | |
|
527 | HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1]; | |
|
528 | } table; /* `count` can overlap with `CTable`; saves 1 KB */ | |
|
468 | 529 | |
|
469 | 530 | /* checks & inits */ |
|
531 | if (wkspSize < sizeof(huffNodeTable)) return ERROR(GENERIC); | |
|
470 | 532 | if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */ |
|
471 | 533 | if (!dstSize) return 0; /* cannot fit within dst budget */ |
|
472 | 534 | if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */ |
@@ -475,30 +537,27 b' static size_t HUF_compress_internal (' | |||
|
475 | 537 | if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT; |
|
476 | 538 | |
|
477 | 539 | /* Scan input and build symbol stats */ |
|
478 | { size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize); | 

479 | if (HUF_isError(largest)) return largest; | |
|
540 | { CHECK_V_F(largest, FSE_count_wksp (table.count, &maxSymbolValue, (const BYTE*)src, srcSize, (U32*)workSpace) ); | |
|
480 | 541 | if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */ |
|
481 | 542 | if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */ |
|
482 | 543 | } |
|
483 | 544 | |
|
484 | 545 | /* Build Huffman Tree */ |
|
485 | 546 | huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue); |
|
486 | { size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog); | 

487 | if (HUF_isError(maxBits)) return maxBits; | |
|
547 | { CHECK_V_F(maxBits, HUF_buildCTable_wksp (table.CTable, table.count, maxSymbolValue, huffLog, workSpace, wkspSize) ); | |
|
488 | 548 | huffLog = (U32)maxBits; |
|
489 | 549 | } |
|
490 | 550 | |
|
491 | 551 | /* Write table description header */ |
|
492 | { size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog); | 

493 | if (HUF_isError(hSize)) return hSize; | |
|
552 | { CHECK_V_F(hSize, HUF_writeCTable (op, dstSize, table.CTable, maxSymbolValue, huffLog) ); | |
|
494 | 553 | if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */ |
|
495 | 554 | op += hSize; |
|
496 | 555 | } |
|
497 | 556 | |
|
498 | 557 | /* Compress */ |
|
499 | 558 | { size_t const cSize = (singleStream) ? |
|
500 | HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : /* single segment */ | |
|
501 | HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable); | |
|
559 | HUF_compress1X_usingCTable(op, oend - op, src, srcSize, table.CTable) : /* single segment */ | |
|
560 | HUF_compress4X_usingCTable(op, oend - op, src, srcSize, table.CTable); | |
|
502 | 561 | if (HUF_isError(cSize)) return cSize; |
|
503 | 562 | if (cSize==0) return 0; /* uncompressible */ |
|
504 | 563 | op += cSize; |
@@ -512,21 +571,38 b' static size_t HUF_compress_internal (' | |||
|
512 | 571 | } |
|
513 | 572 | |
|
514 | 573 | |
|
574 | size_t HUF_compress1X_wksp (void* dst, size_t dstSize, | |
|
575 | const void* src, size_t srcSize, | |
|
576 | unsigned maxSymbolValue, unsigned huffLog, | |
|
577 | void* workSpace, size_t wkspSize) | |
|
578 | { | |
|
579 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1 /* single stream */, workSpace, wkspSize); | |
|
580 | } | |
|
581 | ||
|
515 | 582 | size_t HUF_compress1X (void* dst, size_t dstSize, |
|
516 | 583 | const void* src, size_t srcSize, |
|
517 | 584 | unsigned maxSymbolValue, unsigned huffLog) |
|
518 | 585 | { |
|
519 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1); | |
|
586 | unsigned workSpace[1024]; | |
|
587 | return HUF_compress1X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); | |
|
588 | } | |
|
589 | ||
|
590 | size_t HUF_compress4X_wksp (void* dst, size_t dstSize, | |
|
591 | const void* src, size_t srcSize, | |
|
592 | unsigned maxSymbolValue, unsigned huffLog, | |
|
593 | void* workSpace, size_t wkspSize) | |
|
594 | { | |
|
595 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0 /* 4 streams */, workSpace, wkspSize); | |
|
520 | 596 | } |
|
521 | 597 | |
|
522 | 598 | size_t HUF_compress2 (void* dst, size_t dstSize, |
|
523 | 599 | const void* src, size_t srcSize, |
|
524 | 600 | unsigned maxSymbolValue, unsigned huffLog) |
|
525 | 601 | { |
|
526 | return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0); | |
|
602 | unsigned workSpace[1024]; | |
|
603 | return HUF_compress4X_wksp(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, workSpace, sizeof(workSpace)); | |
|
527 | 604 | } |
|
528 | 605 | |
|
529 | ||
|
530 | 606 | size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize) |
|
531 | 607 | { |
|
532 | 608 | return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT); |
@@ -33,6 +33,7 b' typedef enum { ZSTDcs_created=0, ZSTDcs_' | |||
|
33 | 33 | /*-************************************* |
|
34 | 34 | * Helper functions |
|
35 | 35 | ***************************************/ |
|
36 | #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; } | |
|
36 | 37 | size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; } |
|
37 | 38 | |
|
38 | 39 | |
@@ -82,6 +83,7 b' struct ZSTD_CCtx_s' | |||
|
82 | 83 | FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)]; |
|
83 | 84 | FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)]; |
|
84 | 85 | FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)]; |
|
86 | unsigned tmpCounters[1024]; | |
|
85 | 87 | }; |
|
86 | 88 | |
|
87 | 89 | ZSTD_CCtx* ZSTD_createCCtx(void) |
@@ -147,6 +149,14 b' size_t ZSTD_checkCParams(ZSTD_compressio' | |||
|
147 | 149 | } |
|
148 | 150 | |
|
149 | 151 | |
|
152 | /** ZSTD_cycleLog() : | |
|
153 | * condition for correct operation : hashLog > 1 */ | |
|
154 | static U32 ZSTD_cycleLog(U32 hashLog, ZSTD_strategy strat) | |
|
155 | { | |
|
156 | U32 const btScale = ((U32)strat >= (U32)ZSTD_btlazy2); | |
|
157 | return hashLog - btScale; | |
|
158 | } | |
|
159 | ||
|
150 | 160 | /** ZSTD_adjustCParams() : |
|
151 | 161 | optimize `cPar` for a given input (`srcSize` and `dictSize`). |
|
152 | 162 | mostly downsizing to reduce memory consumption and initialization. |
@@ -165,9 +175,9 b' ZSTD_compressionParameters ZSTD_adjustCP' | |||
|
165 | 175 | if (cPar.windowLog > srcLog) cPar.windowLog = srcLog; |
|
166 | 176 | } } |
|
167 | 177 | if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog; |
|
168 | { U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt) | (cPar.strategy == ZSTD_btopt2); | |
|
169 | U32 const maxChainLog = cPar.windowLog+btPlus; | |
|
170 | if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */ | |
|
178 | { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy); | |
|
179 | if (cycleLog > cPar.windowLog) cPar.chainLog -= (cycleLog - cPar.windowLog); | |
|
180 | } | |
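
A worked instance of the new clamp, with assumed values: for a binary-tree strategy btScale is 1, so chainLog 17 against windowLog 15 yields cycleLog 16, one above the window, and chainLog is cut to 16:

    /* strat = ZSTD_btlazy2, cPar.chainLog = 17, cPar.windowLog = 15 (assumed)    */
    /* cycleLog = 17 - 1 = 16;  16 > 15  =>  cPar.chainLog -= (16 - 15)  =>  16   */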
|
171 | 181 | |
|
172 | 182 | if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ |
|
173 | 183 | |
@@ -470,8 +480,8 b' static size_t ZSTD_compressLiterals (ZST' | |||
|
470 | 480 | singleStream = 1; |
|
471 | 481 | cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable); |
|
472 | 482 | } else { |
|
473 | cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11) | |
|
474 | : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11); | 

483 | cLitSize = singleStream ? HUF_compress1X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters)) | |
|
484 | : HUF_compress4X_wksp(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, zc->tmpCounters, sizeof(zc->tmpCounters)); | |
|
475 | 485 | } |
|
476 | 486 | |
|
477 | 487 | if ((cLitSize==0) | (cLitSize >= srcSize - minGain)) |
@@ -566,6 +576,7 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
566 | 576 | BYTE* op = ostart; |
|
567 | 577 | size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; |
|
568 | 578 | BYTE* seqHead; |
|
579 | BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)]; | |
|
569 | 580 | |
|
570 | 581 | /* Compress literals */ |
|
571 | 582 | { const BYTE* const literals = seqStorePtr->litStart; |
@@ -593,7 +604,7 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
593 | 604 | |
|
594 | 605 | /* CTable for Literal Lengths */ |
|
595 | 606 | { U32 max = MaxLL; |
|
596 | size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq); | |
|
607 | size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, zc->tmpCounters); | |
|
597 | 608 | if ((mostFrequent == nbSeq) && (nbSeq > 2)) { |
|
598 | 609 | *op++ = llCodeTable[0]; |
|
599 | 610 | FSE_buildCTable_rle(CTable_LitLength, (BYTE)max); |
@@ -601,7 +612,7 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
601 | 612 | } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { |
|
602 | 613 | LLtype = set_repeat; |
|
603 | 614 | } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) { |
|
604 | FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog); | |
|
615 | FSE_buildCTable_wksp(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); | |
|
605 | 616 | LLtype = set_basic; |
|
606 | 617 | } else { |
|
607 | 618 | size_t nbSeq_1 = nbSeq; |
@@ -611,13 +622,13 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
611 | 622 | { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ |
|
612 | 623 | if (FSE_isError(NCountSize)) return ERROR(GENERIC); |
|
613 | 624 | op += NCountSize; } |
|
614 | FSE_buildCTable(CTable_LitLength, norm, max, tableLog); | |
|
625 | FSE_buildCTable_wksp(CTable_LitLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); | |
|
615 | 626 | LLtype = set_compressed; |
|
616 | 627 | } } |
|
617 | 628 | |
|
618 | 629 | /* CTable for Offsets */ |
|
619 | 630 | { U32 max = MaxOff; |
|
620 | size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq); | |
|
631 | size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, zc->tmpCounters); | |
|
621 | 632 | if ((mostFrequent == nbSeq) && (nbSeq > 2)) { |
|
622 | 633 | *op++ = ofCodeTable[0]; |
|
623 | 634 | FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max); |
@@ -625,7 +636,7 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
625 | 636 | } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { |
|
626 | 637 | Offtype = set_repeat; |
|
627 | 638 | } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) { |
|
628 | FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog); | |
|
639 | FSE_buildCTable_wksp(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); | |
|
629 | 640 | Offtype = set_basic; |
|
630 | 641 | } else { |
|
631 | 642 | size_t nbSeq_1 = nbSeq; |
@@ -635,13 +646,13 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
635 | 646 | { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ |
|
636 | 647 | if (FSE_isError(NCountSize)) return ERROR(GENERIC); |
|
637 | 648 | op += NCountSize; } |
|
638 | FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog); | |
|
649 | FSE_buildCTable_wksp(CTable_OffsetBits, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); | |
|
639 | 650 | Offtype = set_compressed; |
|
640 | 651 | } } |
|
641 | 652 | |
|
642 | 653 | /* CTable for MatchLengths */ |
|
643 | 654 | { U32 max = MaxML; |
|
644 | size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq); | |
|
655 | size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, zc->tmpCounters); | |
|
645 | 656 | if ((mostFrequent == nbSeq) && (nbSeq > 2)) { |
|
646 | 657 | *op++ = *mlCodeTable; |
|
647 | 658 | FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max); |
@@ -649,7 +660,7 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
649 | 660 | } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) { |
|
650 | 661 | MLtype = set_repeat; |
|
651 | 662 | } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) { |
|
652 | FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog); | |
|
663 | FSE_buildCTable_wksp(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog, scratchBuffer, sizeof(scratchBuffer)); | |
|
653 | 664 | MLtype = set_basic; |
|
654 | 665 | } else { |
|
655 | 666 | size_t nbSeq_1 = nbSeq; |
@@ -659,7 +670,7 b' size_t ZSTD_compressSequences(ZSTD_CCtx*' | |||
|
659 | 670 | { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */ |
|
660 | 671 | if (FSE_isError(NCountSize)) return ERROR(GENERIC); |
|
661 | 672 | op += NCountSize; } |
|
662 | FSE_buildCTable(CTable_MatchLength, norm, max, tableLog); | |
|
673 | FSE_buildCTable_wksp(CTable_MatchLength, norm, max, tableLog, scratchBuffer, sizeof(scratchBuffer)); | |
|
663 | 674 | MLtype = set_compressed; |
|
664 | 675 | } } |
|
665 | 676 | |
@@ -739,8 +750,8 b' MEM_STATIC void ZSTD_storeSeq(seqStore_t' | |||
|
739 | 750 | { |
|
740 | 751 | #if 0 /* for debug */ |
|
741 | 752 | static const BYTE* g_start = NULL; |
|
742 | const U32 pos = (U32)(literals - g_start); | |
|
743 | if (g_start==NULL) g_start = literals; | |
|
753 | const U32 pos = (U32)((const BYTE*)literals - g_start); | |
|
754 | if (g_start==NULL) g_start = (const BYTE*)literals; | |
|
744 | 755 | //if ((pos > 1) && (pos < 50000)) |
|
745 | 756 | printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n", |
|
746 | 757 | pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode); |
@@ -1482,8 +1493,9 b' static U32 ZSTD_insertBt1(ZSTD_CCtx* zc,' | |||
|
1482 | 1493 | hashTable[h] = current; /* Update Hash Table */ |
|
1483 | 1494 | |
|
1484 | 1495 | while (nbCompares-- && (matchIndex > windowLow)) { |
|
1485 | U32* nextPtr = bt + 2*(matchIndex & btMask); | |
|
1496 | U32* const nextPtr = bt + 2*(matchIndex & btMask); | |
|
1486 | 1497 | size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ |
|
1498 | ||
|
1487 | 1499 | #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ |
|
1488 | 1500 | const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */ |
|
1489 | 1501 | if (matchIndex == predictedSmall) { |
@@ -1579,7 +1591,7 b' static size_t ZSTD_insertBtAndFindBestMa' | |||
|
1579 | 1591 | hashTable[h] = current; /* Update Hash Table */ |
|
1580 | 1592 | |
|
1581 | 1593 | while (nbCompares-- && (matchIndex > windowLow)) { |
|
1582 | U32* nextPtr = bt + 2*(matchIndex & btMask); | |
|
1594 | U32* const nextPtr = bt + 2*(matchIndex & btMask); | |
|
1583 | 1595 | size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ |
|
1584 | 1596 | const BYTE* match; |
|
1585 | 1597 | |
@@ -2271,16 +2283,16 b' static size_t ZSTD_compress_generic (ZST' | |||
|
2271 | 2283 | if (remaining < blockSize) blockSize = remaining; |
|
2272 | 2284 | |
|
2273 | 2285 | /* preemptive overflow correction */ |
|
2274 | if (cctx->lowLimit > (1<<30)) { | 
|
2275 | U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt) | (cctx->params.cParams.strategy == ZSTD_btopt2); | |
|
2276 | U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1; | |
|
2277 | U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */); | 
|
2278 | U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog); /* preserve position % chainSize, ensure current-repcode doesn't underflow */ | |
|
2279 | U32 const correction = cctx->lowLimit - newLowLimit; | |
|
2286 | if (cctx->lowLimit > (2U<<30)) { | |
|
2287 | U32 const cycleMask = (1 << ZSTD_cycleLog(cctx->params.cParams.hashLog, cctx->params.cParams.strategy)) - 1; | |
|
2288 | U32 const current = (U32)(ip - cctx->base); | |
|
2289 | U32 const newCurrent = (current & cycleMask) + (1 << cctx->params.cParams.windowLog); | |
|
2290 | U32 const correction = current - newCurrent; | |
|
2291 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_64 <= 30); | |
|
2280 | 2292 | ZSTD_reduceIndex(cctx, correction); |
|
2281 | 2293 | cctx->base += correction; |
|
2282 | 2294 | cctx->dictBase += correction; |
|
2283 | cctx->lowLimit = newLowLimit; | 
|
2295 | cctx->lowLimit -= correction; | |
|
2284 | 2296 | cctx->dictLimit -= correction; |
|
2285 | 2297 | if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0; |
|
2286 | 2298 | else cctx->nextToUpdate -= correction; |
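The rewritten overflow correction keys off the current input index instead of lowLimit, and preserves the index's position modulo the table cycle, so existing hash and chain entries stay mutually consistent after every index and base pointer is shifted down by the same amount. A worked sketch of the arithmetic (all values illustrative):

    #include <stdio.h>

    int main(void)
    {
        unsigned const windowLog = 20, cycLog = 17;        /* assumed params */
        unsigned const cycleMask  = (1u << cycLog) - 1;
        unsigned const current    = 0x80000123u;           /* index past 2^31 */
        unsigned const newCurrent = (current & cycleMask) + (1u << windowLog);
        unsigned const correction = current - newCurrent;
        /* current % (1 << cycLog) is unchanged: 0x123 before and after */
        printf("newCurrent=0x%X correction=0x%X\n", newCurrent, correction);
        return 0;
    }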
@@ -2506,6 +2518,7 b' static size_t ZSTD_loadDictEntropyStats(' | |||
|
2506 | 2518 | const BYTE* const dictEnd = dictPtr + dictSize; |
|
2507 | 2519 | short offcodeNCount[MaxOff+1]; |
|
2508 | 2520 | unsigned offcodeMaxValue = MaxOff; |
|
2521 | BYTE scratchBuffer[1<<MAX(MLFSELog,LLFSELog)]; | |
|
2509 | 2522 | |
|
2510 | 2523 | { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize); |
|
2511 | 2524 | if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted); |
@@ -2517,7 +2530,7 b' static size_t ZSTD_loadDictEntropyStats(' | |||
|
2517 | 2530 | if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted); |
|
2518 | 2531 | if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted); |
|
2519 | 2532 | /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ |
|
2520 | CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted); | |
|
2533 | CHECK_E (FSE_buildCTable_wksp(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted); | |
|
2521 | 2534 | dictPtr += offcodeHeaderSize; |
|
2522 | 2535 | } |
|
2523 | 2536 | |
@@ -2528,7 +2541,7 b' static size_t ZSTD_loadDictEntropyStats(' | |||
|
2528 | 2541 | if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted); |
|
2529 | 2542 | /* Every match length code must have non-zero probability */ |
|
2530 | 2543 | CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); |
|
2531 | CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted); | |
|
2544 | CHECK_E (FSE_buildCTable_wksp(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted); | |
|
2532 | 2545 | dictPtr += matchlengthHeaderSize; |
|
2533 | 2546 | } |
|
2534 | 2547 | |
@@ -2539,7 +2552,7 b' static size_t ZSTD_loadDictEntropyStats(' | |||
|
2539 | 2552 | if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted); |
|
2540 | 2553 | /* Every literal length code must have non-zero probability */ |
|
2541 | 2554 | CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); |
|
2542 | CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted); | |
|
2555 | CHECK_E(FSE_buildCTable_wksp(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, scratchBuffer, sizeof(scratchBuffer)), dictionary_corrupted); | |
|
2543 | 2556 | dictPtr += litlengthHeaderSize; |
|
2544 | 2557 | } |
|
2545 | 2558 | |
@@ -2695,7 +2708,7 b' size_t ZSTD_compress_advanced (ZSTD_CCtx' | |||
|
2695 | 2708 | |
|
2696 | 2709 | size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel) |
|
2697 | 2710 | { |
|
2698 | ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize); | |
|
2711 | ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dict ? dictSize : 0); | |
|
2699 | 2712 | params.fParams.contentSizeFlag = 1; |
|
2700 | 2713 | return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params); |
|
2701 | 2714 | } |
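The dict ? dictSize : 0 guard means a caller passing dict == NULL can no longer skew parameter selection with a stale dictSize. A hedged sketch of the no-dictionary call path (zstd.h public API; the wrapper is hypothetical):

    #include "zstd.h"

    /* behaves exactly like plain compression after the fix */
    size_t compress_plain(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity,
                          const void* src, size_t srcSize)
    {
        return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize,
                                       NULL, 0,   /* no dictionary */
                                       3);        /* compression level */
    }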
@@ -2839,6 +2852,8 b' struct ZSTD_CStream_s {' | |||
|
2839 | 2852 | ZSTD_cStreamStage stage; |
|
2840 | 2853 | U32 checksum; |
|
2841 | 2854 | U32 frameEnded; |
|
2855 | U64 pledgedSrcSize; | |
|
2856 | U64 inputProcessed; | |
|
2842 | 2857 | ZSTD_parameters params; |
|
2843 | 2858 | ZSTD_customMem customMem; |
|
2844 | 2859 | }; /* typedef'd to ZSTD_CStream within "zstd.h" */ |
@@ -2896,6 +2911,8 b' size_t ZSTD_resetCStream(ZSTD_CStream* z' | |||
|
2896 | 2911 | zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0; |
|
2897 | 2912 | zcs->stage = zcss_load; |
|
2898 | 2913 | zcs->frameEnded = 0; |
|
2914 | zcs->pledgedSrcSize = pledgedSrcSize; | |
|
2915 | zcs->inputProcessed = 0; | |
|
2899 | 2916 | return 0; /* ready to go */ |
|
2900 | 2917 | } |
|
2901 | 2918 | |
@@ -2948,6 +2965,12 b' size_t ZSTD_initCStream_usingDict(ZSTD_C' | |||
|
2948 | 2965 | return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0); |
|
2949 | 2966 | } |
|
2950 | 2967 | |
|
2968 | size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize) | |
|
2969 | { | |
|
2970 | ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0); | |
|
2971 | return ZSTD_initCStream_advanced(zcs, NULL, 0, params, pledgedSrcSize); | |
|
2972 | } | |
|
2973 | ||
|
2951 | 2974 | size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) |
|
2952 | 2975 | { |
|
2953 | 2976 | return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel); |
@@ -3044,6 +3067,7 b' static size_t ZSTD_compressStream_generi' | |||
|
3044 | 3067 | |
|
3045 | 3068 | *srcSizePtr = ip - istart; |
|
3046 | 3069 | *dstCapacityPtr = op - ostart; |
|
3070 | zcs->inputProcessed += *srcSizePtr; | |
|
3047 | 3071 | if (zcs->frameEnded) return 0; |
|
3048 | 3072 | { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos; |
|
3049 | 3073 | if (hintInSize==0) hintInSize = zcs->blockSize; |
@@ -3088,6 +3112,9 b' size_t ZSTD_endStream(ZSTD_CStream* zcs,' | |||
|
3088 | 3112 | BYTE* const oend = (BYTE*)(output->dst) + output->size; |
|
3089 | 3113 | BYTE* op = ostart; |
|
3090 | 3114 | |
|
3115 | if ((zcs->pledgedSrcSize) && (zcs->inputProcessed != zcs->pledgedSrcSize)) | |
|
3116 | return ERROR(srcSize_wrong); /* pledgedSrcSize not respected */ | |
|
3117 | ||
|
3091 | 3118 | if (zcs->stage != zcss_final) { |
|
3092 | 3119 | /* flush whatever remains */ |
|
3093 | 3120 | size_t srcSize = 0; |
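Taken together, the new pledgedSrcSize and inputProcessed fields, ZSTD_initCStream_srcSize(), and this ZSTD_endStream() check let a caller promise the exact input size and have the stream refuse to finish when the promise is broken. A minimal sketch against the zstd.h streaming API (error handling abbreviated; the wrapper is hypothetical):

    #include "zstd.h"

    size_t stream_with_pledge(void* dst, size_t dstCapacity,
                              const void* src, size_t srcSize)
    {
        ZSTD_CStream* const zcs = ZSTD_createCStream();
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        size_t ret = ZSTD_initCStream_srcSize(zcs, 3, (unsigned long long)srcSize);
        while (!ZSTD_isError(ret) && in.pos < in.size)
            ret = ZSTD_compressStream(zcs, &out, &in);
        /* fails with srcSize_wrong here if total input != pledged size */
        if (!ZSTD_isError(ret)) ret = ZSTD_endStream(zcs, &out);
        ZSTD_freeCStream(zcs);
        return ZSTD_isError(ret) ? ret : out.pos;
    }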
@@ -15,8 +15,9 b'' | |||
|
15 | 15 | #define ZSTD_OPT_H_91842398743 |
|
16 | 16 | |
|
17 | 17 | |
|
18 | #define ZSTD_FREQ_DIV 5 | 
|
19 | #define ZSTD_MAX_PRICE (1<<30) | |
|
18 | #define ZSTD_LITFREQ_ADD 2 | |
|
19 | #define ZSTD_FREQ_DIV 4 | |
|
20 | #define ZSTD_MAX_PRICE (1<<30) | |
|
20 | 21 | |
|
21 | 22 | /*-************************************* |
|
22 | 23 | * Price functions for optimal parser |
@@ -31,22 +32,32 b' FORCE_INLINE void ZSTD_setLog2Prices(seq' | |||
|
31 | 32 | } |
|
32 | 33 | |
|
33 | 34 | |
|
34 | MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr) | |
|
35 | MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr, const BYTE* src, size_t srcSize) | |
|
35 | 36 | { |
|
36 | 37 | unsigned u; |
|
37 | 38 | |
|
38 | 39 | ssPtr->cachedLiterals = NULL; |
|
39 | 40 | ssPtr->cachedPrice = ssPtr->cachedLitLength = 0; |
|
41 | ssPtr->staticPrices = 0; | |
|
40 | 42 | |
|
41 | 43 | if (ssPtr->litLengthSum == 0) { |
|
42 | ssPtr->litSum = (2<<Litbits); | |
|
44 | if (srcSize <= 1024) ssPtr->staticPrices = 1; | |
|
45 | ||
|
46 | for (u=0; u<=MaxLit; u++) | |
|
47 | ssPtr->litFreq[u] = 0; | |
|
48 | for (u=0; u<srcSize; u++) | |
|
49 | ssPtr->litFreq[src[u]]++; | |
|
50 | ||
|
51 | ssPtr->litSum = 0; | |
|
43 | 52 | ssPtr->litLengthSum = MaxLL+1; |
|
44 | 53 | ssPtr->matchLengthSum = MaxML+1; |
|
45 | 54 | ssPtr->offCodeSum = (MaxOff+1); |
|
46 | ssPtr->matchSum = (2<<Litbits); | 
|
55 | ssPtr->matchSum = (ZSTD_LITFREQ_ADD<<Litbits); | |
|
47 | 56 | |
|
48 | for (u=0; u<=MaxLit; u++) | |
|
49 | ssPtr->litFreq[u] = 2; | 
|
57 | for (u=0; u<=MaxLit; u++) { | |
|
58 | ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); | |
|
59 | ssPtr->litSum += ssPtr->litFreq[u]; | |
|
60 | } | |
|
50 | 61 | for (u=0; u<=MaxLL; u++) |
|
51 | 62 | ssPtr->litLengthFreq[u] = 1; |
|
52 | 63 | for (u=0; u<=MaxML; u++) |
@@ -61,11 +72,11 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto' | |||
|
61 | 72 | ssPtr->litSum = 0; |
|
62 | 73 | |
|
63 | 74 | for (u=0; u<=MaxLit; u++) { |
|
64 | ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV); | |
|
75 | ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>(ZSTD_FREQ_DIV+1)); | |
|
65 | 76 | ssPtr->litSum += ssPtr->litFreq[u]; |
|
66 | 77 | } |
|
67 | 78 | for (u=0; u<=MaxLL; u++) { |
|
68 | ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV); | |
|
79 | ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1)); | |
|
69 | 80 | ssPtr->litLengthSum += ssPtr->litLengthFreq[u]; |
|
70 | 81 | } |
|
71 | 82 | for (u=0; u<=MaxML; u++) { |
@@ -73,6 +84,7 b' MEM_STATIC void ZSTD_rescaleFreqs(seqSto' | |||
|
73 | 84 | ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u]; |
|
74 | 85 | ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3); |
|
75 | 86 | } |
|
87 | ssPtr->matchSum *= ZSTD_LITFREQ_ADD; | |
|
76 | 88 | for (u=0; u<=MaxOff; u++) { |
|
77 | 89 | ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV); |
|
78 | 90 | ssPtr->offCodeSum += ssPtr->offCodeFreq[u]; |
@@ -87,6 +99,9 b' FORCE_INLINE U32 ZSTD_getLiteralPrice(se' | |||
|
87 | 99 | { |
|
88 | 100 | U32 price, u; |
|
89 | 101 | |
|
102 | if (ssPtr->staticPrices) | |
|
103 | return ZSTD_highbit32((U32)litLength+1) + (litLength*6); | |
|
104 | ||
|
90 | 105 | if (litLength == 0) |
|
91 | 106 | return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1); |
|
92 | 107 | |
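For blocks of 1024 bytes or less the parser now skips adaptive statistics and uses the fixed price above: price(litLength) = highbit32(litLength + 1) + 6 * litLength bits. A worked check of that formula (highbit32 re-implemented here only for illustration):

    #include <stdio.h>

    /* stand-in for ZSTD_highbit32(): index of the highest set bit */
    static unsigned highbit32(unsigned v) { unsigned n = 0; while (v >>= 1) n++; return n; }

    int main(void)
    {
        unsigned const litLength = 4;
        /* highbit32(5) = 2, plus 6 bits per literal: 2 + 24 = 26 */
        printf("static literal price = %u bits\n",
               highbit32(litLength + 1) + 6 * litLength);
        return 0;
    }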
@@ -124,9 +139,13 b' FORCE_INLINE U32 ZSTD_getLiteralPrice(se' | |||
|
124 | 139 | FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra) |
|
125 | 140 | { |
|
126 | 141 | /* offset */ |
|
142 | U32 price; | |
|
127 | 143 | BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1); |
|
128 | U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); | |
|
129 | 144 | |
|
145 | if (seqStorePtr->staticPrices) | |
|
146 | return ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + ZSTD_highbit32((U32)matchLength+1) + 16 + offCode; | |
|
147 | ||
|
148 | price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1); | |
|
130 | 149 | if (!ultra && offCode >= 20) price += (offCode-19)*2; |
|
131 | 150 | |
|
132 | 151 | /* match Length */ |
@@ -144,9 +163,9 b' MEM_STATIC void ZSTD_updatePrice(seqStor' | |||
|
144 | 163 | U32 u; |
|
145 | 164 | |
|
146 | 165 | /* literals */ |
|
147 | seqStorePtr->litSum += litLength; | |
|
166 | seqStorePtr->litSum += litLength*ZSTD_LITFREQ_ADD; | |
|
148 | 167 | for (u=0; u < litLength; u++) |
|
149 | seqStorePtr->litFreq[literals[u]]++; | 
|
168 | seqStorePtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; | |
|
150 | 169 | |
|
151 | 170 | /* literal Length */ |
|
152 | 171 | { const BYTE LL_deltaCode = 19; |
@@ -401,7 +420,7 b' void ZSTD_compressBlock_opt_generic(ZSTD' | |||
|
401 | 420 | |
|
402 | 421 | /* init */ |
|
403 | 422 | ctx->nextToUpdate3 = ctx->nextToUpdate; |
|
404 | ZSTD_rescaleFreqs(seqStorePtr); | |
|
423 | ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); | |
|
405 | 424 | ip += (ip==prefixStart); |
|
406 | 425 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
|
407 | 426 | |
@@ -416,7 +435,7 b' void ZSTD_compressBlock_opt_generic(ZSTD' | |||
|
416 | 435 | /* check repCode */ |
|
417 | 436 | { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
|
418 | 437 | for (i=(ip == anchor); i<last_i; i++) { |
|
419 | const S32 repCur = (i==ZSTD_REP_MOVE) ? (rep[0] - 1) : rep[i]; | 
|
438 | const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; | |
|
420 | 439 | if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart)) |
|
421 | 440 | && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) { |
|
422 | 441 | mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch; |
@@ -501,7 +520,7 b' void ZSTD_compressBlock_opt_generic(ZSTD' | |||
|
501 | 520 | best_mlen = minMatch; |
|
502 | 521 | { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
|
503 | 522 | for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */ |
|
504 | const S32 repCur = (i==ZSTD_REP_MOVE) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; | 
|
523 | const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; | |
|
505 | 524 | if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart)) |
|
506 | 525 | && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) { |
|
507 | 526 | mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch; |
@@ -601,7 +620,7 b' void ZSTD_compressBlock_opt_generic(ZSTD' | |||
|
601 | 620 | offset--; |
|
602 | 621 | } else { |
|
603 | 622 | if (offset != 0) { |
|
604 | best_off = (offset==ZSTD_REP_MOVE) ? (rep[0] - 1) : (rep[offset]); | 
|
623 | best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); | |
|
605 | 624 | if (offset != 1) rep[2] = rep[1]; |
|
606 | 625 | rep[1] = rep[0]; |
|
607 | 626 | rep[0] = best_off; |
@@ -656,7 +675,7 b' void ZSTD_compressBlock_opt_extDict_gene' | |||
|
656 | 675 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; } |
|
657 | 676 | |
|
658 | 677 | ctx->nextToUpdate3 = ctx->nextToUpdate; |
|
659 | ZSTD_rescaleFreqs(seqStorePtr); | |
|
678 | ZSTD_rescaleFreqs(seqStorePtr, (const BYTE*)src, srcSize); | |
|
660 | 679 | ip += (ip==prefixStart); |
|
661 | 680 | |
|
662 | 681 | /* Match Loop */ |
@@ -671,7 +690,7 b' void ZSTD_compressBlock_opt_extDict_gene' | |||
|
671 | 690 | /* check repCode */ |
|
672 | 691 | { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor); |
|
673 | 692 | for (i = (ip==anchor); i<last_i; i++) { |
|
674 | const S32 repCur = (i==ZSTD_REP_MOVE) ? (rep[0] - 1) : rep[i]; | 
|
693 | const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : rep[i]; | |
|
675 | 694 | const U32 repIndex = (U32)(current - repCur); |
|
676 | 695 | const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
|
677 | 696 | const BYTE* const repMatch = repBase + repIndex; |
@@ -767,7 +786,7 b' void ZSTD_compressBlock_opt_extDict_gene' | |||
|
767 | 786 | best_mlen = minMatch; |
|
768 | 787 | { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1); |
|
769 | 788 | for (i = (mlen != 1); i<last_i; i++) { |
|
770 | const S32 repCur = (i==ZSTD_REP_MOVE) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; | 
|
789 | const S32 repCur = (i==ZSTD_REP_MOVE_OPT) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i]; | |
|
771 | 790 | const U32 repIndex = (U32)(current+cur - repCur); |
|
772 | 791 | const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; |
|
773 | 792 | const BYTE* const repMatch = repBase + repIndex; |
@@ -873,7 +892,7 b' void ZSTD_compressBlock_opt_extDict_gene' | |||
|
873 | 892 | offset--; |
|
874 | 893 | } else { |
|
875 | 894 | if (offset != 0) { |
|
876 | best_off = (offset==ZSTD_REP_MOVE) ? (rep[0] - 1) : (rep[offset]); | 
|
895 | best_off = (offset==ZSTD_REP_MOVE_OPT) ? (rep[0] - 1) : (rep[offset]); | |
|
877 | 896 | if (offset != 1) rep[2] = rep[1]; |
|
878 | 897 | rep[1] = rep[0]; |
|
879 | 898 | rep[0] = best_off; |
@@ -358,13 +358,15 b' typedef struct { U16 sequence; BYTE nbBi' | |||
|
358 | 358 | |
|
359 | 359 | typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t; |
|
360 | 360 | |
|
361 | /* HUF_fillDTableX4Level2() : | |
|
362 | * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */ | |
|
361 | 363 | static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed, |
|
362 | 364 | const U32* rankValOrigin, const int minWeight, |
|
363 | 365 | const sortedSymbol_t* sortedSymbols, const U32 sortedListSize, |
|
364 | 366 | U32 nbBitsBaseline, U16 baseSeq) |
|
365 | 367 | { |
|
366 | 368 | HUF_DEltX4 DElt; |
|
367 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; | 
|
369 | U32 rankVal[HUF_TABLELOG_MAX + 1]; | |
|
368 | 370 | |
|
369 | 371 | /* get pre-calculated rankVal */ |
|
370 | 372 | memcpy(rankVal, rankValOrigin, sizeof(rankVal)); |
@@ -398,14 +400,14 b' static void HUF_fillDTableX4Level2(HUF_D' | |||
|
398 | 400 | } } |
|
399 | 401 | } |
|
400 | 402 | |
|
401 | typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1]; | 
|
403 | typedef U32 rankVal_t[HUF_TABLELOG_MAX][HUF_TABLELOG_MAX + 1]; | |
|
402 | 404 | |
|
403 | 405 | static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog, |
|
404 | 406 | const sortedSymbol_t* sortedList, const U32 sortedListSize, |
|
405 | 407 | const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight, |
|
406 | 408 | const U32 nbBitsBaseline) |
|
407 | 409 | { |
|
408 | U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; | 
|
410 | U32 rankVal[HUF_TABLELOG_MAX + 1]; | |
|
409 | 411 | const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */ |
|
410 | 412 | const U32 minBits = nbBitsBaseline - maxWeight; |
|
411 | 413 | U32 s; |
@@ -446,8 +448,8 b' size_t HUF_readDTableX4 (HUF_DTable* DTa' | |||
|
446 | 448 | { |
|
447 | 449 | BYTE weightList[HUF_SYMBOLVALUE_MAX + 1]; |
|
448 | 450 | sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1]; |
|
449 | U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 }; | 
|
450 | U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 }; | 
|
451 | U32 rankStats[HUF_TABLELOG_MAX + 1] = { 0 }; | |
|
452 | U32 rankStart0[HUF_TABLELOG_MAX + 2] = { 0 }; | |
|
451 | 453 | U32* const rankStart = rankStart0+1; |
|
452 | 454 | rankVal_t rankVal; |
|
453 | 455 | U32 tableLog, maxW, sizeOfSort, nbSymbols; |
@@ -458,7 +460,7 b' size_t HUF_readDTableX4 (HUF_DTable* DTa' | |||
|
458 | 460 | HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr; |
|
459 | 461 | |
|
460 | 462 | HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */ |
|
461 | if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge); | 
|
463 | if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge); | |
|
462 | 464 | /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */ |
|
463 | 465 | |
|
464 | 466 | iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize); |
@@ -56,6 +56,15 b'' | |||
|
56 | 56 | #endif |
|
57 | 57 | |
|
58 | 58 | |
|
59 | #if defined(_MSC_VER) | |
|
60 | # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */ | |
|
61 | # define ZSTD_PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0) | |
|
62 | #elif defined(__GNUC__) | |
|
63 | # define ZSTD_PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0) | |
|
64 | #else | |
|
65 | # define ZSTD_PREFETCH(ptr) /* disabled */ | |
|
66 | #endif | |
|
67 | ||
|
59 | 68 | /*-************************************* |
|
60 | 69 | * Macros |
|
61 | 70 | ***************************************/ |
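ZSTD_PREFETCH maps to _mm_prefetch() under MSVC and __builtin_prefetch() under GCC-compatible compilers, and compiles away elsewhere; ZSTD_decompressSequencesLong() further down uses it to start fetching match bytes one iteration before they are copied. The same pattern in a standalone sketch:

    #include <stddef.h>

    #if defined(__GNUC__)
    #  define PREFETCH(p) __builtin_prefetch((p), 0, 0)
    #else
    #  define PREFETCH(p) /* disabled */
    #endif

    /* walk an array while hinting a few elements ahead (illustrative) */
    long sum_with_prefetch(const long* a, size_t n)
    {
        long s = 0;
        size_t i;
        for (i = 0; i < n; i++) {
            if (i + 8 < n) PREFETCH(&a[i + 8]);
            s += a[i];
        }
        return s;
    }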
@@ -104,7 +113,6 b' struct ZSTD_DCtx_s' | |||
|
104 | 113 | U32 dictID; |
|
105 | 114 | const BYTE* litPtr; |
|
106 | 115 | ZSTD_customMem customMem; |
|
107 | size_t litBufSize; | |
|
108 | 116 | size_t litSize; |
|
109 | 117 | size_t rleSize; |
|
110 | 118 | BYTE litBuffer[ZSTD_BLOCKSIZE_ABSOLUTEMAX + WILDCOPY_OVERLENGTH]; |
@@ -193,7 +201,24 b' static void ZSTD_refDCtx(ZSTD_DCtx* dstD' | |||
|
193 | 201 | * Decompression section |
|
194 | 202 | ***************************************************************/ |
|
195 | 203 | |
|
196 | /* See compression format details in : doc/zstd_compression_format.md */ | |
|
204 | /*! ZSTD_isFrame() : | |
|
205 | * Tells if the content of `buffer` starts with a valid Frame Identifier. | |
|
206 | * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. | |
|
207 | * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. | |
|
208 | * Note 3 : Skippable Frame Identifiers are considered valid. */ | |
|
209 | unsigned ZSTD_isFrame(const void* buffer, size_t size) | |
|
210 | { | |
|
211 | if (size < 4) return 0; | |
|
212 | { U32 const magic = MEM_readLE32(buffer); | |
|
213 | if (magic == ZSTD_MAGICNUMBER) return 1; | |
|
214 | if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1; | |
|
215 | } | |
|
216 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) | |
|
217 | if (ZSTD_isLegacy(buffer, size)) return 1; | |
|
218 | #endif | |
|
219 | return 0; | |
|
220 | } | |
|
221 | ||
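Usage sketch for the new probe, assuming the declaration above is reachable from the caller (the wrapper function is hypothetical):

    #include <stddef.h>

    unsigned ZSTD_isFrame(const void* buffer, size_t size);  /* added above */

    /* 1 when `buf` starts with a zstd, skippable, or (if enabled) legacy frame */
    int looks_like_zstd(const void* buf, size_t size)
    {
        return ZSTD_isFrame(buf, size) ? 1 : 0;
    }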
|
197 | 222 | |
|
198 | 223 | /** ZSTD_frameHeaderSize() : |
|
199 | 224 | * srcSize must be >= ZSTD_frameHeaderSize_prefix. |
@@ -412,10 +437,10 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
412 | 437 | return ERROR(corruption_detected); |
|
413 | 438 | |
|
414 | 439 | dctx->litPtr = dctx->litBuffer; |
|
415 | dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH; | |
|
416 | 440 | dctx->litSize = litSize; |
|
417 | 441 | dctx->litEntropy = 1; |
|
418 | 442 | if (litEncType==set_compressed) dctx->HUFptr = dctx->hufTable; |
|
443 | memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); | |
|
419 | 444 | return litCSize + lhSize; |
|
420 | 445 | } |
|
421 | 446 | |
@@ -442,13 +467,12 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
442 | 467 | if (litSize+lhSize > srcSize) return ERROR(corruption_detected); |
|
443 | 468 | memcpy(dctx->litBuffer, istart+lhSize, litSize); |
|
444 | 469 | dctx->litPtr = dctx->litBuffer; |
|
445 | dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+8; | |
|
446 | 470 | dctx->litSize = litSize; |
|
471 | memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH); | |
|
447 | 472 | return lhSize+litSize; |
|
448 | 473 | } |
|
449 | 474 | /* direct reference into compressed stream */ |
|
450 | 475 | dctx->litPtr = istart+lhSize; |
|
451 | dctx->litBufSize = srcSize-lhSize; | |
|
452 | 476 | dctx->litSize = litSize; |
|
453 | 477 | return lhSize+litSize; |
|
454 | 478 | } |
@@ -473,9 +497,8 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
473 | 497 | break; |
|
474 | 498 | } |
|
475 | 499 | if (litSize > ZSTD_BLOCKSIZE_ABSOLUTEMAX) return ERROR(corruption_detected); |
|
476 | memset(dctx->litBuffer, istart[lhSize], litSize); | |
|
500 | memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); | |
|
477 | 501 | dctx->litPtr = dctx->litBuffer; |
|
478 | dctx->litBufSize = ZSTD_BLOCKSIZE_ABSOLUTEMAX+WILDCOPY_OVERLENGTH; | |
|
479 | 502 | dctx->litSize = litSize; |
|
480 | 503 | return lhSize+1; |
|
481 | 504 | } |
@@ -761,6 +784,7 b' typedef struct {' | |||
|
761 | 784 | size_t litLength; |
|
762 | 785 | size_t matchLength; |
|
763 | 786 | size_t offset; |
|
787 | const BYTE* match; | |
|
764 | 788 | } seq_t; |
|
765 | 789 | |
|
766 | 790 | typedef struct { |
@@ -769,88 +793,16 b' typedef struct {' | |||
|
769 | 793 | FSE_DState_t stateOffb; |
|
770 | 794 | FSE_DState_t stateML; |
|
771 | 795 | size_t prevOffset[ZSTD_REP_NUM]; |
|
796 | const BYTE* base; | |
|
797 | size_t pos; | |
|
798 | iPtrDiff gotoDict; | |
|
772 | 799 | } seqState_t; |
|
773 | 800 | |
|
774 | 801 | |
|
775 | static seq_t ZSTD_decodeSequence(seqState_t* seqState) | |
|
776 | { | |
|
777 | seq_t seq; | |
|
778 | ||
|
779 | U32 const llCode = FSE_peekSymbol(&seqState->stateLL); | |
|
780 | U32 const mlCode = FSE_peekSymbol(&seqState->stateML); | |
|
781 | U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ | |
|
782 | ||
|
783 | U32 const llBits = LL_bits[llCode]; | |
|
784 | U32 const mlBits = ML_bits[mlCode]; | |
|
785 | U32 const ofBits = ofCode; | |
|
786 | U32 const totalBits = llBits+mlBits+ofBits; | |
|
787 | ||
|
788 | static const U32 LL_base[MaxLL+1] = { | |
|
789 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
|
790 | 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, | |
|
791 | 0x2000, 0x4000, 0x8000, 0x10000 }; | |
|
792 | ||
|
793 | static const U32 ML_base[MaxML+1] = { | |
|
794 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, | |
|
795 | 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, | |
|
796 | 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, | |
|
797 | 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; | |
|
798 | ||
|
799 | static const U32 OF_base[MaxOff+1] = { | |
|
800 | 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, | |
|
801 | 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, | |
|
802 | 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, | |
|
803 | 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; | |
|
804 | ||
|
805 | /* sequence */ | |
|
806 | { size_t offset; | |
|
807 | if (!ofCode) | |
|
808 | offset = 0; | |
|
809 | else { | |
|
810 | offset = OF_base[ofCode] + BIT_readBits(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ | |
|
811 | if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); | |
|
812 | } | |
|
813 | ||
|
814 | if (ofCode <= 1) { | |
|
815 | offset += (llCode==0); | |
|
816 | if (offset) { | |
|
817 | size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; | |
|
818 | temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ | |
|
819 | if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; | |
|
820 | seqState->prevOffset[1] = seqState->prevOffset[0]; | |
|
821 | seqState->prevOffset[0] = offset = temp; | |
|
822 | } else { | |
|
823 | offset = seqState->prevOffset[0]; | |
|
824 | } | |
|
825 | } else { | |
|
826 | seqState->prevOffset[2] = seqState->prevOffset[1]; | |
|
827 | seqState->prevOffset[1] = seqState->prevOffset[0]; | |
|
828 | seqState->prevOffset[0] = offset; | |
|
829 | } | |
|
830 | seq.offset = offset; | |
|
831 | } | |
|
832 | ||
|
833 | seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBits(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ | |
|
834 | if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); | |
|
835 | ||
|
836 | seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBits(&seqState->DStream, llBits) : 0); /* <= 16 bits */ | |
|
837 | if (MEM_32bits() || | |
|
838 | (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); | |
|
839 | ||
|
840 | /* ANS state update */ | |
|
841 | FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ | |
|
842 | FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ | |
|
843 | if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ | |
|
844 | FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ | |
|
845 | ||
|
846 | return seq; | |
|
847 | } | |
|
848 | ||
|
849 | ||
|
850 | 802 | FORCE_NOINLINE |
|
851 | 803 | size_t ZSTD_execSequenceLast7(BYTE* op, |
|
852 | 804 | BYTE* const oend, seq_t sequence, |
|
853 | const BYTE** litPtr, const BYTE* const litLimit_w, | 
|
805 | const BYTE** litPtr, const BYTE* const litLimit, | |
|
854 | 806 | const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) |
|
855 | 807 | { |
|
856 | 808 | BYTE* const oLitEnd = op + sequence.litLength; |
@@ -862,7 +814,7 b' size_t ZSTD_execSequenceLast7(BYTE* op,' | |||
|
862 | 814 | |
|
863 | 815 | /* check */ |
|
864 | 816 | if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ |
|
865 | if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */ | 
|
817 | if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ | |
|
866 | 818 | if (oLitEnd <= oend_w) return ERROR(GENERIC); /* Precondition */ |
|
867 | 819 | |
|
868 | 820 | /* copy literals */ |
@@ -894,10 +846,87 b' size_t ZSTD_execSequenceLast7(BYTE* op,' | |||
|
894 | 846 | } |
|
895 | 847 | |
|
896 | 848 | |
|
849 | ||
|
850 | ||
|
851 | static seq_t ZSTD_decodeSequence(seqState_t* seqState) | |
|
852 | { | |
|
853 | seq_t seq; | |
|
854 | ||
|
855 | U32 const llCode = FSE_peekSymbol(&seqState->stateLL); | |
|
856 | U32 const mlCode = FSE_peekSymbol(&seqState->stateML); | |
|
857 | U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ | |
|
858 | ||
|
859 | U32 const llBits = LL_bits[llCode]; | |
|
860 | U32 const mlBits = ML_bits[mlCode]; | |
|
861 | U32 const ofBits = ofCode; | |
|
862 | U32 const totalBits = llBits+mlBits+ofBits; | |
|
863 | ||
|
864 | static const U32 LL_base[MaxLL+1] = { | |
|
865 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
|
866 | 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, | |
|
867 | 0x2000, 0x4000, 0x8000, 0x10000 }; | |
|
868 | ||
|
869 | static const U32 ML_base[MaxML+1] = { | |
|
870 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, | |
|
871 | 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, | |
|
872 | 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, | |
|
873 | 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; | |
|
874 | ||
|
875 | static const U32 OF_base[MaxOff+1] = { | |
|
876 | 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, | |
|
877 | 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, | |
|
878 | 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, | |
|
879 | 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; | |
|
880 | ||
|
881 | /* sequence */ | |
|
882 | { size_t offset; | |
|
883 | if (!ofCode) | |
|
884 | offset = 0; | |
|
885 | else { | |
|
886 | offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ | |
|
887 | if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); | |
|
888 | } | |
|
889 | ||
|
890 | if (ofCode <= 1) { | |
|
891 | offset += (llCode==0); | |
|
892 | if (offset) { | |
|
893 | size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; | |
|
894 | temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ | |
|
895 | if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; | |
|
896 | seqState->prevOffset[1] = seqState->prevOffset[0]; | |
|
897 | seqState->prevOffset[0] = offset = temp; | |
|
898 | } else { | |
|
899 | offset = seqState->prevOffset[0]; | |
|
900 | } | |
|
901 | } else { | |
|
902 | seqState->prevOffset[2] = seqState->prevOffset[1]; | |
|
903 | seqState->prevOffset[1] = seqState->prevOffset[0]; | |
|
904 | seqState->prevOffset[0] = offset; | |
|
905 | } | |
|
906 | seq.offset = offset; | |
|
907 | } | |
|
908 | ||
|
909 | seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ | |
|
910 | if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); | |
|
911 | ||
|
912 | seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ | |
|
913 | if (MEM_32bits() || | |
|
914 | (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); | |
|
915 | ||
|
916 | /* ANS state update */ | |
|
917 | FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ | |
|
918 | FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ | |
|
919 | if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ | |
|
920 | FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ | |
|
921 | ||
|
922 | return seq; | |
|
923 | } | |
|
924 | ||
|
925 | ||
|
897 | 926 | FORCE_INLINE |
|
898 | 927 | size_t ZSTD_execSequence(BYTE* op, |
|
899 | 928 | BYTE* const oend, seq_t sequence, |
|
900 | const BYTE** litPtr, const BYTE* const litLimit_w, | 
|
929 | const BYTE** litPtr, const BYTE* const litLimit, | |
|
901 | 930 | const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) |
|
902 | 931 | { |
|
903 | 932 | BYTE* const oLitEnd = op + sequence.litLength; |
@@ -909,8 +938,8 b' size_t ZSTD_execSequence(BYTE* op,' | |||
|
909 | 938 | |
|
910 | 939 | /* check */ |
|
911 | 940 | if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ |
|
912 | if (iLitEnd > litLimit_w) return ERROR(corruption_detected); /* over-read beyond lit buffer */ | 
|
913 | if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit_w, base, vBase, dictEnd); | 
|
941 | if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ | |
|
942 | if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); | |
|
914 | 943 | |
|
915 | 944 | /* copy Literals */ |
|
916 | 945 | ZSTD_copy8(op, *litPtr); |
@@ -923,7 +952,7 b' size_t ZSTD_execSequence(BYTE* op,' | |||
|
923 | 952 | if (sequence.offset > (size_t)(oLitEnd - base)) { |
|
924 | 953 | /* offset beyond prefix */ |
|
925 | 954 | if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); |
|
926 | match = dictEnd - (base-match); | 
|
955 | match += (dictEnd-base); | |
|
927 | 956 | if (match + sequence.matchLength <= dictEnd) { |
|
928 | 957 | memmove(oLitEnd, match, sequence.matchLength); |
|
929 | 958 | return sequenceLength; |
@@ -934,13 +963,13 b' size_t ZSTD_execSequence(BYTE* op,' | |||
|
934 | 963 | op = oLitEnd + length1; |
|
935 | 964 | sequence.matchLength -= length1; |
|
936 | 965 | match = base; |
|
937 | if (op > oend_w) { | |
|
966 | if (op > oend_w || sequence.matchLength < MINMATCH) { | |
|
938 | 967 | U32 i; |
|
939 | 968 | for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; |
|
940 | 969 | return sequenceLength; |
|
941 | 970 | } |
|
942 | 971 | } } |
|
943 | /* Requirement: op <= oend_w */ | |
|
972 | /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ | |
|
944 | 973 | |
|
945 | 974 | /* match within prefix */ |
|
946 | 975 | if (sequence.offset < 8) { |
@@ -968,7 +997,7 b' size_t ZSTD_execSequence(BYTE* op,' | |||
|
968 | 997 | } |
|
969 | 998 | while (op < oMatchEnd) *op++ = *match++; |
|
970 | 999 | } else { |
|
971 | ZSTD_wildcopy(op, match, sequence.matchLength-8); /* works even if matchLength < 8 */ | |
|
1000 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ | |
|
972 | 1001 | } |
|
973 | 1002 | return sequenceLength; |
|
974 | 1003 | } |
@@ -985,7 +1014,6 b' static size_t ZSTD_decompressSequences(' | |||
|
985 | 1014 | BYTE* const oend = ostart + maxDstSize; |
|
986 | 1015 | BYTE* op = ostart; |
|
987 | 1016 | const BYTE* litPtr = dctx->litPtr; |
|
988 | const BYTE* const litLimit_w = litPtr + dctx->litBufSize - WILDCOPY_OVERLENGTH; | |
|
989 | 1017 | const BYTE* const litEnd = litPtr + dctx->litSize; |
|
990 | 1018 | const BYTE* const base = (const BYTE*) (dctx->base); |
|
991 | 1019 | const BYTE* const vBase = (const BYTE*) (dctx->vBase); |
@@ -1011,7 +1039,7 b' static size_t ZSTD_decompressSequences(' | |||
|
1011 | 1039 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { |
|
1012 | 1040 | nbSeq--; |
|
1013 | 1041 | { seq_t const sequence = ZSTD_decodeSequence(&seqState); |
|
1014 | size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litLimit_w, base, vBase, dictEnd); | 
|
1042 | size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd); | |
|
1015 | 1043 | if (ZSTD_isError(oneSeqSize)) return oneSeqSize; |
|
1016 | 1044 | op += oneSeqSize; |
|
1017 | 1045 | } } |
@@ -1033,14 +1061,247 b' static size_t ZSTD_decompressSequences(' | |||
|
1033 | 1061 | } |
|
1034 | 1062 | |
|
1035 | 1063 | |
|
1036 | static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) | |
|
1064 | static seq_t ZSTD_decodeSequenceLong(seqState_t* seqState) | |
|
1065 | { | |
|
1066 | seq_t seq; | |
|
1067 | ||
|
1068 | U32 const llCode = FSE_peekSymbol(&seqState->stateLL); | |
|
1069 | U32 const mlCode = FSE_peekSymbol(&seqState->stateML); | |
|
1070 | U32 const ofCode = FSE_peekSymbol(&seqState->stateOffb); /* <= maxOff, by table construction */ | |
|
1071 | ||
|
1072 | U32 const llBits = LL_bits[llCode]; | |
|
1073 | U32 const mlBits = ML_bits[mlCode]; | |
|
1074 | U32 const ofBits = ofCode; | |
|
1075 | U32 const totalBits = llBits+mlBits+ofBits; | |
|
1076 | ||
|
1077 | static const U32 LL_base[MaxLL+1] = { | |
|
1078 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, | |
|
1079 | 16, 18, 20, 22, 24, 28, 32, 40, 48, 64, 0x80, 0x100, 0x200, 0x400, 0x800, 0x1000, | |
|
1080 | 0x2000, 0x4000, 0x8000, 0x10000 }; | |
|
1081 | ||
|
1082 | static const U32 ML_base[MaxML+1] = { | |
|
1083 | 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, | |
|
1084 | 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, | |
|
1085 | 35, 37, 39, 41, 43, 47, 51, 59, 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803, | |
|
1086 | 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 }; | |
|
1087 | ||
|
1088 | static const U32 OF_base[MaxOff+1] = { | |
|
1089 | 0, 1, 1, 5, 0xD, 0x1D, 0x3D, 0x7D, | |
|
1090 | 0xFD, 0x1FD, 0x3FD, 0x7FD, 0xFFD, 0x1FFD, 0x3FFD, 0x7FFD, | |
|
1091 | 0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD, | |
|
1092 | 0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD }; | |
|
1093 | ||
|
1094 | /* sequence */ | |
|
1095 | { size_t offset; | |
|
1096 | if (!ofCode) | |
|
1097 | offset = 0; | |
|
1098 | else { | |
|
1099 | offset = OF_base[ofCode] + BIT_readBitsFast(&seqState->DStream, ofBits); /* <= (ZSTD_WINDOWLOG_MAX-1) bits */ | |
|
1100 | if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); | |
|
1101 | } | |
|
1102 | ||
|
1103 | if (ofCode <= 1) { | |
|
1104 | offset += (llCode==0); | |
|
1105 | if (offset) { | |
|
1106 | size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset]; | |
|
1107 | temp += !temp; /* 0 is not valid; input is corrupted; force offset to 1 */ | |
|
1108 | if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1]; | |
|
1109 | seqState->prevOffset[1] = seqState->prevOffset[0]; | |
|
1110 | seqState->prevOffset[0] = offset = temp; | |
|
1111 | } else { | |
|
1112 | offset = seqState->prevOffset[0]; | |
|
1113 | } | |
|
1114 | } else { | |
|
1115 | seqState->prevOffset[2] = seqState->prevOffset[1]; | |
|
1116 | seqState->prevOffset[1] = seqState->prevOffset[0]; | |
|
1117 | seqState->prevOffset[0] = offset; | |
|
1118 | } | |
|
1119 | seq.offset = offset; | |
|
1120 | } | |
|
1121 | ||
|
1122 | seq.matchLength = ML_base[mlCode] + ((mlCode>31) ? BIT_readBitsFast(&seqState->DStream, mlBits) : 0); /* <= 16 bits */ | |
|
1123 | if (MEM_32bits() && (mlBits+llBits>24)) BIT_reloadDStream(&seqState->DStream); | |
|
1124 | ||
|
1125 | seq.litLength = LL_base[llCode] + ((llCode>15) ? BIT_readBitsFast(&seqState->DStream, llBits) : 0); /* <= 16 bits */ | |
|
1126 | if (MEM_32bits() || | |
|
1127 | (totalBits > 64 - 7 - (LLFSELog+MLFSELog+OffFSELog)) ) BIT_reloadDStream(&seqState->DStream); | |
|
1128 | ||
|
1129 | { size_t const pos = seqState->pos + seq.litLength; | |
|
1130 | seq.match = seqState->base + pos - seq.offset; /* single memory segment */ | |
|
1131 | if (seq.offset > pos) seq.match += seqState->gotoDict; /* separate memory segment */ | |
|
1132 | seqState->pos = pos + seq.matchLength; | |
|
1133 | } | |
|
1134 | ||
|
1135 | /* ANS state update */ | |
|
1136 | FSE_updateState(&seqState->stateLL, &seqState->DStream); /* <= 9 bits */ | |
|
1137 | FSE_updateState(&seqState->stateML, &seqState->DStream); /* <= 9 bits */ | |
|
1138 | if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream); /* <= 18 bits */ | |
|
1139 | FSE_updateState(&seqState->stateOffb, &seqState->DStream); /* <= 8 bits */ | |
|
1140 | ||
|
1141 | return seq; | |
|
1142 | } | |
|
1143 | ||
|
1144 | FORCE_INLINE | |
|
1145 | size_t ZSTD_execSequenceLong(BYTE* op, | |
|
1146 | BYTE* const oend, seq_t sequence, | |
|
1147 | const BYTE** litPtr, const BYTE* const litLimit, | |
|
1148 | const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd) | |
|
1037 | 1149 | { |
|
1038 | if (dst != dctx->previousDstEnd) { /* not contiguous */ | |
|
1039 | dctx->dictEnd = dctx->previousDstEnd; | |
|
1040 | dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); | |
|
1041 | dctx->base = dst; | |
|
1042 | dctx->previousDstEnd = dst; | |
|
1150 | BYTE* const oLitEnd = op + sequence.litLength; | |
|
1151 | size_t const sequenceLength = sequence.litLength + sequence.matchLength; | |
|
1152 | BYTE* const oMatchEnd = op + sequenceLength; /* risk : address space overflow (32-bits) */ | |
|
1153 | BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH; | |
|
1154 | const BYTE* const iLitEnd = *litPtr + sequence.litLength; | |
|
1155 | const BYTE* match = sequence.match; | |
|
1156 | ||
|
1157 | /* check */ | |
|
1158 | #if 1 | |
|
1159 | if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */ | |
|
1160 | if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */ | |
|
1161 | if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd); | |
|
1162 | #endif | |
|
1163 | ||
|
1164 | /* copy Literals */ | |
|
1165 | ZSTD_copy8(op, *litPtr); | |
|
1166 | if (sequence.litLength > 8) | |
|
1167 | ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
1168 | op = oLitEnd; | |
|
1169 | *litPtr = iLitEnd; /* update for next sequence */ | |
|
1170 | ||
|
1171 | /* copy Match */ | |
|
1172 | #if 1 | |
|
1173 | if (sequence.offset > (size_t)(oLitEnd - base)) { | |
|
1174 | /* offset beyond prefix */ | |
|
1175 | if (sequence.offset > (size_t)(oLitEnd - vBase)) return ERROR(corruption_detected); | |
|
1176 | if (match + sequence.matchLength <= dictEnd) { | |
|
1177 | memmove(oLitEnd, match, sequence.matchLength); | |
|
1178 | return sequenceLength; | |
|
1179 | } | |
|
1180 | /* span extDict & currentPrefixSegment */ | |
|
1181 | { size_t const length1 = dictEnd - match; | |
|
1182 | memmove(oLitEnd, match, length1); | |
|
1183 | op = oLitEnd + length1; | |
|
1184 | sequence.matchLength -= length1; | |
|
1185 | match = base; | |
|
1186 | if (op > oend_w || sequence.matchLength < MINMATCH) { | |
|
1187 | U32 i; | |
|
1188 | for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i]; | |
|
1189 | return sequenceLength; | |
|
1190 | } | |
|
1191 | } } | |
|
1192 | /* Requirement: op <= oend_w && sequence.matchLength >= MINMATCH */ | |
|
1193 | #endif | |
|
1194 | ||
|
1195 | /* match within prefix */ | |
|
1196 | if (sequence.offset < 8) { | |
|
1197 | /* close range match, overlap */ | |
|
1198 | static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 }; /* added */ | |
|
1199 | static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 }; /* substracted */ | |
|
1200 | int const sub2 = dec64table[sequence.offset]; | |
|
1201 | op[0] = match[0]; | |
|
1202 | op[1] = match[1]; | |
|
1203 | op[2] = match[2]; | |
|
1204 | op[3] = match[3]; | |
|
1205 | match += dec32table[sequence.offset]; | |
|
1206 | ZSTD_copy4(op+4, match); | |
|
1207 | match -= sub2; | |
|
1208 | } else { | |
|
1209 | ZSTD_copy8(op, match); | |
|
1210 | } | |
|
1211 | op += 8; match += 8; | |
|
1212 | ||
|
1213 | if (oMatchEnd > oend-(16-MINMATCH)) { | |
|
1214 | if (op < oend_w) { | |
|
1215 | ZSTD_wildcopy(op, match, oend_w - op); | |
|
1216 | match += oend_w - op; | |
|
1217 | op = oend_w; | |
|
1218 | } | |
|
1219 | while (op < oMatchEnd) *op++ = *match++; | |
|
1220 | } else { | |
|
1221 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ | |
|
1043 | 1222 | } |
|
1223 | return sequenceLength; | |
|
1224 | } | |
|
1225 | ||
|
1226 | static size_t ZSTD_decompressSequencesLong( | |
|
1227 | ZSTD_DCtx* dctx, | |
|
1228 | void* dst, size_t maxDstSize, | |
|
1229 | const void* seqStart, size_t seqSize) | |
|
1230 | { | |
|
1231 | const BYTE* ip = (const BYTE*)seqStart; | |
|
1232 | const BYTE* const iend = ip + seqSize; | |
|
1233 | BYTE* const ostart = (BYTE* const)dst; | |
|
1234 | BYTE* const oend = ostart + maxDstSize; | |
|
1235 | BYTE* op = ostart; | |
|
1236 | const BYTE* litPtr = dctx->litPtr; | |
|
1237 | const BYTE* const litEnd = litPtr + dctx->litSize; | |
|
1238 | const BYTE* const base = (const BYTE*) (dctx->base); | |
|
1239 | const BYTE* const vBase = (const BYTE*) (dctx->vBase); | |
|
1240 | const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd); | |
|
1241 | int nbSeq; | |
|
1242 | ||
|
1243 | /* Build Decoding Tables */ | |
|
1244 | { size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, seqSize); | |
|
1245 | if (ZSTD_isError(seqHSize)) return seqHSize; | |
|
1246 | ip += seqHSize; | |
|
1247 | } | |
|
1248 | ||
|
1249 | /* Regen sequences */ | |
|
1250 | if (nbSeq) { | |
|
1251 | #define STORED_SEQS 4 | |
|
1252 | #define STOSEQ_MASK (STORED_SEQS-1) | |
|
1253 | #define ADVANCED_SEQS 4 | |
|
1254 | seq_t sequences[STORED_SEQS]; | |
|
1255 | int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS); | |
|
1256 | seqState_t seqState; | |
|
1257 | int seqNb; | |
|
1258 | dctx->fseEntropy = 1; | |
|
1259 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->rep[i]; } | |
|
1260 | seqState.base = base; | |
|
1261 | seqState.pos = (size_t)(op-base); | |
|
1262 | seqState.gotoDict = (iPtrDiff)(dictEnd - base); | |
|
1263 | CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); | |
|
1264 | FSE_initDState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); | |
|
1265 | FSE_initDState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); | |
|
1266 | FSE_initDState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); | |
|
1267 | ||
|
1268 | /* prepare in advance */ | |
|
1269 | for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && seqNb<seqAdvance; seqNb++) { | |
|
1270 | sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState); | |
|
1271 | } | |
|
1272 | if (seqNb<seqAdvance) return ERROR(corruption_detected); | |
|
1273 | ||
|
1274 | /* decode and decompress */ | |
|
1275 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && seqNb<nbSeq ; seqNb++) { | |
|
1276 | seq_t const sequence = ZSTD_decodeSequenceLong(&seqState); | |
|
1277 | size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); | |
|
1278 | if (ZSTD_isError(oneSeqSize)) return oneSeqSize; | |
|
1279 | ZSTD_PREFETCH(sequence.match); | |
|
1280 | sequences[seqNb&STOSEQ_MASK] = sequence; | |
|
1281 | op += oneSeqSize; | |
|
1282 | } | |
|
1283 | if (seqNb<nbSeq) return ERROR(corruption_detected); | |
|
1284 | ||
|
1285 | /* finish queue */ | |
|
1286 | seqNb -= seqAdvance; | |
|
1287 | for ( ; seqNb<nbSeq ; seqNb++) { | |
|
1288 | size_t const oneSeqSize = ZSTD_execSequenceLong(op, oend, sequences[seqNb&STOSEQ_MASK], &litPtr, litEnd, base, vBase, dictEnd); | |
|
1289 | if (ZSTD_isError(oneSeqSize)) return oneSeqSize; | |
|
1290 | op += oneSeqSize; | |
|
1291 | } | |
|
1292 | ||
|
1293 | /* save reps for next block */ | |
|
1294 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->rep[i] = (U32)(seqState.prevOffset[i]); } | |
|
1295 | } | |
|
1296 | ||
|
1297 | /* last literal segment */ | |
|
1298 | { size_t const lastLLSize = litEnd - litPtr; | |
|
1299 | if (lastLLSize > (size_t)(oend-op)) return ERROR(dstSize_tooSmall); | |
|
1300 | memcpy(op, litPtr, lastLLSize); | |
|
1301 | op += lastLLSize; | |
|
1302 | } | |
|
1303 | ||
|
1304 | return op-ostart; | |
|
1044 | 1305 | } |
|
1045 | 1306 | |
|
1046 | 1307 | |
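ZSTD_decompressSequencesLong() keeps STORED_SEQS (4) decoded sequences in flight: it decodes sequence N, prefetches that sequence's match pointer, and only then executes sequence N-4, giving the cache several iterations of lead time; the dispatcher in the next hunk selects this path for large windows. The scheduling skeleton, reduced to generic stand-ins:

    #define STORED_SEQS 4
    #define STOSEQ_MASK (STORED_SEQS-1)

    typedef struct { const void* match; } item_t;    /* stand-in for seq_t */

    void pipeline(int nbSeq, item_t (*decode)(void), void (*execute)(item_t))
    {
        item_t ring[STORED_SEQS];
        int n;
        int const advance = (nbSeq < STORED_SEQS) ? nbSeq : STORED_SEQS;
        for (n = 0; n < advance; n++)       /* prime the ring */
            ring[n] = decode();
        for (; n < nbSeq; n++) {            /* decode N, execute N-4 */
            item_t const next = decode();   /* real code prefetches next.match */
            execute(ring[(n - STORED_SEQS) & STOSEQ_MASK]);
            ring[n & STOSEQ_MASK] = next;
        }
        for (n -= advance; n < nbSeq; n++)  /* drain what is still queued */
            execute(ring[n & STOSEQ_MASK]);
    }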
@@ -1058,10 +1319,21 b' static size_t ZSTD_decompressBlock_inter' | |||
|
1058 | 1319 | ip += litCSize; |
|
1059 | 1320 | srcSize -= litCSize; |
|
1060 | 1321 | } |
|
1322 | if (dctx->fParams.windowSize > (1<<23)) return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize); | |
|
1061 | 1323 | return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize); |
|
1062 | 1324 | } |
|
1063 | 1325 | |
|
1064 | 1326 | |
|
1327 | static void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst) | |
|
1328 | { | |
|
1329 | if (dst != dctx->previousDstEnd) { /* not contiguous */ | |
|
1330 | dctx->dictEnd = dctx->previousDstEnd; | |
|
1331 | dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base)); | |
|
1332 | dctx->base = dst; | |
|
1333 | dctx->previousDstEnd = dst; | |
|
1334 | } | |
|
1335 | } | |
|
1336 | ||
|
1065 | 1337 | size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, |
|
1066 | 1338 | void* dst, size_t dstCapacity, |
|
1067 | 1339 | const void* src, size_t srcSize) |
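
The one-line dispatch above is the policy change in this hunk: frames whose window exceeds 1<<23 (8 MiB) take the new ZSTD_decompressSequencesLong() path, everything else keeps the old decoder. A caller can see which side a frame falls on by reading its header; a sketch assuming the vendored 1.1.2 advanced API (ZSTD_frameParams and ZSTD_getFrameParams() sit behind ZSTD_STATIC_LINKING_ONLY):

    #include <stdio.h>
    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"

    /* Report which decode path a frame would take (per the hunk above). */
    static void report_decode_path(const void* frame, size_t frameSize)
    {
        ZSTD_frameParams fp;
        size_t const r = ZSTD_getFrameParams(&fp, frame, frameSize);
        if (ZSTD_isError(r) || r > 0) return;  /* error, or header incomplete */
        printf("windowSize=%u -> %s decoder\n", fp.windowSize,
               fp.windowSize > (1u << 23) ? "long-offset" : "regular");
    }
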
@@ -1506,6 +1778,45 b' size_t ZSTD_sizeof_DDict(const ZSTD_DDic' | |||
|
1506 | 1778 | return sizeof(*ddict) + sizeof(ddict->refContext) + ddict->dictSize; |
|
1507 | 1779 | } |
|
1508 | 1780 | |
|
1781 | /*! ZSTD_getDictID_fromDict() : | |
|
1782 | * Provides the dictID stored within dictionary. | |
|
1783 | * if @return == 0, the dictionary is not conformant with Zstandard specification. | |
|
1784 | * It can still be loaded, but as a content-only dictionary. */ | |
|
1785 | unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize) | |
|
1786 | { | |
|
1787 | if (dictSize < 8) return 0; | |
|
1788 | if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return 0; | |
|
1789 | return MEM_readLE32((const char*)dict + 4); | |
|
1790 | } | |
|
1791 | ||
|
1792 | /*! ZSTD_getDictID_fromDDict() : | |
|
1793 | * Provides the dictID of the dictionary loaded into `ddict`. | |
|
1794 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | |
|
1795 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | |
|
1796 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) | |
|
1797 | { | |
|
1798 | if (ddict==NULL) return 0; | |
|
1799 | return ZSTD_getDictID_fromDict(ddict->dict, ddict->dictSize); | |
|
1800 | } | |
|
1801 | ||
|
1802 | /*! ZSTD_getDictID_fromFrame() : | |
|
1803 | * Provides the dictID required to decompressed the frame stored within `src`. | |
|
1804 | * If @return == 0, the dictID could not be decoded. | |
|
1805 | * This could for one of the following reasons : | |
|
1806 | * - The frame does not require a dictionary to be decoded (most common case). | |
|
1807 | * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. | |
|
1808 | * Note : this use case also happens when using a non-conformant dictionary. | |
|
1809 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). | |
|
1810 | * - This is not a Zstandard frame. | |
|
1811 | * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ | |
|
1812 | unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize) | |
|
1813 | { | |
|
1814 | ZSTD_frameParams zfp = { 0 , 0 , 0 , 0 }; | |
|
1815 | size_t const hError = ZSTD_getFrameParams(&zfp, src, srcSize); | |
|
1816 | if (ZSTD_isError(hError)) return 0; | |
|
1817 | return zfp.dictID; | |
|
1818 | } | |
|
1819 | ||
|
1509 | 1820 | |
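
The three helpers added above let a decoder route a frame to the dictionary it was compressed with. A hedged sketch of that flow; lookup_dict() is a hypothetical application-side table, not part of zstd:

    #include "zstd.h"

    /* hypothetical: returns the dictionary registered under dictID, or NULL */
    extern const void* lookup_dict(unsigned dictID, size_t* dictSize);

    size_t decompress_with_matching_dict(ZSTD_DCtx* dctx,
                                         void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize)
    {
        unsigned const id = ZSTD_getDictID_fromFrame(src, srcSize);
        if (id == 0)   /* no usable dictID: attempt a plain decompression */
            return ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
        {   size_t dictSize = 0;
            const void* const dict = lookup_dict(id, &dictSize);
            if (dict == NULL)   /* unknown ID: fall back as appropriate */
                return ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
            return ZSTD_decompress_usingDict(dctx, dst, dstCapacity,
                                             src, srcSize, dict, dictSize);
        }
    }
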
|
1510 | 1821 | /*! ZSTD_decompress_usingDDict() : |
|
1511 | 1822 | * Decompression using a pre-digested Dictionary |
@@ -1687,7 +1998,8 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1687 | 1998 | switch(zds->stage) |
|
1688 | 1999 | { |
|
1689 | 2000 | case zdss_init : |
|
1690 | return ERROR(init_missing); | |
|
2001 | ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ | |
|
2002 | /* fall-through */ | |
|
1691 | 2003 | |
|
1692 | 2004 | case zdss_loadHeader : |
|
1693 | 2005 | { size_t const hSize = ZSTD_getFrameParams(&zds->fParams, zds->headerBuffer, zds->lhSize); |
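
With the zdss_init change above, a ZSTD_DStream no longer errors out when fed a new frame after finishing one; it resets itself transparently. The caller-side loop therefore stays the same whether the input holds one frame or several concatenated ones. A minimal sketch against the streaming API:

    #include <stdio.h>
    #include "zstd.h"

    static int stream_decompress(ZSTD_DStream* zds, FILE* out,
                                 const void* src, size_t srcSize)
    {
        char buf[1 << 16];                  /* or ZSTD_DStreamOutSize() */
        ZSTD_inBuffer in = { src, srcSize, 0 };
        ZSTD_initDStream(zds);              /* once; frames now chain freely */
        while (in.pos < in.size) {
            ZSTD_outBuffer o = { buf, sizeof buf, 0 };
            size_t const ret = ZSTD_decompressStream(zds, &o, &in);
            if (ZSTD_isError(ret)) return -1;
            fwrite(buf, 1, o.pos, out);
        }
        return 0;
    }
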
@@ -898,12 +898,14 b' size_t ZDICT_trainFromBuffer_unsafe(' | |||
|
898 | 898 | U32 const nb = MIN(25, dictList[0].pos); |
|
899 | 899 | U32 const dictContentSize = ZDICT_dictSize(dictList); |
|
900 | 900 | U32 u; |
|
901 | DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos, dictContentSize); | |
|
902 | DISPLAYLEVEL(3, "list %u best segments \n", nb); | |
|
903 | for (u=1; u<nb; u++) { | |

904 | U32 pos = dictList[u].pos; | |
|
905 | U32 length = dictList[u].length; | |
|
906 | U32 printedLength = MIN(40, length); | |
|
901 | DISPLAYLEVEL(3, "\n %u segments found, of total size %u \n", dictList[0].pos-1, dictContentSize); | |
|
902 | DISPLAYLEVEL(3, "list %u best segments \n", nb-1); | |
|
903 | for (u=1; u<nb; u++) { | |
|
904 | U32 const pos = dictList[u].pos; | |
|
905 | U32 const length = dictList[u].length; | |
|
906 | U32 const printedLength = MIN(40, length); | |
|
907 | if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) | |
|
908 | return ERROR(GENERIC); /* should never happen */ | |
|
907 | 909 | DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |", |
|
908 | 910 | u, length, pos, dictList[u].savings); |
|
909 | 911 | ZDICT_printHex((const char*)samplesBuffer+pos, printedLength); |
@@ -7,24 +7,24 b'' | |||
|
7 | 7 | * of patent rights can be found in the PATENTS file in the same directory. |
|
8 | 8 | */ |
|
9 | 9 | |
|
10 | #ifndef ZSTD_H_235446 | |
|
11 | #define ZSTD_H_235446 | |
|
12 | ||
|
13 | 10 | #if defined (__cplusplus) |
|
14 | 11 | extern "C" { |
|
15 | 12 | #endif |
|
16 | 13 | |
|
14 | #ifndef ZSTD_H_235446 | |
|
15 | #define ZSTD_H_235446 | |
|
16 | ||
|
17 | 17 | /* ====== Dependency ======*/ |
|
18 | 18 | #include <stddef.h> /* size_t */ |
|
19 | 19 | |
|
20 | 20 | |
|
21 | /* ====== Export for Windows ======*/ | |
|
22 | /* | |
|
23 | * ZSTD_DLL_EXPORT : | |
|
24 | * Enable exporting of functions when building a Windows DLL | |
|
25 | */ | |
|
26 | #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |
|
21 | /* ===== ZSTDLIB_API : control library symbols visibility ===== */ | |
|
22 | #if defined(__GNUC__) && (__GNUC__ >= 4) | |
|
23 | # define ZSTDLIB_API __attribute__ ((visibility ("default"))) | |
|
24 | #elif defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1) | |
|
27 | 25 | # define ZSTDLIB_API __declspec(dllexport) |
|
26 | #elif defined(ZSTD_DLL_IMPORT) && (ZSTD_DLL_IMPORT==1) | |
|
27 | # define ZSTDLIB_API __declspec(dllimport) /* It isn't required but allows to generate better code, saving a function pointer load from the IAT and an indirect jump.*/ | |
|
28 | 28 | #else |
|
29 | 29 | # define ZSTDLIB_API |
|
30 | 30 | #endif |
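
The rewritten block prefers ELF symbol visibility when building with GCC >= 4 and only then falls back to the Windows __declspec pair, gaining the new dllimport case for clients. A consumer linking against a zstd DLL would opt in like this (assuming the library itself was built with ZSTD_DLL_EXPORT=1):

    /* client translation unit */
    #define ZSTD_DLL_IMPORT 1  /* turns ZSTDLIB_API into __declspec(dllimport) */
    #include "zstd.h"

On non-Windows builds the same header, compiled with -fvisibility=hidden, exports only the ZSTDLIB_API-marked symbols.
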
@@ -51,11 +51,9 b' extern "C" {' | |||
|
51 | 51 | *********************************************************************************************************/ |
|
52 | 52 | |
|
53 | 53 | /*------ Version ------*/ |
|
54 | ZSTDLIB_API unsigned ZSTD_versionNumber (void); /**< returns version number of ZSTD */ | |
|
55 | ||
|
56 | 54 | #define ZSTD_VERSION_MAJOR 1 |
|
57 | 55 | #define ZSTD_VERSION_MINOR 1 |
|
58 | #define ZSTD_VERSION_RELEASE 1 | |

56 | #define ZSTD_VERSION_RELEASE 2 | |
|
59 | 57 | |
|
60 | 58 | #define ZSTD_LIB_VERSION ZSTD_VERSION_MAJOR.ZSTD_VERSION_MINOR.ZSTD_VERSION_RELEASE |
|
61 | 59 | #define ZSTD_QUOTE(str) #str |
@@ -63,6 +61,7 b' ZSTDLIB_API unsigned ZSTD_versionNumber ' | |||
|
63 | 61 | #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) |
|
64 | 62 | |
|
65 | 63 | #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) |
|
64 | ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< library version number; to be used when checking dll version */ | |
|
66 | 65 | |
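
Moving ZSTD_versionNumber() next to ZSTD_VERSION_NUMBER makes the intended pairing explicit: the macro is what you compiled against, the function is what the loaded library reports. A quick consistency check:

    #include <stdio.h>
    #include "zstd.h"

    int main(void)
    {
        /* mismatch => header and shared library are out of sync */
        printf("built against %u, running %u\n",
               (unsigned)ZSTD_VERSION_NUMBER, ZSTD_versionNumber());
        return ZSTD_versionNumber() == ZSTD_VERSION_NUMBER ? 0 : 1;
    }
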
|
67 | 66 | |
|
68 | 67 | /*************************************** |
@@ -72,7 +71,7 b' ZSTDLIB_API unsigned ZSTD_versionNumber ' | |||
|
72 | 71 | Compresses `src` content as a single zstd compressed frame into already allocated `dst`. |
|
73 | 72 | Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. |
|
74 | 73 | @return : compressed size written into `dst` (<= `dstCapacity), |
|
75 | or an error code if it fails (which can be tested using ZSTD_isError()) */ | |
|
74 | or an error code if it fails (which can be tested using ZSTD_isError()). */ | |
|
76 | 75 | ZSTDLIB_API size_t ZSTD_compress( void* dst, size_t dstCapacity, |
|
77 | 76 | const void* src, size_t srcSize, |
|
78 | 77 | int compressionLevel); |
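
For reference, a one-shot round trip through the two entry points documented above, sized with ZSTD_compressBound() as the hint recommends (a sketch; error handling kept minimal):

    #include <stdlib.h>
    #include <string.h>
    #include "zstd.h"

    static int roundtrip(const void* src, size_t srcSize)
    {
        size_t const bound = ZSTD_compressBound(srcSize);
        void* const cbuf = malloc(bound);
        void* const rbuf = malloc(srcSize);
        int ok = 0;
        if (cbuf && rbuf) {
            size_t const csize = ZSTD_compress(cbuf, bound, src, srcSize, 3);
            if (!ZSTD_isError(csize)) {
                size_t const rsize = ZSTD_decompress(rbuf, srcSize, cbuf, csize);
                ok = !ZSTD_isError(rsize) && rsize == srcSize
                     && memcmp(src, rbuf, srcSize) == 0;
            }
        }
        free(cbuf); free(rbuf);
        return ok;
    }
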
@@ -82,7 +81,7 b' ZSTDLIB_API size_t ZSTD_compress( void* ' | |||
|
82 | 81 | `dstCapacity` is an upper bound of originalSize. |
|
83 | 82 | If user cannot imply a maximum upper bound, it's better to use streaming mode to decompress data. |
|
84 | 83 | @return : the number of bytes decompressed into `dst` (<= `dstCapacity`), |
|
85 | or an errorCode if it fails (which can be tested using ZSTD_isError()) */ | |
|
84 | or an errorCode if it fails (which can be tested using ZSTD_isError()). */ | |
|
86 | 85 | ZSTDLIB_API size_t ZSTD_decompress( void* dst, size_t dstCapacity, |
|
87 | 86 | const void* src, size_t compressedSize); |
|
88 | 87 | |
@@ -116,16 +115,16 b' ZSTDLIB_API const char* ZSTD_getErrorNam' | |||
|
116 | 115 | * Explicit memory management |
|
117 | 116 | ***************************************/ |
|
118 | 117 | /*= Compression context |
|
119 | * When compressing many messages / blocks, | |

118 | * When compressing many times, | |
|
120 | 119 | * it is recommended to allocate a context just once, and re-use it for each successive compression operation. |
|
121 | * This will make | |

120 | * This will make workload friendlier for system's memory. | |
|
122 | 121 | * Use one context per thread for parallel execution in multi-threaded environments. */ |
|
123 | 122 | typedef struct ZSTD_CCtx_s ZSTD_CCtx; |
|
124 | 123 | ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); |
|
125 | 124 | ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); |
|
126 | 125 | |
|
127 | 126 | /*! ZSTD_compressCCtx() : |
|
128 | Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()) */ | |
|
127 | Same as ZSTD_compress(), requires an allocated ZSTD_CCtx (see ZSTD_createCCtx()). */ | |
|
129 | 128 | ZSTDLIB_API size_t ZSTD_compressCCtx(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel); |
|
130 | 129 | |
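
The context-reuse advice above is the normal usage pattern: allocate once, then run each compression through the same ZSTD_CCtx. Sketch:

    #include "zstd.h"

    static void compress_many(const void* const* srcs, const size_t* sizes,
                              size_t n, void* dst, size_t dstCapacity)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();   /* allocate once */
        size_t i;
        if (cctx == NULL) return;
        for (i = 0; i < n; i++) {
            size_t const r = ZSTD_compressCCtx(cctx, dst, dstCapacity,
                                               srcs[i], sizes[i], 1);
            if (ZSTD_isError(r)) break;   /* a real caller would store r */
        }
        ZSTD_freeCCtx(cctx);              /* free once */
    }
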
|
131 | 130 | /*= Decompression context */ |
@@ -134,7 +133,7 b' ZSTDLIB_API ZSTD_DCtx* ZSTD_createDCtx(v' | |||
|
134 | 133 | ZSTDLIB_API size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx); |
|
135 | 134 | |
|
136 | 135 | /*! ZSTD_decompressDCtx() : |
|
137 | * Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()) */ | |
|
136 | * Same as ZSTD_decompress(), requires an allocated ZSTD_DCtx (see ZSTD_createDCtx()). */ | |
|
138 | 137 | ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize); |
|
139 | 138 | |
|
140 | 139 | |
@@ -143,7 +142,8 b' ZSTDLIB_API size_t ZSTD_decompressDCtx(Z' | |||
|
143 | 142 | ***************************/ |
|
144 | 143 | /*! ZSTD_compress_usingDict() : |
|
145 | 144 | * Compression using a predefined Dictionary (see dictBuilder/zdict.h). |
|
146 | * Note : This function load the dictionary, resulting in significant startup delay. | |

145 | * Note : This function loads the dictionary, resulting in significant startup delay. | |
|
146 | * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |
|
147 | 147 | ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, |
|
148 | 148 | void* dst, size_t dstCapacity, |
|
149 | 149 | const void* src, size_t srcSize, |
@@ -153,7 +153,8 b' ZSTDLIB_API size_t ZSTD_compress_usingDi' | |||
|
153 | 153 | /*! ZSTD_decompress_usingDict() : |
|
154 | 154 | * Decompression using a predefined Dictionary (see dictBuilder/zdict.h). |
|
155 | 155 | * Dictionary must be identical to the one used during compression. |
|
156 | * Note : This function load the dictionary, resulting in significant startup delay | |

156 | * Note : This function loads the dictionary, resulting in significant startup delay. | |
|
157 | * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |
|
157 | 158 | ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, |
|
158 | 159 | void* dst, size_t dstCapacity, |
|
159 | 160 | const void* src, size_t srcSize, |
@@ -169,17 +170,17 b' typedef struct ZSTD_CDict_s ZSTD_CDict;' | |||
|
169 | 170 | * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once. |
|
170 | 171 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay. |
|
171 | 172 | * ZSTD_CDict can be created once and used by multiple threads concurrently, as its usage is read-only. |
|
172 | * `dict` can be released after ZSTD_CDict creation */ | |
|
173 | * `dict` can be released after ZSTD_CDict creation. */ | |
|
173 | 174 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel); |
|
174 | 175 | |
|
175 | 176 | /*! ZSTD_freeCDict() : |
|
176 | * Function frees memory allocated by ZSTD_createCDict() */ | |
|
177 | * Function frees memory allocated by ZSTD_createCDict(). */ | |
|
177 | 178 | ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); |
|
178 | 179 | |
|
179 | 180 | /*! ZSTD_compress_usingCDict() : |
|
180 | 181 | * Compression using a digested Dictionary. |
|
181 | 182 | * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times. |
|
182 | * Note that compression level is decided during dictionary creation */ | |
|
183 | * Note that compression level is decided during dictionary creation. */ | |
|
183 | 184 | ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, |
|
184 | 185 | void* dst, size_t dstCapacity, |
|
185 | 186 | const void* src, size_t srcSize, |
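
Putting the three CDict calls together: digest the dictionary once, share the read-only CDict (across threads if desired), and remember the compression level is fixed at creation time. A sketch that creates and frees per call only for brevity; real code would cache the CDict:

    #include "zstd.h"

    static size_t compress_with_cdict(ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      const void* dict, size_t dictSize)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, 3);
        size_t r;
        if (cdict == NULL)
            return (size_t)-1;   /* reported as an error by ZSTD_isError() */
        r = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                     src, srcSize, cdict);
        ZSTD_freeCDict(cdict);
        return r;
    }
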
@@ -190,7 +191,7 b' typedef struct ZSTD_DDict_s ZSTD_DDict;' | |||
|
190 | 191 | |
|
191 | 192 | /*! ZSTD_createDDict() : |
|
192 | 193 | * Create a digested dictionary, ready to start decompression operation without startup delay. |
|
193 | * `dict` can be released after creation */ | |
|
194 | * `dict` can be released after creation. */ | |
|
194 | 195 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize); |
|
195 | 196 | |
|
196 | 197 | /*! ZSTD_freeDDict() : |
@@ -198,7 +199,7 b' ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict' | |||
|
198 | 199 | ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); |
|
199 | 200 | |
|
200 | 201 | /*! ZSTD_decompress_usingDDict() : |
|
201 | * Decompression using a digested Dictionary | |
|
202 | * Decompression using a digested Dictionary. | |
|
202 | 203 | * Faster startup than ZSTD_decompress_usingDict(), recommended when same dictionary is used multiple times. */ |
|
203 | 204 | ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, |
|
204 | 205 | void* dst, size_t dstCapacity, |
@@ -236,20 +237,20 b' typedef struct ZSTD_outBuffer_s {' | |||
|
236 | 237 | * |
|
237 | 238 | * Start a new compression by initializing ZSTD_CStream. |
|
238 | 239 | * Use ZSTD_initCStream() to start a new compression operation. |
|
239 | * Use ZSTD_initCStream_usingDict() for a compression which requires a dictionary | |

240 | * Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section) | |
|
240 | 241 | * |
|
241 | 242 | * Use ZSTD_compressStream() repetitively to consume input stream. |
|
242 | 243 | * The function will automatically update both `pos` fields. |
|
243 | 244 | * Note that it may not consume the entire input, in which case `pos < size`, |
|
244 | 245 | * and it's up to the caller to present again remaining data. |
|
245 | 246 | * @return : a size hint, preferred nb of bytes to use as input for next function call |
|
246 | * (it's just a hint, to help latency a little, any other value will work fine) | |
|
247 | * (note : the size hint is guaranteed to be <= ZSTD_CStreamInSize() ) | |
|
248 | 247 | * or an error code, which can be tested using ZSTD_isError(). |
|
248 | * Note 1 : it's just a hint, to help latency a little, any other value will work fine. | |
|
249 | * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() | |
|
249 | 250 | * |
|
250 | * At any moment, it's possible to flush whatever data remains within buffer, using ZSTD_flushStream(). | |
|
251 | * At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream(). | |
|
251 | 252 | * `output->pos` will be updated. |
|
252 | * Note some content might still be left within internal buffer if `output->size` is too small. | |
|
253 | * Note that some content might still be left within internal buffer if `output->size` is too small. | |
|
253 | 254 | * @return : nb of bytes still present within internal buffer (0 if it's empty) |
|
254 | 255 | * or an error code, which can be tested using ZSTD_isError(). |
|
255 | 256 | * |
@@ -258,15 +259,15 b' typedef struct ZSTD_outBuffer_s {' | |||
|
258 | 259 | * The epilogue is required for decoders to consider a frame completed. |
|
259 | 260 | * Similar to ZSTD_flushStream(), it may not be able to flush the full content if `output->size` is too small. |
|
260 | 261 | * In which case, call again ZSTD_endStream() to complete the flush. |
|
261 | * @return : nb of bytes still present within internal buffer (0 if it's empty) | |
|
262 | * @return : nb of bytes still present within internal buffer (0 if it's empty, hence compression completed) | |
|
262 | 263 | * or an error code, which can be tested using ZSTD_isError(). |
|
263 | 264 | * |
|
264 | 265 | * *******************************************************************/ |
|
265 | 266 | |
|
266 | /*===== Streaming compression functions ======*/ | |
|
267 | 267 | typedef struct ZSTD_CStream_s ZSTD_CStream; |
|
268 | 268 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); |
|
269 | 269 | ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); |
|
270 | ||
|
270 | 271 | ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); |
|
271 | 272 | ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); |
|
272 | 273 | ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); |
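
The lifecycle documented above (init, repeated compressStream, endStream epilogue, with flushStream available in between) looks like this from the caller's side; a sketch with minimal error handling:

    #include <stdio.h>
    #include "zstd.h"

    static int stream_compress(FILE* out, const void* src, size_t srcSize)
    {
        char buf[1 << 16];                   /* or ZSTD_CStreamOutSize() */
        ZSTD_CStream* const zcs = ZSTD_createCStream();
        ZSTD_inBuffer in = { src, srcSize, 0 };
        size_t remaining;
        if (zcs == NULL) return -1;
        ZSTD_initCStream(zcs, 3);
        while (in.pos < in.size) {           /* consume the whole input */
            ZSTD_outBuffer o = { buf, sizeof buf, 0 };
            size_t const r = ZSTD_compressStream(zcs, &o, &in);
            if (ZSTD_isError(r)) { ZSTD_freeCStream(zcs); return -1; }
            fwrite(buf, 1, o.pos, out);
        }
        do {                                 /* flush + write epilogue */
            ZSTD_outBuffer o = { buf, sizeof buf, 0 };
            remaining = ZSTD_endStream(zcs, &o);
            fwrite(buf, 1, o.pos, out);
        } while (remaining != 0 && !ZSTD_isError(remaining));
        ZSTD_freeCStream(zcs);
        return 0;
    }
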
@@ -295,23 +296,25 b' ZSTDLIB_API size_t ZSTD_CStreamOutSize(v' | |||
|
295 | 296 | * If `output.pos < output.size`, decoder has flushed everything it could. |
|
296 | 297 | * @return : 0 when a frame is completely decoded and fully flushed, |
|
297 | 298 | * an error code, which can be tested using ZSTD_isError(), |
|
298 | * any other value > 0, which means there is still some | |

299 | * The return value is a suggested next input size ( | |

299 | * any other value > 0, which means there is still some decoding to do to complete current frame. | |
|
300 | * The return value is a suggested next input size (a hint to improve latency) that will never load more than the current frame. | |
|
300 | 301 | * *******************************************************************************/ |
|
301 | 302 | |
|
302 | /*===== Streaming decompression functions =====*/ | |
|
303 | 303 | typedef struct ZSTD_DStream_s ZSTD_DStream; |
|
304 | 304 | ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); |
|
305 | 305 | ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); |
|
306 | ||
|
306 | 307 | ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); |
|
307 | 308 | ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); |
|
308 | 309 | |
|
309 | 310 | ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ |
|
310 | 311 | ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ |
|
311 | 312 | |
|
313 | #endif /* ZSTD_H_235446 */ | |
|
312 | 314 | |
|
313 | 315 | |
|
314 | #ifdef ZSTD_STATIC_LINKING_ONLY | |
|
316 | #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) | |
|
317 | #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY | |
|
315 | 318 | |
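
The new guard pair gives the experimental section its own include-once protection, so a translation unit can safely see zstd.h twice; the second include merely appends the advanced declarations:

    #include "zstd.h"              /* stable API only */

    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"              /* adds the advanced/experimental API */

Under the old layout (the #ifdef nested inside the single ZSTD_H_235446 guard), the second include was a no-op and the advanced API stayed invisible.
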
|
316 | 319 | /**************************************************************************************** |
|
317 | 320 | * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS |
@@ -403,15 +406,15 b' ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict' | |||
|
403 | 406 | * Gives the amount of memory used by a given ZSTD_sizeof_CDict */ |
|
404 | 407 | ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); |
|
405 | 408 | |
|
406 | /*! ZSTD_getParams() : | |
|
407 | * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of a `ZSTD_compressionParameters`. | |
|
408 | * All fields of `ZSTD_frameParameters` are set to default (0) */ | |
|
409 | ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize); | |
|
409 | /*! ZSTD_getCParams() : | |
|
410 | * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize. | |
|
411 | * `estimatedSrcSize` value is optional, select 0 if not known */ | |
|
412 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); | |
|
410 | 413 | |
|
411 | /*! ZSTD_getCParams() : | |

412 | * @return ZSTD_compressionParameters structure for a selected compression level and srcSize. | |
|
413 | * `srcSize` value is optional, select 0 if not known */ | |
|
414 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize); | |

414 | /*! ZSTD_getParams() : | |
|
415 | * same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`. | |
|
416 | * All fields of `ZSTD_frameParameters` are set to default (0) */ | |
|
417 | ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); | |
|
415 | 418 | |
|
416 | 419 | /*! ZSTD_checkCParams() : |
|
417 | 420 | * Ensure param values remain within authorized range */ |
@@ -433,6 +436,13 b' ZSTDLIB_API size_t ZSTD_compress_advance' | |||
|
433 | 436 | |
|
434 | 437 | /*--- Advanced decompression functions ---*/ |
|
435 | 438 | |
|
439 | /*! ZSTD_isFrame() : | |
|
440 | * Tells if the content of `buffer` starts with a valid Frame Identifier. | |
|
441 | * Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0. | |
|
442 | * Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled. | |
|
443 | * Note 3 : Skippable Frame Identifiers are considered valid. */ | |
|
444 | ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size); | |
|
445 | ||
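
ZSTD_isFrame() gives callers a cheap sniff test before committing to a full decompression; per the notes above, four bytes suffice and skippable frames also count as valid:

    #include "zstd.h"

    static int looks_like_zstd(const void* p, size_t n)
    {
        return n >= 4 && ZSTD_isFrame(p, n) != 0;  /* n<4 returns 0 anyway */
    }
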
|
436 | 446 | /*! ZSTD_estimateDCtxSize() : |
|
437 | 447 | * Gives the potential amount of memory allocated to create a ZSTD_DCtx */ |
|
438 | 448 | ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void); |
@@ -449,6 +459,30 b' ZSTDLIB_API size_t ZSTD_sizeof_DCtx(cons' | |||
|
449 | 459 | * Gives the amount of memory used by a given ZSTD_DDict */ |
|
450 | 460 | ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); |
|
451 | 461 | |
|
462 | /*! ZSTD_getDictID_fromDict() : | |
|
463 | * Provides the dictID stored within dictionary. | |
|
464 | * if @return == 0, the dictionary is not conformant with Zstandard specification. | |
|
465 | * It can still be loaded, but as a content-only dictionary. */ | |
|
466 | unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); | |
|
467 | ||
|
468 | /*! ZSTD_getDictID_fromDDict() : | |
|
469 | * Provides the dictID of the dictionary loaded into `ddict`. | |
|
470 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | |
|
471 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | |
|
472 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); | |
|
473 | ||
|
474 | /*! ZSTD_getDictID_fromFrame() : | |
|
475 | * Provides the dictID required to decompressed the frame stored within `src`. | |
|
476 | * If @return == 0, the dictID could not be decoded. | |
|
477 | * This could for one of the following reasons : | |
|
478 | * - The frame does not require a dictionary to be decoded (most common case). | |
|
479 | * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. | |
|
480 | * Note : this use case also happens when using a non-conformant dictionary. | |
|
481 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). | |
|
482 | * - This is not a Zstandard frame. | |
|
483 | * When identifying the exact failure cause, it's possible to used ZSTD_getFrameParams(), which will provide a more precise error code. */ | |
|
484 | unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); | |
|
485 | ||
|
452 | 486 | |
|
453 | 487 | /******************************************************************** |
|
454 | 488 | * Advanced streaming functions |
@@ -456,6 +490,7 b' ZSTDLIB_API size_t ZSTD_sizeof_DDict(con' | |||
|
456 | 490 | |
|
457 | 491 | /*===== Advanced Streaming compression functions =====*/ |
|
458 | 492 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem); |
|
493 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct */ | |
|
459 | 494 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); |
|
460 | 495 | ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, |
|
461 | 496 | ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize is optional and can be zero == unknown */ |
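
The new ZSTD_initCStream_srcSize() covers the common case where the total input size is known before streaming starts: the pledge lets the encoder tune parameters for the real size rather than a guess, and, as the comment warns, it must be exact. Sketch:

    #include "zstd.h"

    /* begin a frame whose exact uncompressed size is known up front */
    static size_t begin_sized_frame(ZSTD_CStream* zcs,
                                    unsigned long long totalSize)
    {
        return ZSTD_initCStream_srcSize(zcs, 3 /* level */, totalSize);
    }
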
@@ -631,10 +666,8 b' ZSTDLIB_API size_t ZSTD_decompressBlock(' | |||
|
631 | 666 | ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize); /**< insert block into `dctx` history. Useful for uncompressed blocks */ |
|
632 | 667 | |
|
633 | 668 | |
|
634 | #endif /* ZSTD_STATIC_LINKING_ONLY */ | |
|
669 | #endif /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */ | |
|
635 | 670 | |
|
636 | 671 | #if defined (__cplusplus) |
|
637 | 672 | } |
|
638 | 673 | #endif |
|
639 | ||
|
640 | #endif /* ZSTD_H_235446 */ |
|