zstandard: vendor python-zstandard 0.10.1...
Gregory Szorc -
r40157:73fef626 default
@@ -0,0 +1,360 b''
1 /**
2 * Copyright (c) 2018-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
14 "Iterator of output chunks from ZstdCompressionChunker.\n"
15 );
16
17 static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
18 Py_XDECREF(self->chunker);
19
20 PyObject_Del(self);
21 }
22
23 static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
24 Py_INCREF(self);
25 return self;
26 }
27
28 static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
29 size_t zresult;
30 PyObject* chunk;
31 ZstdCompressionChunker* chunker = self->chunker;
32 ZSTD_EndDirective zFlushMode;
33
34 if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
35 PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
36 return NULL;
37 }
38
39 if (chunker->finished) {
40 return NULL;
41 }
42
43 /* If we have data left in the input, consume it. */
44 while (chunker->input.pos < chunker->input.size) {
45 Py_BEGIN_ALLOW_THREADS
46 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
47 &chunker->input, ZSTD_e_continue);
48 Py_END_ALLOW_THREADS
49
50 /* Input is fully consumed. */
51 if (chunker->input.pos == chunker->input.size) {
52 chunker->input.src = NULL;
53 chunker->input.pos = 0;
54 chunker->input.size = 0;
55 PyBuffer_Release(&chunker->inBuffer);
56 }
57
58 if (ZSTD_isError(zresult)) {
59 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
60 return NULL;
61 }
62
63 /* If it produced a full output chunk, emit it. */
64 if (chunker->output.pos == chunker->output.size) {
65 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
66 if (!chunk) {
67 return NULL;
68 }
69
70 chunker->output.pos = 0;
71
72 return chunk;
73 }
74
75 /* Else continue to compress available input data. */
76 }
77
78 /* We also need this here for the special case of an empty input buffer. */
79 if (chunker->input.pos == chunker->input.size) {
80 chunker->input.src = NULL;
81 chunker->input.pos = 0;
82 chunker->input.size = 0;
83 PyBuffer_Release(&chunker->inBuffer);
84 }
85
86 /* No more input data. A partial chunk may be in chunker->output.
87 * If we're in normal compression mode, we're done. Otherwise if we're in
88 * flush or finish mode, we need to emit what data remains.
89 */
90 if (self->mode == compressionchunker_mode_normal) {
91 /* We don't need to set StopIteration. */
92 return NULL;
93 }
94
95 if (self->mode == compressionchunker_mode_flush) {
96 zFlushMode = ZSTD_e_flush;
97 }
98 else if (self->mode == compressionchunker_mode_finish) {
99 zFlushMode = ZSTD_e_end;
100 }
101 else {
102 PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
103 return NULL;
104 }
105
106 Py_BEGIN_ALLOW_THREADS
107 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
108 &chunker->input, zFlushMode);
109 Py_END_ALLOW_THREADS
110
111 if (ZSTD_isError(zresult)) {
112 PyErr_Format(ZstdError, "zstd compress error: %s",
113 ZSTD_getErrorName(zresult));
114 return NULL;
115 }
116
117 if (!zresult && chunker->output.pos == 0) {
118 return NULL;
119 }
120
121 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
122 if (!chunk) {
123 return NULL;
124 }
125
126 chunker->output.pos = 0;
127
128 if (!zresult && self->mode == compressionchunker_mode_finish) {
129 chunker->finished = 1;
130 }
131
132 return chunk;
133 }
134
135 PyTypeObject ZstdCompressionChunkerIteratorType = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "zstd.ZstdCompressionChunkerIterator", /* tp_name */
138 sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
139 0, /* tp_itemsize */
140 (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
141 0, /* tp_print */
142 0, /* tp_getattr */
143 0, /* tp_setattr */
144 0, /* tp_compare */
145 0, /* tp_repr */
146 0, /* tp_as_number */
147 0, /* tp_as_sequence */
148 0, /* tp_as_mapping */
149 0, /* tp_hash */
150 0, /* tp_call */
151 0, /* tp_str */
152 0, /* tp_getattro */
153 0, /* tp_setattro */
154 0, /* tp_as_buffer */
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
156 ZstdCompressionChunkerIterator__doc__, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 ZstdCompressionChunkerIterator_iter, /* tp_iter */
162 (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
163 0, /* tp_methods */
164 0, /* tp_members */
165 0, /* tp_getset */
166 0, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 PyType_GenericNew, /* tp_new */
174 };
175
176 PyDoc_STRVAR(ZstdCompressionChunker__doc__,
177 "Compress chunks iteratively into exact chunk sizes.\n"
178 );
179
180 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
181 PyBuffer_Release(&self->inBuffer);
182 self->input.src = NULL;
183
184 PyMem_Free(self->output.dst);
185 self->output.dst = NULL;
186
187 Py_XDECREF(self->compressor);
188
189 PyObject_Del(self);
190 }
191
192 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
193 static char* kwlist[] = {
194 "data",
195 NULL
196 };
197
198 ZstdCompressionChunkerIterator* result;
199
200 if (self->finished) {
201 PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
202 return NULL;
203 }
204
205 if (self->inBuffer.obj) {
206 PyErr_SetString(ZstdError,
207 "cannot perform operation before consuming output from previous operation");
208 return NULL;
209 }
210
211 #if PY_MAJOR_VERSION >= 3
212 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
213 #else
214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
215 #endif
216 kwlist, &self->inBuffer)) {
217 return NULL;
218 }
219
220 if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
221 PyErr_SetString(PyExc_ValueError,
222 "data buffer should be contiguous and have at most one dimension");
223 PyBuffer_Release(&self->inBuffer);
224 return NULL;
225 }
226
227 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
228 if (!result) {
229 PyBuffer_Release(&self->inBuffer);
230 return NULL;
231 }
232
233 self->input.src = self->inBuffer.buf;
234 self->input.size = self->inBuffer.len;
235 self->input.pos = 0;
236
237 result->chunker = self;
238 Py_INCREF(result->chunker);
239
240 result->mode = compressionchunker_mode_normal;
241
242 return result;
243 }
244
245 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
246 ZstdCompressionChunkerIterator* result;
247
248 if (self->finished) {
249 PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
250 return NULL;
251 }
252
253 if (self->inBuffer.obj) {
254 PyErr_SetString(ZstdError,
255 "cannot call finish() before consuming output from previous operation");
256 return NULL;
257 }
258
259 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
260 if (!result) {
261 return NULL;
262 }
263
264 result->chunker = self;
265 Py_INCREF(result->chunker);
266
267 result->mode = compressionchunker_mode_finish;
268
269 return result;
270 }
271
272 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
273 ZstdCompressionChunkerIterator* result;
274
275 if (self->finished) {
276 PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
277 return NULL;
278 }
279
280 if (self->inBuffer.obj) {
281 PyErr_SetString(ZstdError,
282 "cannot call flush() before consuming output from previous operation");
283 return NULL;
284 }
285
286 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
287 if (!result) {
288 return NULL;
289 }
290
291 result->chunker = self;
292 Py_INCREF(result->chunker);
293
294 result->mode = compressionchunker_mode_flush;
295
296 return result;
297 }
298
299 static PyMethodDef ZstdCompressionChunker_methods[] = {
300 { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
301 PyDoc_STR("compress data") },
302 { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
303 PyDoc_STR("finish compression operation") },
304 { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
305   PyDoc_STR("flush compression operation") },
306 { NULL, NULL }
307 };
308
309 PyTypeObject ZstdCompressionChunkerType = {
310 PyVarObject_HEAD_INIT(NULL, 0)
311 "zstd.ZstdCompressionChunkerType", /* tp_name */
312 sizeof(ZstdCompressionChunker), /* tp_basicsize */
313 0, /* tp_itemsize */
314 (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
315 0, /* tp_print */
316 0, /* tp_getattr */
317 0, /* tp_setattr */
318 0, /* tp_compare */
319 0, /* tp_repr */
320 0, /* tp_as_number */
321 0, /* tp_as_sequence */
322 0, /* tp_as_mapping */
323 0, /* tp_hash */
324 0, /* tp_call */
325 0, /* tp_str */
326 0, /* tp_getattro */
327 0, /* tp_setattro */
328 0, /* tp_as_buffer */
329 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
330 ZstdCompressionChunker__doc__, /* tp_doc */
331 0, /* tp_traverse */
332 0, /* tp_clear */
333 0, /* tp_richcompare */
334 0, /* tp_weaklistoffset */
335 0, /* tp_iter */
336 0, /* tp_iternext */
337 ZstdCompressionChunker_methods, /* tp_methods */
338 0, /* tp_members */
339 0, /* tp_getset */
340 0, /* tp_base */
341 0, /* tp_dict */
342 0, /* tp_descr_get */
343 0, /* tp_descr_set */
344 0, /* tp_dictoffset */
345 0, /* tp_init */
346 0, /* tp_alloc */
347 PyType_GenericNew, /* tp_new */
348 };
349
350 void compressionchunker_module_init(PyObject* module) {
351 Py_TYPE(&ZstdCompressionChunkerIteratorType) = &PyType_Type;
352 if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
353 return;
354 }
355
356 Py_TYPE(&ZstdCompressionChunkerType) = &PyType_Type;
357 if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
358 return;
359 }
360 }
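
Taken together, the iterator above is a thin wrapper around zstd's one-pass streaming API: it feeds input with ZSTD_e_continue while data remains, switches to ZSTD_e_flush or ZSTD_e_end when the iterator was created by flush() or finish(), and yields a bytes object every time the fixed-size output buffer fills. Below is a minimal, hedged C sketch of that loop (not part of the vendored file), written against the same ZSTD_compress_generic advanced API this file calls; emit_chunk() is a hypothetical callback standing in for "yield one chunk".

/* Hedged sketch: the chunking loop the iterator above implements,
 * against the zstd 1.3.x advanced API (ZSTD_compress_generic, ZSTD_e_*). */
#define ZSTD_STATIC_LINKING_ONLY
#include <zstd.h>

static int chunk_stream(ZSTD_CCtx* cctx,
                        const void* src, size_t srcSize,
                        void* chunkBuf, size_t chunkSize,
                        void (*emit_chunk)(const void*, size_t))
{
    ZSTD_inBuffer in = { src, srcSize, 0 };
    ZSTD_outBuffer out = { chunkBuf, chunkSize, 0 };
    size_t remaining;

    /* Normal mode: ZSTD_e_continue; emit only completely full chunks. */
    while (in.pos < in.size) {
        size_t const r = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_continue);
        if (ZSTD_isError(r)) return -1;
        if (out.pos == out.size) {   /* output buffer filled: emit a chunk */
            emit_chunk(out.dst, out.pos);
            out.pos = 0;
        }
    }

    /* Finish mode: ZSTD_e_end until the frame epilogue is fully written;
     * the final, possibly partial, chunk is emitted as well. */
    do {
        remaining = ZSTD_compress_generic(cctx, &out, &in, ZSTD_e_end);
        if (ZSTD_isError(remaining)) return -1;
        if (out.pos) {
            emit_chunk(out.dst, out.pos);
            out.pos = 0;
        }
    } while (remaining != 0);

    return 0;
}
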
@@ -0,0 +1,44 b''
1 /* ******************************************************************
2 debug
3 Part of FSE library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35
36 /*
37 * This module only hosts one global variable
38 * which can be used to dynamically influence the verbosity of traces,
39 * such as DEBUGLOG and RAWLOG
40 */
41
42 #include "debug.h"
43
44 int g_debuglevel = DEBUGLEVEL;
@@ -0,0 +1,123 b''
1 /* ******************************************************************
2 debug
3 Part of FSE library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35
36 /*
37 * The purpose of this header is to enable debug functions.
38 * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
39 * and DEBUG_STATIC_ASSERT() for compile-time.
40 *
41 * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
42 *
43 * Level 1 enables assert() only.
44 * Starting level 2, traces can be generated and pushed to stderr.
45 * The higher the level, the more verbose the traces.
46 *
47	 * It's possible to dynamically adjust the level using the variable g_debuglevel,
48 * which is only declared if DEBUGLEVEL>=2,
49 * and is a global variable, not multi-thread protected (use with care)
50 */
51
52 #ifndef DEBUG_H_12987983217
53 #define DEBUG_H_12987983217
54
55 #if defined (__cplusplus)
56 extern "C" {
57 #endif
58
59
60 /* static assert is triggered at compile time, leaving no runtime artefact,
61 * but can only work with compile-time constants.
62 * This variant can only be used inside a function. */
63 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
64
65
66 /* DEBUGLEVEL is expected to be defined externally,
67 * typically through compiler command line.
68 * Value must be a number. */
69 #ifndef DEBUGLEVEL
70 # define DEBUGLEVEL 0
71 #endif
72
73 /* recommended values for DEBUGLEVEL :
74 * 0 : no debug, all run-time functions disabled
75 * 1 : no display, enables assert() only
76 * 2 : reserved, for currently active debug path
77 * 3 : events once per object lifetime (CCtx, CDict, etc.)
78 * 4 : events once per frame
79 * 5 : events once per block
80 * 6 : events once per sequence (verbose)
81 * 7+: events at every position (*very* verbose)
82 *
83 * It's generally inconvenient to output traces > 5.
84 * In which case, it's possible to selectively enable higher verbosity levels
85	 * by modifying g_debuglevel.
86 */
87
88 #if (DEBUGLEVEL>=1)
89 # include <assert.h>
90 #else
91 # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
92 # define assert(condition) ((void)0) /* disable assert (default) */
93 # endif
94 #endif
95
96 #if (DEBUGLEVEL>=2)
97 # include <stdio.h>
98 extern int g_debuglevel; /* here, this variable is only declared,
99 it actually lives in debug.c,
100 and is shared by the whole process.
101 It's typically used to enable very verbose levels
102 on selective conditions (such as position in src) */
103
104 # define RAWLOG(l, ...) { \
105 if (l<=g_debuglevel) { \
106 fprintf(stderr, __VA_ARGS__); \
107 } }
108 # define DEBUGLOG(l, ...) { \
109 if (l<=g_debuglevel) { \
110 fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
111 fprintf(stderr, " \n"); \
112 } }
113 #else
114 # define RAWLOG(l, ...) {} /* disabled */
115 # define DEBUGLOG(l, ...) {} /* disabled */
116 #endif
117
118
119 #if defined (__cplusplus)
120 }
121 #endif
122
123 #endif /* DEBUG_H_12987983217 */
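
As a hedged illustration of the knobs described in the header comment above (not part of the vendored sources): a translation unit compiled with -DDEBUGLEVEL=4 or higher could use the macros like this. The level numbers in the comments follow the recommended-values table in the header.

/* Hedged usage sketch; assumes compilation with -DDEBUGLEVEL=4 (or higher),
 * so the trace macros expand to real code and g_debuglevel is declared. */
#include "debug.h"

static void debug_demo(size_t srcSize)
{
    DEBUG_STATIC_ASSERT(sizeof(int) >= 2);  /* compile-time check, no runtime cost */
    assert(srcSize > 0);                    /* enabled from DEBUGLEVEL >= 1 */
    DEBUGLOG(4, "compressing %u bytes", (unsigned)srcSize);  /* once per frame */
    RAWLOG(5, "raw trace, no file prefix\n");                /* once per block */
    g_debuglevel = 6;  /* raise verbosity at run time; not thread-safe */
}
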
@@ -0,0 +1,195 b''
1 /* ******************************************************************
2 hist : Histogram functions
3 part of Finite State Entropy project
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 - Public forum : https://groups.google.com/forum/#!forum/lz4c
34 ****************************************************************** */
35
36 /* --- dependencies --- */
37 #include "mem.h" /* U32, BYTE, etc. */
38 #include "debug.h" /* assert, DEBUGLOG */
39 #include "error_private.h" /* ERROR */
40 #include "hist.h"
41
42
43 /* --- Error management --- */
44 unsigned HIST_isError(size_t code) { return ERR_isError(code); }
45
46 /*-**************************************************************
47 * Histogram functions
48 ****************************************************************/
49 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
50 const void* src, size_t srcSize)
51 {
52 const BYTE* ip = (const BYTE*)src;
53 const BYTE* const end = ip + srcSize;
54 unsigned maxSymbolValue = *maxSymbolValuePtr;
55 unsigned largestCount=0;
56
57 memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
58 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
59
60 while (ip<end) {
61 assert(*ip <= maxSymbolValue);
62 count[*ip++]++;
63 }
64
65 while (!count[maxSymbolValue]) maxSymbolValue--;
66 *maxSymbolValuePtr = maxSymbolValue;
67
68 { U32 s;
69 for (s=0; s<=maxSymbolValue; s++)
70 if (count[s] > largestCount) largestCount = count[s];
71 }
72
73 return largestCount;
74 }
75
76
77 /* HIST_count_parallel_wksp() :
78 * store histogram into 4 intermediate tables, recombined at the end.
79 * this design makes better use of OoO cpus,
80 * and is noticeably faster when some values are heavily repeated.
81 * But it needs some additional workspace for intermediate tables.
82 * `workSpace` size must be a table of size >= HIST_WKSP_SIZE_U32.
83 * @return : largest histogram frequency,
84 * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
85 static size_t HIST_count_parallel_wksp(
86 unsigned* count, unsigned* maxSymbolValuePtr,
87 const void* source, size_t sourceSize,
88 unsigned checkMax,
89 unsigned* const workSpace)
90 {
91 const BYTE* ip = (const BYTE*)source;
92 const BYTE* const iend = ip+sourceSize;
93 unsigned maxSymbolValue = *maxSymbolValuePtr;
94 unsigned max=0;
95 U32* const Counting1 = workSpace;
96 U32* const Counting2 = Counting1 + 256;
97 U32* const Counting3 = Counting2 + 256;
98 U32* const Counting4 = Counting3 + 256;
99
100 memset(workSpace, 0, 4*256*sizeof(unsigned));
101
102 /* safety checks */
103 if (!sourceSize) {
104 memset(count, 0, maxSymbolValue + 1);
105 *maxSymbolValuePtr = 0;
106 return 0;
107 }
108 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
109
110 /* by stripes of 16 bytes */
111 { U32 cached = MEM_read32(ip); ip += 4;
112 while (ip < iend-15) {
113 U32 c = cached; cached = MEM_read32(ip); ip += 4;
114 Counting1[(BYTE) c ]++;
115 Counting2[(BYTE)(c>>8) ]++;
116 Counting3[(BYTE)(c>>16)]++;
117 Counting4[ c>>24 ]++;
118 c = cached; cached = MEM_read32(ip); ip += 4;
119 Counting1[(BYTE) c ]++;
120 Counting2[(BYTE)(c>>8) ]++;
121 Counting3[(BYTE)(c>>16)]++;
122 Counting4[ c>>24 ]++;
123 c = cached; cached = MEM_read32(ip); ip += 4;
124 Counting1[(BYTE) c ]++;
125 Counting2[(BYTE)(c>>8) ]++;
126 Counting3[(BYTE)(c>>16)]++;
127 Counting4[ c>>24 ]++;
128 c = cached; cached = MEM_read32(ip); ip += 4;
129 Counting1[(BYTE) c ]++;
130 Counting2[(BYTE)(c>>8) ]++;
131 Counting3[(BYTE)(c>>16)]++;
132 Counting4[ c>>24 ]++;
133 }
134 ip-=4;
135 }
136
137 /* finish last symbols */
138 while (ip<iend) Counting1[*ip++]++;
139
140 if (checkMax) { /* verify stats will fit into destination table */
141 U32 s; for (s=255; s>maxSymbolValue; s--) {
142 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
143 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
144 } }
145
146 { U32 s;
147 if (maxSymbolValue > 255) maxSymbolValue = 255;
148 for (s=0; s<=maxSymbolValue; s++) {
149 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
150 if (count[s] > max) max = count[s];
151 } }
152
153 while (!count[maxSymbolValue]) maxSymbolValue--;
154 *maxSymbolValuePtr = maxSymbolValue;
155 return (size_t)max;
156 }
157
158 /* HIST_countFast_wksp() :
159 * Same as HIST_countFast(), but using an externally provided scratch buffer.
160 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
161 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
162 const void* source, size_t sourceSize,
163 unsigned* workSpace)
164 {
165 if (sourceSize < 1500) /* heuristic threshold */
166 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
167 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
168 }
169
170 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
171 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
172 const void* source, size_t sourceSize)
173 {
174 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
175 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
176 }
177
178 /* HIST_count_wksp() :
179 * Same as HIST_count(), but using an externally provided scratch buffer.
180 * `workSpace` size must be table of >= HIST_WKSP_SIZE_U32 unsigned */
181 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
182 const void* source, size_t sourceSize, unsigned* workSpace)
183 {
184 if (*maxSymbolValuePtr < 255)
185 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
186 *maxSymbolValuePtr = 255;
187 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
188 }
189
190 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
191 const void* src, size_t srcSize)
192 {
193 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
194 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
195 }
@@ -0,0 +1,92 b''
1 /* ******************************************************************
2 hist : Histogram functions
3 part of Finite State Entropy project
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 - Public forum : https://groups.google.com/forum/#!forum/lz4c
34 ****************************************************************** */
35
36 /* --- dependencies --- */
37 #include <stddef.h> /* size_t */
38
39
40 /* --- simple histogram functions --- */
41
42 /*! HIST_count():
43 * Provides the precise count of each byte within a table 'count'.
44 * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
45 * Updates *maxSymbolValuePtr with actual largest symbol value detected.
46 * @return : count of the most frequent symbol (which isn't identified).
47 * or an error code, which can be tested using HIST_isError().
48 * note : if return == srcSize, there is only one symbol.
49 */
50 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
51 const void* src, size_t srcSize);
52
53 unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */
54
55
56 /* --- advanced histogram functions --- */
57
58 #define HIST_WKSP_SIZE_U32 1024
59 /** HIST_count_wksp() :
60 * Same as HIST_count(), but using an externally provided scratch buffer.
61 * Benefit is this function will use very little stack space.
62 * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
63 */
64 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
65 const void* src, size_t srcSize,
66 unsigned* workSpace);
67
68 /** HIST_countFast() :
69 * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
70 * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
71 */
72 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
73 const void* src, size_t srcSize);
74
75 /** HIST_countFast_wksp() :
76 * Same as HIST_countFast(), but using an externally provided scratch buffer.
77 * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
78 */
79 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
80 const void* src, size_t srcSize,
81 unsigned* workSpace);
82
83 /*! HIST_count_simple() :
84 * Same as HIST_countFast(), this function is unsafe,
85 * and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
86 * It is also a bit slower for large inputs.
87 * However, it does not need any additional memory (not even on stack).
88 * @return : count of the most frequent symbol.
89 * Note this function doesn't produce any error (i.e. it must succeed).
90 */
91 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
92 const void* src, size_t srcSize);
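
A hedged usage sketch for the simple histogram API documented above (not part of the vendored sources). It assumes a 256-entry count table, which matches the byte-valued symbols these functions count.

/* Hedged sketch: count byte frequencies in a buffer and report the result. */
#include <stdio.h>
#include "hist.h"

static void histogram_demo(const void* src, size_t srcSize)
{
    unsigned count[256];
    unsigned maxSymbolValue = 255;  /* in: largest value expected; out: largest seen */
    size_t const largest = HIST_count(count, &maxSymbolValue, src, srcSize);
    if (HIST_isError(largest)) {
        printf("histogram failed\n");
        return;
    }
    printf("largest symbol value seen: %u, top frequency: %u\n",
           maxSymbolValue, (unsigned)largest);
}
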
@@ -0,0 +1,83 b''
1 #include <stdio.h> /* fprintf */
2 #include <stdlib.h> /* malloc, free, qsort */
3 #include <string.h> /* memset */
4 #include <time.h> /* clock */
5 #include "mem.h" /* read */
6 #include "pool.h"
7 #include "threading.h"
8 #include "zstd_internal.h" /* includes zstd.h */
9 #ifndef ZDICT_STATIC_LINKING_ONLY
10 #define ZDICT_STATIC_LINKING_ONLY
11 #endif
12 #include "zdict.h"
13
14 /**
15 * COVER_best_t is used for two purposes:
16 * 1. Synchronizing threads.
17 * 2. Saving the best parameters and dictionary.
18 *
19 * All of the methods except COVER_best_init() are thread safe if zstd is
20 * compiled with multithreaded support.
21 */
22 typedef struct COVER_best_s {
23 ZSTD_pthread_mutex_t mutex;
24 ZSTD_pthread_cond_t cond;
25 size_t liveJobs;
26 void *dict;
27 size_t dictSize;
28 ZDICT_cover_params_t parameters;
29 size_t compressedSize;
30 } COVER_best_t;
31
32 /**
33 * A segment is a range in the source as well as the score of the segment.
34 */
35 typedef struct {
36 U32 begin;
37 U32 end;
38 U32 score;
39 } COVER_segment_t;
40
41 /**
42 * Checks total compressed size of a dictionary
43 */
44 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
45 const size_t *samplesSizes, const BYTE *samples,
46 size_t *offsets,
47 size_t nbTrainSamples, size_t nbSamples,
48 BYTE *const dict, size_t dictBufferCapacity);
49
50 /**
51 * Returns the sum of the sample sizes.
52 */
53 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) ;
54
55 /**
56 * Initialize the `COVER_best_t`.
57 */
58 void COVER_best_init(COVER_best_t *best);
59
60 /**
61 * Wait until liveJobs == 0.
62 */
63 void COVER_best_wait(COVER_best_t *best);
64
65 /**
66 * Call COVER_best_wait() and then destroy the COVER_best_t.
67 */
68 void COVER_best_destroy(COVER_best_t *best);
69
70 /**
71 * Called when a thread is about to be launched.
72 * Increments liveJobs.
73 */
74 void COVER_best_start(COVER_best_t *best);
75
76 /**
77 * Called when a thread finishes executing, both on error or success.
78 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
79 * If this dictionary is the best so far save it and its parameters.
80 */
81 void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
82 ZDICT_cover_params_t parameters, void *dict,
83 size_t dictSize);
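
For orientation, a hedged sketch of the COVER_best_t lifecycle implied by the comments above (not part of the vendored sources). The actual candidate-building work is elided; in the real dictionary builders (cover.c, fastcover.c) the start/finish pair brackets a job handed to a thread pool.

/* Hedged lifecycle sketch for the synchronization helpers declared above. */
#include "cover.h"

static void best_lifecycle_demo(ZDICT_cover_params_t params,
                                void* dict, size_t dictSize,
                                size_t compressedSize)
{
    COVER_best_t best;
    COVER_best_init(&best);     /* not thread safe; call once, up front */

    COVER_best_start(&best);    /* liveJobs++ before launching a worker */
    /* ... a worker builds a candidate dictionary and measures compressedSize ... */
    COVER_best_finish(&best, compressedSize, params, dict, dictSize);
                                /* keeps the candidate only if it is the best so far */

    COVER_best_wait(&best);     /* block until liveJobs drops to 0 */
    COVER_best_destroy(&best);  /* wait, then free the stored dictionary */
}
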
@@ -0,0 +1,728 b''
1 /*-*************************************
2 * Dependencies
3 ***************************************/
4 #include <stdio.h> /* fprintf */
5 #include <stdlib.h> /* malloc, free, qsort */
6 #include <string.h> /* memset */
7 #include <time.h> /* clock */
8
9 #include "mem.h" /* read */
10 #include "pool.h"
11 #include "threading.h"
12 #include "cover.h"
13 #include "zstd_internal.h" /* includes zstd.h */
14 #ifndef ZDICT_STATIC_LINKING_ONLY
15 #define ZDICT_STATIC_LINKING_ONLY
16 #endif
17 #include "zdict.h"
18
19
20 /*-*************************************
21 * Constants
22 ***************************************/
23 #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
24 #define FASTCOVER_MAX_F 31
25 #define FASTCOVER_MAX_ACCEL 10
26 #define DEFAULT_SPLITPOINT 0.75
27 #define DEFAULT_F 20
28 #define DEFAULT_ACCEL 1
29
30
31 /*-*************************************
32 * Console display
33 ***************************************/
34 static int g_displayLevel = 2;
35 #define DISPLAY(...) \
36 { \
37 fprintf(stderr, __VA_ARGS__); \
38 fflush(stderr); \
39 }
40 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
41 if (displayLevel >= l) { \
42 DISPLAY(__VA_ARGS__); \
43 } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
44 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
45
46 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
47 if (displayLevel >= l) { \
48 if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
49 g_time = clock(); \
50 DISPLAY(__VA_ARGS__); \
51 } \
52 }
53 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
54 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
55 static clock_t g_time = 0;
56
57
58 /*-*************************************
59 * Hash Functions
60 ***************************************/
61 static const U64 prime6bytes = 227718039650203ULL;
62 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
63 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
64
65 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
66 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
67 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
68
69
70 /**
71 * Hash the d-byte value pointed to by p and mod 2^f
72 */
73 static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
74 if (d == 6) {
75 return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
76 }
77 return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
78 }
79
80
81 /*-*************************************
82 * Acceleration
83 ***************************************/
84 typedef struct {
85 unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */
86 unsigned skip; /* Number of dmer skipped between each dmer counted in computeFrequency */
87 } FASTCOVER_accel_t;
88
89
90 static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = {
91 { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */
92 { 100, 0 }, /* accel = 1 */
93 { 50, 1 }, /* accel = 2 */
94 { 34, 2 }, /* accel = 3 */
95 { 25, 3 }, /* accel = 4 */
96 { 20, 4 }, /* accel = 5 */
97 { 17, 5 }, /* accel = 6 */
98 { 14, 6 }, /* accel = 7 */
99 { 13, 7 }, /* accel = 8 */
100 { 11, 8 }, /* accel = 9 */
101 { 10, 9 }, /* accel = 10 */
102 };
103
104
105 /*-*************************************
106 * Context
107 ***************************************/
108 typedef struct {
109 const BYTE *samples;
110 size_t *offsets;
111 const size_t *samplesSizes;
112 size_t nbSamples;
113 size_t nbTrainSamples;
114 size_t nbTestSamples;
115 size_t nbDmers;
116 U32 *freqs;
117 unsigned d;
118 unsigned f;
119 FASTCOVER_accel_t accelParams;
120 } FASTCOVER_ctx_t;
121
122
123 /*-*************************************
124 * Helper functions
125 ***************************************/
126 /**
127 * Selects the best segment in an epoch.
128	 * Segments are scored according to the function:
129 *
130 * Let F(d) be the frequency of all dmers with hash value d.
131 * Let S_i be hash value of the dmer at position i of segment S which has length k.
132 *
133 * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
134 *
135	 * Once the dmer with hash value d is in the dictionary we set F(d) = 0.
136 */
137 static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
138 U32 *freqs, U32 begin, U32 end,
139 ZDICT_cover_params_t parameters,
140 U16* segmentFreqs) {
141 /* Constants */
142 const U32 k = parameters.k;
143 const U32 d = parameters.d;
144 const U32 f = ctx->f;
145 const U32 dmersInK = k - d + 1;
146
147 /* Try each segment (activeSegment) and save the best (bestSegment) */
148 COVER_segment_t bestSegment = {0, 0, 0};
149 COVER_segment_t activeSegment;
150
151 /* Reset the activeDmers in the segment */
152 /* The activeSegment starts at the beginning of the epoch. */
153 activeSegment.begin = begin;
154 activeSegment.end = begin;
155 activeSegment.score = 0;
156
157 /* Slide the activeSegment through the whole epoch.
158 * Save the best segment in bestSegment.
159 */
160 while (activeSegment.end < end) {
161 /* Get hash value of current dmer */
162 const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
163
164	    /* Add frequency of this index to score if this is the first occurrence of index in active segment */
165 if (segmentFreqs[index] == 0) {
166 activeSegment.score += freqs[index];
167 }
168 /* Increment end of segment and segmentFreqs*/
169 activeSegment.end += 1;
170 segmentFreqs[index] += 1;
171 /* If the window is now too large, drop the first position */
172 if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
173 /* Get hash value of the dmer to be eliminated from active segment */
174 const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
175 segmentFreqs[delIndex] -= 1;
176 /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
177 if (segmentFreqs[delIndex] == 0) {
178 activeSegment.score -= freqs[delIndex];
179 }
180 /* Increment start of segment */
181 activeSegment.begin += 1;
182 }
183
184 /* If this segment is the best so far save it */
185 if (activeSegment.score > bestSegment.score) {
186 bestSegment = activeSegment;
187 }
188 }
189
190 /* Zero out rest of segmentFreqs array */
191 while (activeSegment.begin < end) {
192 const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
193 segmentFreqs[delIndex] -= 1;
194 activeSegment.begin += 1;
195 }
196
197 {
198 /* Zero the frequency of hash value of each dmer covered by the chosen segment. */
199 U32 pos;
200 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
201 const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d);
202 freqs[i] = 0;
203 }
204 }
205
206 return bestSegment;
207 }
208
209
210 static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters,
211 size_t maxDictSize, unsigned f,
212 unsigned accel) {
213 /* k, d, and f are required parameters */
214 if (parameters.d == 0 || parameters.k == 0) {
215 return 0;
216 }
217 /* d has to be 6 or 8 */
218 if (parameters.d != 6 && parameters.d != 8) {
219 return 0;
220 }
221 /* k <= maxDictSize */
222 if (parameters.k > maxDictSize) {
223 return 0;
224 }
225 /* d <= k */
226 if (parameters.d > parameters.k) {
227 return 0;
228 }
229 /* 0 < f <= FASTCOVER_MAX_F*/
230 if (f > FASTCOVER_MAX_F || f == 0) {
231 return 0;
232 }
233 /* 0 < splitPoint <= 1 */
234 if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
235 return 0;
236 }
237 /* 0 < accel <= 10 */
238 if (accel > 10 || accel == 0) {
239 return 0;
240 }
241 return 1;
242 }
243
244
245 /**
246 * Clean up a context initialized with `FASTCOVER_ctx_init()`.
247 */
248 static void
249 FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx)
250 {
251 if (!ctx) return;
252
253 free(ctx->freqs);
254 ctx->freqs = NULL;
255
256 free(ctx->offsets);
257 ctx->offsets = NULL;
258 }
259
260
261 /**
262	 * Calculate the frequency of the hash value of each dmer in ctx->samples
263 */
264 static void
265 FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
266 {
267 const unsigned f = ctx->f;
268 const unsigned d = ctx->d;
269 const unsigned skip = ctx->accelParams.skip;
270 const unsigned readLength = MAX(d, 8);
271 size_t i;
272 assert(ctx->nbTrainSamples >= 5);
273 assert(ctx->nbTrainSamples <= ctx->nbSamples);
274 for (i = 0; i < ctx->nbTrainSamples; i++) {
275 size_t start = ctx->offsets[i]; /* start of current dmer */
276 size_t const currSampleEnd = ctx->offsets[i+1];
277 while (start + readLength <= currSampleEnd) {
278 const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d);
279 freqs[dmerIndex]++;
280 start = start + skip + 1;
281 }
282 }
283 }
284
285
286 /**
287 * Prepare a context for dictionary building.
288	 * The context is only dependent on the parameter `d` and can be used multiple
289 * times.
290 * Returns 1 on success or zero on error.
291 * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
292 */
293 static int
294 FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
295 const void* samplesBuffer,
296 const size_t* samplesSizes, unsigned nbSamples,
297 unsigned d, double splitPoint, unsigned f,
298 FASTCOVER_accel_t accelParams)
299 {
300 const BYTE* const samples = (const BYTE*)samplesBuffer;
301 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
302 /* Split samples into testing and training sets */
303 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
304 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
305 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
306 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
307
308 /* Checks */
309 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
310 totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
311 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
312 (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
313 return 0;
314 }
315
316 /* Check if there are at least 5 training samples */
317 if (nbTrainSamples < 5) {
318 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
319 return 0;
320 }
321
322 /* Check if there's testing sample */
323 if (nbTestSamples < 1) {
324 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
325 return 0;
326 }
327
328 /* Zero the context */
329 memset(ctx, 0, sizeof(*ctx));
330 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
331 (U32)trainingSamplesSize);
332 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
333 (U32)testSamplesSize);
334
335 ctx->samples = samples;
336 ctx->samplesSizes = samplesSizes;
337 ctx->nbSamples = nbSamples;
338 ctx->nbTrainSamples = nbTrainSamples;
339 ctx->nbTestSamples = nbTestSamples;
340 ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
341 ctx->d = d;
342 ctx->f = f;
343 ctx->accelParams = accelParams;
344
345 /* The offsets of each file */
346 ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t));
347 if (ctx->offsets == NULL) {
348 DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
349 FASTCOVER_ctx_destroy(ctx);
350 return 0;
351 }
352
353 /* Fill offsets from the samplesSizes */
354 { U32 i;
355 ctx->offsets[0] = 0;
356 assert(nbSamples >= 5);
357 for (i = 1; i <= nbSamples; ++i) {
358 ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
359 }
360 }
361
362 /* Initialize frequency array of size 2^f */
363 ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32));
364 if (ctx->freqs == NULL) {
365 DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
366 FASTCOVER_ctx_destroy(ctx);
367 return 0;
368 }
369
370 DISPLAYLEVEL(2, "Computing frequencies\n");
371 FASTCOVER_computeFrequency(ctx->freqs, ctx);
372
373 return 1;
374 }
375
376
377 /**
378 * Given the prepared context build the dictionary.
379 */
380 static size_t
381 FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
382 U32* freqs,
383 void* dictBuffer, size_t dictBufferCapacity,
384 ZDICT_cover_params_t parameters,
385 U16* segmentFreqs)
386 {
387 BYTE *const dict = (BYTE *)dictBuffer;
388 size_t tail = dictBufferCapacity;
389 /* Divide the data up into epochs of equal size.
390 * We will select at least one segment from each epoch.
391 */
392 const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
393 const U32 epochSize = (U32)(ctx->nbDmers / epochs);
394 size_t epoch;
395 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
396 epochSize);
397 /* Loop through the epochs until there are no more segments or the dictionary
398 * is full.
399 */
400 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
401 const U32 epochBegin = (U32)(epoch * epochSize);
402 const U32 epochEnd = epochBegin + epochSize;
403 size_t segmentSize;
404 /* Select a segment */
405 COVER_segment_t segment = FASTCOVER_selectSegment(
406 ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
407
408 /* If the segment covers no dmers, then we are out of content */
409 if (segment.score == 0) {
410 break;
411 }
412
413 /* Trim the segment if necessary and if it is too small then we are done */
414 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
415 if (segmentSize < parameters.d) {
416 break;
417 }
418
419 /* We fill the dictionary from the back to allow the best segments to be
420 * referenced with the smallest offsets.
421 */
422 tail -= segmentSize;
423 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
424 DISPLAYUPDATE(
425 2, "\r%u%% ",
426 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
427 }
428 DISPLAYLEVEL(2, "\r%79s\r", "");
429 return tail;
430 }
431
432
433 /**
434 * Parameters for FASTCOVER_tryParameters().
435 */
436 typedef struct FASTCOVER_tryParameters_data_s {
437 const FASTCOVER_ctx_t* ctx;
438 COVER_best_t* best;
439 size_t dictBufferCapacity;
440 ZDICT_cover_params_t parameters;
441 } FASTCOVER_tryParameters_data_t;
442
443
444 /**
445 * Tries a set of parameters and updates the COVER_best_t with the results.
446 * This function is thread safe if zstd is compiled with multithreaded support.
447 * It takes its parameters as an *OWNING* opaque pointer to support threading.
448 */
449 static void FASTCOVER_tryParameters(void *opaque)
450 {
451 /* Save parameters as local variables */
452 FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
453 const FASTCOVER_ctx_t *const ctx = data->ctx;
454 const ZDICT_cover_params_t parameters = data->parameters;
455 size_t dictBufferCapacity = data->dictBufferCapacity;
456 size_t totalCompressedSize = ERROR(GENERIC);
457 /* Initialize array to keep track of frequency of dmer within activeSegment */
458 U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
459 /* Allocate space for hash table, dict, and freqs */
460 BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
461 U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
462 if (!segmentFreqs || !dict || !freqs) {
463 DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
464 goto _cleanup;
465 }
466 /* Copy the frequencies because we need to modify them */
467 memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
468 /* Build the dictionary */
469 { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
470 parameters, segmentFreqs);
471 const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
472 dictBufferCapacity = ZDICT_finalizeDictionary(
473 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
474 ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
475 if (ZDICT_isError(dictBufferCapacity)) {
476 DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
477 goto _cleanup;
478 }
479 }
480 /* Check total compressed size */
481 totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
482 ctx->samples, ctx->offsets,
483 ctx->nbTrainSamples, ctx->nbSamples,
484 dict, dictBufferCapacity);
485 _cleanup:
486 COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
487 dictBufferCapacity);
488 free(data);
489 free(segmentFreqs);
490 free(dict);
491 free(freqs);
492 }
493
494
495 static void
496 FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
497 ZDICT_cover_params_t* coverParams)
498 {
499 coverParams->k = fastCoverParams.k;
500 coverParams->d = fastCoverParams.d;
501 coverParams->steps = fastCoverParams.steps;
502 coverParams->nbThreads = fastCoverParams.nbThreads;
503 coverParams->splitPoint = fastCoverParams.splitPoint;
504 coverParams->zParams = fastCoverParams.zParams;
505 }
506
507
508 static void
509 FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
510 ZDICT_fastCover_params_t* fastCoverParams,
511 unsigned f, unsigned accel)
512 {
513 fastCoverParams->k = coverParams.k;
514 fastCoverParams->d = coverParams.d;
515 fastCoverParams->steps = coverParams.steps;
516 fastCoverParams->nbThreads = coverParams.nbThreads;
517 fastCoverParams->splitPoint = coverParams.splitPoint;
518 fastCoverParams->f = f;
519 fastCoverParams->accel = accel;
520 fastCoverParams->zParams = coverParams.zParams;
521 }
522
523
524 ZDICTLIB_API size_t
525 ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
526 const void* samplesBuffer,
527 const size_t* samplesSizes, unsigned nbSamples,
528 ZDICT_fastCover_params_t parameters)
529 {
530 BYTE* const dict = (BYTE*)dictBuffer;
531 FASTCOVER_ctx_t ctx;
532 ZDICT_cover_params_t coverParams;
533 FASTCOVER_accel_t accelParams;
534 /* Initialize global data */
535 g_displayLevel = parameters.zParams.notificationLevel;
536 /* Assign splitPoint and f if not provided */
537 parameters.splitPoint = 1.0;
538 parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
539 parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel;
540 /* Convert to cover parameter */
541 memset(&coverParams, 0 , sizeof(coverParams));
542 FASTCOVER_convertToCoverParams(parameters, &coverParams);
543 /* Checks */
544 if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
545 parameters.accel)) {
546 DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
547 return ERROR(GENERIC);
548 }
549 if (nbSamples == 0) {
550 DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
551 return ERROR(GENERIC);
552 }
553 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
554 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
555 ZDICT_DICTSIZE_MIN);
556 return ERROR(dstSize_tooSmall);
557 }
558 /* Assign corresponding FASTCOVER_accel_t to accelParams*/
559 accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
560 /* Initialize context */
561 if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
562 coverParams.d, parameters.splitPoint, parameters.f,
563 accelParams)) {
564 DISPLAYLEVEL(1, "Failed to initialize context\n");
565 return ERROR(GENERIC);
566 }
567 /* Build the dictionary */
568 DISPLAYLEVEL(2, "Building dictionary\n");
569 {
570 /* Initialize array to keep track of frequency of dmer within activeSegment */
571 U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16));
572 const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
573 dictBufferCapacity, coverParams, segmentFreqs);
574 const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100);
575 const size_t dictionarySize = ZDICT_finalizeDictionary(
576 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
577 samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
578 if (!ZSTD_isError(dictionarySize)) {
579 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
580 (U32)dictionarySize);
581 }
582 FASTCOVER_ctx_destroy(&ctx);
583 free(segmentFreqs);
584 return dictionarySize;
585 }
586 }
587
588
589 ZDICTLIB_API size_t
590 ZDICT_optimizeTrainFromBuffer_fastCover(
591 void* dictBuffer, size_t dictBufferCapacity,
592 const void* samplesBuffer,
593 const size_t* samplesSizes, unsigned nbSamples,
594 ZDICT_fastCover_params_t* parameters)
595 {
596 ZDICT_cover_params_t coverParams;
597 FASTCOVER_accel_t accelParams;
598 /* constants */
599 const unsigned nbThreads = parameters->nbThreads;
600 const double splitPoint =
601 parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
602 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
603 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
604 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
605 const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
606 const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
607 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
608 const unsigned kIterations =
609 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
610 const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
611 const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
612 /* Local variables */
613 const int displayLevel = parameters->zParams.notificationLevel;
614 unsigned iteration = 1;
615 unsigned d;
616 unsigned k;
617 COVER_best_t best;
618 POOL_ctx *pool = NULL;
619 /* Checks */
620 if (splitPoint <= 0 || splitPoint > 1) {
621 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
622 return ERROR(GENERIC);
623 }
624 if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
625 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
626 return ERROR(GENERIC);
627 }
628 if (kMinK < kMaxD || kMaxK < kMinK) {
629 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
630 return ERROR(GENERIC);
631 }
632 if (nbSamples == 0) {
633 LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
634 return ERROR(GENERIC);
635 }
636 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
637 LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
638 ZDICT_DICTSIZE_MIN);
639 return ERROR(dstSize_tooSmall);
640 }
641 if (nbThreads > 1) {
642 pool = POOL_create(nbThreads, 1);
643 if (!pool) {
644 return ERROR(memory_allocation);
645 }
646 }
647 /* Initialization */
648 COVER_best_init(&best);
649 memset(&coverParams, 0 , sizeof(coverParams));
650 FASTCOVER_convertToCoverParams(*parameters, &coverParams);
651 accelParams = FASTCOVER_defaultAccelParameters[accel];
652 /* Turn down global display level to clean up display at level 2 and below */
653 g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
654 /* Loop through d first because each new value needs a new context */
655 LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
656 kIterations);
657 for (d = kMinD; d <= kMaxD; d += 2) {
658 /* Initialize the context for this value of d */
659 FASTCOVER_ctx_t ctx;
660 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
661 if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
662 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
663 COVER_best_destroy(&best);
664 POOL_free(pool);
665 return ERROR(GENERIC);
666 }
667 /* Loop through k reusing the same context */
668 for (k = kMinK; k <= kMaxK; k += kStepSize) {
669 /* Prepare the arguments */
670 FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
671 sizeof(FASTCOVER_tryParameters_data_t));
672 LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
673 if (!data) {
674 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
675 COVER_best_destroy(&best);
676 FASTCOVER_ctx_destroy(&ctx);
677 POOL_free(pool);
678 return ERROR(GENERIC);
679 }
680 data->ctx = &ctx;
681 data->best = &best;
682 data->dictBufferCapacity = dictBufferCapacity;
683 data->parameters = coverParams;
684 data->parameters.k = k;
685 data->parameters.d = d;
686 data->parameters.splitPoint = splitPoint;
687 data->parameters.steps = kSteps;
688 data->parameters.zParams.notificationLevel = g_displayLevel;
689 /* Check the parameters */
690 if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
691 data->ctx->f, accel)) {
692 DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
693 free(data);
694 continue;
695 }
696 /* Call the function and pass ownership of data to it */
697 COVER_best_start(&best);
698 if (pool) {
699 POOL_add(pool, &FASTCOVER_tryParameters, data);
700 } else {
701 FASTCOVER_tryParameters(data);
702 }
703 /* Print status */
704 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
705 (U32)((iteration * 100) / kIterations));
706 ++iteration;
707 }
708 COVER_best_wait(&best);
709 FASTCOVER_ctx_destroy(&ctx);
710 }
711 LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
712 /* Fill the output buffer and parameters with output of the best parameters */
713 {
714 const size_t dictSize = best.dictSize;
715 if (ZSTD_isError(best.compressedSize)) {
716 const size_t compressedSize = best.compressedSize;
717 COVER_best_destroy(&best);
718 POOL_free(pool);
719 return compressedSize;
720 }
721 FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
722 memcpy(dictBuffer, best.dict, dictSize);
723 COVER_best_destroy(&best);
724 POOL_free(pool);
725 return dictSize;
726 }
727
728 }
@@ -6,6 +6,7 b' mercurial/cext/osutil.c'
6 6 mercurial/cext/revlog.c
7 7 # Vendored code that we should never format:
8 8 contrib/python-zstandard/c-ext/bufferutil.c
9 contrib/python-zstandard/c-ext/compressionchunker.c
9 10 contrib/python-zstandard/c-ext/compressiondict.c
10 11 contrib/python-zstandard/c-ext/compressionparams.c
11 12 contrib/python-zstandard/c-ext/compressionreader.c
@@ -25,6 +26,8 b' contrib/python-zstandard/zstd.c'
25 26 contrib/python-zstandard/zstd/common/bitstream.h
26 27 contrib/python-zstandard/zstd/common/compiler.h
27 28 contrib/python-zstandard/zstd/common/cpu.h
29 contrib/python-zstandard/zstd/common/debug.c
30 contrib/python-zstandard/zstd/common/debug.h
28 31 contrib/python-zstandard/zstd/common/entropy_common.c
29 32 contrib/python-zstandard/zstd/common/error_private.c
30 33 contrib/python-zstandard/zstd/common/error_private.h
@@ -42,6 +45,8 b' contrib/python-zstandard/zstd/common/zst'
42 45 contrib/python-zstandard/zstd/common/zstd_errors.h
43 46 contrib/python-zstandard/zstd/common/zstd_internal.h
44 47 contrib/python-zstandard/zstd/compress/fse_compress.c
48 contrib/python-zstandard/zstd/compress/hist.c
49 contrib/python-zstandard/zstd/compress/hist.h
45 50 contrib/python-zstandard/zstd/compress/huf_compress.c
46 51 contrib/python-zstandard/zstd/compress/zstd_compress.c
47 52 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
@@ -64,8 +69,10 b' contrib/python-zstandard/zstd/deprecated'
64 69 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
65 70 contrib/python-zstandard/zstd/deprecated/zbuff.h
66 71 contrib/python-zstandard/zstd/dictBuilder/cover.c
72 contrib/python-zstandard/zstd/dictBuilder/cover.h
67 73 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
68 74 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
75 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
69 76 contrib/python-zstandard/zstd/dictBuilder/zdict.c
70 77 contrib/python-zstandard/zstd/dictBuilder/zdict.h
71 78 contrib/python-zstandard/zstd/zstd.h
@@ -1,7 +1,10 b''
1 1 graft c-ext
2 graft debian
2 3 graft zstd
3 4 graft tests
4 5 include make_cffi.py
5 6 include setup_zstd.py
6 7 include zstd.c
8 include zstd_cffi.py
7 9 include LICENSE
10 include NEWS.rst
@@ -30,6 +30,19 b' Actions Blocking Release'
30 30 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
31 31 require use of ``CompressionParameters``.
32 32 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
33 * Support modifying compression parameters mid operation when supported by
34 zstd API.
35 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
36 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
37 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
38 compression parameters.
39 * Consider exposing ``ZSTDMT_toFlushNow()``.
40 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
41 ``ZDICT_optimizeTrainFromBuffer_fastCover``.
42 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
43 * Consider a ``chunker()`` API for decompression.
44 * Consider stats for ``chunker()`` API, including finding the last consumed
45 offset of input data.
33 46
34 47 Other Actions Not Blocking Release
35 48 ---------------------------------------
@@ -38,6 +51,111 b' Other Actions Not Blocking Release'
38 51 * API for ensuring max memory ceiling isn't exceeded.
39 52 * Move off nose for testing.
40 53
54 0.10.1 (released 2018-10-08)
55 ============================
56
57 Backwards Compatibility Notes
58 -----------------------------
59
60 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
61 method (#58).
62 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
63 method (#58).
64
65 Changes
66 -------
67
68 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
69 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
70 since this were built against Python 3.7.
71 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
72 ``closed`` attribute is now a read-only property instead of a method. This now
73 properly matches the ``IOBase`` API and allows instances to be used in more
74 places that accept ``IOBase`` instances.
75
76 0.10.0 (released 2018-10-08)
77 ============================
78
79 Backwards Compatibility Notes
80 -----------------------------
81
82 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
83 argument in both the C and CFFI backends. Before, the CFFI implementation
84 would assume a default value of ``-1``, which was later rejected.
85 * The ``compress_literals`` argument and attribute has been removed from
86 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
87 API.
88 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
89 operation performed against ``ZstdCompressor`` instances. The reason for this
90 change is that the zstd 1.3.5 API no longer allows this without calling
91 ``ZSTD_CCtx_resetParameters()`` first. But if we called
92 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
93 potentially expensive setup when using dictionaries. We now call
94 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
95 compression parameters.
96 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
97 used as a context manager. The context manager interface still exists and its
98 behavior is unchanged.
99 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
100 used as a context manager. The context manager interface still exists and its
101 behavior is unchanged.
102
103 Bug Fixes
104 ---------
105
106 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
107 from internal buffers in more scenarios. Before, it was possible for data to
108 remain in internal buffers. This data would be emitted on a subsequent call
109 to ``decompress()``. The overall output stream would still be valid. But if
110 callers were expecting input data to exactly map to output data (say the
111 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
112 map input chunks to output chunks), then the previous behavior would be
113 wrong. The new behavior is such that output from
114 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
115 should produce all available compressed input.
116 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
117 a previous context manager resulted in error (#56).
118 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
119 all data necessary to flush a block. Before, it was possible for the
120 ``flush()`` to not emit all data necessary to fully represent a block. This
121 would mean decompressors wouldn't be able to decompress all data that had been
122 fed into the compressor and ``flush()``ed. (#55).
123
124 New Features
125 ------------
126
127 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
128 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
129 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
130 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
131 API doesn't impose restrictions on the input or output types for the
132 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
133 fixed size. This makes this API useful when the compressed output is being
134 fed into an I/O layer, where uniform write sizes are useful.
135 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
136 manager (#34).
137 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
138 manager (#34).
139 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
140
141 Changes
142 -------
143
144 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
145 * ``zstandard.__version__`` is now defined (#50).
146 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
147 * Upgrade various packages used in CI to latest versions. Notably tox (in
148 order to support Python 3.7).
149 * Use relative paths in setup.py to appease Python 3.7 (#51).
150 * Added CI for Python 3.7.
151
152 0.9.1 (released 2018-06-04)
153 ===========================
154
155 * Debian packaging support.
156 * Fix typo in setup.py (#44).
157 * Support building with mingw compiler (#46).
158
41 159 0.9.0 (released 2018-04-08)
42 160 ===========================
43 161
@@ -90,7 +208,7 b' Bug Fixes'
90 208 New Features
91 209 ------------
92 210
93 * Bundlded zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
211 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
94 212 bug fixes and performance improvements. It also gives us access to newer
95 213 features.
96 214 * Support for negative compression levels.
@@ -196,6 +196,17 b' Stream Reader API'
196 196
197 197 with open(path, 'rb') as fh:
198 198 cctx = zstd.ZstdCompressor()
199 reader = cctx.stream_reader(fh)
200 while True:
201 chunk = reader.read(16384)
202 if not chunk:
203 break
204
205 # Do something with compressed chunk.
206
207 Instances can also be used as context managers::
208
209 with open(path, 'rb') as fh:
199 210 with cctx.stream_reader(fh) as reader:
200 211 while True:
201 212 chunk = reader.read(16384)
@@ -204,9 +215,9 b' Stream Reader API'
204 215
205 216 # Do something with compressed chunk.
206 217
207 The stream can only be read within a context manager. When the context
208 manager exits, the stream is closed and the underlying resource is
209 released and future operations against the compression stream stream will fail.
218 When the context manager exits or ``close()`` is called, the stream is closed,
219 underlying resources are released, and future operations against the compression
220 stream will fail.
210 221
211 222 The ``source`` argument to ``stream_reader()`` can be any object with a
212 223 ``read(size)`` method or any object implementing the *buffer protocol*.
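For instance, both a file object and an in-memory buffer work as sources (a
sketch; ``path`` and the literal bytes below are placeholders)::

    cctx = zstd.ZstdCompressor()

    # Any object with a read(size) method can be a source.
    with open(path, 'rb') as fh:
        with cctx.stream_reader(fh) as reader:
            chunk = reader.read(16384)

    # Objects implementing the buffer protocol (bytes, bytearray,
    # memoryview, ...) are also accepted directly.
    with cctx.stream_reader(b'data to compress') as reader:
        chunk = reader.read(16384)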
@@ -419,6 +430,64 b' the compressor::'
419 430 data = cobj.compress(b'foobar')
420 431 data = cobj.flush()
421 432
433 Chunker API
434 ^^^^^^^^^^^
435
436 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
437 an object that can be used to iteratively feed chunks of data into a compressor
438 and produce output chunks of a uniform size.
439
440 The object returned by ``chunker()`` exposes the following methods:
441
442 ``compress(data)``
443 Feeds new input data into the compressor.
444
445 ``flush()``
446 Flushes all data currently in the compressor.
447
448 ``finish()``
449 Signals the end of input data. No new data can be compressed after this
450 method is called.
451
452 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
453 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
454 MUST iterate through all elements of the returned iterator before performing
455 another operation on the object.
456
457 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
458
459 ``flush()`` and ``finish()`` may return a final chunk smaller than
460 ``chunk_size``.
461
462 Here is how the API should be used::
463
464 cctx = zstd.ZstdCompressor()
465 chunker = cctx.chunker(chunk_size=32768)
466
467 with open(path, 'rb') as fh:
468 while True:
469 in_chunk = fh.read(32768)
470 if not in_chunk:
471 break
472
473 for out_chunk in chunker.compress(in_chunk):
474 # Do something with output chunk of size 32768.
475
476 for out_chunk in chunker.finish():
477 # Do something with output chunks that finalize the zstd frame.
478
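For streaming scenarios where compressed data should be emitted before the
input is complete, ``flush()`` can be interleaved with ``compress()``. A
minimal sketch, assuming input and output file handles and a 32768 byte chunk
size (``in_path`` and ``out_path`` are placeholders)::

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=32768)

    with open(in_path, 'rb') as ifh, open(out_path, 'wb') as ofh:
        while True:
            in_chunk = ifh.read(32768)
            if not in_chunk:
                break

            # Emit any full 32768 byte chunks that are ready.
            for out_chunk in chunker.compress(in_chunk):
                ofh.write(out_chunk)

            # Force out buffered data without ending the frame. The final
            # chunk emitted here may be smaller than chunk_size.
            for out_chunk in chunker.flush():
                ofh.write(out_chunk)

        # Signal end of input and finalize the zstd frame.
        for out_chunk in chunker.finish():
            ofh.write(out_chunk)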
479 The ``chunker()`` API is often a better alternative to ``compressobj()``.
480
481 ``compressobj()`` will emit output data as it is available. This results in a
482 *stream* of output chunks of varying sizes. The consistency of the output chunk
483 size with ``chunker()`` is more appropriate for many usages, such as sending
484 compressed data to a socket.
485
486 ``compressobj()`` may also perform extra memory reallocations in order to
487 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
488 chunks are all the same size (except for flushed or final chunks), there is
489 less memory allocation overhead.
490
422 491 Batch Compression API
423 492 ^^^^^^^^^^^^^^^^^^^^^
424 493
@@ -542,17 +611,24 b' Stream Reader API'
542 611
543 612 with open(path, 'rb') as fh:
544 613 dctx = zstd.ZstdDecompressor()
545 with dctx.stream_reader(fh) as reader:
546 while True:
547 chunk = reader.read(16384)
548 if not chunk:
549 break
614 reader = dctx.stream_reader(fh)
615 while True:
616 chunk = reader.read(16384)
617 if not chunk:
618 break
619
620 # Do something with decompressed chunk.
550 621
551 # Do something with decompressed chunk.
622 The stream can also be used as a context manager::
552 623
553 The stream can only be read within a context manager. When the context
554 manager exits, the stream is closed and the underlying resource is
555 released and future operations against the stream will fail.
624 with open(path, 'rb') as fh:
625 dctx = zstd.ZstdDecompressor()
626 with dctx.stream_reader(fh) as reader:
627 ...
628
629 When used as a context manager, the stream is closed and the underlying
630 resources are released when the context manager exits. Future operations against
631 the stream will fail.
556 632
557 633 The ``source`` argument to ``stream_reader()`` can be any object with a
558 634 ``read(size)`` method or any object implementing the *buffer protocol*.
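A short sketch showing a buffer protocol source, assuming ``frame`` holds a
complete zstd frame produced elsewhere::

    dctx = zstd.ZstdDecompressor()

    # Compressed bytes can be read from directly, without wrapping them
    # in a file-like object.
    reader = dctx.stream_reader(frame)
    while True:
        chunk = reader.read(16384)
        if not chunk:
            break

        # Do something with decompressed chunk.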
@@ -1077,7 +1153,6 b' follows:'
1077 1153 * write_dict_id
1078 1154 * job_size
1079 1155 * overlap_size_log
1080 * compress_literals
1081 1156 * force_max_window
1082 1157 * enable_ldm
1083 1158 * ldm_hash_log
@@ -39,7 +39,6 b' int set_parameters(ZSTD_CCtx_params* par'
39 39 TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads);
40 40 TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize);
41 41 TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog);
42 TRY_SET_PARAMETER(params, ZSTD_p_compressLiterals, obj->compressLiterals);
43 42 TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow);
44 43 TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching);
45 44 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog);
@@ -88,7 +87,6 b' static int ZstdCompressionParameters_ini'
88 87 "ldm_bucket_size_log",
89 88 "ldm_hash_every_log",
90 89 "threads",
91 "compress_literals",
92 90 NULL
93 91 };
94 92
@@ -114,18 +112,13 b' static int ZstdCompressionParameters_ini'
114 112 unsigned ldmHashEveryLog = 0;
115 113 int threads = 0;
116 114
117 /* Setting value 0 has the effect of disabling. So we use -1 as a default
118 * to detect whether to set. Then we automatically derive the expected value
119 * based on the level, just like zstandard does itself. */
120 int compressLiterals = -1;
121
122 115 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
123 "|IiIIIIIIIIIIIIIIIIIIii:CompressionParameters",
116 "|IiIIIIIIIIIIIIIIIIIIi:CompressionParameters",
124 117 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
125 118 &searchLog, &minMatch, &targetLength, &compressionStrategy,
126 119 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog,
127 120 &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog,
128 &ldmHashEveryLog, &threads, &compressLiterals)) {
121 &ldmHashEveryLog, &threads)) {
129 122 return -1;
130 123 }
131 124
@@ -133,10 +126,6 b' static int ZstdCompressionParameters_ini'
133 126 threads = cpu_count();
134 127 }
135 128
136 if (compressLiterals < 0) {
137 compressLiterals = compressionLevel >= 0;
138 }
139
140 129 self->format = format;
141 130 self->compressionLevel = compressionLevel;
142 131 self->windowLog = windowLog;
@@ -152,7 +141,6 b' static int ZstdCompressionParameters_ini'
152 141 self->threads = threads;
153 142 self->jobSize = jobSize;
154 143 self->overlapSizeLog = overlapSizeLog;
155 self->compressLiterals = compressLiterals;
156 144 self->forceMaxWindow = forceMaxWindow;
157 145 self->enableLongDistanceMatching = enableLDM;
158 146 self->ldmHashLog = ldmHashLog;
@@ -299,16 +287,6 b' ZstdCompressionParametersObject* Compres'
299 287 Py_DECREF(val);
300 288 }
301 289
302 val = PyDict_GetItemString(kwargs, "compress_literals");
303 if (!val) {
304 val = PyLong_FromLong(level >= 0 ? 1 : 0);
305 if (!val) {
306 goto cleanup;
307 }
308 PyDict_SetItemString(kwargs, "compress_literals", val);
309 Py_DECREF(val);
310 }
311
312 290 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
313 291 if (!result) {
314 292 goto cleanup;
@@ -420,9 +398,6 b' static PyMemberDef ZstdCompressionParame'
420 398 { "overlap_size_log", T_UINT,
421 399 offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
422 400 "Size of previous input reloaded at the beginning of each job" },
423 { "compress_literals", T_UINT,
424 offsetof(ZstdCompressionParametersObject, compressLiterals), READONLY,
425 "whether Huffman compression of literals is in use" },
426 401 { "force_max_window", T_UINT,
427 402 offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
428 403 "force back references to remain smaller than window size" },
@@ -43,20 +43,11 b' static void reader_dealloc(ZstdCompressi'
43 43 }
44 44
45 45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
46 size_t zresult;
47
48 46 if (self->entered) {
49 47 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
50 48 return NULL;
51 49 }
52 50
53 zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
54 if (ZSTD_isError(zresult)) {
55 PyErr_Format(ZstdError, "error setting source size: %s",
56 ZSTD_getErrorName(zresult));
57 return NULL;
58 }
59
60 51 self->entered = 1;
61 52
62 53 Py_INCREF(self);
@@ -132,15 +123,6 b' static PyObject* reader_close(ZstdCompre'
132 123 Py_RETURN_NONE;
133 124 }
134 125
135 static PyObject* reader_closed(ZstdCompressionReader* self) {
136 if (self->closed) {
137 Py_RETURN_TRUE;
138 }
139 else {
140 Py_RETURN_FALSE;
141 }
142 }
143
144 126 static PyObject* reader_tell(ZstdCompressionReader* self) {
145 127 /* TODO should this raise OSError since stream isn't seekable? */
146 128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
@@ -159,11 +141,6 b' static PyObject* reader_read(ZstdCompres'
159 141 size_t zresult;
160 142 size_t oldPos;
161 143
162 if (!self->entered) {
163 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
164 return NULL;
165 }
166
167 144 if (self->closed) {
168 145 PyErr_SetString(PyExc_ValueError, "stream is closed");
169 146 return NULL;
@@ -333,8 +310,6 b' static PyMethodDef reader_methods[] = {'
333 310 PyDoc_STR("Exit a compression context") },
334 311 { "close", (PyCFunction)reader_close, METH_NOARGS,
335 312 PyDoc_STR("Close the stream so it cannot perform any more operations") },
336 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
337 PyDoc_STR("Whether stream is closed") },
338 313 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
339 314 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
340 315 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
@@ -354,6 +329,12 b' static PyMethodDef reader_methods[] = {'
354 329 { NULL, NULL }
355 330 };
356 331
332 static PyMemberDef reader_members[] = {
333 { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
334 READONLY, "whether stream is closed" },
335 { NULL }
336 };
337
357 338 PyTypeObject ZstdCompressionReaderType = {
358 339 PyVarObject_HEAD_INIT(NULL, 0)
359 340 "zstd.ZstdCompressionReader", /* tp_name */
@@ -383,7 +364,7 b' PyTypeObject ZstdCompressionReaderType ='
383 364 reader_iter, /* tp_iter */
384 365 reader_iternext, /* tp_iternext */
385 366 reader_methods, /* tp_methods */
386 0, /* tp_members */
367 reader_members, /* tp_members */
387 368 0, /* tp_getset */
388 369 0, /* tp_base */
389 370 0, /* tp_dict */
@@ -222,10 +222,6 b' static PyObject* ZstdCompressionWriter_f'
222 222 return NULL;
223 223 }
224 224
225 if (!output.pos) {
226 break;
227 }
228
229 225 /* Copy data from output buffer to writer. */
230 226 if (output.pos) {
231 227 #if PY_MAJOR_VERSION >= 3
@@ -238,7 +234,12 b' static PyObject* ZstdCompressionWriter_f'
238 234 totalWrite += output.pos;
239 235 self->bytesCompressed += output.pos;
240 236 }
237
241 238 output.pos = 0;
239
240 if (!zresult) {
241 break;
242 }
242 243 }
243 244
244 245 PyMem_Free(output.dst);
@@ -115,6 +115,7 b' static PyObject* ZstdCompressionObj_flus'
115 115 PyObject* result = NULL;
116 116 Py_ssize_t resultSize = 0;
117 117 ZSTD_inBuffer input;
118 ZSTD_EndDirective zFlushMode;
118 119
119 120 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
120 121 return NULL;
@@ -130,52 +131,34 b' static PyObject* ZstdCompressionObj_flus'
130 131 return NULL;
131 132 }
132 133
134 switch (flushMode) {
135 case compressorobj_flush_block:
136 zFlushMode = ZSTD_e_flush;
137 break;
138
139 case compressorobj_flush_finish:
140 zFlushMode = ZSTD_e_end;
141 self->finished = 1;
142 break;
143
144 default:
145 PyErr_SetString(ZstdError, "unhandled flush mode");
146 return NULL;
147 }
148
133 149 assert(self->output.pos == 0);
134 150
135 151 input.src = NULL;
136 152 input.size = 0;
137 153 input.pos = 0;
138 154
139 if (flushMode == compressorobj_flush_block) {
140 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
141 guaranteed to hold a full block. */
155 while (1) {
142 156 Py_BEGIN_ALLOW_THREADS
143 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
144 &input, ZSTD_e_flush);
157 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
158 &input, zFlushMode);
145 159 Py_END_ALLOW_THREADS
146 160
147 161 if (ZSTD_isError(zresult)) {
148 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
149 return NULL;
150 }
151
152 /* Output buffer is guaranteed to hold full block. */
153 assert(zresult == 0);
154
155 if (self->output.pos) {
156 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
157 if (!result) {
158 return NULL;
159 }
160 }
161
162 self->output.pos = 0;
163
164 if (result) {
165 return result;
166 }
167 else {
168 return PyBytes_FromString("");
169 }
170 }
171
172 assert(flushMode == compressorobj_flush_finish);
173 self->finished = 1;
174
175 while (1) {
176 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
177 &input, ZSTD_e_end);
178 if (ZSTD_isError(zresult)) {
179 162 PyErr_Format(ZstdError, "error ending compression stream: %s",
180 163 ZSTD_getErrorName(zresult));
181 164 return NULL;
@@ -11,15 +11,13 b''
11 11
12 12 extern PyObject* ZstdError;
13 13
14 int ensure_cctx(ZstdCompressor* compressor) {
14 int setup_cctx(ZstdCompressor* compressor) {
15 15 size_t zresult;
16 16
17 17 assert(compressor);
18 18 assert(compressor->cctx);
19 19 assert(compressor->params);
20 20
21 ZSTD_CCtx_reset(compressor->cctx);
22
23 21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
24 22 if (ZSTD_isError(zresult)) {
25 23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
@@ -237,9 +235,9 b' static int ZstdCompressor_init(ZstdCompr'
237 235 Py_INCREF(dict);
238 236 }
239 237
240 if (ensure_cctx(self)) {
241 return -1;
242 }
238 if (setup_cctx(self)) {
239 return -1;
240 }
243 241
244 242 return 0;
245 243 }
@@ -346,9 +344,7 b' static PyObject* ZstdCompressor_copy_str'
346 344 return NULL;
347 345 }
348 346
349 if (ensure_cctx(self)) {
350 return NULL;
351 }
347 ZSTD_CCtx_reset(self->cctx);
352 348
353 349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
354 350 if (ZSTD_isError(zresult)) {
@@ -489,6 +485,7 b' static ZstdCompressionReader* ZstdCompre'
489 485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
490 486 size_t readSize = ZSTD_CStreamInSize();
491 487 ZstdCompressionReader* result = NULL;
488 size_t zresult;
492 489
493 490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
494 491 &source, &sourceSize, &readSize)) {
@@ -520,13 +517,17 b' static ZstdCompressionReader* ZstdCompre'
520 517 goto except;
521 518 }
522 519
523 if (ensure_cctx(self)) {
520 ZSTD_CCtx_reset(self->cctx);
521
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 if (ZSTD_isError(zresult)) {
524 PyErr_Format(ZstdError, "error setting source size: %s",
525 ZSTD_getErrorName(zresult));
524 526 goto except;
525 527 }
526 528
527 529 result->compressor = self;
528 530 Py_INCREF(self);
529 result->sourceSize = sourceSize;
530 531
531 532 return result;
532 533
@@ -576,9 +577,7 b' static PyObject* ZstdCompressor_compress'
576 577 goto finally;
577 578 }
578 579
579 if (ensure_cctx(self)) {
580 goto finally;
581 }
580 ZSTD_CCtx_reset(self->cctx);
582 581
583 582 destSize = ZSTD_compressBound(source.len);
584 583 output = PyBytes_FromStringAndSize(NULL, destSize);
@@ -652,9 +651,7 b' static ZstdCompressionObj* ZstdCompresso'
652 651 return NULL;
653 652 }
654 653
655 if (ensure_cctx(self)) {
656 return NULL;
657 }
654 ZSTD_CCtx_reset(self->cctx);
658 655
659 656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
660 657 if (ZSTD_isError(zresult)) {
@@ -743,9 +740,7 b' static ZstdCompressorIterator* ZstdCompr'
743 740 goto except;
744 741 }
745 742
746 if (ensure_cctx(self)) {
747 return NULL;
748 }
743 ZSTD_CCtx_reset(self->cctx);
749 744
750 745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
751 746 if (ZSTD_isError(zresult)) {
@@ -817,9 +812,7 b' static ZstdCompressionWriter* ZstdCompre'
817 812 return NULL;
818 813 }
819 814
820 if (ensure_cctx(self)) {
821 return NULL;
822 }
815 ZSTD_CCtx_reset(self->cctx);
823 816
824 817 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
825 818 if (!result) {
@@ -839,6 +832,58 b' static ZstdCompressionWriter* ZstdCompre'
839 832 return result;
840 833 }
841 834
835 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
836 "Create an object for iterative compressing to same-sized chunks.\n"
837 );
838
839 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
840 static char* kwlist[] = {
841 "size",
842 "chunk_size",
843 NULL
844 };
845
846 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
847 size_t chunkSize = ZSTD_CStreamOutSize();
848 ZstdCompressionChunker* chunker;
849 size_t zresult;
850
851 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
852 &sourceSize, &chunkSize)) {
853 return NULL;
854 }
855
856 ZSTD_CCtx_reset(self->cctx);
857
858 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
859 if (ZSTD_isError(zresult)) {
860 PyErr_Format(ZstdError, "error setting source size: %s",
861 ZSTD_getErrorName(zresult));
862 return NULL;
863 }
864
865 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
866 if (!chunker) {
867 return NULL;
868 }
869
870 chunker->output.dst = PyMem_Malloc(chunkSize);
871 if (!chunker->output.dst) {
872 PyErr_NoMemory();
873 Py_DECREF(chunker);
874 return NULL;
875 }
876 chunker->output.size = chunkSize;
877 chunker->output.pos = 0;
878
879 chunker->compressor = self;
880 Py_INCREF(chunker->compressor);
881
882 chunker->chunkSize = chunkSize;
883
884 return chunker;
885 }
886
842 887 typedef struct {
843 888 void* sourceData;
844 889 size_t sourceSize;
@@ -1524,6 +1569,8 b' finally:'
1524 1569 }
1525 1570
1526 1571 static PyMethodDef ZstdCompressor_methods[] = {
1572 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1573 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1527 1574 { "compress", (PyCFunction)ZstdCompressor_compress,
1528 1575 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1529 1576 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
@@ -27,7 +27,6 b' void constants_module_init(PyObject* mod'
27 27 #else
28 28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
29 29 #endif
30 Py_INCREF(version);
31 30 PyModule_AddObject(mod, "__version__", version);
32 31
33 32 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
@@ -41,7 +40,6 b' void constants_module_init(PyObject* mod'
41 40 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 41 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 42 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 Py_INCREF(zstdVersion);
45 43 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
46 44
47 45 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
@@ -68,6 +66,8 b' void constants_module_init(PyObject* mod'
68 66 (long)ZSTD_DStreamOutSize());
69 67
70 68 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
69 PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX);
70 PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX);
71 71 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
72 72 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
73 73 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
@@ -80,6 +80,7 b' void constants_module_init(PyObject* mod'
80 80 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
81 81 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
82 82 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
83 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
83 84 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
84 85 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
85 86 PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX);
@@ -47,10 +47,6 b' static ZstdDecompressionReader* reader_e'
47 47 return NULL;
48 48 }
49 49
50 if (ensure_dctx(self->decompressor, 1)) {
51 return NULL;
52 }
53
54 50 self->entered = 1;
55 51
56 52 Py_INCREF(self);
@@ -98,15 +94,6 b' static PyObject* reader_close(ZstdDecomp'
98 94 Py_RETURN_NONE;
99 95 }
100 96
101 static PyObject* reader_closed(ZstdDecompressionReader* self) {
102 if (self->closed) {
103 Py_RETURN_TRUE;
104 }
105 else {
106 Py_RETURN_FALSE;
107 }
108 }
109
110 97 static PyObject* reader_flush(PyObject* self) {
111 98 Py_RETURN_NONE;
112 99 }
@@ -128,11 +115,6 b' static PyObject* reader_read(ZstdDecompr'
128 115 ZSTD_outBuffer output;
129 116 size_t zresult;
130 117
131 if (!self->entered) {
132 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
133 return NULL;
134 }
135
136 118 if (self->closed) {
137 119 PyErr_SetString(PyExc_ValueError, "stream is closed");
138 120 return NULL;
@@ -281,11 +263,6 b' static PyObject* reader_seek(ZstdDecompr'
281 263 unsigned long long readAmount = 0;
282 264 size_t defaultOutSize = ZSTD_DStreamOutSize();
283 265
284 if (!self->entered) {
285 PyErr_SetString(ZstdError, "seek() must be called from an active context manager");
286 return NULL;
287 }
288
289 266 if (self->closed) {
290 267 PyErr_SetString(PyExc_ValueError, "stream is closed");
291 268 return NULL;
@@ -384,8 +361,6 b' static PyMethodDef reader_methods[] = {'
384 361 PyDoc_STR("Exit a compression context") },
385 362 { "close", (PyCFunction)reader_close, METH_NOARGS,
386 363 PyDoc_STR("Close the stream so it cannot perform any more operations") },
387 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
388 PyDoc_STR("Whether stream is closed") },
389 364 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
390 365 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
391 366 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
@@ -407,6 +382,12 b' static PyMethodDef reader_methods[] = {'
407 382 { NULL, NULL }
408 383 };
409 384
385 static PyMemberDef reader_members[] = {
386 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
387 READONLY, "whether stream is closed" },
388 { NULL }
389 };
390
410 391 PyTypeObject ZstdDecompressionReaderType = {
411 392 PyVarObject_HEAD_INIT(NULL, 0)
412 393 "zstd.ZstdDecompressionReader", /* tp_name */
@@ -436,7 +417,7 b' PyTypeObject ZstdDecompressionReaderType'
436 417 reader_iter, /* tp_iter */
437 418 reader_iternext, /* tp_iternext */
438 419 reader_methods, /* tp_methods */
439 0, /* tp_members */
420 reader_members, /* tp_members */
440 421 0, /* tp_getset */
441 422 0, /* tp_base */
442 423 0, /* tp_dict */
@@ -33,6 +33,8 b' static PyObject* DecompressionObj_decomp'
33 33 PyObject* result = NULL;
34 34 Py_ssize_t resultSize = 0;
35 35
36 output.dst = NULL;
37
36 38 if (self->finished) {
37 39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
38 40 return NULL;
@@ -53,6 +55,12 b' static PyObject* DecompressionObj_decomp'
53 55 goto finally;
54 56 }
55 57
58 /* Special case of empty input. Output will always be empty. */
59 if (source.len == 0) {
60 result = PyBytes_FromString("");
61 goto finally;
62 }
63
56 64 input.src = source.buf;
57 65 input.size = source.len;
58 66 input.pos = 0;
@@ -65,8 +73,7 b' static PyObject* DecompressionObj_decomp'
65 73 output.size = self->outSize;
66 74 output.pos = 0;
67 75
68 /* Read input until exhausted. */
69 while (input.pos < input.size) {
76 while (1) {
70 77 Py_BEGIN_ALLOW_THREADS
71 78 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
72 79 Py_END_ALLOW_THREADS
@@ -98,9 +105,13 b' static PyObject* DecompressionObj_decomp'
98 105 goto except;
99 106 }
100 107 }
108 }
101 109
102 output.pos = 0;
110 if (zresult == 0 || (input.pos == input.size && output.pos == 0)) {
111 break;
103 112 }
113
114 output.pos = 0;
104 115 }
105 116
106 117 if (!result) {
@@ -575,6 +575,10 b' static ZstdDecompressionReader* Decompre'
575 575 return NULL;
576 576 }
577 577
578 if (ensure_dctx(self, 1)) {
579 return NULL;
580 }
581
578 582 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
579 583 if (NULL == result) {
580 584 return NULL;
@@ -15,7 +15,8 b''
15 15 #include <zstd.h>
16 16 #include <zdict.h>
17 17
18 #define PYTHON_ZSTANDARD_VERSION "0.9.0"
18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.10.1"
19 20
20 21 typedef enum {
21 22 compressorobj_flush_finish,
@@ -45,7 +46,6 b' typedef struct {'
45 46 unsigned threads;
46 47 unsigned jobSize;
47 48 unsigned overlapSizeLog;
48 unsigned compressLiterals;
49 49 unsigned forceMaxWindow;
50 50 unsigned enableLongDistanceMatching;
51 51 unsigned ldmHashLog;
@@ -162,7 +162,6 b' typedef struct {'
162 162 ZstdCompressor* compressor;
163 163 PyObject* reader;
164 164 Py_buffer buffer;
165 unsigned long long sourceSize;
166 165 size_t readSize;
167 166
168 167 int entered;
@@ -181,6 +180,34 b' extern PyTypeObject ZstdCompressionReade'
181 180 typedef struct {
182 181 PyObject_HEAD
183 182
183 ZstdCompressor* compressor;
184 ZSTD_inBuffer input;
185 ZSTD_outBuffer output;
186 Py_buffer inBuffer;
187 int finished;
188 size_t chunkSize;
189 } ZstdCompressionChunker;
190
191 extern PyTypeObject ZstdCompressionChunkerType;
192
193 typedef enum {
194 compressionchunker_mode_normal,
195 compressionchunker_mode_flush,
196 compressionchunker_mode_finish,
197 } CompressionChunkerMode;
198
199 typedef struct {
200 PyObject_HEAD
201
202 ZstdCompressionChunker* chunker;
203 CompressionChunkerMode mode;
204 } ZstdCompressionChunkerIterator;
205
206 extern PyTypeObject ZstdCompressionChunkerIteratorType;
207
208 typedef struct {
209 PyObject_HEAD
210
184 211 ZSTD_DCtx* dctx;
185 212 ZstdCompressionDict* dict;
186 213 size_t maxWindowSize;
@@ -17,6 +17,7 b' import tempfile'
17 17 HERE = os.path.abspath(os.path.dirname(__file__))
18 18
19 19 SOURCES = ['zstd/%s' % p for p in (
20 'common/debug.c',
20 21 'common/entropy_common.c',
21 22 'common/error_private.c',
22 23 'common/fse_decompress.c',
@@ -25,6 +26,7 b" SOURCES = ['zstd/%s' % p for p in ("
25 26 'common/xxhash.c',
26 27 'common/zstd_common.c',
27 28 'compress/fse_compress.c',
29 'compress/hist.c',
28 30 'compress/huf_compress.c',
29 31 'compress/zstd_compress.c',
30 32 'compress/zstd_double_fast.c',
@@ -36,6 +38,7 b" SOURCES = ['zstd/%s' % p for p in ("
36 38 'decompress/huf_decompress.c',
37 39 'decompress/zstd_decompress.c',
38 40 'dictBuilder/cover.c',
41 'dictBuilder/fastcover.c',
39 42 'dictBuilder/divsufsort.c',
40 43 'dictBuilder/zdict.c',
41 44 )]
@@ -6,12 +6,12 b''
6 6
7 7 import distutils.ccompiler
8 8 import os
9 import sys
10 9
11 10 from distutils.extension import Extension
12 11
13 12
14 13 zstd_sources = ['zstd/%s' % p for p in (
14 'common/debug.c',
15 15 'common/entropy_common.c',
16 16 'common/error_private.c',
17 17 'common/fse_decompress.c',
@@ -20,6 +20,7 b" zstd_sources = ['zstd/%s' % p for p in ("
20 20 'common/xxhash.c',
21 21 'common/zstd_common.c',
22 22 'compress/fse_compress.c',
23 'compress/hist.c',
23 24 'compress/huf_compress.c',
24 25 'compress/zstd_compress.c',
25 26 'compress/zstd_double_fast.c',
@@ -32,6 +33,7 b" zstd_sources = ['zstd/%s' % p for p in ("
32 33 'decompress/zstd_decompress.c',
33 34 'dictBuilder/cover.c',
34 35 'dictBuilder/divsufsort.c',
36 'dictBuilder/fastcover.c',
35 37 'dictBuilder/zdict.c',
36 38 )]
37 39
@@ -75,6 +77,7 b' ext_sources = ['
75 77 'c-ext/compressobj.c',
76 78 'c-ext/compressor.c',
77 79 'c-ext/compressoriterator.c',
80 'c-ext/compressionchunker.c',
78 81 'c-ext/compressionparams.c',
79 82 'c-ext/compressionreader.c',
80 83 'c-ext/compressionwriter.c',
@@ -93,25 +96,45 b' zstd_depends = ['
93 96
94 97
95 98 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
96 warnings_as_errors=False):
97 """Obtain a distutils.extension.Extension for the C extension."""
98 root = os.path.abspath(os.path.dirname(__file__))
99 warnings_as_errors=False, root=None):
100 """Obtain a distutils.extension.Extension for the C extension.
101
102 ``support_legacy`` controls whether to compile in legacy zstd format support.
103
104 ``system_zstd`` controls whether to compile against the system zstd library.
105 For this to work, the system zstd library and headers must match what
106 python-zstandard is coded against exactly.
107
108 ``name`` is the module name of the C extension to produce.
109
110 ``warnings_as_errors`` controls whether compiler warnings are turned into
111 compiler errors.
99 112
100 sources = set([os.path.join(root, p) for p in ext_sources])
113 ``root`` defines a root path that sources should be computed relative
114 to. This should be the directory with the main ``setup.py`` that is
115 being invoked. If not defined, paths will be relative to this file.
116 """
117 actual_root = os.path.abspath(os.path.dirname(__file__))
118 root = root or actual_root
119
120 sources = set([os.path.join(actual_root, p) for p in ext_sources])
101 121 if not system_zstd:
102 sources.update([os.path.join(root, p) for p in zstd_sources])
122 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
103 123 if support_legacy:
104 sources.update([os.path.join(root, p) for p in zstd_sources_legacy])
124 sources.update([os.path.join(actual_root, p)
125 for p in zstd_sources_legacy])
105 126 sources = list(sources)
106 127
107 include_dirs = set([os.path.join(root, d) for d in ext_includes])
128 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
108 129 if not system_zstd:
109 include_dirs.update([os.path.join(root, d) for d in zstd_includes])
130 include_dirs.update([os.path.join(actual_root, d)
131 for d in zstd_includes])
110 132 if support_legacy:
111 include_dirs.update([os.path.join(root, d) for d in zstd_includes_legacy])
133 include_dirs.update([os.path.join(actual_root, d)
134 for d in zstd_includes_legacy])
112 135 include_dirs = list(include_dirs)
113 136
114 depends = [os.path.join(root, p) for p in zstd_depends]
137 depends = [os.path.join(actual_root, p) for p in zstd_depends]
115 138
116 139 compiler = distutils.ccompiler.new_compiler()
117 140
@@ -152,6 +175,11 b' def get_c_extension(support_legacy=False'
152 175
153 176 libraries = ['zstd'] if system_zstd else []
154 177
178 # Python 3.7 doesn't like absolute paths. So normalize to relative.
179 sources = [os.path.relpath(p, root) for p in sources]
180 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
181 depends = [os.path.relpath(p, root) for p in depends]
182
155 183 # TODO compile with optimizations.
156 184 return Extension(name, sources,
157 185 include_dirs=include_dirs,
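For downstream consumers, a hypothetical ``setup.py`` that vendors this
package in a ``zstandard/`` subdirectory might use the helper roughly as
follows (the directory name and import path are assumptions for illustration,
not something this module prescribes):

    # setup.py at the root of the consuming project.
    import os

    from distutils.core import setup

    from zstandard import setup_zstd  # assumed vendored location

    HERE = os.path.abspath(os.path.dirname(__file__))

    setup(
        name='myproject',
        ext_modules=[
            # Passing root=HERE makes the returned Extension use paths
            # relative to this setup.py.
            setup_zstd.get_c_extension(name='zstd', root=HERE),
        ],
    )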
@@ -153,7 +153,7 b' class TestCompressor_compress(unittest.T'
153 153 no_params = zstd.get_frame_parameters(no_dict_id)
154 154 with_params = zstd.get_frame_parameters(with_dict_id)
155 155 self.assertEqual(no_params.dict_id, 0)
156 self.assertEqual(with_params.dict_id, 1387616518)
156 self.assertEqual(with_params.dict_id, 1880053135)
157 157
158 158 def test_compress_dict_multiple(self):
159 159 samples = []
@@ -216,7 +216,7 b' class TestCompressor_compress(unittest.T'
216 216 self.assertEqual(params.dict_id, d.dict_id())
217 217
218 218 self.assertEqual(result,
219 b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00'
219 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
220 220 b'\x66\x6f\x6f')
221 221
222 222 def test_multithreaded_compression_params(self):
@@ -336,7 +336,9 b' class TestCompressor_compressobj(unittes'
336 336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
337 337 self.assertEqual(cobj.compress(b'bar'), b'')
338 338 # 3 byte header plus content.
339 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
339 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
340 b'\x18\x00\x00bar')
341 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
340 342
341 343 def test_flush_empty_block(self):
342 344 cctx = zstd.ZstdCompressor(write_checksum=True)
@@ -576,15 +578,23 b' class TestCompressor_stream_reader(unitt'
576 578 def test_context_manager(self):
577 579 cctx = zstd.ZstdCompressor()
578 580
579 reader = cctx.stream_reader(b'foo' * 60)
580 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
581 reader.read(10)
582
583 581 with cctx.stream_reader(b'foo') as reader:
584 582 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
585 583 with reader as reader2:
586 584 pass
587 585
586 def test_no_context_manager(self):
587 cctx = zstd.ZstdCompressor()
588
589 reader = cctx.stream_reader(b'foo')
590 reader.read(4)
591 self.assertFalse(reader.closed)
592
593 reader.close()
594 self.assertTrue(reader.closed)
595 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
596 reader.read(1)
597
588 598 def test_not_implemented(self):
589 599 cctx = zstd.ZstdCompressor()
590 600
@@ -619,13 +629,18 b' class TestCompressor_stream_reader(unitt'
619 629 self.assertFalse(reader.writable())
620 630 self.assertFalse(reader.seekable())
621 631 self.assertFalse(reader.isatty())
632 self.assertFalse(reader.closed)
622 633 self.assertIsNone(reader.flush())
634 self.assertFalse(reader.closed)
635
636 self.assertTrue(reader.closed)
623 637
624 638 def test_read_closed(self):
625 639 cctx = zstd.ZstdCompressor()
626 640
627 641 with cctx.stream_reader(b'foo' * 60) as reader:
628 642 reader.close()
643 self.assertTrue(reader.closed)
629 644 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
630 645 reader.read(10)
631 646
@@ -715,7 +730,7 b' class TestCompressor_stream_reader(unitt'
715 730 while reader.read(8192):
716 731 pass
717 732
718 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
719 734 reader.read(10)
720 735
721 736 def test_bad_size(self):
@@ -792,7 +807,7 b' class TestCompressor_stream_writer(unitt'
792 807 d = zstd.train_dictionary(8192, samples)
793 808
794 809 h = hashlib.sha1(d.as_bytes()).hexdigest()
795 self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027')
810 self.assertEqual(h, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79')
796 811
797 812 buffer = io.BytesIO()
798 813 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
@@ -808,9 +823,16 b' class TestCompressor_stream_writer(unitt'
808 823 self.assertEqual(params.window_size, 2097152)
809 824 self.assertEqual(params.dict_id, d.dict_id())
810 825 self.assertFalse(params.has_checksum)
811 self.assertEqual(compressed,
812 b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00'
813 b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89')
826
827 h = hashlib.sha1(compressed).hexdigest()
828 self.assertEqual(h, '23f88344263678478f5f82298e0a5d1833125786')
829
830 source = b'foo' + b'bar' + (b'foo' * 16384)
831
832 dctx = zstd.ZstdDecompressor(dict_data=d)
833
834 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
835 source)
814 836
815 837 def test_compression_params(self):
816 838 params = zstd.ZstdCompressionParameters(
@@ -1157,6 +1179,181 b' class TestCompressor_read_to_iter(unitte'
1157 1179 b''.join(cctx.read_to_iter(source))
1158 1180
1159 1181
1182 @make_cffi
1183 class TestCompressor_chunker(unittest.TestCase):
1184 def test_empty(self):
1185 cctx = zstd.ZstdCompressor(write_content_size=False)
1186 chunker = cctx.chunker()
1187
1188 it = chunker.compress(b'')
1189
1190 with self.assertRaises(StopIteration):
1191 next(it)
1192
1193 it = chunker.finish()
1194
1195 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x50\x01\x00\x00')
1196
1197 with self.assertRaises(StopIteration):
1198 next(it)
1199
1200 def test_simple_input(self):
1201 cctx = zstd.ZstdCompressor()
1202 chunker = cctx.chunker()
1203
1204 it = chunker.compress(b'foobar')
1205
1206 with self.assertRaises(StopIteration):
1207 next(it)
1208
1209 it = chunker.compress(b'baz' * 30)
1210
1211 with self.assertRaises(StopIteration):
1212 next(it)
1213
1214 it = chunker.finish()
1215
1216 self.assertEqual(next(it),
1217 b'\x28\xb5\x2f\xfd\x00\x50\x7d\x00\x00\x48\x66\x6f'
1218 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1219
1220 with self.assertRaises(StopIteration):
1221 next(it)
1222
1223 def test_input_size(self):
1224 cctx = zstd.ZstdCompressor()
1225 chunker = cctx.chunker(size=1024)
1226
1227 it = chunker.compress(b'x' * 1000)
1228
1229 with self.assertRaises(StopIteration):
1230 next(it)
1231
1232 it = chunker.compress(b'y' * 24)
1233
1234 with self.assertRaises(StopIteration):
1235 next(it)
1236
1237 chunks = list(chunker.finish())
1238
1239 self.assertEqual(chunks, [
1240 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1241 b'\xa0\x16\xe3\x2b\x80\x05'
1242 ])
1243
1244 dctx = zstd.ZstdDecompressor()
1245
1246 self.assertEqual(dctx.decompress(b''.join(chunks)),
1247 (b'x' * 1000) + (b'y' * 24))
1248
1249 def test_small_chunk_size(self):
1250 cctx = zstd.ZstdCompressor()
1251 chunker = cctx.chunker(chunk_size=1)
1252
1253 chunks = list(chunker.compress(b'foo' * 1024))
1254 self.assertEqual(chunks, [])
1255
1256 chunks = list(chunker.finish())
1257 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1258
1259 self.assertEqual(
1260 b''.join(chunks),
1261 b'\x28\xb5\x2f\xfd\x00\x50\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1262 b'\xfa\xd3\x77\x43')
1263
1264 dctx = zstd.ZstdDecompressor()
1265 self.assertEqual(dctx.decompress(b''.join(chunks),
1266 max_output_size=10000),
1267 b'foo' * 1024)
1268
1269 def test_input_types(self):
1270 cctx = zstd.ZstdCompressor()
1271
1272 mutable_array = bytearray(3)
1273 mutable_array[:] = b'foo'
1274
1275 sources = [
1276 memoryview(b'foo'),
1277 bytearray(b'foo'),
1278 mutable_array,
1279 ]
1280
1281 for source in sources:
1282 chunker = cctx.chunker()
1283
1284 self.assertEqual(list(chunker.compress(source)), [])
1285 self.assertEqual(list(chunker.finish()), [
1286 b'\x28\xb5\x2f\xfd\x00\x50\x19\x00\x00\x66\x6f\x6f'
1287 ])
1288
1289 def test_flush(self):
1290 cctx = zstd.ZstdCompressor()
1291 chunker = cctx.chunker()
1292
1293 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1294 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1295
1296 chunks1 = list(chunker.flush())
1297
1298 self.assertEqual(chunks1, [
1299 b'\x28\xb5\x2f\xfd\x00\x50\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1300 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1301 ])
1302
1303 self.assertEqual(list(chunker.flush()), [])
1304 self.assertEqual(list(chunker.flush()), [])
1305
1306 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1307
1308 chunks2 = list(chunker.flush())
1309 self.assertEqual(len(chunks2), 1)
1310
1311 chunks3 = list(chunker.finish())
1312 self.assertEqual(len(chunks3), 1)
1313
1314 dctx = zstd.ZstdDecompressor()
1315
1316 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1317 max_output_size=10000),
1318 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1319
1320 def test_compress_after_finish(self):
1321 cctx = zstd.ZstdCompressor()
1322 chunker = cctx.chunker()
1323
1324 list(chunker.compress(b'foo'))
1325 list(chunker.finish())
1326
1327 with self.assertRaisesRegexp(
1328 zstd.ZstdError,
1329 'cannot call compress\(\) after compression finished'):
1330 list(chunker.compress(b'foo'))
1331
1332 def test_flush_after_finish(self):
1333 cctx = zstd.ZstdCompressor()
1334 chunker = cctx.chunker()
1335
1336 list(chunker.compress(b'foo'))
1337 list(chunker.finish())
1338
1339 with self.assertRaisesRegexp(
1340 zstd.ZstdError,
1341 'cannot call flush\(\) after compression finished'):
1342 list(chunker.flush())
1343
1344 def test_finish_after_finish(self):
1345 cctx = zstd.ZstdCompressor()
1346 chunker = cctx.chunker()
1347
1348 list(chunker.compress(b'foo'))
1349 list(chunker.finish())
1350
1351 with self.assertRaisesRegexp(
1352 zstd.ZstdError,
1353 'cannot call finish\(\) after compression finished'):
1354 list(chunker.finish())
1355
1356
1160 1357 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1161 1358 def test_invalid_inputs(self):
1162 1359 cctx = zstd.ZstdCompressor()
@@ -135,6 +135,51 b' class TestCompressor_compressobj_fuzzing'
135 135
136 136 self.assertEqual(b''.join(chunks), ref_frame)
137 137
138 @hypothesis.settings(
139 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
140 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
141 level=strategies.integers(min_value=1, max_value=5),
142 chunk_sizes=strategies.data(),
143 flushes=strategies.data())
144 def test_flush_block(self, original, level, chunk_sizes, flushes):
145 cctx = zstd.ZstdCompressor(level=level)
146 cobj = cctx.compressobj()
147
148 dctx = zstd.ZstdDecompressor()
149 dobj = dctx.decompressobj()
150
151 compressed_chunks = []
152 decompressed_chunks = []
153 i = 0
154 while True:
155 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
156 source = original[i:i + input_size]
157 if not source:
158 break
159
160 i += input_size
161
162 chunk = cobj.compress(source)
163 compressed_chunks.append(chunk)
164 decompressed_chunks.append(dobj.decompress(chunk))
165
166 if not flushes.draw(strategies.booleans()):
167 continue
168
169 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
170 compressed_chunks.append(chunk)
171 decompressed_chunks.append(dobj.decompress(chunk))
172
173 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
174
175 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
176 compressed_chunks.append(chunk)
177 decompressed_chunks.append(dobj.decompress(chunk))
178
179 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
180 max_output_size=len(original)),
181 original)
182 self.assertEqual(b''.join(decompressed_chunks), original)
138 183
139 184 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
140 185 @make_cffi
@@ -186,3 +231,90 b' class TestCompressor_multi_compress_to_b'
186 231
187 232 for i, frame in enumerate(result):
188 233 self.assertEqual(dctx.decompress(frame), original[i])
234
235
236 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
237 @make_cffi
238 class TestCompressor_chunker_fuzzing(unittest.TestCase):
239 @hypothesis.settings(
240 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
241 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
242 level=strategies.integers(min_value=1, max_value=5),
243 chunk_size=strategies.integers(
244 min_value=1,
245 max_value=32 * 1048576),
246 input_sizes=strategies.data())
247 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
248 cctx = zstd.ZstdCompressor(level=level)
249 chunker = cctx.chunker(chunk_size=chunk_size)
250
251 chunks = []
252 i = 0
253 while True:
254 input_size = input_sizes.draw(strategies.integers(1, 4096))
255 source = original[i:i + input_size]
256 if not source:
257 break
258
259 chunks.extend(chunker.compress(source))
260 i += input_size
261
262 chunks.extend(chunker.finish())
263
264 dctx = zstd.ZstdDecompressor()
265
266 self.assertEqual(dctx.decompress(b''.join(chunks),
267 max_output_size=len(original)),
268 original)
269
270 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
271
272 @hypothesis.settings(
273 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
274 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
275 level=strategies.integers(min_value=1, max_value=5),
276 chunk_size=strategies.integers(
277 min_value=1,
278 max_value=32 * 1048576),
279 input_sizes=strategies.data(),
280 flushes=strategies.data())
281 def test_flush_block(self, original, level, chunk_size, input_sizes,
282 flushes):
283 cctx = zstd.ZstdCompressor(level=level)
284 chunker = cctx.chunker(chunk_size=chunk_size)
285
286 dctx = zstd.ZstdDecompressor()
287 dobj = dctx.decompressobj()
288
289 compressed_chunks = []
290 decompressed_chunks = []
291 i = 0
292 while True:
293 input_size = input_sizes.draw(strategies.integers(1, 4096))
294 source = original[i:i + input_size]
295 if not source:
296 break
297
298 i += input_size
299
300 chunks = list(chunker.compress(source))
301 compressed_chunks.extend(chunks)
302 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
303
304 if not flushes.draw(strategies.booleans()):
305 continue
306
307 chunks = list(chunker.flush())
308 compressed_chunks.extend(chunks)
309 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
310
311 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
312
313 chunks = list(chunker.finish())
314 compressed_chunks.extend(chunks)
315 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
316
317 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
318 max_output_size=len(original)),
319 original)
320 self.assertEqual(b''.join(decompressed_chunks), original)
\ No newline at end of file
@@ -24,6 +24,7 b' class TestCompressionParameters(unittest'
24 24 hash_log=zstd.HASHLOG_MAX,
25 25 search_log=zstd.SEARCHLOG_MAX,
26 26 min_match=zstd.SEARCHLENGTH_MAX - 1,
27 target_length=zstd.TARGETLENGTH_MAX,
27 28 compression_strategy=zstd.STRATEGY_BTULTRA)
28 29
29 30 def test_from_level(self):
@@ -34,7 +35,6 b' class TestCompressionParameters(unittest'
34 35
35 36 p = zstd.ZstdCompressionParameters.from_level(-4)
36 37 self.assertEqual(p.window_log, 19)
37 self.assertEqual(p.compress_literals, 0)
38 38
39 39 def test_members(self):
40 40 p = zstd.ZstdCompressionParameters(window_log=10,
@@ -64,19 +64,11 b' class TestCompressionParameters(unittest'
64 64 self.assertEqual(p.job_size, 1048576)
65 65 self.assertEqual(p.overlap_size_log, 6)
66 66
67 p = zstd.ZstdCompressionParameters(compression_level=2)
68 self.assertEqual(p.compress_literals, 1)
69
70 p = zstd.ZstdCompressionParameters(compress_literals=False)
71 self.assertEqual(p.compress_literals, 0)
72
73 67 p = zstd.ZstdCompressionParameters(compression_level=-1)
74 68 self.assertEqual(p.compression_level, -1)
75 self.assertEqual(p.compress_literals, 0)
76 69
77 p = zstd.ZstdCompressionParameters(compression_level=-2, compress_literals=True)
70 p = zstd.ZstdCompressionParameters(compression_level=-2)
78 71 self.assertEqual(p.compression_level, -2)
79 self.assertEqual(p.compress_literals, 1)
80 72
81 73 p = zstd.ZstdCompressionParameters(force_max_window=True)
82 74 self.assertEqual(p.force_max_window, 1)
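The parameter tests now exercise target_length and drop the removed compress_literals knob. For reference, a short sketch of constructing and inspecting ZstdCompressionParameters using the attributes asserted above; the specific values are illustrative only.

    import zstandard as zstd

    # Explicit construction; unspecified fields keep their defaults.
    params = zstd.ZstdCompressionParameters(window_log=10,
                                            target_length=zstd.TARGETLENGTH_MAX,
                                            compression_strategy=zstd.STRATEGY_BTULTRA)
    assert params.window_log == 10

    # Derive a complete parameter set from a (possibly negative) level.
    params = zstd.ZstdCompressionParameters.from_level(-4)
    assert params.window_log == 19

    params = zstd.ZstdCompressionParameters(compression_level=-2)
    assert params.compression_level == -2

    params = zstd.ZstdCompressionParameters(force_max_window=True)
    assert params.force_max_window == 1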
@@ -27,7 +27,7 b' s_searchlog = strategies.integers(min_va'
27 27 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
28 28 max_value=zstd.SEARCHLENGTH_MAX)
29 29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
30 max_value=2**32)
30 max_value=zstd.TARGETLENGTH_MAX)
31 31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
32 32 zstd.STRATEGY_DFAST,
33 33 zstd.STRATEGY_GREEDY,
@@ -293,10 +293,6 b' class TestDecompressor_stream_reader(uni'
293 293 def test_context_manager(self):
294 294 dctx = zstd.ZstdDecompressor()
295 295
296 reader = dctx.stream_reader(b'foo')
297 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
298 reader.read(1)
299
300 296 with dctx.stream_reader(b'foo') as reader:
301 297 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
302 298 with reader as reader2:
@@ -331,17 +327,23 b' class TestDecompressor_stream_reader(uni'
331 327 dctx = zstd.ZstdDecompressor()
332 328
333 329 with dctx.stream_reader(b'foo') as reader:
330 self.assertFalse(reader.closed)
334 331 self.assertTrue(reader.readable())
335 332 self.assertFalse(reader.writable())
336 333 self.assertTrue(reader.seekable())
337 334 self.assertFalse(reader.isatty())
335 self.assertFalse(reader.closed)
338 336 self.assertIsNone(reader.flush())
337 self.assertFalse(reader.closed)
338
339 self.assertTrue(reader.closed)
339 340
340 341 def test_read_closed(self):
341 342 dctx = zstd.ZstdDecompressor()
342 343
343 344 with dctx.stream_reader(b'foo') as reader:
344 345 reader.close()
346 self.assertTrue(reader.closed)
345 347 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
346 348 reader.read(1)
347 349
@@ -372,10 +374,10 b' class TestDecompressor_stream_reader(uni'
372 374 self.assertEqual(reader.tell(), len(source))
373 375
374 376 # Read after EOF should return empty bytes.
375 self.assertEqual(reader.read(), b'')
377 self.assertEqual(reader.read(1), b'')
376 378 self.assertEqual(reader.tell(), len(result))
377 379
378 self.assertTrue(reader.closed())
380 self.assertTrue(reader.closed)
379 381
380 382 def test_read_buffer_small_chunks(self):
381 383 cctx = zstd.ZstdCompressor()
@@ -408,8 +410,11 b' class TestDecompressor_stream_reader(uni'
408 410 chunk = reader.read(8192)
409 411 self.assertEqual(chunk, source)
410 412 self.assertEqual(reader.tell(), len(source))
411 self.assertEqual(reader.read(), b'')
413 self.assertEqual(reader.read(1), b'')
412 414 self.assertEqual(reader.tell(), len(source))
415 self.assertFalse(reader.closed)
416
417 self.assertTrue(reader.closed)
413 418
414 419 def test_read_stream_small_chunks(self):
415 420 cctx = zstd.ZstdCompressor()
@@ -440,7 +445,9 b' class TestDecompressor_stream_reader(uni'
440 445 while reader.read(16):
441 446 pass
442 447
443 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
448 self.assertTrue(reader.closed)
449
450 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
444 451 reader.read(10)
445 452
446 453 def test_illegal_seeks(self):
@@ -474,8 +481,7 b' class TestDecompressor_stream_reader(uni'
474 481 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
475 482 reader.seek(4, os.SEEK_SET)
476 483
477 with self.assertRaisesRegexp(
478 zstd.ZstdError, 'seek\(\) must be called from an active context'):
484 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
479 485 reader.seek(0)
480 486
481 487 def test_seek(self):
@@ -492,6 +498,39 b' class TestDecompressor_stream_reader(uni'
492 498 reader.seek(4, os.SEEK_CUR)
493 499 self.assertEqual(reader.read(2), b'ar')
494 500
501 def test_no_context_manager(self):
502 source = b'foobar' * 60
503 cctx = zstd.ZstdCompressor()
504 frame = cctx.compress(source)
505
506 dctx = zstd.ZstdDecompressor()
507 reader = dctx.stream_reader(frame)
508
509 self.assertEqual(reader.read(6), b'foobar')
510 self.assertEqual(reader.read(18), b'foobar' * 3)
511 self.assertFalse(reader.closed)
512
513 # Calling close prevents subsequent use.
514 reader.close()
515 self.assertTrue(reader.closed)
516
517 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
518 reader.read(6)
519
520 def test_read_after_error(self):
521 source = io.BytesIO(b'')
522 dctx = zstd.ZstdDecompressor()
523
524 reader = dctx.stream_reader(source)
525
526 with reader:
527 with self.assertRaises(TypeError):
528 reader.read()
529
530 with reader:
531 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
532 reader.read(100)
533
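These new tests capture the reader's lifecycle rules: a stream_reader is usable without a context manager, close() (or leaving a with block) marks it closed, and any subsequent read raises ValueError('stream is closed') rather than the previous ZstdError. A compact sketch of that flow, with an invented payload:

    import zstandard as zstd

    frame = zstd.ZstdCompressor().compress(b'foobar' * 60)

    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(frame)      # no context manager required

    assert reader.read(6) == b'foobar'
    assert not reader.closed

    reader.close()
    assert reader.closed

    try:
        reader.read(6)
    except ValueError as e:
        assert 'stream is closed' in str(e)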
495 534
496 535 @make_cffi
497 536 class TestDecompressor_decompressobj(unittest.TestCase):
@@ -12,7 +12,9 b' from . common import ('
12 12 @make_cffi
13 13 class TestModuleAttributes(unittest.TestCase):
14 14 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 4))
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 6))
16
17 self.assertEqual(zstd.__version__, '0.10.1')
16 18
17 19 def test_constants(self):
18 20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -27,6 +29,8 b' class TestModuleAttributes(unittest.Test'
27 29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
28 30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 31 'MAGIC_NUMBER',
32 'BLOCKSIZELOG_MAX',
33 'BLOCKSIZE_MAX',
30 34 'WINDOWLOG_MIN',
31 35 'WINDOWLOG_MAX',
32 36 'CHAINLOG_MIN',
@@ -39,6 +43,7 b' class TestModuleAttributes(unittest.Test'
39 43 'SEARCHLENGTH_MIN',
40 44 'SEARCHLENGTH_MAX',
41 45 'TARGETLENGTH_MIN',
46 'TARGETLENGTH_MAX',
42 47 'LDM_MINMATCH_MIN',
43 48 'LDM_MINMATCH_MAX',
44 49 'LDM_BUCKETSIZELOG_MAX',
@@ -57,7 +57,8 b' class TestTrainDictionary(unittest.TestC'
57 57 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
58 58 d=16)
59 59
60 self.assertEqual(d.k, 50)
60 # This varies by platform.
61 self.assertIn(d.k, (50, 2000))
61 62 self.assertEqual(d.d, 16)
62 63
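Since the cover trainer may pick a different segment size k depending on the platform, the test now accepts either observed value. A rough sketch of the call it makes, with invented sample data standing in for the test's generate_samples() helper:

    import zstandard as zstd

    # Many small, similar samples; purely illustrative input data.
    samples = [b'sample-%d-shared-tail' % i for i in range(1000)]

    d = zstd.train_dictionary(8192, samples, threads=-1, steps=1, d=16)

    # d was pinned at 16; k is chosen by the trainer and varies by platform.
    print(d.k, d.d)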
63 64 @make_cffi
@@ -60,3 +60,6 b" elif _module_policy == 'cffi':"
60 60 else:
61 61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 62 'cext, or cffi' % _module_policy)
63
64 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.10.1'
@@ -182,6 +182,7 b' void compressobj_module_init(PyObject* m'
182 182 void compressor_module_init(PyObject* mod);
183 183 void compressionparams_module_init(PyObject* mod);
184 184 void constants_module_init(PyObject* mod);
185 void compressionchunker_module_init(PyObject* mod);
185 186 void compressiondict_module_init(PyObject* mod);
186 187 void compressionreader_module_init(PyObject* mod);
187 188 void compressionwriter_module_init(PyObject* mod);
@@ -209,7 +210,7 b' void zstd_module_init(PyObject* m) {'
209 210 We detect this mismatch here and refuse to load the module if this
210 211 scenario is detected.
211 212 */
212 if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) {
213 if (ZSTD_VERSION_NUMBER != 10306 || ZSTD_versionNumber() != 10306) {
213 214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
214 215 return;
215 216 }
@@ -219,6 +220,7 b' void zstd_module_init(PyObject* m) {'
219 220 compressiondict_module_init(m);
220 221 compressobj_module_init(m);
221 222 compressor_module_init(m);
223 compressionchunker_module_init(m);
222 224 compressionreader_module_init(m);
223 225 compressionwriter_module_init(m);
224 226 compressoriterator_module_init(m);
@@ -1,8 +1,7 b''
1 1 /* ******************************************************************
2 2 bitstream
3 3 Part of FSE library
4 header file (to include)
5 Copyright (C) 2013-2017, Yann Collet.
4 Copyright (C) 2013-present, Yann Collet.
6 5
7 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8 7
@@ -49,21 +48,10 b' extern "C" {'
49 48 * Dependencies
50 49 ******************************************/
51 50 #include "mem.h" /* unaligned access routines */
51 #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52 52 #include "error_private.h" /* error codes and messages */
53 53
54 54
55 /*-*************************************
56 * Debug
57 ***************************************/
58 #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
59 # include <assert.h>
60 #else
61 # ifndef assert
62 # define assert(condition) ((void)0)
63 # endif
64 #endif
65
66
67 55 /*=========================================
68 56 * Target specific
69 57 =========================================*/
@@ -83,8 +71,7 b' extern "C" {'
83 71 * A critical property of these streams is that they encode and decode in **reverse** direction.
84 72 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
85 73 */
86 typedef struct
87 {
74 typedef struct {
88 75 size_t bitContainer;
89 76 unsigned bitPos;
90 77 char* startPtr;
@@ -118,8 +105,7 b' MEM_STATIC size_t BIT_closeCStream(BIT_C'
118 105 /*-********************************************
119 106 * bitStream decoding API (read backward)
120 107 **********************************************/
121 typedef struct
122 {
108 typedef struct {
123 109 size_t bitContainer;
124 110 unsigned bitsConsumed;
125 111 const char* ptr;
@@ -236,7 +222,8 b' MEM_STATIC void BIT_addBits(BIT_CStream_'
236 222 }
237 223
238 224 /*! BIT_addBitsFast() :
239 * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
225 * works only if `value` is _clean_,
226 * meaning all high bits above nbBits are 0 */
240 227 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
241 228 size_t value, unsigned nbBits)
242 229 {
@@ -77,9 +77,9 b''
77 77 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
78 78 */
79 79 #ifndef DYNAMIC_BMI2
80 #if (defined(__clang__) && __has_attribute(__target__)) \
80 #if ((defined(__clang__) && __has_attribute(__target__)) \
81 81 || (defined(__GNUC__) \
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
83 83 && (defined(__x86_64__) || defined(_M_X86)) \
84 84 && !defined(__BMI2__)
85 85 # define DYNAMIC_BMI2 1
@@ -88,15 +88,37 b''
88 88 #endif
89 89 #endif
90 90
91 /* prefetch */
92 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
93 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
94 # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
95 #elif defined(__GNUC__)
96 # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
91 /* prefetch
92 * can be disabled, by declaring NO_PREFETCH macro
93 * All prefetch invocations use a single default locality 2,
94 * generating instruction prefetcht1,
95 * which, according to Intel, means "load data into L2 cache".
96 * This is a good enough "middle ground" for the time being,
97 * though in theory, it would be better to specialize locality depending on data being prefetched.
98 * Tests could not determine any sensible difference based on locality value. */
99 #if defined(NO_PREFETCH)
100 # define PREFETCH(ptr) (void)(ptr) /* disabled */
97 101 #else
98 # define PREFETCH(ptr) /* disabled */
99 #endif
102 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
103 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
104 # define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
105 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
106 # define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
107 # else
108 # define PREFETCH(ptr) (void)(ptr) /* disabled */
109 # endif
110 #endif /* NO_PREFETCH */
111
112 #define CACHELINE_SIZE 64
113
114 #define PREFETCH_AREA(p, s) { \
115 const char* const _ptr = (const char*)(p); \
116 size_t const _size = (size_t)(s); \
117 size_t _pos; \
118 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
119 PREFETCH(_ptr + _pos); \
120 } \
121 }
100 122
101 123 /* disable warnings */
102 124 #ifdef _MSC_VER /* Visual Studio */
@@ -36,7 +36,7 b' MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void)'
36 36 U32 f1d = 0;
37 37 U32 f7b = 0;
38 38 U32 f7c = 0;
39 #ifdef _MSC_VER
39 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
40 40 int reg[4];
41 41 __cpuid((int*)reg, 0);
42 42 {
@@ -72,8 +72,7 b' MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void)'
72 72 "cpuid\n\t"
73 73 "popl %%ebx\n\t"
74 74 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
75 : "a"(1)
76 :);
75 : "a"(1));
77 76 }
78 77 if (n >= 7) {
79 78 __asm__(
@@ -72,7 +72,21 b' size_t FSE_readNCount (short* normalized'
72 72 unsigned charnum = 0;
73 73 int previous0 = 0;
74 74
75 if (hbSize < 4) return ERROR(srcSize_wrong);
75 if (hbSize < 4) {
76 /* This function only works when hbSize >= 4 */
77 char buffer[4];
78 memset(buffer, 0, sizeof(buffer));
79 memcpy(buffer, headerBuffer, hbSize);
80 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81 buffer, sizeof(buffer));
82 if (FSE_isError(countSize)) return countSize;
83 if (countSize > hbSize) return ERROR(corruption_detected);
84 return countSize;
85 } }
86 assert(hbSize >= 4);
87
88 /* init */
89 memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
76 90 bitStream = MEM_readLE32(ip);
77 91 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
78 92 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
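The new branch above makes FSE_readNCount tolerate header buffers shorter than 4 bytes: the input is copied into a zero-padded 4-byte scratch buffer, the reader is re-run on that, and a result claiming to consume more bytes than were actually supplied is rejected as corruption. The pattern, sketched in Python purely for illustration (read_ncount stands in for the real parser):

    MIN_HEADER = 4

    def read_ncount(buf):
        if len(buf) < MIN_HEADER:
            # Pad with zeroes, retry, then make sure the parse did not
            # claim to use bytes the caller never supplied.
            padded = buf + b'\x00' * (MIN_HEADER - len(buf))
            consumed = read_ncount(padded)
            if consumed > len(buf):
                raise ValueError('corruption detected')
            return consumed
        # ... real header decoding would happen here; pretend it used 3 bytes.
        return 3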
@@ -105,6 +119,7 b' size_t FSE_readNCount (short* normalized'
105 119 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
106 120 while (charnum < n0) normalizedCounter[charnum++] = 0;
107 121 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
122 assert((bitCount >> 3) <= 3); /* For first condition to work */
108 123 ip += bitCount>>3;
109 124 bitCount &= 7;
110 125 bitStream = MEM_readLE32(ip) >> bitCount;
@@ -72,6 +72,7 b' extern "C" {'
72 72 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
73 73 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
74 74
75
75 76 /*-****************************************
76 77 * FSE simple functions
77 78 ******************************************/
@@ -129,7 +130,7 b' FSE_PUBLIC_API size_t FSE_compress2 (voi'
129 130 ******************************************/
130 131 /*!
131 132 FSE_compress() does the following:
132 1. count symbol occurrence from source[] into table count[]
133 1. count symbol occurrence from source[] into table count[] (see hist.h)
133 134 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
134 135 3. save normalized counters to memory buffer using writeNCount()
135 136 4. build encoding table 'CTable' from normalized counters
@@ -147,15 +148,6 b' or to save and provide normalized distri'
147 148
148 149 /* *** COMPRESSION *** */
149 150
150 /*! FSE_count():
151 Provides the precise count of each byte within a table 'count'.
152 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
153 *maxSymbolValuePtr will be updated if detected smaller than initial value.
154 @return : the count of the most frequent symbol (which is not identified).
155 if return == srcSize, there is only one symbol.
156 Can also return an error code, which can be tested with FSE_isError(). */
157 FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
158
159 151 /*! FSE_optimalTableLog():
160 152 dynamically downsize 'tableLog' when conditions are met.
161 153 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
@@ -167,7 +159,8 b' FSE_PUBLIC_API unsigned FSE_optimalTable'
167 159 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
168 160 @return : tableLog,
169 161 or an errorCode, which can be tested using FSE_isError() */
170 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
162 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
163 const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
171 164
172 165 /*! FSE_NCountWriteBound():
173 166 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -178,8 +171,9 b' FSE_PUBLIC_API size_t FSE_NCountWriteBou'
178 171 Compactly save 'normalizedCounter' into 'buffer'.
179 172 @return : size of the compressed table,
180 173 or an errorCode, which can be tested using FSE_isError(). */
181 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
182
174 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
175 const short* normalizedCounter,
176 unsigned maxSymbolValue, unsigned tableLog);
183 177
184 178 /*! Constructor and Destructor of FSE_CTable.
185 179 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
@@ -250,7 +244,9 b' If there is an error, the function will '
250 244 @return : size read from 'rBuffer',
251 245 or an errorCode, which can be tested using FSE_isError().
252 246 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
253 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
247 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
248 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
249 const void* rBuffer, size_t rBuffSize);
254 250
255 251 /*! Constructor and Destructor of FSE_DTable.
256 252 Note that its size depends on 'tableLog' */
@@ -325,33 +321,8 b' If there is an error, the function will '
325 321
326 322
327 323 /* *****************************************
328 * FSE advanced API
329 *******************************************/
330 /* FSE_count_wksp() :
331 * Same as FSE_count(), but using an externally provided scratch buffer.
332 * `workSpace` size must be table of >= `1024` unsigned
333 */
334 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
335 const void* source, size_t sourceSize, unsigned* workSpace);
336
337 /** FSE_countFast() :
338 * same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
339 */
340 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
341
342 /* FSE_countFast_wksp() :
343 * Same as FSE_countFast(), but using an externally provided scratch buffer.
344 * `workSpace` must be a table of minimum `1024` unsigned
345 */
346 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
347
348 /*! FSE_count_simple() :
349 * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
350 * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
351 */
352 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
353
354
324 * FSE advanced API
325 ***************************************** */
355 326
356 327 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
357 328 /**< same as FSE_optimalTableLog(), which used `minus==2` */
@@ -576,6 +547,39 b' MEM_STATIC void FSE_flushCState(BIT_CStr'
576 547 }
577 548
578 549
550 /* FSE_getMaxNbBits() :
551 * Approximate maximum cost of a symbol, in bits.
552 * Fractional costs are rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
553 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
554 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
555 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
556 {
557 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
558 return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
559 }
560
561 /* FSE_bitCost() :
562 * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
563 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
564 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
565 MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
566 {
567 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
568 U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
569 U32 const threshold = (minNbBits+1) << 16;
570 assert(tableLog < 16);
571 assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
572 { U32 const tableSize = 1 << tableLog;
573 U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
574 U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
575 U32 const bitMultiplier = 1 << accuracyLog;
576 assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
577 assert(normalizedDeltaFromThreshold <= bitMultiplier);
578 return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
579 }
580 }
581
582
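FSE_bitCost() above reports an approximate symbol cost in fixed point, with accuracyLog fractional bits, by interpolating between minNbBits and minNbBits+1 whole bits. The arithmetic is easier to follow as a direct Python transcription; the inputs in the example are contrived values chosen only to satisfy the function's asserts, not data from a real table:

    def fse_bit_cost(delta_nb_bits, table_log, accuracy_log):
        # Upper 16 bits of deltaNbBits carry the minimum whole-bit cost.
        min_nb_bits = delta_nb_bits >> 16
        threshold = (min_nb_bits + 1) << 16
        table_size = 1 << table_log
        # Distance below the next whole-bit threshold...
        delta_from_threshold = threshold - (delta_nb_bits + table_size)
        # ...converted to accuracy_log fractional bits by linear interpolation.
        normalized = (delta_from_threshold << accuracy_log) >> table_log
        bit_multiplier = 1 << accuracy_log
        return (min_nb_bits + 1) * bit_multiplier - normalized

    # With accuracy_log=8 the result is in 1/256ths of a bit: here 3.5 bits.
    cost = fse_bit_cost(delta_nb_bits=260608, table_log=10, accuracy_log=8)
    print(cost / 256.0)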
579 583 /* ====== Decompression ====== */
580 584
581 585 typedef struct {
@@ -49,7 +49,7 b''
49 49 * Error Management
50 50 ****************************************************************/
51 51 #define FSE_isError ERR_isError
52 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
52 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
53 53
54 54 /* check and forward error code */
55 55 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
@@ -1,7 +1,7 b''
1 1 /* ******************************************************************
2 Huffman coder, part of New Generation Entropy library
3 header file
4 Copyright (C) 2013-2016, Yann Collet.
2 huff0 huffman codec,
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
5 5
6 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7 7
@@ -163,25 +163,25 b' HUF_PUBLIC_API size_t HUF_compress4X_wks'
163 163 /* static allocation of HUF's DTable */
164 164 typedef U32 HUF_DTable;
165 165 #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
166 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
166 #define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
167 167 HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
168 #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
168 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
169 169 HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
170 170
171 171
172 172 /* ****************************************
173 173 * Advanced decompression functions
174 174 ******************************************/
175 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
176 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
175 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
176 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
177 177
178 178 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
179 179 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
180 180 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
181 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
182 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
183 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
184 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
181 size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
182 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
183 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
184 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
185 185
186 186
187 187 /* ****************************************
@@ -208,7 +208,7 b' size_t HUF_compress4X_usingCTable(void* '
208 208 typedef enum {
209 209 HUF_repeat_none, /**< Cannot use the previous table */
210 210 HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
211 HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */
211 HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
212 212 } HUF_repeat;
213 213 /** HUF_compress4X_repeat() :
214 214 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
@@ -227,7 +227,9 b' size_t HUF_compress4X_repeat(void* dst, '
227 227 */
228 228 #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
229 229 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
230 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
230 size_t HUF_buildCTable_wksp (HUF_CElt* tree,
231 const U32* count, U32 maxSymbolValue, U32 maxNbBits,
232 void* workSpace, size_t wkspSize);
231 233
232 234 /*! HUF_readStats() :
233 235 * Read compact Huffman tree, saved by HUF_writeCTable().
@@ -242,10 +244,15 b' size_t HUF_readStats(BYTE* huffWeight, s'
242 244 * Loading a CTable saved with HUF_writeCTable() */
243 245 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
244 246
247 /** HUF_getNbBits() :
248 * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
249 * Note 1 : is not inlined, as HUF_CElt definition is private
250 * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
251 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
245 252
246 253 /*
247 254 * HUF_decompress() does the following:
248 * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
255 * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
249 256 * 2. build Huffman table from save, using HUF_readDTableX?()
250 257 * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
251 258 */
@@ -253,13 +260,13 b' size_t HUF_readCTable (HUF_CElt* CTable,'
253 260 /** HUF_selectDecoder() :
254 261 * Tells which decoder is likely to decode faster,
255 262 * based on a set of pre-computed metrics.
256 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
263 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
257 264 * Assumption : 0 < dstSize <= 128 KB */
258 265 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
259 266
260 267 /**
261 268 * The minimum workspace size for the `workSpace` used in
262 * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
269 * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
263 270 *
264 271 * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
265 272 * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
@@ -270,14 +277,14 b' U32 HUF_selectDecoder (size_t dstSize, s'
270 277 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
271 278 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
272 279
280 size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
281 size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
273 282 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
274 283 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
275 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
276 size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
277 284
278 285 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
286 size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
279 287 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
280 size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
281 288
282 289
283 290 /* ====================== */
@@ -298,25 +305,25 b' size_t HUF_compress1X_repeat(void* dst, '
298 305 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
299 306 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
300 307
301 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
302 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
308 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
309 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
303 310
304 311 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
305 312 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
306 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
307 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
308 size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
309 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
313 size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
314 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
315 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
316 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
310 317
311 318 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
319 size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
312 320 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
313 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
314 321
315 322 /* BMI2 variants.
316 323 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
317 324 */
318 325 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
319 size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
326 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
320 327 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
321 328 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
322 329
@@ -57,11 +57,23 b' MEM_STATIC void MEM_check(void) { MEM_ST'
57 57 typedef uint64_t U64;
58 58 typedef int64_t S64;
59 59 #else
60 # include <limits.h>
61 #if CHAR_BIT != 8
62 # error "this implementation requires char to be exactly 8-bit type"
63 #endif
60 64 typedef unsigned char BYTE;
65 #if USHRT_MAX != 65535
66 # error "this implementation requires short to be exactly 16-bit type"
67 #endif
61 68 typedef unsigned short U16;
62 69 typedef signed short S16;
70 #if UINT_MAX != 4294967295
71 # error "this implementation requires int to be exactly 32-bit type"
72 #endif
63 73 typedef unsigned int U32;
64 74 typedef signed int S32;
75 /* note : there are no limits defined for long long type in C90.
76 * limits exist in C99, however, in such case, <stdint.h> is preferred */
65 77 typedef unsigned long long U64;
66 78 typedef signed long long S64;
67 79 #endif
@@ -10,9 +10,10 b''
10 10
11 11
12 12 /* ====== Dependencies ======= */
13 #include <stddef.h> /* size_t */
13 #include <stddef.h> /* size_t */
14 #include "debug.h" /* assert */
15 #include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
14 16 #include "pool.h"
15 #include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
16 17
17 18 /* ====== Compiler specifics ====== */
18 19 #if defined(_MSC_VER)
@@ -33,8 +34,9 b' typedef struct POOL_job_s {'
33 34 struct POOL_ctx_s {
34 35 ZSTD_customMem customMem;
35 36 /* Keep track of the threads */
36 ZSTD_pthread_t *threads;
37 size_t numThreads;
37 ZSTD_pthread_t* threads;
38 size_t threadCapacity;
39 size_t threadLimit;
38 40
39 41 /* The queue is a circular buffer */
40 42 POOL_job *queue;
@@ -58,10 +60,10 b' struct POOL_ctx_s {'
58 60 };
59 61
60 62 /* POOL_thread() :
61 Work thread for the thread pool.
62 Waits for jobs and executes them.
63 @returns : NULL on failure else non-null.
64 */
63 * Work thread for the thread pool.
64 * Waits for jobs and executes them.
65 * @returns : NULL on failure else non-null.
66 */
65 67 static void* POOL_thread(void* opaque) {
66 68 POOL_ctx* const ctx = (POOL_ctx*)opaque;
67 69 if (!ctx) { return NULL; }
@@ -69,14 +71,17 b' static void* POOL_thread(void* opaque) {'
69 71 /* Lock the mutex and wait for a non-empty queue or until shutdown */
70 72 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
71 73
72 while (ctx->queueEmpty && !ctx->shutdown) {
74 while ( ctx->queueEmpty
75 || (ctx->numThreadsBusy >= ctx->threadLimit) ) {
76 if (ctx->shutdown) {
77 /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit),
78 * a few threads will be shutdown while !queueEmpty,
79 * but enough threads will remain active to finish the queue */
80 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
81 return opaque;
82 }
73 83 ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
74 84 }
75 /* empty => shutting down: so stop */
76 if (ctx->queueEmpty) {
77 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
78 return opaque;
79 }
80 85 /* Pop a job off the queue */
81 86 { POOL_job const job = ctx->queue[ctx->queueHead];
82 87 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
@@ -89,30 +94,32 b' static void* POOL_thread(void* opaque) {'
89 94 job.function(job.opaque);
90 95
91 96 /* If the intended queue size was 0, signal after finishing job */
97 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
98 ctx->numThreadsBusy--;
92 99 if (ctx->queueSize == 1) {
93 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
94 ctx->numThreadsBusy--;
95 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
96 100 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
97 } }
101 }
102 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
103 }
98 104 } /* for (;;) */
99 /* Unreachable */
105 assert(0); /* Unreachable */
100 106 }
101 107
102 108 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
103 109 return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
104 110 }
105 111
106 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
112 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
113 ZSTD_customMem customMem) {
107 114 POOL_ctx* ctx;
108 /* Check the parameters */
115 /* Check parameters */
109 116 if (!numThreads) { return NULL; }
110 117 /* Allocate the context and zero initialize */
111 118 ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
112 119 if (!ctx) { return NULL; }
113 120 /* Initialize the job queue.
114 * It needs one extra space since one space is wasted to differentiate empty
115 * and full queues.
121 * It needs one extra space since one space is wasted to differentiate
122 * empty and full queues.
116 123 */
117 124 ctx->queueSize = queueSize + 1;
118 125 ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
@@ -126,7 +133,7 b' POOL_ctx* POOL_create_advanced(size_t nu'
126 133 ctx->shutdown = 0;
127 134 /* Allocate space for the thread handles */
128 135 ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
129 ctx->numThreads = 0;
136 ctx->threadCapacity = 0;
130 137 ctx->customMem = customMem;
131 138 /* Check for errors */
132 139 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
@@ -134,11 +141,12 b' POOL_ctx* POOL_create_advanced(size_t nu'
134 141 { size_t i;
135 142 for (i = 0; i < numThreads; ++i) {
136 143 if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
137 ctx->numThreads = i;
144 ctx->threadCapacity = i;
138 145 POOL_free(ctx);
139 146 return NULL;
140 147 } }
141 ctx->numThreads = numThreads;
148 ctx->threadCapacity = numThreads;
149 ctx->threadLimit = numThreads;
142 150 }
143 151 return ctx;
144 152 }
@@ -156,8 +164,8 b' static void POOL_join(POOL_ctx* ctx) {'
156 164 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
157 165 /* Join all of the threads */
158 166 { size_t i;
159 for (i = 0; i < ctx->numThreads; ++i) {
160 ZSTD_pthread_join(ctx->threads[i], NULL);
167 for (i = 0; i < ctx->threadCapacity; ++i) {
168 ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
161 169 } }
162 170 }
163 171
@@ -172,24 +180,68 b' void POOL_free(POOL_ctx *ctx) {'
172 180 ZSTD_free(ctx, ctx->customMem);
173 181 }
174 182
183
184
175 185 size_t POOL_sizeof(POOL_ctx *ctx) {
176 186 if (ctx==NULL) return 0; /* supports sizeof NULL */
177 187 return sizeof(*ctx)
178 188 + ctx->queueSize * sizeof(POOL_job)
179 + ctx->numThreads * sizeof(ZSTD_pthread_t);
189 + ctx->threadCapacity * sizeof(ZSTD_pthread_t);
190 }
191
192
193 /* @return : 0 on success, 1 on error */
194 static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
195 {
196 if (numThreads <= ctx->threadCapacity) {
197 if (!numThreads) return 1;
198 ctx->threadLimit = numThreads;
199 return 0;
200 }
201 /* numThreads > threadCapacity */
202 { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
203 if (!threadPool) return 1;
204 /* replace existing thread pool */
205 memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
206 ZSTD_free(ctx->threads, ctx->customMem);
207 ctx->threads = threadPool;
208 /* Initialize additional threads */
209 { size_t threadId;
210 for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) {
211 if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) {
212 ctx->threadCapacity = threadId;
213 return 1;
214 } }
215 } }
216 /* successfully expanded */
217 ctx->threadCapacity = numThreads;
218 ctx->threadLimit = numThreads;
219 return 0;
220 }
221
222 /* @return : 0 on success, 1 on error */
223 int POOL_resize(POOL_ctx* ctx, size_t numThreads)
224 {
225 int result;
226 if (ctx==NULL) return 1;
227 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
228 result = POOL_resize_internal(ctx, numThreads);
229 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
230 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
231 return result;
180 232 }
181 233
182 234 /**
183 235 * Returns 1 if the queue is full and 0 otherwise.
184 236 *
185 * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
186 * then a queue is empty if there is a thread free and no job is waiting.
237 * When queueSize is 1 (pool was created with an intended queueSize of 0),
238 * then a queue is empty if there is a thread free _and_ no job is waiting.
187 239 */
188 240 static int isQueueFull(POOL_ctx const* ctx) {
189 241 if (ctx->queueSize > 1) {
190 242 return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
191 243 } else {
192 return ctx->numThreadsBusy == ctx->numThreads ||
244 return (ctx->numThreadsBusy == ctx->threadLimit) ||
193 245 !ctx->queueEmpty;
194 246 }
195 247 }
@@ -263,6 +315,11 b' void POOL_free(POOL_ctx* ctx) {'
263 315 (void)ctx;
264 316 }
265 317
318 int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
319 (void)ctx; (void)numThreads;
320 return 0;
321 }
322
266 323 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
267 324 (void)ctx;
268 325 function(opaque);
@@ -30,40 +30,50 b' typedef struct POOL_ctx_s POOL_ctx;'
30 30 */
31 31 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
32 32
33 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
33 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
34 ZSTD_customMem customMem);
34 35
35 36 /*! POOL_free() :
36 Free a thread pool returned by POOL_create().
37 */
37 * Free a thread pool returned by POOL_create().
38 */
38 39 void POOL_free(POOL_ctx* ctx);
39 40
41 /*! POOL_resize() :
42 * Expands or shrinks pool's number of threads.
43 * This is more efficient than releasing + creating a new context,
44 * since it tries to preserve and re-use existing threads.
45 * `numThreads` must be at least 1.
46 * @return : 0 when resize was successful,
47 * !0 (typically 1) if there is an error.
48 * note : only numThreads can be resized, queueSize remains unchanged.
49 */
50 int POOL_resize(POOL_ctx* ctx, size_t numThreads);
51
40 52 /*! POOL_sizeof() :
41 return memory usage of pool returned by POOL_create().
42 */
53 * @return threadpool memory usage
54 * note : compatible with NULL (returns 0 in this case)
55 */
43 56 size_t POOL_sizeof(POOL_ctx* ctx);
44 57
45 58 /*! POOL_function :
46 The function type that can be added to a thread pool.
47 */
59 * The function type that can be added to a thread pool.
60 */
48 61 typedef void (*POOL_function)(void*);
49 /*! POOL_add_function :
50 The function type for a generic thread pool add function.
51 */
52 typedef void (*POOL_add_function)(void*, POOL_function, void*);
53 62
54 63 /*! POOL_add() :
55 Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
56 Possibly blocks until there is room in the queue.
57 Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
58 */
64 * Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
65 * Possibly blocks until there is room in the queue.
66 * Note : The function may be executed asynchronously,
67 * therefore, `opaque` must live until function has been completed.
68 */
59 69 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
60 70
61 71
62 72 /*! POOL_tryAdd() :
63 Add the job `function(opaque)` to the thread pool if a worker is available.
64 return immediately otherwise.
65 @return : 1 if successful, 0 if not.
66 */
73 * Add the job `function(opaque)` to thread pool _if_ a worker is available.
74 * Returns immediately even if not (does not block).
75 * @return : 1 if successful, 0 if not.
76 */
67 77 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
68 78
69 79
@@ -98,6 +98,7 b''
98 98 /* Modify the local functions below should you wish to use some other memory routines */
99 99 /* for malloc(), free() */
100 100 #include <stdlib.h>
101 #include <stddef.h> /* size_t */
101 102 static void* XXH_malloc(size_t s) { return malloc(s); }
102 103 static void XXH_free (void* p) { free(p); }
103 104 /* for memcpy() */
@@ -46,11 +46,6 b' ZSTD_ErrorCode ZSTD_getErrorCode(size_t '
46 46 * provides error code string from enum */
47 47 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
48 48
49 /*! g_debuglog_enable :
50 * turn on/off debug traces (global switch) */
51 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2)
52 int g_debuglog_enable = 1;
53 #endif
54 49
55 50
56 51 /*=**************************************************************
@@ -21,6 +21,7 b''
21 21 ***************************************/
22 22 #include "compiler.h"
23 23 #include "mem.h"
24 #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
24 25 #include "error_private.h"
25 26 #define ZSTD_STATIC_LINKING_ONLY
26 27 #include "zstd.h"
@@ -38,43 +39,8 b''
38 39 extern "C" {
39 40 #endif
40 41
41
42 /*-*************************************
43 * Debug
44 ***************************************/
45 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
46 # include <assert.h>
47 #else
48 # ifndef assert
49 # define assert(condition) ((void)0)
50 # endif
51 #endif
52
53 #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
54
55 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
56 # include <stdio.h>
57 extern int g_debuglog_enable;
58 /* recommended values for ZSTD_DEBUG display levels :
59 * 1 : no display, enables assert() only
60 * 2 : reserved for currently active debug path
61 * 3 : events once per object lifetime (CCtx, CDict, etc.)
62 * 4 : events once per frame
63 * 5 : events once per block
64 * 6 : events once per sequence (*very* verbose) */
65 # define RAWLOG(l, ...) { \
66 if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
67 fprintf(stderr, __VA_ARGS__); \
68 } }
69 # define DEBUGLOG(l, ...) { \
70 if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
71 fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
72 fprintf(stderr, " \n"); \
73 } }
74 #else
75 # define RAWLOG(l, ...) {} /* disabled */
76 # define DEBUGLOG(l, ...) {} /* disabled */
77 #endif
42 /* ---- static assert (debug) --- */
43 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
78 44
79 45
80 46 /*-*************************************
@@ -113,8 +79,7 b' static const U32 repStartValue[ZSTD_REP_'
113 79 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
114 80 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
115 81
116 #define ZSTD_FRAMEIDSIZE 4
117 static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
82 #define ZSTD_FRAMEIDSIZE 4 /* magic number size */
118 83
119 84 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
120 85 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
@@ -227,6 +192,8 b' typedef struct {'
227 192 BYTE* llCode;
228 193 BYTE* mlCode;
229 194 BYTE* ofCode;
195 size_t maxNbSeq;
196 size_t maxNbLit;
230 197 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
231 198 U32 longLengthPos;
232 199 } seqStore_t;
@@ -1,6 +1,6 b''
1 1 /* ******************************************************************
2 2 FSE : Finite State Entropy encoder
3 Copyright (C) 2013-2015, Yann Collet.
3 Copyright (C) 2013-present, Yann Collet.
4 4
5 5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 6
@@ -37,9 +37,11 b''
37 37 ****************************************************************/
38 38 #include <stdlib.h> /* malloc, free, qsort */
39 39 #include <string.h> /* memcpy, memset */
40 #include <stdio.h> /* printf (debug) */
40 #include "compiler.h"
41 #include "mem.h" /* U32, U16, etc. */
42 #include "debug.h" /* assert, DEBUGLOG */
43 #include "hist.h" /* HIST_count_wksp */
41 44 #include "bitstream.h"
42 #include "compiler.h"
43 45 #define FSE_STATIC_LINKING_ONLY
44 46 #include "fse.h"
45 47 #include "error_private.h"
@@ -49,7 +51,6 b''
49 51 * Error Management
50 52 ****************************************************************/
51 53 #define FSE_isError ERR_isError
52 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
53 54
54 55
55 56 /* **************************************************************
@@ -82,7 +83,9 b''
82 83 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
83 84 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
84 85 */
85 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
86 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
87 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
88 void* workSpace, size_t wkspSize)
86 89 {
87 90 U32 const tableSize = 1 << tableLog;
88 91 U32 const tableMask = tableSize - 1;
@@ -100,9 +103,14 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
100 103 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
101 104 tableU16[-2] = (U16) tableLog;
102 105 tableU16[-1] = (U16) maxSymbolValue;
106 assert(tableLog < 16); /* required for threshold strategy to work */
103 107
104 108 /* For explanations on how to distribute symbol values over the table :
105 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
109 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
110
111 #ifdef __clang_analyzer__
112 memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
113 #endif
106 114
107 115 /* symbol start positions */
108 116 { U32 u;
@@ -122,13 +130,15 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
122 130 U32 symbol;
123 131 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
124 132 int nbOccurences;
125 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
133 int const freq = normalizedCounter[symbol];
134 for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
126 135 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
127 136 position = (position + step) & tableMask;
128 while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
137 while (position > highThreshold)
138 position = (position + step) & tableMask; /* Low proba area */
129 139 } }
130 140
131 if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */
141 assert(position==0); /* Must have initialized all positions */
132 142 }
133 143
134 144 /* Build table */
@@ -143,7 +153,10 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
143 153 for (s=0; s<=maxSymbolValue; s++) {
144 154 switch (normalizedCounter[s])
145 155 {
146 case 0: break;
156 case 0:
157 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
158 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
159 break;
147 160
148 161 case -1:
149 162 case 1:
@@ -160,6 +173,18 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
160 173 total += normalizedCounter[s];
161 174 } } } }
162 175
176 #if 0 /* debug : symbol costs */
177 DEBUGLOG(5, "\n --- table statistics : ");
178 { U32 symbol;
179 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
180 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
181 symbol, normalizedCounter[symbol],
182 FSE_getMaxNbBits(symbolTT, symbol),
183 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
184 }
185 }
186 #endif
187
163 188 return 0;
164 189 }
165 190
@@ -174,8 +199,9 b' size_t FSE_buildCTable(FSE_CTable* ct, c'
174 199
175 200 #ifndef FSE_COMMONDEFS_ONLY
176 201
202
177 203 /*-**************************************************************
178 * FSE NCount encoding-decoding
204 * FSE NCount encoding
179 205 ****************************************************************/
180 206 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
181 207 {
@@ -183,9 +209,10 b' size_t FSE_NCountWriteBound(unsigned max'
183 209 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
184 210 }
185 211
186 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
187 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
188 unsigned writeIsSafe)
212 static size_t
213 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
214 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
215 unsigned writeIsSafe)
189 216 {
190 217 BYTE* const ostart = (BYTE*) header;
191 218 BYTE* out = ostart;
@@ -194,13 +221,12 b' static size_t FSE_writeNCount_generic (v'
194 221 const int tableSize = 1 << tableLog;
195 222 int remaining;
196 223 int threshold;
197 U32 bitStream;
198 int bitCount;
199 unsigned charnum = 0;
200 int previous0 = 0;
224 U32 bitStream = 0;
225 int bitCount = 0;
226 unsigned symbol = 0;
227 unsigned const alphabetSize = maxSymbolValue + 1;
228 int previousIs0 = 0;
201 229
202 bitStream = 0;
203 bitCount = 0;
204 230 /* Table Size */
205 231 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
206 232 bitCount += 4;
@@ -210,48 +236,53 b' static size_t FSE_writeNCount_generic (v'
210 236 threshold = tableSize;
211 237 nbBits = tableLog+1;
212 238
213 while (remaining>1) { /* stops at 1 */
214 if (previous0) {
215 unsigned start = charnum;
216 while (!normalizedCounter[charnum]) charnum++;
217 while (charnum >= start+24) {
239 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
240 if (previousIs0) {
241 unsigned start = symbol;
242 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
243 if (symbol == alphabetSize) break; /* incorrect distribution */
244 while (symbol >= start+24) {
218 245 start+=24;
219 246 bitStream += 0xFFFFU << bitCount;
220 if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
247 if ((!writeIsSafe) && (out > oend-2))
248 return ERROR(dstSize_tooSmall); /* Buffer overflow */
221 249 out[0] = (BYTE) bitStream;
222 250 out[1] = (BYTE)(bitStream>>8);
223 251 out+=2;
224 252 bitStream>>=16;
225 253 }
226 while (charnum >= start+3) {
254 while (symbol >= start+3) {
227 255 start+=3;
228 256 bitStream += 3 << bitCount;
229 257 bitCount += 2;
230 258 }
231 bitStream += (charnum-start) << bitCount;
259 bitStream += (symbol-start) << bitCount;
232 260 bitCount += 2;
233 261 if (bitCount>16) {
234 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
262 if ((!writeIsSafe) && (out > oend - 2))
263 return ERROR(dstSize_tooSmall); /* Buffer overflow */
235 264 out[0] = (BYTE)bitStream;
236 265 out[1] = (BYTE)(bitStream>>8);
237 266 out += 2;
238 267 bitStream >>= 16;
239 268 bitCount -= 16;
240 269 } }
241 { int count = normalizedCounter[charnum++];
242 int const max = (2*threshold-1)-remaining;
270 { int count = normalizedCounter[symbol++];
271 int const max = (2*threshold-1) - remaining;
243 272 remaining -= count < 0 ? -count : count;
244 273 count++; /* +1 for extra accuracy */
245 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
274 if (count>=threshold)
275 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
246 276 bitStream += count << bitCount;
247 277 bitCount += nbBits;
248 278 bitCount -= (count<max);
249 previous0 = (count==1);
279 previousIs0 = (count==1);
250 280 if (remaining<1) return ERROR(GENERIC);
251 281 while (remaining<threshold) { nbBits--; threshold>>=1; }
252 282 }
253 283 if (bitCount>16) {
254 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
284 if ((!writeIsSafe) && (out > oend - 2))
285 return ERROR(dstSize_tooSmall); /* Buffer overflow */
255 286 out[0] = (BYTE)bitStream;
256 287 out[1] = (BYTE)(bitStream>>8);
257 288 out += 2;
@@ -259,19 +290,23 b' static size_t FSE_writeNCount_generic (v'
259 290 bitCount -= 16;
260 291 } }
261 292
293 if (remaining != 1)
294 return ERROR(GENERIC); /* incorrect normalized distribution */
295 assert(symbol <= alphabetSize);
296
262 297 /* flush remaining bitStream */
263 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
298 if ((!writeIsSafe) && (out > oend - 2))
299 return ERROR(dstSize_tooSmall); /* Buffer overflow */
264 300 out[0] = (BYTE)bitStream;
265 301 out[1] = (BYTE)(bitStream>>8);
266 302 out+= (bitCount+7) /8;
267 303
268 if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
269
270 304 return (out-ostart);
271 305 }
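FSE_writeNCount_generic() above serializes the normalized counts by accumulating variable-width fields into a 32-bit bitStream and flushing two bytes whenever more than 16 bits are pending. A reduced sketch of that accumulate-and-flush pattern, with illustrative names and no bounds checking:

/* Sketch of the 16-bit flush pattern; `out` must have room for the flushes. */
typedef struct { unsigned char* out; unsigned bitStream; int bitCount; } BitWriter;

static void bw_add(BitWriter* bw, unsigned value, int nbBits)
{
    bw->bitStream += value << bw->bitCount;   /* append field at current bit position */
    bw->bitCount  += nbBits;
    if (bw->bitCount > 16) {                  /* flush two bytes, keep the remainder */
        bw->out[0] = (unsigned char) bw->bitStream;
        bw->out[1] = (unsigned char)(bw->bitStream >> 8);
        bw->out   += 2;
        bw->bitStream >>= 16;
        bw->bitCount  -= 16;
    }
}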
272 306
273 307
274 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
308 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
309 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
275 310 {
276 311 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
277 312 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
@@ -279,179 +314,13 b' size_t FSE_writeNCount (void* buffer, si'
279 314 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
280 315 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
281 316
282 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
283 }
284
285
286
287 /*-**************************************************************
288 * Counting histogram
289 ****************************************************************/
290 /*! FSE_count_simple
291 This function counts byte values within `src`, and stores the histogram into table `count`.
292 It doesn't use any additional memory.
293 But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
294 For this reason, prefer using a table `count` with 256 elements.
295 @return : count of most numerous element.
296 */
297 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
298 const void* src, size_t srcSize)
299 {
300 const BYTE* ip = (const BYTE*)src;
301 const BYTE* const end = ip + srcSize;
302 unsigned maxSymbolValue = *maxSymbolValuePtr;
303 unsigned max=0;
304
305 memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
306 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
307
308 while (ip<end) {
309 assert(*ip <= maxSymbolValue);
310 count[*ip++]++;
311 }
312
313 while (!count[maxSymbolValue]) maxSymbolValue--;
314 *maxSymbolValuePtr = maxSymbolValue;
315
316 { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
317
318 return (size_t)max;
317 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
319 318 }
320 319
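The histogram helpers removed in this hunk (FSE_count_simple() above, plus the parallel and workspace variants that follow) reappear in this changeset behind the new HIST_* interface (see the hist.h include and HIST_count_wksp() calls later in the diff). The simple variant just tallies every byte and reports the highest frequency; a self-contained sketch of that idea, with an illustrative name rather than the vendored implementation:

#include <stddef.h>
#include <string.h>

/* Count every byte value and return the frequency of the most common one. */
static size_t histogram_simple(unsigned count[256], const unsigned char* src, size_t srcSize)
{
    size_t i;
    unsigned max = 0;
    memset(count, 0, 256 * sizeof(unsigned));
    for (i = 0; i < srcSize; i++) count[src[i]]++;
    for (i = 0; i < 256; i++) if (count[i] > max) max = count[i];
    return (size_t)max;
}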
321 320
322 /* FSE_count_parallel_wksp() :
323 * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
324 * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
325 * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
326 static size_t FSE_count_parallel_wksp(
327 unsigned* count, unsigned* maxSymbolValuePtr,
328 const void* source, size_t sourceSize,
329 unsigned checkMax, unsigned* const workSpace)
330 {
331 const BYTE* ip = (const BYTE*)source;
332 const BYTE* const iend = ip+sourceSize;
333 unsigned maxSymbolValue = *maxSymbolValuePtr;
334 unsigned max=0;
335 U32* const Counting1 = workSpace;
336 U32* const Counting2 = Counting1 + 256;
337 U32* const Counting3 = Counting2 + 256;
338 U32* const Counting4 = Counting3 + 256;
339
340 memset(workSpace, 0, 4*256*sizeof(unsigned));
341
342 /* safety checks */
343 if (!sourceSize) {
344 memset(count, 0, maxSymbolValue + 1);
345 *maxSymbolValuePtr = 0;
346 return 0;
347 }
348 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
349
350 /* by stripes of 16 bytes */
351 { U32 cached = MEM_read32(ip); ip += 4;
352 while (ip < iend-15) {
353 U32 c = cached; cached = MEM_read32(ip); ip += 4;
354 Counting1[(BYTE) c ]++;
355 Counting2[(BYTE)(c>>8) ]++;
356 Counting3[(BYTE)(c>>16)]++;
357 Counting4[ c>>24 ]++;
358 c = cached; cached = MEM_read32(ip); ip += 4;
359 Counting1[(BYTE) c ]++;
360 Counting2[(BYTE)(c>>8) ]++;
361 Counting3[(BYTE)(c>>16)]++;
362 Counting4[ c>>24 ]++;
363 c = cached; cached = MEM_read32(ip); ip += 4;
364 Counting1[(BYTE) c ]++;
365 Counting2[(BYTE)(c>>8) ]++;
366 Counting3[(BYTE)(c>>16)]++;
367 Counting4[ c>>24 ]++;
368 c = cached; cached = MEM_read32(ip); ip += 4;
369 Counting1[(BYTE) c ]++;
370 Counting2[(BYTE)(c>>8) ]++;
371 Counting3[(BYTE)(c>>16)]++;
372 Counting4[ c>>24 ]++;
373 }
374 ip-=4;
375 }
376
377 /* finish last symbols */
378 while (ip<iend) Counting1[*ip++]++;
379
380 if (checkMax) { /* verify stats will fit into destination table */
381 U32 s; for (s=255; s>maxSymbolValue; s--) {
382 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
383 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
384 } }
385
386 { U32 s;
387 if (maxSymbolValue > 255) maxSymbolValue = 255;
388 for (s=0; s<=maxSymbolValue; s++) {
389 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
390 if (count[s] > max) max = count[s];
391 } }
392
393 while (!count[maxSymbolValue]) maxSymbolValue--;
394 *maxSymbolValuePtr = maxSymbolValue;
395 return (size_t)max;
396 }
397
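FSE_count_parallel_wksp() above spreads increments across four 256-entry tables and reads the input in 16-byte stripes, so consecutive increments rarely touch the same counter and stall the pipeline. A reduced, byte-at-a-time sketch of the same idea with illustrative names:

#include <stddef.h>

/* Four partial tables are merged at the end; the result is identical to a
 * single-table count, only the increment dependencies change. */
static void histogram_4tables(unsigned count[256], const unsigned char* src, size_t srcSize)
{
    unsigned c1[256] = {0}, c2[256] = {0}, c3[256] = {0}, c4[256] = {0};
    size_t i = 0;
    for (; i + 4 <= srcSize; i += 4) {
        c1[src[i+0]]++;
        c2[src[i+1]]++;
        c3[src[i+2]]++;
        c4[src[i+3]]++;
    }
    for (; i < srcSize; i++) c1[src[i]]++;          /* tail */
    for (i = 0; i < 256; i++)
        count[i] = c1[i] + c2[i] + c3[i] + c4[i];
}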
398 /* FSE_countFast_wksp() :
399 * Same as FSE_countFast(), but using an externally provided scratch buffer.
400 * `workSpace` size must be table of >= `1024` unsigned */
401 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
402 const void* source, size_t sourceSize,
403 unsigned* workSpace)
404 {
405 if (sourceSize < 1500) /* heuristic threshold */
406 return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
407 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
408 }
409
410 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
411 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
412 const void* source, size_t sourceSize)
413 {
414 unsigned tmpCounters[1024];
415 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
416 }
417
418 /* FSE_count_wksp() :
419 * Same as FSE_count(), but using an externally provided scratch buffer.
420 * `workSpace` size must be table of >= `1024` unsigned */
421 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
422 const void* source, size_t sourceSize, unsigned* workSpace)
423 {
424 if (*maxSymbolValuePtr < 255)
425 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
426 *maxSymbolValuePtr = 255;
427 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
428 }
429
430 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
431 const void* src, size_t srcSize)
432 {
433 unsigned tmpCounters[1024];
434 return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
435 }
436
437
438
439 321 /*-**************************************************************
440 322 * FSE Compression Code
441 323 ****************************************************************/
442 /*! FSE_sizeof_CTable() :
443 FSE_CTable is a variable size structure which contains :
444 `U16 tableLog;`
445 `U16 maxSymbolValue;`
446 `U16 nextStateNumber[1 << tableLog];` // This size is variable
447 `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
448 Allocation is manual (C standard does not support variable-size structures).
449 */
450 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
451 {
452 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
453 return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
454 }
455 324
456 325 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
457 326 {
@@ -466,7 +335,7 b' void FSE_freeCTable (FSE_CTable* ct) { f'
466 335 /* provides the minimum logSize to safely represent a distribution */
467 336 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
468 337 {
469 U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
338 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
470 339 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
471 340 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
472 341 assert(srcSize > 1); /* Not supported, RLE should be used instead */
@@ -529,6 +398,9 b' static size_t FSE_normalizeM2(short* nor'
529 398 }
530 399 ToDistribute = (1 << tableLog) - distributed;
531 400
401 if (ToDistribute == 0)
402 return 0;
403
532 404 if ((total / ToDistribute) > lowOne) {
533 405 /* risk of rounding to zero */
534 406 lowOne = (U32)((total * 3) / (ToDistribute * 2));
@@ -629,11 +501,11 b' size_t FSE_normalizeCount (short* normal'
629 501 U32 s;
630 502 U32 nTotal = 0;
631 503 for (s=0; s<=maxSymbolValue; s++)
632 printf("%3i: %4i \n", s, normalizedCounter[s]);
504 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
633 505 for (s=0; s<=maxSymbolValue; s++)
634 506 nTotal += abs(normalizedCounter[s]);
635 507 if (nTotal != (1U<<tableLog))
636 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
508 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
637 509 getchar();
638 510 }
639 511 #endif
@@ -800,7 +672,7 b' size_t FSE_compress_wksp (void* dst, siz'
800 672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
801 673
802 674 /* Scan input and build symbol stats */
803 { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
804 676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
805 677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
806 678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
@@ -835,7 +707,7 b' typedef struct {'
835 707 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
836 708 {
837 709 fseWkspMax_t scratchBuffer;
838 FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here mean scratchBuffer is not large enough */
710 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here mean scratchBuffer is not large enough */
839 711 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
840 712 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
841 713 }
@@ -45,8 +45,9 b''
45 45 ****************************************************************/
46 46 #include <string.h> /* memcpy, memset */
47 47 #include <stdio.h> /* printf (debug) */
48 #include "compiler.h"
48 49 #include "bitstream.h"
49 #include "compiler.h"
50 #include "hist.h"
50 51 #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
51 52 #include "fse.h" /* header compression */
52 53 #define HUF_STATIC_LINKING_ONLY
@@ -58,7 +59,7 b''
58 59 * Error Management
59 60 ****************************************************************/
60 61 #define HUF_isError ERR_isError
61 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
62 #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
62 63 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
63 64 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
64 65
@@ -81,7 +82,7 b' unsigned HUF_optimalTableLog(unsigned ma'
81 82 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
82 83 */
83 84 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
84 size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
85 static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
85 86 {
86 87 BYTE* const ostart = (BYTE*) dst;
87 88 BYTE* op = ostart;
@@ -100,9 +101,9 b' size_t HUF_compressWeights (void* dst, s'
100 101 if (wtSize <= 1) return 0; /* Not compressible */
101 102
102 103 /* Scan input and build symbol stats */
103 { CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
104 { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
104 105 if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
105 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
106 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
106 107 }
107 108
108 109 tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
@@ -216,6 +217,13 b' size_t HUF_readCTable (HUF_CElt* CTable,'
216 217 return readSize;
217 218 }
218 219
220 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
221 {
222 const HUF_CElt* table = (const HUF_CElt*)symbolTable;
223 assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224 return table[symbolValue].nbBits;
225 }
226
219 227
220 228 typedef struct nodeElt_s {
221 229 U32 count;
@@ -660,9 +668,9 b' static size_t HUF_compress_internal ('
660 668 }
661 669
662 670 /* Scan input and build symbol stats */
663 { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
671 { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
664 672 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
665 if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
673 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
666 674 }
667 675
668 676 /* Check validity of previous table */
@@ -8,21 +8,13 b''
8 8 * You may select, at your option, one of the above-listed licenses.
9 9 */
10 10
11
12 /*-*************************************
13 * Tuning parameters
14 ***************************************/
15 #ifndef ZSTD_CLEVEL_DEFAULT
16 # define ZSTD_CLEVEL_DEFAULT 3
17 #endif
18
19
20 11 /*-*************************************
21 12 * Dependencies
22 13 ***************************************/
23 14 #include <string.h> /* memset */
24 15 #include "cpu.h"
25 16 #include "mem.h"
17 #include "hist.h" /* HIST_countFast_wksp */
26 18 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
27 19 #include "fse.h"
28 20 #define HUF_STATIC_LINKING_ONLY
@@ -54,7 +46,6 b' struct ZSTD_CDict_s {'
54 46 size_t workspaceSize;
55 47 ZSTD_matchState_t matchState;
56 48 ZSTD_compressedBlockState_t cBlockState;
57 ZSTD_compressionParameters cParams;
58 49 ZSTD_customMem customMem;
59 50 U32 dictID;
60 51 }; /* typedef'd to ZSTD_CDict within "zstd.h" */
@@ -64,17 +55,26 b' ZSTD_CCtx* ZSTD_createCCtx(void)'
64 55 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
65 56 }
66 57
58 static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
59 {
60 assert(cctx != NULL);
61 memset(cctx, 0, sizeof(*cctx));
62 cctx->customMem = memManager;
63 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
64 { size_t const err = ZSTD_CCtx_resetParameters(cctx);
65 assert(!ZSTD_isError(err));
66 (void)err;
67 }
68 }
69
67 70 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
68 71 {
69 72 ZSTD_STATIC_ASSERT(zcss_init==0);
70 73 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
71 74 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
72 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
75 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
73 76 if (!cctx) return NULL;
74 cctx->customMem = customMem;
75 cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
76 cctx->requestedParams.fParams.contentSizeFlag = 1;
77 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
77 ZSTD_initCCtx(cctx, customMem);
78 78 return cctx;
79 79 }
80 80 }
@@ -102,17 +102,24 b' ZSTD_CCtx* ZSTD_initStaticCCtx(void *wor'
102 102 return cctx;
103 103 }
104 104
105 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
105 static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
106 106 {
107 if (cctx==NULL) return 0; /* support free on NULL */
108 if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */
107 assert(cctx != NULL);
108 assert(cctx->staticSize == 0);
109 109 ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
110 110 ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL;
111 111 #ifdef ZSTD_MULTITHREAD
112 112 ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
113 113 #endif
114 }
115
116 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
117 {
118 if (cctx==NULL) return 0; /* support free on NULL */
119 if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */
120 ZSTD_freeCCtxContent(cctx);
114 121 ZSTD_free(cctx, cctx->customMem);
115 return 0; /* reserved as a potential error code in the future */
122 return 0;
116 123 }
117 124
118 125
@@ -143,21 +150,6 b' size_t ZSTD_sizeof_CStream(const ZSTD_CS'
143 150 /* private API call, for dictBuilder only */
144 151 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
145 152
146 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
147 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
148 {
149 ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
150 if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
151 if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
152 if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
153 if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
154 if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
155 if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength;
156 if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
157 if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
158 return cParams;
159 }
160
161 153 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
162 154 ZSTD_compressionParameters cParams)
163 155 {
@@ -251,7 +243,6 b' static int ZSTD_isUpdateAuthorized(ZSTD_'
251 243 case ZSTD_p_minMatch:
252 244 case ZSTD_p_targetLength:
253 245 case ZSTD_p_compressionStrategy:
254 case ZSTD_p_compressLiterals:
255 246 return 1;
256 247
257 248 case ZSTD_p_format:
@@ -268,6 +259,7 b' static int ZSTD_isUpdateAuthorized(ZSTD_'
268 259 case ZSTD_p_ldmMinMatch:
269 260 case ZSTD_p_ldmBucketSizeLog:
270 261 case ZSTD_p_ldmHashEveryLog:
262 case ZSTD_p_forceAttachDict:
271 263 default:
272 264 return 0;
273 265 }
@@ -302,7 +294,6 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*'
302 294 if (cctx->cdict) return ERROR(stage_wrong);
303 295 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
304 296
305 case ZSTD_p_compressLiterals:
306 297 case ZSTD_p_contentSizeFlag:
307 298 case ZSTD_p_checksumFlag:
308 299 case ZSTD_p_dictIDFlag:
@@ -313,6 +304,9 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*'
313 304 * default : 0 when using a CDict, 1 when using a Prefix */
314 305 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
315 306
307 case ZSTD_p_forceAttachDict:
308 return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
309
316 310 case ZSTD_p_nbWorkers:
317 311 if ((value>0) && cctx->staticSize) {
318 312 return ERROR(parameter_unsupported); /* MT not compatible with static alloc */
@@ -351,7 +345,6 b' size_t ZSTD_CCtxParam_setParameter('
351 345 int cLevel = (int)value; /* cast expected to restore negative sign */
352 346 if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
353 347 if (cLevel) { /* 0 : does not change current level */
354 CCtxParams->disableLiteralCompression = (cLevel<0); /* negative levels disable huffman */
355 348 CCtxParams->compressionLevel = cLevel;
356 349 }
357 350 if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
@@ -399,10 +392,6 b' size_t ZSTD_CCtxParam_setParameter('
399 392 CCtxParams->cParams.strategy = (ZSTD_strategy)value;
400 393 return (size_t)CCtxParams->cParams.strategy;
401 394
402 case ZSTD_p_compressLiterals:
403 CCtxParams->disableLiteralCompression = !value;
404 return !CCtxParams->disableLiteralCompression;
405
406 395 case ZSTD_p_contentSizeFlag :
407 396 /* Content size written in frame header _when known_ (default:1) */
408 397 DEBUGLOG(4, "set content size flag = %u", (value>0));
@@ -423,6 +412,12 b' size_t ZSTD_CCtxParam_setParameter('
423 412 CCtxParams->forceWindow = (value > 0);
424 413 return CCtxParams->forceWindow;
425 414
415 case ZSTD_p_forceAttachDict :
416 CCtxParams->attachDictPref = value ?
417 (value > 0 ? ZSTD_dictForceAttach : ZSTD_dictForceCopy) :
418 ZSTD_dictDefaultAttach;
419 return CCtxParams->attachDictPref;
420
426 421 case ZSTD_p_nbWorkers :
427 422 #ifndef ZSTD_MULTITHREAD
428 423 if (value>0) return ERROR(parameter_unsupported);
@@ -477,6 +472,98 b' size_t ZSTD_CCtxParam_setParameter('
477 472 }
478 473 }
479 474
475 size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value)
476 {
477 return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value);
478 }
479
480 size_t ZSTD_CCtxParam_getParameter(
481 ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned* value)
482 {
483 switch(param)
484 {
485 case ZSTD_p_format :
486 *value = CCtxParams->format;
487 break;
488 case ZSTD_p_compressionLevel :
489 *value = CCtxParams->compressionLevel;
490 break;
491 case ZSTD_p_windowLog :
492 *value = CCtxParams->cParams.windowLog;
493 break;
494 case ZSTD_p_hashLog :
495 *value = CCtxParams->cParams.hashLog;
496 break;
497 case ZSTD_p_chainLog :
498 *value = CCtxParams->cParams.chainLog;
499 break;
500 case ZSTD_p_searchLog :
501 *value = CCtxParams->cParams.searchLog;
502 break;
503 case ZSTD_p_minMatch :
504 *value = CCtxParams->cParams.searchLength;
505 break;
506 case ZSTD_p_targetLength :
507 *value = CCtxParams->cParams.targetLength;
508 break;
509 case ZSTD_p_compressionStrategy :
510 *value = (unsigned)CCtxParams->cParams.strategy;
511 break;
512 case ZSTD_p_contentSizeFlag :
513 *value = CCtxParams->fParams.contentSizeFlag;
514 break;
515 case ZSTD_p_checksumFlag :
516 *value = CCtxParams->fParams.checksumFlag;
517 break;
518 case ZSTD_p_dictIDFlag :
519 *value = !CCtxParams->fParams.noDictIDFlag;
520 break;
521 case ZSTD_p_forceMaxWindow :
522 *value = CCtxParams->forceWindow;
523 break;
524 case ZSTD_p_forceAttachDict :
525 *value = CCtxParams->attachDictPref;
526 break;
527 case ZSTD_p_nbWorkers :
528 #ifndef ZSTD_MULTITHREAD
529 assert(CCtxParams->nbWorkers == 0);
530 #endif
531 *value = CCtxParams->nbWorkers;
532 break;
533 case ZSTD_p_jobSize :
534 #ifndef ZSTD_MULTITHREAD
535 return ERROR(parameter_unsupported);
536 #else
537 *value = CCtxParams->jobSize;
538 break;
539 #endif
540 case ZSTD_p_overlapSizeLog :
541 #ifndef ZSTD_MULTITHREAD
542 return ERROR(parameter_unsupported);
543 #else
544 *value = CCtxParams->overlapSizeLog;
545 break;
546 #endif
547 case ZSTD_p_enableLongDistanceMatching :
548 *value = CCtxParams->ldmParams.enableLdm;
549 break;
550 case ZSTD_p_ldmHashLog :
551 *value = CCtxParams->ldmParams.hashLog;
552 break;
553 case ZSTD_p_ldmMinMatch :
554 *value = CCtxParams->ldmParams.minMatchLength;
555 break;
556 case ZSTD_p_ldmBucketSizeLog :
557 *value = CCtxParams->ldmParams.bucketSizeLog;
558 break;
559 case ZSTD_p_ldmHashEveryLog :
560 *value = CCtxParams->ldmParams.hashEveryLog;
561 break;
562 default: return ERROR(parameter_unsupported);
563 }
564 return 0;
565 }
566
480 567 /** ZSTD_CCtx_setParametersUsingCCtxParams() :
481 568 * just applies `params` into `cctx`
482 569 * no action is performed, parameters are merely stored.
@@ -487,6 +574,7 b' size_t ZSTD_CCtxParam_setParameter('
487 574 size_t ZSTD_CCtx_setParametersUsingCCtxParams(
488 575 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
489 576 {
577 DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
490 578 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
491 579 if (cctx->cdict) return ERROR(stage_wrong);
492 580
@@ -565,18 +653,19 b' size_t ZSTD_CCtx_refPrefix_advanced('
565 653 return 0;
566 654 }
567 655
568 static void ZSTD_startNewCompression(ZSTD_CCtx* cctx)
656 /*! ZSTD_CCtx_reset() :
657 * Also dumps dictionary */
658 void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
569 659 {
570 660 cctx->streamStage = zcss_init;
571 661 cctx->pledgedSrcSizePlusOne = 0;
572 662 }
573 663
574 /*! ZSTD_CCtx_reset() :
575 * Also dumps dictionary */
576 void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
664 size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx)
577 665 {
578 ZSTD_startNewCompression(cctx);
666 if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
579 667 cctx->cdict = NULL;
668 return ZSTD_CCtxParams_reset(&cctx->requestedParams);
580 669 }
581 670
582 671 /** ZSTD_checkCParams() :
@@ -589,8 +678,9 b' size_t ZSTD_checkCParams(ZSTD_compressio'
589 678 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
590 679 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
591 680 CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
592 if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN)
593 return ERROR(parameter_unsupported);
681 ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
682 if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
683 return ERROR(parameter_outOfBound);
594 684 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra)
595 685 return ERROR(parameter_unsupported);
596 686 return 0;
@@ -599,7 +689,8 b' size_t ZSTD_checkCParams(ZSTD_compressio'
599 689 /** ZSTD_clampCParams() :
600 690 * make CParam values within valid range.
601 691 * @return : valid CParams */
602 static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams)
692 static ZSTD_compressionParameters
693 ZSTD_clampCParams(ZSTD_compressionParameters cParams)
603 694 {
604 695 # define CLAMP(val,min,max) { \
605 696 if (val<min) val=min; \
@@ -610,8 +701,10 b' static ZSTD_compressionParameters ZSTD_c'
610 701 CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
611 702 CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
612 703 CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
613 if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) cParams.targetLength = ZSTD_TARGETLENGTH_MIN;
614 if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra;
704 ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
705 if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
706 cParams.targetLength = ZSTD_TARGETLENGTH_MAX;
707 CLAMP(cParams.strategy, ZSTD_fast, ZSTD_btultra);
615 708 return cParams;
616 709 }
617 710
@@ -627,8 +720,11 b' static U32 ZSTD_cycleLog(U32 hashLog, ZS'
627 720 optimize `cPar` for a given input (`srcSize` and `dictSize`).
628 721 mostly downsizing to reduce memory consumption and initialization latency.
629 722 Both `srcSize` and `dictSize` are optional (use 0 if unknown).
630 Note : cPar is considered validated at this stage. Use ZSTD_checkCParams() to ensure that condition. */
631 ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
723 Note : cPar is assumed validated. Use ZSTD_checkCParams() to ensure this condition. */
724 static ZSTD_compressionParameters
725 ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
726 unsigned long long srcSize,
727 size_t dictSize)
632 728 {
633 729 static const U64 minSrcSize = 513; /* (1<<9) + 1 */
634 730 static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
@@ -648,7 +744,7 b' ZSTD_compressionParameters ZSTD_adjustCP'
648 744 ZSTD_highbit32(tSize-1) + 1;
649 745 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
650 746 }
651 if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
747 if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
652 748 { U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
653 749 if (cycleLog > cPar.windowLog)
654 750 cPar.chainLog -= (cycleLog - cPar.windowLog);
@@ -660,13 +756,34 b' ZSTD_compressionParameters ZSTD_adjustCP'
660 756 return cPar;
661 757 }
662 758
663 ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
759 ZSTD_compressionParameters
760 ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
761 unsigned long long srcSize,
762 size_t dictSize)
664 763 {
665 764 cPar = ZSTD_clampCParams(cPar);
666 765 return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
667 766 }
668 767
669 static size_t ZSTD_sizeof_matchState(ZSTD_compressionParameters const* cParams, const U32 forCCtx)
768 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
769 const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
770 {
771 ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
772 if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
773 if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
774 if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
775 if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
776 if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
777 if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength;
778 if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
779 if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
780 assert(!ZSTD_checkCParams(cParams));
781 return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
782 }
783
784 static size_t
785 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
786 const U32 forCCtx)
670 787 {
671 788 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
672 789 size_t const hSize = ((size_t)1) << cParams->hashLog;
@@ -693,7 +810,7 b' size_t ZSTD_estimateCCtxSize_usingCCtxPa'
693 810 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
694 811 U32 const divider = (cParams.searchLength==3) ? 3 : 4;
695 812 size_t const maxNbSeq = blockSize / divider;
696 size_t const tokenSpace = blockSize + 11*maxNbSeq;
813 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
697 814 size_t const entropySpace = HUF_WORKSPACE_SIZE;
698 815 size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
699 816 size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
@@ -752,12 +869,14 b' size_t ZSTD_estimateCStreamSize_usingCPa'
752 869 return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
753 870 }
754 871
755 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) {
872 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
873 {
756 874 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
757 875 return ZSTD_estimateCStreamSize_usingCParams(cParams);
758 876 }
759 877
760 size_t ZSTD_estimateCStreamSize(int compressionLevel) {
878 size_t ZSTD_estimateCStreamSize(int compressionLevel)
879 {
761 880 int level;
762 881 size_t memBudget = 0;
763 882 for (level=1; level<=compressionLevel; level++) {
@@ -786,9 +905,27 b' ZSTD_frameProgression ZSTD_getFrameProgr'
786 905 fp.ingested = cctx->consumedSrcSize + buffered;
787 906 fp.consumed = cctx->consumedSrcSize;
788 907 fp.produced = cctx->producedCSize;
908 fp.flushed = cctx->producedCSize; /* simplified; some data might still be left within streaming output buffer */
909 fp.currentJobID = 0;
910 fp.nbActiveWorkers = 0;
789 911 return fp;
790 912 } }
791 913
914 /*! ZSTD_toFlushNow()
915 * Only useful for multithreading scenarios currently (nbWorkers >= 1).
916 */
917 size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
918 {
919 #ifdef ZSTD_MULTITHREAD
920 if (cctx->appliedParams.nbWorkers > 0) {
921 return ZSTDMT_toFlushNow(cctx->mtctx);
922 }
923 #endif
924 (void)cctx;
925 return 0;   /* over-simplification; could also check whether the context is currently running in streaming mode, in which case it could report how many bytes are left to be flushed within the output buffer */
926 }
927
928
792 929
793 930 static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
794 931 ZSTD_compressionParameters cParams2)
@@ -799,6 +936,20 b' static U32 ZSTD_equivalentCParams(ZSTD_c'
799 936 & ((cParams1.searchLength==3) == (cParams2.searchLength==3)); /* hashlog3 space */
800 937 }
801 938
939 static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
940 ZSTD_compressionParameters cParams2)
941 {
942 (void)cParams1;
943 (void)cParams2;
944 assert(cParams1.windowLog == cParams2.windowLog);
945 assert(cParams1.chainLog == cParams2.chainLog);
946 assert(cParams1.hashLog == cParams2.hashLog);
947 assert(cParams1.searchLog == cParams2.searchLog);
948 assert(cParams1.searchLength == cParams2.searchLength);
949 assert(cParams1.targetLength == cParams2.targetLength);
950 assert(cParams1.strategy == cParams2.strategy);
951 }
952
802 953 /** The parameters are equivalent if ldm is not enabled in both sets or
803 954 * all the parameters are equivalent. */
804 955 static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
@@ -817,33 +968,51 b' typedef enum { ZSTDb_not_buffered, ZSTDb'
817 968 /* ZSTD_sufficientBuff() :
818 969 * check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
819 970 * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
820 static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1,
971 static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
972 size_t maxNbLit1,
821 973 ZSTD_buffered_policy_e buffPol2,
822 974 ZSTD_compressionParameters cParams2,
823 975 U64 pledgedSrcSize)
824 976 {
825 977 size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
826 978 size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
979 size_t const maxNbSeq2 = blockSize2 / ((cParams2.searchLength == 3) ? 3 : 4);
980 size_t const maxNbLit2 = blockSize2;
827 981 size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
828 DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u",
829 (U32)windowSize2, cParams2.windowLog);
830 DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u",
831 (U32)blockSize2, (U32)blockSize1);
832 return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */
982 DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
983 (U32)neededBufferSize2, (U32)bufferSize1);
984 DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
985 (U32)maxNbSeq2, (U32)maxNbSeq1);
986 DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
987 (U32)maxNbLit2, (U32)maxNbLit1);
988 return (maxNbLit2 <= maxNbLit1)
989 & (maxNbSeq2 <= maxNbSeq1)
833 990 & (neededBufferSize2 <= bufferSize1);
834 991 }
835 992
836 993 /** Equivalence for resetCCtx purposes */
837 994 static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
838 995 ZSTD_CCtx_params params2,
839 size_t buffSize1, size_t blockSize1,
996 size_t buffSize1,
997 size_t maxNbSeq1, size_t maxNbLit1,
840 998 ZSTD_buffered_policy_e buffPol2,
841 999 U64 pledgedSrcSize)
842 1000 {
843 1001 DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize);
844 return ZSTD_equivalentCParams(params1.cParams, params2.cParams) &&
845 ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) &&
846 ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize);
1002 if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) {
1003 DEBUGLOG(4, "ZSTD_equivalentCParams() == 0");
1004 return 0;
1005 }
1006 if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) {
1007 DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
1008 return 0;
1009 }
1010 if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
1011 params2.cParams, pledgedSrcSize)) {
1012 DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
1013 return 0;
1014 }
1015 return 1;
847 1016 }
848 1017
849 1018 static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
@@ -851,10 +1020,10 b' static void ZSTD_reset_compressedBlockSt'
851 1020 int i;
852 1021 for (i = 0; i < ZSTD_REP_NUM; ++i)
853 1022 bs->rep[i] = repStartValue[i];
854 bs->entropy.hufCTable_repeatMode = HUF_repeat_none;
855 bs->entropy.offcode_repeatMode = FSE_repeat_none;
856 bs->entropy.matchlength_repeatMode = FSE_repeat_none;
857 bs->entropy.litlength_repeatMode = FSE_repeat_none;
1023 bs->entropy.huf.repeatMode = HUF_repeat_none;
1024 bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
1025 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
1026 bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
858 1027 }
859 1028
860 1029 /*! ZSTD_invalidateMatchState()
@@ -866,8 +1035,10 b' static void ZSTD_invalidateMatchState(ZS'
866 1035 ZSTD_window_clear(&ms->window);
867 1036
868 1037 ms->nextToUpdate = ms->window.dictLimit + 1;
1038 ms->nextToUpdate3 = ms->window.dictLimit + 1;
869 1039 ms->loadedDictEnd = 0;
870 1040 ms->opt.litLengthSum = 0; /* force reset of btopt stats */
1041 ms->dictMatchState = NULL;
871 1042 }
872 1043
873 1044 /*! ZSTD_continueCCtx() :
@@ -880,6 +1051,7 b' static size_t ZSTD_continueCCtx(ZSTD_CCt'
880 1051
881 1052 cctx->blockSize = blockSize; /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
882 1053 cctx->appliedParams = params;
1054 cctx->blockState.matchState.cParams = params.cParams;
883 1055 cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
884 1056 cctx->consumedSrcSize = 0;
885 1057 cctx->producedCSize = 0;
@@ -900,7 +1072,11 b' static size_t ZSTD_continueCCtx(ZSTD_CCt'
900 1072
901 1073 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
902 1074
903 static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
1075 static void*
1076 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
1077 void* ptr,
1078 const ZSTD_compressionParameters* cParams,
1079 ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
904 1080 {
905 1081 size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
906 1082 size_t const hSize = ((size_t)1) << cParams->hashLog;
@@ -912,6 +1088,9 b' static void* ZSTD_reset_matchState(ZSTD_'
912 1088
913 1089 ms->hashLog3 = hashLog3;
914 1090 memset(&ms->window, 0, sizeof(ms->window));
1091 ms->window.dictLimit = 1; /* start from 1, so that 1st position is valid */
1092 ms->window.lowLimit = 1; /* it ensures first and later CCtx usages compress the same */
1093 ms->window.nextSrc = ms->window.base + 1; /* see issue #1241 */
915 1094 ZSTD_invalidateMatchState(ms);
916 1095
917 1096 /* opt parser space */
@@ -937,14 +1116,24 b' static void* ZSTD_reset_matchState(ZSTD_'
937 1116 ms->hashTable3 = ms->chainTable + chainSize;
938 1117 ptr = ms->hashTable3 + h3Size;
939 1118
1119 ms->cParams = *cParams;
1120
940 1121 assert(((size_t)ptr & 3) == 0);
941 1122 return ptr;
942 1123 }
943 1124
1125 #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
1126 #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when the workspace has been too large
1127 * for at least this many consecutive occasions,
1128 * the context's memory usage is considered wasteful,
1129 * because it is sized for a worst-case scenario that rarely happens.
1130 * In that case, resize it down to free some memory */
1131
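Together, the two constants above define when a context's workspace is treated as wasteful: larger than about three times what the next job needs, for more than 128 consecutive resets. A sketch of that check with illustrative names (the constants mirror this diff; they are internal heuristics, not API guarantees):

#include <stddef.h>

static int workspace_needs_shrink(size_t wsSize, size_t neededSpace,
                                  unsigned* oversizedDuration)
{
    int const tooLarge = wsSize > 3 * neededSpace;              /* _FACTOR */
    *oversizedDuration = tooLarge ? *oversizedDuration + 1 : 0;
    return tooLarge && (*oversizedDuration > 128);              /* _MAXDURATION */
}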
944 1132 /*! ZSTD_resetCCtx_internal() :
945 1133 note : `params` are assumed fully validated at this stage */
946 1134 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
947 ZSTD_CCtx_params params, U64 pledgedSrcSize,
1135 ZSTD_CCtx_params params,
1136 U64 pledgedSrcSize,
948 1137 ZSTD_compResetPolicy_e const crp,
949 1138 ZSTD_buffered_policy_e const zbuff)
950 1139 {
@@ -954,34 +1143,35 b' static size_t ZSTD_resetCCtx_internal(ZS'
954 1143
955 1144 if (crp == ZSTDcrp_continue) {
956 1145 if (ZSTD_equivalentParams(zc->appliedParams, params,
957 zc->inBuffSize, zc->blockSize,
958 zbuff, pledgedSrcSize)) {
959 DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)",
960 zc->appliedParams.cParams.windowLog, (U32)zc->blockSize);
961 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
1146 zc->inBuffSize,
1147 zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
1148 zbuff, pledgedSrcSize)) {
1149 DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
1150 zc->appliedParams.cParams.windowLog, zc->blockSize);
1151 zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */
1152 if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
1153 return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
962 1154 } }
963 1155 DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
964 1156
965 1157 if (params.ldmParams.enableLdm) {
966 1158 /* Adjust long distance matching parameters */
967 params.ldmParams.windowLog = params.cParams.windowLog;
968 1159 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
969 1160 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
970 1161 assert(params.ldmParams.hashEveryLog < 32);
971 zc->ldmState.hashPower =
972 ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
1162 zc->ldmState.hashPower = ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
973 1163 }
974 1164
975 1165 { size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
976 1166 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
977 1167 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
978 1168 size_t const maxNbSeq = blockSize / divider;
979 size_t const tokenSpace = blockSize + 11*maxNbSeq;
1169 size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
980 1170 size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
981 1171 size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
982 1172 size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
983 1173 size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
984 void* ptr;
1174 void* ptr; /* used to partition workSpace */
985 1175
986 1176 /* Check if workSpace is large enough, alloc a new one if needed */
987 1177 { size_t const entropySpace = HUF_WORKSPACE_SIZE;
@@ -993,14 +1183,20 b' static size_t ZSTD_resetCCtx_internal(ZS'
993 1183 size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
994 1184 ldmSeqSpace + matchStateSize + tokenSpace +
995 1185 bufferSpace;
996 DEBUGLOG(4, "Need %uKB workspace, including %uKB for match state, and %uKB for buffers",
997 (U32)(neededSpace>>10), (U32)(matchStateSize>>10), (U32)(bufferSpace>>10));
998 DEBUGLOG(4, "windowSize: %u - blockSize: %u", (U32)windowSize, (U32)blockSize);
999
1000 if (zc->workSpaceSize < neededSpace) { /* too small : resize */
1001 DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK",
1002 (unsigned)(zc->workSpaceSize>>10),
1003 (unsigned)(neededSpace>>10));
1186
1187 int const workSpaceTooSmall = zc->workSpaceSize < neededSpace;
1188 int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace;
1189 int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION);
1190 zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0;
1191
1192 DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
1193 neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
1194 DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
1195
1196 if (workSpaceTooSmall || workSpaceWasteful) {
1197 DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
1198 zc->workSpaceSize >> 10,
1199 neededSpace >> 10);
1004 1200 /* static cctx : no resize, error out */
1005 1201 if (zc->staticSize) return ERROR(memory_allocation);
1006 1202
@@ -1009,9 +1205,11 b' static size_t ZSTD_resetCCtx_internal(ZS'
1009 1205 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
1010 1206 if (zc->workSpace == NULL) return ERROR(memory_allocation);
1011 1207 zc->workSpaceSize = neededSpace;
1012 ptr = zc->workSpace;
1013
1014 /* Statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
1208 zc->workSpaceOversizedDuration = 0;
1209
1210 /* Statically sized space.
1211 * entropyWorkspace never moves,
1212 * though prev/next block swap places */
1015 1213 assert(((size_t)zc->workSpace & 3) == 0); /* ensure correct alignment */
1016 1214 assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
1017 1215 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
@@ -1022,6 +1220,7 b' static size_t ZSTD_resetCCtx_internal(ZS'
1022 1220
1023 1221 /* init params */
1024 1222 zc->appliedParams = params;
1223 zc->blockState.matchState.cParams = params.cParams;
1025 1224 zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
1026 1225 zc->consumedSrcSize = 0;
1027 1226 zc->producedCSize = 0;
@@ -1058,13 +1257,18 b' static size_t ZSTD_resetCCtx_internal(ZS'
1058 1257 ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
1059 1258
1060 1259 /* sequences storage */
1260 zc->seqStore.maxNbSeq = maxNbSeq;
1061 1261 zc->seqStore.sequencesStart = (seqDef*)ptr;
1062 1262 ptr = zc->seqStore.sequencesStart + maxNbSeq;
1063 1263 zc->seqStore.llCode = (BYTE*) ptr;
1064 1264 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
1065 1265 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
1066 1266 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
1067 ptr = zc->seqStore.litStart + blockSize;
1267 /* ZSTD_wildcopy() is used to copy into the literals buffer,
1268 * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
1269 */
1270 zc->seqStore.maxNbLit = blockSize;
1271 ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;
1068 1272
1069 1273 /* ldm bucketOffsets table */
1070 1274 if (params.ldmParams.enableLdm) {
@@ -1098,28 +1302,110 b' void ZSTD_invalidateRepCodes(ZSTD_CCtx* '
1098 1302 assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
1099 1303 }
1100 1304
1101 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
1305 /* These are the approximate sizes for each strategy past which copying the
1306 * dictionary tables into the working context is faster than using them
1307 * in-place.
1308 */
1309 static const size_t attachDictSizeCutoffs[(unsigned)ZSTD_btultra+1] = {
1310 8 KB, /* unused */
1311 8 KB, /* ZSTD_fast */
1312 16 KB, /* ZSTD_dfast */
1313 32 KB, /* ZSTD_greedy */
1314 32 KB, /* ZSTD_lazy */
1315 32 KB, /* ZSTD_lazy2 */
1316 32 KB, /* ZSTD_btlazy2 */
1317 32 KB, /* ZSTD_btopt */
1318 8 KB /* ZSTD_btultra */
1319 };
1320
1321 static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
1322 ZSTD_CCtx_params params,
1323 U64 pledgedSrcSize)
1324 {
1325 size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
1326 return ( pledgedSrcSize <= cutoff
1327 || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
1328 || params.attachDictPref == ZSTD_dictForceAttach )
1329 && params.attachDictPref != ZSTD_dictForceCopy
1330 && !params.forceWindow; /* dictMatchState isn't correctly
1331 * handled in _enforceMaxDist */
1332 }
1333
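ZSTD_shouldAttachDict() above chooses between referencing the CDict's tables in place (cheap setup) and copying them into the working context (more setup, but the per-strategy cutoffs suggest faster matching on larger inputs). A condensed sketch of the decision with illustrative types; the forceWindow restriction from the real function is omitted:

typedef enum { PREF_DEFAULT, PREF_FORCE_ATTACH, PREF_FORCE_COPY } DictPref;

/* srcSizeUnknown stands in for ZSTD_CONTENTSIZE_UNKNOWN. */
static int should_attach(unsigned long long pledgedSrcSize,
                         unsigned long long srcSizeUnknown,
                         unsigned long long strategyCutoff,
                         DictPref pref)
{
    int const smallOrUnknown = (pledgedSrcSize <= strategyCutoff)
                            || (pledgedSrcSize == srcSizeUnknown);
    return (smallOrUnknown || pref == PREF_FORCE_ATTACH)
        && (pref != PREF_FORCE_COPY);
}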
1334 static size_t ZSTD_resetCCtx_byAttachingCDict(
1335 ZSTD_CCtx* cctx,
1336 const ZSTD_CDict* cdict,
1337 ZSTD_CCtx_params params,
1338 U64 pledgedSrcSize,
1339 ZSTD_buffered_policy_e zbuff)
1340 {
1341 {
1342 const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
1343 unsigned const windowLog = params.cParams.windowLog;
1344 assert(windowLog != 0);
1345 /* Resize working context table params for input only, since the dict
1346 * has its own tables. */
1347 params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
1348 params.cParams.windowLog = windowLog;
1349 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1350 ZSTDcrp_continue, zbuff);
1351 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
1352 }
1353
1354 {
1355 const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
1356 - cdict->matchState.window.base);
1357 const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
1358 if (cdictLen == 0) {
1359 /* don't even attach dictionaries with no contents */
1360 DEBUGLOG(4, "skipping attaching empty dictionary");
1361 } else {
1362 DEBUGLOG(4, "attaching dictionary into context");
1363 cctx->blockState.matchState.dictMatchState = &cdict->matchState;
1364
1365 /* prep working match state so dict matches never have negative indices
1366 * when they are translated to the working context's index space. */
1367 if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
1368 cctx->blockState.matchState.window.nextSrc =
1369 cctx->blockState.matchState.window.base + cdictEnd;
1370 ZSTD_window_clear(&cctx->blockState.matchState.window);
1371 }
1372 cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
1373 }
1374 }
1375
1376 cctx->dictID = cdict->dictID;
1377
1378 /* copy block state */
1379 memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
1380
1381 return 0;
1382 }
1383
1384 static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
1102 1385 const ZSTD_CDict* cdict,
1103 unsigned windowLog,
1104 ZSTD_frameParameters fParams,
1386 ZSTD_CCtx_params params,
1105 1387 U64 pledgedSrcSize,
1106 1388 ZSTD_buffered_policy_e zbuff)
1107 1389 {
1108 { ZSTD_CCtx_params params = cctx->requestedParams;
1390 const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
1391
1392 DEBUGLOG(4, "copying dictionary into context");
1393
1394 { unsigned const windowLog = params.cParams.windowLog;
1395 assert(windowLog != 0);
1109 1396 /* Copy only compression parameters related to tables. */
1110 params.cParams = cdict->cParams;
1111 if (windowLog) params.cParams.windowLog = windowLog;
1112 params.fParams = fParams;
1397 params.cParams = *cdict_cParams;
1398 params.cParams.windowLog = windowLog;
1113 1399 ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
1114 1400 ZSTDcrp_noMemset, zbuff);
1115 assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
1116 assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
1117 assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
1401 assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
1402 assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
1403 assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
1118 1404 }
1119 1405
1120 1406 /* copy tables */
1121 { size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
1122 size_t const hSize = (size_t)1 << cdict->cParams.hashLog;
1407 { size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
1408 size_t const hSize = (size_t)1 << cdict_cParams->hashLog;
1123 1409 size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
1124 1410 assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize); /* chainTable must follow hashTable */
1125 1411 assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
@@ -1127,6 +1413,7 b' static size_t ZSTD_resetCCtx_usingCDict('
1127 1413 assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
1128 1414 memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace); /* presumes all tables follow each other */
1129 1415 }
1416
1130 1417 /* Zero the hashTable3, since the cdict never fills it */
1131 1418 { size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
1132 1419 assert(cdict->matchState.hashLog3 == 0);
@@ -1134,14 +1421,14 b' static size_t ZSTD_resetCCtx_usingCDict('
1134 1421 }
1135 1422
1136 1423 /* copy dictionary offsets */
1137 {
1138 ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
1424 { ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
1139 1425 ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
1140 1426 dstMatchState->window = srcMatchState->window;
1141 1427 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
1142 1428 dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
1143 1429 dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
1144 1430 }
1431
1145 1432 cctx->dictID = cdict->dictID;
1146 1433
1147 1434 /* copy block state */
@@ -1150,6 +1437,27 b' static size_t ZSTD_resetCCtx_usingCDict('
1150 1437 return 0;
1151 1438 }
1152 1439
1440 /* We have a choice between copying the dictionary context into the working
1441 * context, or referencing the dictionary context from the working context
1442 * in-place. We decide here which strategy to use. */
1443 static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
1444 const ZSTD_CDict* cdict,
1445 ZSTD_CCtx_params params,
1446 U64 pledgedSrcSize,
1447 ZSTD_buffered_policy_e zbuff)
1448 {
1449
1450 DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
1451
1452 if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
1453 return ZSTD_resetCCtx_byAttachingCDict(
1454 cctx, cdict, params, pledgedSrcSize, zbuff);
1455 } else {
1456 return ZSTD_resetCCtx_byCopyingCDict(
1457 cctx, cdict, params, pledgedSrcSize, zbuff);
1458 }
1459 }
1460
1153 1461 /*! ZSTD_copyCCtx_internal() :
1154 1462 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
1155 1463 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
@@ -1192,7 +1500,7 b' static size_t ZSTD_copyCCtx_internal(ZST'
1192 1500
1193 1501 /* copy dictionary offsets */
1194 1502 {
1195 ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState;
1503 const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
1196 1504 ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
1197 1505 dstMatchState->window = srcMatchState->window;
1198 1506 dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
@@ -1294,15 +1602,15 b' static void ZSTD_reduceIndex (ZSTD_CCtx*'
1294 1602
1295 1603 /* See doc/zstd_compression_format.md for detailed format description */
1296 1604
1297 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
1605 static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
1298 1606 {
1607 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
1299 1608 if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
1609 MEM_writeLE24(dst, cBlockHeader24);
1300 1610 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
1301 MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
1302 return ZSTD_blockHeaderSize+srcSize;
1611 return ZSTD_blockHeaderSize + srcSize;
1303 1612 }
1304 1613
1305
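For reference, the 3-byte raw-block header that ZSTD_noCompressBlock() now writes packs bit 0 = lastBlock, bits 1-2 = block type (bt_raw is 0), and bits 3..23 = block size, stored little-endian. A standalone check of that layout (not part of the vendored file):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    uint32_t const srcSize = 5, lastBlock = 1, bt_raw = 0;
    uint32_t const header24 = lastBlock + (bt_raw << 1) + (srcSize << 3);  /* same formula as above */
    uint8_t buf[3];
    buf[0] = (uint8_t) header24;          /* MEM_writeLE24 stores little-endian */
    buf[1] = (uint8_t)(header24 >> 8);
    buf[2] = (uint8_t)(header24 >> 16);
    assert(header24 == 41);               /* 1 + 0 + (5 << 3) */
    assert(buf[0] == 0x29 && buf[1] == 0x00 && buf[2] == 0x00);
    return 0;
}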
1306 1614 static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
1307 1615 {
1308 1616 BYTE* const ostart = (BYTE* const)dst;
@@ -1356,16 +1664,24 b' static size_t ZSTD_compressRleLiteralsBl'
1356 1664 }
1357 1665
1358 1666
1359 static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
1360
1361 static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
1362 ZSTD_entropyCTables_t* nextEntropy,
1667 /* ZSTD_minGain() :
1668 * minimum compression required
1669  * to generate a compressed block or a compressed literals section.
1670 * note : use same formula for both situations */
1671 static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
1672 {
1673 U32 const minlog = (strat==ZSTD_btultra) ? 7 : 6;
1674 return (srcSize >> minlog) + 2;
1675 }
1676
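To make the new per-strategy formula concrete: for a 64 KiB input, a block (or literals section) is only kept in compressed form if it saves more than minGain bytes, and ZSTD_btultra is allowed to keep blocks with smaller savings. A small standalone check (not part of the vendored file):

#include <assert.h>
#include <stddef.h>

static size_t demo_minGain(size_t srcSize, int isBtUltra)
{
    unsigned const minlog = isBtUltra ? 7 : 6;   /* mirrors ZSTD_minGain() above */
    return (srcSize >> minlog) + 2;
}

int main(void)
{
    assert(demo_minGain(64 * 1024, 0) == 1026);  /* (65536 >> 6) + 2 */
    assert(demo_minGain(64 * 1024, 1) == 514);   /* (65536 >> 7) + 2: btultra accepts smaller savings */
    return 0;
}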
1677 static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
1678 ZSTD_hufCTables_t* nextHuf,
1363 1679 ZSTD_strategy strategy, int disableLiteralCompression,
1364 1680 void* dst, size_t dstCapacity,
1365 1681 const void* src, size_t srcSize,
1366 1682 U32* workspace, const int bmi2)
1367 1683 {
1368 size_t const minGain = ZSTD_minGain(srcSize);
1684 size_t const minGain = ZSTD_minGain(srcSize, strategy);
1369 1685 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
1370 1686 BYTE* const ostart = (BYTE*)dst;
1371 1687 U32 singleStream = srcSize < 256;
@@ -1376,27 +1692,25 b' static size_t ZSTD_compressLiterals (ZST'
1376 1692 disableLiteralCompression);
1377 1693
1378 1694 /* Prepare nextEntropy assuming reusing the existing table */
1379 nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode;
1380 memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable,
1381 sizeof(prevEntropy->hufCTable));
1695 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
1382 1696
1383 1697 if (disableLiteralCompression)
1384 1698 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
1385 1699
1386 1700 /* small ? don't even attempt compression (speed opt) */
1387 1701 # define COMPRESS_LITERALS_SIZE_MIN 63
1388 { size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
1702 { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
1389 1703 if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
1390 1704 }
1391 1705
1392 1706 if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
1393 { HUF_repeat repeat = prevEntropy->hufCTable_repeatMode;
1707 { HUF_repeat repeat = prevHuf->repeatMode;
1394 1708 int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
1395 1709 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
1396 1710 cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
1397 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2)
1711 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
1398 1712 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
1399 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2);
1713 workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
1400 1714 if (repeat != HUF_repeat_none) {
1401 1715 /* reused the existing table */
1402 1716 hType = set_repeat;
@@ -1404,17 +1718,17 b' static size_t ZSTD_compressLiterals (ZST'
1404 1718 }
1405 1719
1406 1720 if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
1407 memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
1721 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
1408 1722 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
1409 1723 }
1410 1724 if (cLitSize==1) {
1411 memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
1725 memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
1412 1726 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
1413 1727 }
1414 1728
1415 1729 if (hType == set_compressed) {
1416 1730 /* using a newly constructed table */
1417 nextEntropy->hufCTable_repeatMode = HUF_repeat_check;
1731 nextHuf->repeatMode = HUF_repeat_check;
1418 1732 }
1419 1733
1420 1734 /* Build header */
@@ -1451,6 +1765,7 b' void ZSTD_seqToCodes(const seqStore_t* s'
1451 1765 BYTE* const mlCodeTable = seqStorePtr->mlCode;
1452 1766 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
1453 1767 U32 u;
1768 assert(nbSeq <= seqStorePtr->maxNbSeq);
1454 1769 for (u=0; u<nbSeq; u++) {
1455 1770 U32 const llv = sequences[u].litLength;
1456 1771 U32 const mlv = sequences[u].matchLength;
@@ -1464,61 +1779,234 b' void ZSTD_seqToCodes(const seqStore_t* s'
1464 1779 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
1465 1780 }
1466 1781
1782
1783 /**
1784 * -log2(x / 256) lookup table for x in [0, 256).
1785 * If x == 0: Return 0
1786 * Else: Return floor(-log2(x / 256) * 256)
1787 */
1788 static unsigned const kInverseProbabiltyLog256[256] = {
1789 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
1790 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
1791 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
1792 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
1793 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
1794 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
1795 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
1796 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
1797 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
1798 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
1799 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
1800 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
1801 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
1802 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
1803 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
1804 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
1805 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
1806 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
1807 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
1808 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
1809 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
1810 5, 4, 2, 1,
1811 };
1812
1813
1814 /**
1815 * Returns the cost in bits of encoding the distribution described by count
1816 * using the entropy bound.
1817 */
1818 static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
1819 {
1820 unsigned cost = 0;
1821 unsigned s;
1822 for (s = 0; s <= max; ++s) {
1823 unsigned norm = (unsigned)((256 * count[s]) / total);
1824 if (count[s] != 0 && norm == 0)
1825 norm = 1;
1826 assert(count[s] < total);
1827 cost += count[s] * kInverseProbabiltyLog256[norm];
1828 }
1829 return cost >> 8;
1830 }
1831
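The table above is a fixed-point form of -log2(x/256), scaled by 256, so ZSTD_entropyCost() can sum per-symbol bit costs with integer arithmetic and shift right by 8 at the end. A standalone cross-check against floating point (for verification only; the encoder itself stays integer-only):

#include <assert.h>
#include <math.h>

int main(void)
{
    /* kInverseProbabiltyLog256[128] == 256: a probability-1/2 symbol costs 1.0 bit */
    assert((unsigned)floor(-log2(128.0 / 256.0) * 256.0) == 256);
    /* kInverseProbabiltyLog256[64] == 512: a probability-1/4 symbol costs 2.0 bits */
    assert((unsigned)floor(-log2(64.0 / 256.0) * 256.0) == 512);
    /* 100 occurrences of a probability-1/2 symbol cost (100 * 256) >> 8 == 100 bits */
    assert((100u * 256u) >> 8 == 100u);
    return 0;   /* link with -lm */
}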
1832
1833 /**
1834 * Returns the cost in bits of encoding the distribution in count using the
1835 * table described by norm. The max symbol supported by norm is assumed to be >= max.
1836 * norm must be valid for every symbol with non-zero probability in count.
1837 */
1838 static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
1839 unsigned const* count, unsigned const max)
1840 {
1841 unsigned const shift = 8 - accuracyLog;
1842 size_t cost = 0;
1843 unsigned s;
1844 assert(accuracyLog <= 8);
1845 for (s = 0; s <= max; ++s) {
1846 unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
1847 unsigned const norm256 = normAcc << shift;
1848 assert(norm256 > 0);
1849 assert(norm256 < 256);
1850 cost += count[s] * kInverseProbabiltyLog256[norm256];
1851 }
1852 return cost >> 8;
1853 }
1854
1855
1856 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
1857 void const* ptr = ctable;
1858 U16 const* u16ptr = (U16 const*)ptr;
1859 U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
1860 return maxSymbolValue;
1861 }
1862
1863
1864 /**
1865 * Returns the cost in bits of encoding the distribution in count using ctable.
1866 * Returns an error if ctable cannot represent all the symbols in count.
1867 */
1868 static size_t ZSTD_fseBitCost(
1869 FSE_CTable const* ctable,
1870 unsigned const* count,
1871 unsigned const max)
1872 {
1873 unsigned const kAccuracyLog = 8;
1874 size_t cost = 0;
1875 unsigned s;
1876 FSE_CState_t cstate;
1877 FSE_initCState(&cstate, ctable);
1878 if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
1879 DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
1880 ZSTD_getFSEMaxSymbolValue(ctable), max);
1881 return ERROR(GENERIC);
1882 }
1883 for (s = 0; s <= max; ++s) {
1884 unsigned const tableLog = cstate.stateLog;
1885 unsigned const badCost = (tableLog + 1) << kAccuracyLog;
1886 unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
1887 if (count[s] == 0)
1888 continue;
1889 if (bitCost >= badCost) {
1890 DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
1891 return ERROR(GENERIC);
1892 }
1893 cost += count[s] * bitCost;
1894 }
1895 return cost >> kAccuracyLog;
1896 }
1897
1898 /**
1899 * Returns the cost in bytes of encoding the normalized count header.
1900 * Returns an error if any of the helper functions return an error.
1901 */
1902 static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
1903 size_t const nbSeq, unsigned const FSELog)
1904 {
1905 BYTE wksp[FSE_NCOUNTBOUND];
1906 S16 norm[MaxSeq + 1];
1907 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
1908 CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
1909 return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
1910 }
1911
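These three estimators feed the table-selection logic in ZSTD_selectEncodingType() below: for the stronger strategies it compares basicCost (cross-entropy against the default norm), repeatCost (bit cost under the previous FSE table) and compressedCost (the serialized NCount header converted to bits, plus the entropy bound), and keeps the cheapest. A toy restatement of just that comparison — the costs fed in are made up, only the selection order mirrors the real code:

#include <assert.h>
#include <stddef.h>

typedef enum { demo_basic, demo_repeat, demo_compressed } demo_encoding;

static demo_encoding demo_select(size_t basicCost, size_t repeatCost,
                                 size_t nCountBytes, size_t entropyBits)
{
    size_t const compressedCost = (nCountBytes << 3) + entropyBits;  /* header bytes -> bits */
    if (basicCost <= repeatCost && basicCost <= compressedCost) return demo_basic;
    if (repeatCost <= compressedCost) return demo_repeat;
    return demo_compressed;
}

int main(void)
{
    /* a 20-byte NCount header (160 bits) can make reusing the previous table the cheapest option */
    assert(demo_select(900, 700, 20, 600) == demo_repeat);
    /* with a near-default distribution the built-in table wins outright */
    assert(demo_select(500, 700, 20, 600) == demo_basic);
    return 0;
}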
1912
1467 1913 typedef enum {
1468 1914 ZSTD_defaultDisallowed = 0,
1469 1915 ZSTD_defaultAllowed = 1
1470 1916 } ZSTD_defaultPolicy_e;
1471 1917
1472 MEM_STATIC
1473 symbolEncodingType_e ZSTD_selectEncodingType(
1474 FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq,
1475 U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed)
1918 MEM_STATIC symbolEncodingType_e
1919 ZSTD_selectEncodingType(
1920 FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
1921 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
1922 FSE_CTable const* prevCTable,
1923 short const* defaultNorm, U32 defaultNormLog,
1924 ZSTD_defaultPolicy_e const isDefaultAllowed,
1925 ZSTD_strategy const strategy)
1476 1926 {
1477 #define MIN_SEQ_FOR_DYNAMIC_FSE 64
1478 #define MAX_SEQ_FOR_STATIC_FSE 1000
1479 1927 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
1480 if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) {
1928 if (mostFrequent == nbSeq) {
1929 *repeatMode = FSE_repeat_none;
1930 if (isDefaultAllowed && nbSeq <= 2) {
1931 /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
1932 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
1933 * If basic encoding isn't possible, always choose RLE.
1934 */
1935 DEBUGLOG(5, "Selected set_basic");
1936 return set_basic;
1937 }
1481 1938 DEBUGLOG(5, "Selected set_rle");
1482 /* Prefer set_basic over set_rle when there are 2 or less symbols,
1483 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
1484 * If basic encoding isn't possible, always choose RLE.
1485 */
1486 *repeatMode = FSE_repeat_check;
1487 1939 return set_rle;
1488 1940 }
1489 if ( isDefaultAllowed
1490 && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
1491 DEBUGLOG(5, "Selected set_repeat");
1492 return set_repeat;
1493 }
1494 if ( isDefaultAllowed
1495 && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) {
1496 DEBUGLOG(5, "Selected set_basic");
1497 /* The format allows default tables to be repeated, but it isn't useful.
1498 * When using simple heuristics to select encoding type, we don't want
1499 * to confuse these tables with dictionaries. When running more careful
1500 * analysis, we don't need to waste time checking both repeating tables
1501 * and default tables.
1502 */
1503 *repeatMode = FSE_repeat_none;
1504 return set_basic;
1941 if (strategy < ZSTD_lazy) {
1942 if (isDefaultAllowed) {
1943 size_t const staticFse_nbSeq_max = 1000;
1944 size_t const mult = 10 - strategy;
1945 size_t const baseLog = 3;
1946 size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
1947 assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
1948 assert(mult <= 9 && mult >= 7);
1949 if ( (*repeatMode == FSE_repeat_valid)
1950 && (nbSeq < staticFse_nbSeq_max) ) {
1951 DEBUGLOG(5, "Selected set_repeat");
1952 return set_repeat;
1953 }
1954 if ( (nbSeq < dynamicFse_nbSeq_min)
1955 || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
1956 DEBUGLOG(5, "Selected set_basic");
1957 /* The format allows default tables to be repeated, but it isn't useful.
1958 * When using simple heuristics to select encoding type, we don't want
1959 * to confuse these tables with dictionaries. When running more careful
1960 * analysis, we don't need to waste time checking both repeating tables
1961 * and default tables.
1962 */
1963 *repeatMode = FSE_repeat_none;
1964 return set_basic;
1965 }
1966 }
1967 } else {
1968 size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
1969 size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
1970 size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
1971 size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
1972
1973 if (isDefaultAllowed) {
1974 assert(!ZSTD_isError(basicCost));
1975 assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
1976 }
1977 assert(!ZSTD_isError(NCountCost));
1978 assert(compressedCost < ERROR(maxCode));
1979 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
1980 (U32)basicCost, (U32)repeatCost, (U32)compressedCost);
1981 if (basicCost <= repeatCost && basicCost <= compressedCost) {
1982 DEBUGLOG(5, "Selected set_basic");
1983 assert(isDefaultAllowed);
1984 *repeatMode = FSE_repeat_none;
1985 return set_basic;
1986 }
1987 if (repeatCost <= compressedCost) {
1988 DEBUGLOG(5, "Selected set_repeat");
1989 assert(!ZSTD_isError(repeatCost));
1990 return set_repeat;
1991 }
1992 assert(compressedCost < basicCost && compressedCost < repeatCost);
1505 1993 }
1506 1994 DEBUGLOG(5, "Selected set_compressed");
1507 1995 *repeatMode = FSE_repeat_check;
1508 1996 return set_compressed;
1509 1997 }
1510 1998
1511 MEM_STATIC
1512 size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
1513 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
1514 U32* count, U32 max,
1515 BYTE const* codeTable, size_t nbSeq,
1516 S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
1517 FSE_CTable const* prevCTable, size_t prevCTableSize,
1518 void* workspace, size_t workspaceSize)
1999 MEM_STATIC size_t
2000 ZSTD_buildCTable(void* dst, size_t dstCapacity,
2001 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
2002 U32* count, U32 max,
2003 const BYTE* codeTable, size_t nbSeq,
2004 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
2005 const FSE_CTable* prevCTable, size_t prevCTableSize,
2006 void* workspace, size_t workspaceSize)
1519 2007 {
1520 2008 BYTE* op = (BYTE*)dst;
1521 BYTE const* const oend = op + dstCapacity;
2009 const BYTE* const oend = op + dstCapacity;
1522 2010
1523 2011 switch (type) {
1524 2012 case set_rle:
@@ -1674,7 +2162,7 b' ZSTD_encodeSequences_bmi2('
1674 2162
1675 2163 #endif
1676 2164
1677 size_t ZSTD_encodeSequences(
2165 static size_t ZSTD_encodeSequences(
1678 2166 void* dst, size_t dstCapacity,
1679 2167 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
1680 2168 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
@@ -1706,10 +2194,11 b' MEM_STATIC size_t ZSTD_compressSequences'
1706 2194 const int bmi2)
1707 2195 {
1708 2196 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
2197 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
1709 2198 U32 count[MaxSeq+1];
1710 FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
1711 FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
1712 FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
2199 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
2200 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
2201 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
1713 2202 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
1714 2203 const seqDef* const sequences = seqStorePtr->sequencesStart;
1715 2204 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
@@ -1720,15 +2209,17 b' MEM_STATIC size_t ZSTD_compressSequences'
1720 2209 BYTE* op = ostart;
1721 2210 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
1722 2211 BYTE* seqHead;
2212 BYTE* lastNCount = NULL;
1723 2213
1724 2214 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
1725 2215
1726 2216 /* Compress literals */
1727 2217 { const BYTE* const literals = seqStorePtr->litStart;
1728 2218 size_t const litSize = seqStorePtr->lit - literals;
2219 int const disableLiteralCompression = (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
1729 2220 size_t const cSize = ZSTD_compressLiterals(
1730 prevEntropy, nextEntropy,
1731 cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
2221 &prevEntropy->huf, &nextEntropy->huf,
2222 cctxParams->cParams.strategy, disableLiteralCompression,
1732 2223 op, dstCapacity,
1733 2224 literals, litSize,
1734 2225 workspace, bmi2);
@@ -1747,13 +2238,9 b' MEM_STATIC size_t ZSTD_compressSequences'
1747 2238 else
1748 2239 op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
1749 2240 if (nbSeq==0) {
1750 memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable));
1751 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
1752 memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable));
1753 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
1754 memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable));
1755 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
1756 return op - ostart;
2241 /* Copy the old tables over as if we repeated them */
2242 memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
2243 return op - ostart;
1757 2244 }
1758 2245
1759 2246 /* seqHead : flags for FSE encoding type */
@@ -1763,43 +2250,53 b' MEM_STATIC size_t ZSTD_compressSequences'
1763 2250 ZSTD_seqToCodes(seqStorePtr);
1764 2251 /* build CTable for Literal Lengths */
1765 2252 { U32 max = MaxLL;
1766 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
2253 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); /* can't fail */
1767 2254 DEBUGLOG(5, "Building LL table");
1768 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
1769 LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
2255 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
2256 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy);
2257 assert(set_basic < set_compressed && set_rle < set_compressed);
2258 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
1770 2259 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
1771 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
1772 prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
1773 workspace, HUF_WORKSPACE_SIZE);
2260 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
2261 prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
2262 workspace, HUF_WORKSPACE_SIZE);
1774 2263 if (ZSTD_isError(countSize)) return countSize;
2264 if (LLtype == set_compressed)
2265 lastNCount = op;
1775 2266 op += countSize;
1776 2267 } }
1777 2268 /* build CTable for Offsets */
1778 2269 { U32 max = MaxOff;
1779 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
2270 size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); /* can't fail */
1780 2271 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
1781 2272 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
1782 2273 DEBUGLOG(5, "Building OF table");
1783 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
1784 Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
2274 nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
2275 Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy);
2276 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
1785 2277 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
1786 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
1787 prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
1788 workspace, HUF_WORKSPACE_SIZE);
2278 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
2279 prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
2280 workspace, HUF_WORKSPACE_SIZE);
1789 2281 if (ZSTD_isError(countSize)) return countSize;
2282 if (Offtype == set_compressed)
2283 lastNCount = op;
1790 2284 op += countSize;
1791 2285 } }
1792 2286 /* build CTable for MatchLengths */
1793 2287 { U32 max = MaxML;
1794 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
2288 size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); /* can't fail */
1795 2289 DEBUGLOG(5, "Building ML table");
1796 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
1797 MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
2290 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
2291 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);
2292 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
1798 2293 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
1799 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
1800 prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
1801 workspace, HUF_WORKSPACE_SIZE);
2294 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
2295 prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
2296 workspace, HUF_WORKSPACE_SIZE);
1802 2297 if (ZSTD_isError(countSize)) return countSize;
2298 if (MLtype == set_compressed)
2299 lastNCount = op;
1803 2300 op += countSize;
1804 2301 } }
1805 2302
@@ -1814,21 +2311,37 b' MEM_STATIC size_t ZSTD_compressSequences'
1814 2311 longOffsets, bmi2);
1815 2312 if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
1816 2313 op += bitstreamSize;
2314 /* zstd versions <= 1.3.4 mistakenly report corruption when
2315 * FSE_readNCount() receives a buffer < 4 bytes.
2316 * Fixed by https://github.com/facebook/zstd/pull/1146.
2317 * This can happen when the last set_compressed table present is 2
2318 * bytes and the bitstream is only one byte.
2319 * In this exceedingly rare case, we will simply emit an uncompressed
2320 * block, since it isn't worth optimizing.
2321 */
2322 if (lastNCount && (op - lastNCount) < 4) {
2323 /* NCountSize >= 2 && bitstreamSize > 0 ==> op - lastNCount == 3 */
2324 assert(op - lastNCount == 3);
2325 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
2326 "emitting an uncompressed block.");
2327 return 0;
2328 }
1817 2329 }
1818 2330
1819 2331 return op - ostart;
1820 2332 }
1821 2333
1822 2334 MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
1823 ZSTD_entropyCTables_t const* prevEntropy,
2335 const ZSTD_entropyCTables_t* prevEntropy,
1824 2336 ZSTD_entropyCTables_t* nextEntropy,
1825 ZSTD_CCtx_params const* cctxParams,
2337 const ZSTD_CCtx_params* cctxParams,
1826 2338 void* dst, size_t dstCapacity,
1827 2339 size_t srcSize, U32* workspace, int bmi2)
1828 2340 {
1829 2341 size_t const cSize = ZSTD_compressSequences_internal(
1830 2342 seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
1831 2343 workspace, bmi2);
2344 if (cSize == 0) return 0;
1832 2345 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
1833 2346 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
1834 2347 */
@@ -1837,40 +2350,55 b' MEM_STATIC size_t ZSTD_compressSequences'
1837 2350 if (ZSTD_isError(cSize)) return cSize;
1838 2351
1839 2352 /* Check compressibility */
1840 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */
2353 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
1841 2354 if (cSize >= maxCSize) return 0; /* block not compressed */
1842 2355 }
1843 2356
1844 /* We check that dictionaries have offset codes available for the first
1845 * block. After the first block, the offcode table might not have large
1846 * enough codes to represent the offsets in the data.
1847 */
1848 if (nextEntropy->offcode_repeatMode == FSE_repeat_valid)
1849 nextEntropy->offcode_repeatMode = FSE_repeat_check;
1850
1851 2357 return cSize;
1852 2358 }
1853 2359
1854 2360 /* ZSTD_selectBlockCompressor() :
1855 2361 * Not static, but internal use only (used by long distance matcher)
1856 2362 * assumption : strat is a valid strategy */
1857 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
2363 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
1858 2364 {
1859 static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
2365 static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = {
1860 2366 { ZSTD_compressBlock_fast /* default for 0 */,
1861 ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
1862 ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
1863 ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra },
2367 ZSTD_compressBlock_fast,
2368 ZSTD_compressBlock_doubleFast,
2369 ZSTD_compressBlock_greedy,
2370 ZSTD_compressBlock_lazy,
2371 ZSTD_compressBlock_lazy2,
2372 ZSTD_compressBlock_btlazy2,
2373 ZSTD_compressBlock_btopt,
2374 ZSTD_compressBlock_btultra },
1864 2375 { ZSTD_compressBlock_fast_extDict /* default for 0 */,
1865 ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
1866 ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
1867 ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
2376 ZSTD_compressBlock_fast_extDict,
2377 ZSTD_compressBlock_doubleFast_extDict,
2378 ZSTD_compressBlock_greedy_extDict,
2379 ZSTD_compressBlock_lazy_extDict,
2380 ZSTD_compressBlock_lazy2_extDict,
2381 ZSTD_compressBlock_btlazy2_extDict,
2382 ZSTD_compressBlock_btopt_extDict,
2383 ZSTD_compressBlock_btultra_extDict },
2384 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
2385 ZSTD_compressBlock_fast_dictMatchState,
2386 ZSTD_compressBlock_doubleFast_dictMatchState,
2387 ZSTD_compressBlock_greedy_dictMatchState,
2388 ZSTD_compressBlock_lazy_dictMatchState,
2389 ZSTD_compressBlock_lazy2_dictMatchState,
2390 ZSTD_compressBlock_btlazy2_dictMatchState,
2391 ZSTD_compressBlock_btopt_dictMatchState,
2392 ZSTD_compressBlock_btultra_dictMatchState }
1868 2393 };
2394 ZSTD_blockCompressor selectedCompressor;
1869 2395 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
1870 2396
1871 2397 assert((U32)strat >= (U32)ZSTD_fast);
1872 2398 assert((U32)strat <= (U32)ZSTD_btultra);
1873 return blockCompressor[extDict!=0][(U32)strat];
2399 selectedCompressor = blockCompressor[(int)dictMode][(U32)strat];
2400 assert(selectedCompressor != NULL);
2401 return selectedCompressor;
1874 2402 }
1875 2403
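The selector above now indexes a third row for the new dictMatchState block compressors, with the row chosen from ZSTD_matchState_dictMode() (declared outside this hunk). A minimal sketch of the same 2-D dispatch shape, using stand-in names rather than the real block compressors:

#include <stdio.h>

typedef const char* (*demo_blockCompressor)(void);
static const char* demo_fast_noDict(void)         { return "fast, no dictionary"; }
static const char* demo_fast_extDict(void)        { return "fast, external dictionary window"; }
static const char* demo_fast_dictMatchState(void) { return "fast, attached cdict match state"; }

int main(void)
{
    /* row = dictionary mode, column = strategy (only one strategy shown here) */
    demo_blockCompressor const table[3][1] = {
        { demo_fast_noDict },
        { demo_fast_extDict },
        { demo_fast_dictMatchState },
    };
    int const dictMode = 2, strat = 0;
    puts(table[dictMode][strat]());   /* prints: fast, attached cdict match state */
    return 0;
}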
1876 2404 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
@@ -1880,7 +2408,7 b' static void ZSTD_storeLastLiterals(seqSt'
1880 2408 seqStorePtr->lit += lastLLSize;
1881 2409 }
1882 2410
1883 static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
2411 void ZSTD_resetSeqStore(seqStore_t* ssPtr)
1884 2412 {
1885 2413 ssPtr->lit = ssPtr->litStart;
1886 2414 ssPtr->sequences = ssPtr->sequencesStart;
@@ -1892,24 +2420,38 b' static size_t ZSTD_compressBlock_interna'
1892 2420 const void* src, size_t srcSize)
1893 2421 {
1894 2422 ZSTD_matchState_t* const ms = &zc->blockState.matchState;
1895 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
1896 (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
2423 size_t cSize;
2424 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%zu, dictLimit=%u, nextToUpdate=%u)",
2425 dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
2426 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
2427
2428 /* Assert that we have correctly flushed the ctx params into the ms's copy */
2429 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
2430
1897 2431 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
1898 2432 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
1899 return 0; /* don't even attempt compression below a certain srcSize */
2433 cSize = 0;
2434 goto out; /* don't even attempt compression below a certain srcSize */
1900 2435 }
1901 2436 ZSTD_resetSeqStore(&(zc->seqStore));
2437 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
2438
2439 /* a gap between an attached dict and the current window is not safe,
2440 * they must remain adjacent, and when that stops being the case, the dict
2441 * must be unset */
2442 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
1902 2443
1903 2444 /* limited update after a very long match */
1904 2445 { const BYTE* const base = ms->window.base;
1905 2446 const BYTE* const istart = (const BYTE*)src;
1906 2447 const U32 current = (U32)(istart-base);
2448 if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */
1907 2449 if (current > ms->nextToUpdate + 384)
1908 2450 ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
1909 2451 }
1910 2452
1911 2453 /* select and store sequences */
1912 { U32 const extDict = ZSTD_window_hasExtDict(ms->window);
2454 { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
1913 2455 size_t lastLLSize;
1914 2456 { int i;
1915 2457 for (i = 0; i < ZSTD_REP_NUM; ++i)
@@ -1922,8 +2464,7 b' static size_t ZSTD_compressBlock_interna'
1922 2464 ZSTD_ldm_blockCompress(&zc->externSeqStore,
1923 2465 ms, &zc->seqStore,
1924 2466 zc->blockState.nextCBlock->rep,
1925 &zc->appliedParams.cParams,
1926 src, srcSize, extDict);
2467 src, srcSize);
1927 2468 assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
1928 2469 } else if (zc->appliedParams.ldmParams.enableLdm) {
1929 2470 rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
@@ -1939,31 +2480,38 b' static size_t ZSTD_compressBlock_interna'
1939 2480 ZSTD_ldm_blockCompress(&ldmSeqStore,
1940 2481 ms, &zc->seqStore,
1941 2482 zc->blockState.nextCBlock->rep,
1942 &zc->appliedParams.cParams,
1943 src, srcSize, extDict);
2483 src, srcSize);
1944 2484 assert(ldmSeqStore.pos == ldmSeqStore.size);
1945 2485 } else { /* not long range mode */
1946 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
1947 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
2486 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
2487 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
1948 2488 }
1949 2489 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
1950 2490 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
1951 2491 } }
1952 2492
1953 2493 /* encode sequences and literals */
1954 { size_t const cSize = ZSTD_compressSequences(&zc->seqStore,
1955 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
1956 &zc->appliedParams,
1957 dst, dstCapacity,
1958 srcSize, zc->entropyWorkspace, zc->bmi2);
1959 if (ZSTD_isError(cSize) || cSize == 0) return cSize;
1960 /* confirm repcodes and entropy tables */
1961 { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
1962 zc->blockState.prevCBlock = zc->blockState.nextCBlock;
1963 zc->blockState.nextCBlock = tmp;
1964 }
1965 return cSize;
2494 cSize = ZSTD_compressSequences(&zc->seqStore,
2495 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
2496 &zc->appliedParams,
2497 dst, dstCapacity,
2498 srcSize, zc->entropyWorkspace, zc->bmi2);
2499
2500 out:
2501 if (!ZSTD_isError(cSize) && cSize != 0) {
2502 /* confirm repcodes and entropy tables when emitting a compressed block */
2503 ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
2504 zc->blockState.prevCBlock = zc->blockState.nextCBlock;
2505 zc->blockState.nextCBlock = tmp;
1966 2506 }
2507 /* We check that dictionaries have offset codes available for the first
2508 * block. After the first block, the offcode table might not have large
2509 * enough codes to represent the offsets in the data.
2510 */
2511 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
2512 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
2513
2514 return cSize;
1967 2515 }
1968 2516
1969 2517
@@ -2005,13 +2553,13 b' static size_t ZSTD_compress_frameChunk ('
2005 2553 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
2006 2554 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
2007 2555 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2008
2009 2556 ZSTD_reduceIndex(cctx, correction);
2010 2557 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
2011 2558 else ms->nextToUpdate -= correction;
2012 2559 ms->loadedDictEnd = 0;
2560 ms->dictMatchState = NULL;
2013 2561 }
2014 ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd);
2562 ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
2015 2563 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
2016 2564
2017 2565 { size_t cSize = ZSTD_compressBlock_internal(cctx,
@@ -2020,11 +2568,8 b' static size_t ZSTD_compress_frameChunk ('
2020 2568 if (ZSTD_isError(cSize)) return cSize;
2021 2569
2022 2570 if (cSize == 0) { /* block is not compressible */
2023 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
2024 if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
2025 MEM_writeLE32(op, cBlockHeader24); /* 4th byte will be overwritten */
2026 memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
2027 cSize = ZSTD_blockHeaderSize + blockSize;
2571 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
2572 if (ZSTD_isError(cSize)) return cSize;
2028 2573 } else {
2029 2574 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2030 2575 MEM_writeLE24(op, cBlockHeader24);
@@ -2060,6 +2605,7 b' static size_t ZSTD_writeFrameHeader(void'
2060 2605 BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
2061 2606 size_t pos=0;
2062 2607
2608 assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
2063 2609 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
2064 2610 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
2065 2611 !params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
@@ -2122,7 +2668,7 b' static size_t ZSTD_compressContinue_inte'
2122 2668 const void* src, size_t srcSize,
2123 2669 U32 frame, U32 lastFrameChunk)
2124 2670 {
2125 ZSTD_matchState_t* ms = &cctx->blockState.matchState;
2671 ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
2126 2672 size_t fhSize = 0;
2127 2673
2128 2674 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
@@ -2143,8 +2689,25 b' static size_t ZSTD_compressContinue_inte'
2143 2689 if (!ZSTD_window_update(&ms->window, src, srcSize)) {
2144 2690 ms->nextToUpdate = ms->window.dictLimit;
2145 2691 }
2146 if (cctx->appliedParams.ldmParams.enableLdm)
2692 if (cctx->appliedParams.ldmParams.enableLdm) {
2147 2693 ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
2694 }
2695
2696 if (!frame) {
2697 /* overflow check and correction for block mode */
2698 if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
2699 U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
2700 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
2701 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
2702 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
2703 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2704 ZSTD_reduceIndex(cctx, correction);
2705 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
2706 else ms->nextToUpdate -= correction;
2707 ms->loadedDictEnd = 0;
2708 ms->dictMatchState = NULL;
2709 }
2710 }
2148 2711
2149 2712 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
2150 2713 { size_t const cSize = frame ?
@@ -2153,7 +2716,9 b' static size_t ZSTD_compressContinue_inte'
2153 2716 if (ZSTD_isError(cSize)) return cSize;
2154 2717 cctx->consumedSrcSize += srcSize;
2155 2718 cctx->producedCSize += (cSize + fhSize);
2156 if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
2719 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
2720 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
2721 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
2157 2722 if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
2158 2723 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
2159 2724 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
@@ -2184,44 +2749,50 b' size_t ZSTD_compressBlock(ZSTD_CCtx* cct'
2184 2749 {
2185 2750 size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
2186 2751 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
2752
2187 2753 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
2188 2754 }
2189 2755
2190 2756 /*! ZSTD_loadDictionaryContent() :
2191 2757 * @return : 0, or an error code
2192 2758 */
2193 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize)
2759 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
2760 ZSTD_CCtx_params const* params,
2761 const void* src, size_t srcSize,
2762 ZSTD_dictTableLoadMethod_e dtlm)
2194 2763 {
2195 2764 const BYTE* const ip = (const BYTE*) src;
2196 2765 const BYTE* const iend = ip + srcSize;
2197 ZSTD_compressionParameters const* cParams = &params->cParams;
2198 2766
2199 2767 ZSTD_window_update(&ms->window, src, srcSize);
2200 2768 ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
2201 2769
2770 /* Assert that the ms params match the params we're being given */
2771 ZSTD_assertEqualCParams(params->cParams, ms->cParams);
2772
2202 2773 if (srcSize <= HASH_READ_SIZE) return 0;
2203 2774
2204 2775 switch(params->cParams.strategy)
2205 2776 {
2206 2777 case ZSTD_fast:
2207 ZSTD_fillHashTable(ms, cParams, iend);
2778 ZSTD_fillHashTable(ms, iend, dtlm);
2208 2779 break;
2209 2780 case ZSTD_dfast:
2210 ZSTD_fillDoubleHashTable(ms, cParams, iend);
2781 ZSTD_fillDoubleHashTable(ms, iend, dtlm);
2211 2782 break;
2212 2783
2213 2784 case ZSTD_greedy:
2214 2785 case ZSTD_lazy:
2215 2786 case ZSTD_lazy2:
2216 2787 if (srcSize >= HASH_READ_SIZE)
2217 ZSTD_insertAndFindFirstIndex(ms, cParams, iend-HASH_READ_SIZE);
2788 ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
2218 2789 break;
2219 2790
2220 2791 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
2221 2792 case ZSTD_btopt:
2222 2793 case ZSTD_btultra:
2223 2794 if (srcSize >= HASH_READ_SIZE)
2224 ZSTD_updateTree(ms, cParams, iend-HASH_READ_SIZE, iend);
2795 ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
2225 2796 break;
2226 2797
2227 2798 default:
@@ -2256,7 +2827,12 b' static size_t ZSTD_checkDictNCount(short'
2256 2827 * assumptions : magic number supposed already checked
2257 2828 * dictSize supposed > 8
2258 2829 */
2259 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace)
2830 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
2831 ZSTD_matchState_t* ms,
2832 ZSTD_CCtx_params const* params,
2833 const void* dict, size_t dictSize,
2834 ZSTD_dictTableLoadMethod_e dtlm,
2835 void* workspace)
2260 2836 {
2261 2837 const BYTE* dictPtr = (const BYTE*)dict;
2262 2838 const BYTE* const dictEnd = dictPtr + dictSize;
@@ -2265,13 +2841,15 b' static size_t ZSTD_loadZstdDictionary(ZS'
2265 2841 size_t dictID;
2266 2842
2267 2843 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2844 assert(dictSize > 8);
2845 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
2268 2846
2269 2847 dictPtr += 4; /* skip magic number */
2270 2848 dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
2271 2849 dictPtr += 4;
2272 2850
2273 2851 { unsigned maxSymbolValue = 255;
2274 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
2852 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
2275 2853 if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
2276 2854 if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
2277 2855 dictPtr += hufHeaderSize;
@@ -2282,7 +2860,8 b' static size_t ZSTD_loadZstdDictionary(ZS'
2282 2860 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2283 2861 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2284 2862 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
2285 CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
2863 /* fill all offset symbols to avoid garbage at end of table */
2864 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
2286 2865 dictionary_corrupted);
2287 2866 dictPtr += offcodeHeaderSize;
2288 2867 }
@@ -2294,7 +2873,7 b' static size_t ZSTD_loadZstdDictionary(ZS'
2294 2873 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2295 2874 /* Every match length code must have non-zero probability */
2296 2875 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
2297 CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
2876 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
2298 2877 dictionary_corrupted);
2299 2878 dictPtr += matchlengthHeaderSize;
2300 2879 }
@@ -2306,7 +2885,7 b' static size_t ZSTD_loadZstdDictionary(ZS'
2306 2885 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2307 2886 /* Every literal length code must have non-zero probability */
2308 2887 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
2309 CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
2888 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
2310 2889 dictionary_corrupted);
2311 2890 dictPtr += litlengthHeaderSize;
2312 2891 }
@@ -2332,22 +2911,25 b' static size_t ZSTD_loadZstdDictionary(ZS'
2332 2911 if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
2333 2912 } }
2334 2913
2335 bs->entropy.hufCTable_repeatMode = HUF_repeat_valid;
2336 bs->entropy.offcode_repeatMode = FSE_repeat_valid;
2337 bs->entropy.matchlength_repeatMode = FSE_repeat_valid;
2338 bs->entropy.litlength_repeatMode = FSE_repeat_valid;
2339 CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize));
2914 bs->entropy.huf.repeatMode = HUF_repeat_valid;
2915 bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
2916 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
2917 bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
2918 CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
2340 2919 return dictID;
2341 2920 }
2342 2921 }
2343 2922
2344 2923 /** ZSTD_compress_insertDictionary() :
2345 2924 * @return : dictID, or an error code */
2346 static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms,
2347 ZSTD_CCtx_params const* params,
2348 const void* dict, size_t dictSize,
2349 ZSTD_dictContentType_e dictContentType,
2350 void* workspace)
2925 static size_t
2926 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
2927 ZSTD_matchState_t* ms,
2928 const ZSTD_CCtx_params* params,
2929 const void* dict, size_t dictSize,
2930 ZSTD_dictContentType_e dictContentType,
2931 ZSTD_dictTableLoadMethod_e dtlm,
2932 void* workspace)
2351 2933 {
2352 2934 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
2353 2935 if ((dict==NULL) || (dictSize<=8)) return 0;
@@ -2356,12 +2938,12 b' static size_t ZSTD_compress_insertDictio'
2356 2938
2357 2939 /* dict restricted modes */
2358 2940 if (dictContentType == ZSTD_dct_rawContent)
2359 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
2941 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
2360 2942
2361 2943 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
2362 2944 if (dictContentType == ZSTD_dct_auto) {
2363 2945 DEBUGLOG(4, "raw content dictionary detected");
2364 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
2946 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
2365 2947 }
2366 2948 if (dictContentType == ZSTD_dct_fullDict)
2367 2949 return ERROR(dictionary_wrong);
@@ -2369,17 +2951,18 b' static size_t ZSTD_compress_insertDictio'
2369 2951 }
2370 2952
2371 2953 /* dict as full zstd dictionary */
2372 return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace);
2954 return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
2373 2955 }
2374 2956
2375 2957 /*! ZSTD_compressBegin_internal() :
2376 2958 * @return : 0, or an error code */
2377 size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2378 const void* dict, size_t dictSize,
2379 ZSTD_dictContentType_e dictContentType,
2380 const ZSTD_CDict* cdict,
2381 ZSTD_CCtx_params params, U64 pledgedSrcSize,
2382 ZSTD_buffered_policy_e zbuff)
2959 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2960 const void* dict, size_t dictSize,
2961 ZSTD_dictContentType_e dictContentType,
2962 ZSTD_dictTableLoadMethod_e dtlm,
2963 const ZSTD_CDict* cdict,
2964 ZSTD_CCtx_params params, U64 pledgedSrcSize,
2965 ZSTD_buffered_policy_e zbuff)
2383 2966 {
2384 2967 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
2385 2968 /* params are supposed to be fully validated at this point */
@@ -2387,9 +2970,7 b' size_t ZSTD_compressBegin_internal(ZSTD_'
2387 2970 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2388 2971
2389 2972 if (cdict && cdict->dictContentSize>0) {
2390 cctx->requestedParams = params;
2391 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params.cParams.windowLog,
2392 params.fParams, pledgedSrcSize, zbuff);
2973 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
2393 2974 }
2394 2975
2395 2976 CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
@@ -2397,7 +2978,7 b' size_t ZSTD_compressBegin_internal(ZSTD_'
2397 2978 {
2398 2979 size_t const dictID = ZSTD_compress_insertDictionary(
2399 2980 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
2400 &params, dict, dictSize, dictContentType, cctx->entropyWorkspace);
2981 &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
2401 2982 if (ZSTD_isError(dictID)) return dictID;
2402 2983 assert(dictID <= (size_t)(U32)-1);
2403 2984 cctx->dictID = (U32)dictID;
@@ -2408,6 +2989,7 b' size_t ZSTD_compressBegin_internal(ZSTD_'
2408 2989 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
2409 2990 const void* dict, size_t dictSize,
2410 2991 ZSTD_dictContentType_e dictContentType,
2992 ZSTD_dictTableLoadMethod_e dtlm,
2411 2993 const ZSTD_CDict* cdict,
2412 2994 ZSTD_CCtx_params params,
2413 2995 unsigned long long pledgedSrcSize)
@@ -2416,7 +2998,7 b' size_t ZSTD_compressBegin_advanced_inter'
2416 2998 /* compression parameters verification and optimization */
2417 2999 CHECK_F( ZSTD_checkCParams(params.cParams) );
2418 3000 return ZSTD_compressBegin_internal(cctx,
2419 dict, dictSize, dictContentType,
3001 dict, dictSize, dictContentType, dtlm,
2420 3002 cdict,
2421 3003 params, pledgedSrcSize,
2422 3004 ZSTDb_not_buffered);
@@ -2431,7 +3013,7 b' size_t ZSTD_compressBegin_advanced(ZSTD_'
2431 3013 ZSTD_CCtx_params const cctxParams =
2432 3014 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2433 3015 return ZSTD_compressBegin_advanced_internal(cctx,
2434 dict, dictSize, ZSTD_dct_auto,
3016 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
2435 3017 NULL /*cdict*/,
2436 3018 cctxParams, pledgedSrcSize);
2437 3019 }
@@ -2442,7 +3024,7 b' size_t ZSTD_compressBegin_usingDict(ZSTD'
2442 3024 ZSTD_CCtx_params const cctxParams =
2443 3025 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2444 3026 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
2445 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
3027 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
2446 3028 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
2447 3029 }
2448 3030
@@ -2505,7 +3087,9 b' size_t ZSTD_compressEnd (ZSTD_CCtx* cctx'
2505 3087 if (ZSTD_isError(cSize)) return cSize;
2506 3088 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
2507 3089 if (ZSTD_isError(endResult)) return endResult;
2508 if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
3090 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
3091 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
3092 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
2509 3093 DEBUGLOG(4, "end of frame : controlling src size");
2510 3094 if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
2511 3095 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u",
@@ -2517,22 +3101,22 b' size_t ZSTD_compressEnd (ZSTD_CCtx* cctx'
2517 3101
2518 3102
2519 3103 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
2520 void* dst, size_t dstCapacity,
2521 const void* src, size_t srcSize,
2522 const void* dict,size_t dictSize,
2523 ZSTD_parameters params)
3104 void* dst, size_t dstCapacity,
3105 const void* src, size_t srcSize,
3106 const void* dict,size_t dictSize,
3107 ZSTD_parameters params)
2524 3108 {
2525 3109 ZSTD_CCtx_params const cctxParams =
2526 3110 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2527 3111 DEBUGLOG(4, "ZSTD_compress_internal");
2528 3112 return ZSTD_compress_advanced_internal(cctx,
2529 dst, dstCapacity,
2530 src, srcSize,
2531 dict, dictSize,
2532 cctxParams);
3113 dst, dstCapacity,
3114 src, srcSize,
3115 dict, dictSize,
3116 cctxParams);
2533 3117 }
2534 3118
2535 size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
3119 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
2536 3120 void* dst, size_t dstCapacity,
2537 3121 const void* src, size_t srcSize,
2538 3122 const void* dict,size_t dictSize,
@@ -2540,7 +3124,11 b' size_t ZSTD_compress_advanced (ZSTD_CCtx'
2540 3124 {
2541 3125 DEBUGLOG(4, "ZSTD_compress_advanced");
2542 3126 CHECK_F(ZSTD_checkCParams(params.cParams));
2543 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
3127 return ZSTD_compress_internal(cctx,
3128 dst, dstCapacity,
3129 src, srcSize,
3130 dict, dictSize,
3131 params);
2544 3132 }
2545 3133
2546 3134 /* Internal */
@@ -2551,37 +3139,44 b' size_t ZSTD_compress_advanced_internal('
2551 3139 const void* dict,size_t dictSize,
2552 3140 ZSTD_CCtx_params params)
2553 3141 {
2554 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)",
2555 (U32)srcSize);
2556 CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
2557 params, srcSize, ZSTDb_not_buffered) );
3142 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (U32)srcSize);
3143 CHECK_F( ZSTD_compressBegin_internal(cctx,
3144 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
3145 params, srcSize, ZSTDb_not_buffered) );
2558 3146 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2559 3147 }
2560 3148
2561 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
2562 const void* dict, size_t dictSize, int compressionLevel)
3149 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
3150 void* dst, size_t dstCapacity,
3151 const void* src, size_t srcSize,
3152 const void* dict, size_t dictSize,
3153 int compressionLevel)
2563 3154 {
2564 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
3155 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
2565 3156 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2566 3157 assert(params.fParams.contentSizeFlag == 1);
2567 ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0);
2568 3158 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
2569 3159 }
2570 3160
2571 size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
3161 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
3162 void* dst, size_t dstCapacity,
3163 const void* src, size_t srcSize,
3164 int compressionLevel)
2572 3165 {
2573 3166 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize);
3167 assert(cctx != NULL);
2574 3168 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
2575 3169 }
2576 3170
2577 size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
3171 size_t ZSTD_compress(void* dst, size_t dstCapacity,
3172 const void* src, size_t srcSize,
3173 int compressionLevel)
2578 3174 {
2579 3175 size_t result;
2580 3176 ZSTD_CCtx ctxBody;
2581 memset(&ctxBody, 0, sizeof(ctxBody));
2582 ctxBody.customMem = ZSTD_defaultCMem;
3177 ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
2583 3178 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
2584 ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
3179 ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */
2585 3180 return result;
2586 3181 }
2587 3182
@@ -2619,9 +3214,9 b' static size_t ZSTD_initCDict_internal('
2619 3214 ZSTD_dictContentType_e dictContentType,
2620 3215 ZSTD_compressionParameters cParams)
2621 3216 {
2622 DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType);
3217 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (U32)dictContentType);
2623 3218 assert(!ZSTD_checkCParams(cParams));
2624 cdict->cParams = cParams;
3219 cdict->matchState.cParams = cParams;
2625 3220 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
2626 3221 cdict->dictBuffer = NULL;
2627 3222 cdict->dictContent = dictBuffer;
@@ -2654,7 +3249,7 b' static size_t ZSTD_initCDict_internal('
2654 3249 { size_t const dictID = ZSTD_compress_insertDictionary(
2655 3250 &cdict->cBlockState, &cdict->matchState, &params,
2656 3251 cdict->dictContent, cdict->dictContentSize,
2657 dictContentType, cdict->workspace);
3252 dictContentType, ZSTD_dtlm_full, cdict->workspace);
2658 3253 if (ZSTD_isError(dictID)) return dictID;
2659 3254 assert(dictID <= (size_t)(U32)-1);
2660 3255 cdict->dictID = (U32)dictID;
@@ -2775,7 +3370,7 b' const ZSTD_CDict* ZSTD_initStaticCDict('
2775 3370 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
2776 3371 {
2777 3372 assert(cdict != NULL);
2778 return cdict->cParams;
3373 return cdict->matchState.cParams;
2779 3374 }
2780 3375
2781 3376 /* ZSTD_compressBegin_usingCDict_advanced() :
@@ -2799,7 +3394,7 b' size_t ZSTD_compressBegin_usingCDict_adv'
2799 3394 }
2800 3395 params.fParams = fParams;
2801 3396 return ZSTD_compressBegin_internal(cctx,
2802 NULL, 0, ZSTD_dct_auto,
3397 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
2803 3398 cdict,
2804 3399 params, pledgedSrcSize,
2805 3400 ZSTDb_not_buffered);
@@ -2813,7 +3408,7 b' size_t ZSTD_compressBegin_usingCDict(ZST'
2813 3408 {
2814 3409 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
2815 3410 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
2816 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0);
3411 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
2817 3412 }
2818 3413
2819 3414 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
@@ -2880,16 +3475,17 b' size_t ZSTD_CStreamOutSize(void)'
2880 3475 static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
2881 3476 const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
2882 3477 const ZSTD_CDict* const cdict,
2883 ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize)
3478 ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
2884 3479 {
2885 DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)",
2886 params.disableLiteralCompression);
3480 DEBUGLOG(4, "ZSTD_resetCStream_internal");
3481 /* Finalize the compression parameters */
3482 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
2887 3483 /* params are supposed to be fully validated at this point */
2888 3484 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
2889 3485 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2890 3486
2891 3487 CHECK_F( ZSTD_compressBegin_internal(cctx,
2892 dict, dictSize, dictContentType,
3488 dict, dictSize, dictContentType, ZSTD_dtlm_fast,
2893 3489 cdict,
2894 3490 params, pledgedSrcSize,
2895 3491 ZSTDb_buffered) );
@@ -2912,7 +3508,6 b' size_t ZSTD_resetCStream(ZSTD_CStream* z'
2912 3508 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize);
2913 3509 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
2914 3510 params.fParams.contentSizeFlag = 1;
2915 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, 0);
2916 3511 return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
2917 3512 }
2918 3513
@@ -2925,6 +3520,7 b' size_t ZSTD_initCStream_internal(ZSTD_CS'
2925 3520 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
2926 3521 {
2927 3522 DEBUGLOG(4, "ZSTD_initCStream_internal");
3523 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
2928 3524 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
2929 3525 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2930 3526
@@ -2991,25 +3587,21 b' size_t ZSTD_initCStream_advanced(ZSTD_CS'
2991 3587 (U32)pledgedSrcSize, params.fParams.contentSizeFlag);
2992 3588 CHECK_F( ZSTD_checkCParams(params.cParams) );
2993 3589 if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */
2994 { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
2995 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize);
2996 }
3590 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3591 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, zcs->requestedParams, pledgedSrcSize);
2997 3592 }
2998 3593
2999 3594 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
3000 3595 {
3001 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
3002 ZSTD_CCtx_params const cctxParams =
3003 ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3004 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
3596 ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel);
3597 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, zcs->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN);
3005 3598 }
3006 3599
3007 3600 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
3008 3601 {
3009 3602 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */
3010 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3011 ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3012 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, cctxParams, pledgedSrcSize);
3603 ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel);
3604 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, zcs->requestedParams, pledgedSrcSize);
3013 3605 }
3014 3606
3015 3607 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
@@ -3073,7 +3665,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3073 3665 ip = iend;
3074 3666 op += cSize;
3075 3667 zcs->frameEnded = 1;
3076 ZSTD_startNewCompression(zcs);
3668 ZSTD_CCtx_reset(zcs);
3077 3669 someMoreWork = 0; break;
3078 3670 }
3079 3671 /* complete loading into inBuffer */
@@ -3126,7 +3718,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3126 3718 if (zcs->frameEnded) {
3127 3719 DEBUGLOG(5, "Frame completed directly in outBuffer");
3128 3720 someMoreWork = 0;
3129 ZSTD_startNewCompression(zcs);
3721 ZSTD_CCtx_reset(zcs);
3130 3722 }
3131 3723 break;
3132 3724 }
@@ -3154,7 +3746,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3154 3746 if (zcs->frameEnded) {
3155 3747 DEBUGLOG(5, "Frame completed on flush");
3156 3748 someMoreWork = 0;
3157 ZSTD_startNewCompression(zcs);
3749 ZSTD_CCtx_reset(zcs);
3158 3750 break;
3159 3751 }
3160 3752 zcs->streamStage = zcss_load;
@@ -3207,19 +3799,16 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3207 3799 params.cParams = ZSTD_getCParamsFromCCtxParams(
3208 3800 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
3209 3801
3802
3210 3803 #ifdef ZSTD_MULTITHREAD
3211 3804 if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
3212 3805 params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
3213 3806 }
3214 3807 if (params.nbWorkers > 0) {
3215 3808 /* mt context creation */
3216 if (cctx->mtctx == NULL || (params.nbWorkers != ZSTDMT_getNbWorkers(cctx->mtctx))) {
3809 if (cctx->mtctx == NULL) {
3217 3810 DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u",
3218 3811 params.nbWorkers);
3219 if (cctx->mtctx != NULL)
3220 DEBUGLOG(4, "ZSTD_compress_generic: previous nbWorkers was %u",
3221 ZSTDMT_getNbWorkers(cctx->mtctx));
3222 ZSTDMT_freeCCtx(cctx->mtctx);
3223 3812 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
3224 3813 if (cctx->mtctx == NULL) return ERROR(memory_allocation);
3225 3814 }
@@ -3251,8 +3840,9 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3251 3840 { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
3252 3841 if ( ZSTD_isError(flushMin)
3253 3842 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
3254 ZSTD_startNewCompression(cctx);
3843 ZSTD_CCtx_reset(cctx);
3255 3844 }
3845 DEBUGLOG(5, "completed ZSTD_compress_generic delegating to ZSTDMT_compressStream_generic");
3256 3846 return flushMin;
3257 3847 } }
3258 3848 #endif
@@ -3308,82 +3898,83 b' size_t ZSTD_endStream(ZSTD_CStream* zcs,'
3308 3898
3309 3899 #define ZSTD_MAX_CLEVEL 22
3310 3900 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
3901 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
3311 3902
3312 3903 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
3313 3904 { /* "default" - guarantees a monotonically increasing memory budget */
3314 3905 /* W, C, H, S, L, TL, strat */
3315 3906 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
3316 { 19, 13, 14, 1, 7, 1, ZSTD_fast }, /* level 1 */
3317 { 19, 15, 16, 1, 6, 1, ZSTD_fast }, /* level 2 */
3318 { 20, 16, 17, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3319 { 20, 17, 18, 1, 5, 8, ZSTD_dfast }, /* level 4 */
3320 { 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */
3321 { 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
3322 { 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */
3323 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3324 { 21, 19, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
3325 { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3326 { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3907 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
3908 { 19, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
3909 { 20, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */
3910 { 20, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */
3911 { 20, 18, 18, 2, 5, 2, ZSTD_greedy }, /* level 5 */
3912 { 21, 18, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */
3913 { 21, 18, 19, 3, 5, 8, ZSTD_lazy2 }, /* level 7 */
3914 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3915 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
3916 { 21, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3917 { 21, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3327 3918 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3328 3919 { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
3329 3920 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
3330 3921 { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
3331 3922 { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */
3332 { 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */
3333 { 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */
3334 { 23, 23, 22, 7, 3,128, ZSTD_btopt }, /* level 19 */
3335 { 25, 25, 23, 7, 3,128, ZSTD_btultra }, /* level 20 */
3336 { 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */
3337 { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */
3923 { 23, 22, 22, 4, 4, 64, ZSTD_btopt }, /* level 17 */
3924 { 23, 23, 22, 6, 3,256, ZSTD_btopt }, /* level 18 */
3925 { 23, 24, 22, 7, 3,256, ZSTD_btultra }, /* level 19 */
3926 { 25, 25, 23, 7, 3,256, ZSTD_btultra }, /* level 20 */
3927 { 26, 26, 24, 7, 3,512, ZSTD_btultra }, /* level 21 */
3928 { 27, 27, 25, 9, 3,999, ZSTD_btultra }, /* level 22 */
3338 3929 },
3339 3930 { /* for srcSize <= 256 KB */
3340 3931 /* W, C, H, S, L, T, strat */
3341 3932 { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3342 { 18, 13, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
3343 { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
3344 { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3345 { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
3346 { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
3347 { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
3348 { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
3349 { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3350 { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3351 { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3352 { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
3353 { 18, 18, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 12.*/
3354 { 18, 19, 17, 7, 4, 8, ZSTD_btlazy2 }, /* level 13 */
3355 { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
3356 { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
3357 { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
3358 { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
3359 { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
3360 { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
3361 { 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/
3362 { 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/
3363 { 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/
3933 { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
3934 { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */
3935 { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */
3936 { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/
3937 { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/
3938 { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
3939 { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
3940 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3941 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3942 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3943 { 18, 18, 19, 5, 4, 16, ZSTD_btlazy2 }, /* level 11.*/
3944 { 18, 19, 19, 6, 4, 16, ZSTD_btlazy2 }, /* level 12.*/
3945 { 18, 19, 19, 8, 4, 16, ZSTD_btlazy2 }, /* level 13 */
3946 { 18, 18, 19, 4, 4, 24, ZSTD_btopt }, /* level 14.*/
3947 { 18, 18, 19, 4, 3, 24, ZSTD_btopt }, /* level 15.*/
3948 { 18, 19, 19, 6, 3, 64, ZSTD_btopt }, /* level 16.*/
3949 { 18, 19, 19, 8, 3,128, ZSTD_btopt }, /* level 17.*/
3950 { 18, 19, 19, 10, 3,256, ZSTD_btopt }, /* level 18.*/
3951 { 18, 19, 19, 10, 3,256, ZSTD_btultra }, /* level 19.*/
3952 { 18, 19, 19, 11, 3,512, ZSTD_btultra }, /* level 20.*/
3953 { 18, 19, 19, 12, 3,512, ZSTD_btultra }, /* level 21.*/
3954 { 18, 19, 19, 13, 3,999, ZSTD_btultra }, /* level 22.*/
3364 3955 },
3365 3956 { /* for srcSize <= 128 KB */
3366 3957 /* W, C, H, S, L, T, strat */
3367 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* level 0 - not used */
3368 { 17, 12, 13, 1, 6, 1, ZSTD_fast }, /* level 1 */
3369 { 17, 13, 16, 1, 5, 1, ZSTD_fast }, /* level 2 */
3370 { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
3371 { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
3372 { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
3373 { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
3374 { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
3958 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3959 { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
3960 { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
3961 { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */
3962 { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */
3963 { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
3964 { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
3965 { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
3375 3966 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3376 3967 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3377 3968 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3378 3969 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
3379 { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
3380 { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
3381 { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
3382 { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
3383 { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
3384 { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
3385 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
3386 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
3970 { 17, 18, 17, 6, 4, 16, ZSTD_btlazy2 }, /* level 12 */
3971 { 17, 18, 17, 8, 4, 16, ZSTD_btlazy2 }, /* level 13.*/
3972 { 17, 18, 17, 4, 4, 32, ZSTD_btopt }, /* level 14.*/
3973 { 17, 18, 17, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3974 { 17, 18, 17, 7, 3,128, ZSTD_btopt }, /* level 16.*/
3975 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 17.*/
3976 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 18.*/
3977 { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 19.*/
3387 3978 { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/
3388 3979 { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/
3389 3980 { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/
@@ -3391,28 +3982,28 b' static const ZSTD_compressionParameters '
3391 3982 { /* for srcSize <= 16 KB */
3392 3983 /* W, C, H, S, L, T, strat */
3393 3984 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3394 { 14, 14, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
3395 { 14, 14, 14, 1, 4, 1, ZSTD_fast }, /* level 2 */
3396 { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
3397 { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
3398 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
3399 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
3400 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
3401 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
3402 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
3403 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
3404 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
3985 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
3986 { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
3987 { 14, 14, 14, 2, 4, 1, ZSTD_dfast }, /* level 3.*/
3988 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4.*/
3989 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
3990 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
3991 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
3992 { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
3993 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
3994 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
3995 { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
3405 3996 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
3406 3997 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
3407 3998 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
3408 3999 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3409 4000 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
3410 4001 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
3411 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
3412 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
4002 { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 18.*/
4003 { 14, 15, 15, 6, 3,256, ZSTD_btultra }, /* level 19.*/
3413 4004 { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
3414 4005 { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
3415 { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/
4006 { 14, 15, 15, 10, 3,512, ZSTD_btultra }, /* level 22.*/
3416 4007 },
3417 4008 };
3418 4009
@@ -27,6 +27,7 b''
27 27 extern "C" {
28 28 #endif
29 29
30
30 31 /*-*************************************
31 32 * Constants
32 33 ***************************************/
@@ -37,7 +38,8 b' extern "C" {'
37 38 It's not a big deal though : candidate will just be sorted again.
38 39 Additionally, candidate position 1 will be lost.
39 40 But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
40 The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */
41 The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
42 Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
41 43
42 44
43 45 /*-*************************************
@@ -46,6 +48,12 b' extern "C" {'
46 48 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
47 49 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
48 50
51 typedef enum {
52 ZSTD_dictDefaultAttach = 0,
53 ZSTD_dictForceAttach = 1,
54 ZSTD_dictForceCopy = -1,
55 } ZSTD_dictAttachPref_e;
56
49 57 typedef struct ZSTD_prefixDict_s {
50 58 const void* dict;
51 59 size_t dictSize;
@@ -53,14 +61,22 b' typedef struct ZSTD_prefixDict_s {'
53 61 } ZSTD_prefixDict;
54 62
55 63 typedef struct {
56 U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
64 U32 CTable[HUF_CTABLE_SIZE_U32(255)];
65 HUF_repeat repeatMode;
66 } ZSTD_hufCTables_t;
67
68 typedef struct {
57 69 FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
58 70 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
59 71 FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
60 HUF_repeat hufCTable_repeatMode;
61 72 FSE_repeat offcode_repeatMode;
62 73 FSE_repeat matchlength_repeatMode;
63 74 FSE_repeat litlength_repeatMode;
75 } ZSTD_fseCTables_t;
76
77 typedef struct {
78 ZSTD_hufCTables_t huf;
79 ZSTD_fseCTables_t fse;
64 80 } ZSTD_entropyCTables_t;
65 81
66 82 typedef struct {
@@ -76,26 +92,27 b' typedef struct {'
76 92 U32 rep[ZSTD_REP_NUM];
77 93 } ZSTD_optimal_t;
78 94
95 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
96
79 97 typedef struct {
80 98 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
81 U32* litFreq; /* table of literals statistics, of size 256 */
82 U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
83 U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
84 U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
85 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
86 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
99 U32* litFreq; /* table of literals statistics, of size 256 */
100 U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
101 U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
102 U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
103 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
104 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
87 105
88 106 U32 litSum; /* nb of literals */
89 107 U32 litLengthSum; /* nb of litLength codes */
90 108 U32 matchLengthSum; /* nb of matchLength codes */
91 109 U32 offCodeSum; /* nb of offset codes */
92 /* begin updated by ZSTD_setLog2Prices */
93 U32 log2litSum; /* pow2 to compare log2(litfreq) to */
94 U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */
95 U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */
96 U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */
97 /* end : updated by ZSTD_setLog2Prices */
98 U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */
110 U32 litSumBasePrice; /* to compare to log2(litfreq) */
111 U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
112 U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
113 U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
114 ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
115 const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
99 116 } optState_t;
100 117
101 118 typedef struct {
@@ -111,17 +128,20 b' typedef struct {'
111 128 U32 lowLimit; /* below that point, no more data */
112 129 } ZSTD_window_t;
113 130
114 typedef struct {
115 ZSTD_window_t window; /* State for window round buffer management */
116 U32 loadedDictEnd; /* index of end of dictionary */
117 U32 nextToUpdate; /* index from which to continue table update */
118 U32 nextToUpdate3; /* index from which to continue table update */
119 U32 hashLog3; /* dispatch table : larger == faster, more memory */
131 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
132 struct ZSTD_matchState_t {
133 ZSTD_window_t window; /* State for window round buffer management */
134 U32 loadedDictEnd; /* index of end of dictionary */
135 U32 nextToUpdate; /* index from which to continue table update */
136 U32 nextToUpdate3; /* index from which to continue table update */
137 U32 hashLog3; /* dispatch table : larger == faster, more memory */
120 138 U32* hashTable;
121 139 U32* hashTable3;
122 140 U32* chainTable;
123 141 optState_t opt; /* optimal parser state */
124 } ZSTD_matchState_t;
142 const ZSTD_matchState_t *dictMatchState;
143 ZSTD_compressionParameters cParams;
144 };
125 145
126 146 typedef struct {
127 147 ZSTD_compressedBlockState_t* prevCBlock;
@@ -161,7 +181,7 b' typedef struct {'
161 181 rawSeq* seq; /* The start of the sequences */
162 182 size_t pos; /* The position where reading stopped. <= size. */
163 183 size_t size; /* The number of sequences. <= capacity. */
164 size_t capacity; /* The capacity of the `seq` pointer */
184 size_t capacity; /* The capacity starting from `seq` pointer */
165 185 } rawSeqStore_t;
166 186
167 187 struct ZSTD_CCtx_params_s {
@@ -170,10 +190,11 b' struct ZSTD_CCtx_params_s {'
170 190 ZSTD_frameParameters fParams;
171 191
172 192 int compressionLevel;
173 int disableLiteralCompression;
174 193 int forceWindow; /* force back-references to respect limit of
175 194 * 1<<wLog, even for dictionary */
176 195
196 ZSTD_dictAttachPref_e attachDictPref;
197
177 198 /* Multithreading: used to pass parameters to mtctx */
178 199 unsigned nbWorkers;
179 200 unsigned jobSize;
@@ -193,6 +214,8 b' struct ZSTD_CCtx_s {'
193 214 ZSTD_CCtx_params requestedParams;
194 215 ZSTD_CCtx_params appliedParams;
195 216 U32 dictID;
217
218 int workSpaceOversizedDuration;
196 219 void* workSpace;
197 220 size_t workSpaceSize;
198 221 size_t blockSize;
@@ -235,11 +258,15 b' struct ZSTD_CCtx_s {'
235 258 #endif
236 259 };
237 260
261 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
262
263 typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
264
238 265
239 266 typedef size_t (*ZSTD_blockCompressor) (
240 267 ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
241 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
242 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
268 void const* src, size_t srcSize);
269 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
243 270
244 271
245 272 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -280,16 +307,18 b' MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)'
280 307 */
281 308 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
282 309 {
283 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
310 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
284 311 static const BYTE* g_start = NULL;
285 312 if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
286 313 { U32 const pos = (U32)((const BYTE*)literals - g_start);
287 DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u",
314 DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
288 315 pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
289 316 }
290 317 #endif
318 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
291 319 /* copy Literals */
292 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
320 assert(seqStorePtr->maxNbLit <= 128 KB);
321 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
293 322 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
294 323 seqStorePtr->lit += litLength;
295 324
@@ -420,6 +449,11 b' ZSTD_count_2segments(const BYTE* ip, con'
420 449 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
421 450 size_t const matchLength = ZSTD_count(ip, match, vEnd);
422 451 if (match + matchLength != mEnd) return matchLength;
452 DEBUGLOG(7, "ZSTD_count_2segments: found a 2-parts match (current length==%zu)", matchLength);
452 DEBUGLOG(7, "ZSTD_count_2segments: found a two-part match (current length==%zu)", matchLength);
453 DEBUGLOG(7, "distance from match beginning to end of dictionary = %zi", mEnd - match);
455 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
456 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
423 457 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
424 458 }
425 459
@@ -497,6 +531,20 b' MEM_STATIC U32 ZSTD_window_hasExtDict(ZS'
497 531 }
498 532
499 533 /**
534 * ZSTD_matchState_dictMode():
535 * Inspects the provided matchState and figures out what dictMode should be
536 * passed to the compressor.
537 */
538 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
539 {
540 return ZSTD_window_hasExtDict(ms->window) ?
541 ZSTD_extDict :
542 ms->dictMatchState != NULL ?
543 ZSTD_dictMatchState :
544 ZSTD_noDict;
545 }
546
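The selector above is consumed by block-compressor dispatch. A minimal sketch of the intended call pattern, assuming a caller that already holds a match state ms, a seqStore, rep codes and a strategy strat (the variable names are illustrative; both function signatures appear elsewhere in this change):

    /* sketch: pick the block compressor variant matching the current dict mode */
    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(strat, ZSTD_matchState_dictMode(ms));
    size_t const lastLLSize = blockCompressor(ms, seqStore, rep, src, srcSize);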
547 /**
500 548 * ZSTD_window_needOverflowCorrection():
501 549 * Returns non-zero if the indices are getting too large and need overflow
502 550 * protection.
@@ -563,31 +611,41 b' MEM_STATIC U32 ZSTD_window_correctOverfl'
563 611 * ZSTD_window_enforceMaxDist():
564 612 * Updates lowLimit so that:
565 613 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
614 *
566 615 * This allows a simple check that index >= lowLimit to see if index is valid.
567 616 * This must be called before a block compression call, with srcEnd as the block
568 617 * source end.
618 *
569 619 * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
570 620 * This is because dictionaries are allowed to be referenced as long as the last
571 621 * byte of the dictionary is in the window, but once they are out of range,
572 622 * they cannot be referenced. If loadedDictEndPtr is NULL, we use
573 623 * loadedDictEnd == 0.
624 *
625 * In normal dict mode, the dict is between lowLimit and dictLimit. In
626 * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
627 * is below them. forceWindow and dictMatchState are therefore incompatible.
574 628 */
575 629 MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
576 630 void const* srcEnd, U32 maxDist,
577 U32* loadedDictEndPtr)
631 U32* loadedDictEndPtr,
632 const ZSTD_matchState_t** dictMatchStatePtr)
578 633 {
579 634 U32 const current = (U32)((BYTE const*)srcEnd - window->base);
580 635 U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
636 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: current=%u, maxDist=%u", current, maxDist);
581 637 if (current > maxDist + loadedDictEnd) {
582 638 U32 const newLowLimit = current - maxDist;
583 639 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
584 640 if (window->dictLimit < window->lowLimit) {
585 DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit,
586 window->lowLimit);
641 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
642 window->dictLimit, window->lowLimit);
587 643 window->dictLimit = window->lowLimit;
588 644 }
589 645 if (loadedDictEndPtr)
590 646 *loadedDictEndPtr = 0;
647 if (dictMatchStatePtr)
648 *dictMatchStatePtr = NULL;
591 649 }
592 650 }
593 651
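A quick worked example of the invariant enforced above, with illustrative numbers only: take maxDist = 1<<17 = 131072, loadedDictEnd = 0 and current = 200000. Since current > maxDist + loadedDictEnd, lowLimit is raised to 200000 - 131072 = 68928; after that, a candidate index is referenceable exactly when matchIndex >= window->lowLimit, which is the cheap validity test the comment above refers to.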
@@ -603,12 +661,12 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w'
603 661 {
604 662 BYTE const* const ip = (BYTE const*)src;
605 663 U32 contiguous = 1;
664 DEBUGLOG(5, "ZSTD_window_update");
606 665 /* Check if blocks follow each other */
607 666 if (src != window->nextSrc) {
608 667 /* not contiguous */
609 668 size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
610 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u",
611 window->dictLimit);
669 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
612 670 window->lowLimit = window->dictLimit;
613 671 assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
614 672 window->dictLimit = (U32)distanceFromBase;
@@ -625,10 +683,38 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w'
625 683 ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
626 684 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
627 685 window->lowLimit = lowLimitMax;
686 DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
628 687 }
629 688 return contiguous;
630 689 }
631 690
691
692 /* debug functions */
693
694 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
695 {
696 U32 const fp_accuracy = 8;
697 U32 const fp_multiplier = (1 << fp_accuracy);
698 U32 const stat = rawStat + 1;
699 U32 const hb = ZSTD_highbit32(stat);
700 U32 const BWeight = hb * fp_multiplier;
701 U32 const FWeight = (stat << fp_accuracy) >> hb;
702 U32 const weight = BWeight + FWeight;
703 assert(hb + fp_accuracy < 31);
704 return (double)weight / fp_multiplier;
705 }
706
707 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
708 {
709 unsigned u, sum;
710 for (u=0, sum=0; u<=max; u++) sum += table[u];
711 DEBUGLOG(2, "total nb elts: %u", sum);
712 for (u=0; u<=max; u++) {
713 DEBUGLOG(2, "%2u: %5u (%.2f)",
714 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
715 }
716 }
717
632 718 #if defined (__cplusplus)
633 719 }
634 720 #endif
@@ -640,7 +726,7 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w'
640 726 * ============================================================== */
641 727
642 728 /* ZSTD_getCParamsFromCCtxParams() :
643 * cParams are built depending on compressionLevel, src size hints,
729 * cParams are built depending on compressionLevel, src size hints,
644 730 * LDM and manually set compression parameters.
645 731 */
646 732 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
@@ -656,6 +742,8 b' size_t ZSTD_initCStream_internal(ZSTD_CS'
656 742 const ZSTD_CDict* cdict,
657 743 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
658 744
745 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
746
659 747 /*! ZSTD_compressStream_generic() :
660 748 * Private use only. To be called from zstdmt_compress.c in single-thread mode. */
661 749 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
@@ -672,6 +760,7 b' ZSTD_compressionParameters ZSTD_getCPara'
672 760 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
673 761 const void* dict, size_t dictSize,
674 762 ZSTD_dictContentType_e dictContentType,
763 ZSTD_dictTableLoadMethod_e dtlm,
675 764 const ZSTD_CDict* cdict,
676 765 ZSTD_CCtx_params params,
677 766 unsigned long long pledgedSrcSize);
@@ -13,9 +13,9 b''
13 13
14 14
15 15 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
16 ZSTD_compressionParameters const* cParams,
17 void const* end)
16 void const* end, ZSTD_dictTableLoadMethod_e dtlm)
18 17 {
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
19 19 U32* const hashLarge = ms->hashTable;
20 20 U32 const hBitsL = cParams->hashLog;
21 21 U32 const mls = cParams->searchLength;
@@ -40,6 +40,9 b' void ZSTD_fillDoubleHashTable(ZSTD_match'
40 40 hashSmall[smHash] = current + i;
41 41 if (i == 0 || hashLarge[lgHash] == 0)
42 42 hashLarge[lgHash] = current + i;
43 /* Only load extra positions for ZSTD_dtlm_full */
44 if (dtlm == ZSTD_dtlm_fast)
45 break;
43 46 }
44 47 }
45 48 }
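To illustrate the new dtlm parameter, a hedged sketch of how the table-loading function is meant to be invoked (ms and dictEnd are placeholder names; the dtlm values mirror what the CDict and compressBegin paths pass elsewhere in this change):

    /* dictionary loading for a CDict: every position is indexed (slower load, better matches) */
    ZSTD_fillDoubleHashTable(ms, dictEnd, ZSTD_dtlm_full);
    /* regular compressBegin paths: the loop above stops after the first position per step (fast load) */
    ZSTD_fillDoubleHashTable(ms, dictEnd, ZSTD_dtlm_fast);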
@@ -48,9 +51,10 b' void ZSTD_fillDoubleHashTable(ZSTD_match'
48 51 FORCE_INLINE_TEMPLATE
49 52 size_t ZSTD_compressBlock_doubleFast_generic(
50 53 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
51 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
52 U32 const mls /* template */)
54 void const* src, size_t srcSize,
55 U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
53 56 {
57 ZSTD_compressionParameters const* cParams = &ms->cParams;
54 58 U32* const hashLong = ms->hashTable;
55 59 const U32 hBitsL = cParams->hashLog;
56 60 U32* const hashSmall = ms->chainTable;
@@ -59,70 +63,188 b' size_t ZSTD_compressBlock_doubleFast_gen'
59 63 const BYTE* const istart = (const BYTE*)src;
60 64 const BYTE* ip = istart;
61 65 const BYTE* anchor = istart;
62 const U32 lowestIndex = ms->window.dictLimit;
63 const BYTE* const lowest = base + lowestIndex;
66 const U32 prefixLowestIndex = ms->window.dictLimit;
67 const BYTE* const prefixLowest = base + prefixLowestIndex;
64 68 const BYTE* const iend = istart + srcSize;
65 69 const BYTE* const ilimit = iend - HASH_READ_SIZE;
66 70 U32 offset_1=rep[0], offset_2=rep[1];
67 71 U32 offsetSaved = 0;
68 72
73 const ZSTD_matchState_t* const dms = ms->dictMatchState;
74 const ZSTD_compressionParameters* const dictCParams =
75 dictMode == ZSTD_dictMatchState ?
76 &dms->cParams : NULL;
77 const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
78 dms->hashTable : NULL;
79 const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
80 dms->chainTable : NULL;
81 const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
82 dms->window.dictLimit : 0;
83 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
84 dms->window.base : NULL;
85 const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
86 dictBase + dictStartIndex : NULL;
87 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
88 dms->window.nextSrc : NULL;
89 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
90 prefixLowestIndex - (U32)(dictEnd - dictBase) :
91 0;
92 const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
93 dictCParams->hashLog : hBitsL;
94 const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
95 dictCParams->chainLog : hBitsS;
96 const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
98 assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
99
69 100 /* init */
70 ip += (ip==lowest);
71 { U32 const maxRep = (U32)(ip-lowest);
101 ip += (dictAndPrefixLength == 0);
102 if (dictMode == ZSTD_noDict) {
103 U32 const maxRep = (U32)(ip - prefixLowest);
72 104 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
73 105 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
74 106 }
107 if (dictMode == ZSTD_dictMatchState) {
108 /* dictMatchState repCode checks don't currently handle repCode == 0
109 * disabling. */
110 assert(offset_1 <= dictAndPrefixLength);
111 assert(offset_2 <= dictAndPrefixLength);
112 }
75 113
76 114 /* Main Search Loop */
77 115 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
78 116 size_t mLength;
117 U32 offset;
79 118 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
80 119 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
120 size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
121 size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
81 122 U32 const current = (U32)(ip-base);
82 123 U32 const matchIndexL = hashLong[h2];
83 U32 const matchIndexS = hashSmall[h];
124 U32 matchIndexS = hashSmall[h];
84 125 const BYTE* matchLong = base + matchIndexL;
85 126 const BYTE* match = base + matchIndexS;
127 const U32 repIndex = current + 1 - offset_1;
128 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
129 && repIndex < prefixLowestIndex) ?
130 dictBase + (repIndex - dictIndexDelta) :
131 base + repIndex;
86 132 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
87 133
88 assert(offset_1 <= current); /* supposed guaranteed by construction */
89 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
90 /* favor repcode */
134 /* check dictMatchState repcode */
135 if (dictMode == ZSTD_dictMatchState
136 && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
137 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
138 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
139 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
140 ip++;
141 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
142 goto _match_stored;
143 }
144
145 /* check noDict repcode */
146 if ( dictMode == ZSTD_noDict
147 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
91 148 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
92 149 ip++;
93 150 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
94 } else {
95 U32 offset;
96 if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
151 goto _match_stored;
152 }
153
154 if (matchIndexL > prefixLowestIndex) {
155 /* check prefix long match */
156 if (MEM_read64(matchLong) == MEM_read64(ip)) {
97 157 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
98 158 offset = (U32)(ip-matchLong);
99 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
100 } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
101 size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
102 U32 const matchIndexL3 = hashLong[hl3];
103 const BYTE* matchL3 = base + matchIndexL3;
104 hashLong[hl3] = current + 1;
105 if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
159 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
160 goto _match_found;
161 }
162 } else if (dictMode == ZSTD_dictMatchState) {
163 /* check dictMatchState long match */
164 U32 const dictMatchIndexL = dictHashLong[dictHL];
165 const BYTE* dictMatchL = dictBase + dictMatchIndexL;
166 assert(dictMatchL < dictEnd);
167
168 if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
169 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
170 offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
171 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
172 goto _match_found;
173 }
174 }
175
176 if (matchIndexS > prefixLowestIndex) {
177 /* check prefix short match */
178 if (MEM_read32(match) == MEM_read32(ip)) {
179 goto _search_next_long;
180 }
181 } else if (dictMode == ZSTD_dictMatchState) {
182 /* check dictMatchState short match */
183 U32 const dictMatchIndexS = dictHashSmall[dictHS];
184 match = dictBase + dictMatchIndexS;
185 matchIndexS = dictMatchIndexS + dictIndexDelta;
186
187 if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
188 goto _search_next_long;
189 }
190 }
191
192 ip += ((ip-anchor) >> kSearchStrength) + 1;
193 continue;
194
195 _search_next_long:
196
197 {
198 size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
199 size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
200 U32 const matchIndexL3 = hashLong[hl3];
201 const BYTE* matchL3 = base + matchIndexL3;
202 hashLong[hl3] = current + 1;
203
204 /* check prefix long +1 match */
205 if (matchIndexL3 > prefixLowestIndex) {
206 if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
106 207 mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
107 208 ip++;
108 209 offset = (U32)(ip-matchL3);
109 while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
110 } else {
111 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
112 offset = (U32)(ip-match);
113 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
210 while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
211 goto _match_found;
114 212 }
115 } else {
116 ip += ((ip-anchor) >> kSearchStrength) + 1;
117 continue;
213 } else if (dictMode == ZSTD_dictMatchState) {
214 /* check dict long +1 match */
215 U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
216 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
217 assert(dictMatchL3 < dictEnd);
218 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
219 mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
220 ip++;
221 offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
222 while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
223 goto _match_found;
224 }
118 225 }
119
120 offset_2 = offset_1;
121 offset_1 = offset;
122
123 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
124 226 }
125 227
228 /* if no long +1 match, explore the short match we found */
229 if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
230 mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
231 offset = (U32)(current - matchIndexS);
232 while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
233 } else {
234 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
235 offset = (U32)(ip - match);
236 while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
237 }
238
239 /* fall-through */
240
241 _match_found:
242 offset_2 = offset_1;
243 offset_1 = offset;
244
245 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
246
247 _match_stored:
126 248 /* match found */
127 249 ip += mLength;
128 250 anchor = ip;
@@ -135,19 +257,44 b' size_t ZSTD_compressBlock_doubleFast_gen'
135 257 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
136 258
137 259 /* check immediate repcode */
138 while ( (ip <= ilimit)
139 && ( (offset_2>0)
140 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
141 /* store sequence */
142 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
143 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
144 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
145 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
146 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
147 ip += rLength;
148 anchor = ip;
149 continue; /* faster when present ... (?) */
150 } } }
260 if (dictMode == ZSTD_dictMatchState) {
261 while (ip <= ilimit) {
262 U32 const current2 = (U32)(ip-base);
263 U32 const repIndex2 = current2 - offset_2;
264 const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
265 && repIndex2 < prefixLowestIndex ?
266 dictBase - dictIndexDelta + repIndex2 :
267 base + repIndex2;
268 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
269 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
270 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
271 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
272 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
273 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
274 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
275 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
276 ip += repLength2;
277 anchor = ip;
278 continue;
279 }
280 break;
281 }
282 }
283
284 if (dictMode == ZSTD_noDict) {
285 while ( (ip <= ilimit)
286 && ( (offset_2>0)
287 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
288 /* store sequence */
289 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
290 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
291 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
292 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
293 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
294 ip += rLength;
295 anchor = ip;
296 continue; /* faster when present ... (?) */
297 } } } }
151 298
152 299 /* save reps for next block */
153 300 rep[0] = offset_1 ? offset_1 : offsetSaved;
@@ -160,102 +307,126 b' size_t ZSTD_compressBlock_doubleFast_gen'
160 307
161 308 size_t ZSTD_compressBlock_doubleFast(
162 309 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
163 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
310 void const* src, size_t srcSize)
164 311 {
165 const U32 mls = cParams->searchLength;
312 const U32 mls = ms->cParams.searchLength;
166 313 switch(mls)
167 314 {
168 315 default: /* includes case 3 */
169 316 case 4 :
170 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
317 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
171 318 case 5 :
172 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
319 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
173 320 case 6 :
174 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
321 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
175 322 case 7 :
176 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
323 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
324 }
325 }
326
327
328 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
329 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
330 void const* src, size_t srcSize)
331 {
332 const U32 mls = ms->cParams.searchLength;
333 switch(mls)
334 {
335 default: /* includes case 3 */
336 case 4 :
337 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
338 case 5 :
339 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
340 case 6 :
341 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
342 case 7 :
343 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
177 344 }
178 345 }
179 346
180 347
181 348 static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
182 349 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
183 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
350 void const* src, size_t srcSize,
184 351 U32 const mls /* template */)
185 352 {
353 ZSTD_compressionParameters const* cParams = &ms->cParams;
186 354 U32* const hashLong = ms->hashTable;
187 355 U32 const hBitsL = cParams->hashLog;
188 356 U32* const hashSmall = ms->chainTable;
189 357 U32 const hBitsS = cParams->chainLog;
190 const BYTE* const base = ms->window.base;
191 const BYTE* const dictBase = ms->window.dictBase;
192 358 const BYTE* const istart = (const BYTE*)src;
193 359 const BYTE* ip = istart;
194 360 const BYTE* anchor = istart;
195 const U32 lowestIndex = ms->window.lowLimit;
196 const BYTE* const dictStart = dictBase + lowestIndex;
197 const U32 dictLimit = ms->window.dictLimit;
198 const BYTE* const lowPrefixPtr = base + dictLimit;
199 const BYTE* const dictEnd = dictBase + dictLimit;
200 361 const BYTE* const iend = istart + srcSize;
201 362 const BYTE* const ilimit = iend - 8;
363 const U32 prefixStartIndex = ms->window.dictLimit;
364 const BYTE* const base = ms->window.base;
365 const BYTE* const prefixStart = base + prefixStartIndex;
366 const U32 dictStartIndex = ms->window.lowLimit;
367 const BYTE* const dictBase = ms->window.dictBase;
368 const BYTE* const dictStart = dictBase + dictStartIndex;
369 const BYTE* const dictEnd = dictBase + prefixStartIndex;
202 370 U32 offset_1=rep[0], offset_2=rep[1];
203 371
372 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
373
204 374 /* Search Loop */
205 375 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
206 376 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
207 377 const U32 matchIndex = hashSmall[hSmall];
208 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
378 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
209 379 const BYTE* match = matchBase + matchIndex;
210 380
211 381 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
212 382 const U32 matchLongIndex = hashLong[hLong];
213 const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
383 const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
214 384 const BYTE* matchLong = matchLongBase + matchLongIndex;
215 385
216 386 const U32 current = (U32)(ip-base);
217 387 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
218 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
219 const BYTE* repMatch = repBase + repIndex;
388 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
389 const BYTE* const repMatch = repBase + repIndex;
220 390 size_t mLength;
221 391 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
222 392
223 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
224 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
225 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
226 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
393 if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
394 & (repIndex > dictStartIndex))
395 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
396 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
397 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
227 398 ip++;
228 399 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
229 400 } else {
230 if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
231 const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
232 const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
401 if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
402 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
403 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
233 404 U32 offset;
234 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
405 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
235 406 offset = current - matchLongIndex;
236 407 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
237 408 offset_2 = offset_1;
238 409 offset_1 = offset;
239 410 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
240 411
241 } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
412 } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
242 413 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
243 414 U32 const matchIndex3 = hashLong[h3];
244 const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
415 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
245 416 const BYTE* match3 = match3Base + matchIndex3;
246 417 U32 offset;
247 418 hashLong[h3] = current + 1;
248 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
249 const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
250 const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
251 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
419 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
420 const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
421 const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
422 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
252 423 ip++;
253 424 offset = current+1 - matchIndex3;
254 425 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
255 426 } else {
256 const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
257 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
258 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
427 const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
428 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
429 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
259 430 offset = current - matchIndex;
260 431 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
261 432 }
@@ -282,12 +453,13 b' static size_t ZSTD_compressBlock_doubleF'
282 453 while (ip <= ilimit) {
283 454 U32 const current2 = (U32)(ip-base);
284 455 U32 const repIndex2 = current2 - offset_2;
285 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
286 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
287 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
288 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
289 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
290 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
456 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
457 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
458 & (repIndex2 > dictStartIndex))
459 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
460 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
461 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
462 U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
291 463 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
292 464 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
293 465 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
@@ -309,19 +481,19 b' static size_t ZSTD_compressBlock_doubleF'
309 481
310 482 size_t ZSTD_compressBlock_doubleFast_extDict(
311 483 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
312 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
484 void const* src, size_t srcSize)
313 485 {
314 U32 const mls = cParams->searchLength;
486 U32 const mls = ms->cParams.searchLength;
315 487 switch(mls)
316 488 {
317 489 default: /* includes case 3 */
318 490 case 4 :
319 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
491 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
320 492 case 5 :
321 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
493 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
322 494 case 6 :
323 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
495 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
324 496 case 7 :
325 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
497 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
326 498 }
327 499 }
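Note on the repcode guard that recurs throughout the extDict paths above: the expression (U32)((prefixStartIndex-1) - repIndex) >= 3 relies on deliberate unsigned wraparound. A small self-contained check, using a hypothetical prefixStartIndex, shows which indices it filters out; only the last three dictionary positions are rejected (a 4-byte read at repMatch there would run past dictEnd), while any index already inside the prefix wraps to a huge value and passes.

    /* Illustrative sketch only; the value 1000 is a made-up prefixStartIndex. */
    #include <assert.h>
    #include <stdint.h>
    int main(void)
    {
        uint32_t const prefixStartIndex = 1000;
        /* indices 997..999 (the last 3 dictionary positions) are rejected */
        assert((uint32_t)((prefixStartIndex - 1) - 997u) <  3);
        assert((uint32_t)((prefixStartIndex - 1) - 996u) >= 3);
        /* an index inside the prefix underflows and therefore passes */
        assert((uint32_t)((prefixStartIndex - 1) - 1005u) >= 3);
        return 0;
    }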
@@ -19,14 +19,16 b' extern "C" {'
19 19 #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
20 20
21 21 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
22 ZSTD_compressionParameters const* cParams,
23 void const* end);
22 void const* end, ZSTD_dictTableLoadMethod_e dtlm);
24 23 size_t ZSTD_compressBlock_doubleFast(
25 24 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
25 void const* src, size_t srcSize);
26 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 void const* src, size_t srcSize);
27 29 size_t ZSTD_compressBlock_doubleFast_extDict(
28 30 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
31 void const* src, size_t srcSize);
30 32
31 33
32 34 #if defined (__cplusplus)
@@ -13,9 +13,9 b''
13 13
14 14
15 15 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
16 ZSTD_compressionParameters const* cParams,
17 void const* end)
16 void const* end, ZSTD_dictTableLoadMethod_e dtlm)
18 17 {
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
19 19 U32* const hashTable = ms->hashTable;
20 20 U32 const hBits = cParams->hashLog;
21 21 U32 const mls = cParams->searchLength;
@@ -34,6 +34,9 b' void ZSTD_fillHashTable(ZSTD_matchState_'
34 34 size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
35 35 if (i == 0 || hashTable[hash] == 0)
36 36 hashTable[hash] = current + i;
37 /* Only load extra positions for ZSTD_dtlm_full */
38 if (dtlm == ZSTD_dtlm_fast)
39 break;
37 40 }
38 41 }
39 42 }
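The new dtlm parameter above distinguishes a fast table load (only the first position of each fill step) from a full load that also seeds the extra positions. A rough, purely illustrative sketch of that idea, with made-up demo_* names and a toy hash (the real enum and hash live in zstd_compress_internal.h), might look like:

    #include <stddef.h>
    typedef enum { demo_dtlm_fast, demo_dtlm_full } demo_dtlm_e;

    static void demo_fill(unsigned* table, size_t tableSize,
                          const unsigned char* base, size_t n, demo_dtlm_e dtlm)
    {
        size_t p;
        for (p = 0; p + 3 < n; p += 3) {        /* stride of 3, like fastHashFillStep */
            size_t i;
            for (i = 0; i < 3; i++) {           /* consider up to 3 positions per stride */
                size_t const h = (size_t)base[p + i] % tableSize;   /* toy hash */
                if (i == 0 || table[h] == 0)
                    table[h] = (unsigned)(p + i);
                if (dtlm == demo_dtlm_fast)
                    break;                      /* fast mode: first position only */
            }
        }
    }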
@@ -42,26 +45,65 b' FORCE_INLINE_TEMPLATE'
42 45 size_t ZSTD_compressBlock_fast_generic(
43 46 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
44 47 void const* src, size_t srcSize,
45 U32 const hlog, U32 const stepSize, U32 const mls)
48 U32 const mls, ZSTD_dictMode_e const dictMode)
46 49 {
50 const ZSTD_compressionParameters* const cParams = &ms->cParams;
47 51 U32* const hashTable = ms->hashTable;
52 U32 const hlog = cParams->hashLog;
53 /* support stepSize of 0 */
54 U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
48 55 const BYTE* const base = ms->window.base;
49 56 const BYTE* const istart = (const BYTE*)src;
50 57 const BYTE* ip = istart;
51 58 const BYTE* anchor = istart;
52 const U32 lowestIndex = ms->window.dictLimit;
53 const BYTE* const lowest = base + lowestIndex;
59 const U32 prefixStartIndex = ms->window.dictLimit;
60 const BYTE* const prefixStart = base + prefixStartIndex;
54 61 const BYTE* const iend = istart + srcSize;
55 62 const BYTE* const ilimit = iend - HASH_READ_SIZE;
56 63 U32 offset_1=rep[0], offset_2=rep[1];
57 64 U32 offsetSaved = 0;
58 65
66 const ZSTD_matchState_t* const dms = ms->dictMatchState;
67 const ZSTD_compressionParameters* const dictCParams =
68 dictMode == ZSTD_dictMatchState ?
69 &dms->cParams : NULL;
70 const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
71 dms->hashTable : NULL;
72 const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
73 dms->window.dictLimit : 0;
74 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
75 dms->window.base : NULL;
76 const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
77 dictBase + dictStartIndex : NULL;
78 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
79 dms->window.nextSrc : NULL;
80 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
81 prefixStartIndex - (U32)(dictEnd - dictBase) :
82 0;
83 const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
84 const U32 dictHLog = dictMode == ZSTD_dictMatchState ?
85 dictCParams->hashLog : hlog;
86
87 assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
88
89 /* otherwise, we would get index underflow when translating a dict index
90 * into a local index */
91 assert(dictMode != ZSTD_dictMatchState
92 || prefixStartIndex >= (U32)(dictEnd - dictBase));
93
59 94 /* init */
60 ip += (ip==lowest);
61 { U32 const maxRep = (U32)(ip-lowest);
95 ip += (dictAndPrefixLength == 0);
96 if (dictMode == ZSTD_noDict) {
97 U32 const maxRep = (U32)(ip - prefixStart);
62 98 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
63 99 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
64 100 }
101 if (dictMode == ZSTD_dictMatchState) {
102 /* dictMatchState repCode checks don't currently handle repCode == 0
103 * disabling. */
104 assert(offset_1 <= dictAndPrefixLength);
105 assert(offset_2 <= dictAndPrefixLength);
106 }
65 107
66 108 /* Main Search Loop */
67 109 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@@ -70,26 +112,67 b' size_t ZSTD_compressBlock_fast_generic('
70 112 U32 const current = (U32)(ip-base);
71 113 U32 const matchIndex = hashTable[h];
72 114 const BYTE* match = base + matchIndex;
115 const U32 repIndex = current + 1 - offset_1;
116 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
117 && repIndex < prefixStartIndex) ?
118 dictBase + (repIndex - dictIndexDelta) :
119 base + repIndex;
73 120 hashTable[h] = current; /* update hash table */
74 121
75 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
122 if ( (dictMode == ZSTD_dictMatchState)
123 && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
124 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
125 const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
126 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
127 ip++;
128 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
129 } else if ( dictMode == ZSTD_noDict
130 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
76 131 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
77 132 ip++;
78 133 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
79 } else {
80 if ( (matchIndex <= lowestIndex)
81 || (MEM_read32(match) != MEM_read32(ip)) ) {
134 } else if ( (matchIndex <= prefixStartIndex) ) {
135 if (dictMode == ZSTD_dictMatchState) {
136 size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
137 U32 const dictMatchIndex = dictHashTable[dictHash];
138 const BYTE* dictMatch = dictBase + dictMatchIndex;
139 if (dictMatchIndex <= dictStartIndex ||
140 MEM_read32(dictMatch) != MEM_read32(ip)) {
141 assert(stepSize >= 1);
142 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
143 continue;
144 } else {
145 /* found a dict match */
146 U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
147 mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
148 while (((ip>anchor) & (dictMatch>dictStart))
149 && (ip[-1] == dictMatch[-1])) {
150 ip--; dictMatch--; mLength++;
151 } /* catch up */
152 offset_2 = offset_1;
153 offset_1 = offset;
154 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
155 }
156 } else {
82 157 assert(stepSize >= 1);
83 158 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
84 159 continue;
85 160 }
161 } else if (MEM_read32(match) != MEM_read32(ip)) {
162 /* it's not a match, and we're not going to check the dictionary */
163 assert(stepSize >= 1);
164 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
165 continue;
166 } else {
167 /* found a regular match */
168 U32 const offset = (U32)(ip-match);
86 169 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
87 { U32 const offset = (U32)(ip-match);
88 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
89 offset_2 = offset_1;
90 offset_1 = offset;
91 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
92 } }
170 while (((ip>anchor) & (match>prefixStart))
171 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
172 offset_2 = offset_1;
173 offset_1 = offset;
174 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
175 }
93 176
94 177 /* match found */
95 178 ip += mLength;
@@ -97,21 +180,46 b' size_t ZSTD_compressBlock_fast_generic('
97 180
98 181 if (ip <= ilimit) {
99 182 /* Fill Table */
183 assert(base+current+2 > istart); /* check base overflow */
100 184 hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
101 185 hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
186
102 187 /* check immediate repcode */
103 while ( (ip <= ilimit)
104 && ( (offset_2>0)
105 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
106 /* store sequence */
107 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
108 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
109 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
110 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
111 ip += rLength;
112 anchor = ip;
113 continue; /* faster when present ... (?) */
114 } } }
188 if (dictMode == ZSTD_dictMatchState) {
189 while (ip <= ilimit) {
190 U32 const current2 = (U32)(ip-base);
191 U32 const repIndex2 = current2 - offset_2;
192 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
193 dictBase - dictIndexDelta + repIndex2 :
194 base + repIndex2;
195 if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
196 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
197 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
198 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
199 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
200 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
201 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
202 ip += repLength2;
203 anchor = ip;
204 continue;
205 }
206 break;
207 }
208 }
209
210 if (dictMode == ZSTD_noDict) {
211 while ( (ip <= ilimit)
212 && ( (offset_2>0)
213 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
214 /* store sequence */
215 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
216 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
217 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
218 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
219 ip += rLength;
220 anchor = ip;
221 continue; /* faster when present ... (?) */
222 } } } }
115 223
116 224 /* save reps for next block */
117 225 rep[0] = offset_1 ? offset_1 : offsetSaved;
@@ -124,42 +232,66 b' size_t ZSTD_compressBlock_fast_generic('
124 232
125 233 size_t ZSTD_compressBlock_fast(
126 234 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
127 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
235 void const* src, size_t srcSize)
128 236 {
129 U32 const hlog = cParams->hashLog;
237 ZSTD_compressionParameters const* cParams = &ms->cParams;
130 238 U32 const mls = cParams->searchLength;
131 U32 const stepSize = cParams->targetLength;
239 assert(ms->dictMatchState == NULL);
132 240 switch(mls)
133 241 {
134 242 default: /* includes case 3 */
135 243 case 4 :
136 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
244 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
137 245 case 5 :
138 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
246 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
139 247 case 6 :
140 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
248 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
141 249 case 7 :
142 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
250 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
251 }
252 }
253
254 size_t ZSTD_compressBlock_fast_dictMatchState(
255 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
256 void const* src, size_t srcSize)
257 {
258 ZSTD_compressionParameters const* cParams = &ms->cParams;
259 U32 const mls = cParams->searchLength;
260 assert(ms->dictMatchState != NULL);
261 switch(mls)
262 {
263 default: /* includes case 3 */
264 case 4 :
265 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
266 case 5 :
267 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
268 case 6 :
269 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
270 case 7 :
271 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
143 272 }
144 273 }
145 274
146 275
147 276 static size_t ZSTD_compressBlock_fast_extDict_generic(
148 277 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
149 void const* src, size_t srcSize,
150 U32 const hlog, U32 const stepSize, U32 const mls)
278 void const* src, size_t srcSize, U32 const mls)
151 279 {
152 U32* hashTable = ms->hashTable;
280 const ZSTD_compressionParameters* const cParams = &ms->cParams;
281 U32* const hashTable = ms->hashTable;
282 U32 const hlog = cParams->hashLog;
283 /* support stepSize of 0 */
284 U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
153 285 const BYTE* const base = ms->window.base;
154 286 const BYTE* const dictBase = ms->window.dictBase;
155 287 const BYTE* const istart = (const BYTE*)src;
156 288 const BYTE* ip = istart;
157 289 const BYTE* anchor = istart;
158 const U32 lowestIndex = ms->window.lowLimit;
159 const BYTE* const dictStart = dictBase + lowestIndex;
160 const U32 dictLimit = ms->window.dictLimit;
161 const BYTE* const lowPrefixPtr = base + dictLimit;
162 const BYTE* const dictEnd = dictBase + dictLimit;
290 const U32 dictStartIndex = ms->window.lowLimit;
291 const BYTE* const dictStart = dictBase + dictStartIndex;
292 const U32 prefixStartIndex = ms->window.dictLimit;
293 const BYTE* const prefixStart = base + prefixStartIndex;
294 const BYTE* const dictEnd = dictBase + prefixStartIndex;
163 295 const BYTE* const iend = istart + srcSize;
164 296 const BYTE* const ilimit = iend - 8;
165 297 U32 offset_1=rep[0], offset_2=rep[1];
@@ -167,33 +299,34 b' static size_t ZSTD_compressBlock_fast_ex'
167 299 /* Search Loop */
168 300 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
169 301 const size_t h = ZSTD_hashPtr(ip, hlog, mls);
170 const U32 matchIndex = hashTable[h];
171 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
172 const BYTE* match = matchBase + matchIndex;
173 const U32 current = (U32)(ip-base);
174 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
175 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
176 const BYTE* repMatch = repBase + repIndex;
302 const U32 matchIndex = hashTable[h];
303 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
304 const BYTE* match = matchBase + matchIndex;
305 const U32 current = (U32)(ip-base);
306 const U32 repIndex = current + 1 - offset_1;
307 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
308 const BYTE* const repMatch = repBase + repIndex;
177 309 size_t mLength;
178 310 hashTable[h] = current; /* update hash table */
311 assert(offset_1 <= current +1); /* check repIndex */
179 312
180 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
313 if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
181 314 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
182 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
183 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
315 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
316 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
184 317 ip++;
185 318 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
186 319 } else {
187 if ( (matchIndex < lowestIndex) ||
320 if ( (matchIndex < dictStartIndex) ||
188 321 (MEM_read32(match) != MEM_read32(ip)) ) {
189 322 assert(stepSize >= 1);
190 323 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
191 324 continue;
192 325 }
193 { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
194 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
326 { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
327 const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
195 328 U32 offset;
196 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
329 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
197 330 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
198 331 offset = current - matchIndex;
199 332 offset_2 = offset_1;
@@ -213,11 +346,11 b' static size_t ZSTD_compressBlock_fast_ex'
213 346 while (ip <= ilimit) {
214 347 U32 const current2 = (U32)(ip-base);
215 348 U32 const repIndex2 = current2 - offset_2;
216 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
217 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
349 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
350 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
218 351 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
219 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
220 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
352 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
353 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
221 354 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
222 355 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
223 356 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
@@ -239,21 +372,20 b' static size_t ZSTD_compressBlock_fast_ex'
239 372
240 373 size_t ZSTD_compressBlock_fast_extDict(
241 374 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
242 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
375 void const* src, size_t srcSize)
243 376 {
244 U32 const hlog = cParams->hashLog;
377 ZSTD_compressionParameters const* cParams = &ms->cParams;
245 378 U32 const mls = cParams->searchLength;
246 U32 const stepSize = cParams->targetLength;
247 379 switch(mls)
248 380 {
249 381 default: /* includes case 3 */
250 382 case 4 :
251 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
383 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
252 384 case 5 :
253 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
385 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
254 386 case 6 :
255 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
387 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
256 388 case 7 :
257 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
389 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
258 390 }
259 391 }
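Both fast-path functions above now derive stepSize as cParams->targetLength + !(cParams->targetLength) instead of receiving it as an argument. A minimal sketch of that branch-free "support stepSize of 0" idiom, assuming nothing beyond standard C:

    #include <assert.h>
    /* adding the logical negation bumps a zero targetLength to 1 */
    static unsigned step_size(unsigned targetLength)
    {
        return targetLength + !targetLength;
    }
    int main(void)
    {
        assert(step_size(0) == 1);   /* 0 would stall the search loop */
        assert(step_size(1) == 1);
        assert(step_size(7) == 7);
        return 0;
    }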
@@ -19,14 +19,16 b' extern "C" {'
19 19 #include "zstd_compress_internal.h"
20 20
21 21 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
22 ZSTD_compressionParameters const* cParams,
23 void const* end);
22 void const* end, ZSTD_dictTableLoadMethod_e dtlm);
24 23 size_t ZSTD_compressBlock_fast(
25 24 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
25 void const* src, size_t srcSize);
26 size_t ZSTD_compressBlock_fast_dictMatchState(
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 void const* src, size_t srcSize);
27 29 size_t ZSTD_compressBlock_fast_extDict(
28 30 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
31 void const* src, size_t srcSize);
30 32
31 33 #if defined (__cplusplus)
32 34 }
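The declarations above drop the explicit cParams argument; in the .c files each entry point now reads searchLength from ms->cParams and dispatches to a per-mls instantiation of the inlined generic function. A minimal sketch of that pattern, with hypothetical demo_* names standing in for the real types:

    #include <stddef.h>
    typedef struct { unsigned searchLength; } demo_cparams;
    typedef struct { demo_cparams cParams; } demo_matchState;

    static size_t demo_generic(demo_matchState* ms, const void* src,
                               size_t srcSize, unsigned const mls)
    {
        (void)ms; (void)src;
        return srcSize / mls;   /* stand-in for the real block compressor */
    }

    size_t demo_compressBlock(demo_matchState* ms, const void* src, size_t srcSize)
    {
        switch (ms->cParams.searchLength)
        {
        default:  /* includes case 3 */
        case 4 : return demo_generic(ms, src, srcSize, 4);
        case 5 : return demo_generic(ms, src, srcSize, 5);
        case 6 : return demo_generic(ms, src, srcSize, 6);
        case 7 : return demo_generic(ms, src, srcSize, 7);
        }
    }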
@@ -16,11 +16,12 b''
16 16 * Binary Tree search
17 17 ***************************************/
18 18
19 void ZSTD_updateDUBT(
20 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
19 static void
20 ZSTD_updateDUBT(ZSTD_matchState_t* ms,
21 21 const BYTE* ip, const BYTE* iend,
22 22 U32 mls)
23 23 {
24 const ZSTD_compressionParameters* const cParams = &ms->cParams;
24 25 U32* const hashTable = ms->hashTable;
25 26 U32 const hashLog = cParams->hashLog;
26 27
@@ -59,11 +60,12 b' void ZSTD_updateDUBT('
59 60 * sort one already inserted but unsorted position
60 61 * assumption : current >= btlow == (current - btmask)
61 62 * doesn't fail */
62 static void ZSTD_insertDUBT1(
63 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
63 static void
64 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
64 65 U32 current, const BYTE* inputEnd,
65 U32 nbCompares, U32 btLow, int extDict)
66 U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
66 67 {
68 const ZSTD_compressionParameters* const cParams = &ms->cParams;
67 69 U32* const bt = ms->chainTable;
68 70 U32 const btLog = cParams->chainLog - 1;
69 71 U32 const btMask = (1 << btLog) - 1;
@@ -92,10 +94,12 b' static void ZSTD_insertDUBT1('
92 94 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
93 95 assert(matchIndex < current);
94 96
95 if ( (!extDict)
97 if ( (dictMode != ZSTD_extDict)
96 98 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
97 99 || (current < dictLimit) /* both in extDict */) {
98 const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
100 const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
101 || (matchIndex+matchLength >= dictLimit)) ?
102 base : dictBase;
99 103 assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
100 104 || (current < dictLimit) );
101 105 match = mBase + matchIndex;
@@ -138,13 +142,95 b' static void ZSTD_insertDUBT1('
138 142 }
139 143
140 144
141 static size_t ZSTD_DUBT_findBestMatch (
142 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
143 const BYTE* const ip, const BYTE* const iend,
144 size_t* offsetPtr,
145 U32 const mls,
146 U32 const extDict)
145 static size_t
146 ZSTD_DUBT_findBetterDictMatch (
147 ZSTD_matchState_t* ms,
148 const BYTE* const ip, const BYTE* const iend,
149 size_t* offsetPtr,
150 U32 nbCompares,
151 U32 const mls,
152 const ZSTD_dictMode_e dictMode)
147 153 {
154 const ZSTD_matchState_t * const dms = ms->dictMatchState;
155 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
156 const U32 * const dictHashTable = dms->hashTable;
157 U32 const hashLog = dmsCParams->hashLog;
158 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
159 U32 dictMatchIndex = dictHashTable[h];
160
161 const BYTE* const base = ms->window.base;
162 const BYTE* const prefixStart = base + ms->window.dictLimit;
163 U32 const current = (U32)(ip-base);
164 const BYTE* const dictBase = dms->window.base;
165 const BYTE* const dictEnd = dms->window.nextSrc;
166 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
167 U32 const dictLowLimit = dms->window.lowLimit;
168 U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
169
170 U32* const dictBt = dms->chainTable;
171 U32 const btLog = dmsCParams->chainLog - 1;
172 U32 const btMask = (1 << btLog) - 1;
173 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
174
175 size_t commonLengthSmaller=0, commonLengthLarger=0, bestLength=0;
176 U32 matchEndIdx = current+8+1;
177
178 (void)dictMode;
179 assert(dictMode == ZSTD_dictMatchState);
180
181 while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
182 U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
183 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
184 const BYTE* match = dictBase + dictMatchIndex;
185 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
186 if (dictMatchIndex+matchLength >= dictHighLimit)
187 match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
188
189 if (matchLength > bestLength) {
190 U32 matchIndex = dictMatchIndex + dictIndexDelta;
191 if (matchLength > matchEndIdx - matchIndex)
192 matchEndIdx = matchIndex + (U32)matchLength;
193 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
194 DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
195 current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
196 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
197 }
198 if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
199 break; /* drop, to guarantee consistency (miss a little bit of compression) */
200 }
201 }
202
203 DEBUGLOG(2, "matchLength:%6zu, match:%p, prefixStart:%p, ip:%p", matchLength, match, prefixStart, ip);
204 if (match[matchLength] < ip[matchLength]) {
205 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
206 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
207 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
208 } else {
209 /* match is larger than current */
210 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
211 commonLengthLarger = matchLength;
212 dictMatchIndex = nextPtr[0];
213 }
214 }
215
216 if (bestLength >= MINMATCH) {
217 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
218 DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
219 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
220 }
221 return bestLength;
222
223 }
224
225
226 static size_t
227 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
228 const BYTE* const ip, const BYTE* const iend,
229 size_t* offsetPtr,
230 U32 const mls,
231 const ZSTD_dictMode_e dictMode)
232 {
233 const ZSTD_compressionParameters* const cParams = &ms->cParams;
148 234 U32* const hashTable = ms->hashTable;
149 235 U32 const hashLog = cParams->hashLog;
150 236 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
@@ -195,8 +281,8 b' static size_t ZSTD_DUBT_findBestMatch ('
195 281 while (matchIndex) { /* will end on matchIndex == 0 */
196 282 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
197 283 U32 const nextCandidateIdx = *nextCandidateIdxPtr;
198 ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
199 nbCandidates, unsortLimit, extDict);
284 ZSTD_insertDUBT1(ms, matchIndex, iend,
285 nbCandidates, unsortLimit, dictMode);
200 286 matchIndex = nextCandidateIdx;
201 287 nbCandidates++;
202 288 }
@@ -221,7 +307,7 b' static size_t ZSTD_DUBT_findBestMatch ('
221 307 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
222 308 const BYTE* match;
223 309
224 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
310 if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
225 311 match = base + matchIndex;
226 312 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
227 313 } else {
@@ -259,6 +345,10 b' static size_t ZSTD_DUBT_findBestMatch ('
259 345
260 346 *smallerPtr = *largerPtr = 0;
261 347
348 if (dictMode == ZSTD_dictMatchState && nbCompares) {
349 bestLength = ZSTD_DUBT_findBetterDictMatch(ms, ip, iend, offsetPtr, nbCompares, mls, dictMode);
350 }
351
262 352 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
263 353 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
264 354 if (bestLength >= MINMATCH) {
@@ -272,61 +362,64 b' static size_t ZSTD_DUBT_findBestMatch ('
272 362
273 363
274 364 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
275 static size_t ZSTD_BtFindBestMatch (
276 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
277 const BYTE* const ip, const BYTE* const iLimit,
278 size_t* offsetPtr,
279 const U32 mls /* template */)
365 FORCE_INLINE_TEMPLATE size_t
366 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
367 const BYTE* const ip, const BYTE* const iLimit,
368 size_t* offsetPtr,
369 const U32 mls /* template */,
370 const ZSTD_dictMode_e dictMode)
280 371 {
281 372 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
282 373 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
283 ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
284 return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
374 ZSTD_updateDUBT(ms, ip, iLimit, mls);
375 return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
285 376 }
286 377
287 378
288 static size_t ZSTD_BtFindBestMatch_selectMLS (
289 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
290 const BYTE* ip, const BYTE* const iLimit,
291 size_t* offsetPtr)
379 static size_t
380 ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
381 const BYTE* ip, const BYTE* const iLimit,
382 size_t* offsetPtr)
292 383 {
293 switch(cParams->searchLength)
384 switch(ms->cParams.searchLength)
294 385 {
295 386 default : /* includes case 3 */
296 case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
297 case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
387 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
388 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
298 389 case 7 :
299 case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
390 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
300 391 }
301 392 }
302 393
303 394
304 /** Tree updater, providing best match */
305 static size_t ZSTD_BtFindBestMatch_extDict (
306 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
307 const BYTE* const ip, const BYTE* const iLimit,
308 size_t* offsetPtr,
309 const U32 mls)
395 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
396 ZSTD_matchState_t* ms,
397 const BYTE* ip, const BYTE* const iLimit,
398 size_t* offsetPtr)
310 399 {
311 DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
312 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
313 ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
314 return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
400 switch(ms->cParams.searchLength)
401 {
402 default : /* includes case 3 */
403 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
404 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
405 case 7 :
406 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
407 }
315 408 }
316 409
317 410
318 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
319 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
411 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
412 ZSTD_matchState_t* ms,
320 413 const BYTE* ip, const BYTE* const iLimit,
321 414 size_t* offsetPtr)
322 415 {
323 switch(cParams->searchLength)
416 switch(ms->cParams.searchLength)
324 417 {
325 418 default : /* includes case 3 */
326 case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
327 case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
419 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
420 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
328 421 case 7 :
329 case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
422 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
330 423 }
331 424 }
332 425
@@ -340,7 +433,8 b' static size_t ZSTD_BtFindBestMatch_selec'
340 433 /* Update chains up to ip (excluded)
341 434 Assumption : always within prefix (i.e. not within extDict) */
342 435 static U32 ZSTD_insertAndFindFirstIndex_internal(
343 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
436 ZSTD_matchState_t* ms,
437 const ZSTD_compressionParameters* const cParams,
344 438 const BYTE* ip, U32 const mls)
345 439 {
346 440 U32* const hashTable = ms->hashTable;
@@ -362,22 +456,21 b' static U32 ZSTD_insertAndFindFirstIndex_'
362 456 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
363 457 }
364 458
365 U32 ZSTD_insertAndFindFirstIndex(
366 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
367 const BYTE* ip)
368 {
369 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength);
459 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
460 const ZSTD_compressionParameters* const cParams = &ms->cParams;
461 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.searchLength);
370 462 }
371 463
372 464
373 465 /* inlining is important to hardwire a hot branch (template emulation) */
374 466 FORCE_INLINE_TEMPLATE
375 467 size_t ZSTD_HcFindBestMatch_generic (
376 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
468 ZSTD_matchState_t* ms,
377 469 const BYTE* const ip, const BYTE* const iLimit,
378 470 size_t* offsetPtr,
379 const U32 mls, const U32 extDict)
471 const U32 mls, const ZSTD_dictMode_e dictMode)
380 472 {
473 const ZSTD_compressionParameters* const cParams = &ms->cParams;
381 474 U32* const chainTable = ms->chainTable;
382 475 const U32 chainSize = (1 << cParams->chainLog);
383 476 const U32 chainMask = chainSize-1;
@@ -397,7 +490,7 b' size_t ZSTD_HcFindBestMatch_generic ('
397 490
398 491 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
399 492 size_t currentMl=0;
400 if ((!extDict) || matchIndex >= dictLimit) {
493 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
401 494 const BYTE* const match = base + matchIndex;
402 495 if (match[ml] == ip[ml]) /* potentially better */
403 496 currentMl = ZSTD_count(ip, match, iLimit);
@@ -419,38 +512,87 b' size_t ZSTD_HcFindBestMatch_generic ('
419 512 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
420 513 }
421 514
515 if (dictMode == ZSTD_dictMatchState) {
516 const ZSTD_matchState_t* const dms = ms->dictMatchState;
517 const U32* const dmsChainTable = dms->chainTable;
518 const U32 dmsChainSize = (1 << dms->cParams.chainLog);
519 const U32 dmsChainMask = dmsChainSize - 1;
520 const U32 dmsLowestIndex = dms->window.dictLimit;
521 const BYTE* const dmsBase = dms->window.base;
522 const BYTE* const dmsEnd = dms->window.nextSrc;
523 const U32 dmsSize = (U32)(dmsEnd - dmsBase);
524 const U32 dmsIndexDelta = dictLimit - dmsSize;
525 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
526
527 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
528
529 for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
530 size_t currentMl=0;
531 const BYTE* const match = dmsBase + matchIndex;
532 assert(match+4 <= dmsEnd);
533 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
534 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
535
536 /* save best solution */
537 if (currentMl > ml) {
538 ml = currentMl;
539 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
540 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
541 }
542
543 if (matchIndex <= dmsMinChain) break;
544 matchIndex = dmsChainTable[matchIndex & dmsChainMask];
545 }
546 }
547
422 548 return ml;
423 549 }
424 550
425 551
426 552 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
427 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
553 ZSTD_matchState_t* ms,
428 554 const BYTE* ip, const BYTE* const iLimit,
429 555 size_t* offsetPtr)
430 556 {
431 switch(cParams->searchLength)
557 switch(ms->cParams.searchLength)
432 558 {
433 559 default : /* includes case 3 */
434 case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
435 case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
560 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
561 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
436 562 case 7 :
437 case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
563 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
564 }
565 }
566
567
568 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
569 ZSTD_matchState_t* ms,
570 const BYTE* ip, const BYTE* const iLimit,
571 size_t* offsetPtr)
572 {
573 switch(ms->cParams.searchLength)
574 {
575 default : /* includes case 3 */
576 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
577 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
578 case 7 :
579 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
438 580 }
439 581 }
440 582
441 583
442 584 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
443 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
585 ZSTD_matchState_t* ms,
444 586 const BYTE* ip, const BYTE* const iLimit,
445 size_t* const offsetPtr)
587 size_t* offsetPtr)
446 588 {
447 switch(cParams->searchLength)
589 switch(ms->cParams.searchLength)
448 590 {
449 591 default : /* includes case 3 */
450 case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
451 case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
592 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
593 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
452 594 case 7 :
453 case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
595 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
454 596 }
455 597 }
456 598
@@ -462,30 +604,55 b' FORCE_INLINE_TEMPLATE'
462 604 size_t ZSTD_compressBlock_lazy_generic(
463 605 ZSTD_matchState_t* ms, seqStore_t* seqStore,
464 606 U32 rep[ZSTD_REP_NUM],
465 ZSTD_compressionParameters const* cParams,
466 607 const void* src, size_t srcSize,
467 const U32 searchMethod, const U32 depth)
608 const U32 searchMethod, const U32 depth,
609 ZSTD_dictMode_e const dictMode)
468 610 {
469 611 const BYTE* const istart = (const BYTE*)src;
470 612 const BYTE* ip = istart;
471 613 const BYTE* anchor = istart;
472 614 const BYTE* const iend = istart + srcSize;
473 615 const BYTE* const ilimit = iend - 8;
474 const BYTE* const base = ms->window.base + ms->window.dictLimit;
616 const BYTE* const base = ms->window.base;
617 const U32 prefixLowestIndex = ms->window.dictLimit;
618 const BYTE* const prefixLowest = base + prefixLowestIndex;
475 619
476 620 typedef size_t (*searchMax_f)(
477 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
621 ZSTD_matchState_t* ms,
478 622 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
479 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
623 searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
624 (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
625 (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
480 626 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
481 627
628 const ZSTD_matchState_t* const dms = ms->dictMatchState;
629 const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
630 dms->window.dictLimit : 0;
631 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
632 dms->window.base : NULL;
633 const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
634 dictBase + dictLowestIndex : NULL;
635 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
636 dms->window.nextSrc : NULL;
637 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
638 prefixLowestIndex - (U32)(dictEnd - dictBase) :
639 0;
640 const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
641
482 642 /* init */
483 ip += (ip==base);
643 ip += (dictAndPrefixLength == 0);
484 644 ms->nextToUpdate3 = ms->nextToUpdate;
485 { U32 const maxRep = (U32)(ip-base);
645 if (dictMode == ZSTD_noDict) {
646 U32 const maxRep = (U32)(ip - prefixLowest);
486 647 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
487 648 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
488 649 }
650 if (dictMode == ZSTD_dictMatchState) {
651 /* dictMatchState repCode checks don't currently handle repCode == 0
652 * disabling. */
653 assert(offset_1 <= dictAndPrefixLength);
654 assert(offset_2 <= dictAndPrefixLength);
655 }
489 656
490 657 /* Match Loop */
491 658 while (ip < ilimit) {
@@ -494,15 +661,28 b' size_t ZSTD_compressBlock_lazy_generic('
494 661 const BYTE* start=ip+1;
495 662
496 663 /* check repCode */
497 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
498 /* repcode : we take it */
664 if (dictMode == ZSTD_dictMatchState) {
665 const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
666 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
667 && repIndex < prefixLowestIndex) ?
668 dictBase + (repIndex - dictIndexDelta) :
669 base + repIndex;
670 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
671 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
672 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
673 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
674 if (depth==0) goto _storeSequence;
675 }
676 }
677 if ( dictMode == ZSTD_noDict
678 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
499 679 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
500 680 if (depth==0) goto _storeSequence;
501 681 }
502 682
503 683 /* first search (depth 0) */
504 { size_t offsetFound = 99999999;
505 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
684 { size_t offsetFound = 999999999;
685 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
506 686 if (ml2 > matchLength)
507 687 matchLength = ml2, start = ip, offset=offsetFound;
508 688 }
@@ -516,15 +696,31 b' size_t ZSTD_compressBlock_lazy_generic('
516 696 if (depth>=1)
517 697 while (ip<ilimit) {
518 698 ip ++;
519 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
699 if ( (dictMode == ZSTD_noDict)
700 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
520 701 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
521 702 int const gain2 = (int)(mlRep * 3);
522 703 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
523 704 if ((mlRep >= 4) && (gain2 > gain1))
524 705 matchLength = mlRep, offset = 0, start = ip;
525 706 }
526 { size_t offset2=99999999;
527 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
707 if (dictMode == ZSTD_dictMatchState) {
708 const U32 repIndex = (U32)(ip - base) - offset_1;
709 const BYTE* repMatch = repIndex < prefixLowestIndex ?
710 dictBase + (repIndex - dictIndexDelta) :
711 base + repIndex;
712 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
713 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
714 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
715 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
716 int const gain2 = (int)(mlRep * 3);
717 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
718 if ((mlRep >= 4) && (gain2 > gain1))
719 matchLength = mlRep, offset = 0, start = ip;
720 }
721 }
722 { size_t offset2=999999999;
723 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
528 724 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
529 725 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
530 726 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -535,15 +731,31 b' size_t ZSTD_compressBlock_lazy_generic('
535 731 /* let's find an even better one */
536 732 if ((depth==2) && (ip<ilimit)) {
537 733 ip ++;
538 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
539 size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
540 int const gain2 = (int)(ml2 * 4);
734 if ( (dictMode == ZSTD_noDict)
735 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
736 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
737 int const gain2 = (int)(mlRep * 4);
541 738 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
542 if ((ml2 >= 4) && (gain2 > gain1))
543 matchLength = ml2, offset = 0, start = ip;
739 if ((mlRep >= 4) && (gain2 > gain1))
740 matchLength = mlRep, offset = 0, start = ip;
544 741 }
545 { size_t offset2=99999999;
546 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
742 if (dictMode == ZSTD_dictMatchState) {
743 const U32 repIndex = (U32)(ip - base) - offset_1;
744 const BYTE* repMatch = repIndex < prefixLowestIndex ?
745 dictBase + (repIndex - dictIndexDelta) :
746 base + repIndex;
747 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
748 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
749 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
750 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
751 int const gain2 = (int)(mlRep * 4);
752 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
753 if ((mlRep >= 4) && (gain2 > gain1))
754 matchLength = mlRep, offset = 0, start = ip;
755 }
756 }
757 { size_t offset2=999999999;
758 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
547 759 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
548 760 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
549 761 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -560,9 +772,17 b' size_t ZSTD_compressBlock_lazy_generic('
560 772 */
561 773 /* catch up */
562 774 if (offset) {
563 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
564 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
565 { start--; matchLength++; }
775 if (dictMode == ZSTD_noDict) {
776 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
777 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
778 { start--; matchLength++; }
779 }
780 if (dictMode == ZSTD_dictMatchState) {
781 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
782 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
783 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
784 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
785 }
566 786 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
567 787 }
568 788 /* store sequence */
@@ -573,16 +793,39 b' size_t ZSTD_compressBlock_lazy_generic('
573 793 }
574 794
575 795 /* check immediate repcode */
576 while ( ((ip <= ilimit) & (offset_2>0))
577 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
578 /* store sequence */
579 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
580 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
581 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
582 ip += matchLength;
583 anchor = ip;
584 continue; /* faster when present ... (?) */
585 } }
796 if (dictMode == ZSTD_dictMatchState) {
797 while (ip <= ilimit) {
798 U32 const current2 = (U32)(ip-base);
799 U32 const repIndex = current2 - offset_2;
800 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
801 && repIndex < prefixLowestIndex ?
802 dictBase - dictIndexDelta + repIndex :
803 base + repIndex;
804 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
805 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
806 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
807 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
808 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
809 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
810 ip += matchLength;
811 anchor = ip;
812 continue;
813 }
814 break;
815 }
816 }
817
818 if (dictMode == ZSTD_noDict) {
819 while ( ((ip <= ilimit) & (offset_2>0))
820 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
821 /* store sequence */
822 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
823 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
824 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
825 ip += matchLength;
826 anchor = ip;
827 continue; /* faster when present ... (?) */
828 } } }
586 829
587 830 /* Save reps for next block */
588 831 rep[0] = offset_1 ? offset_1 : savedOffset;
@@ -595,30 +838,58 b' size_t ZSTD_compressBlock_lazy_generic('
595 838
596 839 size_t ZSTD_compressBlock_btlazy2(
597 840 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
598 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
841 void const* src, size_t srcSize)
599 842 {
600 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
843 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
601 844 }
602 845
603 846 size_t ZSTD_compressBlock_lazy2(
604 847 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
605 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
848 void const* src, size_t srcSize)
606 849 {
607 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
850 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
608 851 }
609 852
610 853 size_t ZSTD_compressBlock_lazy(
611 854 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
612 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
855 void const* src, size_t srcSize)
613 856 {
614 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
857 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
615 858 }
616 859
617 860 size_t ZSTD_compressBlock_greedy(
618 861 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
619 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
862 void const* src, size_t srcSize)
863 {
864 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
865 }
866
867 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
868 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
869 void const* src, size_t srcSize)
870 {
871 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
872 }
873
874 size_t ZSTD_compressBlock_lazy2_dictMatchState(
875 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
876 void const* src, size_t srcSize)
620 877 {
621 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
878 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
879 }
880
881 size_t ZSTD_compressBlock_lazy_dictMatchState(
882 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
883 void const* src, size_t srcSize)
884 {
885 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
886 }
887
888 size_t ZSTD_compressBlock_greedy_dictMatchState(
889 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
890 void const* src, size_t srcSize)
891 {
892 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
622 893 }
623 894
624 895
@@ -626,7 +897,6 b' FORCE_INLINE_TEMPLATE'
626 897 size_t ZSTD_compressBlock_lazy_extDict_generic(
627 898 ZSTD_matchState_t* ms, seqStore_t* seqStore,
628 899 U32 rep[ZSTD_REP_NUM],
629 ZSTD_compressionParameters const* cParams,
630 900 const void* src, size_t srcSize,
631 901 const U32 searchMethod, const U32 depth)
632 902 {
@@ -644,9 +914,9 b' size_t ZSTD_compressBlock_lazy_extDict_g'
644 914 const BYTE* const dictStart = dictBase + lowestIndex;
645 915
646 916 typedef size_t (*searchMax_f)(
647 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
917 ZSTD_matchState_t* ms,
648 918 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
649 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
919 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
650 920
651 921 U32 offset_1 = rep[0], offset_2 = rep[1];
652 922
@@ -674,8 +944,8 b' size_t ZSTD_compressBlock_lazy_extDict_g'
674 944 } }
675 945
676 946 /* first search (depth 0) */
677 { size_t offsetFound = 99999999;
678 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
947 { size_t offsetFound = 999999999;
948 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
679 949 if (ml2 > matchLength)
680 950 matchLength = ml2, start = ip, offset=offsetFound;
681 951 }
@@ -707,8 +977,8 b' size_t ZSTD_compressBlock_lazy_extDict_g'
707 977 } }
708 978
709 979 /* search match, depth 1 */
710 { size_t offset2=99999999;
711 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
980 { size_t offset2=999999999;
981 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
712 982 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
713 983 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
714 984 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -737,8 +1007,8 b' size_t ZSTD_compressBlock_lazy_extDict_g'
737 1007 } }
738 1008
739 1009 /* search match, depth 2 */
740 { size_t offset2=99999999;
741 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
1010 { size_t offset2=999999999;
1011 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
742 1012 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
743 1013 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
744 1014 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -794,31 +1064,31 b' size_t ZSTD_compressBlock_lazy_extDict_g'
794 1064
795 1065 size_t ZSTD_compressBlock_greedy_extDict(
796 1066 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
797 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1067 void const* src, size_t srcSize)
798 1068 {
799 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
1069 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
800 1070 }
801 1071
802 1072 size_t ZSTD_compressBlock_lazy_extDict(
803 1073 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
804 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1074 void const* src, size_t srcSize)
805 1075
806 1076 {
807 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
1077 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
808 1078 }
809 1079
810 1080 size_t ZSTD_compressBlock_lazy2_extDict(
811 1081 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
812 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1082 void const* src, size_t srcSize)
813 1083
814 1084 {
815 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
1085 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
816 1086 }
817 1087
818 1088 size_t ZSTD_compressBlock_btlazy2_extDict(
819 1089 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
820 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1090 void const* src, size_t srcSize)
821 1091
822 1092 {
823 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
1093 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
824 1094 }
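
With the explicit cParams argument gone, every variant above, greedy through btlazy2, with or without a dictMatchState, shares one signature and can sit behind the ZSTD_blockCompressor pointer that ZSTD_selectBlockCompressor hands back (see the LDM hunk further down). A rough, self-contained sketch of that dispatch, using stand-in types rather than the real zstd_compress_internal.h declarations, might look like:

    #include <stddef.h>

    /* Stand-ins for illustration only; the real types live in zstd_compress_internal.h. */
    typedef struct ZSTD_matchState_s ZSTD_matchState_t;
    typedef struct seqStore_s seqStore_t;

    /* The shared shape: compression parameters now come from ms->cParams,
       so no separate cParams argument needs to be threaded through. */
    typedef size_t (*blockCompressor_f)(
            ZSTD_matchState_t* ms, seqStore_t* seqStore, unsigned rep[3],
            void const* src, size_t srcSize);

    /* Declarations mirroring the entry points in this diff (U32/ZSTD_REP_NUM
       replaced by unsigned/3 so the sketch stands alone). */
    size_t ZSTD_compressBlock_lazy(ZSTD_matchState_t*, seqStore_t*, unsigned[3], void const*, size_t);
    size_t ZSTD_compressBlock_lazy_extDict(ZSTD_matchState_t*, seqStore_t*, unsigned[3], void const*, size_t);
    size_t ZSTD_compressBlock_lazy_dictMatchState(ZSTD_matchState_t*, seqStore_t*, unsigned[3], void const*, size_t);

    /* Hypothetical selector: pick the lazy variant from the dictionary mode. */
    static blockCompressor_f selectLazyCompressor(int dictMode /* 0:none 1:extDict 2:dictMatchState */)
    {
        if (dictMode == 1) return ZSTD_compressBlock_lazy_extDict;
        if (dictMode == 2) return ZSTD_compressBlock_lazy_dictMatchState;
        return ZSTD_compressBlock_lazy;
    }
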
@@ -17,37 +17,48 b' extern "C" {'
17 17
18 18 #include "zstd_compress_internal.h"
19 19
20 U32 ZSTD_insertAndFindFirstIndex(
21 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
22 const BYTE* ip);
20 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
23 21
24 22 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
25 23
26 24 size_t ZSTD_compressBlock_btlazy2(
27 25 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
26 void const* src, size_t srcSize);
29 27 size_t ZSTD_compressBlock_lazy2(
30 28 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
31 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
29 void const* src, size_t srcSize);
32 30 size_t ZSTD_compressBlock_lazy(
33 31 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
34 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
32 void const* src, size_t srcSize);
35 33 size_t ZSTD_compressBlock_greedy(
36 34 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
37 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
35 void const* src, size_t srcSize);
36
37 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
38 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
39 void const* src, size_t srcSize);
40 size_t ZSTD_compressBlock_lazy2_dictMatchState(
41 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
42 void const* src, size_t srcSize);
43 size_t ZSTD_compressBlock_lazy_dictMatchState(
44 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
45 void const* src, size_t srcSize);
46 size_t ZSTD_compressBlock_greedy_dictMatchState(
47 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
48 void const* src, size_t srcSize);
38 49
39 50 size_t ZSTD_compressBlock_greedy_extDict(
40 51 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
41 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
52 void const* src, size_t srcSize);
42 53 size_t ZSTD_compressBlock_lazy_extDict(
43 54 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
44 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
55 void const* src, size_t srcSize);
45 56 size_t ZSTD_compressBlock_lazy2_extDict(
46 57 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
47 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
58 void const* src, size_t srcSize);
48 59 size_t ZSTD_compressBlock_btlazy2_extDict(
49 60 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
50 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
61 void const* src, size_t srcSize);
51 62
52 63 #if defined (__cplusplus)
53 64 }
@@ -9,6 +9,7 b''
9 9
10 10 #include "zstd_ldm.h"
11 11
12 #include "debug.h"
12 13 #include "zstd_fast.h" /* ZSTD_fillHashTable() */
13 14 #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
14 15
@@ -20,7 +21,7 b''
20 21 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
21 22 ZSTD_compressionParameters const* cParams)
22 23 {
23 U32 const windowLog = cParams->windowLog;
24 params->windowLog = cParams->windowLog;
24 25 ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
25 26 DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
26 27 if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
@@ -33,12 +34,13 b' void ZSTD_ldm_adjustParameters(ldmParams'
33 34 params->minMatchLength = minMatch;
34 35 }
35 36 if (params->hashLog == 0) {
36 params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
37 params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
37 38 assert(params->hashLog <= ZSTD_HASHLOG_MAX);
38 39 }
39 40 if (params->hashEveryLog == 0) {
40 params->hashEveryLog =
41 windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
41 params->hashEveryLog = params->windowLog < params->hashLog
42 ? 0
43 : params->windowLog - params->hashLog;
42 44 }
43 45 params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
44 46 }
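
For a concrete feel of the defaulting above: assuming LDM_HASH_RLOG is 7 and ZSTD_HASHLOG_MIN is 6 (their real values are defined elsewhere in the library), a 2^27-byte window ends up with a 2^20-entry LDM hash table and a hash-every-log of 7:

    #include <stdio.h>

    #define MAX(a,b) ((a) > (b) ? (a) : (b))

    int main(void)
    {
        /* assumed constants; see the library headers for the real definitions */
        unsigned const LDM_HASH_RLOG   = 7;
        unsigned const ZSTD_HASHLOG_MIN = 6;

        unsigned const windowLog    = 27;
        unsigned const hashLog      = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);   /* 20 */
        unsigned const hashEveryLog = windowLog < hashLog ? 0 : windowLog - hashLog;      /* 7  */
        printf("hashLog=%u hashEveryLog=%u\n", hashLog, hashEveryLog);
        /* i.e. ~1M hash buckets, and roughly one in every 2^7 positions gets inserted */
        return 0;
    }
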
@@ -216,21 +218,18 b' static size_t ZSTD_ldm_countBackwardsMat'
216 218 * The tables for the other strategies are filled within their
217 219 * block compressors. */
218 220 static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
219 ZSTD_compressionParameters const* cParams,
220 221 void const* end)
221 222 {
222 223 const BYTE* const iend = (const BYTE*)end;
223 224
224 switch(cParams->strategy)
225 switch(ms->cParams.strategy)
225 226 {
226 227 case ZSTD_fast:
227 ZSTD_fillHashTable(ms, cParams, iend);
228 ms->nextToUpdate = (U32)(iend - ms->window.base);
228 ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
229 229 break;
230 230
231 231 case ZSTD_dfast:
232 ZSTD_fillDoubleHashTable(ms, cParams, iend);
233 ms->nextToUpdate = (U32)(iend - ms->window.base);
232 ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
234 233 break;
235 234
236 235 case ZSTD_greedy:
@@ -508,7 +507,7 b' size_t ZSTD_ldm_generateSequences('
508 507 * * Try invalidation after the sequence generation and test the
509 508 * the offset against maxDist directly.
510 509 */
511 ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
510 ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
512 511 /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
513 512 newLeftoverSize = ZSTD_ldm_generateSequences_internal(
514 513 ldmState, sequences, params, chunkStart, chunkSize);
@@ -591,19 +590,19 b' static rawSeq maybeSplitSequence(rawSeqS'
591 590
592 591 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
593 592 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
594 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
595 int const extDict)
593 void const* src, size_t srcSize)
596 594 {
595 const ZSTD_compressionParameters* const cParams = &ms->cParams;
597 596 unsigned const minMatch = cParams->searchLength;
598 597 ZSTD_blockCompressor const blockCompressor =
599 ZSTD_selectBlockCompressor(cParams->strategy, extDict);
600 BYTE const* const base = ms->window.base;
598 ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
601 599 /* Input bounds */
602 600 BYTE const* const istart = (BYTE const*)src;
603 601 BYTE const* const iend = istart + srcSize;
604 602 /* Input positions */
605 603 BYTE const* ip = istart;
606 604
605 DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
607 606 assert(rawSeqStore->pos <= rawSeqStore->size);
608 607 assert(rawSeqStore->size <= rawSeqStore->capacity);
609 608 /* Loop through each sequence and apply the block compressor to the lits */
@@ -621,14 +620,13 b' size_t ZSTD_ldm_blockCompress(rawSeqStor'
621 620
622 621 /* Fill tables for block compressor */
623 622 ZSTD_ldm_limitTableUpdate(ms, ip);
624 ZSTD_ldm_fillFastTables(ms, cParams, ip);
623 ZSTD_ldm_fillFastTables(ms, ip);
625 624 /* Run the block compressor */
625 DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
626 626 {
627 627 size_t const newLitLength =
628 blockCompressor(ms, seqStore, rep, cParams, ip,
629 sequence.litLength);
628 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
630 629 ip += sequence.litLength;
631 ms->nextToUpdate = (U32)(ip - base);
632 630 /* Update the repcodes */
633 631 for (i = ZSTD_REP_NUM - 1; i > 0; i--)
634 632 rep[i] = rep[i-1];
@@ -642,12 +640,7 b' size_t ZSTD_ldm_blockCompress(rawSeqStor'
642 640 }
643 641 /* Fill the tables for the block compressor */
644 642 ZSTD_ldm_limitTableUpdate(ms, ip);
645 ZSTD_ldm_fillFastTables(ms, cParams, ip);
643 ZSTD_ldm_fillFastTables(ms, ip);
646 644 /* Compress the last literals */
647 {
648 size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
649 ip, iend - ip);
650 ms->nextToUpdate = (U32)(iend - base);
651 return lastLiterals;
652 }
645 return blockCompressor(ms, seqStore, rep, ip, iend - ip);
653 646 }
@@ -61,9 +61,7 b' size_t ZSTD_ldm_generateSequences('
61 61 */
62 62 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
63 63 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
64 ZSTD_compressionParameters const* cParams,
65 void const* src, size_t srcSize,
66 int const extDict);
64 void const* src, size_t srcSize);
67 65
68 66 /**
69 67 * ZSTD_ldm_skipSequences():

@@ -9,10 +9,11 b''
9 9 */
10 10
11 11 #include "zstd_compress_internal.h"
12 #include "hist.h"
12 13 #include "zstd_opt.h"
13 14
14 15
15 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
16 17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
17 18 #define ZSTD_MAX_PRICE (1<<30)
18 19
@@ -20,128 +21,210 b''
20 21 /*-*************************************
21 22 * Price functions for optimal parser
22 23 ***************************************/
23 static void ZSTD_setLog2Prices(optState_t* optPtr)
24
25 #if 0 /* approximation at bit level */
26 # define BITCOST_ACCURACY 0
27 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
28 # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
29 #elif 0 /* fractional bit accuracy */
30 # define BITCOST_ACCURACY 8
31 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
33 #else /* opt==approx, ultra==accurate */
34 # define BITCOST_ACCURACY 8
35 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
37 #endif
38
39 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
40 {
41 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
42 }
43
44 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
24 45 {
25 optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
26 optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
27 optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
28 optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
46 U32 const stat = rawStat + 1;
47 U32 const hb = ZSTD_highbit32(stat);
48 U32 const BWeight = hb * BITCOST_MULTIPLIER;
49 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
50 U32 const weight = BWeight + FWeight;
51 assert(hb + BITCOST_ACCURACY < 31);
52 return weight;
53 }
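
As a quick numeric check of the fixed-point arithmetic above (BITCOST_ACCURACY == 8, so prices are expressed in 1/256ths of a bit), a raw count of 10 prices out at roughly 4.4 bits, where the plain ZSTD_bitWeight would round it down to 3 bits:

    #include <stdio.h>

    int main(void)
    {
        unsigned const rawStat = 10;
        unsigned const stat    = rawStat + 1;          /* 11 */
        unsigned const hb      = 3;                    /* ZSTD_highbit32(11) == 3 */
        unsigned const BWeight = hb * 256;             /* integer-bit part:  768 */
        unsigned const FWeight = (stat << 8) >> hb;    /* fractional part:   352 */
        printf("fracWeight=%u (~%.2f bits), bitWeight=%u (3 bits)\n",
               BWeight + FWeight, (BWeight + FWeight) / 256.0, hb * 256);
        return 0;
    }
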
54
55 /* debugging function, @return price in bytes */
56 MEM_STATIC double ZSTD_fCost(U32 price)
57 {
58 return (double)price / (BITCOST_MULTIPLIER*8);
59 }
60
61 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
62 {
63 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
64 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
65 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
66 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
29 67 }
30 68
31 69
70 static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
71 {
72 U32 s, sum=0;
73 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
74 for (s=0; s<=lastEltIndex; s++) {
75 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
76 sum += table[s];
77 }
78 return sum;
79 }
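
The rescaling rule above keeps every count at least 1 while shifting the previous block's statistics down. For instance, with ZSTD_FREQ_DIV == 4 and malus == 1 (the literal-table calls below), old counts of 1000, 31 and 0 become 32, 1 and 1:

    #include <stdio.h>

    /* Standalone copy of the downscaling rule, shift = ZSTD_FREQ_DIV(4) + malus(1). */
    static unsigned downscale(unsigned count) { return 1 + (count >> 5); }

    int main(void)
    {
        printf("%u %u %u\n", downscale(1000), downscale(31), downscale(0));
        /* prints "32 1 1": earlier statistics still bias the prices, but every
           symbol keeps a nonzero frequency, so no price ever becomes infinite. */
        return 0;
    }
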
80
32 81 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
33 const BYTE* const src, size_t const srcSize)
82 const BYTE* const src, size_t const srcSize,
83 int optLevel)
34 84 {
35 optPtr->staticPrices = 0;
85 optPtr->priceType = zop_dynamic;
86
87 if (optPtr->litLengthSum == 0) { /* first block : init */
88 if (srcSize <= 1024) /* heuristic */
89 optPtr->priceType = zop_predef;
90
91 assert(optPtr->symbolCosts != NULL);
92 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
93 optPtr->priceType = zop_dynamic;
36 94
37 if (optPtr->litLengthSum == 0) { /* first init */
38 unsigned u;
39 if (srcSize <= 1024) optPtr->staticPrices = 1;
95 assert(optPtr->litFreq != NULL);
96 optPtr->litSum = 0;
97 { unsigned lit;
98 for (lit=0; lit<=MaxLit; lit++) {
99 U32 const scaleLog = 11; /* scale to 2K */
100 U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
101 assert(bitCost <= scaleLog);
102 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
103 optPtr->litSum += optPtr->litFreq[lit];
104 } }
105
106 { unsigned ll;
107 FSE_CState_t llstate;
108 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
109 optPtr->litLengthSum = 0;
110 for (ll=0; ll<=MaxLL; ll++) {
111 U32 const scaleLog = 10; /* scale to 1K */
112 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
113 assert(bitCost < scaleLog);
114 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
115 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
116 } }
40 117
41 assert(optPtr->litFreq!=NULL);
42 for (u=0; u<=MaxLit; u++)
43 optPtr->litFreq[u] = 0;
44 for (u=0; u<srcSize; u++)
45 optPtr->litFreq[src[u]]++;
46 optPtr->litSum = 0;
47 for (u=0; u<=MaxLit; u++) {
48 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
49 optPtr->litSum += optPtr->litFreq[u];
118 { unsigned ml;
119 FSE_CState_t mlstate;
120 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
121 optPtr->matchLengthSum = 0;
122 for (ml=0; ml<=MaxML; ml++) {
123 U32 const scaleLog = 10;
124 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
125 assert(bitCost < scaleLog);
126 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
127 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
128 } }
129
130 { unsigned of;
131 FSE_CState_t ofstate;
132 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
133 optPtr->offCodeSum = 0;
134 for (of=0; of<=MaxOff; of++) {
135 U32 const scaleLog = 10;
136 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
137 assert(bitCost < scaleLog);
138 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
139 optPtr->offCodeSum += optPtr->offCodeFreq[of];
140 } }
141
142 } else { /* not a dictionary */
143
144 assert(optPtr->litFreq != NULL);
145 { unsigned lit = MaxLit;
146 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
147 }
148 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
149
150 { unsigned ll;
151 for (ll=0; ll<=MaxLL; ll++)
152 optPtr->litLengthFreq[ll] = 1;
153 }
154 optPtr->litLengthSum = MaxLL+1;
155
156 { unsigned ml;
157 for (ml=0; ml<=MaxML; ml++)
158 optPtr->matchLengthFreq[ml] = 1;
159 }
160 optPtr->matchLengthSum = MaxML+1;
161
162 { unsigned of;
163 for (of=0; of<=MaxOff; of++)
164 optPtr->offCodeFreq[of] = 1;
165 }
166 optPtr->offCodeSum = MaxOff+1;
167
50 168 }
51 169
52 for (u=0; u<=MaxLL; u++)
53 optPtr->litLengthFreq[u] = 1;
54 optPtr->litLengthSum = MaxLL+1;
55 for (u=0; u<=MaxML; u++)
56 optPtr->matchLengthFreq[u] = 1;
57 optPtr->matchLengthSum = MaxML+1;
58 for (u=0; u<=MaxOff; u++)
59 optPtr->offCodeFreq[u] = 1;
60 optPtr->offCodeSum = (MaxOff+1);
61
62 } else {
63 unsigned u;
170 } else { /* new block : re-use previous statistics, scaled down */
64 171
65 optPtr->litSum = 0;
66 for (u=0; u<=MaxLit; u++) {
67 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
68 optPtr->litSum += optPtr->litFreq[u];
69 }
70 optPtr->litLengthSum = 0;
71 for (u=0; u<=MaxLL; u++) {
72 optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
73 optPtr->litLengthSum += optPtr->litLengthFreq[u];
74 }
75 optPtr->matchLengthSum = 0;
76 for (u=0; u<=MaxML; u++) {
77 optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
78 optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
79 }
80 optPtr->offCodeSum = 0;
81 for (u=0; u<=MaxOff; u++) {
82 optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
83 optPtr->offCodeSum += optPtr->offCodeFreq[u];
84 }
172 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
173 optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
174 optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
175 optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
85 176 }
86 177
87 ZSTD_setLog2Prices(optPtr);
178 ZSTD_setBasePrices(optPtr, optLevel);
88 179 }
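
In the dictionary branch above, a symbol's pseudo-count is derived from its code length with 1 << (scaleLog - bitCost), so cheap symbols are treated as exponentially more frequent. A small illustration for literals (scaleLog == 11):

    #include <stdio.h>

    /* Pseudo-frequency for a literal whose Huffman code costs bitCost bits,
       mirroring the dictionary-seeded branch above (scaleLog == 11). */
    static unsigned litPseudoFreq(unsigned bitCost)
    {
        return bitCost ? 1u << (11 - bitCost) : 1;   /* 0 bits == symbol absent from table */
    }

    int main(void)
    {
        printf("%u %u %u\n", litPseudoFreq(3), litPseudoFreq(8), litPseudoFreq(0));
        /* prints "256 8 1": a 3-bit literal is scored as 32x more frequent than
           an 8-bit one, reproducing the dictionary's cost model in the optimizer. */
        return 0;
    }
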
89 180
90
91 181 /* ZSTD_rawLiteralsCost() :
92 * cost of literals (only) in given segment (which length can be null)
93 * does not include cost of literalLength symbol */
182 * price of literals (only) in specified segment (which length can be 0).
183 * does not include price of literalLength symbol */
94 184 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
95 const optState_t* const optPtr)
185 const optState_t* const optPtr,
186 int optLevel)
96 187 {
97 if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */
98 188 if (litLength == 0) return 0;
189 if (optPtr->priceType == zop_predef)
190 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
99 191
100 /* literals */
101 { U32 u;
102 U32 cost = litLength * optPtr->log2litSum;
103 for (u=0; u < litLength; u++)
104 cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
105 return cost;
192 /* dynamic statistics */
193 { U32 price = litLength * optPtr->litSumBasePrice;
194 U32 u;
195 for (u=0; u < litLength; u++) {
196 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
197 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
198 }
199 return price;
106 200 }
107 201 }
108 202
109 203 /* ZSTD_litLengthPrice() :
110 204 * cost of literalLength symbol */
111 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
205 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
112 206 {
113 if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
207 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
114 208
115 /* literal Length */
209 /* dynamic statistics */
116 210 { U32 const llCode = ZSTD_LLcode(litLength);
117 U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
118 return price;
211 return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
119 212 }
120 213 }
121 214
122 /* ZSTD_litLengthPrice() :
123 * cost of the literal part of a sequence,
124 * including literals themselves, and literalLength symbol */
125 static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
126 const optState_t* const optPtr)
127 {
128 return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
129 + ZSTD_litLengthPrice(litLength, optPtr);
130 }
131
132 215 /* ZSTD_litLengthContribution() :
133 216 * @return ( cost(litlength) - cost(0) )
134 217 * this value can then be added to rawLiteralsCost()
135 218 * to provide a cost which is directly comparable to a match ending at same position */
136 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
219 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
137 220 {
138 if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
221 if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
139 222
140 /* literal Length */
223 /* dynamic statistics */
141 224 { U32 const llCode = ZSTD_LLcode(litLength);
142 int const contribution = LL_bits[llCode]
143 + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
144 - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
225 int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
226 + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
227 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
145 228 #if 1
146 229 return contribution;
147 230 #else
@@ -155,10 +238,11 b' static int ZSTD_litLengthContribution(U3'
155 238 * which can be compared to the ending cost of a match
156 239 * should a new match start at this position */
157 240 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
158 const optState_t* const optPtr)
241 const optState_t* const optPtr,
242 int optLevel)
159 243 {
160 int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
161 + ZSTD_litLengthContribution(litLength, optPtr);
244 int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
245 + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
162 246 return contribution;
163 247 }
164 248
@@ -166,31 +250,38 b' static int ZSTD_literalsContribution(con'
166 250 * Provides the cost of the match part (offset + matchLength) of a sequence
167 251 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
168 252 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
169 FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
170 U32 const offset, U32 const matchLength,
171 const optState_t* const optPtr,
172 int const optLevel)
253 FORCE_INLINE_TEMPLATE U32
254 ZSTD_getMatchPrice(U32 const offset,
255 U32 const matchLength,
256 const optState_t* const optPtr,
257 int const optLevel)
173 258 {
174 259 U32 price;
175 260 U32 const offCode = ZSTD_highbit32(offset+1);
176 261 U32 const mlBase = matchLength - MINMATCH;
177 262 assert(matchLength >= MINMATCH);
178 263
179 if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
180 return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
264 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
265 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
181 266
182 price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
183 if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
267 /* dynamic statistics */
268 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
269 if ((optLevel<2) /*static*/ && offCode >= 20)
270 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
184 271
185 272 /* match Length */
186 273 { U32 const mlCode = ZSTD_MLcode(mlBase);
187 price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
274 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
188 275 }
189 276
277 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor less sequences -> faster decompression speed */
278
190 279 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
191 280 return price;
192 281 }
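
The long-distance handicap above only applies when optLevel < 2 and offCode >= 20; since offCode is ZSTD_highbit32(offset+1), that covers offsets of roughly 1 MB and beyond. Converted to bits, the surcharge grows by 2 bits per extra power of two of distance:

    #include <stdio.h>

    int main(void)
    {
        unsigned offCode;
        for (offCode = 19; offCode <= 24; offCode++) {
            unsigned const handicapBits = (offCode >= 20) ? (offCode - 19) * 2 : 0;
            printf("offCode %2u -> +%u bits\n", offCode, handicapBits);
        }
        /* offCode 19 -> +0, 20 -> +2, ..., 24 -> +10 : the optimal parser leans
           toward nearer matches, trading a little ratio for cache-friendly
           decompression, as the in-code comment notes. */
        return 0;
    }
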
193 282
283 /* ZSTD_updateStats() :
284 * assumption : literals + litLengtn <= iend */
194 285 static void ZSTD_updateStats(optState_t* const optPtr,
195 286 U32 litLength, const BYTE* literals,
196 287 U32 offsetCode, U32 matchLength)
@@ -269,10 +360,11 b' static U32 ZSTD_insertAndFindFirstIndexH'
269 360 * ip : assumed <= iend-8 .
270 361 * @return : nb of positions added */
271 362 static U32 ZSTD_insertBt1(
272 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
363 ZSTD_matchState_t* ms,
273 364 const BYTE* const ip, const BYTE* const iend,
274 U32 const mls, U32 const extDict)
365 U32 const mls, const int extDict)
275 366 {
367 const ZSTD_compressionParameters* const cParams = &ms->cParams;
276 368 U32* const hashTable = ms->hashTable;
277 369 U32 const hashLog = cParams->hashLog;
278 370 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
@@ -293,6 +385,7 b' static U32 ZSTD_insertBt1('
293 385 U32* largerPtr = smallerPtr + 1;
294 386 U32 dummy32; /* to be nullified at the end */
295 387 U32 const windowLow = ms->window.lowLimit;
388 U32 const matchLow = windowLow ? windowLow : 1;
296 389 U32 matchEndIdx = current+8+1;
297 390 size_t bestLength = 8;
298 391 U32 nbCompares = 1U << cParams->searchLog;
@@ -308,7 +401,7 b' static U32 ZSTD_insertBt1('
308 401 assert(ip <= iend-8); /* required for h calculation */
309 402 hashTable[h] = current; /* Update Hash Table */
310 403
311 while (nbCompares-- && (matchIndex > windowLow)) {
404 while (nbCompares-- && (matchIndex >= matchLow)) {
312 405 U32* const nextPtr = bt + 2*(matchIndex & btMask);
313 406 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
314 407 assert(matchIndex < current);
@@ -334,8 +427,8 b' static U32 ZSTD_insertBt1('
334 427 }
335 428 #endif
336 429
337 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
338 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
430 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
431 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
339 432 match = base + matchIndex;
340 433 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
341 434 } else {
@@ -379,35 +472,33 b' static U32 ZSTD_insertBt1('
379 472
380 473 FORCE_INLINE_TEMPLATE
381 474 void ZSTD_updateTree_internal(
382 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
475 ZSTD_matchState_t* ms,
383 476 const BYTE* const ip, const BYTE* const iend,
384 const U32 mls, const U32 extDict)
477 const U32 mls, const ZSTD_dictMode_e dictMode)
385 478 {
386 479 const BYTE* const base = ms->window.base;
387 480 U32 const target = (U32)(ip - base);
388 481 U32 idx = ms->nextToUpdate;
389 DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
390 idx, target, extDict);
482 DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
483 idx, target, dictMode);
391 484
392 485 while(idx < target)
393 idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
486 idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
394 487 ms->nextToUpdate = target;
395 488 }
396 489
397 void ZSTD_updateTree(
398 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
399 const BYTE* ip, const BYTE* iend)
400 {
401 ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
490 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
491 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict);
402 492 }
403 493
404 494 FORCE_INLINE_TEMPLATE
405 495 U32 ZSTD_insertBtAndGetAllMatches (
406 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
407 const BYTE* const ip, const BYTE* const iLimit, int const extDict,
496 ZSTD_matchState_t* ms,
497 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
408 498 U32 rep[ZSTD_REP_NUM], U32 const ll0,
409 499 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
410 500 {
501 const ZSTD_compressionParameters* const cParams = &ms->cParams;
411 502 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
412 503 const BYTE* const base = ms->window.base;
413 504 U32 const current = (U32)(ip-base);
@@ -426,6 +517,7 b' U32 ZSTD_insertBtAndGetAllMatches ('
426 517 const BYTE* const prefixStart = base + dictLimit;
427 518 U32 const btLow = btMask >= current ? 0 : current - btMask;
428 519 U32 const windowLow = ms->window.lowLimit;
520 U32 const matchLow = windowLow ? windowLow : 1;
429 521 U32* smallerPtr = bt + 2*(current&btMask);
430 522 U32* largerPtr = bt + 2*(current&btMask) + 1;
431 523 U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
@@ -433,8 +525,21 b' U32 ZSTD_insertBtAndGetAllMatches ('
433 525 U32 mnum = 0;
434 526 U32 nbCompares = 1U << cParams->searchLog;
435 527
528 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
529 const ZSTD_compressionParameters* const dmsCParams =
530 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
531 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
532 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
533 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
534 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
535 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
536 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
537 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
538 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
539 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
540
436 541 size_t bestLength = lengthToBeat-1;
437 DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
542 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
438 543
439 544 /* check repCode */
440 545 { U32 const lastR = ZSTD_REP_NUM + ll0;
@@ -449,18 +554,26 b' U32 ZSTD_insertBtAndGetAllMatches ('
449 554 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
450 555 }
451 556 } else { /* repIndex < dictLimit || repIndex >= current */
452 const BYTE* const repMatch = dictBase + repIndex;
557 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
558 dmsBase + repIndex - dmsIndexDelta :
559 dictBase + repIndex;
453 560 assert(current >= windowLow);
454 if ( extDict /* this case only valid in extDict mode */
561 if ( dictMode == ZSTD_extDict
455 562 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
456 563 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
457 564 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
458 565 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
566 }
567 if (dictMode == ZSTD_dictMatchState
568 && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
569 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
570 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
571 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
459 572 } }
460 573 /* save longer solution */
461 574 if (repLen > bestLength) {
462 DEBUGLOG(8, "found rep-match %u of length %u",
463 repCode - ll0, (U32)repLen);
575 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
576 repCode, ll0, repOffset, repLen);
464 577 bestLength = repLen;
465 578 matches[mnum].off = repCode - ll0;
466 579 matches[mnum].len = (U32)repLen;
@@ -473,10 +586,10 b' U32 ZSTD_insertBtAndGetAllMatches ('
473 586 /* HC3 match finder */
474 587 if ((mls == 3) /*static*/ && (bestLength < mls)) {
475 588 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
476 if ((matchIndex3 > windowLow)
589 if ((matchIndex3 >= matchLow)
477 590 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
478 591 size_t mlen;
479 if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
592 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
480 593 const BYTE* const match = base + matchIndex3;
481 594 mlen = ZSTD_count(ip, match, iLimit);
482 595 } else {
@@ -498,17 +611,21 b' U32 ZSTD_insertBtAndGetAllMatches ('
498 611 (ip+mlen == iLimit) ) { /* best possible length */
499 612 ms->nextToUpdate = current+1; /* skip insertion */
500 613 return 1;
501 } } } }
614 }
615 }
616 }
617 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
618 }
502 619
503 620 hashTable[h] = current; /* Update Hash Table */
504 621
505 while (nbCompares-- && (matchIndex > windowLow)) {
622 while (nbCompares-- && (matchIndex >= matchLow)) {
506 623 U32* const nextPtr = bt + 2*(matchIndex & btMask);
507 624 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
508 625 const BYTE* match;
509 626 assert(current > matchIndex);
510 627
511 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
628 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
512 629 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
513 630 match = base + matchIndex;
514 631 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
@@ -520,8 +637,8 b' U32 ZSTD_insertBtAndGetAllMatches ('
520 637 }
521 638
522 639 if (matchLength > bestLength) {
523 DEBUGLOG(8, "found match of length %u at distance %u",
524 (U32)matchLength, current - matchIndex);
640 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
641 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
525 642 assert(matchEndIdx > matchIndex);
526 643 if (matchLength > matchEndIdx - matchIndex)
527 644 matchEndIdx = matchIndex + (U32)matchLength;
@@ -529,9 +646,10 b' U32 ZSTD_insertBtAndGetAllMatches ('
529 646 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
530 647 matches[mnum].len = (U32)matchLength;
531 648 mnum++;
532 if (matchLength > ZSTD_OPT_NUM) break;
533 if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */
534 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
649 if ( (matchLength > ZSTD_OPT_NUM)
650 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
651 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
652 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
535 653 }
536 654 }
537 655
@@ -552,6 +670,47 b' U32 ZSTD_insertBtAndGetAllMatches ('
552 670
553 671 *smallerPtr = *largerPtr = 0;
554 672
673 if (dictMode == ZSTD_dictMatchState && nbCompares) {
674 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
675 U32 dictMatchIndex = dms->hashTable[dmsH];
676 const U32* const dmsBt = dms->chainTable;
677 commonLengthSmaller = commonLengthLarger = 0;
678 while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
679 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
680 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
681 const BYTE* match = dmsBase + dictMatchIndex;
682 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
683 if (dictMatchIndex+matchLength >= dmsHighLimit)
684 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
685
686 if (matchLength > bestLength) {
687 matchIndex = dictMatchIndex + dmsIndexDelta;
688 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
689 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
690 if (matchLength > matchEndIdx - matchIndex)
691 matchEndIdx = matchIndex + (U32)matchLength;
692 bestLength = matchLength;
693 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
694 matches[mnum].len = (U32)matchLength;
695 mnum++;
696 if ( (matchLength > ZSTD_OPT_NUM)
697 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
698 break; /* drop, to guarantee consistency (miss a little bit of compression) */
699 }
700 }
701
702 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
703 if (match[matchLength] < ip[matchLength]) {
704 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
705 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
706 } else {
707 /* match is larger than current */
708 commonLengthLarger = matchLength;
709 dictMatchIndex = nextPtr[0];
710 }
711 }
712 }
713
555 714 assert(matchEndIdx > current+8);
556 715 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
557 716 return mnum;
@@ -559,23 +718,24 b' U32 ZSTD_insertBtAndGetAllMatches ('
559 718
560 719
561 720 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
562 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
563 const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
721 ZSTD_matchState_t* ms,
722 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
564 723 U32 rep[ZSTD_REP_NUM], U32 const ll0,
565 724 ZSTD_match_t* matches, U32 const lengthToBeat)
566 725 {
726 const ZSTD_compressionParameters* const cParams = &ms->cParams;
567 727 U32 const matchLengthSearch = cParams->searchLength;
568 DEBUGLOG(7, "ZSTD_BtGetAllMatches");
728 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
569 729 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
570 ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
730 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
571 731 switch(matchLengthSearch)
572 732 {
573 case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
733 case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
574 734 default :
575 case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
576 case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
735 case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
736 case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
577 737 case 7 :
578 case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
738 case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
579 739 }
580 740 }
581 741
@@ -587,7 +747,7 b' typedef struct repcodes_s {'
587 747 U32 rep[3];
588 748 } repcodes_t;
589 749
590 repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
750 static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
591 751 {
592 752 repcodes_t newReps;
593 753 if (offset >= ZSTD_REP_NUM) { /* full offset */
@@ -609,65 +769,17 b' repcodes_t ZSTD_updateRep(U32 const rep['
609 769 }
610 770
611 771
612 typedef struct {
613 const BYTE* anchor;
614 U32 litlen;
615 U32 rawLitCost;
616 } cachedLiteralPrice_t;
617
618 static U32 ZSTD_rawLiteralsCost_cached(
619 cachedLiteralPrice_t* const cachedLitPrice,
620 const BYTE* const anchor, U32 const litlen,
621 const optState_t* const optStatePtr)
772 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
622 773 {
623 U32 startCost;
624 U32 remainingLength;
625 const BYTE* startPosition;
626
627 if (anchor == cachedLitPrice->anchor) {
628 startCost = cachedLitPrice->rawLitCost;
629 startPosition = anchor + cachedLitPrice->litlen;
630 assert(litlen >= cachedLitPrice->litlen);
631 remainingLength = litlen - cachedLitPrice->litlen;
632 } else {
633 startCost = 0;
634 startPosition = anchor;
635 remainingLength = litlen;
636 }
637
638 { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
639 cachedLitPrice->anchor = anchor;
640 cachedLitPrice->litlen = litlen;
641 cachedLitPrice->rawLitCost = rawLitCost;
642 return rawLitCost;
643 }
774 return sol.litlen + sol.mlen;
644 775 }
645 776
646 static U32 ZSTD_fullLiteralsCost_cached(
647 cachedLiteralPrice_t* const cachedLitPrice,
648 const BYTE* const anchor, U32 const litlen,
649 const optState_t* const optStatePtr)
650 {
651 return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
652 + ZSTD_litLengthPrice(litlen, optStatePtr);
653 }
654
655 static int ZSTD_literalsContribution_cached(
656 cachedLiteralPrice_t* const cachedLitPrice,
657 const BYTE* const anchor, U32 const litlen,
658 const optState_t* const optStatePtr)
659 {
660 int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
661 + ZSTD_litLengthContribution(litlen, optStatePtr);
662 return contribution;
663 }
664
665 FORCE_INLINE_TEMPLATE
666 size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
667 U32 rep[ZSTD_REP_NUM],
668 ZSTD_compressionParameters const* cParams,
669 const void* src, size_t srcSize,
670 const int optLevel, const int extDict)
777 FORCE_INLINE_TEMPLATE size_t
778 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
779 seqStore_t* seqStore,
780 U32 rep[ZSTD_REP_NUM],
781 const void* src, size_t srcSize,
782 const int optLevel, const ZSTD_dictMode_e dictMode)
671 783 {
672 784 optState_t* const optStatePtr = &ms->opt;
673 785 const BYTE* const istart = (const BYTE*)src;
@@ -677,72 +789,76 b' size_t ZSTD_compressBlock_opt_generic(ZS'
677 789 const BYTE* const ilimit = iend - 8;
678 790 const BYTE* const base = ms->window.base;
679 791 const BYTE* const prefixStart = base + ms->window.dictLimit;
792 const ZSTD_compressionParameters* const cParams = &ms->cParams;
680 793
681 794 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
682 795 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
683 796
684 797 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
685 798 ZSTD_match_t* const matches = optStatePtr->matchTable;
686 cachedLiteralPrice_t cachedLitPrice;
799 ZSTD_optimal_t lastSequence;
687 800
688 801 /* init */
689 802 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
803 assert(optLevel <= 2);
690 804 ms->nextToUpdate3 = ms->nextToUpdate;
691 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
805 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
692 806 ip += (ip==prefixStart);
693 memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
694 807
695 808 /* Match Loop */
696 809 while (ip < ilimit) {
697 810 U32 cur, last_pos = 0;
698 U32 best_mlen, best_off;
699 811
700 812 /* find first match */
701 813 { U32 const litlen = (U32)(ip - anchor);
702 814 U32 const ll0 = !litlen;
703 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
815 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
704 816 if (!nbMatches) { ip++; continue; }
705 817
706 818 /* initialize opt[0] */
707 819 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
708 opt[0].mlen = 1;
820 opt[0].mlen = 0; /* means is_a_literal */
709 821 opt[0].litlen = litlen;
822 opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
710 823
711 824 /* large match -> immediate encoding */
712 825 { U32 const maxML = matches[nbMatches-1].len;
713 DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
714 nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
826 U32 const maxOffset = matches[nbMatches-1].off;
827 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
828 nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
715 829
716 830 if (maxML > sufficient_len) {
717 best_mlen = maxML;
718 best_off = matches[nbMatches-1].off;
719 DEBUGLOG(7, "large match (%u>%u), immediate encoding",
720 best_mlen, sufficient_len);
831 lastSequence.litlen = litlen;
832 lastSequence.mlen = maxML;
833 lastSequence.off = maxOffset;
834 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
835 maxML, sufficient_len);
721 836 cur = 0;
722 last_pos = 1;
837 last_pos = ZSTD_totalLen(lastSequence);
723 838 goto _shortestPath;
724 839 } }
725 840
726 841 /* set prices for first matches starting position == 0 */
727 { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
842 { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
728 843 U32 pos;
729 844 U32 matchNb;
730 for (pos = 0; pos < minMatch; pos++) {
731 opt[pos].mlen = 1;
732 opt[pos].price = ZSTD_MAX_PRICE;
845 for (pos = 1; pos < minMatch; pos++) {
846 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
733 847 }
734 848 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
735 849 U32 const offset = matches[matchNb].off;
736 850 U32 const end = matches[matchNb].len;
737 851 repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
738 852 for ( ; pos <= end ; pos++ ) {
739 U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
740 DEBUGLOG(7, "rPos:%u => set initial price : %u",
741 pos, matchPrice);
853 U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
854 U32 const sequencePrice = literalsPrice + matchPrice;
855 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
856 pos, ZSTD_fCost(sequencePrice));
742 857 opt[pos].mlen = pos;
743 858 opt[pos].off = offset;
744 859 opt[pos].litlen = litlen;
745 opt[pos].price = matchPrice;
860 opt[pos].price = sequencePrice;
861 ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
746 862 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
747 863 } }
748 864 last_pos = pos-1;
@@ -753,55 +869,67 b' size_t ZSTD_compressBlock_opt_generic(ZS'
753 869 for (cur = 1; cur <= last_pos; cur++) {
754 870 const BYTE* const inr = ip + cur;
755 871 assert(cur < ZSTD_OPT_NUM);
872 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
756 873
757 874 /* Fix current position with one literal if cheaper */
758 { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
759 int price; /* note : contribution can be negative */
760 if (cur > litlen) {
761 price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
762 } else {
763 price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
764 }
875 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
876 int const price = opt[cur-1].price
877 + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
878 + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
879 - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
765 880 assert(price < 1000000000); /* overflow check */
766 881 if (price <= opt[cur].price) {
767 DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
768 cur, price, opt[cur].price);
769 opt[cur].mlen = 1;
882 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
883 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
884 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
885 opt[cur].mlen = 0;
770 886 opt[cur].off = 0;
771 887 opt[cur].litlen = litlen;
772 888 opt[cur].price = price;
773 889 memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
774 } }
890 } else {
891 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
892 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
893 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
894 }
895 }
775 896
776 897 /* last match must start at a minimum distance of 8 from oend */
777 898 if (inr > ilimit) continue;
778 899
779 900 if (cur == last_pos) break;
780 901
781 if ( (optLevel==0) /*static*/
782 && (opt[cur+1].price <= opt[cur].price) )
902 if ( (optLevel==0) /*static_test*/
903 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
904 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
783 905 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
906 }
784 907
785 { U32 const ll0 = (opt[cur].mlen != 1);
786 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
787 U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
788 U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
789 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
908 { U32 const ll0 = (opt[cur].mlen != 0);
909 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
910 U32 const previousPrice = opt[cur].price;
911 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
912 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
790 913 U32 matchNb;
791 if (!nbMatches) continue;
914 if (!nbMatches) {
915 DEBUGLOG(7, "rPos:%u : no match found", cur);
916 continue;
917 }
792 918
793 919 { U32 const maxML = matches[nbMatches-1].len;
794 DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
795 cur, nbMatches, maxML);
920 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
921 inr-istart, cur, nbMatches, maxML);
796 922
797 923 if ( (maxML > sufficient_len)
798 | (cur + maxML >= ZSTD_OPT_NUM) ) {
799 best_mlen = maxML;
800 best_off = matches[nbMatches-1].off;
801 last_pos = cur + 1;
924 || (cur + maxML >= ZSTD_OPT_NUM) ) {
925 lastSequence.mlen = maxML;
926 lastSequence.off = matches[nbMatches-1].off;
927 lastSequence.litlen = litlen;
928 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
929 last_pos = cur + ZSTD_totalLen(lastSequence);
930 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
802 931 goto _shortestPath;
803 }
804 }
932 } }
805 933
806 934 /* set prices using matches found at position == cur */
807 935 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
@@ -811,81 +939,97 b' size_t ZSTD_compressBlock_opt_generic(ZS'
811 939 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
812 940 U32 mlen;
813 941
814 DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
942 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
815 943 matchNb, matches[matchNb].off, lastML, litlen);
816 944
817 for (mlen = lastML; mlen >= startML; mlen--) {
945 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
818 946 U32 const pos = cur + mlen;
819 947 int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
820 948
821 949 if ((pos > last_pos) || (price < opt[pos].price)) {
822 DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
823 pos, price, opt[pos].price);
824 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
950 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
951 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
952 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
825 953 opt[pos].mlen = mlen;
826 954 opt[pos].off = offset;
827 955 opt[pos].litlen = litlen;
828 956 opt[pos].price = price;
957 ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
829 958 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
830 959 } else {
831 if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
960 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
961 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
962 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
832 963 }
833 964 } } }
834 965 } /* for (cur = 1; cur <= last_pos; cur++) */
835 966
836 best_mlen = opt[last_pos].mlen;
837 best_off = opt[last_pos].off;
838 cur = last_pos - best_mlen;
967 lastSequence = opt[last_pos];
968 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
969 assert(cur < ZSTD_OPT_NUM); /* overflow check */
839 970
840 971 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
841 assert(opt[0].mlen == 1);
972 assert(opt[0].mlen == 0);
973
974 { U32 const storeEnd = cur + 1;
975 U32 storeStart = storeEnd;
976 U32 seqPos = cur;
842 977
843 /* reverse traversal */
844 DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
845 last_pos, cur);
846 { U32 selectedMatchLength = best_mlen;
847 U32 selectedOffset = best_off;
848 U32 pos = cur;
849 while (1) {
850 U32 const mlen = opt[pos].mlen;
851 U32 const off = opt[pos].off;
852 opt[pos].mlen = selectedMatchLength;
853 opt[pos].off = selectedOffset;
854 selectedMatchLength = mlen;
855 selectedOffset = off;
856 if (mlen > pos) break;
857 pos -= mlen;
858 } }
978 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
979 last_pos, cur); (void)last_pos;
980 assert(storeEnd < ZSTD_OPT_NUM);
981 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
982 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
983 opt[storeEnd] = lastSequence;
984 while (seqPos > 0) {
985 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
986 storeStart--;
987 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
988 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
989 opt[storeStart] = opt[seqPos];
990 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
991 }
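A note on the copy loop above, which replaces the old in-place pointer-swapping traversal:

    /* Reverse traversal, summarized : the final sequence is written at storeEnd = cur+1, then the
     * chain of selected sequences is walked backwards (stepping by ZSTD_totalLen, i.e. litlen+mlen)
     * and copied into decreasing slots, so opt[storeStart..storeEnd] ends up holding the chosen
     * sequences in forward order for the emission loop below. */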
859 992
860 /* save sequences */
861 { U32 pos;
862 for (pos=0; pos < last_pos; ) {
863 U32 const llen = (U32)(ip - anchor);
864 U32 const mlen = opt[pos].mlen;
865 U32 const offset = opt[pos].off;
866 if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */
867 pos += mlen; ip += mlen;
993 /* save sequences */
994 DEBUGLOG(6, "sending selected sequences into seqStore")
995 { U32 storePos;
996 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
997 U32 const llen = opt[storePos].litlen;
998 U32 const mlen = opt[storePos].mlen;
999 U32 const offCode = opt[storePos].off;
1000 U32 const advance = llen + mlen;
1001 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1002 anchor - istart, llen, mlen);
1003
1004 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1005 assert(storePos == storeEnd); /* must be last sequence */
1006 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1007 continue; /* will finish */
1008 }
868 1009
869 /* repcodes update : like ZSTD_updateRep(), but update in place */
870 if (offset >= ZSTD_REP_NUM) { /* full offset */
871 rep[2] = rep[1];
872 rep[1] = rep[0];
873 rep[0] = offset - ZSTD_REP_MOVE;
874 } else { /* repcode */
875 U32 const repCode = offset + (llen==0);
876 if (repCode) { /* note : if repCode==0, no change */
877 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
878 if (repCode >= 2) rep[2] = rep[1];
1010 /* repcodes update : like ZSTD_updateRep(), but update in place */
1011 if (offCode >= ZSTD_REP_NUM) { /* full offset */
1012 rep[2] = rep[1];
879 1013 rep[1] = rep[0];
880 rep[0] = currentOffset;
881 }
882 }
1014 rep[0] = offCode - ZSTD_REP_MOVE;
1015 } else { /* repcode */
1016 U32 const repCode = offCode + (llen==0);
1017 if (repCode) { /* note : if repCode==0, no change */
1018 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
1019 if (repCode >= 2) rep[2] = rep[1];
1020 rep[1] = rep[0];
1021 rep[0] = currentOffset;
1022 } }
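The in-place update above mirrors ZSTD_updateRep(). A minimal standalone sketch of the same convention, assuming the upstream constants ZSTD_REP_NUM == 3 and ZSTD_REP_MOVE == ZSTD_REP_NUM - 1 (the helper name below is made up for illustration):

    #include <stdint.h>

    #define REP_NUM  3
    #define REP_MOVE (REP_NUM - 1)

    /* illustration only : update a 3-entry repeat-offset history for one emitted sequence */
    void update_rep_history(uint32_t rep[REP_NUM], uint32_t offCode, uint32_t llen)
    {
        if (offCode >= REP_NUM) {                 /* full offset : push it onto the history */
            rep[2] = rep[1];
            rep[1] = rep[0];
            rep[0] = offCode - REP_MOVE;
        } else {                                  /* repcode : codes shift by one when llen == 0 */
            uint32_t const repCode = offCode + (llen == 0);
            if (repCode) {                        /* repCode == 0 => reuse rep[0], nothing to do */
                uint32_t const current = (repCode == REP_NUM) ? (rep[0] - 1) : rep[repCode];
                if (repCode >= 2) rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = current;
            }
        }
    }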
883 1023
884 ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
885 ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
886 anchor = ip;
887 } }
888 ZSTD_setLog2Prices(optStatePtr);
1024 assert(anchor + llen <= iend);
1025 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
1026 ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
1027 anchor += advance;
1028 ip = anchor;
1029 } }
1030 ZSTD_setBasePrices(optStatePtr, optLevel);
1031 }
1032
889 1033 } /* while (ip < ilimit) */
890 1034
891 1035 /* Return the last literals size */
@@ -895,29 +1039,94 b' size_t ZSTD_compressBlock_opt_generic(ZS'
895 1039
896 1040 size_t ZSTD_compressBlock_btopt(
897 1041 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
898 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1042 const void* src, size_t srcSize)
899 1043 {
900 1044 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
901 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
1045 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1046 }
1047
1048
1049 /* used in 2-pass strategy */
1050 static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
1051 {
1052 U32 s, sum=0;
1053 assert(ZSTD_FREQ_DIV+bonus > 0);
1054 for (s=0; s<=lastEltIndex; s++) {
1055 table[s] <<= ZSTD_FREQ_DIV+bonus;
1056 table[s]--;
1057 sum += table[s];
1058 }
1059 return sum;
1060 }
1061
1062 /* used in 2-pass strategy */
1063 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1064 {
1065 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1066 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
1067 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
1068 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
902 1069 }
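In plain terms, the two helpers above re-weight the statistics collected by a first pass:

    /* ZSTD_upscaleStat() shifts every frequency left by (ZSTD_FREQ_DIV + bonus) bits, subtracts
     * one, and returns the new total; ZSTD_upscaleStats() applies this to the literal,
     * literal-length, match-length and offset-code tables, so counts gathered on a first pass
     * carry extra weight when they seed the second pass (see the disabled 2-pass block below). */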
903 1070
904 1071 size_t ZSTD_compressBlock_btultra(
905 1072 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
906 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1073 const void* src, size_t srcSize)
907 1074 {
908 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
1075 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1076 #if 0
1077 /* 2-pass strategy (disabled)
1078 * this strategy makes a first pass over first block to collect statistics
1079 * and seed next round's statistics with it.
1080 * The compression ratio gain is generally small (~0.5% on first block),
1081 * the cost is 2x cpu time on first block. */
1082 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1083 if ( (ms->opt.litLengthSum==0) /* first block */
1084 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1085 && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
1086 U32 tmpRep[ZSTD_REP_NUM];
1087 DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
1088 assert(ms->nextToUpdate >= ms->window.dictLimit
1089 && ms->nextToUpdate <= ms->window.dictLimit + 1);
1090 memcpy(tmpRep, rep, sizeof(tmpRep));
1091 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1092 ZSTD_resetSeqStore(seqStore);
1093 /* invalidate first scan from history */
1094 ms->window.base -= srcSize;
1095 ms->window.dictLimit += (U32)srcSize;
1096 ms->window.lowLimit = ms->window.dictLimit;
1097 ms->nextToUpdate = ms->window.dictLimit;
1098 ms->nextToUpdate3 = ms->window.dictLimit;
1099 /* reinforce weight of collected statistics */
1100 ZSTD_upscaleStats(&ms->opt);
1101 }
1102 #endif
1103 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1104 }
1105
1106 size_t ZSTD_compressBlock_btopt_dictMatchState(
1107 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1108 const void* src, size_t srcSize)
1109 {
1110 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1111 }
1112
1113 size_t ZSTD_compressBlock_btultra_dictMatchState(
1114 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1115 const void* src, size_t srcSize)
1116 {
1117 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
909 1118 }
910 1119
911 1120 size_t ZSTD_compressBlock_btopt_extDict(
912 1121 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
913 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1122 const void* src, size_t srcSize)
914 1123 {
915 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
1124 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
916 1125 }
917 1126
918 1127 size_t ZSTD_compressBlock_btultra_extDict(
919 1128 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
920 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1129 const void* src, size_t srcSize)
921 1130 {
922 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
1131 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
923 1132 }
@@ -17,23 +17,29 b' extern "C" {'
17 17
18 18 #include "zstd_compress_internal.h"
19 19
20 void ZSTD_updateTree(
21 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
22 const BYTE* ip, const BYTE* iend); /* used in ZSTD_loadDictionaryContent() */
20 /* used in ZSTD_loadDictionaryContent() */
21 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
23 22
24 23 size_t ZSTD_compressBlock_btopt(
25 24 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
25 void const* src, size_t srcSize);
27 26 size_t ZSTD_compressBlock_btultra(
28 27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
28 void const* src, size_t srcSize);
29
30 size_t ZSTD_compressBlock_btopt_dictMatchState(
31 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
32 void const* src, size_t srcSize);
33 size_t ZSTD_compressBlock_btultra_dictMatchState(
34 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
35 void const* src, size_t srcSize);
30 36
31 37 size_t ZSTD_compressBlock_btopt_extDict(
32 38 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
33 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
39 void const* src, size_t srcSize);
34 40 size_t ZSTD_compressBlock_btultra_extDict(
35 41 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
36 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
42 void const* src, size_t srcSize);
37 43
38 44 #if defined (__cplusplus)
39 45 }
@@ -37,18 +37,19 b''
37 37 #define ZSTD_RESIZE_SEQPOOL 0
38 38
39 39 /* ====== Debug ====== */
40 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
40 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
41 && !defined(_MSC_VER) \
42 && !defined(__MINGW32__)
41 43
42 44 # include <stdio.h>
43 45 # include <unistd.h>
44 46 # include <sys/times.h>
45 # define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
46 47
47 48 # define DEBUG_PRINTHEX(l,p,n) { \
48 49 unsigned debug_u; \
49 50 for (debug_u=0; debug_u<(n); debug_u++) \
50 DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
51 DEBUGLOGRAW(l, " \n"); \
51 RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
52 RAWLOG(l, " \n"); \
52 53 }
53 54
54 55 static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -62,7 +63,7 b' static unsigned long long GetCurrentCloc'
62 63
63 64 #define MUTEX_WAIT_TIME_DLEVEL 6
64 65 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
65 if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) { \
66 if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
66 67 unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
67 68 ZSTD_pthread_mutex_lock(mutex); \
68 69 { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
@@ -160,6 +161,25 b' static void ZSTDMT_setBufferSize(ZSTDMT_'
160 161 ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
161 162 }
162 163
164
165 static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
166 {
167 unsigned const maxNbBuffers = 2*nbWorkers + 3;
168 if (srcBufPool==NULL) return NULL;
169 if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
170 return srcBufPool;
171 /* need a larger buffer pool */
172 { ZSTD_customMem const cMem = srcBufPool->cMem;
173 size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
174 ZSTDMT_bufferPool* newBufPool;
175 ZSTDMT_freeBufferPool(srcBufPool);
176 newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
177 if (newBufPool==NULL) return newBufPool;
178 ZSTDMT_setBufferSize(newBufPool, bSize);
179 return newBufPool;
180 }
181 }
182
163 183 /** ZSTDMT_getBuffer() :
164 184 * assumption : bufPool must be valid
165 185 * @return : a buffer, with start pointer and size
@@ -229,8 +249,8 b' static buffer_t ZSTDMT_resizeBuffer(ZSTD'
229 249 /* store buffer for later re-use, up to pool capacity */
230 250 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
231 251 {
252 DEBUGLOG(5, "ZSTDMT_releaseBuffer");
232 253 if (buf.start == NULL) return; /* compatible with release on NULL */
233 DEBUGLOG(5, "ZSTDMT_releaseBuffer");
234 254 ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
235 255 if (bufPool->nbBuffers < bufPool->totalBuffers) {
236 256 bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
@@ -300,7 +320,8 b' static void ZSTDMT_setNbSeq(ZSTDMT_seqPo'
300 320
301 321 static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
302 322 {
303 ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
323 ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
324 if (seqPool == NULL) return NULL;
304 325 ZSTDMT_setNbSeq(seqPool, 0);
305 326 return seqPool;
306 327 }
@@ -310,6 +331,10 b' static void ZSTDMT_freeSeqPool(ZSTDMT_se'
310 331 ZSTDMT_freeBufferPool(seqPool);
311 332 }
312 333
334 static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
335 {
336 return ZSTDMT_expandBufferPool(pool, nbWorkers);
337 }
313 338
314 339
315 340 /* ===== CCtx Pool ===== */
@@ -355,6 +380,18 b' static ZSTDMT_CCtxPool* ZSTDMT_createCCt'
355 380 return cctxPool;
356 381 }
357 382
383 static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
384 unsigned nbWorkers)
385 {
386 if (srcPool==NULL) return NULL;
387 if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
388 /* need a larger cctx pool */
389 { ZSTD_customMem const cMem = srcPool->cMem;
390 ZSTDMT_freeCCtxPool(srcPool);
391 return ZSTDMT_createCCtxPool(nbWorkers, cMem);
392 }
393 }
394
358 395 /* only works during initialization phase, not during compression */
359 396 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
360 397 {
@@ -425,12 +462,11 b' typedef struct {'
425 462 ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
426 463 } serialState_t;
427 464
428 static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
465 static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
429 466 {
430 467 /* Adjust parameters */
431 468 if (params.ldmParams.enableLdm) {
432 469 DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
433 params.ldmParams.windowLog = params.cParams.windowLog;
434 470 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
435 471 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
436 472 assert(params.ldmParams.hashEveryLog < 32);
@@ -453,7 +489,7 b' static int ZSTDMT_serialState_reset(seri'
453 489 serialState->params.ldmParams.hashLog -
454 490 serialState->params.ldmParams.bucketSizeLog;
455 491 /* Size the seq pool tables */
456 ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
492 ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
457 493 /* Reset the window */
458 494 ZSTD_window_clear(&serialState->ldmState.window);
459 495 serialState->ldmWindow = serialState->ldmState.window;
@@ -473,6 +509,7 b' static int ZSTDMT_serialState_reset(seri'
473 509 memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
474 510 }
475 511 serialState->params = params;
512 serialState->params.jobSize = (U32)jobSize;
476 513 return 0;
477 514 }
478 515
@@ -505,6 +542,7 b' static void ZSTDMT_serialState_update(se'
505 542 /* Wait for our turn */
506 543 ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
507 544 while (serialState->nextJobID < jobID) {
545 DEBUGLOG(5, "wait for serialState->cond");
508 546 ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
509 547 }
510 548 /* A future job may error and skip our job */
@@ -514,6 +552,7 b' static void ZSTDMT_serialState_update(se'
514 552 size_t error;
515 553 assert(seqStore.seq != NULL && seqStore.pos == 0 &&
516 554 seqStore.size == 0 && seqStore.capacity > 0);
555 assert(src.size <= serialState->params.jobSize);
517 556 ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
518 557 error = ZSTD_ldm_generateSequences(
519 558 &serialState->ldmState, &seqStore,
@@ -593,14 +632,32 b' typedef struct {'
593 632 unsigned frameChecksumNeeded; /* used only by mtctx */
594 633 } ZSTDMT_jobDescription;
595 634
635 #define JOB_ERROR(e) { \
636 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
637 job->cSize = e; \
638 ZSTD_pthread_mutex_unlock(&job->job_mutex); \
639 goto _endJob; \
640 }
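The new JOB_ERROR() macro centralizes the worker error path:

    /* JOB_ERROR(e) records the error code into job->cSize under the job mutex, then jumps to
     * _endJob for cleanup. It replaces the repeated inline assignments to job->cSize followed by
     * goto _endJob that ZSTDMT_compressionJob() previously used at every failure point. */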
641
596 642 /* ZSTDMT_compressionJob() is a POOL_function type */
597 void ZSTDMT_compressionJob(void* jobDescription)
643 static void ZSTDMT_compressionJob(void* jobDescription)
598 644 {
599 645 ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
600 646 ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
601 647 ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
602 648 rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
603 649 buffer_t dstBuff = job->dstBuff;
650 size_t lastCBlockSize = 0;
651
652 /* resources */
653 if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
654 if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
655 dstBuff = ZSTDMT_getBuffer(job->bufPool);
656 if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
657 job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
658 }
659 if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
660 JOB_ERROR(ERROR(memory_allocation));
604 661
605 662 /* Don't compute the checksum for chunks, since we compute it externally,
606 663 * but write it in the header.
@@ -609,47 +666,31 b' void ZSTDMT_compressionJob(void* jobDesc'
609 666 /* Don't run LDM for the chunks, since we handle it externally */
610 667 jobParams.ldmParams.enableLdm = 0;
611 668
612 /* ressources */
613 if (cctx==NULL) {
614 job->cSize = ERROR(memory_allocation);
615 goto _endJob;
616 }
617 if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
618 dstBuff = ZSTDMT_getBuffer(job->bufPool);
619 if (dstBuff.start==NULL) {
620 job->cSize = ERROR(memory_allocation);
621 goto _endJob;
622 }
623 job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
624 }
625 669
626 670 /* init */
627 671 if (job->cdict) {
628 size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
672 size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
629 673 assert(job->firstJob); /* only allowed for first job */
630 if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
674 if (ZSTD_isError(initError)) JOB_ERROR(initError);
631 675 } else { /* srcStart points at reloaded section */
632 676 U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
633 677 { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
634 if (ZSTD_isError(forceWindowError)) {
635 job->cSize = forceWindowError;
636 goto _endJob;
637 } }
678 if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
679 }
638 680 { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
639 681 job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
682 ZSTD_dtlm_fast,
640 683 NULL, /*cdict*/
641 684 jobParams, pledgedSrcSize);
642 if (ZSTD_isError(initError)) {
643 job->cSize = initError;
644 goto _endJob;
645 } } }
685 if (ZSTD_isError(initError)) JOB_ERROR(initError);
686 } }
646 687
647 688 /* Perform serial step as early as possible, but after CCtx initialization */
648 689 ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
649 690
650 691 if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
651 692 size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
652 if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
693 if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
653 694 DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
654 695 ZSTD_invalidateRepCodes(cctx);
655 696 }
@@ -667,7 +708,7 b' void ZSTDMT_compressionJob(void* jobDesc'
667 708 assert(job->cSize == 0);
668 709 for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
669 710 size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
670 if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
711 if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
671 712 ip += chunkSize;
672 713 op += cSize; assert(op < oend);
673 714 /* stats */
@@ -680,18 +721,16 b' void ZSTDMT_compressionJob(void* jobDesc'
680 721 ZSTD_pthread_mutex_unlock(&job->job_mutex);
681 722 }
682 723 /* last block */
683 assert(chunkSize > 0); assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
724 assert(chunkSize > 0);
725 assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
684 726 if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
685 727 size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
686 728 size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
687 729 size_t const cSize = (job->lastJob) ?
688 730 ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
689 731 ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
690 if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
691 /* stats */
692 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
693 job->cSize += cSize;
694 ZSTD_pthread_mutex_unlock(&job->job_mutex);
732 if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
733 lastCBlockSize = cSize;
695 734 } }
696 735
697 736 _endJob:
@@ -704,7 +743,9 b' void ZSTDMT_compressionJob(void* jobDesc'
704 743 ZSTDMT_releaseCCtx(job->cctxPool, cctx);
705 744 /* report */
706 745 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
707 job->consumed = job->src.size;
746 if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0);
747 job->cSize += lastCBlockSize;
748 job->consumed = job->src.size; /* when job->consumed == job->src.size , compression job is presumed completed */
708 749 ZSTD_pthread_cond_signal(&job->job_cond);
709 750 ZSTD_pthread_mutex_unlock(&job->job_mutex);
710 751 }
@@ -745,9 +786,9 b' struct ZSTDMT_CCtx_s {'
745 786 ZSTD_CCtx_params params;
746 787 size_t targetSectionSize;
747 788 size_t targetPrefixSize;
748 roundBuff_t roundBuff;
789 int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
749 790 inBuff_t inBuff;
750 int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
791 roundBuff_t roundBuff;
751 792 serialState_t serial;
752 793 unsigned singleBlockingThread;
753 794 unsigned jobIDMask;
@@ -798,6 +839,20 b' static ZSTDMT_jobDescription* ZSTDMT_cre'
798 839 return jobTable;
799 840 }
800 841
842 static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
843 U32 nbJobs = nbWorkers + 2;
844 if (nbJobs > mtctx->jobIDMask+1) { /* need more job capacity */
845 ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
846 mtctx->jobIDMask = 0;
847 mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
848 if (mtctx->jobs==NULL) return ERROR(memory_allocation);
849 assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */
850 mtctx->jobIDMask = nbJobs - 1;
851 }
852 return 0;
853 }
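A note on the helper above:

    /* The jobs table is only ever grown here; ZSTDMT_createJobsTable() is expected to round
     * nbJobs up to a power of two (the assert above checks this), so jobIDMask = nbJobs - 1
     * keeps working as a cheap wrap-around mask for job identifiers. */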
854
855
801 856 /* ZSTDMT_CCtxParam_setNbWorkers():
802 857 * Internal use only */
803 858 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
@@ -875,7 +930,7 b' static void ZSTDMT_waitForAllJobsComplet'
875 930 unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
876 931 ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
877 932 while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
878 DEBUGLOG(5, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */
933 DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */
879 934 ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
880 935 }
881 936 ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
@@ -924,6 +979,8 b' size_t ZSTDMT_CCtxParam_setMTCtxParamete'
924 979 if ( (value > 0) /* value==0 => automatic job size */
925 980 & (value < ZSTDMT_JOBSIZE_MIN) )
926 981 value = ZSTDMT_JOBSIZE_MIN;
982 if (value > ZSTDMT_JOBSIZE_MAX)
983 value = ZSTDMT_JOBSIZE_MAX;
927 984 params->jobSize = value;
928 985 return value;
929 986 case ZSTDMT_p_overlapSectionLog :
@@ -950,6 +1007,21 b' size_t ZSTDMT_setMTCtxParameter(ZSTDMT_C'
950 1007 }
951 1008 }
952 1009
1010 size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
1011 {
1012 switch (parameter) {
1013 case ZSTDMT_p_jobSize:
1014 *value = mtctx->params.jobSize;
1015 break;
1016 case ZSTDMT_p_overlapSectionLog:
1017 *value = mtctx->params.overlapSizeLog;
1018 break;
1019 default:
1020 return ERROR(parameter_unsupported);
1021 }
1022 return 0;
1023 }
1024
953 1025 /* Sets parameters relevant to the compression job,
954 1026 * initializing others to default values. */
955 1027 static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
@@ -960,13 +1032,30 b' static ZSTD_CCtx_params ZSTDMT_initJobCC'
960 1032 jobParams.cParams = params.cParams;
961 1033 jobParams.fParams = params.fParams;
962 1034 jobParams.compressionLevel = params.compressionLevel;
963 jobParams.disableLiteralCompression = params.disableLiteralCompression;
964 1035
965 1036 return jobParams;
966 1037 }
967 1038
1039
1040 /* ZSTDMT_resize() :
1041 * @return : error code if fails, 0 on success */
1042 static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
1043 {
1044 if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
1045 CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
1046 mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
1047 if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
1048 mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
1049 if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
1050 mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
1051 if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
1052 ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
1053 return 0;
1054 }
1055
1056
968 1057 /*! ZSTDMT_updateCParams_whileCompressing() :
969 * Updates only a selected set of compression parameters, to remain compatible with current frame.
1058 * Updates a selected set of compression parameters, remaining compatible with currently active frame.
970 1059 * New parameters will be applied to next compression job. */
971 1060 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
972 1061 {
@@ -981,38 +1070,36 b' void ZSTDMT_updateCParams_whileCompressi'
981 1070 }
982 1071 }
983 1072
984 /* ZSTDMT_getNbWorkers():
985 * @return nb threads currently active in mtctx.
986 * mtctx must be valid */
987 unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
988 {
989 assert(mtctx != NULL);
990 return mtctx->params.nbWorkers;
991 }
992
993 1073 /* ZSTDMT_getFrameProgression():
994 1074 * tells how much data has been consumed (input) and produced (output) for current frame.
995 1075 * able to count progression inside worker threads.
996 * Note : mutex will be acquired during statistics collection. */
1076 * Note : mutex will be acquired during statistics collection inside workers. */
997 1077 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
998 1078 {
999 1079 ZSTD_frameProgression fps;
1000 DEBUGLOG(6, "ZSTDMT_getFrameProgression");
1080 DEBUGLOG(5, "ZSTDMT_getFrameProgression");
1081 fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
1001 1082 fps.consumed = mtctx->consumed;
1002 fps.produced = mtctx->produced;
1003 fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
1083 fps.produced = fps.flushed = mtctx->produced;
1084 fps.currentJobID = mtctx->nextJobID;
1085 fps.nbActiveWorkers = 0;
1004 1086 { unsigned jobNb;
1005 1087 unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
1006 1088 DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
1007 1089 mtctx->doneJobID, lastJobNb, mtctx->jobReady)
1008 1090 for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
1009 1091 unsigned const wJobID = jobNb & mtctx->jobIDMask;
1010 ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex);
1011 { size_t const cResult = mtctx->jobs[wJobID].cSize;
1092 ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
1093 ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
1094 { size_t const cResult = jobPtr->cSize;
1012 1095 size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
1013 fps.consumed += mtctx->jobs[wJobID].consumed;
1014 fps.ingested += mtctx->jobs[wJobID].src.size;
1096 size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
1097 assert(flushed <= produced);
1098 fps.ingested += jobPtr->src.size;
1099 fps.consumed += jobPtr->consumed;
1015 1100 fps.produced += produced;
1101 fps.flushed += flushed;
1102 fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size);
1016 1103 }
1017 1104 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1018 1105 }
@@ -1021,6 +1108,34 b' ZSTD_frameProgression ZSTDMT_getFramePro'
1021 1108 }
1022 1109
1023 1110
1111 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1112 {
1113 size_t toFlush;
1114 unsigned const jobID = mtctx->doneJobID;
1115 assert(jobID <= mtctx->nextJobID);
1116 if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */
1117
1118 /* look into oldest non-fully-flushed job */
1119 { unsigned const wJobID = jobID & mtctx->jobIDMask;
1120 ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID];
1121 ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
1122 { size_t const cResult = jobPtr->cSize;
1123 size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
1124 size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
1125 assert(flushed <= produced);
1126 toFlush = produced - flushed;
1127 if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
1128 /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
1129 assert(jobPtr->consumed < jobPtr->src.size);
1130 }
1131 }
1132 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1133 }
1134
1135 return toFlush;
1136 }
1137
1138
1024 1139 /* ------------------------------------------ */
1025 1140 /* ===== Multi-threaded compression ===== */
1026 1141 /* ------------------------------------------ */
@@ -1087,18 +1202,10 b' static size_t ZSTDMT_compress_advanced_i'
1087 1202
1088 1203 assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
1089 1204 ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
1090 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
1205 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
1091 1206 return ERROR(memory_allocation);
1092 1207
1093 if (nbJobs > mtctx->jobIDMask+1) { /* enlarge job table */
1094 U32 jobsTableSize = nbJobs;
1095 ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
1096 mtctx->jobIDMask = 0;
1097 mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
1098 if (mtctx->jobs==NULL) return ERROR(memory_allocation);
1099 assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0)); /* ensure jobsTableSize is a power of 2 */
1100 mtctx->jobIDMask = jobsTableSize - 1;
1101 }
1208 CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
1102 1209
1103 1210 { unsigned u;
1104 1211 for (u=0; u<nbJobs; u++) {
@@ -1221,17 +1328,18 b' size_t ZSTDMT_initCStream_internal('
1221 1328 const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
1222 1329 unsigned long long pledgedSrcSize)
1223 1330 {
1224 DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
1225 (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
1226 /* params are supposed to be fully validated at this point */
1331 DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
1332 (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
1333
1334 /* params are supposed to be partially validated at this point */
1227 1335 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1228 1336 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
1229 assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
1230 1337
1231 1338 /* init */
1232 if (params.jobSize == 0) {
1233 params.jobSize = 1U << ZSTDMT_computeTargetJobLog(params);
1234 }
1339 if (params.nbWorkers != mtctx->params.nbWorkers)
1340 CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
1341
1342 if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1235 1343 if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1236 1344
1237 1345 mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
@@ -1270,7 +1378,9 b' size_t ZSTDMT_initCStream_internal('
1270 1378 mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1271 1379 DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
1272 1380 mtctx->targetSectionSize = params.jobSize;
1273 if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
1381 if (mtctx->targetSectionSize == 0) {
1382 mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1383 }
1274 1384 if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
1275 1385 DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
1276 1386 DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
@@ -1312,7 +1422,7 b' size_t ZSTDMT_initCStream_internal('
1312 1422 mtctx->allJobsCompleted = 0;
1313 1423 mtctx->consumed = 0;
1314 1424 mtctx->produced = 0;
1315 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
1425 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
1316 1426 return ERROR(memory_allocation);
1317 1427 return 0;
1318 1428 }
@@ -1420,7 +1530,7 b' static size_t ZSTDMT_createCompressionJo'
1420 1530 mtctx->jobs[jobID].jobID = mtctx->nextJobID;
1421 1531 mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
1422 1532 mtctx->jobs[jobID].lastJob = endFrame;
1423 mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag;
1533 mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0);
1424 1534 mtctx->jobs[jobID].dstFlushed = 0;
1425 1535
1426 1536 /* Update the round buffer pos and clear the input buffer to be reset */
@@ -1468,6 +1578,8 b' static size_t ZSTDMT_createCompressionJo'
1468 1578
1469 1579
1470 1580 /*! ZSTDMT_flushProduced() :
1581 * flush whatever data has been produced but not yet flushed in current job.
1582 * move to next job if current one is fully flushed.
1471 1583 * `output` : `pos` will be updated with amount of data flushed .
1472 1584 * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
1473 1585 * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
@@ -1496,7 +1608,7 b' static size_t ZSTDMT_flushProduced(ZSTDM'
1496 1608 /* try to flush something */
1497 1609 { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */
1498 1610 size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */
1499 size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */
1611 size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */
1500 1612 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1501 1613 if (ZSTD_isError(cSize)) {
1502 1614 DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
@@ -1516,6 +1628,7 b' static size_t ZSTDMT_flushProduced(ZSTDM'
1516 1628 mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */
1517 1629 mtctx->jobs[wJobID].frameChecksumNeeded = 0;
1518 1630 }
1631
1519 1632 if (cSize > 0) { /* compression is ongoing or completed */
1520 1633 size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
1521 1634 DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
@@ -1529,11 +1642,12 b' static size_t ZSTDMT_flushProduced(ZSTDM'
1529 1642 output->pos += toFlush;
1530 1643 mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
1531 1644
1532 if ( (srcConsumed == srcSize) /* job completed */
1645 if ( (srcConsumed == srcSize) /* job is completed */
1533 1646 && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */
1534 1647 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
1535 1648 mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
1536 1649 ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
1650 DEBUGLOG(5, "dstBuffer released");
1537 1651 mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
1538 1652 mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */
1539 1653 mtctx->consumed += srcSize;
@@ -1610,6 +1724,7 b' static int ZSTDMT_doesOverlapWindow(buff'
1610 1724 range_t extDict;
1611 1725 range_t prefix;
1612 1726
1727 DEBUGLOG(5, "ZSTDMT_doesOverlapWindow");
1613 1728 extDict.start = window.dictBase + window.lowLimit;
1614 1729 extDict.size = window.dictLimit - window.lowLimit;
1615 1730
@@ -1630,12 +1745,13 b' static void ZSTDMT_waitForLdmComplete(ZS'
1630 1745 {
1631 1746 if (mtctx->params.ldmParams.enableLdm) {
1632 1747 ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
1748 DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
1633 1749 DEBUGLOG(5, "source [0x%zx, 0x%zx)",
1634 1750 (size_t)buffer.start,
1635 1751 (size_t)buffer.start + buffer.capacity);
1636 1752 ZSTD_PTHREAD_MUTEX_LOCK(mutex);
1637 1753 while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
1638 DEBUGLOG(6, "Waiting for LDM to finish...");
1754 DEBUGLOG(5, "Waiting for LDM to finish...");
1639 1755 ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
1640 1756 }
1641 1757 DEBUGLOG(6, "Done waiting for LDM to finish");
@@ -1655,6 +1771,7 b' static int ZSTDMT_tryGetInputRange(ZSTDM'
1655 1771 size_t const target = mtctx->targetSectionSize;
1656 1772 buffer_t buffer;
1657 1773
1774 DEBUGLOG(5, "ZSTDMT_tryGetInputRange");
1658 1775 assert(mtctx->inBuff.buffer.start == NULL);
1659 1776 assert(mtctx->roundBuff.capacity >= target);
1660 1777
@@ -1668,7 +1785,7 b' static int ZSTDMT_tryGetInputRange(ZSTDM'
1668 1785 buffer.start = start;
1669 1786 buffer.capacity = prefixSize;
1670 1787 if (ZSTDMT_isOverlapped(buffer, inUse)) {
1671 DEBUGLOG(6, "Waiting for buffer...");
1788 DEBUGLOG(5, "Waiting for buffer...");
1672 1789 return 0;
1673 1790 }
1674 1791 ZSTDMT_waitForLdmComplete(mtctx, buffer);
@@ -1680,7 +1797,7 b' static int ZSTDMT_tryGetInputRange(ZSTDM'
1680 1797 buffer.capacity = target;
1681 1798
1682 1799 if (ZSTDMT_isOverlapped(buffer, inUse)) {
1683 DEBUGLOG(6, "Waiting for buffer...");
1800 DEBUGLOG(5, "Waiting for buffer...");
1684 1801 return 0;
1685 1802 }
1686 1803 assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
@@ -1753,8 +1870,10 b' size_t ZSTDMT_compressStream_generic(ZST'
1753 1870 /* It is only possible for this operation to fail if there are
1754 1871 * still compression jobs ongoing.
1755 1872 */
1873 DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed");
1756 1874 assert(mtctx->doneJobID != mtctx->nextJobID);
1757 }
1875 } else
1876 DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
1758 1877 }
1759 1878 if (mtctx->inBuff.buffer.start != NULL) {
1760 1879 size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
@@ -1782,6 +1901,7 b' size_t ZSTDMT_compressStream_generic(ZST'
1782 1901 /* check for potential compressed data ready to be flushed */
1783 1902 { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
1784 1903 if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */
1904 DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush);
1785 1905 return remainingToFlush;
1786 1906 }
1787 1907 }
@@ -95,6 +95,11 b' typedef enum {'
95 95 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
96 96 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
97 97
98 /* ZSTDMT_getMTCtxParameter() :
99 * Query the ZSTDMT_CCtx for a parameter value.
100 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
101 ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
102
98 103
99 104 /*! ZSTDMT_compressStream_generic() :
100 105 * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
@@ -114,11 +119,21 b' ZSTDLIB_API size_t ZSTDMT_compressStream'
114 119 * === Not exposed in libzstd. Never invoke directly ===
115 120 * ======================================================== */
116 121
122 /*! ZSTDMT_toFlushNow()
123 * Tell how many bytes are ready to be flushed immediately.
124 * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
125 * If it returns 0, it means either that there is no active job,
126 * or that the oldest job is still active but everything it has produced has already been flushed,
127 * in which case flushing speed is limited by the speed of that oldest job. */
128 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
129
130 /*! ZSTDMT_CCtxParam_setMTCtxParameter()
131 * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
117 132 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
118 133
119 /* ZSTDMT_CCtxParam_setNbWorkers()
120 * Set nbWorkers, and clamp it.
121 * Also reset jobSize and overlapLog */
134 /*! ZSTDMT_CCtxParam_setNbWorkers()
135 * Set nbWorkers, and clamp it.
136 * Also reset jobSize and overlapLog */
122 137 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
123 138
124 139 /*! ZSTDMT_updateCParams_whileCompressing() :
@@ -126,14 +141,9 b' size_t ZSTDMT_CCtxParam_setNbWorkers(ZST'
126 141 * New parameters will be applied to next compression job. */
127 142 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
128 143
129 /* ZSTDMT_getNbWorkers():
130 * @return nb threads currently active in mtctx.
131 * mtctx must be valid */
132 unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
133
134 /* ZSTDMT_getFrameProgression():
135 * tells how much data has been consumed (input) and produced (output) for current frame.
136 * able to count progression inside worker threads.
144 /*! ZSTDMT_getFrameProgression():
145 * tells how much data has been consumed (input) and produced (output) for current frame.
146 * able to count progression inside worker threads.
137 147 */
138 148 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
139 149
This diff has been collapsed as it changes many lines (714 lines changed).
@@ -1,6 +1,7 b''
1 1 /* ******************************************************************
2 Huffman decoder, part of New Generation Entropy library
3 Copyright (C) 2013-2016, Yann Collet.
2 huff0 huffman decoder,
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
4 5
5 6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 7
@@ -29,16 +30,15 b''
29 30
30 31 You can contact the author at :
31 32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 33 ****************************************************************** */
34 34
35 35 /* **************************************************************
36 36 * Dependencies
37 37 ****************************************************************/
38 38 #include <string.h> /* memcpy, memset */
39 #include "compiler.h"
39 40 #include "bitstream.h" /* BIT_* */
40 #include "compiler.h"
41 #include "fse.h" /* header compression */
41 #include "fse.h" /* to compress headers */
42 42 #define HUF_STATIC_LINKING_ONLY
43 43 #include "huf.h"
44 44 #include "error_private.h"
@@ -48,7 +48,6 b''
48 48 * Error Management
49 49 ****************************************************************/
50 50 #define HUF_isError ERR_isError
51 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
52 51 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
53 52
54 53
@@ -75,15 +74,15 b' static DTableDesc HUF_getDTableDesc(cons'
75 74 /*-***************************/
76 75 /* single-symbol decoding */
77 76 /*-***************************/
78 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
77 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
79 78
80 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
79 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
81 80 {
82 81 U32 tableLog = 0;
83 82 U32 nbSymbols = 0;
84 83 size_t iSize;
85 84 void* const dtPtr = DTable + 1;
86 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
85 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
87 86
88 87 U32* rankVal;
89 88 BYTE* huffWeight;
@@ -96,7 +95,7 b' size_t HUF_readDTableX2_wksp(HUF_DTable*'
96 95
97 96 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
98 97
99 HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
98 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
100 99 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
101 100
102 101 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@@ -124,7 +123,7 b' size_t HUF_readDTableX2_wksp(HUF_DTable*'
124 123 U32 const w = huffWeight[n];
125 124 U32 const length = (1 << w) >> 1;
126 125 U32 u;
127 HUF_DEltX2 D;
126 HUF_DEltX1 D;
128 127 D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
129 128 for (u = rankVal[w]; u < rankVal[w] + length; u++)
130 129 dt[u] = D;
@@ -134,17 +133,15 b' size_t HUF_readDTableX2_wksp(HUF_DTable*'
134 133 return iSize;
135 134 }
136 135
137 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
136 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
138 137 {
139 138 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
140 return HUF_readDTableX2_wksp(DTable, src, srcSize,
139 return HUF_readDTableX1_wksp(DTable, src, srcSize,
141 140 workSpace, sizeof(workSpace));
142 141 }
143 142
144 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
145
146 143 FORCE_INLINE_TEMPLATE BYTE
147 HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
144 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
148 145 {
149 146 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
150 147 BYTE const c = dt[val].byte;
@@ -152,44 +149,44 b' HUF_decodeSymbolX2(BIT_DStream_t* Dstrea'
152 149 return c;
153 150 }
154 151
155 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
156 *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
152 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
153 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
157 154
158 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
155 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
159 156 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
160 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
157 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
161 158
162 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
159 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
163 160 if (MEM_64bits()) \
164 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
161 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
165 162
166 163 HINT_INLINE size_t
167 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
164 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
168 165 {
169 166 BYTE* const pStart = p;
170 167
171 168 /* up to 4 symbols at a time */
172 169 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
173 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
174 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
175 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
176 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
170 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
171 HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
172 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
173 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
177 174 }
178 175
179 176 /* [0-3] symbols remaining */
180 177 if (MEM_32bits())
181 178 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
182 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
179 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
183 180
184 181 /* no more data to retrieve from bitstream, no need to reload */
185 182 while (p < pEnd)
186 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
183 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
187 184
188 185 return pEnd-pStart;
189 186 }
190 187
191 188 FORCE_INLINE_TEMPLATE size_t
192 HUF_decompress1X2_usingDTable_internal_body(
189 HUF_decompress1X1_usingDTable_internal_body(
193 190 void* dst, size_t dstSize,
194 191 const void* cSrc, size_t cSrcSize,
195 192 const HUF_DTable* DTable)
@@ -197,14 +194,14 b' HUF_decompress1X2_usingDTable_internal_b'
197 194 BYTE* op = (BYTE*)dst;
198 195 BYTE* const oend = op + dstSize;
199 196 const void* dtPtr = DTable + 1;
200 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
197 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
201 198 BIT_DStream_t bitD;
202 199 DTableDesc const dtd = HUF_getDTableDesc(DTable);
203 200 U32 const dtLog = dtd.tableLog;
204 201
205 202 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
206 203
207 HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
204 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
208 205
209 206 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
210 207
@@ -212,7 +209,7 b' HUF_decompress1X2_usingDTable_internal_b'
212 209 }
213 210
214 211 FORCE_INLINE_TEMPLATE size_t
215 HUF_decompress4X2_usingDTable_internal_body(
212 HUF_decompress4X1_usingDTable_internal_body(
216 213 void* dst, size_t dstSize,
217 214 const void* cSrc, size_t cSrcSize,
218 215 const HUF_DTable* DTable)
@@ -224,7 +221,7 b' HUF_decompress4X2_usingDTable_internal_b'
224 221 BYTE* const ostart = (BYTE*) dst;
225 222 BYTE* const oend = ostart + dstSize;
226 223 const void* const dtPtr = DTable + 1;
227 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
224 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
228 225
229 226 /* Init */
230 227 BIT_DStream_t bitD1;
@@ -260,22 +257,22 b' HUF_decompress4X2_usingDTable_internal_b'
260 257 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
261 258 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
262 259 while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
263 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
264 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
265 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
266 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
267 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
268 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
269 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
270 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
271 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
272 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
273 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
274 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
275 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
276 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
277 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
278 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
260 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
261 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
262 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
263 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
264 HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
265 HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
266 HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
267 HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
268 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
269 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
270 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
271 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
272 HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
273 HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
274 HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
275 HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
279 276 BIT_reloadDStream(&bitD1);
280 277 BIT_reloadDStream(&bitD2);
281 278 BIT_reloadDStream(&bitD3);
@@ -291,191 +288,10 b' HUF_decompress4X2_usingDTable_internal_b'
291 288 /* note : op4 is already verified within main loop */
292 289
293 290 /* finish bitStreams one by one */
294 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
295 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
296 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
297 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
298
299 /* check */
300 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
301 if (!endCheck) return ERROR(corruption_detected); }
302
303 /* decoded size */
304 return dstSize;
305 }
306 }
307
308
309 FORCE_INLINE_TEMPLATE U32
310 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
311 {
312 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
313 memcpy(op, dt+val, 2);
314 BIT_skipBits(DStream, dt[val].nbBits);
315 return dt[val].length;
316 }
317
318 FORCE_INLINE_TEMPLATE U32
319 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
320 {
321 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
322 memcpy(op, dt+val, 1);
323 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
324 else {
325 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
326 BIT_skipBits(DStream, dt[val].nbBits);
327 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
328 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
329 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
330 } }
331 return 1;
332 }
333
334 #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
335 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
336
337 #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
338 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
339 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
340
341 #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
342 if (MEM_64bits()) \
343 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
344
345 HINT_INLINE size_t
346 HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
347 const HUF_DEltX4* const dt, const U32 dtLog)
348 {
349 BYTE* const pStart = p;
350
351 /* up to 8 symbols at a time */
352 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
353 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
354 HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
355 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
356 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
357 }
358
359 /* closer to end : up to 2 symbols at a time */
360 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
361 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
362
363 while (p <= pEnd-2)
364 HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
365
366 if (p < pEnd)
367 p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
368
369 return p-pStart;
370 }
371
372 FORCE_INLINE_TEMPLATE size_t
373 HUF_decompress1X4_usingDTable_internal_body(
374 void* dst, size_t dstSize,
375 const void* cSrc, size_t cSrcSize,
376 const HUF_DTable* DTable)
377 {
378 BIT_DStream_t bitD;
379
380 /* Init */
381 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
382
383 /* decode */
384 { BYTE* const ostart = (BYTE*) dst;
385 BYTE* const oend = ostart + dstSize;
386 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
387 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
388 DTableDesc const dtd = HUF_getDTableDesc(DTable);
389 HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
390 }
391
392 /* check */
393 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
394
395 /* decoded size */
396 return dstSize;
397 }
398
399
400 FORCE_INLINE_TEMPLATE size_t
401 HUF_decompress4X4_usingDTable_internal_body(
402 void* dst, size_t dstSize,
403 const void* cSrc, size_t cSrcSize,
404 const HUF_DTable* DTable)
405 {
406 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
407
408 { const BYTE* const istart = (const BYTE*) cSrc;
409 BYTE* const ostart = (BYTE*) dst;
410 BYTE* const oend = ostart + dstSize;
411 const void* const dtPtr = DTable+1;
412 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
413
414 /* Init */
415 BIT_DStream_t bitD1;
416 BIT_DStream_t bitD2;
417 BIT_DStream_t bitD3;
418 BIT_DStream_t bitD4;
419 size_t const length1 = MEM_readLE16(istart);
420 size_t const length2 = MEM_readLE16(istart+2);
421 size_t const length3 = MEM_readLE16(istart+4);
422 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
423 const BYTE* const istart1 = istart + 6; /* jumpTable */
424 const BYTE* const istart2 = istart1 + length1;
425 const BYTE* const istart3 = istart2 + length2;
426 const BYTE* const istart4 = istart3 + length3;
427 size_t const segmentSize = (dstSize+3) / 4;
428 BYTE* const opStart2 = ostart + segmentSize;
429 BYTE* const opStart3 = opStart2 + segmentSize;
430 BYTE* const opStart4 = opStart3 + segmentSize;
431 BYTE* op1 = ostart;
432 BYTE* op2 = opStart2;
433 BYTE* op3 = opStart3;
434 BYTE* op4 = opStart4;
435 U32 endSignal;
436 DTableDesc const dtd = HUF_getDTableDesc(DTable);
437 U32 const dtLog = dtd.tableLog;
438
439 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
440 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
441 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
442 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
443 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
444
445 /* 16-32 symbols per loop (4-8 symbols per stream) */
446 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
447 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
448 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
449 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
450 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
451 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
452 HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
453 HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
454 HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
455 HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
456 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
457 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
458 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
459 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
460 HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
461 HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
462 HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
463 HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
464
465 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
466 }
467
468 /* check corruption */
469 if (op1 > opStart2) return ERROR(corruption_detected);
470 if (op2 > opStart3) return ERROR(corruption_detected);
471 if (op3 > opStart4) return ERROR(corruption_detected);
472 /* note : op4 already verified within main loop */
473
474 /* finish bitStreams one by one */
475 HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
476 HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
477 HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
478 HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
291 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
292 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
293 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
294 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
479 295
480 296 /* check */
481 297 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
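The four-stream decoders above (HUF_decompress4X1_usingDTable_internal_body and its double-symbol counterpart) read a 6-byte jump table ahead of the bit streams: three little-endian 16-bit lengths for the first three streams, with the fourth length implied by whatever remains. A minimal standalone sketch of that framing, using invented helper names rather than the library's internals:

    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical parser for the 4-stream layout used by HUF_decompress4X*:
     * 6 header bytes hold three little-endian 16-bit stream sizes; the fourth
     * size is whatever remains after the first three. */
    typedef struct { const uint8_t* start[4]; size_t size[4]; } huf4_streams_t;

    static uint16_t read_le16(const uint8_t* p) { return (uint16_t)(p[0] | (p[1] << 8)); }

    static int huf4_split(const uint8_t* src, size_t srcSize, huf4_streams_t* out)
    {
        if (srcSize < 10) return -1;            /* jump table + at least 1 byte per stream */
        {   size_t const len1 = read_le16(src);
            size_t const len2 = read_le16(src + 2);
            size_t const len3 = read_le16(src + 4);
            size_t const used = 6 + len1 + len2 + len3;
            if (used > srcSize) return -1;      /* corrupted lengths */
            out->start[0] = src + 6;              out->size[0] = len1;
            out->start[1] = out->start[0] + len1; out->size[1] = len2;
            out->start[2] = out->start[1] + len2; out->size[2] = len3;
            out->start[3] = out->start[2] + len3; out->size[3] = srcSize - used;
            return 0;
        }
    }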
@@ -493,7 +309,7 b' typedef size_t (*HUF_decompress_usingDTa'
493 309 const HUF_DTable *DTable);
494 310 #if DYNAMIC_BMI2
495 311
496 #define X(fn) \
312 #define HUF_DGEN(fn) \
497 313 \
498 314 static size_t fn##_default( \
499 315 void* dst, size_t dstSize, \
@@ -522,7 +338,7 b' typedef size_t (*HUF_decompress_usingDTa'
522 338
523 339 #else
524 340
525 #define X(fn) \
341 #define HUF_DGEN(fn) \
526 342 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
527 343 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
528 344 { \
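The X(fn) generator is renamed to HUF_DGEN here, but the pattern it implements is unchanged: under DYNAMIC_BMI2 it stamps out a portable fn_default body, a fn_bmi2 body compiled with the BMI2 target attribute, and a thin dispatcher that picks one from the bmi2 flag detected once per context; without DYNAMIC_BMI2 it emits a single function that ignores the flag. A simplified, hedged illustration of the same pattern for a toy function (all names below are invented and assume GCC/Clang on x86):

    #include <stddef.h>

    /* One body, compiled twice (portable and BMI2-targeted), plus a runtime
     * dispatcher -- a simplified stand-in for the HUF_DGEN pattern. */
    #define GEN_BODY(dst, src, n)                                   \
        {   size_t i; unsigned acc = 0;                             \
            for (i = 0; i < (n); i++)                               \
                acc += ((const unsigned char*)(src))[i];            \
            *(unsigned char*)(dst) = (unsigned char)acc;            \
            return (n);                                             \
        }

    static size_t toy_sum_default(void* dst, const void* src, size_t n)
    GEN_BODY(dst, src, n)

    __attribute__((target("bmi2")))
    static size_t toy_sum_bmi2(void* dst, const void* src, size_t n)
    GEN_BODY(dst, src, n)

    static size_t toy_sum(void* dst, const void* src, size_t n, int bmi2)
    {
        return bmi2 ? toy_sum_bmi2(dst, src, n)
                    : toy_sum_default(dst, src, n);
    }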
@@ -532,112 +348,114 b' typedef size_t (*HUF_decompress_usingDTa'
532 348
533 349 #endif
534 350
535 X(HUF_decompress1X2_usingDTable_internal)
536 X(HUF_decompress4X2_usingDTable_internal)
537 X(HUF_decompress1X4_usingDTable_internal)
538 X(HUF_decompress4X4_usingDTable_internal)
351 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
352 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
539 353
540 #undef X
541 354
542 355
543 size_t HUF_decompress1X2_usingDTable(
356 size_t HUF_decompress1X1_usingDTable(
357 void* dst, size_t dstSize,
358 const void* cSrc, size_t cSrcSize,
359 const HUF_DTable* DTable)
360 {
361 DTableDesc dtd = HUF_getDTableDesc(DTable);
362 if (dtd.tableType != 0) return ERROR(GENERIC);
363 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
364 }
365
366 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
367 const void* cSrc, size_t cSrcSize,
368 void* workSpace, size_t wkspSize)
369 {
370 const BYTE* ip = (const BYTE*) cSrc;
371
372 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
373 if (HUF_isError(hSize)) return hSize;
374 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
375 ip += hSize; cSrcSize -= hSize;
376
377 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
378 }
379
380
381 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
382 const void* cSrc, size_t cSrcSize)
383 {
384 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
385 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
386 workSpace, sizeof(workSpace));
387 }
388
389 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
390 {
391 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
392 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
393 }
394
395 size_t HUF_decompress4X1_usingDTable(
544 396 void* dst, size_t dstSize,
545 397 const void* cSrc, size_t cSrcSize,
546 398 const HUF_DTable* DTable)
547 399 {
548 400 DTableDesc dtd = HUF_getDTableDesc(DTable);
549 401 if (dtd.tableType != 0) return ERROR(GENERIC);
550 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
551 }
552
553 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
554 const void* cSrc, size_t cSrcSize,
555 void* workSpace, size_t wkspSize)
556 {
557 const BYTE* ip = (const BYTE*) cSrc;
558
559 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
560 if (HUF_isError(hSize)) return hSize;
561 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
562 ip += hSize; cSrcSize -= hSize;
563
564 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
402 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
565 403 }
566 404
567
568 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
569 const void* cSrc, size_t cSrcSize)
570 {
571 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
572 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
573 workSpace, sizeof(workSpace));
574 }
575
576 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
577 {
578 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
579 return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
580 }
581
582 size_t HUF_decompress4X2_usingDTable(
583 void* dst, size_t dstSize,
584 const void* cSrc, size_t cSrcSize,
585 const HUF_DTable* DTable)
586 {
587 DTableDesc dtd = HUF_getDTableDesc(DTable);
588 if (dtd.tableType != 0) return ERROR(GENERIC);
589 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
590 }
591
592 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
405 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
593 406 const void* cSrc, size_t cSrcSize,
594 407 void* workSpace, size_t wkspSize, int bmi2)
595 408 {
596 409 const BYTE* ip = (const BYTE*) cSrc;
597 410
598 size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
411 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
599 412 workSpace, wkspSize);
600 413 if (HUF_isError(hSize)) return hSize;
601 414 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
602 415 ip += hSize; cSrcSize -= hSize;
603 416
604 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
417 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
605 418 }
606 419
607 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
420 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
608 421 const void* cSrc, size_t cSrcSize,
609 422 void* workSpace, size_t wkspSize)
610 423 {
611 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
424 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
612 425 }
613 426
614 427
615 size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
428 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
616 429 {
617 430 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
618 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
431 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
619 432 workSpace, sizeof(workSpace));
620 433 }
621 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
434 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
622 435 {
623 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
624 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
436 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
437 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
625 438 }
626 439
627 440
628 441 /* *************************/
629 442 /* double-symbols decoding */
630 443 /* *************************/
631 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
632 444
633 /* HUF_fillDTableX4Level2() :
445 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
446 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
447 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
448 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
449
450
451 /* HUF_fillDTableX2Level2() :
634 452 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
635 static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
453 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
636 454 const U32* rankValOrigin, const int minWeight,
637 455 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
638 456 U32 nbBitsBaseline, U16 baseSeq)
639 457 {
640 HUF_DEltX4 DElt;
458 HUF_DEltX2 DElt;
641 459 U32 rankVal[HUF_TABLELOG_MAX + 1];
642 460
643 461 /* get pre-calculated rankVal */
@@ -672,10 +490,8 b' static void HUF_fillDTableX4Level2(HUF_D'
672 490 } }
673 491 }
674 492
675 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
676 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
677 493
678 static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
494 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
679 495 const sortedSymbol_t* sortedList, const U32 sortedListSize,
680 496 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
681 497 const U32 nbBitsBaseline)
@@ -700,12 +516,12 b' static void HUF_fillDTableX4(HUF_DEltX4*'
700 516 int minWeight = nbBits + scaleLog;
701 517 if (minWeight < 1) minWeight = 1;
702 518 sortedRank = rankStart[minWeight];
703 HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
519 HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
704 520 rankValOrigin[nbBits], minWeight,
705 521 sortedList+sortedRank, sortedListSize-sortedRank,
706 522 nbBitsBaseline, symbol);
707 523 } else {
708 HUF_DEltX4 DElt;
524 HUF_DEltX2 DElt;
709 525 MEM_writeLE16(&(DElt.sequence), symbol);
710 526 DElt.nbBits = (BYTE)(nbBits);
711 527 DElt.length = 1;
@@ -717,16 +533,16 b' static void HUF_fillDTableX4(HUF_DEltX4*'
717 533 }
718 534 }
719 535
720 size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
721 size_t srcSize, void* workSpace,
722 size_t wkspSize)
536 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
537 const void* src, size_t srcSize,
538 void* workSpace, size_t wkspSize)
723 539 {
724 540 U32 tableLog, maxW, sizeOfSort, nbSymbols;
725 541 DTableDesc dtd = HUF_getDTableDesc(DTable);
726 542 U32 const maxTableLog = dtd.maxTableLog;
727 543 size_t iSize;
728 544 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
729 HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
545 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
730 546 U32 *rankStart;
731 547
732 548 rankValCol_t* rankVal;
@@ -752,7 +568,7 b' size_t HUF_readDTableX4_wksp(HUF_DTable*'
752 568 rankStart = rankStart0 + 1;
753 569 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
754 570
755 HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
571 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
756 572 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
757 573 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzer complain ... */
758 574
@@ -806,7 +622,7 b' size_t HUF_readDTableX4_wksp(HUF_DTable*'
806 622 rankValPtr[w] = rankVal0[w] >> consumed;
807 623 } } } }
808 624
809 HUF_fillDTableX4(dt, maxTableLog,
625 HUF_fillDTableX2(dt, maxTableLog,
810 626 sortedSymbol, sizeOfSort,
811 627 rankStart0, rankVal, maxW,
812 628 tableLog+1);
@@ -817,112 +633,296 b' size_t HUF_readDTableX4_wksp(HUF_DTable*'
817 633 return iSize;
818 634 }
819 635
820 size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
636 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
821 637 {
822 638 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
823 return HUF_readDTableX4_wksp(DTable, src, srcSize,
639 return HUF_readDTableX2_wksp(DTable, src, srcSize,
824 640 workSpace, sizeof(workSpace));
825 641 }
826 642
827 size_t HUF_decompress1X4_usingDTable(
643
644 FORCE_INLINE_TEMPLATE U32
645 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
646 {
647 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
648 memcpy(op, dt+val, 2);
649 BIT_skipBits(DStream, dt[val].nbBits);
650 return dt[val].length;
651 }
652
653 FORCE_INLINE_TEMPLATE U32
654 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
655 {
656 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
657 memcpy(op, dt+val, 1);
658 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
659 else {
660 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
661 BIT_skipBits(DStream, dt[val].nbBits);
662 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
663 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
664 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
665 } }
666 return 1;
667 }
668
669 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
670 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
671
672 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
673 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
674 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
675
676 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
677 if (MEM_64bits()) \
678 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
679
680 HINT_INLINE size_t
681 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
682 const HUF_DEltX2* const dt, const U32 dtLog)
683 {
684 BYTE* const pStart = p;
685
686 /* up to 8 symbols at a time */
687 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
688 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
689 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
690 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
691 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
692 }
693
694 /* closer to end : up to 2 symbols at a time */
695 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
696 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
697
698 while (p <= pEnd-2)
699 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
700
701 if (p < pEnd)
702 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
703
704 return p-pStart;
705 }
706
707 FORCE_INLINE_TEMPLATE size_t
708 HUF_decompress1X2_usingDTable_internal_body(
709 void* dst, size_t dstSize,
710 const void* cSrc, size_t cSrcSize,
711 const HUF_DTable* DTable)
712 {
713 BIT_DStream_t bitD;
714
715 /* Init */
716 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
717
718 /* decode */
719 { BYTE* const ostart = (BYTE*) dst;
720 BYTE* const oend = ostart + dstSize;
721 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
722 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
723 DTableDesc const dtd = HUF_getDTableDesc(DTable);
724 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
725 }
726
727 /* check */
728 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
729
730 /* decoded size */
731 return dstSize;
732 }
733
734
735 FORCE_INLINE_TEMPLATE size_t
736 HUF_decompress4X2_usingDTable_internal_body(
737 void* dst, size_t dstSize,
738 const void* cSrc, size_t cSrcSize,
739 const HUF_DTable* DTable)
740 {
741 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
742
743 { const BYTE* const istart = (const BYTE*) cSrc;
744 BYTE* const ostart = (BYTE*) dst;
745 BYTE* const oend = ostart + dstSize;
746 const void* const dtPtr = DTable+1;
747 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
748
749 /* Init */
750 BIT_DStream_t bitD1;
751 BIT_DStream_t bitD2;
752 BIT_DStream_t bitD3;
753 BIT_DStream_t bitD4;
754 size_t const length1 = MEM_readLE16(istart);
755 size_t const length2 = MEM_readLE16(istart+2);
756 size_t const length3 = MEM_readLE16(istart+4);
757 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
758 const BYTE* const istart1 = istart + 6; /* jumpTable */
759 const BYTE* const istart2 = istart1 + length1;
760 const BYTE* const istart3 = istart2 + length2;
761 const BYTE* const istart4 = istart3 + length3;
762 size_t const segmentSize = (dstSize+3) / 4;
763 BYTE* const opStart2 = ostart + segmentSize;
764 BYTE* const opStart3 = opStart2 + segmentSize;
765 BYTE* const opStart4 = opStart3 + segmentSize;
766 BYTE* op1 = ostart;
767 BYTE* op2 = opStart2;
768 BYTE* op3 = opStart3;
769 BYTE* op4 = opStart4;
770 U32 endSignal;
771 DTableDesc const dtd = HUF_getDTableDesc(DTable);
772 U32 const dtLog = dtd.tableLog;
773
774 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
775 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
776 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
777 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
778 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
779
780 /* 16-32 symbols per loop (4-8 symbols per stream) */
781 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
782 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
783 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
784 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
785 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
786 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
787 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
788 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
789 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
790 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
791 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
792 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
793 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
794 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
795 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
796 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
797 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
798 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
799
800 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
801 }
802
803 /* check corruption */
804 if (op1 > opStart2) return ERROR(corruption_detected);
805 if (op2 > opStart3) return ERROR(corruption_detected);
806 if (op3 > opStart4) return ERROR(corruption_detected);
807 /* note : op4 already verified within main loop */
808
809 /* finish bitStreams one by one */
810 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
811 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
812 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
813 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
814
815 /* check */
816 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
817 if (!endCheck) return ERROR(corruption_detected); }
818
819 /* decoded size */
820 return dstSize;
821 }
822 }
823
824 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
825 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
826
827 size_t HUF_decompress1X2_usingDTable(
828 828 void* dst, size_t dstSize,
829 829 const void* cSrc, size_t cSrcSize,
830 830 const HUF_DTable* DTable)
831 831 {
832 832 DTableDesc dtd = HUF_getDTableDesc(DTable);
833 833 if (dtd.tableType != 1) return ERROR(GENERIC);
834 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
834 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
835 835 }
836 836
837 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
837 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
838 838 const void* cSrc, size_t cSrcSize,
839 839 void* workSpace, size_t wkspSize)
840 840 {
841 841 const BYTE* ip = (const BYTE*) cSrc;
842 842
843 size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
843 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
844 844 workSpace, wkspSize);
845 845 if (HUF_isError(hSize)) return hSize;
846 846 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
847 847 ip += hSize; cSrcSize -= hSize;
848 848
849 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
849 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
850 850 }
851 851
852 852
853 size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
853 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
854 854 const void* cSrc, size_t cSrcSize)
855 855 {
856 856 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
857 return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
857 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
858 858 workSpace, sizeof(workSpace));
859 859 }
860 860
861 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
861 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
862 862 {
863 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
864 return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
863 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
864 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
865 865 }
866 866
867 size_t HUF_decompress4X4_usingDTable(
867 size_t HUF_decompress4X2_usingDTable(
868 868 void* dst, size_t dstSize,
869 869 const void* cSrc, size_t cSrcSize,
870 870 const HUF_DTable* DTable)
871 871 {
872 872 DTableDesc dtd = HUF_getDTableDesc(DTable);
873 873 if (dtd.tableType != 1) return ERROR(GENERIC);
874 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
874 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
875 875 }
876 876
877 static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
877 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
878 878 const void* cSrc, size_t cSrcSize,
879 879 void* workSpace, size_t wkspSize, int bmi2)
880 880 {
881 881 const BYTE* ip = (const BYTE*) cSrc;
882 882
883 size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
883 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
884 884 workSpace, wkspSize);
885 885 if (HUF_isError(hSize)) return hSize;
886 886 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
887 887 ip += hSize; cSrcSize -= hSize;
888 888
889 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
889 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
890 890 }
891 891
892 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
892 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
893 893 const void* cSrc, size_t cSrcSize,
894 894 void* workSpace, size_t wkspSize)
895 895 {
896 return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
896 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
897 897 }
898 898
899 899
900 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
900 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
901 901 const void* cSrc, size_t cSrcSize)
902 902 {
903 903 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
904 return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
904 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
905 905 workSpace, sizeof(workSpace));
906 906 }
907 907
908 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
908 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
909 909 {
910 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
911 return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
910 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
911 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
912 912 }
913 913
914 914
915 /* ********************************/
916 /* Generic decompression selector */
917 /* ********************************/
915 /* ***********************************/
916 /* Universal decompression selectors */
917 /* ***********************************/
918 918
919 919 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
920 920 const void* cSrc, size_t cSrcSize,
921 921 const HUF_DTable* DTable)
922 922 {
923 923 DTableDesc const dtd = HUF_getDTableDesc(DTable);
924 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
925 HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
924 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
925 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
926 926 }
927 927
928 928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -930,8 +930,8 b' size_t HUF_decompress4X_usingDTable(void'
930 930 const HUF_DTable* DTable)
931 931 {
932 932 DTableDesc const dtd = HUF_getDTableDesc(DTable);
933 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
934 HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
933 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
934 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
935 935 }
936 936
937 937
@@ -960,12 +960,12 b' static const algo_time_t algoTime[16 /* '
960 960 /** HUF_selectDecoder() :
961 961 * Tells which decoder is likely to decode faster,
962 962 * based on a set of pre-computed metrics.
963 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
963 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
964 964 * Assumption : 0 < dstSize <= 128 KB */
965 965 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
966 966 {
967 967 assert(dstSize > 0);
968 assert(dstSize <= 128 KB);
968 assert(dstSize <= 128*1024);
969 969 /* decoder timing evaluation */
970 970 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
971 971 U32 const D256 = (U32)(dstSize >> 8);
@@ -980,7 +980,7 b' typedef size_t (*decompressionAlgo)(void'
980 980
981 981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
982 982 {
983 static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
983 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
984 984
985 985 /* validation checks */
986 986 if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -1002,8 +1002,8 b' size_t HUF_decompress4X_DCtx (HUF_DTable'
1002 1002 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1003 1003
1004 1004 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1005 return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1006 HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1005 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1006 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1007 1007 }
1008 1008 }
1009 1009
@@ -1025,8 +1025,8 b' size_t HUF_decompress4X_hufOnly_wksp(HUF'
1025 1025 if (cSrcSize == 0) return ERROR(corruption_detected);
1026 1026
1027 1027 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1028 return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1029 HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1028 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1029 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1030 1030 }
1031 1031 }
1032 1032
@@ -1041,9 +1041,9 b' size_t HUF_decompress1X_DCtx_wksp(HUF_DT'
1041 1041 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1042 1042
1043 1043 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1044 return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
1044 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1045 1045 cSrcSize, workSpace, wkspSize):
1046 HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1046 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1047 1047 cSrcSize, workSpace, wkspSize);
1048 1048 }
1049 1049 }
@@ -1060,27 +1060,27 b' size_t HUF_decompress1X_DCtx(HUF_DTable*'
1060 1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1061 1061 {
1062 1062 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1063 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1064 HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1063 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1064 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1065 1065 }
1066 1066
1067 size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1067 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1068 1068 {
1069 1069 const BYTE* ip = (const BYTE*) cSrc;
1070 1070
1071 size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1071 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1072 1072 if (HUF_isError(hSize)) return hSize;
1073 1073 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1074 1074 ip += hSize; cSrcSize -= hSize;
1075 1075
1076 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1076 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1077 1077 }
1078 1078
1079 1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1080 1080 {
1081 1081 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1082 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1083 HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1082 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1083 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1084 1084 }
1085 1085
1086 1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
@@ -1090,7 +1090,7 b' size_t HUF_decompress4X_hufOnly_wksp_bmi'
1090 1090 if (cSrcSize == 0) return ERROR(corruption_detected);
1091 1091
1092 1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1093 return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1094 HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1093 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1094 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1095 1095 }
1096 1096 }
@@ -40,12 +40,24 b''
40 40 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
41 41 #endif
42 42
43 /*!
44 * ZSTD_NO_FORWARD_PROGRESS_MAX :
45 * maximum allowed nb of calls to ZSTD_decompressStream() and ZSTD_decompress_generic()
46 * without any forward progress
47 * (defined as: no byte read from input, and no byte flushed to output)
48 * before triggering an error.
49 */
50 #ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
51 # define ZSTD_NO_FORWARD_PROGRESS_MAX 16
52 #endif
53
43 54
44 55 /*-*******************************************************
45 56 * Dependencies
46 57 *********************************************************/
47 58 #include <string.h> /* memcpy, memmove, memset */
48 #include "cpu.h"
59 #include "compiler.h" /* prefetch */
60 #include "cpu.h" /* bmi2 */
49 61 #include "mem.h" /* low level memory routines */
50 62 #define FSE_STATIC_LINKING_ONLY
51 63 #include "fse.h"
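ZSTD_NO_FORWARD_PROGRESS_MAX caps how many consecutive calls to the streaming decompressor may make zero progress (no input byte read, no output byte written) before the library reports an error; the default is 16. The same guard can be applied from the caller's side when driving the public streaming API, roughly as in this hedged sketch (error handling abbreviated):

    #include <zstd.h>   /* public streaming API: ZSTD_createDStream() etc. */

    /* Decompress a fully buffered frame, giving up after 16 consecutive calls
     * that neither consume input nor produce output -- the same policy as
     * ZSTD_NO_FORWARD_PROGRESS_MAX above. Sketch only. */
    static int stream_decompress(const void* src, size_t srcSize,
                                 void* dst, size_t dstCapacity)
    {
        ZSTD_DStream* const zds = ZSTD_createDStream();
        ZSTD_inBuffer  in  = { src, srcSize, 0 };
        ZSTD_outBuffer out = { dst, dstCapacity, 0 };
        int stalled = 0;
        int ret = -1;

        if (zds == NULL) return -1;
        if (ZSTD_isError(ZSTD_initDStream(zds))) { ZSTD_freeDStream(zds); return -1; }

        for (;;) {
            size_t const prevIn  = in.pos;
            size_t const prevOut = out.pos;
            size_t const hint = ZSTD_decompressStream(zds, &out, &in);
            if (ZSTD_isError(hint)) break;            /* decoding error */
            if (hint == 0) { ret = 0; break; }        /* frame fully decoded */
            if (in.pos == prevIn && out.pos == prevOut) {
                if (++stalled >= 16) break;           /* no forward progress */
            } else {
                stalled = 0;
            }
        }
        ZSTD_freeDStream(zds);
        return ret;
    }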
@@ -57,6 +69,9 b''
57 69 # include "zstd_legacy.h"
58 70 #endif
59 71
72 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
73 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
74
60 75
61 76 /*-*************************************
62 77 * Errors
@@ -99,11 +114,10 b' typedef struct {'
99 114 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
100 115
101 116 typedef struct {
102 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];
103 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];
104 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];
117 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
118 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
119 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
105 120 HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
106 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
107 121 U32 rep[ZSTD_REP_NUM];
108 122 } ZSTD_entropyDTables_t;
109 123
@@ -114,9 +128,10 b' struct ZSTD_DCtx_s'
114 128 const ZSTD_seqSymbol* OFTptr;
115 129 const HUF_DTable* HUFptr;
116 130 ZSTD_entropyDTables_t entropy;
131 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
117 132 const void* previousDstEnd; /* detect continuity */
118 const void* base; /* start of current segment */
119 const void* vBase; /* virtual start of previous segment if it was just before current one */
133 const void* prefixStart; /* start of current segment */
134 const void* virtualStart; /* virtual start of previous segment if it was just before current one */
120 135 const void* dictEnd; /* end of previous segment */
121 136 size_t expected;
122 137 ZSTD_frameHeader fParams;
@@ -127,7 +142,6 b' struct ZSTD_DCtx_s'
127 142 U32 fseEntropy;
128 143 XXH64_state_t xxhState;
129 144 size_t headerSize;
130 U32 dictID;
131 145 ZSTD_format_e format;
132 146 const BYTE* litPtr;
133 147 ZSTD_customMem customMem;
@@ -136,9 +150,13 b' struct ZSTD_DCtx_s'
136 150 size_t staticSize;
137 151 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
138 152
139 /* streaming */
153 /* dictionary */
140 154 ZSTD_DDict* ddictLocal;
141 const ZSTD_DDict* ddict;
155 const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
156 U32 dictID;
157 int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
158
159 /* streaming */
142 160 ZSTD_dStreamStage streamStage;
143 161 char* inBuff;
144 162 size_t inBuffSize;
@@ -153,6 +171,7 b' struct ZSTD_DCtx_s'
153 171 U32 previousLegacyVersion;
154 172 U32 legacyVersion;
155 173 U32 hostageByte;
174 int noForwardProgress;
156 175
157 176 /* workspace */
158 177 BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
@@ -173,7 +192,7 b' size_t ZSTD_estimateDCtxSize(void) { ret'
173 192 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
174 193 {
175 194 size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
176 ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize :
195 ZSTD_frameHeaderSize_prefix - ZSTD_FRAMEIDSIZE :
177 196 ZSTD_frameHeaderSize_prefix;
178 197 ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
179 198 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
@@ -188,10 +207,15 b' static void ZSTD_initDCtx_internal(ZSTD_'
188 207 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
189 208 dctx->ddict = NULL;
190 209 dctx->ddictLocal = NULL;
210 dctx->dictEnd = NULL;
211 dctx->ddictIsCold = 0;
191 212 dctx->inBuff = NULL;
192 213 dctx->inBuffSize = 0;
193 214 dctx->outBuffSize = 0;
194 215 dctx->streamStage = zdss_init;
216 dctx->legacyContext = NULL;
217 dctx->previousLegacyVersion = 0;
218 dctx->noForwardProgress = 0;
195 219 dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
196 220 }
197 221
@@ -215,8 +239,6 b' ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD'
215 239 { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
216 240 if (!dctx) return NULL;
217 241 dctx->customMem = customMem;
218 dctx->legacyContext = NULL;
219 dctx->previousLegacyVersion = 0;
220 242 ZSTD_initDCtx_internal(dctx);
221 243 return dctx;
222 244 }
@@ -265,7 +287,7 b' void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, c'
265 287 * Note 3 : Skippable Frame Identifiers are considered valid. */
266 288 unsigned ZSTD_isFrame(const void* buffer, size_t size)
267 289 {
268 if (size < ZSTD_frameIdSize) return 0;
290 if (size < ZSTD_FRAMEIDSIZE) return 0;
269 291 { U32 const magic = MEM_readLE32(buffer);
270 292 if (magic == ZSTD_MAGICNUMBER) return 1;
271 293 if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
@@ -298,25 +320,28 b' static size_t ZSTD_frameHeaderSize_inter'
298 320
299 321 /** ZSTD_frameHeaderSize() :
300 322 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
301 * @return : size of the Frame Header */
323 * @return : size of the Frame Header,
324 * or an error code (if srcSize is too small) */
302 325 size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
303 326 {
304 327 return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
305 328 }
306 329
307 330
308 /** ZSTD_getFrameHeader_internal() :
331 /** ZSTD_getFrameHeader_advanced() :
309 332 * decode Frame Header, or require larger `srcSize`.
310 333 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
311 334 * @return : 0, `zfhPtr` is correctly filled,
312 335 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
313 336 * or an error code, which can be tested using ZSTD_isError() */
314 static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
337 size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
315 338 {
316 339 const BYTE* ip = (const BYTE*)src;
317 340 size_t const minInputSize = ZSTD_startingInputLength(format);
318 341
342 memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers do not understand that zfhPtr is only going to be read if the return value is zero, since they are 2 different signals */
319 343 if (srcSize < minInputSize) return minInputSize;
344 if (src==NULL) return ERROR(GENERIC); /* invalid parameter */
320 345
321 346 if ( (format != ZSTD_f_zstd1_magicless)
322 347 && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
@@ -325,7 +350,7 b' static size_t ZSTD_getFrameHeader_intern'
325 350 if (srcSize < ZSTD_skippableHeaderSize)
326 351 return ZSTD_skippableHeaderSize; /* magic number + frame length */
327 352 memset(zfhPtr, 0, sizeof(*zfhPtr));
328 zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_frameIdSize);
353 zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
329 354 zfhPtr->frameType = ZSTD_skippableFrame;
330 355 return 0;
331 356 }
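ZSTD_getFrameHeader_advanced() (and the stable wrapper ZSTD_getFrameHeader() below) follows a three-way return protocol: 0 means zfhPtr was filled, a positive value is the minimum srcSize the caller must supply before retrying, and anything else is an error code testable with ZSTD_isError(). A hedged caller-side sketch of that protocol; it assumes ZSTD_getFrameHeader() and ZSTD_frameHeader are visible, which in this zstd version requires ZSTD_STATIC_LINKING_ONLY:

    #define ZSTD_STATIC_LINKING_ONLY   /* exposes ZSTD_getFrameHeader(), ZSTD_frameHeader */
    #include <zstd.h>
    #include <stdio.h>

    /* Read just enough bytes from a file to decode its zstd frame header,
     * following the "0 == done, >0 == need at least this much, else error"
     * protocol documented above. Sketch only. */
    static int print_frame_header(FILE* f)
    {
        unsigned char buf[32];          /* comfortably larger than any frame header */
        size_t filled = 0;
        size_t needed = 5;              /* minimal prefix (ZSTD_frameHeaderSize_prefix) */
        ZSTD_frameHeader zfh;

        for (;;) {
            size_t ret;
            if (needed > sizeof(buf)) return -1;       /* unexpected for a valid frame */
            filled += fread(buf + filled, 1, needed - filled, f);
            if (filled < needed) return -1;            /* truncated input */
            ret = ZSTD_getFrameHeader(&zfh, buf, filled);
            if (ZSTD_isError(ret)) return -1;          /* not a valid zstd frame */
            if (ret == 0) break;                       /* header decoded into zfh */
            needed = ret;                              /* supply at least this much and retry */
        }
        printf("windowSize=%llu dictID=%u\n",
               (unsigned long long)zfh.windowSize, zfh.dictID);
        return 0;
    }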
@@ -394,7 +419,7 b' static size_t ZSTD_getFrameHeader_intern'
394 419 * or an error code, which can be tested using ZSTD_isError() */
395 420 size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
396 421 {
397 return ZSTD_getFrameHeader_internal(zfhPtr, src, srcSize, ZSTD_f_zstd1);
422 return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
398 423 }
399 424
400 425
@@ -437,7 +462,7 b' unsigned long long ZSTD_findDecompressed'
437 462 size_t skippableSize;
438 463 if (srcSize < ZSTD_skippableHeaderSize)
439 464 return ERROR(srcSize_wrong);
440 skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_frameIdSize)
465 skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_FRAMEIDSIZE)
441 466 + ZSTD_skippableHeaderSize;
442 467 if (srcSize < skippableSize) {
443 468 return ZSTD_CONTENTSIZE_ERROR;
@@ -491,7 +516,7 b' unsigned long long ZSTD_getDecompressedS'
491 516 * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
492 517 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
493 518 {
494 size_t const result = ZSTD_getFrameHeader_internal(&(dctx->fParams), src, headerSize, dctx->format);
519 size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
495 520 if (ZSTD_isError(result)) return result; /* invalid header */
496 521 if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */
497 522 if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
@@ -526,6 +551,7 b' size_t ZSTD_getcBlockSize(const void* sr'
526 551 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
527 552 const void* src, size_t srcSize)
528 553 {
554 if (dst==NULL) return ERROR(dstSize_tooSmall);
529 555 if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
530 556 memcpy(dst, src, srcSize);
531 557 return srcSize;
@@ -542,6 +568,9 b' static size_t ZSTD_setRleBlock(void* dst'
542 568 return regenSize;
543 569 }
544 570
571 /* Hidden declaration for fullbench */
572 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
573 const void* src, size_t srcSize);
545 574 /*! ZSTD_decodeLiteralsBlock() :
546 575 * @return : nb of bytes read from src (< srcSize )
547 576 * note : symbol not declared but exposed for fullbench */
@@ -558,6 +587,7 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt'
558 587 case set_repeat:
559 588 if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
560 589 /* fall-through */
590
561 591 case set_compressed:
562 592 if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
563 593 { size_t lhSize, litSize, litCSize;
@@ -589,15 +619,20 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt'
589 619 if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
590 620 if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
591 621
622 /* prefetch huffman table if cold */
623 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
624 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
625 }
626
592 627 if (HUF_isError((litEncType==set_repeat) ?
593 628 ( singleStream ?
594 629 HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
595 630 HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
596 631 ( singleStream ?
597 HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
598 dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) :
632 HUF_decompress1X1_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
633 dctx->workspace, sizeof(dctx->workspace), dctx->bmi2) :
599 634 HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
600 dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2))))
635 dctx->workspace, sizeof(dctx->workspace), dctx->bmi2))))
601 636 return ERROR(corruption_detected);
602 637
603 638 dctx->litPtr = dctx->litBuffer;
@@ -869,7 +904,8 b' static size_t ZSTD_buildSeqTable(ZSTD_se'
869 904 symbolEncodingType_e type, U32 max, U32 maxLog,
870 905 const void* src, size_t srcSize,
871 906 const U32* baseValue, const U32* nbAdditionalBits,
872 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable)
907 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
908 int ddictIsCold, int nbSeq)
873 909 {
874 910 switch(type)
875 911 {
@@ -888,6 +924,12 b' static size_t ZSTD_buildSeqTable(ZSTD_se'
888 924 return 0;
889 925 case set_repeat:
890 926 if (!flagRepeatTable) return ERROR(corruption_detected);
927 /* prefetch FSE table if used */
928 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
929 const void* const pStart = *DTablePtr;
930 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
931 PREFETCH_AREA(pStart, pSize);
932 }
891 933 return 0;
892 934 case set_compressed :
893 935 { U32 tableLog;
@@ -933,6 +975,9 b' static const U32 ML_base[MaxML+1] = {'
933 975 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
934 976 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
935 977
978 /* Hidden declaration for fullbench */
979 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
980 const void* src, size_t srcSize);
936 981
937 982 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
938 983 const void* src, size_t srcSize)
@@ -940,25 +985,25 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
940 985 const BYTE* const istart = (const BYTE* const)src;
941 986 const BYTE* const iend = istart + srcSize;
942 987 const BYTE* ip = istart;
988 int nbSeq;
943 989 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
944 990
945 991 /* check */
946 992 if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
947 993
948 994 /* SeqHead */
949 { int nbSeq = *ip++;
950 if (!nbSeq) { *nbSeqPtr=0; return 1; }
951 if (nbSeq > 0x7F) {
952 if (nbSeq == 0xFF) {
953 if (ip+2 > iend) return ERROR(srcSize_wrong);
954 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
955 } else {
956 if (ip >= iend) return ERROR(srcSize_wrong);
957 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
958 }
995 nbSeq = *ip++;
996 if (!nbSeq) { *nbSeqPtr=0; return 1; }
997 if (nbSeq > 0x7F) {
998 if (nbSeq == 0xFF) {
999 if (ip+2 > iend) return ERROR(srcSize_wrong);
1000 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
1001 } else {
1002 if (ip >= iend) return ERROR(srcSize_wrong);
1003 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
959 1004 }
960 *nbSeqPtr = nbSeq;
961 1005 }
1006 *nbSeqPtr = nbSeq;
962 1007
963 1008 /* FSE table descriptors */
964 1009 if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
@@ -972,7 +1017,8 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
972 1017 LLtype, MaxLL, LLFSELog,
973 1018 ip, iend-ip,
974 1019 LL_base, LL_bits,
975 LL_defaultDTable, dctx->fseEntropy);
1020 LL_defaultDTable, dctx->fseEntropy,
1021 dctx->ddictIsCold, nbSeq);
976 1022 if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
977 1023 ip += llhSize;
978 1024 }
@@ -981,7 +1027,8 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
981 1027 OFtype, MaxOff, OffFSELog,
982 1028 ip, iend-ip,
983 1029 OF_base, OF_bits,
984 OF_defaultDTable, dctx->fseEntropy);
1030 OF_defaultDTable, dctx->fseEntropy,
1031 dctx->ddictIsCold, nbSeq);
985 1032 if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
986 1033 ip += ofhSize;
987 1034 }
@@ -990,12 +1037,23 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
990 1037 MLtype, MaxML, MLFSELog,
991 1038 ip, iend-ip,
992 1039 ML_base, ML_bits,
993 ML_defaultDTable, dctx->fseEntropy);
1040 ML_defaultDTable, dctx->fseEntropy,
1041 dctx->ddictIsCold, nbSeq);
994 1042 if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
995 1043 ip += mlhSize;
996 1044 }
997 1045 }
998 1046
1047 /* prefetch dictionary content */
1048 if (dctx->ddictIsCold) {
1049 size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart;
1050 size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ );
1051 size_t const pSize = MIN(psmin, 128 KB /* protection */ );
1052 const void* const pStart = (const char*)dctx->dictEnd - pSize;
1053 PREFETCH_AREA(pStart, pSize);
1054 dctx->ddictIsCold = 0;
1055 }
1056
999 1057 return ip-istart;
1000 1058 }
1001 1059
@@ -1075,7 +1133,7 b' HINT_INLINE'
1075 1133 size_t ZSTD_execSequence(BYTE* op,
1076 1134 BYTE* const oend, seq_t sequence,
1077 1135 const BYTE** litPtr, const BYTE* const litLimit,
1078 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
1136 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1079 1137 {
1080 1138 BYTE* const oLitEnd = op + sequence.litLength;
1081 1139 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -1087,7 +1145,7 b' size_t ZSTD_execSequence(BYTE* op,'
1087 1145 /* check */
1088 1146 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1089 1147 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1090 if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
1148 if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1091 1149
1092 1150 /* copy Literals */
1093 1151 ZSTD_copy8(op, *litPtr);
@@ -1097,11 +1155,11 b' size_t ZSTD_execSequence(BYTE* op,'
1097 1155 *litPtr = iLitEnd; /* update for next sequence */
1098 1156
1099 1157 /* copy Match */
1100 if (sequence.offset > (size_t)(oLitEnd - base)) {
1158 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1101 1159 /* offset beyond prefix -> go into extDict */
1102 if (sequence.offset > (size_t)(oLitEnd - vBase))
1160 if (sequence.offset > (size_t)(oLitEnd - virtualStart))
1103 1161 return ERROR(corruption_detected);
1104 match = dictEnd + (match - base);
1162 match = dictEnd + (match - prefixStart);
1105 1163 if (match + sequence.matchLength <= dictEnd) {
1106 1164 memmove(oLitEnd, match, sequence.matchLength);
1107 1165 return sequenceLength;
@@ -1111,7 +1169,7 b' size_t ZSTD_execSequence(BYTE* op,'
1111 1169 memmove(oLitEnd, match, length1);
1112 1170 op = oLitEnd + length1;
1113 1171 sequence.matchLength -= length1;
1114 match = base;
1172 match = prefixStart;
1115 1173 if (op > oend_w || sequence.matchLength < MINMATCH) {
1116 1174 U32 i;
1117 1175 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
@@ -1354,10 +1412,10 b' ZSTD_decompressSequences_body( ZSTD_DCtx'
1354 1412 BYTE* op = ostart;
1355 1413 const BYTE* litPtr = dctx->litPtr;
1356 1414 const BYTE* const litEnd = litPtr + dctx->litSize;
1357 const BYTE* const base = (const BYTE*) (dctx->base);
1358 const BYTE* const vBase = (const BYTE*) (dctx->vBase);
1415 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1416 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1359 1417 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1360 DEBUGLOG(5, "ZSTD_decompressSequences");
1418 DEBUGLOG(5, "ZSTD_decompressSequences_body");
1361 1419
1362 1420 /* Regen sequences */
1363 1421 if (nbSeq) {
@@ -1372,14 +1430,14 b' ZSTD_decompressSequences_body( ZSTD_DCtx'
1372 1430 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
1373 1431 nbSeq--;
1374 1432 { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1375 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
1433 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1376 1434 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1377 1435 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1378 1436 op += oneSeqSize;
1379 1437 } }
1380 1438
1381 1439 /* check if reached exact end */
1382 DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
1440 DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1383 1441 if (nbSeq) return ERROR(corruption_detected);
1384 1442 /* save reps for next block */
1385 1443 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
@@ -1498,8 +1556,8 b' ZSTD_decompressSequencesLong_body('
1498 1556 BYTE* op = ostart;
1499 1557 const BYTE* litPtr = dctx->litPtr;
1500 1558 const BYTE* const litEnd = litPtr + dctx->litSize;
1501 const BYTE* const prefixStart = (const BYTE*) (dctx->base);
1502 const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
1559 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1560 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1503 1561 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1504 1562
1505 1563 /* Regen sequences */
@@ -1662,7 +1720,8 b' static size_t ZSTD_decompressBlock_inter'
1662 1720 /* isLongOffset must be true if there are long offsets.
1663 1721 * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1664 1722 * We don't expect that to be the case in 64-bit mode.
1665 * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit)
1723 * In block mode, window size is not known, so we have to be conservative.
1724 * (note: but it could be evaluated from current-lowLimit)
1666 1725 */
1667 1726 ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
1668 1727 DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
@@ -1701,8 +1760,8 b' static void ZSTD_checkContinuity(ZSTD_DC'
1701 1760 {
1702 1761 if (dst != dctx->previousDstEnd) { /* not contiguous */
1703 1762 dctx->dictEnd = dctx->previousDstEnd;
1704 dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
1705 dctx->base = dst;
1763 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
1764 dctx->prefixStart = dst;
1706 1765 dctx->previousDstEnd = dst;
1707 1766 }
1708 1767 }
@@ -1729,10 +1788,10 b' ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD'
1729 1788 }
1730 1789
1731 1790
1732 static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
1791 static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE value, size_t length)
1733 1792 {
1734 1793 if (length > dstCapacity) return ERROR(dstSize_tooSmall);
1735 memset(dst, byte, length);
1794 memset(dst, value, length);
1736 1795 return length;
1737 1796 }
1738 1797
@@ -1749,7 +1808,7 b' size_t ZSTD_findFrameCompressedSize(cons'
1749 1808 #endif
1750 1809 if ( (srcSize >= ZSTD_skippableHeaderSize)
1751 1810 && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) {
1752 return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize);
1811 return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE);
1753 1812 } else {
1754 1813 const BYTE* ip = (const BYTE*)src;
1755 1814 const BYTE* const ipstart = ip;
@@ -1783,7 +1842,6 b' size_t ZSTD_findFrameCompressedSize(cons'
1783 1842 if (zfh.checksumFlag) { /* Final frame content checksum */
1784 1843 if (remainingSize < 4) return ERROR(srcSize_wrong);
1785 1844 ip += 4;
1786 remainingSize -= 4;
1787 1845 }
1788 1846
1789 1847 return ip - ipstart;
@@ -1871,9 +1929,6 b' static size_t ZSTD_decompressFrame(ZSTD_'
1871 1929 return op-ostart;
1872 1930 }
1873 1931
1874 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
1875 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
1876
1877 1932 static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
1878 1933 void* dst, size_t dstCapacity,
1879 1934 const void* src, size_t srcSize,
@@ -1881,6 +1936,9 b' static size_t ZSTD_decompressMultiFrame('
1881 1936 const ZSTD_DDict* ddict)
1882 1937 {
1883 1938 void* const dststart = dst;
1939 int moreThan1Frame = 0;
1940
1941 DEBUGLOG(5, "ZSTD_decompressMultiFrame");
1884 1942 assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
1885 1943
1886 1944 if (ddict) {
@@ -1889,7 +1947,6 b' static size_t ZSTD_decompressMultiFrame('
1889 1947 }
1890 1948
1891 1949 while (srcSize >= ZSTD_frameHeaderSize_prefix) {
1892 U32 magicNumber;
1893 1950
1894 1951 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
1895 1952 if (ZSTD_isLegacy(src, srcSize)) {
@@ -1911,24 +1968,21 b' static size_t ZSTD_decompressMultiFrame('
1911 1968 }
1912 1969 #endif
1913 1970
1914 magicNumber = MEM_readLE32(src);
1915 DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
1916 (U32)magicNumber, (U32)ZSTD_MAGICNUMBER);
1917 if (magicNumber != ZSTD_MAGICNUMBER) {
1971 { U32 const magicNumber = MEM_readLE32(src);
1972 DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
1973 (U32)magicNumber, (U32)ZSTD_MAGICNUMBER);
1918 1974 if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
1919 1975 size_t skippableSize;
1920 1976 if (srcSize < ZSTD_skippableHeaderSize)
1921 1977 return ERROR(srcSize_wrong);
1922 skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize)
1978 skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE)
1923 1979 + ZSTD_skippableHeaderSize;
1924 1980 if (srcSize < skippableSize) return ERROR(srcSize_wrong);
1925 1981
1926 1982 src = (const BYTE *)src + skippableSize;
1927 1983 srcSize -= skippableSize;
1928 1984 continue;
1929 }
1930 return ERROR(prefix_unknown);
1931 }
1985 } }
1932 1986
1933 1987 if (ddict) {
1934 1988 /* we were called from ZSTD_decompress_usingDDict */
@@ -1942,11 +1996,25 b' static size_t ZSTD_decompressMultiFrame('
1942 1996
1943 1997 { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
1944 1998 &src, &srcSize);
1999 if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
2000 && (moreThan1Frame==1) ) {
2001 /* at least one frame successfully completed,
2002 * but following bytes are garbage :
2003 * it's more likely to be a srcSize error,
2004 * specifying more bytes than compressed size of frame(s).
2005 * This error message replaces ERROR(prefix_unknown),
2006 * which would be confusing, as the first header is actually correct.
2007 * Note that one could be unlucky, it might be a corruption error instead,
2008 * happening right at the place where we expect zstd magic bytes.
2009 * But this is _much_ less likely than a srcSize field error. */
2010 return ERROR(srcSize_wrong);
2011 }
1945 2012 if (ZSTD_isError(res)) return res;
1946 2013 /* no need to bound check, ZSTD_decompressFrame already has */
1947 2014 dst = (BYTE*)dst + res;
1948 2015 dstCapacity -= res;
1949 2016 }
2017 moreThan1Frame = 1;
1950 2018 } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
1951 2019
1952 2020 if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
@@ -1980,6 +2048,7 b' size_t ZSTD_decompress(void* dst, size_t'
1980 2048 return regenSize;
1981 2049 #else /* stack mode */
1982 2050 ZSTD_DCtx dctx;
2051 ZSTD_initDCtx_internal(&dctx);
1983 2052 return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
1984 2053 #endif
1985 2054 }
@@ -2031,7 +2100,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2031 2100 case ZSTDds_getFrameHeaderSize :
2032 2101 assert(src != NULL);
2033 2102 if (dctx->format == ZSTD_f_zstd1) { /* allows header */
2034 assert(srcSize >= ZSTD_frameIdSize); /* to read skippable magic number */
2103 assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
2035 2104 if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2036 2105 memcpy(dctx->headerBuffer, src, srcSize);
2037 2106 dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */
@@ -2141,7 +2210,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2141 2210 assert(src != NULL);
2142 2211 assert(srcSize <= ZSTD_skippableHeaderSize);
2143 2212 memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */
2144 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_frameIdSize); /* note : dctx->expected can grow seriously large, beyond local buffer size */
2213 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
2145 2214 dctx->stage = ZSTDds_skipFrame;
2146 2215 return 0;
2147 2216
@@ -2159,27 +2228,33 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2159 2228 static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2160 2229 {
2161 2230 dctx->dictEnd = dctx->previousDstEnd;
2162 dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
2163 dctx->base = dict;
2231 dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2232 dctx->prefixStart = dict;
2164 2233 dctx->previousDstEnd = (const char*)dict + dictSize;
2165 2234 return 0;
2166 2235 }
2167 2236
2168 /* ZSTD_loadEntropy() :
2169 * dict : must point at beginning of a valid zstd dictionary
2237 /*! ZSTD_loadEntropy() :
2238 * dict : must point at beginning of a valid zstd dictionary.
2170 2239 * @return : size of entropy tables read */
2171 static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize)
2240 static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy,
2241 const void* const dict, size_t const dictSize)
2172 2242 {
2173 2243 const BYTE* dictPtr = (const BYTE*)dict;
2174 2244 const BYTE* const dictEnd = dictPtr + dictSize;
2175 2245
2176 2246 if (dictSize <= 8) return ERROR(dictionary_corrupted);
2247 assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
2177 2248 dictPtr += 8; /* skip header = magic + dictID */
2178 2249
2179
2180 { size_t const hSize = HUF_readDTableX4_wksp(
2181 entropy->hufTable, dictPtr, dictEnd - dictPtr,
2182 entropy->workspace, sizeof(entropy->workspace));
2250 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
2251 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
2252 ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
2253 { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
2254 size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
2255 size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
2256 dictPtr, dictEnd - dictPtr,
2257 workspace, workspaceSize);
2183 2258 if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
2184 2259 dictPtr += hSize;
2185 2260 }
@@ -2190,7 +2265,7 b' static size_t ZSTD_loadEntropy(ZSTD_entr'
2190 2265 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2191 2266 if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
2192 2267 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2193 ZSTD_buildFSETable(entropy->OFTable,
2268 ZSTD_buildFSETable( entropy->OFTable,
2194 2269 offcodeNCount, offcodeMaxValue,
2195 2270 OF_base, OF_bits,
2196 2271 offcodeLog);
@@ -2203,7 +2278,7 b' static size_t ZSTD_loadEntropy(ZSTD_entr'
2203 2278 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
2204 2279 if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
2205 2280 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2206 ZSTD_buildFSETable(entropy->MLTable,
2281 ZSTD_buildFSETable( entropy->MLTable,
2207 2282 matchlengthNCount, matchlengthMaxValue,
2208 2283 ML_base, ML_bits,
2209 2284 matchlengthLog);
@@ -2216,7 +2291,7 b' static size_t ZSTD_loadEntropy(ZSTD_entr'
2216 2291 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
2217 2292 if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
2218 2293 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2219 ZSTD_buildFSETable(entropy->LLTable,
2294 ZSTD_buildFSETable( entropy->LLTable,
2220 2295 litlengthNCount, litlengthMaxValue,
2221 2296 LL_base, LL_bits,
2222 2297 litlengthLog);
@@ -2242,7 +2317,7 b' static size_t ZSTD_decompress_insertDict'
2242 2317 if (magic != ZSTD_MAGIC_DICTIONARY) {
2243 2318 return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
2244 2319 } }
2245 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_frameIdSize);
2320 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
2246 2321
2247 2322 /* load entropy tables */
2248 2323 { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
@@ -2256,7 +2331,6 b' static size_t ZSTD_decompress_insertDict'
2256 2331 return ZSTD_refDictContent(dctx, dict, dictSize);
2257 2332 }
2258 2333
2259 /* Note : this function cannot fail */
2260 2334 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
2261 2335 {
2262 2336 assert(dctx != NULL);
@@ -2264,8 +2338,8 b' size_t ZSTD_decompressBegin(ZSTD_DCtx* d'
2264 2338 dctx->stage = ZSTDds_getFrameHeaderSize;
2265 2339 dctx->decodedSize = 0;
2266 2340 dctx->previousDstEnd = NULL;
2267 dctx->base = NULL;
2268 dctx->vBase = NULL;
2341 dctx->prefixStart = NULL;
2342 dctx->virtualStart = NULL;
2269 2343 dctx->dictEnd = NULL;
2270 2344 dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
2271 2345 dctx->litEntropy = dctx->fseEntropy = 0;
@@ -2302,42 +2376,53 b' struct ZSTD_DDict_s {'
2302 2376
2303 2377 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict)
2304 2378 {
2379 assert(ddict != NULL);
2305 2380 return ddict->dictContent;
2306 2381 }
2307 2382
2308 2383 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
2309 2384 {
2385 assert(ddict != NULL);
2310 2386 return ddict->dictSize;
2311 2387 }
2312 2388
2313 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
2389 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2314 2390 {
2315 CHECK_F( ZSTD_decompressBegin(dstDCtx) );
2316 if (ddict) { /* support begin on NULL */
2317 dstDCtx->dictID = ddict->dictID;
2318 dstDCtx->base = ddict->dictContent;
2319 dstDCtx->vBase = ddict->dictContent;
2320 dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
2321 dstDCtx->previousDstEnd = dstDCtx->dictEnd;
2391 DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
2392 assert(dctx != NULL);
2393 if (ddict) {
2394 dctx->ddictIsCold = (dctx->dictEnd != (const char*)ddict->dictContent + ddict->dictSize);
2395 DEBUGLOG(4, "DDict is %s",
2396 dctx->ddictIsCold ? "~cold~" : "hot!");
2397 }
2398 CHECK_F( ZSTD_decompressBegin(dctx) );
2399 if (ddict) { /* NULL ddict is equivalent to no dictionary */
2400 dctx->dictID = ddict->dictID;
2401 dctx->prefixStart = ddict->dictContent;
2402 dctx->virtualStart = ddict->dictContent;
2403 dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
2404 dctx->previousDstEnd = dctx->dictEnd;
2322 2405 if (ddict->entropyPresent) {
2323 dstDCtx->litEntropy = 1;
2324 dstDCtx->fseEntropy = 1;
2325 dstDCtx->LLTptr = ddict->entropy.LLTable;
2326 dstDCtx->MLTptr = ddict->entropy.MLTable;
2327 dstDCtx->OFTptr = ddict->entropy.OFTable;
2328 dstDCtx->HUFptr = ddict->entropy.hufTable;
2329 dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
2330 dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
2331 dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
2406 dctx->litEntropy = 1;
2407 dctx->fseEntropy = 1;
2408 dctx->LLTptr = ddict->entropy.LLTable;
2409 dctx->MLTptr = ddict->entropy.MLTable;
2410 dctx->OFTptr = ddict->entropy.OFTable;
2411 dctx->HUFptr = ddict->entropy.hufTable;
2412 dctx->entropy.rep[0] = ddict->entropy.rep[0];
2413 dctx->entropy.rep[1] = ddict->entropy.rep[1];
2414 dctx->entropy.rep[2] = ddict->entropy.rep[2];
2332 2415 } else {
2333 dstDCtx->litEntropy = 0;
2334 dstDCtx->fseEntropy = 0;
2416 dctx->litEntropy = 0;
2417 dctx->fseEntropy = 0;
2335 2418 }
2336 2419 }
2337 2420 return 0;
2338 2421 }
2339 2422
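The ddictIsCold flag set above records whether this dctx last worked on this dictionary's content; when it did not, the decoder treats the DDict as cold and schedules prefetches for it. A minimal caller-side sketch of the intended reuse pattern, assuming hypothetical buffers dictData/dictSize and src/srcSize supplied by the caller (not part of the vendored sources): build the ZSTD_DDict once and keep pairing it with the same ZSTD_DCtx, so that after the first frame the dictionary is seen as hot.

#include <zstd.h>

/* Illustrative sketch only, not part of upstream zstd. */
static size_t decompress_with_ddict(const void* dictData, size_t dictSize,
                                    const void* src, size_t srcSize,
                                    void* dst, size_t dstCapacity)
{
    ZSTD_DDict* const ddict = ZSTD_createDDict(dictData, dictSize); /* create once, reuse */
    ZSTD_DCtx*  const dctx  = ZSTD_createDCtx();
    size_t dSize = 0;
    if (ddict && dctx) {
        /* Repeated calls with the same dctx/ddict pair keep the dictionary "hot". */
        dSize = ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ddict);
    }
    ZSTD_freeDCtx(dctx);
    ZSTD_freeDDict(ddict);
    return dSize; /* check with ZSTD_isError() */
}
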
2340 static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType)
2423 static size_t
2424 ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict,
2425 ZSTD_dictContentType_e dictContentType)
2341 2426 {
2342 2427 ddict->dictID = 0;
2343 2428 ddict->entropyPresent = 0;
@@ -2355,10 +2440,12 b' static size_t ZSTD_loadEntropy_inDDict(Z'
2355 2440 return 0; /* pure content mode */
2356 2441 }
2357 2442 }
2358 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize);
2443 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
2359 2444
2360 2445 /* load entropy tables */
2361 CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted );
2446 CHECK_E( ZSTD_loadEntropy(&ddict->entropy,
2447 ddict->dictContent, ddict->dictSize),
2448 dictionary_corrupted );
2362 2449 ddict->entropyPresent = 1;
2363 2450 return 0;
2364 2451 }
@@ -2372,6 +2459,7 b' static size_t ZSTD_initDDict_internal(ZS'
2372 2459 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
2373 2460 ddict->dictBuffer = NULL;
2374 2461 ddict->dictContent = dict;
2462 if (!dict) dictSize = 0;
2375 2463 } else {
2376 2464 void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
2377 2465 ddict->dictBuffer = internalBuffer;
@@ -2396,14 +2484,15 b' ZSTD_DDict* ZSTD_createDDict_advanced(co'
2396 2484 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
2397 2485
2398 2486 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
2399 if (!ddict) return NULL;
2487 if (ddict == NULL) return NULL;
2400 2488 ddict->cMem = customMem;
2401
2402 if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) {
2403 ZSTD_freeDDict(ddict);
2404 return NULL;
2405 }
2406
2489 { size_t const initResult = ZSTD_initDDict_internal(ddict,
2490 dict, dictSize,
2491 dictLoadMethod, dictContentType);
2492 if (ZSTD_isError(initResult)) {
2493 ZSTD_freeDDict(ddict);
2494 return NULL;
2495 } }
2407 2496 return ddict;
2408 2497 }
2409 2498 }
@@ -2430,23 +2519,25 b' ZSTD_DDict* ZSTD_createDDict_byReference'
2430 2519
2431 2520
2432 2521 const ZSTD_DDict* ZSTD_initStaticDDict(
2433 void* workspace, size_t workspaceSize,
2522 void* sBuffer, size_t sBufferSize,
2434 2523 const void* dict, size_t dictSize,
2435 2524 ZSTD_dictLoadMethod_e dictLoadMethod,
2436 2525 ZSTD_dictContentType_e dictContentType)
2437 2526 {
2438 size_t const neededSpace =
2439 sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
2440 ZSTD_DDict* const ddict = (ZSTD_DDict*)workspace;
2441 assert(workspace != NULL);
2527 size_t const neededSpace = sizeof(ZSTD_DDict)
2528 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
2529 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
2530 assert(sBuffer != NULL);
2442 2531 assert(dict != NULL);
2443 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
2444 if (workspaceSize < neededSpace) return NULL;
2532 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
2533 if (sBufferSize < neededSpace) return NULL;
2445 2534 if (dictLoadMethod == ZSTD_dlm_byCopy) {
2446 2535 memcpy(ddict+1, dict, dictSize); /* local copy */
2447 2536 dict = ddict+1;
2448 2537 }
2449 if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) ))
2538 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
2539 dict, dictSize,
2540 ZSTD_dlm_byRef, dictContentType) ))
2450 2541 return NULL;
2451 2542 return ddict;
2452 2543 }
@@ -2484,7 +2575,7 b' unsigned ZSTD_getDictID_fromDict(const v'
2484 2575 {
2485 2576 if (dictSize < 8) return 0;
2486 2577 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
2487 return MEM_readLE32((const char*)dict + ZSTD_frameIdSize);
2578 return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
2488 2579 }
2489 2580
2490 2581 /*! ZSTD_getDictID_fromDDict() :
@@ -2560,12 +2651,15 b' size_t ZSTD_freeDStream(ZSTD_DStream* zd'
2560 2651 }
2561 2652
2562 2653
2563 /* *** Initialization *** */
2654 /* *** Initialization *** */
2564 2655
2565 2656 size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
2566 2657 size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
2567 2658
2568 size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
2659 size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
2660 const void* dict, size_t dictSize,
2661 ZSTD_dictLoadMethod_e dictLoadMethod,
2662 ZSTD_dictContentType_e dictContentType)
2569 2663 {
2570 2664 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2571 2665 ZSTD_freeDDict(dctx->ddictLocal);
@@ -2607,6 +2701,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D'
2607 2701 {
2608 2702 DEBUGLOG(4, "ZSTD_initDStream_usingDict");
2609 2703 zds->streamStage = zdss_init;
2704 zds->noForwardProgress = 0;
2610 2705 CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
2611 2706 return ZSTD_frameHeaderSize_prefix;
2612 2707 }
@@ -2618,13 +2713,6 b' size_t ZSTD_initDStream(ZSTD_DStream* zd'
2618 2713 return ZSTD_initDStream_usingDict(zds, NULL, 0);
2619 2714 }
2620 2715
2621 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2622 {
2623 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2624 dctx->ddict = ddict;
2625 return 0;
2626 }
2627
2628 2716 /* ZSTD_initDStream_usingDDict() :
2629 2717 * ddict will just be referenced, and must outlive decompression session
2630 2718 * this function cannot fail */
@@ -2663,6 +2751,13 b' size_t ZSTD_setDStreamParameter(ZSTD_DSt'
2663 2751 return 0;
2664 2752 }
2665 2753
2754 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2755 {
2756 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2757 dctx->ddict = ddict;
2758 return 0;
2759 }
2760
2666 2761 size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2667 2762 {
2668 2763 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
@@ -2767,7 +2862,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2767 2862 return hint;
2768 2863 } }
2769 2864 #endif
2770 { size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
2865 { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
2771 2866 DEBUGLOG(5, "header size : %u", (U32)hSize);
2772 2867 if (ZSTD_isError(hSize)) {
2773 2868 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
@@ -2828,7 +2923,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2828 2923 CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
2829 2924
2830 2925 if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2831 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_frameIdSize);
2926 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
2832 2927 zds->stage = ZSTDds_skipFrame;
2833 2928 } else {
2834 2929 CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
@@ -2947,8 +3042,18 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2947 3042 } }
2948 3043
2949 3044 /* result */
2950 input->pos += (size_t)(ip-istart);
2951 output->pos += (size_t)(op-ostart);
3045 input->pos = (size_t)(ip - (const char*)(input->src));
3046 output->pos = (size_t)(op - (char*)(output->dst));
3047 if ((ip==istart) && (op==ostart)) { /* no forward progress */
3048 zds->noForwardProgress ++;
3049 if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
3050 if (op==oend) return ERROR(dstSize_tooSmall);
3051 if (ip==iend) return ERROR(srcSize_wrong);
3052 assert(0);
3053 }
3054 } else {
3055 zds->noForwardProgress = 0;
3056 }
2952 3057 { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
2953 3058 if (!nextSrcSizeHint) { /* frame fully decoded */
2954 3059 if (zds->outEnd == zds->outStart) { /* output fully flushed */
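The noForwardProgress counter introduced in this hunk converts a caller loop that repeatedly supplies neither free output space nor new input into a hard error (dstSize_tooSmall or srcSize_wrong) once ZSTD_NO_FORWARD_PROGRESS_MAX calls go by without movement, instead of spinning forever. A hedged sketch of a well-behaved streaming loop that never trips it, assuming hypothetical caller-supplied I/O helpers read_more() and write_out():

#include <zstd.h>

size_t read_more(void* buf, size_t cap);      /* hypothetical: refill buf, return bytes read */
void   write_out(const void* buf, size_t n);  /* hypothetical: consume n produced bytes */

/* Illustrative sketch only, not part of upstream zstd. */
static size_t stream_decompress(ZSTD_DStream* zds,
                                char* inBuf, size_t inCap,
                                char* outBuf, size_t outCap)
{
    size_t ret = 1;                      /* non-zero means the frame is not finished */
    while (ret != 0) {
        ZSTD_inBuffer input = { inBuf, read_more(inBuf, inCap), 0 };
        if (input.size == 0) break;      /* no more input to feed */
        while (input.pos < input.size) {
            ZSTD_outBuffer output = { outBuf, outCap, 0 };   /* always offer fresh output space */
            ret = ZSTD_decompressStream(zds, &output, &input);
            if (ZSTD_isError(ret)) return ret;
            write_out(outBuf, output.pos);                   /* drain everything produced */
        }
    }
    return ret; /* 0 when a frame ended exactly on the last byte consumed */
}
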
@@ -29,6 +29,7 b''
29 29 #include "mem.h" /* read */
30 30 #include "pool.h"
31 31 #include "threading.h"
32 #include "cover.h"
32 33 #include "zstd_internal.h" /* includes zstd.h */
33 34 #ifndef ZDICT_STATIC_LINKING_ONLY
34 35 #define ZDICT_STATIC_LINKING_ONLY
@@ -39,6 +40,7 b''
39 40 * Constants
40 41 ***************************************/
41 42 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
43 #define DEFAULT_SPLITPOINT 1.0
42 44
43 45 /*-*************************************
44 46 * Console display
@@ -184,7 +186,7 b' static void COVER_map_remove(COVER_map_t'
184 186 }
185 187
186 188 /**
187 * Destroyes a map that is inited with COVER_map_init().
189 * Destroys a map that is inited with COVER_map_init().
188 190 */
189 191 static void COVER_map_destroy(COVER_map_t *map) {
190 192 if (map->data) {
@@ -203,6 +205,8 b' typedef struct {'
203 205 size_t *offsets;
204 206 const size_t *samplesSizes;
205 207 size_t nbSamples;
208 size_t nbTrainSamples;
209 size_t nbTestSamples;
206 210 U32 *suffix;
207 211 size_t suffixSize;
208 212 U32 *freqs;
@@ -220,9 +224,9 b' static COVER_ctx_t *g_ctx = NULL;'
220 224 /**
221 225 * Returns the sum of the sample sizes.
222 226 */
223 static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
227 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
224 228 size_t sum = 0;
225 size_t i;
229 unsigned i;
226 230 for (i = 0; i < nbSamples; ++i) {
227 231 sum += samplesSizes[i];
228 232 }
@@ -377,14 +381,6 b' static void COVER_group(COVER_ctx_t *ctx'
377 381 ctx->suffix[dmerId] = freq;
378 382 }
379 383
380 /**
381 * A segment is a range in the source as well as the score of the segment.
382 */
383 typedef struct {
384 U32 begin;
385 U32 end;
386 U32 score;
387 } COVER_segment_t;
388 384
389 385 /**
390 386 * Selects the best segment in an epoch.
@@ -494,6 +490,10 b' static int COVER_checkParameters(ZDICT_c'
494 490 if (parameters.d > parameters.k) {
495 491 return 0;
496 492 }
493 /* 0 < splitPoint <= 1 */
494 if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
495 return 0;
496 }
497 497 return 1;
498 498 }
499 499
@@ -531,9 +531,14 b' static void COVER_ctx_destroy(COVER_ctx_'
531 531 */
532 532 static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
533 533 const size_t *samplesSizes, unsigned nbSamples,
534 unsigned d) {
534 unsigned d, double splitPoint) {
535 535 const BYTE *const samples = (const BYTE *)samplesBuffer;
536 536 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
537 /* Split samples into testing and training sets */
538 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
539 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
540 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
541 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
537 542 /* Checks */
538 543 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
539 544 totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
@@ -541,15 +546,29 b' static int COVER_ctx_init(COVER_ctx_t *c'
541 546 (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
542 547 return 0;
543 548 }
549 /* Check if there are at least 5 training samples */
550 if (nbTrainSamples < 5) {
551 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
552 return 0;
553 }
554 /* Check if there's at least one testing sample */
555 if (nbTestSamples < 1) {
556 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
557 return 0;
558 }
544 559 /* Zero the context */
545 560 memset(ctx, 0, sizeof(*ctx));
546 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
547 (U32)totalSamplesSize);
561 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
562 (U32)trainingSamplesSize);
563 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
564 (U32)testSamplesSize);
548 565 ctx->samples = samples;
549 566 ctx->samplesSizes = samplesSizes;
550 567 ctx->nbSamples = nbSamples;
568 ctx->nbTrainSamples = nbTrainSamples;
569 ctx->nbTestSamples = nbTestSamples;
551 570 /* Partial suffix array */
552 ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
571 ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
553 572 ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
554 573 /* Maps index to the dmerID */
555 574 ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
@@ -563,7 +582,7 b' static int COVER_ctx_init(COVER_ctx_t *c'
563 582 ctx->freqs = NULL;
564 583 ctx->d = d;
565 584
566 /* Fill offsets from the samlesSizes */
585 /* Fill offsets from the samplesSizes */
567 586 {
568 587 U32 i;
569 588 ctx->offsets[0] = 0;
@@ -581,10 +600,17 b' static int COVER_ctx_init(COVER_ctx_t *c'
581 600 for (i = 0; i < ctx->suffixSize; ++i) {
582 601 ctx->suffix[i] = i;
583 602 }
584 /* qsort doesn't take an opaque pointer, so pass as a global */
603 /* qsort doesn't take an opaque pointer, so pass as a global.
604 * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605 */
585 606 g_ctx = ctx;
607 #if defined(__OpenBSD__)
608 mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
610 #else
586 611 qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
587 612 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
613 #endif
588 614 }
589 615 DISPLAYLEVEL(2, "Computing frequencies\n");
590 616 /* For each dmer group (group of positions with the same first d bytes):
@@ -613,7 +639,7 b' static size_t COVER_buildDictionary(cons'
613 639 /* Divide the data up into epochs of equal size.
614 640 * We will select at least one segment from each epoch.
615 641 */
616 const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
642 const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
617 643 const U32 epochSize = (U32)(ctx->suffixSize / epochs);
618 644 size_t epoch;
619 645 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
@@ -658,7 +684,7 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe'
658 684 BYTE* const dict = (BYTE*)dictBuffer;
659 685 COVER_ctx_t ctx;
660 686 COVER_map_t activeDmers;
661
687 parameters.splitPoint = 1.0;
662 688 /* Initialize global data */
663 689 g_displayLevel = parameters.zParams.notificationLevel;
664 690 /* Checks */
@@ -677,7 +703,7 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe'
677 703 }
678 704 /* Initialize context and activeDmers */
679 705 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
680 parameters.d)) {
706 parameters.d, parameters.splitPoint)) {
681 707 return ERROR(GENERIC);
682 708 }
683 709 if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
@@ -704,28 +730,65 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe'
704 730 }
705 731 }
706 732
707 /**
708 * COVER_best_t is used for two purposes:
709 * 1. Synchronizing threads.
710 * 2. Saving the best parameters and dictionary.
711 *
712 * All of the methods except COVER_best_init() are thread safe if zstd is
713 * compiled with multithreaded support.
714 */
715 typedef struct COVER_best_s {
716 ZSTD_pthread_mutex_t mutex;
717 ZSTD_pthread_cond_t cond;
718 size_t liveJobs;
719 void *dict;
720 size_t dictSize;
721 ZDICT_cover_params_t parameters;
722 size_t compressedSize;
723 } COVER_best_t;
733
734
735 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
736 const size_t *samplesSizes, const BYTE *samples,
737 size_t *offsets,
738 size_t nbTrainSamples, size_t nbSamples,
739 BYTE *const dict, size_t dictBufferCapacity) {
740 size_t totalCompressedSize = ERROR(GENERIC);
741 /* Pointers */
742 ZSTD_CCtx *cctx;
743 ZSTD_CDict *cdict;
744 void *dst;
745 /* Local variables */
746 size_t dstCapacity;
747 size_t i;
748 /* Allocate dst with enough space to compress the maximum sized sample */
749 {
750 size_t maxSampleSize = 0;
751 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
752 for (; i < nbSamples; ++i) {
753 maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
754 }
755 dstCapacity = ZSTD_compressBound(maxSampleSize);
756 dst = malloc(dstCapacity);
757 }
758 /* Create the cctx and cdict */
759 cctx = ZSTD_createCCtx();
760 cdict = ZSTD_createCDict(dict, dictBufferCapacity,
761 parameters.zParams.compressionLevel);
762 if (!dst || !cctx || !cdict) {
763 goto _compressCleanup;
764 }
765 /* Compress each sample and sum their sizes (or error) */
766 totalCompressedSize = dictBufferCapacity;
767 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
768 for (; i < nbSamples; ++i) {
769 const size_t size = ZSTD_compress_usingCDict(
770 cctx, dst, dstCapacity, samples + offsets[i],
771 samplesSizes[i], cdict);
772 if (ZSTD_isError(size)) {
773 totalCompressedSize = ERROR(GENERIC);
774 goto _compressCleanup;
775 }
776 totalCompressedSize += size;
777 }
778 _compressCleanup:
779 ZSTD_freeCCtx(cctx);
780 ZSTD_freeCDict(cdict);
781 if (dst) {
782 free(dst);
783 }
784 return totalCompressedSize;
785 }
786
724 787
725 788 /**
726 789 * Initialize the `COVER_best_t`.
727 790 */
728 static void COVER_best_init(COVER_best_t *best) {
791 void COVER_best_init(COVER_best_t *best) {
729 792 if (best==NULL) return; /* compatible with init on NULL */
730 793 (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
731 794 (void)ZSTD_pthread_cond_init(&best->cond, NULL);
@@ -739,7 +802,7 b' static void COVER_best_init(COVER_best_t'
739 802 /**
740 803 * Wait until liveJobs == 0.
741 804 */
742 static void COVER_best_wait(COVER_best_t *best) {
805 void COVER_best_wait(COVER_best_t *best) {
743 806 if (!best) {
744 807 return;
745 808 }
@@ -753,7 +816,7 b' static void COVER_best_wait(COVER_best_t'
753 816 /**
754 817 * Call COVER_best_wait() and then destroy the COVER_best_t.
755 818 */
756 static void COVER_best_destroy(COVER_best_t *best) {
819 void COVER_best_destroy(COVER_best_t *best) {
757 820 if (!best) {
758 821 return;
759 822 }
@@ -769,7 +832,7 b' static void COVER_best_destroy(COVER_bes'
769 832 * Called when a thread is about to be launched.
770 833 * Increments liveJobs.
771 834 */
772 static void COVER_best_start(COVER_best_t *best) {
835 void COVER_best_start(COVER_best_t *best) {
773 836 if (!best) {
774 837 return;
775 838 }
@@ -783,7 +846,7 b' static void COVER_best_start(COVER_best_'
783 846 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
784 847 * If this dictionary is the best so far save it and its parameters.
785 848 */
786 static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
849 void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
787 850 ZDICT_cover_params_t parameters, void *dict,
788 851 size_t dictSize) {
789 852 if (!best) {
@@ -814,10 +877,10 b' static void COVER_best_finish(COVER_best'
814 877 best->parameters = parameters;
815 878 best->compressedSize = compressedSize;
816 879 }
817 ZSTD_pthread_mutex_unlock(&best->mutex);
818 880 if (liveJobs == 0) {
819 881 ZSTD_pthread_cond_broadcast(&best->cond);
820 882 }
883 ZSTD_pthread_mutex_unlock(&best->mutex);
821 884 }
822 885 }
823 886
@@ -832,7 +895,7 b' typedef struct COVER_tryParameters_data_'
832 895 } COVER_tryParameters_data_t;
833 896
834 897 /**
835 * Tries a set of parameters and upates the COVER_best_t with the results.
898 * Tries a set of parameters and updates the COVER_best_t with the results.
836 899 * This function is thread safe if zstd is compiled with multithreaded support.
837 900 * It takes its parameters as an *OWNING* opaque pointer to support threading.
838 901 */
@@ -863,7 +926,7 b' static void COVER_tryParameters(void *op'
863 926 dictBufferCapacity, parameters);
864 927 dictBufferCapacity = ZDICT_finalizeDictionary(
865 928 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
866 ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
929 ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
867 930 parameters.zParams);
868 931 if (ZDICT_isError(dictBufferCapacity)) {
869 932 DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
@@ -871,49 +934,10 b' static void COVER_tryParameters(void *op'
871 934 }
872 935 }
873 936 /* Check total compressed size */
874 {
875 /* Pointers */
876 ZSTD_CCtx *cctx;
877 ZSTD_CDict *cdict;
878 void *dst;
879 /* Local variables */
880 size_t dstCapacity;
881 size_t i;
882 /* Allocate dst with enough space to compress the maximum sized sample */
883 {
884 size_t maxSampleSize = 0;
885 for (i = 0; i < ctx->nbSamples; ++i) {
886 maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
887 }
888 dstCapacity = ZSTD_compressBound(maxSampleSize);
889 dst = malloc(dstCapacity);
890 }
891 /* Create the cctx and cdict */
892 cctx = ZSTD_createCCtx();
893 cdict = ZSTD_createCDict(dict, dictBufferCapacity,
894 parameters.zParams.compressionLevel);
895 if (!dst || !cctx || !cdict) {
896 goto _compressCleanup;
897 }
898 /* Compress each sample and sum their sizes (or error) */
899 totalCompressedSize = dictBufferCapacity;
900 for (i = 0; i < ctx->nbSamples; ++i) {
901 const size_t size = ZSTD_compress_usingCDict(
902 cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
903 ctx->samplesSizes[i], cdict);
904 if (ZSTD_isError(size)) {
905 totalCompressedSize = ERROR(GENERIC);
906 goto _compressCleanup;
907 }
908 totalCompressedSize += size;
909 }
910 _compressCleanup:
911 ZSTD_freeCCtx(cctx);
912 ZSTD_freeCDict(cdict);
913 if (dst) {
914 free(dst);
915 }
916 }
937 totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
938 ctx->samples, ctx->offsets,
939 ctx->nbTrainSamples, ctx->nbSamples,
940 dict, dictBufferCapacity);
917 941
918 942 _cleanup:
919 943 COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
@@ -934,6 +958,8 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF'
934 958 ZDICT_cover_params_t *parameters) {
935 959 /* constants */
936 960 const unsigned nbThreads = parameters->nbThreads;
961 const double splitPoint =
962 parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
937 963 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
938 964 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
939 965 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@@ -951,6 +977,10 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF'
951 977 POOL_ctx *pool = NULL;
952 978
953 979 /* Checks */
980 if (splitPoint <= 0 || splitPoint > 1) {
981 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
982 return ERROR(GENERIC);
983 }
954 984 if (kMinK < kMaxD || kMaxK < kMinK) {
955 985 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
956 986 return ERROR(GENERIC);
@@ -981,7 +1011,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF'
981 1011 /* Initialize the context for this value of d */
982 1012 COVER_ctx_t ctx;
983 1013 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
984 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
1014 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
985 1015 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
986 1016 COVER_best_destroy(&best);
987 1017 POOL_free(pool);
@@ -1006,6 +1036,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF'
1006 1036 data->parameters = *parameters;
1007 1037 data->parameters.k = k;
1008 1038 data->parameters.d = d;
1039 data->parameters.splitPoint = splitPoint;
1009 1040 data->parameters.steps = kSteps;
1010 1041 data->parameters.zParams.notificationLevel = g_displayLevel;
1011 1042 /* Check the parameters */
@@ -1637,7 +1637,7 b' construct_SA(const unsigned char *T, int'
1637 1637 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638 1638 k = SA + BUCKET_B(c2 = c0, c1);
1639 1639 }
1640 assert(k < j);
1640 assert(k < j); assert(k != NULL);
1641 1641 *k-- = s;
1642 1642 } else {
1643 1643 assert(((s == 0) && (T[s] == c1)) || (s < 0));
@@ -1701,7 +1701,7 b' construct_BWT(const unsigned char *T, in'
1701 1701 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702 1702 k = SA + BUCKET_B(c2 = c0, c1);
1703 1703 }
1704 assert(k < j);
1704 assert(k < j); assert(k != NULL);
1705 1705 *k-- = s;
1706 1706 } else if(s != 0) {
1707 1707 *j = ~s;
@@ -1785,7 +1785,7 b' construct_BWT_indexes(const unsigned cha'
1785 1785 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786 1786 k = SA + BUCKET_B(c2 = c0, c1);
1787 1787 }
1788 assert(k < j);
1788 assert(k < j); assert(k != NULL);
1789 1789 *k-- = s;
1790 1790 } else if(s != 0) {
1791 1791 *j = ~s;
@@ -293,7 +293,7 b' static dictItem ZDICT_analyzePos('
293 293 refinedEnd = refinedStart + selectedCount;
294 294 }
295 295
296 /* evaluate gain based on new ref */
296 /* evaluate gain based on new dict */
297 297 start = refinedStart;
298 298 pos = suffix[refinedStart];
299 299 end = start;
@@ -341,7 +341,7 b' static dictItem ZDICT_analyzePos('
341 341 for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
342 342 savings[i] = savings[i-1] + (lengthList[i] * (i-3));
343 343
344 DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n",
344 DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
345 345 (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
346 346
347 347 solution.pos = (U32)pos;
@@ -581,7 +581,7 b' static void ZDICT_fillNoise(void* buffer'
581 581
582 582 typedef struct
583 583 {
584 ZSTD_CCtx* ref; /* contains reference to dictionary */
584 ZSTD_CDict* dict; /* dictionary */
585 585 ZSTD_CCtx* zc; /* working context */
586 586 void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
587 587 } EStats_ress_t;
@@ -597,8 +597,9 b' static void ZDICT_countEStats(EStats_res'
597 597 size_t cSize;
598 598
599 599 if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
600 { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
601 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
600 { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
601 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
602
602 603 }
603 604 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
604 605 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
@@ -697,7 +698,7 b' static size_t ZDICT_analyzeEntropy(void*'
697 698 short litLengthNCount[MaxLL+1];
698 699 U32 repOffset[MAXREPOFFSET];
699 700 offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
700 EStats_ress_t esr;
701 EStats_ress_t esr = { NULL, NULL, NULL };
701 702 ZSTD_parameters params;
702 703 U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
703 704 size_t pos = 0, errorCode;
@@ -708,14 +709,6 b' static size_t ZDICT_analyzeEntropy(void*'
708 709
709 710 /* init */
710 711 DEBUGLOG(4, "ZDICT_analyzeEntropy");
711 esr.ref = ZSTD_createCCtx();
712 esr.zc = ZSTD_createCCtx();
713 esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
714 if (!esr.ref || !esr.zc || !esr.workPlace) {
715 eSize = ERROR(memory_allocation);
716 DISPLAYLEVEL(1, "Not enough memory \n");
717 goto _cleanup;
718 }
719 712 if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
720 713 for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
721 714 for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
@@ -724,14 +717,17 b' static size_t ZDICT_analyzeEntropy(void*'
724 717 memset(repOffset, 0, sizeof(repOffset));
725 718 repOffset[1] = repOffset[4] = repOffset[8] = 1;
726 719 memset(bestRepOffset, 0, sizeof(bestRepOffset));
727 if (compressionLevel<=0) compressionLevel = g_compressionLevel_default;
720 if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
728 721 params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
729 { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
730 if (ZSTD_isError(beginResult)) {
731 DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
732 eSize = ERROR(GENERIC);
733 goto _cleanup;
734 } }
722
723 esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
724 esr.zc = ZSTD_createCCtx();
725 esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
726 if (!esr.dict || !esr.zc || !esr.workPlace) {
727 eSize = ERROR(memory_allocation);
728 DISPLAYLEVEL(1, "Not enough memory \n");
729 goto _cleanup;
730 }
735 731
736 732 /* collect stats on all samples */
737 733 for (u=0; u<nbFiles; u++) {
@@ -856,7 +852,7 b' static size_t ZDICT_analyzeEntropy(void*'
856 852 eSize += 12;
857 853
858 854 _cleanup:
859 ZSTD_freeCCtx(esr.ref);
855 ZSTD_freeCDict(esr.dict);
860 856 ZSTD_freeCCtx(esr.zc);
861 857 free(esr.workPlace);
862 858
@@ -867,13 +863,13 b' static size_t ZDICT_analyzeEntropy(void*'
867 863
868 864 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
869 865 const void* customDictContent, size_t dictContentSize,
870 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
871 ZDICT_params_t params)
866 const void* samplesBuffer, const size_t* samplesSizes,
867 unsigned nbSamples, ZDICT_params_t params)
872 868 {
873 869 size_t hSize;
874 870 #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
875 871 BYTE header[HBUFFSIZE];
876 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
872 int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
877 873 U32 const notificationLevel = params.notificationLevel;
878 874
879 875 /* check conditions */
@@ -914,11 +910,12 b' size_t ZDICT_finalizeDictionary(void* di'
914 910 }
915 911
916 912
917 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
918 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
919 ZDICT_params_t params)
913 static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
914 void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
915 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
916 ZDICT_params_t params)
920 917 {
921 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
918 int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
922 919 U32 const notificationLevel = params.notificationLevel;
923 920 size_t hSize = 8;
924 921
@@ -947,7 +944,11 b' size_t ZDICT_addEntropyTablesFromBuffer_'
947 944 return MIN(dictBufferCapacity, hSize+dictContentSize);
948 945 }
949 946
950
947 /* Hidden declaration for dbio.c */
948 size_t ZDICT_trainFromBuffer_unsafe_legacy(
949 void* dictBuffer, size_t maxDictSize,
950 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
951 ZDICT_legacy_params_t params);
951 952 /*! ZDICT_trainFromBuffer_unsafe_legacy() :
952 953 * Warning : `samplesBuffer` must be followed by noisy guard band.
953 954 * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
@@ -991,8 +992,10 b' size_t ZDICT_trainFromBuffer_unsafe_lega'
991 992 U32 const pos = dictList[u].pos;
992 993 U32 const length = dictList[u].length;
993 994 U32 const printedLength = MIN(40, length);
994 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
995 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
996 free(dictList);
995 997 return ERROR(GENERIC); /* should never happen */
998 }
996 999 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
997 1000 u, length, pos, dictList[u].savings);
998 1001 ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
@@ -1082,17 +1085,17 b' size_t ZDICT_trainFromBuffer_legacy(void'
1082 1085 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
1083 1086 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1084 1087 {
1085 ZDICT_cover_params_t params;
1088 ZDICT_fastCover_params_t params;
1086 1089 DEBUGLOG(3, "ZDICT_trainFromBuffer");
1087 1090 memset(&params, 0, sizeof(params));
1088 1091 params.d = 8;
1089 1092 params.steps = 4;
1090 1093 /* Default to level 6 since no compression level information is available */
1091 params.zParams.compressionLevel = 6;
1092 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
1093 params.zParams.notificationLevel = ZSTD_DEBUG;
1094 params.zParams.compressionLevel = 3;
1095 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
1096 params.zParams.notificationLevel = DEBUGLEVEL;
1094 1097 #endif
1095 return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
1098 return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
1096 1099 samplesBuffer, samplesSizes, nbSamples,
1097 1100 &params);
1098 1101 }
@@ -39,7 +39,8 b' extern "C" {'
39 39
40 40 /*! ZDICT_trainFromBuffer():
41 41 * Train a dictionary from an array of samples.
42 * Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
42 * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
43 * f=20, and accel=1.
43 44 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
44 45 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
45 46 * The resulting dictionary will be saved into `dictBuffer`.
@@ -52,7 +53,8 b' extern "C" {'
52 53 * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
53 54 */
54 55 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
55 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
56 const void* samplesBuffer,
57 const size_t* samplesSizes, unsigned nbSamples);
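
A minimal usage sketch for the declaration above, assuming the caller has already concatenated its samples into samplesBuffer and filled samplesSizes/nbSamples as described (hypothetical caller-side variables, shown only for illustration):

#include <stdlib.h>
#include <zdict.h>

/* Illustrative sketch only, not part of upstream zstd. */
static void* train_dict(const void* samplesBuffer, const size_t* samplesSizes,
                        unsigned nbSamples, size_t* dictSizePtr)
{
    size_t const dictCapacity = 100 * 1024;   /* ~100 KB, per the recommendation above */
    void* const dictBuffer = malloc(dictCapacity);
    size_t const dictSize = dictBuffer
        ? ZDICT_trainFromBuffer(dictBuffer, dictCapacity,
                                samplesBuffer, samplesSizes, nbSamples)
        : 0;
    if (dictBuffer == NULL || ZDICT_isError(dictSize)) { free(dictBuffer); return NULL; }
    *dictSizePtr = dictSize;
    return dictBuffer;   /* caller frees */
}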
56 58
57 59
58 60 /*====== Helper functions ======*/
@@ -84,11 +86,22 b' typedef struct {'
84 86 typedef struct {
85 87 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
86 88 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
87 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
89 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
88 90 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
91 double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
89 92 ZDICT_params_t zParams;
90 93 } ZDICT_cover_params_t;
91 94
95 typedef struct {
96 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
97 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
98 unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default(20)*/
99 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
100 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
101 double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
102 unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
103 ZDICT_params_t zParams;
104 } ZDICT_fastCover_params_t;
92 105
93 106 /*! ZDICT_trainFromBuffer_cover():
94 107 * Train a dictionary from an array of samples using the COVER algorithm.
@@ -115,9 +128,9 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe'
115 128 * dictionary constructed with those parameters is stored in `dictBuffer`.
116 129 *
117 130 * All of the parameters d, k, steps are optional.
118 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
131 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
119 132 * If steps is zero, it uses its default value.
120 * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
133 * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
121 134 *
122 135 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
123 136 * or an error code, which can be tested with ZDICT_isError().
@@ -129,6 +142,48 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF'
129 142 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
130 143 ZDICT_cover_params_t* parameters);
131 144
145 /*! ZDICT_trainFromBuffer_fastCover():
146 * Train a dictionary from an array of samples using a modified version of the COVER algorithm.
147 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
148 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
149 * d and k are required.
150 * All other parameters are optional; default values are used if they are not provided.
151 * The resulting dictionary will be saved into `dictBuffer`.
152 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
153 * or an error code, which can be tested with ZDICT_isError().
154 * Note: ZDICT_trainFromBuffer_fastCover() requires about 1 byte of memory for each input byte, plus an additional 6 * 2^f bytes of memory.
155 * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
156 * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
157 * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
158 * It's recommended that the total size of all samples be about 100x the target size of the dictionary.
159 */
160 ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
161 size_t dictBufferCapacity, const void *samplesBuffer,
162 const size_t *samplesSizes, unsigned nbSamples,
163 ZDICT_fastCover_params_t parameters);
164
165 /*! ZDICT_optimizeTrainFromBuffer_fastCover():
166 * The same requirements as above hold for all the parameters except `parameters`.
167 * This function tries many parameter combinations (specifically, k and d combinations)
168 * and picks the best parameters. `*parameters` is filled with the best parameters found,
169 * dictionary constructed with those parameters is stored in `dictBuffer`.
170 * All of the parameters d, k, steps, f, and accel are optional.
171 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
172 * If steps is zero, it uses its default value.
173 * If k is non-zero then we don't check multiple values of k, otherwise we check `steps` values of k in [50, 2000].
174 * If f is zero, default value of 20 is used.
175 * If accel is zero, default value of 1 is used.
176 *
177 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
178 * or an error code, which can be tested with ZDICT_isError().
179 * On success `*parameters` contains the parameters selected.
180 * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte, plus an additional 6 * 2^f bytes of memory for each thread.
181 */
182 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
183 size_t dictBufferCapacity, const void* samplesBuffer,
184 const size_t* samplesSizes, unsigned nbSamples,
185 ZDICT_fastCover_params_t* parameters);
186
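
As a rough illustration of the fastCover entry points added above, here is a minimal sketch (editorial, not part of the vendored sources). It zero-initializes a ZDICT_fastCover_params_t so the optimizer searches k and d with the documented defaults; the concatenated sample buffers are assumed to be supplied by the caller:

    #define ZDICT_STATIC_LINKING_ONLY   /* fastCover lives in the experimental section */
    #include <string.h>
    #include "zdict.h"

    /* Sketch only: samples are assumed concatenated in `samplesBuffer`,
     * with per-sample sizes in `samplesSizes`. */
    static size_t train_dict_fastcover(void* dictBuffer, size_t dictBufferCapacity,
                                       const void* samplesBuffer,
                                       const size_t* samplesSizes, unsigned nbSamples)
    {
        ZDICT_fastCover_params_t params;
        memset(&params, 0, sizeof(params));   /* k, d, f, steps, accel = 0 => defaults / searched */
        params.zParams.compressionLevel = 3;  /* same level the plain ZDICT_trainFromBuffer() uses */

        return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
                                                       samplesBuffer, samplesSizes, nbSamples,
                                                       &params);   /* test with ZDICT_isError() */
    }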
132 187 /*! ZDICT_finalizeDictionary():
133 188 * Given a custom content as a basis for dictionary, and a set of samples,
134 189 * finalize dictionary by adding headers and statistics.
@@ -35,31 +35,43 b' extern "C" {'
35 35 #endif
36 36
37 37
38 /*******************************************************************************************************
38 /*******************************************************************************
39 39 Introduction
40 40
41 zstd, short for Zstandard, is a fast lossless compression algorithm,
42 targeting real-time compression scenarios at zlib-level and better compression ratios.
43 The zstd compression library provides in-memory compression and decompression functions.
44 The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22.
45 Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory.
41 zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
42 real-time compression scenarios at zlib-level and better compression ratios.
43 The zstd compression library provides in-memory compression and decompression
44 functions.
45
46 The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
47 which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
48 caution, as they require more memory. The library also offers negative
49 compression levels, which extend the range of speed vs. ratio preferences.
50 The lower the level, the faster the speed (at the cost of compression).
51
46 52 Compression can be done in:
47 53 - a single step (described as Simple API)
48 54 - a single step, reusing a context (described as Explicit context)
49 55 - unbounded multiple steps (described as Streaming compression)
50 The compression ratio achievable on small data can be highly improved using a dictionary in:
56
57 The compression ratio achievable on small data can be highly improved using
58 a dictionary. Dictionary compression can be performed in:
51 59 - a single step (described as Simple dictionary API)
52 - a single step, reusing a dictionary (described as Bulk-processing dictionary API)
60 - a single step, reusing a dictionary (described as Bulk-processing
61 dictionary API)
53 62
54 Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
55 Advanced experimental APIs shall never be used with a dynamic library.
56 They are not "stable", their definition may change in the future. Only static linking is allowed.
57 *********************************************************************************************************/
63 Advanced experimental functions can be accessed using
64 `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
65
66 Advanced experimental APIs should never be used with a dynamically-linked
67 library. They are not "stable"; their definitions or signatures may change in
68 the future. Only static linking is allowed.
69 *******************************************************************************/
58 70
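
For orientation, a minimal round trip through the simple API described above might look like the following sketch (level 3 and the abbreviated error handling are illustrative assumptions, not taken from the vendored code):

    #include <stdlib.h>
    #include "zstd.h"

    /* Compress `src`, decompress it again, and report success. */
    static int zstd_roundtrip(const void* src, size_t srcSize)
    {
        size_t const bound = ZSTD_compressBound(srcSize);
        void* const cBuf = malloc(bound);
        void* const rBuf = malloc(srcSize);
        int ok = 0;

        if (cBuf && rBuf) {
            size_t const cSize = ZSTD_compress(cBuf, bound, src, srcSize, 3);
            if (!ZSTD_isError(cSize)) {
                size_t const rSize = ZSTD_decompress(rBuf, srcSize, cBuf, cSize);
                ok = !ZSTD_isError(rSize) && rSize == srcSize;
            }
        }
        free(cBuf);
        free(rBuf);
        return ok;
    }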
59 71 /*------ Version ------*/
60 72 #define ZSTD_VERSION_MAJOR 1
61 73 #define ZSTD_VERSION_MINOR 3
62 #define ZSTD_VERSION_RELEASE 4
74 #define ZSTD_VERSION_RELEASE 6
63 75
64 76 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
65 77 ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */
@@ -68,8 +80,14 b' ZSTDLIB_API unsigned ZSTD_versionNumber('
68 80 #define ZSTD_QUOTE(str) #str
69 81 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
70 82 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
71 ZSTDLIB_API const char* ZSTD_versionString(void); /* added in v1.3.0 */
83 ZSTDLIB_API const char* ZSTD_versionString(void); /* v1.3.0+ */
72 84
85 /***************************************
86 * Default constant
87 ***************************************/
88 #ifndef ZSTD_CLEVEL_DEFAULT
89 # define ZSTD_CLEVEL_DEFAULT 3
90 #endif
73 91
74 92 /***************************************
75 93 * Simple API
@@ -96,7 +114,7 b' ZSTDLIB_API size_t ZSTD_decompress( void'
96 114 * `src` should point to the start of a ZSTD encoded frame.
97 115 * `srcSize` must be at least as large as the frame header.
98 116 * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
99 * @return : - decompressed size of the frame in `src`, if known
117 * @return : - decompressed size of `src` frame content, if known
100 118 * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
101 119 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
102 120 * note 1 : a 0 return value means the frame is valid but "empty".
@@ -106,7 +124,8 b' ZSTDLIB_API size_t ZSTD_decompress( void'
106 124 * Optionally, application can rely on some implicit limit,
107 125 * as ZSTD_decompress() only needs an upper bound of decompressed size.
108 126 * (For example, data could be necessarily cut into blocks <= 16 KB).
109 * note 3 : decompressed size is always present when compression is done with ZSTD_compress()
127 * note 3 : decompressed size is always present when compression is completed using single-pass functions,
128 * such as ZSTD_compress(), ZSTD_compressCCtx(), ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
110 129 * note 4 : decompressed size can be very large (64-bits value),
111 130 * potentially larger than what local system can handle as a single memory segment.
112 131 * In which case, it's necessary to use streaming mode to decompress data.
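
A hedged sketch of how the three return classes above are typically distinguished before sizing a destination buffer:

    #include "zstd.h"

    /* Sketch: classify the content-size field of a single frame before allocating. */
    static void inspect_frame(const void* src, size_t srcSize)
    {
        unsigned long long const contentSize = ZSTD_getFrameContentSize(src, srcSize);
        if (contentSize == ZSTD_CONTENTSIZE_ERROR) {
            /* not a valid zstd frame (bad magic number, srcSize too small, ...) */
        } else if (contentSize == ZSTD_CONTENTSIZE_UNKNOWN) {
            /* size was not recorded: fall back to streaming decompression */
        } else {
            /* contentSize bytes are expected: allocate and call ZSTD_decompress() */
        }
    }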
@@ -123,8 +142,7 b' ZSTDLIB_API unsigned long long ZSTD_getF'
123 142 * Both functions work the same way, but ZSTD_getDecompressedSize() blends
124 143 * "empty", "unknown" and "error" results to the same return value (0),
125 144 * while ZSTD_getFrameContentSize() gives them separate return values.
126 * `src` is the start of a zstd compressed frame.
127 * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
145 * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
128 146 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
129 147
130 148
@@ -205,7 +223,8 b' typedef struct ZSTD_CDict_s ZSTD_CDict;'
205 223 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
206 224 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
207 225 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
208 * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict */
226 * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict
227 * Note : A ZSTD_CDict can be created with an empty dictionary, but it is inefficient for small data. */
209 228 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
210 229 int compressionLevel);
211 230
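
A sketch of the intended reuse pattern, together with ZSTD_compress_usingCDict() documented in the next hunk. The dictionary bytes are assumed to already be in memory; in real code the CDict and CCtx would be kept alive across many messages rather than freed immediately:

    #include "zstd.h"

    /* Sketch: digest the dictionary once, then reuse it for many small inputs. */
    static size_t compress_with_cdict(void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      const void* dictBuffer, size_t dictSize)
    {
        ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 3);
        ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
        size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                                      src, srcSize, cdict);
        /* In real code, keep cdict and cctx alive across messages instead of freeing here. */
        ZSTD_freeCCtx(cctx);
        ZSTD_freeCDict(cdict);
        return cSize;   /* test with ZSTD_isError() */
    }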
@@ -217,7 +236,9 b' ZSTDLIB_API size_t ZSTD_freeCDict(Z'
217 236 * Compression using a digested Dictionary.
218 237 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
219 238 * Note that compression level is decided during dictionary creation.
220 * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
239 * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no)
240 * Note : ZSTD_compress_usingCDict() can be used with a ZSTD_CDict created from an empty dictionary.
241 * But it is inefficient for small data, and it is recommended to use ZSTD_compressCCtx(). */
221 242 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
222 243 void* dst, size_t dstCapacity,
223 244 const void* src, size_t srcSize,
@@ -272,39 +293,44 b' typedef struct ZSTD_outBuffer_s {'
272 293 * since it will play nicer with system's memory, by re-using already allocated memory.
273 294 * Use one separate ZSTD_CStream per thread for parallel execution.
274 295 *
275 * Start a new compression by initializing ZSTD_CStream.
296 * Start a new compression by initializing ZSTD_CStream context.
276 297 * Use ZSTD_initCStream() to start a new compression operation.
277 * Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section)
298 * Use variants ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for streaming with dictionary (experimental section)
278 299 *
279 * Use ZSTD_compressStream() repetitively to consume input stream.
280 * The function will automatically update both `pos` fields.
281 * Note that it may not consume the entire input, in which case `pos < size`,
282 * and it's up to the caller to present again remaining data.
300 * Use ZSTD_compressStream() as many times as necessary to consume input stream.
301 * The function will automatically update both `pos` fields within `input` and `output`.
302 * Note that the function may not consume the entire input,
303 * for example, because the output buffer is already full,
304 * in which case `input.pos < input.size`.
305 * The caller must check if input has been entirely consumed.
306 * If not, the caller must make some room to receive more compressed data,
307 * typically by emptying output buffer, or allocating a new output buffer,
308 * and then present again remaining input data.
283 309 * @return : a size hint, preferred nb of bytes to use as input for next function call
284 310 * or an error code, which can be tested using ZSTD_isError().
285 311 * Note 1 : it's just a hint, to help latency a little, any other value will work fine.
286 312 * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
287 313 *
288 * At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream().
289 * `output->pos` will be updated.
290 * Note that some content might still be left within internal buffer if `output->size` is too small.
291 * @return : nb of bytes still present within internal buffer (0 if it's empty)
314 * At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
315 * using ZSTD_flushStream(). `output->pos` will be updated.
316 * Note that, if `output->size` is too small, a single invocation of ZSTD_flushStream() might not be enough (return code > 0).
317 * In which case, make some room to receive more compressed data, and call again ZSTD_flushStream().
318 * @return : 0 if internal buffers are entirely flushed,
319 * >0 if some data is still present within the internal buffer (the value is a minimal estimate of the remaining size),
292 320 * or an error code, which can be tested using ZSTD_isError().
293 321 *
294 322 * ZSTD_endStream() instructs to finish a frame.
295 323 * It will perform a flush and write frame epilogue.
296 324 * The epilogue is required for decoders to consider a frame completed.
297 * ZSTD_endStream() may not be able to flush full data if `output->size` is too small.
298 * In which case, call again ZSTD_endStream() to complete the flush.
325 * The flush operation is the same, and follows the same rules as ZSTD_flushStream().
299 326 * @return : 0 if frame fully completed and fully flushed,
300 or >0 if some data is still present within internal buffer
301 (value is minimum size estimation for remaining data to flush, but it could be more)
327 * >0 if some data is still present within the internal buffer (the value is a minimal estimate of the remaining size),
302 328 * or an error code, which can be tested using ZSTD_isError().
303 329 *
304 330 * *******************************************************************/
305 331
306 332 typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
307 /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
333 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
308 334 /*===== ZSTD_CStream management functions =====*/
309 335 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
310 336 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
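
Putting the contract above together, a streaming compression loop might look like the following sketch (file-based I/O and compression level 3 are assumptions for illustration):

    #include <stdio.h>
    #include <stdlib.h>
    #include "zstd.h"

    /* Sketch: compress everything read from `fin` into `fout` at level 3. */
    static int compress_file(FILE* fin, FILE* fout)
    {
        ZSTD_CStream* const zcs = ZSTD_createCStream();
        size_t const inSize  = ZSTD_CStreamInSize();
        size_t const outSize = ZSTD_CStreamOutSize();
        void* const inBuf  = malloc(inSize);
        void* const outBuf = malloc(outSize);
        if (!zcs || !inBuf || !outBuf) goto error;
        if (ZSTD_isError(ZSTD_initCStream(zcs, 3))) goto error;

        for (;;) {
            size_t const read = fread(inBuf, 1, inSize, fin);
            if (read == 0) break;
            ZSTD_inBuffer input = { inBuf, read, 0 };
            while (input.pos < input.size) {          /* re-present unconsumed input */
                ZSTD_outBuffer output = { outBuf, outSize, 0 };
                if (ZSTD_isError(ZSTD_compressStream(zcs, &output, &input))) goto error;
                fwrite(outBuf, 1, output.pos, fout);
            }
        }

        for (;;) {                                    /* write epilogue until fully flushed */
            ZSTD_outBuffer output = { outBuf, outSize, 0 };
            size_t const remaining = ZSTD_endStream(zcs, &output);
            if (ZSTD_isError(remaining)) goto error;
            fwrite(outBuf, 1, output.pos, fout);
            if (remaining == 0) break;
        }

        ZSTD_freeCStream(zcs); free(inBuf); free(outBuf);
        return 0;
    error:
        ZSTD_freeCStream(zcs); free(inBuf); free(outBuf);
        return -1;
    }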
@@ -359,21 +385,28 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
359 385
360 386
361 387
388
389 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
390 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
391
362 392 /****************************************************************************************
363 * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
393 * ADVANCED AND EXPERIMENTAL FUNCTIONS
394 ****************************************************************************************
364 395 * The definitions in this section are considered experimental.
365 396 * They should never be used with a dynamic library, as prototypes may change in the future.
366 397 * They are provided for advanced scenarios.
367 398 * Use them only in association with static linking.
368 399 * ***************************************************************************************/
369 400
370 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
371 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
401 ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */
372 402
373 /* --- Constants ---*/
374 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
403 /* --- Constants ---*/
404 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8+ */
405 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */
375 406 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
376 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* >= v0.7.0 */
407
408 #define ZSTD_BLOCKSIZELOG_MAX 17
409 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) /* define, for static allocation */
377 410
378 411 #define ZSTD_WINDOWLOG_MAX_32 30
379 412 #define ZSTD_WINDOWLOG_MAX_64 31
@@ -390,9 +423,10 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
390 423 #define ZSTD_SEARCHLOG_MIN 1
391 424 #define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
392 425 #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */
393 #define ZSTD_TARGETLENGTH_MIN 1 /* only used by btopt, btultra and btfast */
426 #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
427 #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
428 #define ZSTD_LDM_MINMATCH_MAX 4096
394 429 #define ZSTD_LDM_MINMATCH_MIN 4
395 #define ZSTD_LDM_MINMATCH_MAX 4096
396 430 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
397 431
398 432 #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size to know frame header size */
@@ -404,7 +438,8 b' static const size_t ZSTD_frameHeaderSize'
404 438 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
405 439
406 440
407 /*--- Advanced types ---*/
441
442 /* --- Advanced types --- */
408 443 typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
409 444 ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; /* from faster to stronger */
410 445
@@ -480,9 +515,9 b' ZSTDLIB_API size_t ZSTD_findFrameCompres'
480 515 ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
481 516
482 517 /*! ZSTD_frameHeaderSize() :
483 * `src` should point to the start of a ZSTD frame
484 * `srcSize` must be >= ZSTD_frameHeaderSize_prefix.
485 * @return : size of the Frame Header */
518 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
519 * @return : size of the Frame Header,
520 * or an error code (if srcSize is too small) */
486 521 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
487 522
488 523
@@ -711,29 +746,48 b' ZSTDLIB_API size_t ZSTD_initCStream_usin'
711 746
712 747 /*! ZSTD_resetCStream() :
713 748 * start a new compression job, using same parameters from previous job.
714 * This is typically useful to skip dictionary loading stage, since it will re-use it in-place..
749 * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
715 750 * Note that zcs must be init at least once before using ZSTD_resetCStream().
716 751 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
717 752 * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
718 753 * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
719 754 * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
720 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
755 * @return : 0, or an error code (which can be tested using ZSTD_isError())
756 */
721 757 ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
722 758
723 759
724 760 typedef struct {
725 unsigned long long ingested;
726 unsigned long long consumed;
727 unsigned long long produced;
761 unsigned long long ingested; /* nb input bytes read and buffered */
762 unsigned long long consumed; /* nb input bytes actually compressed */
763 unsigned long long produced; /* nb of compressed bytes generated and buffered */
764 unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
765 unsigned currentJobID; /* MT only : latest started job nb */
766 unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */
728 767 } ZSTD_frameProgression;
729 768
730 /* ZSTD_getFrameProgression():
769 /* ZSTD_getFrameProgression() :
731 770 * tells how much data has been ingested (read from input),
732 771 * consumed (input actually compressed) and produced (output) for the current frame.
733 * Therefore, (ingested - consumed) is amount of input data buffered internally, not yet compressed.
734 * Can report progression inside worker threads (multi-threading and non-blocking mode).
772 * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
773 * Aggregates progression inside active worker threads.
735 774 */
736 ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
775 ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
776
777 /*! ZSTD_toFlushNow() :
778 * Tell how many bytes are ready to be flushed immediately.
779 * Useful for multithreading scenarios (nbWorkers >= 1).
780 * Probe the oldest active job, defined as oldest job not yet entirely flushed,
781 * and check its output buffer.
782 * @return : amount of data stored in oldest job and ready to be flushed immediately.
783 * if @return == 0, it means either :
784 * + there is no active job (could be checked with ZSTD_frameProgression()), or
785 * + oldest job is still actively compressing data,
786 * but everything it has produced has also been flushed so far,
787 * therefore flushing speed is currently limited by production speed of oldest job
788 * irrespective of the speed of concurrent newer jobs.
789 */
790 ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
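
For instance, a monitoring helper could combine the two probes above along these lines (a sketch; the fprintf reporting is illustrative only):

    #define ZSTD_STATIC_LINKING_ONLY    /* both probes are in the experimental section */
    #include <stdio.h>
    #include "zstd.h"

    /* Sketch: report progress of an ongoing (possibly multi-threaded) compression. */
    static void report_progress(ZSTD_CCtx* cctx)
    {
        ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
        size_t const flushable = ZSTD_toFlushNow(cctx);
        /* (fp.ingested - fp.consumed) bytes are buffered but not yet compressed. */
        fprintf(stderr, "ingested=%llu consumed=%llu produced=%llu flushable=%zu\n",
                fp.ingested, fp.consumed, fp.produced, flushable);
    }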
737 791
738 792
739 793
@@ -880,6 +934,11 b' typedef struct {'
880 934 unsigned dictID;
881 935 unsigned checksumFlag;
882 936 } ZSTD_frameHeader;
937 /** ZSTD_getFrameHeader() :
938 * decode Frame Header, or indicate that a larger `srcSize` is required.
939 * @return : 0, `zfhPtr` is correctly filled,
940 * >0, `srcSize` is too small; the return value is the required `srcSize` amount,
941 * or an error code, which can be tested using ZSTD_isError() */
883 942 ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
884 943 ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
885 944
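
A small sketch of the retry contract described above (the caller gathers more input whenever the return value is greater than zero):

    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"

    /* Sketch: parse a frame header from a possibly partial buffer. */
    static size_t parse_header(const void* buf, size_t avail, ZSTD_frameHeader* zfh)
    {
        size_t const ret = ZSTD_getFrameHeader(zfh, buf, avail);
        if (ZSTD_isError(ret)) {
            /* header is invalid */
        } else if (ret > 0) {
            /* `avail` is too small: gather at least `ret` bytes and call again */
        } else {
            /* zfh->frameContentSize, zfh->windowSize, zfh->dictID are now filled */
        }
        return ret;
    }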
@@ -901,23 +960,15 b' ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne'
901 960 /** New advanced API (experimental) */
902 961 /* ============================================ */
903 962
904 /* notes on API design :
905 * In this proposal, parameters are pushed one by one into an existing context,
906 * and then applied on all subsequent compression jobs.
907 * When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT.
963 /* API design :
964 * In this advanced API, parameters are pushed one by one into an existing context,
965 * using ZSTD_CCtx_set*() functions.
966 * Pushed parameters are sticky : they are applied to next job, and any subsequent job.
967 * It's possible to reset parameters to "default" using ZSTD_CCtx_reset().
968 * Important : "sticky" parameters only work with `ZSTD_compress_generic()` !
969 * For any other entry point, "sticky" parameters are ignored !
908 970 *
909 971 * This API is intended to replace all others advanced / experimental API entry points.
910 * But it stands a reasonable chance to become "stable", after a reasonable testing period.
911 */
912
913 /* note on naming convention :
914 * Initially, the API favored names like ZSTD_setCCtxParameter() .
915 * In this proposal, convention is changed towards ZSTD_CCtx_setParameter() .
916 * The main driver is that it identifies more clearly the target object type.
917 * It feels clearer when considering multiple targets :
918 * ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter())
919 * ZSTD_CCtxParams_setParameter() (rather than ZSTD_setCCtxParamsParameter() )
920 * etc...
921 972 */
922 973
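
A sketch of the push-parameters-then-compress flow just described. The chosen level, the worker count, and the assumption that the destination buffer can hold the whole frame are illustrative; ZSTD_p_nbWorkers only has an effect in a multi-threaded build:

    #define ZSTD_STATIC_LINKING_ONLY    /* the advanced API is experimental at this version */
    #include "zstd.h"

    /* Sketch: one frame through the sticky-parameter API.
     * Assumes dstCapacity >= ZSTD_compressBound(srcSize) so one output buffer suffices. */
    static size_t compress_advanced(void* dst, size_t dstCapacity,
                                    const void* src, size_t srcSize)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 19);
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, 4);   /* sticky until reset */

        ZSTD_inBuffer  input  = { src, srcSize, 0 };
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        size_t remaining;
        do {
            remaining = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_end);
            if (ZSTD_isError(remaining)) break;
        } while (remaining != 0);           /* 0 means the frame is fully flushed */

        ZSTD_freeCCtx(cctx);
        return ZSTD_isError(remaining) ? remaining : output.pos;
    }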
923 974 /* note on enum design :
@@ -947,7 +998,7 b' typedef enum {'
947 998 /* compression parameters */
948 999 ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
949 1000 * Default level is ZSTD_CLEVEL_DEFAULT==3.
950 * Special: value 0 means "do not change cLevel".
1001 * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
951 1002 * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
952 1003 * Note 2 : setting a level sets all default values of other compression parameters.
953 1004 * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
@@ -956,16 +1007,19 b' typedef enum {'
956 1007 * Special: value 0 means "use default windowLog".
957 1008 * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
958 1009 * requires explicitly allowing such window size during decompression stage. */
959 ZSTD_p_hashLog, /* Size of the probe table, as a power of 2.
1010 ZSTD_p_hashLog, /* Size of the initial probe table, as a power of 2.
960 1011 * Resulting table size is (1 << (hashLog+2)).
961 1012 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
962 1013 * Larger tables improve compression ratio of strategies <= dFast,
963 1014 * and improve speed of strategies > dFast.
964 1015 * Special: value 0 means "use default hashLog". */
965 ZSTD_p_chainLog, /* Size of the full-search table, as a power of 2.
1016 ZSTD_p_chainLog, /* Size of the multi-probe search table, as a power of 2.
966 1017 * Resulting table size is (1 << (chainLog+2)).
1018 * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
967 1019 * Larger tables result in better and slower compression.
968 1020 * This parameter is useless when using "fast" strategy.
1021 * Note it's still useful when using "dfast" strategy,
1022 * in which case it defines a secondary probe table.
969 1023 * Special: value 0 means "use default chainLog". */
970 1024 ZSTD_p_searchLog, /* Number of search attempts, as a power of 2.
971 1025 * More attempts result in better and slower compression.
@@ -1047,27 +1101,52 b' typedef enum {'
1047 1101 /* experimental parameters - no stability guaranteed */
1048 1102 /* =================================================================== */
1049 1103
1050 ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default.
1051 * disabling it improves speed and decreases compression ratio by a large amount.
1052 * note : this setting is automatically updated when changing compression level.
1053 * positive compression levels set ZSTD_p_compressLiterals to 1.
1054 * negative compression levels set ZSTD_p_compressLiterals to 0. */
1055
1056 1104 ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
1057 1105 * even when referencing into Dictionary content (default:0) */
1106 ZSTD_p_forceAttachDict, /* ZSTD supports usage of a CDict in-place
1107 * (avoiding having to copy the compression tables
1108 * from the CDict into the working context). Using
1109 * a CDict in this way saves an initial setup step,
1110 * but comes at the cost of more work per byte of
1111 * input. ZSTD has a simple internal heuristic that
1112 * guesses which strategy will be faster. You can
1113 * use this flag to override that guess.
1114 *
1115 * Note that the by-reference, in-place strategy is
1116 * only used when reusing a compression context
1117 * with compatible compression parameters. (If
1118 * incompatible / uninitialized, the working
1119 * context needs to be cleared anyways, which is
1120 * about as expensive as overwriting it with the
1121 * dictionary context, so there's no savings in
1122 * using the CDict by-ref.)
1123 *
1124 * Values greater than 0 force attaching the dict.
1125 * Values less than 0 force copying the dict.
1126 * 0 selects the default heuristic-guided behavior.
1127 */
1058 1128
1059 1129 } ZSTD_cParameter;
1060 1130
1061 1131
1062 1132 /*! ZSTD_CCtx_setParameter() :
1063 1133 * Set one compression parameter, selected by enum ZSTD_cParameter.
1064 * Setting a parameter is generally only possible during frame initialization (before starting compression),
1065 * except for a few exceptions which can be updated during compression: compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
1066 * Note : when `value` is an enum, cast it to unsigned for proper type checking.
1067 * @result : informational value (typically, value being set clamped correctly),
1134 * Setting a parameter is generally only possible during frame initialization (before starting compression).
1135 * Exception : when using multi-threading mode (nbThreads >= 1),
1136 * following parameters can be updated _during_ compression (within same frame):
1137 * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
1138 * new parameters will be active on next job, or after a flush().
1139 * Note : when `value` type is not unsigned (int, or enum), cast it to unsigned for proper type checking.
1140 * @result : informational value (typically, value being set, correctly clamped),
1068 1141 * or an error code (which can be tested with ZSTD_isError()). */
1069 1142 ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
1070 1143
1144 /*! ZSTD_CCtx_getParameter() :
1145 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
1146 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1147 */
1148 ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value);
1149
1071 1150 /*! ZSTD_CCtx_setPledgedSrcSize() :
1072 1151 * Total input data size to be compressed as a single frame.
1073 1152 * This value will be controlled at the end, and result in error if not respected.
@@ -1114,30 +1193,45 b' ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZS'
1114 1193
1115 1194 /*! ZSTD_CCtx_refPrefix() :
1116 1195 * Reference a prefix (single-usage dictionary) for next compression job.
1117 * Decompression need same prefix to properly regenerate data.
1118 * Prefix is **only used once**. Tables are discarded at end of compression job.
1119 * Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
1120 * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
1196 * Decompression will need the same prefix to properly regenerate data.
1197 * Compressing with a prefix is similar in outcome to performing a diff and compressing it,
1198 * but performs much faster, especially during decompression (compression speed is tunable with compression level).
1199 * Note that prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end).
1121 1200 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1122 1201 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
1123 * Note 1 : Prefix buffer is referenced. It must outlive compression job.
1124 * Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
1202 * Note 1 : Prefix buffer is referenced. It **must** outlive compression job.
1203 * Its content must remain unmodified up to the end of compression (ZSTD_e_end).
1204 * Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
1205 * ensure that the window size is large enough to contain the entire source.
1206 * See ZSTD_p_windowLog.
1207 * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
1125 1208 * It's a CPU consuming operation, with non-negligible impact on latency.
1126 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1209 * If there is a need to use the same prefix multiple times, consider loadDictionary instead.
1210 * Note 4 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1127 1211 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */
1128 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
1129 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
1212 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
1213 const void* prefix, size_t prefixSize);
1214 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx,
1215 const void* prefix, size_t prefixSize,
1216 ZSTD_dictContentType_e dictContentType);
1130 1217
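
A sketch of the diff-like use case from Note 2: the old version of a blob is referenced as a prefix, the new version is compressed against it, and the decompressor must reference the identical prefix before decoding the frame:

    #define ZSTD_STATIC_LINKING_ONLY
    #include "zstd.h"

    /* Sketch: compress `newV` against `oldV` used as a single-use prefix.
     * The decompressor must call ZSTD_DCtx_refPrefix(dctx, oldV, oldSize)
     * with the identical bytes before decoding this frame.
     * Assumes dstCapacity >= ZSTD_compressBound(newSize), so one call suffices. */
    static size_t compress_vs_prefix(ZSTD_CCtx* cctx,
                                     void* dst, size_t dstCapacity,
                                     const void* newV, size_t newSize,
                                     const void* oldV, size_t oldSize)
    {
        ZSTD_CCtx_refPrefix(cctx, oldV, oldSize);        /* valid for the next frame only */
        ZSTD_inBuffer  input  = { newV, newSize, 0 };
        ZSTD_outBuffer output = { dst, dstCapacity, 0 };
        size_t const remaining = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_end);
        return ZSTD_isError(remaining) ? remaining : output.pos;
    }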
1131 1218 /*! ZSTD_CCtx_reset() :
1132 1219 * Return a CCtx to clean state.
1133 1220 * Useful after an error, or to interrupt an ongoing compression job and start a new one.
1134 1221 * Any internal data not yet flushed is cancelled.
1135 * Dictionary (if any) is dropped.
1136 * All parameters are back to default values.
1137 * It's possible to modify compression parameters after a reset.
1222 * The parameters and dictionary are kept unchanged, to reset them use ZSTD_CCtx_resetParameters().
1138 1223 */
1139 1224 ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
1140 1225
1226 /*! ZSTD_CCtx_resetParameters() :
1227 * All parameters are back to default values (compression level is ZSTD_CLEVEL_DEFAULT).
1228 * Dictionary (if any) is dropped.
1229 * Resetting parameters is only possible during frame initialization (before starting compression).
1230 * To reset the context use ZSTD_CCtx_reset().
1231 * @return 0 or an error code (which can be checked with ZSTD_isError()).
1232 */
1233 ZSTDLIB_API size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx);
1234
1141 1235
1142 1236
1143 1237 typedef enum {
@@ -1235,6 +1329,13 b' ZSTDLIB_API size_t ZSTD_CCtxParams_init_'
1235 1329 */
1236 1330 ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
1237 1331
1332 /*! ZSTD_CCtxParam_getParameter() :
1333 * Similar to ZSTD_CCtx_getParameter.
1334 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
1335 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1336 */
1337 ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned* value);
1338
1238 1339 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
1239 1340 * Apply a set of ZSTD_CCtx_params to the compression context.
1240 1341 * This can be done even after compression is started,
@@ -1246,10 +1347,13 b' ZSTDLIB_API size_t ZSTD_CCtx_setParamete'
1246 1347 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
1247 1348
1248 1349
1249 /*=== Advanced parameters for decompression API ===*/
1350 /* ==================================== */
1351 /*=== Advanced decompression API ===*/
1352 /* ==================================== */
1250 1353
1251 /* The following parameters must be set after creating a ZSTD_DCtx* (or ZSTD_DStream*) object,
1252 * but before starting decompression of a frame.
1354 /* The following API works the same way as the advanced compression API :
1355 * a context is created, parameters are pushed into it one by one,
1356 * then the context can be used to decompress data using an interface similar to the streaming API.
1253 1357 */
1254 1358
1255 1359 /*! ZSTD_DCtx_loadDictionary() :
@@ -1286,17 +1390,25 b' ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZS'
1286 1390
1287 1391 /*! ZSTD_DCtx_refPrefix() :
1288 1392 * Reference a prefix (single-usage dictionary) for the next decompression job.
1289 * Prefix is **only used once**. It must be explicitly referenced before each frame.
1290 * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead.
1393 * This is the reverse operation of ZSTD_CCtx_refPrefix(),
1394 * and must use the same prefix as the one used during compression.
1395 * Prefix is **only used once**. Reference is discarded at end of frame.
1396 * End of frame is reached when ZSTD_DCtx_decompress_generic() returns 0.
1291 1397 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1292 1398 * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
1293 * Note 2 : Prefix buffer is referenced. It must outlive compression job.
1399 * Note 2 : Prefix buffer is referenced. It **must** outlive decompression job.
1400 * Prefix buffer must remain unmodified up to the end of frame,
1401 * reached when ZSTD_DCtx_decompress_generic() returns 0.
1294 1402 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1295 1403 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode.
1296 1404 * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
1405 * A fulldict prefix is more costly though.
1297 1406 */
1298 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);
1299 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
1407 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
1408 const void* prefix, size_t prefixSize);
1409 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
1410 const void* prefix, size_t prefixSize,
1411 ZSTD_dictContentType_e dictContentType);
1300 1412
1301 1413
1302 1414 /*! ZSTD_DCtx_setMaxWindowSize() :
@@ -1318,6 +1430,13 b' ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindo'
1318 1430 ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
1319 1431
1320 1432
1433 /*! ZSTD_getFrameHeader_advanced() :
1434 * same as ZSTD_getFrameHeader(),
1435 * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
1436 ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr,
1437 const void* src, size_t srcSize, ZSTD_format_e format);
1438
1439
1321 1440 /*! ZSTD_decompress_generic() :
1322 1441 * Behave the same as ZSTD_decompressStream.
1323 1442 * Decompression parameters cannot be changed once decompression is started.
@@ -1383,8 +1502,6 b' ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DC'
1383 1502 Use ZSTD_insertBlock() for such a case.
1384 1503 */
1385 1504
1386 #define ZSTD_BLOCKSIZELOG_MAX 17
1387 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) /* define, for static allocation */
1388 1505 /*===== Raw zstd block functions =====*/
1389 1506 ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
1390 1507 ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
@@ -40,6 +40,8 b' from __future__ import absolute_import, '
40 40 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
41 41 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
42 42 'MAGIC_NUMBER',
43 'BLOCKSIZELOG_MAX',
44 'BLOCKSIZE_MAX',
43 45 'WINDOWLOG_MIN',
44 46 'WINDOWLOG_MAX',
45 47 'CHAINLOG_MIN',
@@ -52,6 +54,7 b' from __future__ import absolute_import, '
52 54 'SEARCHLENGTH_MIN',
53 55 'SEARCHLENGTH_MAX',
54 56 'TARGETLENGTH_MIN',
57 'TARGETLENGTH_MAX',
55 58 'LDM_MINMATCH_MIN',
56 59 'LDM_MINMATCH_MAX',
57 60 'LDM_BUCKETSIZELOG_MAX',
@@ -102,6 +105,8 b' CONTENTSIZE_UNKNOWN = lib.ZSTD_CONTENTSI'
102 105 CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR
103 106 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
104 107
108 BLOCKSIZELOG_MAX = lib.ZSTD_BLOCKSIZELOG_MAX
109 BLOCKSIZE_MAX = lib.ZSTD_BLOCKSIZE_MAX
105 110 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
106 111 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
107 112 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
@@ -114,6 +119,7 b' SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX'
114 119 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
115 120 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
116 121 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
122 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
117 123 LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
118 124 LDM_MINMATCH_MAX = lib.ZSTD_LDM_MINMATCH_MAX
119 125 LDM_BUCKETSIZELOG_MAX = lib.ZSTD_LDM_BUCKETSIZELOG_MAX
@@ -191,7 +197,6 b' def _make_cctx_params(params):'
191 197 (lib.ZSTD_p_nbWorkers, params.threads),
192 198 (lib.ZSTD_p_jobSize, params.job_size),
193 199 (lib.ZSTD_p_overlapSizeLog, params.overlap_size_log),
194 (lib.ZSTD_p_compressLiterals, params.compress_literals),
195 200 (lib.ZSTD_p_forceMaxWindow, params.force_max_window),
196 201 (lib.ZSTD_p_enableLongDistanceMatching, params.enable_ldm),
197 202 (lib.ZSTD_p_ldmHashLog, params.ldm_hash_log),
@@ -224,9 +229,6 b' class ZstdCompressionParameters(object):'
224 229 if arg not in kwargs:
225 230 kwargs[arg] = getattr(params, attr)
226 231
227 if 'compress_literals' not in kwargs:
228 kwargs['compress_literals'] = 1 if level >= 0 else 0
229
230 232 return ZstdCompressionParameters(**kwargs)
231 233
232 234 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
@@ -235,14 +237,11 b' class ZstdCompressionParameters(object):'
235 237 write_dict_id=0, job_size=0, overlap_size_log=0,
236 238 force_max_window=0, enable_ldm=0, ldm_hash_log=0,
237 239 ldm_min_match=0, ldm_bucket_size_log=0, ldm_hash_every_log=0,
238 threads=0, compress_literals=None):
240 threads=0):
239 241
240 242 if threads < 0:
241 243 threads = _cpu_count()
242 244
243 if compress_literals is None:
244 compress_literals = compression_level >= 0
245
246 245 self.format = format
247 246 self.compression_level = compression_level
248 247 self.window_log = window_log
@@ -257,7 +256,6 b' class ZstdCompressionParameters(object):'
257 256 self.write_dict_id = write_dict_id
258 257 self.job_size = job_size
259 258 self.overlap_size_log = overlap_size_log
260 self.compress_literals = compress_literals
261 259 self.force_max_window = force_max_window
262 260 self.enable_ldm = enable_ldm
263 261 self.ldm_hash_log = ldm_hash_log
@@ -411,13 +409,14 b' class ZstdCompressionWriter(object):'
411 409 raise ZstdError('zstd compress error: %s' %
412 410 _zstd_error(zresult))
413 411
414 if not out_buffer.pos:
415 break
412 if out_buffer.pos:
413 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
414 total_write += out_buffer.pos
415 self._bytes_compressed += out_buffer.pos
416 out_buffer.pos = 0
416 417
417 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
418 total_write += out_buffer.pos
419 self._bytes_compressed += out_buffer.pos
420 out_buffer.pos = 0
418 if not zresult:
419 break
421 420
422 421 return total_write
423 422
@@ -460,6 +459,14 b' class ZstdCompressionObj(object):'
460 459 if self._finished:
461 460 raise ZstdError('compressor object already finished')
462 461
462 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
463 z_flush_mode = lib.ZSTD_e_flush
464 elif flush_mode == COMPRESSOBJ_FLUSH_FINISH:
465 z_flush_mode = lib.ZSTD_e_end
466 self._finished = True
467 else:
468 raise ZstdError('unhandled flush mode')
469
463 470 assert self._out.pos == 0
464 471
465 472 in_buffer = ffi.new('ZSTD_inBuffer *')
@@ -467,35 +474,13 b' class ZstdCompressionObj(object):'
467 474 in_buffer.size = 0
468 475 in_buffer.pos = 0
469 476
470 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
471 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
472 self._out,
473 in_buffer,
474 lib.ZSTD_e_flush)
475 if lib.ZSTD_isError(zresult):
476 raise ZstdError('zstd compress error: %s' %
477 _zstd_error(zresult))
478
479 # Output buffer is guaranteed to hold full block.
480 assert zresult == 0
481
482 if self._out.pos:
483 result = ffi.buffer(self._out.dst, self._out.pos)[:]
484 self._out.pos = 0
485 return result
486 else:
487 return b''
488
489 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
490 self._finished = True
491
492 477 chunks = []
493 478
494 479 while True:
495 480 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
496 481 self._out,
497 482 in_buffer,
498 lib.ZSTD_e_end)
483 z_flush_mode)
499 484 if lib.ZSTD_isError(zresult):
500 485 raise ZstdError('error ending compression stream: %s' %
501 486 _zstd_error(zresult))
@@ -510,11 +495,107 b' class ZstdCompressionObj(object):'
510 495 return b''.join(chunks)
511 496
512 497
498 class ZstdCompressionChunker(object):
499 def __init__(self, compressor, chunk_size):
500 self._compressor = compressor
501 self._out = ffi.new('ZSTD_outBuffer *')
502 self._dst_buffer = ffi.new('char[]', chunk_size)
503 self._out.dst = self._dst_buffer
504 self._out.size = chunk_size
505 self._out.pos = 0
506
507 self._in = ffi.new('ZSTD_inBuffer *')
508 self._in.src = ffi.NULL
509 self._in.size = 0
510 self._in.pos = 0
511 self._finished = False
512
513 def compress(self, data):
514 if self._finished:
515 raise ZstdError('cannot call compress() after compression finished')
516
517 if self._in.src != ffi.NULL:
518 raise ZstdError('cannot perform operation before consuming output '
519 'from previous operation')
520
521 data_buffer = ffi.from_buffer(data)
522
523 if not len(data_buffer):
524 return
525
526 self._in.src = data_buffer
527 self._in.size = len(data_buffer)
528 self._in.pos = 0
529
530 while self._in.pos < self._in.size:
531 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
532 self._out,
533 self._in,
534 lib.ZSTD_e_continue)
535
536 if self._in.pos == self._in.size:
537 self._in.src = ffi.NULL
538 self._in.size = 0
539 self._in.pos = 0
540
541 if lib.ZSTD_isError(zresult):
542 raise ZstdError('zstd compress error: %s' %
543 _zstd_error(zresult))
544
545 if self._out.pos == self._out.size:
546 yield ffi.buffer(self._out.dst, self._out.pos)[:]
547 self._out.pos = 0
548
549 def flush(self):
550 if self._finished:
551 raise ZstdError('cannot call flush() after compression finished')
552
553 if self._in.src != ffi.NULL:
554 raise ZstdError('cannot call flush() before consuming output from '
555 'previous operation')
556
557 while True:
558 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
559 self._out, self._in,
560 lib.ZSTD_e_flush)
561 if lib.ZSTD_isError(zresult):
562 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
563
564 if self._out.pos:
565 yield ffi.buffer(self._out.dst, self._out.pos)[:]
566 self._out.pos = 0
567
568 if not zresult:
569 return
570
571 def finish(self):
572 if self._finished:
573 raise ZstdError('cannot call finish() after compression finished')
574
575 if self._in.src != ffi.NULL:
576 raise ZstdError('cannot call finish() before consuming output from '
577 'previous operation')
578
579 while True:
580 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
581 self._out, self._in,
582 lib.ZSTD_e_end)
583 if lib.ZSTD_isError(zresult):
584 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
585
586 if self._out.pos:
587 yield ffi.buffer(self._out.dst, self._out.pos)[:]
588 self._out.pos = 0
589
590 if not zresult:
591 self._finished = True
592 return
593
594
513 595 class CompressionReader(object):
514 def __init__(self, compressor, source, size, read_size):
596 def __init__(self, compressor, source, read_size):
515 597 self._compressor = compressor
516 598 self._source = source
517 self._source_size = size
518 599 self._read_size = read_size
519 600 self._entered = False
520 601 self._closed = False
@@ -530,12 +611,6 b' class CompressionReader(object):'
530 611 if self._entered:
531 612 raise ValueError('cannot __enter__ multiple times')
532 613
533 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._compressor._cctx,
534 self._source_size)
535 if lib.ZSTD_isError(zresult):
536 raise ZstdError('error setting source size: %s' %
537 _zstd_error(zresult))
538
539 614 self._entered = True
540 615 return self
541 616
@@ -578,6 +653,7 b' class CompressionReader(object):'
578 653 self._closed = True
579 654 return None
580 655
656 @property
581 657 def closed(self):
582 658 return self._closed
583 659
@@ -596,9 +672,6 b' class CompressionReader(object):'
596 672 next = __next__
597 673
598 674 def read(self, size=-1):
599 if not self._entered:
600 raise ZstdError('read() must be called from an active context manager')
601
602 675 if self._closed:
603 676 raise ValueError('stream is closed')
604 677
@@ -759,16 +832,14 b' class ZstdCompressor(object):'
759 832 self._dict_data = dict_data
760 833
761 834 # We defer setting up garbage collection until after calling
762 # _ensure_cctx() to ensure the memory size estimate is more accurate.
835 # _setup_cctx() to ensure the memory size estimate is more accurate.
763 836 try:
764 self._ensure_cctx()
837 self._setup_cctx()
765 838 finally:
766 839 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx,
767 840 size=lib.ZSTD_sizeof_CCtx(cctx))
768 841
769 def _ensure_cctx(self):
770 lib.ZSTD_CCtx_reset(self._cctx)
771
842 def _setup_cctx(self):
772 843 zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx,
773 844 self._params)
774 845 if lib.ZSTD_isError(zresult):
@@ -793,7 +864,7 b' class ZstdCompressor(object):'
793 864 return lib.ZSTD_sizeof_CCtx(self._cctx)
794 865
795 866 def compress(self, data):
796 self._ensure_cctx()
867 lib.ZSTD_CCtx_reset(self._cctx)
797 868
798 869 data_buffer = ffi.from_buffer(data)
799 870
@@ -830,7 +901,7 b' class ZstdCompressor(object):'
830 901 return ffi.buffer(out, out_buffer.pos)[:]
831 902
832 903 def compressobj(self, size=-1):
833 self._ensure_cctx()
904 lib.ZSTD_CCtx_reset(self._cctx)
834 905
835 906 if size < 0:
836 907 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -851,6 +922,19 b' class ZstdCompressor(object):'
851 922
852 923 return cobj
853 924
925 def chunker(self, size=-1, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
926 lib.ZSTD_CCtx_reset(self._cctx)
927
928 if size < 0:
929 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
930
931 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
932 if lib.ZSTD_isError(zresult):
933 raise ZstdError('error setting source size: %s' %
934 _zstd_error(zresult))
935
936 return ZstdCompressionChunker(self, chunk_size=chunk_size)
937
854 938 def copy_stream(self, ifh, ofh, size=-1,
855 939 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
856 940 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
@@ -860,7 +944,7 b' class ZstdCompressor(object):'
860 944 if not hasattr(ofh, 'write'):
861 945 raise ValueError('second argument must have a write() method')
862 946
863 self._ensure_cctx()
947 lib.ZSTD_CCtx_reset(self._cctx)
864 948
865 949 if size < 0:
866 950 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -927,7 +1011,7 b' class ZstdCompressor(object):'
927 1011
928 1012 def stream_reader(self, source, size=-1,
929 1013 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
930 self._ensure_cctx()
1014 lib.ZSTD_CCtx_reset(self._cctx)
931 1015
932 1016 try:
933 1017 size = len(source)
@@ -937,7 +1021,12 b' class ZstdCompressor(object):'
937 1021 if size < 0:
938 1022 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
939 1023
940 return CompressionReader(self, source, size, read_size)
1024 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1025 if lib.ZSTD_isError(zresult):
1026 raise ZstdError('error setting source size: %s' %
1027 _zstd_error(zresult))
1028
1029 return CompressionReader(self, source, read_size)
941 1030
942 1031 def stream_writer(self, writer, size=-1,
943 1032 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
@@ -945,7 +1034,7 b' class ZstdCompressor(object):'
945 1034 if not hasattr(writer, 'write'):
946 1035 raise ValueError('must pass an object with a write() method')
947 1036
948 self._ensure_cctx()
1037 lib.ZSTD_CCtx_reset(self._cctx)
949 1038
950 1039 if size < 0:
951 1040 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -967,7 +1056,7 b' class ZstdCompressor(object):'
967 1056 raise ValueError('must pass an object with a read() method or '
968 1057 'conforms to buffer protocol')
969 1058
970 self._ensure_cctx()
1059 lib.ZSTD_CCtx_reset(self._cctx)
971 1060
972 1061 if size < 0:
973 1062 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -1267,7 +1356,7 b' class ZstdDecompressionObj(object):'
1267 1356
1268 1357 chunks = []
1269 1358
1270 while in_buffer.pos < in_buffer.size:
1359 while True:
1271 1360 zresult = lib.ZSTD_decompress_generic(self._decompressor._dctx,
1272 1361 out_buffer, in_buffer)
1273 1362 if lib.ZSTD_isError(zresult):
@@ -1280,7 +1369,12 b' class ZstdDecompressionObj(object):'
1280 1369
1281 1370 if out_buffer.pos:
1282 1371 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
1283 out_buffer.pos = 0
1372
1373 if (zresult == 0 or
1374 (in_buffer.pos == in_buffer.size and out_buffer.pos == 0)):
1375 break
1376
1377 out_buffer.pos = 0
1284 1378
1285 1379 return b''.join(chunks)
1286 1380
@@ -1303,8 +1397,6 b' class DecompressionReader(object):'
1303 1397 if self._entered:
1304 1398 raise ValueError('cannot __enter__ multiple times')
1305 1399
1306 self._decompressor._ensure_dctx()
1307
1308 1400 self._entered = True
1309 1401 return self
1310 1402
@@ -1347,6 +1439,7 b' class DecompressionReader(object):'
1347 1439 self._closed = True
1348 1440 return None
1349 1441
1442 @property
1350 1443 def closed(self):
1351 1444 return self._closed
1352 1445
@@ -1364,10 +1457,7 b' class DecompressionReader(object):'
1364 1457
1365 1458 next = __next__
1366 1459
1367 def read(self, size=-1):
1368 if not self._entered:
1369 raise ZstdError('read() must be called from an active context manager')
1370
1460 def read(self, size):
1371 1461 if self._closed:
1372 1462 raise ValueError('stream is closed')
1373 1463
@@ -1442,10 +1532,6 b' class DecompressionReader(object):'
1442 1532 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1443 1533
1444 1534 def seek(self, pos, whence=os.SEEK_SET):
1445 if not self._entered:
1446 raise ZstdError('seek() must be called from an active context '
1447 'manager')
1448
1449 1535 if self._closed:
1450 1536 raise ValueError('stream is closed')
1451 1537
@@ -919,7 +919,9 b' extmodules = ['
919 919
920 920 sys.path.insert(0, 'contrib/python-zstandard')
921 921 import setup_zstd
922 extmodules.append(setup_zstd.get_c_extension(name='mercurial.zstd'))
922 extmodules.append(setup_zstd.get_c_extension(
923 name='mercurial.zstd',
924 root=os.path.abspath(os.path.dirname(__file__))))
923 925
924 926 try:
925 927 from distutils import cygwinccompiler