zstandard: vendor python-zstandard 0.10.1...
Gregory Szorc
r40157:73fef626 default
--- /dev/null
+++ b/contrib/python-zstandard/c-ext/compressionchunker.c
@@ -0,0 +1,360 @@
1 /**
2 * Copyright (c) 2018-present, Gregory Szorc
3 * All rights reserved.
4 *
5 * This software may be modified and distributed under the terms
6 * of the BSD license. See the LICENSE file for details.
7 */
8
9 #include "python-zstandard.h"
10
11 extern PyObject* ZstdError;
12
13 PyDoc_STRVAR(ZstdCompressionChunkerIterator__doc__,
14 "Iterator of output chunks from ZstdCompressionChunker.\n"
15 );
16
17 static void ZstdCompressionChunkerIterator_dealloc(ZstdCompressionChunkerIterator* self) {
18 Py_XDECREF(self->chunker);
19
20 PyObject_Del(self);
21 }
22
23 static PyObject* ZstdCompressionChunkerIterator_iter(PyObject* self) {
24 Py_INCREF(self);
25 return self;
26 }
27
28 static PyObject* ZstdCompressionChunkerIterator_iternext(ZstdCompressionChunkerIterator* self) {
29 size_t zresult;
30 PyObject* chunk;
31 ZstdCompressionChunker* chunker = self->chunker;
32 ZSTD_EndDirective zFlushMode;
33
34 if (self->mode != compressionchunker_mode_normal && chunker->input.pos != chunker->input.size) {
35 PyErr_SetString(ZstdError, "input should have been fully consumed before calling flush() or finish()");
36 return NULL;
37 }
38
39 if (chunker->finished) {
40 return NULL;
41 }
42
43 /* If we have data left in the input, consume it. */
44 while (chunker->input.pos < chunker->input.size) {
45 Py_BEGIN_ALLOW_THREADS
46 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
47 &chunker->input, ZSTD_e_continue);
48 Py_END_ALLOW_THREADS
49
50 /* Input is fully consumed. */
51 if (chunker->input.pos == chunker->input.size) {
52 chunker->input.src = NULL;
53 chunker->input.pos = 0;
54 chunker->input.size = 0;
55 PyBuffer_Release(&chunker->inBuffer);
56 }
57
58 if (ZSTD_isError(zresult)) {
59 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
60 return NULL;
61 }
62
63 /* If it produced a full output chunk, emit it. */
64 if (chunker->output.pos == chunker->output.size) {
65 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
66 if (!chunk) {
67 return NULL;
68 }
69
70 chunker->output.pos = 0;
71
72 return chunk;
73 }
74
75 /* Else continue to compress available input data. */
76 }
77
78 /* We also need this here for the special case of an empty input buffer. */
79 if (chunker->input.pos == chunker->input.size) {
80 chunker->input.src = NULL;
81 chunker->input.pos = 0;
82 chunker->input.size = 0;
83 PyBuffer_Release(&chunker->inBuffer);
84 }
85
86 /* No more input data. A partial chunk may be in chunker->output.
87 * If we're in normal compression mode, we're done. Otherwise if we're in
88 * flush or finish mode, we need to emit what data remains.
89 */
90 if (self->mode == compressionchunker_mode_normal) {
91 /* We don't need to set StopIteration. */
92 return NULL;
93 }
94
95 if (self->mode == compressionchunker_mode_flush) {
96 zFlushMode = ZSTD_e_flush;
97 }
98 else if (self->mode == compressionchunker_mode_finish) {
99 zFlushMode = ZSTD_e_end;
100 }
101 else {
102 PyErr_SetString(ZstdError, "unhandled compression mode; this should never happen");
103 return NULL;
104 }
105
106 Py_BEGIN_ALLOW_THREADS
107 zresult = ZSTD_compress_generic(chunker->compressor->cctx, &chunker->output,
108 &chunker->input, zFlushMode);
109 Py_END_ALLOW_THREADS
110
111 if (ZSTD_isError(zresult)) {
112 PyErr_Format(ZstdError, "zstd compress error: %s",
113 ZSTD_getErrorName(zresult));
114 return NULL;
115 }
116
117 if (!zresult && chunker->output.pos == 0) {
118 return NULL;
119 }
120
121 chunk = PyBytes_FromStringAndSize(chunker->output.dst, chunker->output.pos);
122 if (!chunk) {
123 return NULL;
124 }
125
126 chunker->output.pos = 0;
127
128 if (!zresult && self->mode == compressionchunker_mode_finish) {
129 chunker->finished = 1;
130 }
131
132 return chunk;
133 }
134
135 PyTypeObject ZstdCompressionChunkerIteratorType = {
136 PyVarObject_HEAD_INIT(NULL, 0)
137 "zstd.ZstdCompressionChunkerIterator", /* tp_name */
138 sizeof(ZstdCompressionChunkerIterator), /* tp_basicsize */
139 0, /* tp_itemsize */
140 (destructor)ZstdCompressionChunkerIterator_dealloc, /* tp_dealloc */
141 0, /* tp_print */
142 0, /* tp_getattr */
143 0, /* tp_setattr */
144 0, /* tp_compare */
145 0, /* tp_repr */
146 0, /* tp_as_number */
147 0, /* tp_as_sequence */
148 0, /* tp_as_mapping */
149 0, /* tp_hash */
150 0, /* tp_call */
151 0, /* tp_str */
152 0, /* tp_getattro */
153 0, /* tp_setattro */
154 0, /* tp_as_buffer */
155 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
156 ZstdCompressionChunkerIterator__doc__, /* tp_doc */
157 0, /* tp_traverse */
158 0, /* tp_clear */
159 0, /* tp_richcompare */
160 0, /* tp_weaklistoffset */
161 ZstdCompressionChunkerIterator_iter, /* tp_iter */
162 (iternextfunc)ZstdCompressionChunkerIterator_iternext, /* tp_iternext */
163 0, /* tp_methods */
164 0, /* tp_members */
165 0, /* tp_getset */
166 0, /* tp_base */
167 0, /* tp_dict */
168 0, /* tp_descr_get */
169 0, /* tp_descr_set */
170 0, /* tp_dictoffset */
171 0, /* tp_init */
172 0, /* tp_alloc */
173 PyType_GenericNew, /* tp_new */
174 };
175
176 PyDoc_STRVAR(ZstdCompressionChunker__doc__,
177 "Compress chunks iteratively into exact chunk sizes.\n"
178 );
179
180 static void ZstdCompressionChunker_dealloc(ZstdCompressionChunker* self) {
181 PyBuffer_Release(&self->inBuffer);
182 self->input.src = NULL;
183
184 PyMem_Free(self->output.dst);
185 self->output.dst = NULL;
186
187 Py_XDECREF(self->compressor);
188
189 PyObject_Del(self);
190 }
191
192 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_compress(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
193 static char* kwlist[] = {
194 "data",
195 NULL
196 };
197
198 ZstdCompressionChunkerIterator* result;
199
200 if (self->finished) {
201 PyErr_SetString(ZstdError, "cannot call compress() after compression finished");
202 return NULL;
203 }
204
205 if (self->inBuffer.obj) {
206 PyErr_SetString(ZstdError,
207 "cannot perform operation before consuming output from previous operation");
208 return NULL;
209 }
210
211 #if PY_MAJOR_VERSION >= 3
212 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*:compress",
213 #else
214 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*:compress",
215 #endif
216 kwlist, &self->inBuffer)) {
217 return NULL;
218 }
219
220 if (!PyBuffer_IsContiguous(&self->inBuffer, 'C') || self->inBuffer.ndim > 1) {
221 PyErr_SetString(PyExc_ValueError,
222 "data buffer should be contiguous and have at most one dimension");
223 PyBuffer_Release(&self->inBuffer);
224 return NULL;
225 }
226
227 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
228 if (!result) {
229 PyBuffer_Release(&self->inBuffer);
230 return NULL;
231 }
232
233 self->input.src = self->inBuffer.buf;
234 self->input.size = self->inBuffer.len;
235 self->input.pos = 0;
236
237 result->chunker = self;
238 Py_INCREF(result->chunker);
239
240 result->mode = compressionchunker_mode_normal;
241
242 return result;
243 }
244
245 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_finish(ZstdCompressionChunker* self) {
246 ZstdCompressionChunkerIterator* result;
247
248 if (self->finished) {
249 PyErr_SetString(ZstdError, "cannot call finish() after compression finished");
250 return NULL;
251 }
252
253 if (self->inBuffer.obj) {
254 PyErr_SetString(ZstdError,
255 "cannot call finish() before consuming output from previous operation");
256 return NULL;
257 }
258
259 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
260 if (!result) {
261 return NULL;
262 }
263
264 result->chunker = self;
265 Py_INCREF(result->chunker);
266
267 result->mode = compressionchunker_mode_finish;
268
269 return result;
270 }
271
272 static ZstdCompressionChunkerIterator* ZstdCompressionChunker_flush(ZstdCompressionChunker* self, PyObject* args, PyObject* kwargs) {
273 ZstdCompressionChunkerIterator* result;
274
275 if (self->finished) {
276 PyErr_SetString(ZstdError, "cannot call flush() after compression finished");
277 return NULL;
278 }
279
280 if (self->inBuffer.obj) {
281 PyErr_SetString(ZstdError,
282 "cannot call flush() before consuming output from previous operation");
283 return NULL;
284 }
285
286 result = (ZstdCompressionChunkerIterator*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerIteratorType, NULL);
287 if (!result) {
288 return NULL;
289 }
290
291 result->chunker = self;
292 Py_INCREF(result->chunker);
293
294 result->mode = compressionchunker_mode_flush;
295
296 return result;
297 }
298
299 static PyMethodDef ZstdCompressionChunker_methods[] = {
300 { "compress", (PyCFunction)ZstdCompressionChunker_compress, METH_VARARGS | METH_KEYWORDS,
301 PyDoc_STR("compress data") },
302 { "finish", (PyCFunction)ZstdCompressionChunker_finish, METH_NOARGS,
303 PyDoc_STR("finish compression operation") },
304 { "flush", (PyCFunction)ZstdCompressionChunker_flush, METH_VARARGS | METH_KEYWORDS,
305 PyDoc_STR("finish compression operation") },
306 { NULL, NULL }
307 };
308
309 PyTypeObject ZstdCompressionChunkerType = {
310 PyVarObject_HEAD_INIT(NULL, 0)
311 "zstd.ZstdCompressionChunkerType", /* tp_name */
312 sizeof(ZstdCompressionChunker), /* tp_basicsize */
313 0, /* tp_itemsize */
314 (destructor)ZstdCompressionChunker_dealloc, /* tp_dealloc */
315 0, /* tp_print */
316 0, /* tp_getattr */
317 0, /* tp_setattr */
318 0, /* tp_compare */
319 0, /* tp_repr */
320 0, /* tp_as_number */
321 0, /* tp_as_sequence */
322 0, /* tp_as_mapping */
323 0, /* tp_hash */
324 0, /* tp_call */
325 0, /* tp_str */
326 0, /* tp_getattro */
327 0, /* tp_setattro */
328 0, /* tp_as_buffer */
329 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
330 ZstdCompressionChunker__doc__, /* tp_doc */
331 0, /* tp_traverse */
332 0, /* tp_clear */
333 0, /* tp_richcompare */
334 0, /* tp_weaklistoffset */
335 0, /* tp_iter */
336 0, /* tp_iternext */
337 ZstdCompressionChunker_methods, /* tp_methods */
338 0, /* tp_members */
339 0, /* tp_getset */
340 0, /* tp_base */
341 0, /* tp_dict */
342 0, /* tp_descr_get */
343 0, /* tp_descr_set */
344 0, /* tp_dictoffset */
345 0, /* tp_init */
346 0, /* tp_alloc */
347 PyType_GenericNew, /* tp_new */
348 };
349
350 void compressionchunker_module_init(PyObject* module) {
351 Py_TYPE(&ZstdCompressionChunkerIteratorType) = &PyType_Type;
352 if (PyType_Ready(&ZstdCompressionChunkerIteratorType) < 0) {
353 return;
354 }
355
356 Py_TYPE(&ZstdCompressionChunkerType) = &PyType_Type;
357 if (PyType_Ready(&ZstdCompressionChunkerType) < 0) {
358 return;
359 }
360 }
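
For context on the loop above: the iterator feeds input with ZSTD_e_continue until it is consumed, emits a chunk whenever the fixed-size output buffer fills, and drains the frame with ZSTD_e_flush or ZSTD_e_end. Below is a minimal standalone C sketch of the same sequencing (not part of this change; it assumes the zstd version vendored here, in which ZSTD_compress_generic() is an experimental API gated behind ZSTD_STATIC_LINKING_ONLY, and it writes chunks to stdout purely for illustration):

#define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compress_generic() is experimental in this zstd */
#include <stdio.h>
#include <zstd.h>

/* Sketch: compress `src`, emitting a complete chunk each time `out` fills. */
static int compress_in_chunks(const void* src, size_t srcSize)
{
    char out[16384];                       /* the chunk size */
    ZSTD_inBuffer input = { src, srcSize, 0 };
    ZSTD_outBuffer output = { out, sizeof(out), 0 };
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    size_t zresult;

    if (!cctx) return -1;

    /* Feed input with ZSTD_e_continue, as the iterator's first loop does. */
    while (input.pos < input.size) {
        zresult = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_continue);
        if (ZSTD_isError(zresult)) { ZSTD_freeCCtx(cctx); return -1; }
        if (output.pos == output.size) {   /* full chunk: emit and reuse buffer */
            fwrite(output.dst, 1, output.pos, stdout);
            output.pos = 0;
        }
    }

    /* End the frame with ZSTD_e_end; zresult == 0 means fully flushed.
     * (ZSTD_e_flush is the analogous mid-stream drain used by flush().) */
    do {
        zresult = ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_end);
        if (ZSTD_isError(zresult)) { ZSTD_freeCCtx(cctx); return -1; }
        fwrite(output.dst, 1, output.pos, stdout);
        output.pos = 0;
    } while (zresult != 0);

    ZSTD_freeCCtx(cctx);
    return 0;
}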
--- /dev/null
+++ b/contrib/python-zstandard/zstd/common/debug.c
@@ -0,0 +1,44 @@
1 /* ******************************************************************
2 debug
3 Part of FSE library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35
36 /*
37 * This module only hosts one global variable
38 * which can be used to dynamically influence the verbosity of traces,
39 * such as DEBUGLOG and RAWLOG
40 */
41
42 #include "debug.h"
43
44 int g_debuglevel = DEBUGLEVEL;
--- /dev/null
+++ b/contrib/python-zstandard/zstd/common/debug.h
@@ -0,0 +1,123 @@
1 /* ******************************************************************
2 debug
3 Part of FSE library
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34
35
36 /*
37 * The purpose of this header is to enable debug functions.
38 * They regroup assert(), DEBUGLOG() and RAWLOG() for run-time,
39 * and DEBUG_STATIC_ASSERT() for compile-time.
40 *
41 * By default, DEBUGLEVEL==0, which means run-time debug is disabled.
42 *
43 * Level 1 enables assert() only.
44 * Starting level 2, traces can be generated and pushed to stderr.
45 * The higher the level, the more verbose the traces.
46 *
47 * It's possible to adjust the trace level dynamically via the variable g_debuglevel,
48 * which is only declared if DEBUGLEVEL>=2.
49 * It is a global variable, not protected for multi-threaded use (use with care).
50 */
51
52 #ifndef DEBUG_H_12987983217
53 #define DEBUG_H_12987983217
54
55 #if defined (__cplusplus)
56 extern "C" {
57 #endif
58
59
60 /* static assert is triggered at compile time, leaving no runtime artefact,
61 * but can only work with compile-time constants.
62 * This variant can only be used inside a function. */
63 #define DEBUG_STATIC_ASSERT(c) (void)sizeof(char[(c) ? 1 : -1])
64
65
66 /* DEBUGLEVEL is expected to be defined externally,
67 * typically through compiler command line.
68 * Value must be a number. */
69 #ifndef DEBUGLEVEL
70 # define DEBUGLEVEL 0
71 #endif
72
73 /* recommended values for DEBUGLEVEL :
74 * 0 : no debug, all run-time functions disabled
75 * 1 : no display, enables assert() only
76 * 2 : reserved, for currently active debug path
77 * 3 : events once per object lifetime (CCtx, CDict, etc.)
78 * 4 : events once per frame
79 * 5 : events once per block
80 * 6 : events once per sequence (verbose)
81 * 7+: events at every position (*very* verbose)
82 *
83 * It's generally inconvenient to output traces > 5.
84 * In that case, it's possible to selectively enable higher verbosity levels
85 * by modifying g_debuglevel.
86 */
87
88 #if (DEBUGLEVEL>=1)
89 # include <assert.h>
90 #else
91 # ifndef assert /* assert may be already defined, due to prior #include <assert.h> */
92 # define assert(condition) ((void)0) /* disable assert (default) */
93 # endif
94 #endif
95
96 #if (DEBUGLEVEL>=2)
97 # include <stdio.h>
98 extern int g_debuglevel; /* here, this variable is only declared,
99 it actually lives in debug.c,
100 and is shared by the whole process.
101 It's typically used to enable very verbose levels
102 on selective conditions (such as position in src) */
103
104 # define RAWLOG(l, ...) { \
105 if (l<=g_debuglevel) { \
106 fprintf(stderr, __VA_ARGS__); \
107 } }
108 # define DEBUGLOG(l, ...) { \
109 if (l<=g_debuglevel) { \
110 fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
111 fprintf(stderr, " \n"); \
112 } }
113 #else
114 # define RAWLOG(l, ...) {} /* disabled */
115 # define DEBUGLOG(l, ...) {} /* disabled */
116 #endif
117
118
119 #if defined (__cplusplus)
120 }
121 #endif
122
123 #endif /* DEBUG_H_12987983217 */
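
A hypothetical usage sketch for the macros above (not part of this change): a translation unit compiled with e.g. `-DDEBUGLEVEL=5` and linked against debug.c can trace conditionally and adjust verbosity at run time.

#include "debug.h"

static void process_block(int n)
{
    DEBUGLOG(5, "processing block %d", n);   /* printed when 5 <= g_debuglevel */
    DEBUG_STATIC_ASSERT(sizeof(int) >= 2);   /* compile-time, leaves no runtime artefact */
    assert(n >= 0);                          /* enabled starting at DEBUGLEVEL >= 1 */
}

int main(void)
{
    process_block(1);
#if DEBUGLEVEL >= 2
    g_debuglevel = 7;    /* dynamically raise verbosity (not thread safe) */
#endif
    process_block(2);
    return 0;
}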
--- /dev/null
+++ b/contrib/python-zstandard/zstd/compress/hist.c
@@ -0,0 +1,195 @@
1 /* ******************************************************************
2 hist : Histogram functions
3 part of Finite State Entropy project
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 - Public forum : https://groups.google.com/forum/#!forum/lz4c
34 ****************************************************************** */
35
36 /* --- dependencies --- */
37 #include "mem.h" /* U32, BYTE, etc. */
38 #include "debug.h" /* assert, DEBUGLOG */
39 #include "error_private.h" /* ERROR */
40 #include "hist.h"
41
42
43 /* --- Error management --- */
44 unsigned HIST_isError(size_t code) { return ERR_isError(code); }
45
46 /*-**************************************************************
47 * Histogram functions
48 ****************************************************************/
49 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
50 const void* src, size_t srcSize)
51 {
52 const BYTE* ip = (const BYTE*)src;
53 const BYTE* const end = ip + srcSize;
54 unsigned maxSymbolValue = *maxSymbolValuePtr;
55 unsigned largestCount=0;
56
57 memset(count, 0, (maxSymbolValue+1) * sizeof(*count));
58 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
59
60 while (ip<end) {
61 assert(*ip <= maxSymbolValue);
62 count[*ip++]++;
63 }
64
65 while (!count[maxSymbolValue]) maxSymbolValue--;
66 *maxSymbolValuePtr = maxSymbolValue;
67
68 { U32 s;
69 for (s=0; s<=maxSymbolValue; s++)
70 if (count[s] > largestCount) largestCount = count[s];
71 }
72
73 return largestCount;
74 }
75
76
77 /* HIST_count_parallel_wksp() :
78 * store histogram into 4 intermediate tables, recombined at the end.
79 * this design makes better use of OoO cpus,
80 * and is noticeably faster when some values are heavily repeated.
81 * But it needs some additional workspace for intermediate tables.
82 * `workSpace` must be a table of size >= HIST_WKSP_SIZE_U32 unsigned.
83 * @return : largest histogram frequency,
84 * or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
85 static size_t HIST_count_parallel_wksp(
86 unsigned* count, unsigned* maxSymbolValuePtr,
87 const void* source, size_t sourceSize,
88 unsigned checkMax,
89 unsigned* const workSpace)
90 {
91 const BYTE* ip = (const BYTE*)source;
92 const BYTE* const iend = ip+sourceSize;
93 unsigned maxSymbolValue = *maxSymbolValuePtr;
94 unsigned max=0;
95 U32* const Counting1 = workSpace;
96 U32* const Counting2 = Counting1 + 256;
97 U32* const Counting3 = Counting2 + 256;
98 U32* const Counting4 = Counting3 + 256;
99
100 memset(workSpace, 0, 4*256*sizeof(unsigned));
101
102 /* safety checks */
103 if (!sourceSize) {
104 memset(count, 0, maxSymbolValue + 1);
105 *maxSymbolValuePtr = 0;
106 return 0;
107 }
108 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
109
110 /* by stripes of 16 bytes */
111 { U32 cached = MEM_read32(ip); ip += 4;
112 while (ip < iend-15) {
113 U32 c = cached; cached = MEM_read32(ip); ip += 4;
114 Counting1[(BYTE) c ]++;
115 Counting2[(BYTE)(c>>8) ]++;
116 Counting3[(BYTE)(c>>16)]++;
117 Counting4[ c>>24 ]++;
118 c = cached; cached = MEM_read32(ip); ip += 4;
119 Counting1[(BYTE) c ]++;
120 Counting2[(BYTE)(c>>8) ]++;
121 Counting3[(BYTE)(c>>16)]++;
122 Counting4[ c>>24 ]++;
123 c = cached; cached = MEM_read32(ip); ip += 4;
124 Counting1[(BYTE) c ]++;
125 Counting2[(BYTE)(c>>8) ]++;
126 Counting3[(BYTE)(c>>16)]++;
127 Counting4[ c>>24 ]++;
128 c = cached; cached = MEM_read32(ip); ip += 4;
129 Counting1[(BYTE) c ]++;
130 Counting2[(BYTE)(c>>8) ]++;
131 Counting3[(BYTE)(c>>16)]++;
132 Counting4[ c>>24 ]++;
133 }
134 ip-=4;
135 }
136
137 /* finish last symbols */
138 while (ip<iend) Counting1[*ip++]++;
139
140 if (checkMax) { /* verify stats will fit into destination table */
141 U32 s; for (s=255; s>maxSymbolValue; s--) {
142 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
143 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
144 } }
145
146 { U32 s;
147 if (maxSymbolValue > 255) maxSymbolValue = 255;
148 for (s=0; s<=maxSymbolValue; s++) {
149 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
150 if (count[s] > max) max = count[s];
151 } }
152
153 while (!count[maxSymbolValue]) maxSymbolValue--;
154 *maxSymbolValuePtr = maxSymbolValue;
155 return (size_t)max;
156 }
157
158 /* HIST_countFast_wksp() :
159 * Same as HIST_countFast(), but using an externally provided scratch buffer.
160 * `workSpace` must be a table of >= HIST_WKSP_SIZE_U32 unsigned */
161 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
162 const void* source, size_t sourceSize,
163 unsigned* workSpace)
164 {
165 if (sourceSize < 1500) /* heuristic threshold */
166 return HIST_count_simple(count, maxSymbolValuePtr, source, sourceSize);
167 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
168 }
169
170 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
171 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
172 const void* source, size_t sourceSize)
173 {
174 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
175 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
176 }
177
178 /* HIST_count_wksp() :
179 * Same as HIST_count(), but using an externally provided scratch buffer.
180 * `workSpace` must be a table of >= HIST_WKSP_SIZE_U32 unsigned */
181 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
182 const void* source, size_t sourceSize, unsigned* workSpace)
183 {
184 if (*maxSymbolValuePtr < 255)
185 return HIST_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
186 *maxSymbolValuePtr = 255;
187 return HIST_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
188 }
189
190 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
191 const void* src, size_t srcSize)
192 {
193 unsigned tmpCounters[HIST_WKSP_SIZE_U32];
194 return HIST_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
195 }
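
A small, hypothetical caller for the simple entry point above (not part of this change), showing the in/out contract of `count` and `*maxSymbolValuePtr`:

#include <stdio.h>
#include "hist.h"

int main(void)
{
    const char msg[] = "abracadabra";
    unsigned count[256];              /* must hold maxSymbolValue+1 entries */
    unsigned maxSymbolValue = 255;    /* in: upper bound; out: largest byte seen */
    size_t largest = HIST_count(count, &maxSymbolValue, msg, sizeof(msg) - 1);
    if (HIST_isError(largest)) return 1;
    /* prints: largest count 5 ('a' occurs 5 times), max symbol 0x72 ('r') */
    printf("largest count %u, max symbol 0x%02X\n",
           (unsigned)largest, maxSymbolValue);
    return 0;
}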
--- /dev/null
+++ b/contrib/python-zstandard/zstd/compress/hist.h
@@ -0,0 +1,92 @@
1 /* ******************************************************************
2 hist : Histogram functions
3 part of Finite State Entropy project
4 Copyright (C) 2013-present, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 - Public forum : https://groups.google.com/forum/#!forum/lz4c
34 ****************************************************************** */
35
36 /* --- dependencies --- */
37 #include <stddef.h> /* size_t */
38
39
40 /* --- simple histogram functions --- */
41
42 /*! HIST_count():
43 * Provides the precise count of each byte within a table 'count'.
44 * 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
45 * Updates *maxSymbolValuePtr with actual largest symbol value detected.
46 * @return : count of the most frequent symbol (which isn't identified).
47 * or an error code, which can be tested using HIST_isError().
48 * note : if return == srcSize, there is only one symbol.
49 */
50 size_t HIST_count(unsigned* count, unsigned* maxSymbolValuePtr,
51 const void* src, size_t srcSize);
52
53 unsigned HIST_isError(size_t code); /**< tells if a return value is an error code */
54
55
56 /* --- advanced histogram functions --- */
57
58 #define HIST_WKSP_SIZE_U32 1024
59 /** HIST_count_wksp() :
60 * Same as HIST_count(), but using an externally provided scratch buffer.
61 * Benefit is this function will use very little stack space.
62 * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
63 */
64 size_t HIST_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
65 const void* src, size_t srcSize,
66 unsigned* workSpace);
67
68 /** HIST_countFast() :
69 * same as HIST_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr.
70 * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr`
71 */
72 size_t HIST_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
73 const void* src, size_t srcSize);
74
75 /** HIST_countFast_wksp() :
76 * Same as HIST_countFast(), but using an externally provided scratch buffer.
77 * `workSpace` must be a table of unsigned of size >= HIST_WKSP_SIZE_U32
78 */
79 size_t HIST_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
80 const void* src, size_t srcSize,
81 unsigned* workSpace);
82
83 /*! HIST_count_simple() :
84 * Like HIST_countFast(), this function is unsafe,
85 * and will segfault if any value within `src` is `> *maxSymbolValuePtr`.
86 * It is also a bit slower for large inputs.
87 * However, it does not need any additional memory (not even on stack).
88 * @return : count of the most frequent symbol.
89 * Note this function doesn't produce any error (i.e. it must succeed).
90 */
91 unsigned HIST_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
92 const void* src, size_t srcSize);
@@ -0,0 +1,83 b''
1 #include <stdio.h> /* fprintf */
2 #include <stdlib.h> /* malloc, free, qsort */
3 #include <string.h> /* memset */
4 #include <time.h> /* clock */
5 #include "mem.h" /* read */
6 #include "pool.h"
7 #include "threading.h"
8 #include "zstd_internal.h" /* includes zstd.h */
9 #ifndef ZDICT_STATIC_LINKING_ONLY
10 #define ZDICT_STATIC_LINKING_ONLY
11 #endif
12 #include "zdict.h"
13
14 /**
15 * COVER_best_t is used for two purposes:
16 * 1. Synchronizing threads.
17 * 2. Saving the best parameters and dictionary.
18 *
19 * All of the methods except COVER_best_init() are thread safe if zstd is
20 * compiled with multithreaded support.
21 */
22 typedef struct COVER_best_s {
23 ZSTD_pthread_mutex_t mutex;
24 ZSTD_pthread_cond_t cond;
25 size_t liveJobs;
26 void *dict;
27 size_t dictSize;
28 ZDICT_cover_params_t parameters;
29 size_t compressedSize;
30 } COVER_best_t;
31
32 /**
33 * A segment is a range in the source as well as the score of the segment.
34 */
35 typedef struct {
36 U32 begin;
37 U32 end;
38 U32 score;
39 } COVER_segment_t;
40
41 /**
42 * Checks total compressed size of a dictionary
43 */
44 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
45 const size_t *samplesSizes, const BYTE *samples,
46 size_t *offsets,
47 size_t nbTrainSamples, size_t nbSamples,
48 BYTE *const dict, size_t dictBufferCapacity);
49
50 /**
51 * Returns the sum of the sample sizes.
52 */
53 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples);
54
55 /**
56 * Initialize the `COVER_best_t`.
57 */
58 void COVER_best_init(COVER_best_t *best);
59
60 /**
61 * Wait until liveJobs == 0.
62 */
63 void COVER_best_wait(COVER_best_t *best);
64
65 /**
66 * Call COVER_best_wait() and then destroy the COVER_best_t.
67 */
68 void COVER_best_destroy(COVER_best_t *best);
69
70 /**
71 * Called when a thread is about to be launched.
72 * Increments liveJobs.
73 */
74 void COVER_best_start(COVER_best_t *best);
75
76 /**
77 * Called when a thread finishes executing, both on error or success.
78 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
79 * If this dictionary is the best so far save it and its parameters.
80 */
81 void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
82 ZDICT_cover_params_t parameters, void *dict,
83 size_t dictSize);
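
The declarations above imply a simple lifecycle, sketched hypothetically below (not part of this change; it assumes, per the comment on COVER_best_finish(), that the best dictionary is saved into the COVER_best_t, so the caller may release its own candidate buffer afterwards — which matches how fastcover.c uses it):

#include <stdlib.h>
#include <string.h>
#include "cover.h"

static void try_one_candidate(COVER_best_t* best)
{
    ZDICT_cover_params_t params;
    size_t dictSize = 1024;
    void* dict = malloc(dictSize);
    size_t compressedSize = 4096;     /* placeholder score for the sketch */
    memset(&params, 0, sizeof(params));
    /* ... build a candidate dictionary into `dict` and score it ... */
    COVER_best_finish(best, compressedSize, params, dict, dictSize);
    free(dict);                       /* finish() retains the best candidate */
}

int main(void)
{
    COVER_best_t best;
    COVER_best_init(&best);
    COVER_best_start(&best);          /* announce one pending job */
    try_one_candidate(&best);         /* normally queued on a POOL_ctx thread */
    COVER_best_wait(&best);           /* blocks until liveJobs == 0 */
    /* best.dict / best.parameters now describe the winning candidate */
    COVER_best_destroy(&best);
    return 0;
}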
--- /dev/null
+++ b/contrib/python-zstandard/zstd/dictBuilder/fastcover.c
@@ -0,0 +1,728 @@
1 /*-*************************************
2 * Dependencies
3 ***************************************/
4 #include <stdio.h> /* fprintf */
5 #include <stdlib.h> /* malloc, free, qsort */
6 #include <string.h> /* memset */
7 #include <time.h> /* clock */
8
9 #include "mem.h" /* read */
10 #include "pool.h"
11 #include "threading.h"
12 #include "cover.h"
13 #include "zstd_internal.h" /* includes zstd.h */
14 #ifndef ZDICT_STATIC_LINKING_ONLY
15 #define ZDICT_STATIC_LINKING_ONLY
16 #endif
17 #include "zdict.h"
18
19
20 /*-*************************************
21 * Constants
22 ***************************************/
23 #define FASTCOVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
24 #define FASTCOVER_MAX_F 31
25 #define FASTCOVER_MAX_ACCEL 10
26 #define DEFAULT_SPLITPOINT 0.75
27 #define DEFAULT_F 20
28 #define DEFAULT_ACCEL 1
29
30
31 /*-*************************************
32 * Console display
33 ***************************************/
34 static int g_displayLevel = 2;
35 #define DISPLAY(...) \
36 { \
37 fprintf(stderr, __VA_ARGS__); \
38 fflush(stderr); \
39 }
40 #define LOCALDISPLAYLEVEL(displayLevel, l, ...) \
41 if (displayLevel >= l) { \
42 DISPLAY(__VA_ARGS__); \
43 } /* 0 : no display; 1: errors; 2: default; 3: details; 4: debug */
44 #define DISPLAYLEVEL(l, ...) LOCALDISPLAYLEVEL(g_displayLevel, l, __VA_ARGS__)
45
46 #define LOCALDISPLAYUPDATE(displayLevel, l, ...) \
47 if (displayLevel >= l) { \
48 if ((clock() - g_time > refreshRate) || (displayLevel >= 4)) { \
49 g_time = clock(); \
50 DISPLAY(__VA_ARGS__); \
51 } \
52 }
53 #define DISPLAYUPDATE(l, ...) LOCALDISPLAYUPDATE(g_displayLevel, l, __VA_ARGS__)
54 static const clock_t refreshRate = CLOCKS_PER_SEC * 15 / 100;
55 static clock_t g_time = 0;
56
57
58 /*-*************************************
59 * Hash Functions
60 ***************************************/
61 static const U64 prime6bytes = 227718039650203ULL;
62 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
63 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
64
65 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
66 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
67 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
68
69
70 /**
71 * Hash the d-byte value pointed to by p and reduce it mod 2^f (f is passed as h)
72 */
73 static size_t FASTCOVER_hashPtrToIndex(const void* p, U32 h, unsigned d) {
74 if (d == 6) {
75 return ZSTD_hash6Ptr(p, h) & ((1 << h) - 1);
76 }
77 return ZSTD_hash8Ptr(p, h) & ((1 << h) - 1);
78 }
79
80
81 /*-*************************************
82 * Acceleration
83 ***************************************/
84 typedef struct {
85 unsigned finalize; /* Percentage of training samples used for ZDICT_finalizeDictionary */
86   unsigned skip; /* Number of dmers skipped between each dmer counted in computeFrequency */
87 } FASTCOVER_accel_t;
88
89
90 static const FASTCOVER_accel_t FASTCOVER_defaultAccelParameters[FASTCOVER_MAX_ACCEL+1] = {
91 { 100, 0 }, /* accel = 0, should not happen because accel = 0 defaults to accel = 1 */
92 { 100, 0 }, /* accel = 1 */
93 { 50, 1 }, /* accel = 2 */
94 { 34, 2 }, /* accel = 3 */
95 { 25, 3 }, /* accel = 4 */
96 { 20, 4 }, /* accel = 5 */
97 { 17, 5 }, /* accel = 6 */
98 { 14, 6 }, /* accel = 7 */
99 { 13, 7 }, /* accel = 8 */
100 { 11, 8 }, /* accel = 9 */
101 { 10, 9 }, /* accel = 10 */
102 };
103
104
105 /*-*************************************
106 * Context
107 ***************************************/
108 typedef struct {
109 const BYTE *samples;
110 size_t *offsets;
111 const size_t *samplesSizes;
112 size_t nbSamples;
113 size_t nbTrainSamples;
114 size_t nbTestSamples;
115 size_t nbDmers;
116 U32 *freqs;
117 unsigned d;
118 unsigned f;
119 FASTCOVER_accel_t accelParams;
120 } FASTCOVER_ctx_t;
121
122
123 /*-*************************************
124 * Helper functions
125 ***************************************/
126 /**
127 * Selects the best segment in an epoch.
128 * Segments are scored according to the function:
129 *
130 * Let F(d) be the frequency of all dmers with hash value d.
131 * Let S_i be the hash value of the dmer at position i of segment S, which has length k.
132 *
133 * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1})
134 *
135 * Once a dmer with hash value d is in the dictionary, we set F(d) = 0.
136 */
137 static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx,
138 U32 *freqs, U32 begin, U32 end,
139 ZDICT_cover_params_t parameters,
140 U16* segmentFreqs) {
141 /* Constants */
142 const U32 k = parameters.k;
143 const U32 d = parameters.d;
144 const U32 f = ctx->f;
145 const U32 dmersInK = k - d + 1;
146
147 /* Try each segment (activeSegment) and save the best (bestSegment) */
148 COVER_segment_t bestSegment = {0, 0, 0};
149 COVER_segment_t activeSegment;
150
151 /* Reset the activeDmers in the segment */
152 /* The activeSegment starts at the beginning of the epoch. */
153 activeSegment.begin = begin;
154 activeSegment.end = begin;
155 activeSegment.score = 0;
156
157 /* Slide the activeSegment through the whole epoch.
158 * Save the best segment in bestSegment.
159 */
160 while (activeSegment.end < end) {
161 /* Get hash value of current dmer */
162 const size_t index = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d);
163
164     /* Add frequency of this index to score if this is the first occurrence of index in active segment */
165 if (segmentFreqs[index] == 0) {
166 activeSegment.score += freqs[index];
167 }
168     /* Increment end of segment and segmentFreqs */
169 activeSegment.end += 1;
170 segmentFreqs[index] += 1;
171 /* If the window is now too large, drop the first position */
172 if (activeSegment.end - activeSegment.begin == dmersInK + 1) {
173 /* Get hash value of the dmer to be eliminated from active segment */
174 const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
175 segmentFreqs[delIndex] -= 1;
176 /* Subtract frequency of this index from score if this is the last occurrence of this index in active segment */
177 if (segmentFreqs[delIndex] == 0) {
178 activeSegment.score -= freqs[delIndex];
179 }
180 /* Increment start of segment */
181 activeSegment.begin += 1;
182 }
183
184 /* If this segment is the best so far save it */
185 if (activeSegment.score > bestSegment.score) {
186 bestSegment = activeSegment;
187 }
188 }
189
190 /* Zero out rest of segmentFreqs array */
191 while (activeSegment.begin < end) {
192 const size_t delIndex = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.begin, f, d);
193 segmentFreqs[delIndex] -= 1;
194 activeSegment.begin += 1;
195 }
196
197 {
198 /* Zero the frequency of hash value of each dmer covered by the chosen segment. */
199 U32 pos;
200 for (pos = bestSegment.begin; pos != bestSegment.end; ++pos) {
201 const size_t i = FASTCOVER_hashPtrToIndex(ctx->samples + pos, f, d);
202 freqs[i] = 0;
203 }
204 }
205
206 return bestSegment;
207 }
208
209
210 static int FASTCOVER_checkParameters(ZDICT_cover_params_t parameters,
211 size_t maxDictSize, unsigned f,
212 unsigned accel) {
213 /* k, d, and f are required parameters */
214 if (parameters.d == 0 || parameters.k == 0) {
215 return 0;
216 }
217 /* d has to be 6 or 8 */
218 if (parameters.d != 6 && parameters.d != 8) {
219 return 0;
220 }
221 /* k <= maxDictSize */
222 if (parameters.k > maxDictSize) {
223 return 0;
224 }
225 /* d <= k */
226 if (parameters.d > parameters.k) {
227 return 0;
228 }
229   /* 0 < f <= FASTCOVER_MAX_F */
230 if (f > FASTCOVER_MAX_F || f == 0) {
231 return 0;
232 }
233 /* 0 < splitPoint <= 1 */
234 if (parameters.splitPoint <= 0 || parameters.splitPoint > 1) {
235 return 0;
236 }
237 /* 0 < accel <= 10 */
238 if (accel > 10 || accel == 0) {
239 return 0;
240 }
241 return 1;
242 }
243
244
245 /**
246 * Clean up a context initialized with `FASTCOVER_ctx_init()`.
247 */
248 static void
249 FASTCOVER_ctx_destroy(FASTCOVER_ctx_t* ctx)
250 {
251 if (!ctx) return;
252
253 free(ctx->freqs);
254 ctx->freqs = NULL;
255
256 free(ctx->offsets);
257 ctx->offsets = NULL;
258 }
259
260
261 /**
262 * Calculate the frequency of the hash value of each dmer in ctx->samples
263 */
264 static void
265 FASTCOVER_computeFrequency(U32* freqs, const FASTCOVER_ctx_t* ctx)
266 {
267 const unsigned f = ctx->f;
268 const unsigned d = ctx->d;
269 const unsigned skip = ctx->accelParams.skip;
270 const unsigned readLength = MAX(d, 8);
271 size_t i;
272 assert(ctx->nbTrainSamples >= 5);
273 assert(ctx->nbTrainSamples <= ctx->nbSamples);
274 for (i = 0; i < ctx->nbTrainSamples; i++) {
275 size_t start = ctx->offsets[i]; /* start of current dmer */
276 size_t const currSampleEnd = ctx->offsets[i+1];
277 while (start + readLength <= currSampleEnd) {
278 const size_t dmerIndex = FASTCOVER_hashPtrToIndex(ctx->samples + start, f, d);
279 freqs[dmerIndex]++;
280 start = start + skip + 1;
281 }
282 }
283 }
284
285
286 /**
287 * Prepare a context for dictionary building.
288 * The context is only dependent on the parameter `d` and can be used multiple
289 * times.
290 * Returns 1 on success or zero on error.
291 * The context must be destroyed with `FASTCOVER_ctx_destroy()`.
292 */
293 static int
294 FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,
295 const void* samplesBuffer,
296 const size_t* samplesSizes, unsigned nbSamples,
297 unsigned d, double splitPoint, unsigned f,
298 FASTCOVER_accel_t accelParams)
299 {
300 const BYTE* const samples = (const BYTE*)samplesBuffer;
301 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
302 /* Split samples into testing and training sets */
303 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
304 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
305 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
306 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
307
308 /* Checks */
309 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
310 totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) {
311 DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n",
312 (U32)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20));
313 return 0;
314 }
315
316 /* Check if there are at least 5 training samples */
317 if (nbTrainSamples < 5) {
318 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples);
319 return 0;
320 }
321
322     /* Check that there is at least one testing sample */
323 if (nbTestSamples < 1) {
324 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples);
325 return 0;
326 }
327
328 /* Zero the context */
329 memset(ctx, 0, sizeof(*ctx));
330 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
331 (U32)trainingSamplesSize);
332 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
333 (U32)testSamplesSize);
334
335 ctx->samples = samples;
336 ctx->samplesSizes = samplesSizes;
337 ctx->nbSamples = nbSamples;
338 ctx->nbTrainSamples = nbTrainSamples;
339 ctx->nbTestSamples = nbTestSamples;
340 ctx->nbDmers = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
341 ctx->d = d;
342 ctx->f = f;
343 ctx->accelParams = accelParams;
344
345 /* The offsets of each file */
346 ctx->offsets = (size_t*)calloc((nbSamples + 1), sizeof(size_t));
347 if (ctx->offsets == NULL) {
348 DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n");
349 FASTCOVER_ctx_destroy(ctx);
350 return 0;
351 }
352
353 /* Fill offsets from the samplesSizes */
354 { U32 i;
355 ctx->offsets[0] = 0;
356 assert(nbSamples >= 5);
357 for (i = 1; i <= nbSamples; ++i) {
358 ctx->offsets[i] = ctx->offsets[i - 1] + samplesSizes[i - 1];
359 }
360 }
361
362 /* Initialize frequency array of size 2^f */
363 ctx->freqs = (U32*)calloc(((U64)1 << f), sizeof(U32));
364 if (ctx->freqs == NULL) {
365 DISPLAYLEVEL(1, "Failed to allocate frequency table \n");
366 FASTCOVER_ctx_destroy(ctx);
367 return 0;
368 }
369
370 DISPLAYLEVEL(2, "Computing frequencies\n");
371 FASTCOVER_computeFrequency(ctx->freqs, ctx);
372
373 return 1;
374 }
375
376
377 /**
378 * Given the prepared context, build the dictionary.
379 */
380 static size_t
381 FASTCOVER_buildDictionary(const FASTCOVER_ctx_t* ctx,
382 U32* freqs,
383 void* dictBuffer, size_t dictBufferCapacity,
384 ZDICT_cover_params_t parameters,
385 U16* segmentFreqs)
386 {
387 BYTE *const dict = (BYTE *)dictBuffer;
388 size_t tail = dictBufferCapacity;
389 /* Divide the data up into epochs of equal size.
390 * We will select at least one segment from each epoch.
391 */
392 const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k));
393 const U32 epochSize = (U32)(ctx->nbDmers / epochs);
394 size_t epoch;
395 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
396 epochSize);
397 /* Loop through the epochs until there are no more segments or the dictionary
398 * is full.
399 */
400 for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) {
401 const U32 epochBegin = (U32)(epoch * epochSize);
402 const U32 epochEnd = epochBegin + epochSize;
403 size_t segmentSize;
404 /* Select a segment */
405 COVER_segment_t segment = FASTCOVER_selectSegment(
406 ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs);
407
408 /* If the segment covers no dmers, then we are out of content */
409 if (segment.score == 0) {
410 break;
411 }
412
413 /* Trim the segment if necessary and if it is too small then we are done */
414 segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail);
415 if (segmentSize < parameters.d) {
416 break;
417 }
418
419 /* We fill the dictionary from the back to allow the best segments to be
420 * referenced with the smallest offsets.
421 */
422 tail -= segmentSize;
423 memcpy(dict + tail, ctx->samples + segment.begin, segmentSize);
424 DISPLAYUPDATE(
425 2, "\r%u%% ",
426 (U32)(((dictBufferCapacity - tail) * 100) / dictBufferCapacity));
427 }
428 DISPLAYLEVEL(2, "\r%79s\r", "");
429 return tail;
430 }
431
432
433 /**
434 * Parameters for FASTCOVER_tryParameters().
435 */
436 typedef struct FASTCOVER_tryParameters_data_s {
437 const FASTCOVER_ctx_t* ctx;
438 COVER_best_t* best;
439 size_t dictBufferCapacity;
440 ZDICT_cover_params_t parameters;
441 } FASTCOVER_tryParameters_data_t;
442
443
444 /**
445 * Tries a set of parameters and updates the COVER_best_t with the results.
446 * This function is thread safe if zstd is compiled with multithreaded support.
447 * It takes its parameters as an *OWNING* opaque pointer to support threading.
448 */
449 static void FASTCOVER_tryParameters(void *opaque)
450 {
451 /* Save parameters as local variables */
452 FASTCOVER_tryParameters_data_t *const data = (FASTCOVER_tryParameters_data_t *)opaque;
453 const FASTCOVER_ctx_t *const ctx = data->ctx;
454 const ZDICT_cover_params_t parameters = data->parameters;
455 size_t dictBufferCapacity = data->dictBufferCapacity;
456 size_t totalCompressedSize = ERROR(GENERIC);
457 /* Initialize array to keep track of frequency of dmer within activeSegment */
458 U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16));
459 /* Allocate space for hash table, dict, and freqs */
460 BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity);
461 U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32));
462 if (!segmentFreqs || !dict || !freqs) {
463 DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n");
464 goto _cleanup;
465 }
466 /* Copy the frequencies because we need to modify them */
467 memcpy(freqs, ctx->freqs, ((U64)1 << ctx->f) * sizeof(U32));
468 /* Build the dictionary */
469 { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity,
470 parameters, segmentFreqs);
471 const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100);
472 dictBufferCapacity = ZDICT_finalizeDictionary(
473 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
474 ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams);
475 if (ZDICT_isError(dictBufferCapacity)) {
476 DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
477 goto _cleanup;
478 }
479 }
480 /* Check total compressed size */
481 totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
482 ctx->samples, ctx->offsets,
483 ctx->nbTrainSamples, ctx->nbSamples,
484 dict, dictBufferCapacity);
485 _cleanup:
486 COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
487 dictBufferCapacity);
488 free(data);
489 free(segmentFreqs);
490 free(dict);
491 free(freqs);
492 }
493
494
495 static void
496 FASTCOVER_convertToCoverParams(ZDICT_fastCover_params_t fastCoverParams,
497 ZDICT_cover_params_t* coverParams)
498 {
499 coverParams->k = fastCoverParams.k;
500 coverParams->d = fastCoverParams.d;
501 coverParams->steps = fastCoverParams.steps;
502 coverParams->nbThreads = fastCoverParams.nbThreads;
503 coverParams->splitPoint = fastCoverParams.splitPoint;
504 coverParams->zParams = fastCoverParams.zParams;
505 }
506
507
508 static void
509 FASTCOVER_convertToFastCoverParams(ZDICT_cover_params_t coverParams,
510 ZDICT_fastCover_params_t* fastCoverParams,
511 unsigned f, unsigned accel)
512 {
513 fastCoverParams->k = coverParams.k;
514 fastCoverParams->d = coverParams.d;
515 fastCoverParams->steps = coverParams.steps;
516 fastCoverParams->nbThreads = coverParams.nbThreads;
517 fastCoverParams->splitPoint = coverParams.splitPoint;
518 fastCoverParams->f = f;
519 fastCoverParams->accel = accel;
520 fastCoverParams->zParams = coverParams.zParams;
521 }
522
523
524 ZDICTLIB_API size_t
525 ZDICT_trainFromBuffer_fastCover(void* dictBuffer, size_t dictBufferCapacity,
526 const void* samplesBuffer,
527 const size_t* samplesSizes, unsigned nbSamples,
528 ZDICT_fastCover_params_t parameters)
529 {
530 BYTE* const dict = (BYTE*)dictBuffer;
531 FASTCOVER_ctx_t ctx;
532 ZDICT_cover_params_t coverParams;
533 FASTCOVER_accel_t accelParams;
534 /* Initialize global data */
535 g_displayLevel = parameters.zParams.notificationLevel;
536     /* Force splitPoint to 1.0 (no test split); assign f and accel if not provided */
537 parameters.splitPoint = 1.0;
538 parameters.f = parameters.f == 0 ? DEFAULT_F : parameters.f;
539 parameters.accel = parameters.accel == 0 ? DEFAULT_ACCEL : parameters.accel;
540 /* Convert to cover parameter */
541 memset(&coverParams, 0 , sizeof(coverParams));
542 FASTCOVER_convertToCoverParams(parameters, &coverParams);
543 /* Checks */
544 if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f,
545 parameters.accel)) {
546 DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
547 return ERROR(GENERIC);
548 }
549 if (nbSamples == 0) {
550 DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n");
551 return ERROR(GENERIC);
552 }
553 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
554 DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n",
555 ZDICT_DICTSIZE_MIN);
556 return ERROR(dstSize_tooSmall);
557 }
558     /* Assign corresponding FASTCOVER_accel_t to accelParams */
559 accelParams = FASTCOVER_defaultAccelParameters[parameters.accel];
560 /* Initialize context */
561 if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
562 coverParams.d, parameters.splitPoint, parameters.f,
563 accelParams)) {
564 DISPLAYLEVEL(1, "Failed to initialize context\n");
565 return ERROR(GENERIC);
566 }
567 /* Build the dictionary */
568 DISPLAYLEVEL(2, "Building dictionary\n");
569 {
570 /* Initialize array to keep track of frequency of dmer within activeSegment */
571 U16* segmentFreqs = (U16 *)calloc(((U64)1 << parameters.f), sizeof(U16));
572 const size_t tail = FASTCOVER_buildDictionary(&ctx, ctx.freqs, dictBuffer,
573 dictBufferCapacity, coverParams, segmentFreqs);
574 const unsigned nbFinalizeSamples = (unsigned)(ctx.nbTrainSamples * ctx.accelParams.finalize / 100);
575 const size_t dictionarySize = ZDICT_finalizeDictionary(
576 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
577 samplesBuffer, samplesSizes, nbFinalizeSamples, coverParams.zParams);
578 if (!ZSTD_isError(dictionarySize)) {
579 DISPLAYLEVEL(2, "Constructed dictionary of size %u\n",
580 (U32)dictionarySize);
581 }
582 FASTCOVER_ctx_destroy(&ctx);
583 free(segmentFreqs);
584 return dictionarySize;
585 }
586 }
587
588
589 ZDICTLIB_API size_t
590 ZDICT_optimizeTrainFromBuffer_fastCover(
591 void* dictBuffer, size_t dictBufferCapacity,
592 const void* samplesBuffer,
593 const size_t* samplesSizes, unsigned nbSamples,
594 ZDICT_fastCover_params_t* parameters)
595 {
596 ZDICT_cover_params_t coverParams;
597 FASTCOVER_accel_t accelParams;
598 /* constants */
599 const unsigned nbThreads = parameters->nbThreads;
600 const double splitPoint =
601 parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
602 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
603 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
604 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
605 const unsigned kMaxK = parameters->k == 0 ? 2000 : parameters->k;
606 const unsigned kSteps = parameters->steps == 0 ? 40 : parameters->steps;
607 const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1);
608 const unsigned kIterations =
609 (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize);
610 const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f;
611 const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel;
612 /* Local variables */
613 const int displayLevel = parameters->zParams.notificationLevel;
614 unsigned iteration = 1;
615 unsigned d;
616 unsigned k;
617 COVER_best_t best;
618 POOL_ctx *pool = NULL;
619 /* Checks */
620 if (splitPoint <= 0 || splitPoint > 1) {
621 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n");
622 return ERROR(GENERIC);
623 }
624 if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) {
625 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n");
626 return ERROR(GENERIC);
627 }
628 if (kMinK < kMaxD || kMaxK < kMinK) {
629 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n");
630 return ERROR(GENERIC);
631 }
632 if (nbSamples == 0) {
633 LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n");
634 return ERROR(GENERIC);
635 }
636 if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) {
637 LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n",
638 ZDICT_DICTSIZE_MIN);
639 return ERROR(dstSize_tooSmall);
640 }
641 if (nbThreads > 1) {
642 pool = POOL_create(nbThreads, 1);
643 if (!pool) {
644 return ERROR(memory_allocation);
645 }
646 }
647 /* Initialization */
648 COVER_best_init(&best);
649 memset(&coverParams, 0 , sizeof(coverParams));
650 FASTCOVER_convertToCoverParams(*parameters, &coverParams);
651 accelParams = FASTCOVER_defaultAccelParameters[accel];
652 /* Turn down global display level to clean up display at level 2 and below */
653 g_displayLevel = displayLevel == 0 ? 0 : displayLevel - 1;
654 /* Loop through d first because each new value needs a new context */
655 LOCALDISPLAYLEVEL(displayLevel, 2, "Trying %u different sets of parameters\n",
656 kIterations);
657 for (d = kMinD; d <= kMaxD; d += 2) {
658 /* Initialize the context for this value of d */
659 FASTCOVER_ctx_t ctx;
660 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
661 if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) {
662 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
663 COVER_best_destroy(&best);
664 POOL_free(pool);
665 return ERROR(GENERIC);
666 }
667 /* Loop through k reusing the same context */
668 for (k = kMinK; k <= kMaxK; k += kStepSize) {
669 /* Prepare the arguments */
670 FASTCOVER_tryParameters_data_t *data = (FASTCOVER_tryParameters_data_t *)malloc(
671 sizeof(FASTCOVER_tryParameters_data_t));
672 LOCALDISPLAYLEVEL(displayLevel, 3, "k=%u\n", k);
673 if (!data) {
674 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to allocate parameters\n");
675 COVER_best_destroy(&best);
676 FASTCOVER_ctx_destroy(&ctx);
677 POOL_free(pool);
678 return ERROR(GENERIC);
679 }
680 data->ctx = &ctx;
681 data->best = &best;
682 data->dictBufferCapacity = dictBufferCapacity;
683 data->parameters = coverParams;
684 data->parameters.k = k;
685 data->parameters.d = d;
686 data->parameters.splitPoint = splitPoint;
687 data->parameters.steps = kSteps;
688 data->parameters.zParams.notificationLevel = g_displayLevel;
689 /* Check the parameters */
690 if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity,
691 data->ctx->f, accel)) {
692 DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n");
693 free(data);
694 continue;
695 }
696 /* Call the function and pass ownership of data to it */
697 COVER_best_start(&best);
698 if (pool) {
699 POOL_add(pool, &FASTCOVER_tryParameters, data);
700 } else {
701 FASTCOVER_tryParameters(data);
702 }
703 /* Print status */
704 LOCALDISPLAYUPDATE(displayLevel, 2, "\r%u%% ",
705 (U32)((iteration * 100) / kIterations));
706 ++iteration;
707 }
708 COVER_best_wait(&best);
709 FASTCOVER_ctx_destroy(&ctx);
710 }
711 LOCALDISPLAYLEVEL(displayLevel, 2, "\r%79s\r", "");
712 /* Fill the output buffer and parameters with output of the best parameters */
713 {
714 const size_t dictSize = best.dictSize;
715 if (ZSTD_isError(best.compressedSize)) {
716 const size_t compressedSize = best.compressedSize;
717 COVER_best_destroy(&best);
718 POOL_free(pool);
719 return compressedSize;
720 }
721 FASTCOVER_convertToFastCoverParams(best.parameters, parameters, f, accel);
722 memcpy(dictBuffer, best.dict, dictSize);
723 COVER_best_destroy(&best);
724 POOL_free(pool);
725 return dictSize;
726 }
727
728 }
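
The search above is a plain grid over ``d`` (outer loop, since each value needs a
fresh context) and ``k`` (inner loop, reusing the context), tracking the
best-scoring parameter set. A minimal Python sketch of that structure, with
hypothetical ``ctx_init``/``try_parameters`` stubs standing in for
``FASTCOVER_ctx_init()``/``FASTCOVER_tryParameters()``::

    # Schematic sketch only; the stubs below are hypothetical stand-ins
    # for FASTCOVER_ctx_init() / FASTCOVER_tryParameters().
    def ctx_init(d):
        return {'d': d}

    def try_parameters(ctx, k):
        # Pretend "compressed size" to minimize; the real code trains a
        # dictionary and measures compressed output.
        return abs(k - 1000) + ctx['d']

    def optimize(k_min, k_max, k_step, d_min, d_max):
        best = None                                # plays the role of COVER_best_t
        for d in range(d_min, d_max + 1, 2):       # new context per d
            ctx = ctx_init(d)
            for k in range(k_min, k_max + 1, k_step):  # context reused across k
                size = try_parameters(ctx, k)
                if best is None or size < best[0]:
                    best = (size, (k, d))
        return best

    print(optimize(50, 2000, 50, 6, 16))
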
@@ -6,6 +6,7 b' mercurial/cext/osutil.c'
6 mercurial/cext/revlog.c
6 mercurial/cext/revlog.c
7 # Vendored code that we should never format:
7 # Vendored code that we should never format:
8 contrib/python-zstandard/c-ext/bufferutil.c
8 contrib/python-zstandard/c-ext/bufferutil.c
9 contrib/python-zstandard/c-ext/compressionchunker.c
9 contrib/python-zstandard/c-ext/compressiondict.c
10 contrib/python-zstandard/c-ext/compressiondict.c
10 contrib/python-zstandard/c-ext/compressionparams.c
11 contrib/python-zstandard/c-ext/compressionparams.c
11 contrib/python-zstandard/c-ext/compressionreader.c
12 contrib/python-zstandard/c-ext/compressionreader.c
@@ -25,6 +26,8 b' contrib/python-zstandard/zstd.c'
25 contrib/python-zstandard/zstd/common/bitstream.h
26 contrib/python-zstandard/zstd/common/bitstream.h
26 contrib/python-zstandard/zstd/common/compiler.h
27 contrib/python-zstandard/zstd/common/compiler.h
27 contrib/python-zstandard/zstd/common/cpu.h
28 contrib/python-zstandard/zstd/common/cpu.h
29 contrib/python-zstandard/zstd/common/debug.c
30 contrib/python-zstandard/zstd/common/debug.h
28 contrib/python-zstandard/zstd/common/entropy_common.c
31 contrib/python-zstandard/zstd/common/entropy_common.c
29 contrib/python-zstandard/zstd/common/error_private.c
32 contrib/python-zstandard/zstd/common/error_private.c
30 contrib/python-zstandard/zstd/common/error_private.h
33 contrib/python-zstandard/zstd/common/error_private.h
@@ -42,6 +45,8 b' contrib/python-zstandard/zstd/common/zst'
42 contrib/python-zstandard/zstd/common/zstd_errors.h
45 contrib/python-zstandard/zstd/common/zstd_errors.h
43 contrib/python-zstandard/zstd/common/zstd_internal.h
46 contrib/python-zstandard/zstd/common/zstd_internal.h
44 contrib/python-zstandard/zstd/compress/fse_compress.c
47 contrib/python-zstandard/zstd/compress/fse_compress.c
48 contrib/python-zstandard/zstd/compress/hist.c
49 contrib/python-zstandard/zstd/compress/hist.h
45 contrib/python-zstandard/zstd/compress/huf_compress.c
50 contrib/python-zstandard/zstd/compress/huf_compress.c
46 contrib/python-zstandard/zstd/compress/zstd_compress.c
51 contrib/python-zstandard/zstd/compress/zstd_compress.c
47 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
52 contrib/python-zstandard/zstd/compress/zstd_compress_internal.h
@@ -64,8 +69,10 b' contrib/python-zstandard/zstd/deprecated'
64 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
69 contrib/python-zstandard/zstd/deprecated/zbuff_decompress.c
65 contrib/python-zstandard/zstd/deprecated/zbuff.h
70 contrib/python-zstandard/zstd/deprecated/zbuff.h
66 contrib/python-zstandard/zstd/dictBuilder/cover.c
71 contrib/python-zstandard/zstd/dictBuilder/cover.c
72 contrib/python-zstandard/zstd/dictBuilder/cover.h
67 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
73 contrib/python-zstandard/zstd/dictBuilder/divsufsort.c
68 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
74 contrib/python-zstandard/zstd/dictBuilder/divsufsort.h
75 contrib/python-zstandard/zstd/dictBuilder/fastcover.c
69 contrib/python-zstandard/zstd/dictBuilder/zdict.c
76 contrib/python-zstandard/zstd/dictBuilder/zdict.c
70 contrib/python-zstandard/zstd/dictBuilder/zdict.h
77 contrib/python-zstandard/zstd/dictBuilder/zdict.h
71 contrib/python-zstandard/zstd/zstd.h
78 contrib/python-zstandard/zstd/zstd.h
@@ -1,7 +1,10 b''
1 graft c-ext
1 graft c-ext
2 graft debian
2 graft zstd
3 graft zstd
3 graft tests
4 graft tests
4 include make_cffi.py
5 include make_cffi.py
5 include setup_zstd.py
6 include setup_zstd.py
6 include zstd.c
7 include zstd.c
8 include zstd_cffi.py
7 include LICENSE
9 include LICENSE
10 include NEWS.rst
@@ -30,6 +30,19 b' Actions Blocking Release'
30 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
30 * Remove low-level compression parameters from ``ZstdCompressor.__init__`` and
31 require use of ``CompressionParameters``.
31 require use of ``CompressionParameters``.
32 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
32 * Expose ``ZSTD_getFrameProgression()`` from more compressor types.
33 * Support modifying compression parameters mid-operation when supported by
34 the zstd API.
35 * Expose ``ZSTD_CLEVEL_DEFAULT`` constant.
36 * Support ``ZSTD_p_forceAttachDict`` compression parameter.
37 * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving
38 compression parameters.
39 * Consider exposing ``ZSTDMT_toFlushNow()``.
40 * Expose ``ZDICT_trainFromBuffer_fastCover()``,
41 ``ZDICT_optimizeTrainFromBuffer_fastCover()``.
42 * Expose and enforce ``ZSTD_minCLevel()`` for minimum compression level.
43 * Consider a ``chunker()`` API for decompression.
44 * Consider stats for ``chunker()`` API, including finding the last consumed
45 offset of input data.
33
46
34 Other Actions Not Blocking Release
47 Other Actions Not Blocking Release
35 ---------------------------------------
48 ---------------------------------------
@@ -38,6 +51,111 b' Other Actions Not Blocking Release'
38 * API for ensuring max memory ceiling isn't exceeded.
51 * API for ensuring max memory ceiling isn't exceeded.
39 * Move off nose for testing.
52 * Move off nose for testing.
40
53
54 0.10.1 (released 2018-10-08)
55 ============================
56
57 Backwards Compatibility Notes
58 -----------------------------
59
60 * ``ZstdCompressor.stream_reader().closed`` is now a property instead of a
61 method (#58).
62 * ``ZstdDecompressor.stream_reader().closed`` is now a property instead of a
63 method (#58).
64
65 Changes
66 -------
67
68 * Stop attempting to package Python 3.6 for Miniconda. The latest version of
69 Miniconda is using Python 3.7. The Python 3.6 Miniconda packages were a lie
70 since they were actually built against Python 3.7.
71 * ``ZstdCompressor.stream_reader()``'s and ``ZstdDecompressor.stream_reader()``'s
72 ``closed`` attribute is now a read-only property instead of a method. This now
73 properly matches the ``IOBase`` API and allows instances to be used in more
74 places that accept ``IOBase`` instances.
75
76 0.10.0 (released 2018-10-08)
77 ============================
78
79 Backwards Compatibility Notes
80 -----------------------------
81
82 * ``ZstdDecompressor.stream_reader().read()`` now consistently requires an
83 argument in both the C and CFFI backends. Before, the CFFI implementation
84 would assume a default value of ``-1``, which was later rejected.
85 * The ``compress_literals`` argument and attribute has been removed from
86 ``zstd.ZstdCompressionParameters`` because it was removed by the zstd 1.3.5
87 API.
88 * ``ZSTD_CCtx_setParametersUsingCCtxParams()`` is no longer called on every
89 operation performed against ``ZstdCompressor`` instances. The reason for this
90 change is that the zstd 1.3.5 API no longer allows this without calling
91 ``ZSTD_CCtx_resetParameters()`` first. But if we called
92 ``ZSTD_CCtx_resetParameters()`` on every operation, we'd have to redo
93 potentially expensive setup when using dictionaries. We now call
94 ``ZSTD_CCtx_reset()`` on every operation and don't attempt to change
95 compression parameters.
96 * Objects returned by ``ZstdCompressor.stream_reader()`` no longer need to be
97 used as a context manager. The context manager interface still exists and its
98 behavior is unchanged.
99 * Objects returned by ``ZstdDecompressor.stream_reader()`` no longer need to be
100 used as a context manager. The context manager interface still exists and its
101 behavior is unchanged.
102
103 Bug Fixes
104 ---------
105
106 * ``ZstdDecompressor.decompressobj().decompress()`` should now return all data
107 from internal buffers in more scenarios. Before, it was possible for data to
108 remain in internal buffers. This data would be emitted on a subsequent call
109 to ``decompress()``. The overall output stream would still be valid. But if
110 callers were expecting input data to exactly map to output data (say the
111 producer had used ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` and was attempting to
112 map input chunks to output chunks), then the previous behavior would be
113 wrong. The new behavior is such that output from
114 ``flush(COMPRESSOBJ_FLUSH_BLOCK)`` fed into ``decompressobj().decompress()``
115 should produce all data that can be decompressed from the input provided so far.
116 * ``ZstdDecompressor.stream_reader().read()`` should no longer segfault after
117 a previous context manager resulted in error (#56).
118 * ``ZstdCompressor.compressobj().flush(COMPRESSOBJ_FLUSH_BLOCK)`` now returns
119 all data necessary to flush a block. Before, it was possible for the
120 ``flush()`` to not emit all data necessary to fully represent a block. This
121 would mean decompressors wouldn't be able to decompress all data that had been
122 fed into the compressor and ``flush()``ed. (#55).
123
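
Taken together, these two fixes allow a caller to map flushed input blocks to
decompressed output exactly. A minimal round-trip sketch, assuming the
documented ``compressobj()``/``decompressobj()`` APIs::

    import zstd

    cobj = zstd.ZstdCompressor().compressobj()
    dobj = zstd.ZstdDecompressor().decompressobj()

    # Each flushed block now round-trips immediately and completely.
    chunk = cobj.compress(b'message one') + cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
    assert dobj.decompress(chunk) == b'message one'

    chunk = cobj.compress(b'message two') + cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
    assert dobj.decompress(chunk) == b'message two'

    cobj.flush()  # finish the zstd frame
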
124 New Features
125 ------------
126
127 * New module constants ``BLOCKSIZELOG_MAX``, ``BLOCKSIZE_MAX``,
128 ``TARGETLENGTH_MAX`` that expose constants from libzstd.
129 * New ``ZstdCompressor.chunker()`` API for manually feeding data into a
130 compressor and emitting chunks of a fixed size. Like ``compressobj()``, the
131 API doesn't impose restrictions on the input or output types for the
132 data streams. Unlike ``compressobj()``, it ensures output chunks are of a
133 fixed size. This makes the API useful when the compressed output is being
134 fed into an I/O layer, where uniform write sizes are useful.
135 * ``ZstdCompressor.stream_reader()`` no longer needs to be used as a context
136 manager (#34).
137 * ``ZstdDecompressor.stream_reader()`` no longer needs to be used as a context
138 manager (#34).
139 * Bundled zstandard library upgraded from 1.3.4 to 1.3.6.
140
141 Changes
142 -------
143
144 * Added ``zstd_cffi.py`` and ``NEWS.rst`` to ``MANIFEST.in``.
145 * ``zstandard.__version__`` is now defined (#50).
146 * Upgrade pip, setuptools, wheel, and cibuildwheel packages to latest versions.
147 * Upgrade various packages used in CI to latest versions. Notably tox (in
148 order to support Python 3.7).
149 * Use relative paths in setup.py to appease Python 3.7 (#51).
150 * Added CI for Python 3.7.
151
152 0.9.1 (released 2018-06-04)
153 ===========================
154
155 * Debian packaging support.
156 * Fix typo in setup.py (#44).
157 * Support building with mingw compiler (#46).
158
41 0.9.0 (released 2018-04-08)
159 0.9.0 (released 2018-04-08)
42 ===========================
160 ===========================
43
161
@@ -90,7 +208,7 b' Bug Fixes'
90 New Features
208 New Features
91 ------------
209 ------------
92
210
93 * Bundlded zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
211 * Bundled zstandard library upgraded from 1.1.3 to 1.3.4. This delivers various
94 bug fixes and performance improvements. It also gives us access to newer
212 bug fixes and performance improvements. It also gives us access to newer
95 features.
213 features.
96 * Support for negative compression levels.
214 * Support for negative compression levels.
@@ -196,6 +196,17 b' Stream Reader API'
196
196
197 with open(path, 'rb') as fh:
197 with open(path, 'rb') as fh:
198 cctx = zstd.ZstdCompressor()
198 cctx = zstd.ZstdCompressor()
199 reader = cctx.stream_reader(fh)
200 while True:
201 chunk = reader.read(16384)
202 if not chunk:
203 break
204
205 # Do something with compressed chunk.
206
207 Instances can also be used as context managers::
208
209 with open(path, 'rb') as fh:
199 with cctx.stream_reader(fh) as reader:
210 with cctx.stream_reader(fh) as reader:
200 while True:
211 while True:
201 chunk = reader.read(16384)
212 chunk = reader.read(16384)
@@ -204,9 +215,9 b' Stream Reader API'
204
215
205 # Do something with compressed chunk.
216 # Do something with compressed chunk.
206
217
207 The stream can only be read within a context manager. When the context
218 When the context manager exits or ``close()`` is called, the stream is closed,
208 manager exits, the stream is closed and the underlying resource is
219 underlying resources are released, and future operations against the compression
209 released and future operations against the compression stream stream will fail.
220 stream will fail.
210
221
211 The ``source`` argument to ``stream_reader()`` can be any object with a
222 The ``source`` argument to ``stream_reader()`` can be any object with a
212 ``read(size)`` method or any object implementing the *buffer protocol*.
223 ``read(size)`` method or any object implementing the *buffer protocol*.
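
For example, an in-memory ``bytes`` object works directly via the buffer
protocol (illustrative sketch)::

    import zstd

    cctx = zstd.ZstdCompressor()
    reader = cctx.stream_reader(b'data to compress' * 4096)

    compressed = []
    while True:
        chunk = reader.read(16384)
        if not chunk:
            break
        compressed.append(chunk)
    frame = b''.join(compressed)
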
@@ -419,6 +430,64 b' the compressor::'
419 data = cobj.compress(b'foobar')
430 data = cobj.compress(b'foobar')
420 data = cobj.flush()
431 data = cobj.flush()
421
432
433 Chunker API
434 ^^^^^^^^^^^
435
436 ``chunker(size=None, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE)`` returns
437 an object that can be used to iteratively feed chunks of data into a compressor
438 and produce output chunks of a uniform size.
439
440 The object returned by ``chunker()`` exposes the following methods:
441
442 ``compress(data)``
443 Feeds new input data into the compressor.
444
445 ``flush()``
446 Flushes all data currently in the compressor.
447
448 ``finish()``
449 Signals the end of input data. No new data can be compressed after this
450 method is called.
451
452 ``compress()``, ``flush()``, and ``finish()`` all return an iterator of
453 ``bytes`` instances holding compressed data. The iterator may be empty. Callers
454 MUST iterate through all elements of the returned iterator before performing
455 another operation on the object.
456
457 All chunks emitted by ``compress()`` will have a length of ``chunk_size``.
458
459 ``flush()`` and ``finish()`` may return a final chunk smaller than
460 ``chunk_size``.
461
462 Here is how the API should be used::
463
464 cctx = zstd.ZstdCompressor()
465 chunker = cctx.chunker(chunk_size=32768)
466
467 with open(path, 'rb') as fh:
468 while True:
469 in_chunk = fh.read(32768)
470 if not in_chunk:
471 break
472
473 for out_chunk in chunker.compress(in_chunk):
474 # Do something with output chunk of size 32768.
475
476 for out_chunk in chunker.finish():
477 # Do something with output chunks that finalize the zstd frame.
478
479 The ``chunker()`` API is often a better alternative to ``compressobj()``.
480
481 ``compressobj()`` will emit output data as it is available. This results in a
482 *stream* of output chunks of varying sizes. The consistency of the output chunk
483 size with ``chunker()`` is more appropriate for many usages, such as sending
484 compressed data to a socket.
485
486 ``compressobj()`` may also perform extra memory reallocations in order to
487 dynamically adjust the sizes of the output chunks. Since ``chunker()`` output
488 chunks are all the same size (except for flushed or final chunks), there is
489 less memory allocation overhead.
490
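
``flush()`` matters when latency is more important than uniform chunk sizes,
e.g. at a message boundary. A hedged sketch of mixing ``compress()`` and
``flush()``::

    import zstd

    chunker = zstd.ZstdCompressor().chunker(chunk_size=32768)

    # Likely empty: not enough compressed output yet for a full chunk.
    buffered = list(chunker.compress(b'small message'))

    # Emit whatever is buffered now; this chunk may be < chunk_size.
    flushed = list(chunker.flush())

    final = list(chunker.finish())
    payload = b''.join(buffered + flushed + final)
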
422 Batch Compression API
491 Batch Compression API
423 ^^^^^^^^^^^^^^^^^^^^^
492 ^^^^^^^^^^^^^^^^^^^^^
424
493
@@ -542,17 +611,24 b' Stream Reader API'
542
611
543 with open(path, 'rb') as fh:
612 with open(path, 'rb') as fh:
544 dctx = zstd.ZstdDecompressor()
613 dctx = zstd.ZstdDecompressor()
545 with dctx.stream_reader(fh) as reader:
614 reader = dctx.stream_reader(fh)
546 while True:
615 while True:
547 chunk = reader.read(16384)
616 chunk = reader.read(16384)
548 if not chunk:
617 if not chunk:
549 break
618 break
619
620 # Do something with decompressed chunk.
550
621
551 # Do something with decompressed chunk.
622 The stream can also be used as a context manager::
552
623
553 The stream can only be read within a context manager. When the context
624 with open(path, 'rb') as fh:
554 manager exits, the stream is closed and the underlying resource is
625 dctx = zstd.ZstdDecompressor()
555 released and future operations against the stream will fail.
626 with dctx.stream_reader(fh) as reader:
627 ...
628
629 When used as a context manager, the stream is closed and the underlying
630 resources are released when the context manager exits. Future operations against
631 the stream will fail.
556
632
557 The ``source`` argument to ``stream_reader()`` can be any object with a
633 The ``source`` argument to ``stream_reader()`` can be any object with a
558 ``read(size)`` method or any object implementing the *buffer protocol*.
634 ``read(size)`` method or any object implementing the *buffer protocol*.
@@ -1077,7 +1153,6 b' follows:'
1077 * write_dict_id
1153 * write_dict_id
1078 * job_size
1154 * job_size
1079 * overlap_size_log
1155 * overlap_size_log
1080 * compress_literals
1081 * force_max_window
1156 * force_max_window
1082 * enable_ldm
1157 * enable_ldm
1083 * ldm_hash_log
1158 * ldm_hash_log
@@ -39,7 +39,6 b' int set_parameters(ZSTD_CCtx_params* par'
39 TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads);
39 TRY_SET_PARAMETER(params, ZSTD_p_nbWorkers, obj->threads);
40 TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize);
40 TRY_SET_PARAMETER(params, ZSTD_p_jobSize, obj->jobSize);
41 TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog);
41 TRY_SET_PARAMETER(params, ZSTD_p_overlapSizeLog, obj->overlapSizeLog);
42 TRY_SET_PARAMETER(params, ZSTD_p_compressLiterals, obj->compressLiterals);
43 TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow);
42 TRY_SET_PARAMETER(params, ZSTD_p_forceMaxWindow, obj->forceMaxWindow);
44 TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching);
43 TRY_SET_PARAMETER(params, ZSTD_p_enableLongDistanceMatching, obj->enableLongDistanceMatching);
45 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog);
44 TRY_SET_PARAMETER(params, ZSTD_p_ldmHashLog, obj->ldmHashLog);
@@ -88,7 +87,6 b' static int ZstdCompressionParameters_ini'
88 "ldm_bucket_size_log",
87 "ldm_bucket_size_log",
89 "ldm_hash_every_log",
88 "ldm_hash_every_log",
90 "threads",
89 "threads",
91 "compress_literals",
92 NULL
90 NULL
93 };
91 };
94
92
@@ -114,18 +112,13 b' static int ZstdCompressionParameters_ini'
114 unsigned ldmHashEveryLog = 0;
112 unsigned ldmHashEveryLog = 0;
115 int threads = 0;
113 int threads = 0;
116
114
117 /* Setting value 0 has the effect of disabling. So we use -1 as a default
118 * to detect whether to set. Then we automatically derive the expected value
119 * based on the level, just like zstandard does itself. */
120 int compressLiterals = -1;
121
122 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
115 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
123 "|IiIIIIIIIIIIIIIIIIIIii:CompressionParameters",
116 "|IiIIIIIIIIIIIIIIIIIIi:CompressionParameters",
124 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
117 kwlist, &format, &compressionLevel, &windowLog, &hashLog, &chainLog,
125 &searchLog, &minMatch, &targetLength, &compressionStrategy,
118 &searchLog, &minMatch, &targetLength, &compressionStrategy,
126 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog,
119 &contentSizeFlag, &checksumFlag, &dictIDFlag, &jobSize, &overlapSizeLog,
127 &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog,
120 &forceMaxWindow, &enableLDM, &ldmHashLog, &ldmMinMatch, &ldmBucketSizeLog,
128 &ldmHashEveryLog, &threads, &compressLiterals)) {
121 &ldmHashEveryLog, &threads)) {
129 return -1;
122 return -1;
130 }
123 }
131
124
@@ -133,10 +126,6 b' static int ZstdCompressionParameters_ini'
133 threads = cpu_count();
126 threads = cpu_count();
134 }
127 }
135
128
136 if (compressLiterals < 0) {
137 compressLiterals = compressionLevel >= 0;
138 }
139
140 self->format = format;
129 self->format = format;
141 self->compressionLevel = compressionLevel;
130 self->compressionLevel = compressionLevel;
142 self->windowLog = windowLog;
131 self->windowLog = windowLog;
@@ -152,7 +141,6 b' static int ZstdCompressionParameters_ini'
152 self->threads = threads;
141 self->threads = threads;
153 self->jobSize = jobSize;
142 self->jobSize = jobSize;
154 self->overlapSizeLog = overlapSizeLog;
143 self->overlapSizeLog = overlapSizeLog;
155 self->compressLiterals = compressLiterals;
156 self->forceMaxWindow = forceMaxWindow;
144 self->forceMaxWindow = forceMaxWindow;
157 self->enableLongDistanceMatching = enableLDM;
145 self->enableLongDistanceMatching = enableLDM;
158 self->ldmHashLog = ldmHashLog;
146 self->ldmHashLog = ldmHashLog;
@@ -299,16 +287,6 b' ZstdCompressionParametersObject* Compres'
299 Py_DECREF(val);
287 Py_DECREF(val);
300 }
288 }
301
289
302 val = PyDict_GetItemString(kwargs, "compress_literals");
303 if (!val) {
304 val = PyLong_FromLong(level >= 0 ? 1 : 0);
305 if (!val) {
306 goto cleanup;
307 }
308 PyDict_SetItemString(kwargs, "compress_literals", val);
309 Py_DECREF(val);
310 }
311
312 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
290 result = PyObject_New(ZstdCompressionParametersObject, &ZstdCompressionParametersType);
313 if (!result) {
291 if (!result) {
314 goto cleanup;
292 goto cleanup;
@@ -420,9 +398,6 b' static PyMemberDef ZstdCompressionParame'
420 { "overlap_size_log", T_UINT,
398 { "overlap_size_log", T_UINT,
421 offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
399 offsetof(ZstdCompressionParametersObject, overlapSizeLog), READONLY,
422 "Size of previous input reloaded at the beginning of each job" },
400 "Size of previous input reloaded at the beginning of each job" },
423 { "compress_literals", T_UINT,
424 offsetof(ZstdCompressionParametersObject, compressLiterals), READONLY,
425 "whether Huffman compression of literals is in use" },
426 { "force_max_window", T_UINT,
401 { "force_max_window", T_UINT,
427 offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
402 offsetof(ZstdCompressionParametersObject, forceMaxWindow), READONLY,
428 "force back references to remain smaller than window size" },
403 "force back references to remain smaller than window size" },
@@ -43,20 +43,11 b' static void reader_dealloc(ZstdCompressi'
43 }
43 }
44
44
45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
45 static ZstdCompressionReader* reader_enter(ZstdCompressionReader* self) {
46 size_t zresult;
47
48 if (self->entered) {
46 if (self->entered) {
49 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
47 PyErr_SetString(PyExc_ValueError, "cannot __enter__ multiple times");
50 return NULL;
48 return NULL;
51 }
49 }
52
50
53 zresult = ZSTD_CCtx_setPledgedSrcSize(self->compressor->cctx, self->sourceSize);
54 if (ZSTD_isError(zresult)) {
55 PyErr_Format(ZstdError, "error setting source size: %s",
56 ZSTD_getErrorName(zresult));
57 return NULL;
58 }
59
60 self->entered = 1;
51 self->entered = 1;
61
52
62 Py_INCREF(self);
53 Py_INCREF(self);
@@ -132,15 +123,6 b' static PyObject* reader_close(ZstdCompre'
132 Py_RETURN_NONE;
123 Py_RETURN_NONE;
133 }
124 }
134
125
135 static PyObject* reader_closed(ZstdCompressionReader* self) {
136 if (self->closed) {
137 Py_RETURN_TRUE;
138 }
139 else {
140 Py_RETURN_FALSE;
141 }
142 }
143
144 static PyObject* reader_tell(ZstdCompressionReader* self) {
126 static PyObject* reader_tell(ZstdCompressionReader* self) {
145 /* TODO should this raise OSError since stream isn't seekable? */
127 /* TODO should this raise OSError since stream isn't seekable? */
146 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
128 return PyLong_FromUnsignedLongLong(self->bytesCompressed);
@@ -159,11 +141,6 b' static PyObject* reader_read(ZstdCompres'
159 size_t zresult;
141 size_t zresult;
160 size_t oldPos;
142 size_t oldPos;
161
143
162 if (!self->entered) {
163 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
164 return NULL;
165 }
166
167 if (self->closed) {
144 if (self->closed) {
168 PyErr_SetString(PyExc_ValueError, "stream is closed");
145 PyErr_SetString(PyExc_ValueError, "stream is closed");
169 return NULL;
146 return NULL;
@@ -333,8 +310,6 b' static PyMethodDef reader_methods[] = {'
333 PyDoc_STR("Exit a compression context") },
310 PyDoc_STR("Exit a compression context") },
334 { "close", (PyCFunction)reader_close, METH_NOARGS,
311 { "close", (PyCFunction)reader_close, METH_NOARGS,
335 PyDoc_STR("Close the stream so it cannot perform any more operations") },
312 PyDoc_STR("Close the stream so it cannot perform any more operations") },
336 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
337 PyDoc_STR("Whether stream is closed") },
338 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
313 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
339 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
314 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
340 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
315 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
@@ -354,6 +329,12 b' static PyMethodDef reader_methods[] = {'
354 { NULL, NULL }
329 { NULL, NULL }
355 };
330 };
356
331
332 static PyMemberDef reader_members[] = {
333 { "closed", T_BOOL, offsetof(ZstdCompressionReader, closed),
334 READONLY, "whether stream is closed" },
335 { NULL }
336 };
337
357 PyTypeObject ZstdCompressionReaderType = {
338 PyTypeObject ZstdCompressionReaderType = {
358 PyVarObject_HEAD_INIT(NULL, 0)
339 PyVarObject_HEAD_INIT(NULL, 0)
359 "zstd.ZstdCompressionReader", /* tp_name */
340 "zstd.ZstdCompressionReader", /* tp_name */
@@ -383,7 +364,7 b' PyTypeObject ZstdCompressionReaderType ='
383 reader_iter, /* tp_iter */
364 reader_iter, /* tp_iter */
384 reader_iternext, /* tp_iternext */
365 reader_iternext, /* tp_iternext */
385 reader_methods, /* tp_methods */
366 reader_methods, /* tp_methods */
386 0, /* tp_members */
367 reader_members, /* tp_members */
387 0, /* tp_getset */
368 0, /* tp_getset */
388 0, /* tp_base */
369 0, /* tp_base */
389 0, /* tp_dict */
370 0, /* tp_dict */
@@ -222,10 +222,6 b' static PyObject* ZstdCompressionWriter_f'
222 return NULL;
222 return NULL;
223 }
223 }
224
224
225 if (!output.pos) {
226 break;
227 }
228
229 /* Copy data from output buffer to writer. */
225 /* Copy data from output buffer to writer. */
230 if (output.pos) {
226 if (output.pos) {
231 #if PY_MAJOR_VERSION >= 3
227 #if PY_MAJOR_VERSION >= 3
@@ -238,7 +234,12 b' static PyObject* ZstdCompressionWriter_f'
238 totalWrite += output.pos;
234 totalWrite += output.pos;
239 self->bytesCompressed += output.pos;
235 self->bytesCompressed += output.pos;
240 }
236 }
237
241 output.pos = 0;
238 output.pos = 0;
239
240 if (!zresult) {
241 break;
242 }
242 }
243 }
243
244
244 PyMem_Free(output.dst);
245 PyMem_Free(output.dst);
@@ -115,6 +115,7 b' static PyObject* ZstdCompressionObj_flus'
115 PyObject* result = NULL;
115 PyObject* result = NULL;
116 Py_ssize_t resultSize = 0;
116 Py_ssize_t resultSize = 0;
117 ZSTD_inBuffer input;
117 ZSTD_inBuffer input;
118 ZSTD_EndDirective zFlushMode;
118
119
119 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
120 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:flush", kwlist, &flushMode)) {
120 return NULL;
121 return NULL;
@@ -130,52 +131,34 b' static PyObject* ZstdCompressionObj_flus'
130 return NULL;
131 return NULL;
131 }
132 }
132
133
134 switch (flushMode) {
135 case compressorobj_flush_block:
136 zFlushMode = ZSTD_e_flush;
137 break;
138
139 case compressorobj_flush_finish:
140 zFlushMode = ZSTD_e_end;
141 self->finished = 1;
142 break;
143
144 default:
145 PyErr_SetString(ZstdError, "unhandled flush mode");
146 return NULL;
147 }
148
133 assert(self->output.pos == 0);
149 assert(self->output.pos == 0);
134
150
135 input.src = NULL;
151 input.src = NULL;
136 input.size = 0;
152 input.size = 0;
137 input.pos = 0;
153 input.pos = 0;
138
154
139 if (flushMode == compressorobj_flush_block) {
155 while (1) {
140 /* The output buffer is of size ZSTD_CStreamOutSize(), which is
141 guaranteed to hold a full block. */
142 Py_BEGIN_ALLOW_THREADS
156 Py_BEGIN_ALLOW_THREADS
143 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
157 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
144 &input, ZSTD_e_flush);
158 &input, zFlushMode);
145 Py_END_ALLOW_THREADS
159 Py_END_ALLOW_THREADS
146
160
147 if (ZSTD_isError(zresult)) {
161 if (ZSTD_isError(zresult)) {
148 PyErr_Format(ZstdError, "zstd compress error: %s", ZSTD_getErrorName(zresult));
149 return NULL;
150 }
151
152 /* Output buffer is guaranteed to hold full block. */
153 assert(zresult == 0);
154
155 if (self->output.pos) {
156 result = PyBytes_FromStringAndSize(self->output.dst, self->output.pos);
157 if (!result) {
158 return NULL;
159 }
160 }
161
162 self->output.pos = 0;
163
164 if (result) {
165 return result;
166 }
167 else {
168 return PyBytes_FromString("");
169 }
170 }
171
172 assert(flushMode == compressorobj_flush_finish);
173 self->finished = 1;
174
175 while (1) {
176 zresult = ZSTD_compress_generic(self->compressor->cctx, &self->output,
177 &input, ZSTD_e_end);
178 if (ZSTD_isError(zresult)) {
179 PyErr_Format(ZstdError, "error ending compression stream: %s",
162 PyErr_Format(ZstdError, "error ending compression stream: %s",
180 ZSTD_getErrorName(zresult));
163 ZSTD_getErrorName(zresult));
181 return NULL;
164 return NULL;
@@ -11,15 +11,13 b''
11
11
12 extern PyObject* ZstdError;
12 extern PyObject* ZstdError;
13
13
14 int ensure_cctx(ZstdCompressor* compressor) {
14 int setup_cctx(ZstdCompressor* compressor) {
15 size_t zresult;
15 size_t zresult;
16
16
17 assert(compressor);
17 assert(compressor);
18 assert(compressor->cctx);
18 assert(compressor->cctx);
19 assert(compressor->params);
19 assert(compressor->params);
20
20
21 ZSTD_CCtx_reset(compressor->cctx);
22
23 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
21 zresult = ZSTD_CCtx_setParametersUsingCCtxParams(compressor->cctx, compressor->params);
24 if (ZSTD_isError(zresult)) {
22 if (ZSTD_isError(zresult)) {
25 PyErr_Format(ZstdError, "could not set compression parameters: %s",
23 PyErr_Format(ZstdError, "could not set compression parameters: %s",
@@ -237,9 +235,9 b' static int ZstdCompressor_init(ZstdCompr'
237 Py_INCREF(dict);
235 Py_INCREF(dict);
238 }
236 }
239
237
240 if (ensure_cctx(self)) {
238 if (setup_cctx(self)) {
241 return -1;
239 return -1;
242 }
240 }
243
241
244 return 0;
242 return 0;
245 }
243 }
@@ -346,9 +344,7 b' static PyObject* ZstdCompressor_copy_str'
346 return NULL;
344 return NULL;
347 }
345 }
348
346
349 if (ensure_cctx(self)) {
347 ZSTD_CCtx_reset(self->cctx);
350 return NULL;
351 }
352
348
353 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
349 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
354 if (ZSTD_isError(zresult)) {
350 if (ZSTD_isError(zresult)) {
@@ -489,6 +485,7 b' static ZstdCompressionReader* ZstdCompre'
489 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
485 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
490 size_t readSize = ZSTD_CStreamInSize();
486 size_t readSize = ZSTD_CStreamInSize();
491 ZstdCompressionReader* result = NULL;
487 ZstdCompressionReader* result = NULL;
488 size_t zresult;
492
489
493 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
490 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|Kk:stream_reader", kwlist,
494 &source, &sourceSize, &readSize)) {
491 &source, &sourceSize, &readSize)) {
@@ -520,13 +517,17 b' static ZstdCompressionReader* ZstdCompre'
520 goto except;
517 goto except;
521 }
518 }
522
519
523 if (ensure_cctx(self)) {
520 ZSTD_CCtx_reset(self->cctx);
521
522 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
523 if (ZSTD_isError(zresult)) {
524 PyErr_Format(ZstdError, "error setting source source: %s",
525 ZSTD_getErrorName(zresult));
524 goto except;
526 goto except;
525 }
527 }
526
528
527 result->compressor = self;
529 result->compressor = self;
528 Py_INCREF(self);
530 Py_INCREF(self);
529 result->sourceSize = sourceSize;
530
531
531 return result;
532 return result;
532
533
@@ -576,9 +577,7 b' static PyObject* ZstdCompressor_compress'
576 goto finally;
577 goto finally;
577 }
578 }
578
579
579 if (ensure_cctx(self)) {
580 ZSTD_CCtx_reset(self->cctx);
580 goto finally;
581 }
582
581
583 destSize = ZSTD_compressBound(source.len);
582 destSize = ZSTD_compressBound(source.len);
584 output = PyBytes_FromStringAndSize(NULL, destSize);
583 output = PyBytes_FromStringAndSize(NULL, destSize);
@@ -652,9 +651,7 b' static ZstdCompressionObj* ZstdCompresso'
652 return NULL;
651 return NULL;
653 }
652 }
654
653
655 if (ensure_cctx(self)) {
654 ZSTD_CCtx_reset(self->cctx);
656 return NULL;
657 }
658
655
659 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
656 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, inSize);
660 if (ZSTD_isError(zresult)) {
657 if (ZSTD_isError(zresult)) {
@@ -743,9 +740,7 b' static ZstdCompressorIterator* ZstdCompr'
743 goto except;
740 goto except;
744 }
741 }
745
742
746 if (ensure_cctx(self)) {
743 ZSTD_CCtx_reset(self->cctx);
747 return NULL;
748 }
749
744
750 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
745 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
751 if (ZSTD_isError(zresult)) {
746 if (ZSTD_isError(zresult)) {
@@ -817,9 +812,7 b' static ZstdCompressionWriter* ZstdCompre'
817 return NULL;
812 return NULL;
818 }
813 }
819
814
820 if (ensure_cctx(self)) {
815 ZSTD_CCtx_reset(self->cctx);
821 return NULL;
822 }
823
816
824 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
817 result = (ZstdCompressionWriter*)PyObject_CallObject((PyObject*)&ZstdCompressionWriterType, NULL);
825 if (!result) {
818 if (!result) {
@@ -839,6 +832,58 b' static ZstdCompressionWriter* ZstdCompre'
839 return result;
832 return result;
840 }
833 }
841
834
835 PyDoc_STRVAR(ZstdCompressor_chunker__doc__,
836 "Create an object for iterative compressing to same-sized chunks.\n"
837 );
838
839 static ZstdCompressionChunker* ZstdCompressor_chunker(ZstdCompressor* self, PyObject* args, PyObject* kwargs) {
840 static char* kwlist[] = {
841 "size",
842 "chunk_size",
843 NULL
844 };
845
846 unsigned long long sourceSize = ZSTD_CONTENTSIZE_UNKNOWN;
847 size_t chunkSize = ZSTD_CStreamOutSize();
848 ZstdCompressionChunker* chunker;
849 size_t zresult;
850
851 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|Kk:chunker", kwlist,
852 &sourceSize, &chunkSize)) {
853 return NULL;
854 }
855
856 ZSTD_CCtx_reset(self->cctx);
857
858 zresult = ZSTD_CCtx_setPledgedSrcSize(self->cctx, sourceSize);
859 if (ZSTD_isError(zresult)) {
860 PyErr_Format(ZstdError, "error setting source size: %s",
861 ZSTD_getErrorName(zresult));
862 return NULL;
863 }
864
865 chunker = (ZstdCompressionChunker*)PyObject_CallObject((PyObject*)&ZstdCompressionChunkerType, NULL);
866 if (!chunker) {
867 return NULL;
868 }
869
870 chunker->output.dst = PyMem_Malloc(chunkSize);
871 if (!chunker->output.dst) {
872 PyErr_NoMemory();
873 Py_DECREF(chunker);
874 return NULL;
875 }
876 chunker->output.size = chunkSize;
877 chunker->output.pos = 0;
878
879 chunker->compressor = self;
880 Py_INCREF(chunker->compressor);
881
882 chunker->chunkSize = chunkSize;
883
884 return chunker;
885 }
886
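From Python, the ``size`` and ``chunk_size`` keywords parsed above map onto the
pledged source size and the uniform output chunk size, respectively. A small
sketch (the ``data`` payload is hypothetical)::

    import zstd

    data = b'input data ' * 1000      # hypothetical payload
    cctx = zstd.ZstdCompressor()

    # size pledges the total input size up front; chunk_size controls the
    # uniform size of emitted output chunks.
    chunker = cctx.chunker(size=len(data), chunk_size=16384)
    chunks = list(chunker.compress(data)) + list(chunker.finish())
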
842 typedef struct {
887 typedef struct {
843 void* sourceData;
888 void* sourceData;
844 size_t sourceSize;
889 size_t sourceSize;
@@ -1524,6 +1569,8 b' finally:'
1524 }
1569 }
1525
1570
1526 static PyMethodDef ZstdCompressor_methods[] = {
1571 static PyMethodDef ZstdCompressor_methods[] = {
1572 { "chunker", (PyCFunction)ZstdCompressor_chunker,
1573 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_chunker__doc__ },
1527 { "compress", (PyCFunction)ZstdCompressor_compress,
1574 { "compress", (PyCFunction)ZstdCompressor_compress,
1528 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1575 METH_VARARGS | METH_KEYWORDS, ZstdCompressor_compress__doc__ },
1529 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
1576 { "compressobj", (PyCFunction)ZstdCompressor_compressobj,
@@ -27,7 +27,6 b' void constants_module_init(PyObject* mod'
27 #else
27 #else
28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
28 version = PyString_FromString(PYTHON_ZSTANDARD_VERSION);
29 #endif
29 #endif
30 Py_INCREF(version);
31 PyModule_AddObject(mod, "__version__", version);
30 PyModule_AddObject(mod, "__version__", version);
32
31
33 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
32 ZstdError = PyErr_NewException("zstd.ZstdError", NULL, NULL);
@@ -41,7 +40,6 b' void constants_module_init(PyObject* mod'
41 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
40 PyTuple_SetItem(zstdVersion, 0, PyLong_FromLong(ZSTD_VERSION_MAJOR));
42 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
41 PyTuple_SetItem(zstdVersion, 1, PyLong_FromLong(ZSTD_VERSION_MINOR));
43 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
42 PyTuple_SetItem(zstdVersion, 2, PyLong_FromLong(ZSTD_VERSION_RELEASE));
44 Py_INCREF(zstdVersion);
45 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
43 PyModule_AddObject(mod, "ZSTD_VERSION", zstdVersion);
46
44
47 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
45 frameHeader = PyBytes_FromStringAndSize(frame_header, sizeof(frame_header));
@@ -68,6 +66,8 b' void constants_module_init(PyObject* mod'
68 (long)ZSTD_DStreamOutSize());
66 (long)ZSTD_DStreamOutSize());
69
67
70 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
68 PyModule_AddIntConstant(mod, "MAGIC_NUMBER", ZSTD_MAGICNUMBER);
69 PyModule_AddIntConstant(mod, "BLOCKSIZELOG_MAX", ZSTD_BLOCKSIZELOG_MAX);
70 PyModule_AddIntConstant(mod, "BLOCKSIZE_MAX", ZSTD_BLOCKSIZE_MAX);
71 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
71 PyModule_AddIntConstant(mod, "WINDOWLOG_MIN", ZSTD_WINDOWLOG_MIN);
72 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
72 PyModule_AddIntConstant(mod, "WINDOWLOG_MAX", ZSTD_WINDOWLOG_MAX);
73 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
73 PyModule_AddIntConstant(mod, "CHAINLOG_MIN", ZSTD_CHAINLOG_MIN);
@@ -80,6 +80,7 b' void constants_module_init(PyObject* mod'
80 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
80 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MIN", ZSTD_SEARCHLENGTH_MIN);
81 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
81 PyModule_AddIntConstant(mod, "SEARCHLENGTH_MAX", ZSTD_SEARCHLENGTH_MAX);
82 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
82 PyModule_AddIntConstant(mod, "TARGETLENGTH_MIN", ZSTD_TARGETLENGTH_MIN);
83 PyModule_AddIntConstant(mod, "TARGETLENGTH_MAX", ZSTD_TARGETLENGTH_MAX);
83 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
84 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MIN", ZSTD_LDM_MINMATCH_MIN);
84 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
85 PyModule_AddIntConstant(mod, "LDM_MINMATCH_MAX", ZSTD_LDM_MINMATCH_MAX);
85 PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX);
86 PyModule_AddIntConstant(mod, "LDM_BUCKETSIZELOG_MAX", ZSTD_LDM_BUCKETSIZELOG_MAX);
@@ -47,10 +47,6 b' static ZstdDecompressionReader* reader_e'
47 return NULL;
47 return NULL;
48 }
48 }
49
49
50 if (ensure_dctx(self->decompressor, 1)) {
51 return NULL;
52 }
53
54 self->entered = 1;
50 self->entered = 1;
55
51
56 Py_INCREF(self);
52 Py_INCREF(self);
@@ -98,15 +94,6 b' static PyObject* reader_close(ZstdDecomp'
98 Py_RETURN_NONE;
94 Py_RETURN_NONE;
99 }
95 }
100
96
101 static PyObject* reader_closed(ZstdDecompressionReader* self) {
102 if (self->closed) {
103 Py_RETURN_TRUE;
104 }
105 else {
106 Py_RETURN_FALSE;
107 }
108 }
109
110 static PyObject* reader_flush(PyObject* self) {
97 static PyObject* reader_flush(PyObject* self) {
111 Py_RETURN_NONE;
98 Py_RETURN_NONE;
112 }
99 }
@@ -128,11 +115,6 b' static PyObject* reader_read(ZstdDecompr'
128 ZSTD_outBuffer output;
115 ZSTD_outBuffer output;
129 size_t zresult;
116 size_t zresult;
130
117
131 if (!self->entered) {
132 PyErr_SetString(ZstdError, "read() must be called from an active context manager");
133 return NULL;
134 }
135
136 if (self->closed) {
118 if (self->closed) {
137 PyErr_SetString(PyExc_ValueError, "stream is closed");
119 PyErr_SetString(PyExc_ValueError, "stream is closed");
138 return NULL;
120 return NULL;
@@ -281,11 +263,6 b' static PyObject* reader_seek(ZstdDecompr'
281 unsigned long long readAmount = 0;
263 unsigned long long readAmount = 0;
282 size_t defaultOutSize = ZSTD_DStreamOutSize();
264 size_t defaultOutSize = ZSTD_DStreamOutSize();
283
265
284 if (!self->entered) {
285 PyErr_SetString(ZstdError, "seek() must be called from an active context manager");
286 return NULL;
287 }
288
289 if (self->closed) {
266 if (self->closed) {
290 PyErr_SetString(PyExc_ValueError, "stream is closed");
267 PyErr_SetString(PyExc_ValueError, "stream is closed");
291 return NULL;
268 return NULL;
@@ -384,8 +361,6 b' static PyMethodDef reader_methods[] = {'
384 PyDoc_STR("Exit a compression context") },
361 PyDoc_STR("Exit a compression context") },
385 { "close", (PyCFunction)reader_close, METH_NOARGS,
362 { "close", (PyCFunction)reader_close, METH_NOARGS,
386 PyDoc_STR("Close the stream so it cannot perform any more operations") },
363 PyDoc_STR("Close the stream so it cannot perform any more operations") },
387 { "closed", (PyCFunction)reader_closed, METH_NOARGS,
388 PyDoc_STR("Whether stream is closed") },
389 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
364 { "flush", (PyCFunction)reader_flush, METH_NOARGS, PyDoc_STR("no-ops") },
390 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
365 { "isatty", (PyCFunction)reader_isatty, METH_NOARGS, PyDoc_STR("Returns False") },
391 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
366 { "readable", (PyCFunction)reader_readable, METH_NOARGS,
@@ -407,6 +382,12 b' static PyMethodDef reader_methods[] = {'
407 { NULL, NULL }
382 { NULL, NULL }
408 };
383 };
409
384
385 static PyMemberDef reader_members[] = {
386 { "closed", T_BOOL, offsetof(ZstdDecompressionReader, closed),
387 READONLY, "whether stream is closed" },
388 { NULL }
389 };
390
410 PyTypeObject ZstdDecompressionReaderType = {
391 PyTypeObject ZstdDecompressionReaderType = {
411 PyVarObject_HEAD_INIT(NULL, 0)
392 PyVarObject_HEAD_INIT(NULL, 0)
412 "zstd.ZstdDecompressionReader", /* tp_name */
393 "zstd.ZstdDecompressionReader", /* tp_name */
@@ -436,7 +417,7 b' PyTypeObject ZstdDecompressionReaderType'
436 reader_iter, /* tp_iter */
417 reader_iter, /* tp_iter */
437 reader_iternext, /* tp_iternext */
418 reader_iternext, /* tp_iternext */
438 reader_methods, /* tp_methods */
419 reader_methods, /* tp_methods */
439 0, /* tp_members */
420 reader_members, /* tp_members */
440 0, /* tp_getset */
421 0, /* tp_getset */
441 0, /* tp_base */
422 0, /* tp_base */
442 0, /* tp_dict */
423 0, /* tp_dict */
@@ -33,6 +33,8 b' static PyObject* DecompressionObj_decomp'
33 PyObject* result = NULL;
33 PyObject* result = NULL;
34 Py_ssize_t resultSize = 0;
34 Py_ssize_t resultSize = 0;
35
35
36 output.dst = NULL;
37
36 if (self->finished) {
38 if (self->finished) {
37 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
39 PyErr_SetString(ZstdError, "cannot use a decompressobj multiple times");
38 return NULL;
40 return NULL;
@@ -53,6 +55,12 b' static PyObject* DecompressionObj_decomp'
53 goto finally;
55 goto finally;
54 }
56 }
55
57
58 /* Special case of empty input. Output will always be empty. */
59 if (source.len == 0) {
60 result = PyBytes_FromString("");
61 goto finally;
62 }
63
56 input.src = source.buf;
64 input.src = source.buf;
57 input.size = source.len;
65 input.size = source.len;
58 input.pos = 0;
66 input.pos = 0;
@@ -65,8 +73,7 b' static PyObject* DecompressionObj_decomp'
65 output.size = self->outSize;
73 output.size = self->outSize;
66 output.pos = 0;
74 output.pos = 0;
67
75
68 /* Read input until exhausted. */
76 while (1) {
69 while (input.pos < input.size) {
70 Py_BEGIN_ALLOW_THREADS
77 Py_BEGIN_ALLOW_THREADS
71 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
78 zresult = ZSTD_decompress_generic(self->decompressor->dctx, &output, &input);
72 Py_END_ALLOW_THREADS
79 Py_END_ALLOW_THREADS
@@ -98,9 +105,13 b' static PyObject* DecompressionObj_decomp'
98 goto except;
105 goto except;
99 }
106 }
100 }
107 }
108 }
101
109
102 output.pos = 0;
110 if (zresult == 0 || (input.pos == input.size && output.pos == 0)) {
111 break;
103 }
112 }
113
114 output.pos = 0;
104 }
115 }
105
116
106 if (!result) {
117 if (!result) {
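
Observable behavior from Python after these changes: empty input
short-circuits to an empty result, and a complete frame is fully drained in
one call. A minimal sketch::

    import zstd

    frame = zstd.ZstdCompressor().compress(b'payload')   # one complete frame

    dobj = zstd.ZstdDecompressor().decompressobj()
    assert dobj.decompress(b'') == b''           # empty-input special case
    assert dobj.decompress(frame) == b'payload'  # buffers fully drained
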
@@ -575,6 +575,10 b' static ZstdDecompressionReader* Decompre'
575 return NULL;
575 return NULL;
576 }
576 }
577
577
578 if (ensure_dctx(self, 1)) {
579 return NULL;
580 }
581
578 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
582 result = (ZstdDecompressionReader*)PyObject_CallObject((PyObject*)&ZstdDecompressionReaderType, NULL);
579 if (NULL == result) {
583 if (NULL == result) {
580 return NULL;
584 return NULL;
@@ -15,7 +15,8 b''
15 #include <zstd.h>
15 #include <zstd.h>
16 #include <zdict.h>
16 #include <zdict.h>
17
17
18 #define PYTHON_ZSTANDARD_VERSION "0.9.0"
18 /* Remember to change the string in zstandard/__init__ as well */
19 #define PYTHON_ZSTANDARD_VERSION "0.10.1"
19
20
20 typedef enum {
21 typedef enum {
21 compressorobj_flush_finish,
22 compressorobj_flush_finish,
@@ -45,7 +46,6 b' typedef struct {'
45 unsigned threads;
46 unsigned threads;
46 unsigned jobSize;
47 unsigned jobSize;
47 unsigned overlapSizeLog;
48 unsigned overlapSizeLog;
48 unsigned compressLiterals;
49 unsigned forceMaxWindow;
49 unsigned forceMaxWindow;
50 unsigned enableLongDistanceMatching;
50 unsigned enableLongDistanceMatching;
51 unsigned ldmHashLog;
51 unsigned ldmHashLog;
@@ -162,7 +162,6 b' typedef struct {'
162 ZstdCompressor* compressor;
162 ZstdCompressor* compressor;
163 PyObject* reader;
163 PyObject* reader;
164 Py_buffer buffer;
164 Py_buffer buffer;
165 unsigned long long sourceSize;
166 size_t readSize;
165 size_t readSize;
167
166
168 int entered;
167 int entered;
@@ -181,6 +180,34 b' extern PyTypeObject ZstdCompressionReade'
181 typedef struct {
180 typedef struct {
182 PyObject_HEAD
181 PyObject_HEAD
183
182
183 ZstdCompressor* compressor;
184 ZSTD_inBuffer input;
185 ZSTD_outBuffer output;
186 Py_buffer inBuffer;
187 int finished;
188 size_t chunkSize;
189 } ZstdCompressionChunker;
190
191 extern PyTypeObject ZstdCompressionChunkerType;
192
193 typedef enum {
194 compressionchunker_mode_normal,
195 compressionchunker_mode_flush,
196 compressionchunker_mode_finish,
197 } CompressionChunkerMode;
198
199 typedef struct {
200 PyObject_HEAD
201
202 ZstdCompressionChunker* chunker;
203 CompressionChunkerMode mode;
204 } ZstdCompressionChunkerIterator;
205
206 extern PyTypeObject ZstdCompressionChunkerIteratorType;
207
208 typedef struct {
209 PyObject_HEAD
210
184 ZSTD_DCtx* dctx;
211 ZSTD_DCtx* dctx;
185 ZstdCompressionDict* dict;
212 ZstdCompressionDict* dict;
186 size_t maxWindowSize;
213 size_t maxWindowSize;
@@ -17,6 +17,7 b' import tempfile'
17 HERE = os.path.abspath(os.path.dirname(__file__))
17 HERE = os.path.abspath(os.path.dirname(__file__))
18
18
19 SOURCES = ['zstd/%s' % p for p in (
19 SOURCES = ['zstd/%s' % p for p in (
20 'common/debug.c',
20 'common/entropy_common.c',
21 'common/entropy_common.c',
21 'common/error_private.c',
22 'common/error_private.c',
22 'common/fse_decompress.c',
23 'common/fse_decompress.c',
@@ -25,6 +26,7 b" SOURCES = ['zstd/%s' % p for p in ("
25 'common/xxhash.c',
26 'common/xxhash.c',
26 'common/zstd_common.c',
27 'common/zstd_common.c',
27 'compress/fse_compress.c',
28 'compress/fse_compress.c',
29 'compress/hist.c',
28 'compress/huf_compress.c',
30 'compress/huf_compress.c',
29 'compress/zstd_compress.c',
31 'compress/zstd_compress.c',
30 'compress/zstd_double_fast.c',
32 'compress/zstd_double_fast.c',
@@ -36,6 +38,7 b" SOURCES = ['zstd/%s' % p for p in ("
36 'decompress/huf_decompress.c',
38 'decompress/huf_decompress.c',
37 'decompress/zstd_decompress.c',
39 'decompress/zstd_decompress.c',
38 'dictBuilder/cover.c',
40 'dictBuilder/cover.c',
41 'dictBuilder/fastcover.c',
39 'dictBuilder/divsufsort.c',
42 'dictBuilder/divsufsort.c',
40 'dictBuilder/zdict.c',
43 'dictBuilder/zdict.c',
41 )]
44 )]
@@ -6,12 +6,12 b''
6
6
7 import distutils.ccompiler
7 import distutils.ccompiler
8 import os
8 import os
9 import sys
10
9
11 from distutils.extension import Extension
10 from distutils.extension import Extension
12
11
13
12
14 zstd_sources = ['zstd/%s' % p for p in (
13 zstd_sources = ['zstd/%s' % p for p in (
14 'common/debug.c',
15 'common/entropy_common.c',
15 'common/entropy_common.c',
16 'common/error_private.c',
16 'common/error_private.c',
17 'common/fse_decompress.c',
17 'common/fse_decompress.c',
@@ -20,6 +20,7 b" zstd_sources = ['zstd/%s' % p for p in ("
20 'common/xxhash.c',
20 'common/xxhash.c',
21 'common/zstd_common.c',
21 'common/zstd_common.c',
22 'compress/fse_compress.c',
22 'compress/fse_compress.c',
23 'compress/hist.c',
23 'compress/huf_compress.c',
24 'compress/huf_compress.c',
24 'compress/zstd_compress.c',
25 'compress/zstd_compress.c',
25 'compress/zstd_double_fast.c',
26 'compress/zstd_double_fast.c',
@@ -32,6 +33,7 b" zstd_sources = ['zstd/%s' % p for p in ("
32 'decompress/zstd_decompress.c',
33 'decompress/zstd_decompress.c',
33 'dictBuilder/cover.c',
34 'dictBuilder/cover.c',
34 'dictBuilder/divsufsort.c',
35 'dictBuilder/divsufsort.c',
36 'dictBuilder/fastcover.c',
35 'dictBuilder/zdict.c',
37 'dictBuilder/zdict.c',
36 )]
38 )]
37
39
@@ -75,6 +77,7 b' ext_sources = ['
75 'c-ext/compressobj.c',
77 'c-ext/compressobj.c',
76 'c-ext/compressor.c',
78 'c-ext/compressor.c',
77 'c-ext/compressoriterator.c',
79 'c-ext/compressoriterator.c',
80 'c-ext/compressionchunker.c',
78 'c-ext/compressionparams.c',
81 'c-ext/compressionparams.c',
79 'c-ext/compressionreader.c',
82 'c-ext/compressionreader.c',
80 'c-ext/compressionwriter.c',
83 'c-ext/compressionwriter.c',
@@ -93,25 +96,45 b' zstd_depends = ['
93
96
94
97
95 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
98 def get_c_extension(support_legacy=False, system_zstd=False, name='zstd',
96 warnings_as_errors=False):
99 warnings_as_errors=False, root=None):
97 """Obtain a distutils.extension.Extension for the C extension."""
100 """Obtain a distutils.extension.Extension for the C extension.
98 root = os.path.abspath(os.path.dirname(__file__))
101
102 ``support_legacy`` controls whether to compile in legacy zstd format support.
103
104 ``system_zstd`` controls whether to compile against the system zstd library.
105 For this to work, the system zstd library and headers must match what
106 python-zstandard is coded against exactly.
107
108 ``name`` is the module name of the C extension to produce.
109
110 ``warnings_as_errors`` controls whether compiler warnings are turned into
111 compiler errors.
99
112
100 sources = set([os.path.join(root, p) for p in ext_sources])
113 ``root`` defines a root path that source should be computed as relative
114 to. This should be the directory with the main ``setup.py`` that is
115 being invoked. If not defined, paths will be relative to this file.
116 """
117 actual_root = os.path.abspath(os.path.dirname(__file__))
118 root = root or actual_root
119
120 sources = set([os.path.join(actual_root, p) for p in ext_sources])
101 if not system_zstd:
121 if not system_zstd:
102 sources.update([os.path.join(root, p) for p in zstd_sources])
122 sources.update([os.path.join(actual_root, p) for p in zstd_sources])
103 if support_legacy:
123 if support_legacy:
104 sources.update([os.path.join(root, p) for p in zstd_sources_legacy])
124 sources.update([os.path.join(actual_root, p)
125 for p in zstd_sources_legacy])
105 sources = list(sources)
126 sources = list(sources)
106
127
107 include_dirs = set([os.path.join(root, d) for d in ext_includes])
128 include_dirs = set([os.path.join(actual_root, d) for d in ext_includes])
108 if not system_zstd:
129 if not system_zstd:
109 include_dirs.update([os.path.join(root, d) for d in zstd_includes])
130 include_dirs.update([os.path.join(actual_root, d)
131 for d in zstd_includes])
110 if support_legacy:
132 if support_legacy:
111 include_dirs.update([os.path.join(root, d) for d in zstd_includes_legacy])
133 include_dirs.update([os.path.join(actual_root, d)
134 for d in zstd_includes_legacy])
112 include_dirs = list(include_dirs)
135 include_dirs = list(include_dirs)
113
136
114 depends = [os.path.join(root, p) for p in zstd_depends]
137 depends = [os.path.join(actual_root, p) for p in zstd_depends]
115
138
116 compiler = distutils.ccompiler.new_compiler()
139 compiler = distutils.ccompiler.new_compiler()
117
140
@@ -152,6 +175,11 b' def get_c_extension(support_legacy=False'
152
175
153 libraries = ['zstd'] if system_zstd else []
176 libraries = ['zstd'] if system_zstd else []
154
177
178 # Python 3.7 doesn't like absolute paths. So normalize to relative.
179 sources = [os.path.relpath(p, root) for p in sources]
180 include_dirs = [os.path.relpath(p, root) for p in include_dirs]
181 depends = [os.path.relpath(p, root) for p in depends]
182
155 # TODO compile with optimizations.
183 # TODO compile with optimizations.
156 return Extension(name, sources,
184 return Extension(name, sources,
157 include_dirs=include_dirs,
185 include_dirs=include_dirs,
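
A downstream ``setup.py`` that vendors this package would typically call the
helper like so (sketch; the vendor directory name is hypothetical)::

    import os
    import sys

    HERE = os.path.dirname(os.path.abspath(__file__))
    # 'python-zstandard' is a hypothetical vendored checkout location.
    sys.path.insert(0, os.path.join(HERE, 'python-zstandard'))

    import setup_zstd

    # Paths inside the Extension are made relative to root (this setup.py).
    ext = setup_zstd.get_c_extension(name='zstd', root=HERE)
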
@@ -153,7 +153,7 b' class TestCompressor_compress(unittest.T'
153 no_params = zstd.get_frame_parameters(no_dict_id)
153 no_params = zstd.get_frame_parameters(no_dict_id)
154 with_params = zstd.get_frame_parameters(with_dict_id)
154 with_params = zstd.get_frame_parameters(with_dict_id)
155 self.assertEqual(no_params.dict_id, 0)
155 self.assertEqual(no_params.dict_id, 0)
156 self.assertEqual(with_params.dict_id, 1387616518)
156 self.assertEqual(with_params.dict_id, 1880053135)
157
157
158 def test_compress_dict_multiple(self):
158 def test_compress_dict_multiple(self):
159 samples = []
159 samples = []
@@ -216,7 +216,7 b' class TestCompressor_compress(unittest.T'
216 self.assertEqual(params.dict_id, d.dict_id())
216 self.assertEqual(params.dict_id, d.dict_id())
217
217
218 self.assertEqual(result,
218 self.assertEqual(result,
219 b'\x28\xb5\x2f\xfd\x23\x06\x59\xb5\x52\x03\x19\x00\x00'
219 b'\x28\xb5\x2f\xfd\x23\x8f\x55\x0f\x70\x03\x19\x00\x00'
220 b'\x66\x6f\x6f')
220 b'\x66\x6f\x6f')
221
221
222 def test_multithreaded_compression_params(self):
222 def test_multithreaded_compression_params(self):
@@ -336,7 +336,9 b' class TestCompressor_compressobj(unittes'
336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
336 b'\x28\xb5\x2f\xfd\x00\x48\x18\x00\x00foo')
337 self.assertEqual(cobj.compress(b'bar'), b'')
337 self.assertEqual(cobj.compress(b'bar'), b'')
338 # 3 byte header plus content.
338 # 3 byte header plus content.
339 self.assertEqual(cobj.flush(), b'\x19\x00\x00bar')
339 self.assertEqual(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK),
340 b'\x18\x00\x00bar')
341 self.assertEqual(cobj.flush(), b'\x01\x00\x00')
340
342
341 def test_flush_empty_block(self):
343 def test_flush_empty_block(self):
342 cctx = zstd.ZstdCompressor(write_checksum=True)
344 cctx = zstd.ZstdCompressor(write_checksum=True)
@@ -576,15 +578,23 b' class TestCompressor_stream_reader(unitt'
576 def test_context_manager(self):
578 def test_context_manager(self):
577 cctx = zstd.ZstdCompressor()
579 cctx = zstd.ZstdCompressor()
578
580
579 reader = cctx.stream_reader(b'foo' * 60)
580 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
581 reader.read(10)
582
583 with cctx.stream_reader(b'foo') as reader:
581 with cctx.stream_reader(b'foo') as reader:
584 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
582 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
585 with reader as reader2:
583 with reader as reader2:
586 pass
584 pass
587
585
586 def test_no_context_manager(self):
587 cctx = zstd.ZstdCompressor()
588
589 reader = cctx.stream_reader(b'foo')
590 reader.read(4)
591 self.assertFalse(reader.closed)
592
593 reader.close()
594 self.assertTrue(reader.closed)
595 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
596 reader.read(1)
597
588 def test_not_implemented(self):
598 def test_not_implemented(self):
589 cctx = zstd.ZstdCompressor()
599 cctx = zstd.ZstdCompressor()
590
600
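The new test_no_context_manager codifies the relaxed reader lifecycle: the stream reader is usable as soon as it is constructed, and close() rather than context-manager exit is what invalidates it. Roughly:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    reader = cctx.stream_reader(b'foo' * 60)

    # No with block needed; reads work immediately.
    reader.read(10)
    assert not reader.closed

    reader.close()
    assert reader.closed
    # Any further read() raises ValueError('stream is closed').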
@@ -619,13 +629,18 b' class TestCompressor_stream_reader(unitt'
619 self.assertFalse(reader.writable())
629 self.assertFalse(reader.writable())
620 self.assertFalse(reader.seekable())
630 self.assertFalse(reader.seekable())
621 self.assertFalse(reader.isatty())
631 self.assertFalse(reader.isatty())
632 self.assertFalse(reader.closed)
622 self.assertIsNone(reader.flush())
633 self.assertIsNone(reader.flush())
634 self.assertFalse(reader.closed)
635
636 self.assertTrue(reader.closed)
623
637
624 def test_read_closed(self):
638 def test_read_closed(self):
625 cctx = zstd.ZstdCompressor()
639 cctx = zstd.ZstdCompressor()
626
640
627 with cctx.stream_reader(b'foo' * 60) as reader:
641 with cctx.stream_reader(b'foo' * 60) as reader:
628 reader.close()
642 reader.close()
643 self.assertTrue(reader.closed)
629 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
644 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
630 reader.read(10)
645 reader.read(10)
631
646
@@ -715,7 +730,7 b' class TestCompressor_stream_reader(unitt'
715 while reader.read(8192):
730 while reader.read(8192):
716 pass
731 pass
717
732
718 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
733 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
719 reader.read(10)
734 reader.read(10)
720
735
721 def test_bad_size(self):
736 def test_bad_size(self):
@@ -792,7 +807,7 b' class TestCompressor_stream_writer(unitt'
792 d = zstd.train_dictionary(8192, samples)
807 d = zstd.train_dictionary(8192, samples)
793
808
794 h = hashlib.sha1(d.as_bytes()).hexdigest()
809 h = hashlib.sha1(d.as_bytes()).hexdigest()
795 self.assertEqual(h, '3040faa0ddc37d50e71a4dd28052cb8db5d9d027')
810 self.assertEqual(h, '2b3b6428da5bf2c9cc9d4bb58ba0bc5990dd0e79')
796
811
797 buffer = io.BytesIO()
812 buffer = io.BytesIO()
798 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
813 cctx = zstd.ZstdCompressor(level=9, dict_data=d)
@@ -808,9 +823,16 b' class TestCompressor_stream_writer(unitt'
808 self.assertEqual(params.window_size, 2097152)
823 self.assertEqual(params.window_size, 2097152)
809 self.assertEqual(params.dict_id, d.dict_id())
824 self.assertEqual(params.dict_id, d.dict_id())
810 self.assertFalse(params.has_checksum)
825 self.assertFalse(params.has_checksum)
811 self.assertEqual(compressed,
826
812 b'\x28\xb5\x2f\xfd\x03\x58\x06\x59\xb5\x52\x5d\x00'
827 h = hashlib.sha1(compressed).hexdigest()
813 b'\x00\x00\x02\xfc\x3d\x3f\xd9\xb0\x51\x03\x45\x89')
828 self.assertEqual(h, '23f88344263678478f5f82298e0a5d1833125786')
829
830 source = b'foo' + b'bar' + (b'foo' * 16384)
831
832 dctx = zstd.ZstdDecompressor(dict_data=d)
833
834 self.assertEqual(dctx.decompress(compressed, max_output_size=len(source)),
835 source)
814
836
815 def test_compression_params(self):
837 def test_compression_params(self):
816 params = zstd.ZstdCompressionParameters(
838 params = zstd.ZstdCompressionParameters(
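Pinning exact compressed bytes is brittle across zstd releases, so the test now hashes the output and verifies a decompression round trip instead. Roughly the round trip it performs (sample construction as in the dictionary sketch earlier):

    import io
    import zstandard as zstd

    samples = [b'foo' * 64, b'bar' * 64, b'foobar' * 64] * 128
    d = zstd.train_dictionary(8192, samples)

    source = b'foo' + b'bar' + (b'foo' * 16384)

    buffer = io.BytesIO()
    cctx = zstd.ZstdCompressor(level=9, dict_data=d)
    with cctx.stream_writer(buffer) as compressor:
        compressor.write(source)
    compressed = buffer.getvalue()

    # max_output_size is required because the streamed frame does not
    # record its content size; the same dictionary must be supplied.
    dctx = zstd.ZstdDecompressor(dict_data=d)
    assert dctx.decompress(compressed, max_output_size=len(source)) == source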
@@ -1157,6 +1179,181 b' class TestCompressor_read_to_iter(unitte'
1157 b''.join(cctx.read_to_iter(source))
1179 b''.join(cctx.read_to_iter(source))
1158
1180
1159
1181
1182 @make_cffi
1183 class TestCompressor_chunker(unittest.TestCase):
1184 def test_empty(self):
1185 cctx = zstd.ZstdCompressor(write_content_size=False)
1186 chunker = cctx.chunker()
1187
1188 it = chunker.compress(b'')
1189
1190 with self.assertRaises(StopIteration):
1191 next(it)
1192
1193 it = chunker.finish()
1194
1195 self.assertEqual(next(it), b'\x28\xb5\x2f\xfd\x00\x50\x01\x00\x00')
1196
1197 with self.assertRaises(StopIteration):
1198 next(it)
1199
1200 def test_simple_input(self):
1201 cctx = zstd.ZstdCompressor()
1202 chunker = cctx.chunker()
1203
1204 it = chunker.compress(b'foobar')
1205
1206 with self.assertRaises(StopIteration):
1207 next(it)
1208
1209 it = chunker.compress(b'baz' * 30)
1210
1211 with self.assertRaises(StopIteration):
1212 next(it)
1213
1214 it = chunker.finish()
1215
1216 self.assertEqual(next(it),
1217 b'\x28\xb5\x2f\xfd\x00\x50\x7d\x00\x00\x48\x66\x6f'
1218 b'\x6f\x62\x61\x72\x62\x61\x7a\x01\x00\xe4\xe4\x8e')
1219
1220 with self.assertRaises(StopIteration):
1221 next(it)
1222
1223 def test_input_size(self):
1224 cctx = zstd.ZstdCompressor()
1225 chunker = cctx.chunker(size=1024)
1226
1227 it = chunker.compress(b'x' * 1000)
1228
1229 with self.assertRaises(StopIteration):
1230 next(it)
1231
1232 it = chunker.compress(b'y' * 24)
1233
1234 with self.assertRaises(StopIteration):
1235 next(it)
1236
1237 chunks = list(chunker.finish())
1238
1239 self.assertEqual(chunks, [
1240 b'\x28\xb5\x2f\xfd\x60\x00\x03\x65\x00\x00\x18\x78\x78\x79\x02\x00'
1241 b'\xa0\x16\xe3\x2b\x80\x05'
1242 ])
1243
1244 dctx = zstd.ZstdDecompressor()
1245
1246 self.assertEqual(dctx.decompress(b''.join(chunks)),
1247 (b'x' * 1000) + (b'y' * 24))
1248
1249 def test_small_chunk_size(self):
1250 cctx = zstd.ZstdCompressor()
1251 chunker = cctx.chunker(chunk_size=1)
1252
1253 chunks = list(chunker.compress(b'foo' * 1024))
1254 self.assertEqual(chunks, [])
1255
1256 chunks = list(chunker.finish())
1257 self.assertTrue(all(len(chunk) == 1 for chunk in chunks))
1258
1259 self.assertEqual(
1260 b''.join(chunks),
1261 b'\x28\xb5\x2f\xfd\x00\x50\x55\x00\x00\x18\x66\x6f\x6f\x01\x00'
1262 b'\xfa\xd3\x77\x43')
1263
1264 dctx = zstd.ZstdDecompressor()
1265 self.assertEqual(dctx.decompress(b''.join(chunks),
1266 max_output_size=10000),
1267 b'foo' * 1024)
1268
1269 def test_input_types(self):
1270 cctx = zstd.ZstdCompressor()
1271
1272 mutable_array = bytearray(3)
1273 mutable_array[:] = b'foo'
1274
1275 sources = [
1276 memoryview(b'foo'),
1277 bytearray(b'foo'),
1278 mutable_array,
1279 ]
1280
1281 for source in sources:
1282 chunker = cctx.chunker()
1283
1284 self.assertEqual(list(chunker.compress(source)), [])
1285 self.assertEqual(list(chunker.finish()), [
1286 b'\x28\xb5\x2f\xfd\x00\x50\x19\x00\x00\x66\x6f\x6f'
1287 ])
1288
1289 def test_flush(self):
1290 cctx = zstd.ZstdCompressor()
1291 chunker = cctx.chunker()
1292
1293 self.assertEqual(list(chunker.compress(b'foo' * 1024)), [])
1294 self.assertEqual(list(chunker.compress(b'bar' * 1024)), [])
1295
1296 chunks1 = list(chunker.flush())
1297
1298 self.assertEqual(chunks1, [
1299 b'\x28\xb5\x2f\xfd\x00\x50\x8c\x00\x00\x30\x66\x6f\x6f\x62\x61\x72'
1300 b'\x02\x00\xfa\x03\xfe\xd0\x9f\xbe\x1b\x02'
1301 ])
1302
1303 self.assertEqual(list(chunker.flush()), [])
1304 self.assertEqual(list(chunker.flush()), [])
1305
1306 self.assertEqual(list(chunker.compress(b'baz' * 1024)), [])
1307
1308 chunks2 = list(chunker.flush())
1309 self.assertEqual(len(chunks2), 1)
1310
1311 chunks3 = list(chunker.finish())
1312 self.assertEqual(len(chunks3), 1)
1313
1314 dctx = zstd.ZstdDecompressor()
1315
1316 self.assertEqual(dctx.decompress(b''.join(chunks1 + chunks2 + chunks3),
1317 max_output_size=10000),
1318 (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024))
1319
1320 def test_compress_after_finish(self):
1321 cctx = zstd.ZstdCompressor()
1322 chunker = cctx.chunker()
1323
1324 list(chunker.compress(b'foo'))
1325 list(chunker.finish())
1326
1327 with self.assertRaisesRegexp(
1328 zstd.ZstdError,
1329 'cannot call compress\(\) after compression finished'):
1330 list(chunker.compress(b'foo'))
1331
1332 def test_flush_after_finish(self):
1333 cctx = zstd.ZstdCompressor()
1334 chunker = cctx.chunker()
1335
1336 list(chunker.compress(b'foo'))
1337 list(chunker.finish())
1338
1339 with self.assertRaisesRegexp(
1340 zstd.ZstdError,
1341 'cannot call flush\(\) after compression finished'):
1342 list(chunker.flush())
1343
1344 def test_finish_after_finish(self):
1345 cctx = zstd.ZstdCompressor()
1346 chunker = cctx.chunker()
1347
1348 list(chunker.compress(b'foo'))
1349 list(chunker.finish())
1350
1351 with self.assertRaisesRegexp(
1352 zstd.ZstdError,
1353 'cannot call finish\(\) after compression finished'):
1354 list(chunker.finish())
1355
1356
1160 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1357 class TestCompressor_multi_compress_to_buffer(unittest.TestCase):
1161 def test_invalid_inputs(self):
1358 def test_invalid_inputs(self):
1162 cctx = zstd.ZstdCompressor()
1359 cctx = zstd.ZstdCompressor()
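The chunker added above is the new API for producing uniformly sized output chunks, which these tests round-trip. Condensed usage, per the tests:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor()
    chunker = cctx.chunker(chunk_size=16384)

    out = []
    for raw in (b'foo' * 1024, b'bar' * 1024, b'baz' * 1024):
        # compress() yields only completed chunk_size-byte chunks.
        out.extend(chunker.compress(raw))

    # flush() emits buffered data without ending the frame;
    # finish() ends the frame and emits whatever remains.
    out.extend(chunker.flush())
    out.extend(chunker.finish())

    dctx = zstd.ZstdDecompressor()
    assert dctx.decompress(b''.join(out), max_output_size=1 << 20) == \
        (b'foo' * 1024) + (b'bar' * 1024) + (b'baz' * 1024)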
@@ -135,6 +135,51 b' class TestCompressor_compressobj_fuzzing'
135
135
136 self.assertEqual(b''.join(chunks), ref_frame)
136 self.assertEqual(b''.join(chunks), ref_frame)
137
137
138 @hypothesis.settings(
139 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
140 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
141 level=strategies.integers(min_value=1, max_value=5),
142 chunk_sizes=strategies.data(),
143 flushes=strategies.data())
144 def test_flush_block(self, original, level, chunk_sizes, flushes):
145 cctx = zstd.ZstdCompressor(level=level)
146 cobj = cctx.compressobj()
147
148 dctx = zstd.ZstdDecompressor()
149 dobj = dctx.decompressobj()
150
151 compressed_chunks = []
152 decompressed_chunks = []
153 i = 0
154 while True:
155 input_size = chunk_sizes.draw(strategies.integers(1, 4096))
156 source = original[i:i + input_size]
157 if not source:
158 break
159
160 i += input_size
161
162 chunk = cobj.compress(source)
163 compressed_chunks.append(chunk)
164 decompressed_chunks.append(dobj.decompress(chunk))
165
166 if not flushes.draw(strategies.booleans()):
167 continue
168
169 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
170 compressed_chunks.append(chunk)
171 decompressed_chunks.append(dobj.decompress(chunk))
172
173 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
174
175 chunk = cobj.flush(zstd.COMPRESSOBJ_FLUSH_FINISH)
176 compressed_chunks.append(chunk)
177 decompressed_chunks.append(dobj.decompress(chunk))
178
179 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
180 max_output_size=len(original)),
181 original)
182 self.assertEqual(b''.join(decompressed_chunks), original)
138
183
139 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
184 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
140 @make_cffi
185 @make_cffi
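The fuzz test above pairs random block flushes with an incremental decompressor to assert that every flushed prefix is immediately decodable. The invariant in miniature:

    import zstandard as zstd

    cctx = zstd.ZstdCompressor(level=3)
    cobj = cctx.compressobj()

    dctx = zstd.ZstdDecompressor()
    dobj = dctx.decompressobj()

    out = []
    for piece in (b'foo' * 100, b'bar' * 100):
        chunk = cobj.compress(piece)
        # Ending the block makes everything written so far decodable now.
        chunk += cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK)
        out.append(dobj.decompress(chunk))

    out.append(dobj.decompress(cobj.flush()))
    assert b''.join(out) == (b'foo' * 100) + (b'bar' * 100)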
@@ -186,3 +231,90 b' class TestCompressor_multi_compress_to_b'
186
231
187 for i, frame in enumerate(result):
232 for i, frame in enumerate(result):
188 self.assertEqual(dctx.decompress(frame), original[i])
233 self.assertEqual(dctx.decompress(frame), original[i])
234
235
236 @unittest.skipUnless('ZSTD_SLOW_TESTS' in os.environ, 'ZSTD_SLOW_TESTS not set')
237 @make_cffi
238 class TestCompressor_chunker_fuzzing(unittest.TestCase):
239 @hypothesis.settings(
240 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
241 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
242 level=strategies.integers(min_value=1, max_value=5),
243 chunk_size=strategies.integers(
244 min_value=1,
245 max_value=32 * 1048576),
246 input_sizes=strategies.data())
247 def test_random_input_sizes(self, original, level, chunk_size, input_sizes):
248 cctx = zstd.ZstdCompressor(level=level)
249 chunker = cctx.chunker(chunk_size=chunk_size)
250
251 chunks = []
252 i = 0
253 while True:
254 input_size = input_sizes.draw(strategies.integers(1, 4096))
255 source = original[i:i + input_size]
256 if not source:
257 break
258
259 chunks.extend(chunker.compress(source))
260 i += input_size
261
262 chunks.extend(chunker.finish())
263
264 dctx = zstd.ZstdDecompressor()
265
266 self.assertEqual(dctx.decompress(b''.join(chunks),
267 max_output_size=len(original)),
268 original)
269
270 self.assertTrue(all(len(chunk) == chunk_size for chunk in chunks[:-1]))
271
272 @hypothesis.settings(
273 suppress_health_check=[hypothesis.HealthCheck.large_base_example])
274 @hypothesis.given(original=strategies.sampled_from(random_input_data()),
275 level=strategies.integers(min_value=1, max_value=5),
276 chunk_size=strategies.integers(
277 min_value=1,
278 max_value=32 * 1048576),
279 input_sizes=strategies.data(),
280 flushes=strategies.data())
281 def test_flush_block(self, original, level, chunk_size, input_sizes,
282 flushes):
283 cctx = zstd.ZstdCompressor(level=level)
284 chunker = cctx.chunker(chunk_size=chunk_size)
285
286 dctx = zstd.ZstdDecompressor()
287 dobj = dctx.decompressobj()
288
289 compressed_chunks = []
290 decompressed_chunks = []
291 i = 0
292 while True:
293 input_size = input_sizes.draw(strategies.integers(1, 4096))
294 source = original[i:i + input_size]
295 if not source:
296 break
297
298 i += input_size
299
300 chunks = list(chunker.compress(source))
301 compressed_chunks.extend(chunks)
302 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
303
304 if not flushes.draw(strategies.booleans()):
305 continue
306
307 chunks = list(chunker.flush())
308 compressed_chunks.extend(chunks)
309 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
310
311 self.assertEqual(b''.join(decompressed_chunks), original[0:i])
312
313 chunks = list(chunker.finish())
314 compressed_chunks.extend(chunks)
315 decompressed_chunks.append(dobj.decompress(b''.join(chunks)))
316
317 self.assertEqual(dctx.decompress(b''.join(compressed_chunks),
318 max_output_size=len(original)),
319 original)
320 self.assertEqual(b''.join(decompressed_chunks), original)
\ No newline at end of file
@@ -24,6 +24,7 b' class TestCompressionParameters(unittest'
24 hash_log=zstd.HASHLOG_MAX,
24 hash_log=zstd.HASHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
25 search_log=zstd.SEARCHLOG_MAX,
26 min_match=zstd.SEARCHLENGTH_MAX - 1,
26 min_match=zstd.SEARCHLENGTH_MAX - 1,
27 target_length=zstd.TARGETLENGTH_MAX,
27 compression_strategy=zstd.STRATEGY_BTULTRA)
28 compression_strategy=zstd.STRATEGY_BTULTRA)
28
29
29 def test_from_level(self):
30 def test_from_level(self):
@@ -34,7 +35,6 b' class TestCompressionParameters(unittest'
34
35
35 p = zstd.ZstdCompressionParameters.from_level(-4)
36 p = zstd.ZstdCompressionParameters.from_level(-4)
36 self.assertEqual(p.window_log, 19)
37 self.assertEqual(p.window_log, 19)
37 self.assertEqual(p.compress_literals, 0)
38
38
39 def test_members(self):
39 def test_members(self):
40 p = zstd.ZstdCompressionParameters(window_log=10,
40 p = zstd.ZstdCompressionParameters(window_log=10,
@@ -64,19 +64,11 b' class TestCompressionParameters(unittest'
64 self.assertEqual(p.job_size, 1048576)
64 self.assertEqual(p.job_size, 1048576)
65 self.assertEqual(p.overlap_size_log, 6)
65 self.assertEqual(p.overlap_size_log, 6)
66
66
67 p = zstd.ZstdCompressionParameters(compression_level=2)
68 self.assertEqual(p.compress_literals, 1)
69
70 p = zstd.ZstdCompressionParameters(compress_literals=False)
71 self.assertEqual(p.compress_literals, 0)
72
73 p = zstd.ZstdCompressionParameters(compression_level=-1)
67 p = zstd.ZstdCompressionParameters(compression_level=-1)
74 self.assertEqual(p.compression_level, -1)
68 self.assertEqual(p.compression_level, -1)
75 self.assertEqual(p.compress_literals, 0)
76
69
77 p = zstd.ZstdCompressionParameters(compression_level=-2, compress_literals=True)
70 p = zstd.ZstdCompressionParameters(compression_level=-2)
78 self.assertEqual(p.compression_level, -2)
71 self.assertEqual(p.compression_level, -2)
79 self.assertEqual(p.compress_literals, 1)
80
72
81 p = zstd.ZstdCompressionParameters(force_max_window=True)
73 p = zstd.ZstdCompressionParameters(force_max_window=True)
82 self.assertEqual(p.force_max_window, 1)
74 self.assertEqual(p.force_max_window, 1)
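With compress_literals removed from the public constructor, a sketch of the remaining surface (parameter values here are arbitrary but within the documented bounds):

    import zstandard as zstd

    params = zstd.ZstdCompressionParameters(
        window_log=18,
        min_match=zstd.SEARCHLENGTH_MIN,
        target_length=zstd.TARGETLENGTH_MAX,
        compression_strategy=zstd.STRATEGY_FAST)

    cctx = zstd.ZstdCompressor(compression_params=params)
    frame = cctx.compress(b'data to compress' * 256)

    # from_level() reports what a plain integer level would have chosen.
    level_params = zstd.ZstdCompressionParameters.from_level(3)
    assert level_params.window_log > 0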
@@ -27,7 +27,7 b' s_searchlog = strategies.integers(min_va'
27 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
27 s_searchlength = strategies.integers(min_value=zstd.SEARCHLENGTH_MIN,
28 max_value=zstd.SEARCHLENGTH_MAX)
28 max_value=zstd.SEARCHLENGTH_MAX)
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
29 s_targetlength = strategies.integers(min_value=zstd.TARGETLENGTH_MIN,
30 max_value=2**32)
30 max_value=zstd.TARGETLENGTH_MAX)
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
31 s_strategy = strategies.sampled_from((zstd.STRATEGY_FAST,
32 zstd.STRATEGY_DFAST,
32 zstd.STRATEGY_DFAST,
33 zstd.STRATEGY_GREEDY,
33 zstd.STRATEGY_GREEDY,
@@ -293,10 +293,6 b' class TestDecompressor_stream_reader(uni'
293 def test_context_manager(self):
293 def test_context_manager(self):
294 dctx = zstd.ZstdDecompressor()
294 dctx = zstd.ZstdDecompressor()
295
295
296 reader = dctx.stream_reader(b'foo')
297 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
298 reader.read(1)
299
300 with dctx.stream_reader(b'foo') as reader:
296 with dctx.stream_reader(b'foo') as reader:
301 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
297 with self.assertRaisesRegexp(ValueError, 'cannot __enter__ multiple times'):
302 with reader as reader2:
298 with reader as reader2:
@@ -331,17 +327,23 b' class TestDecompressor_stream_reader(uni'
331 dctx = zstd.ZstdDecompressor()
327 dctx = zstd.ZstdDecompressor()
332
328
333 with dctx.stream_reader(b'foo') as reader:
329 with dctx.stream_reader(b'foo') as reader:
330 self.assertFalse(reader.closed)
334 self.assertTrue(reader.readable())
331 self.assertTrue(reader.readable())
335 self.assertFalse(reader.writable())
332 self.assertFalse(reader.writable())
336 self.assertTrue(reader.seekable())
333 self.assertTrue(reader.seekable())
337 self.assertFalse(reader.isatty())
334 self.assertFalse(reader.isatty())
335 self.assertFalse(reader.closed)
338 self.assertIsNone(reader.flush())
336 self.assertIsNone(reader.flush())
337 self.assertFalse(reader.closed)
338
339 self.assertTrue(reader.closed)
339
340
340 def test_read_closed(self):
341 def test_read_closed(self):
341 dctx = zstd.ZstdDecompressor()
342 dctx = zstd.ZstdDecompressor()
342
343
343 with dctx.stream_reader(b'foo') as reader:
344 with dctx.stream_reader(b'foo') as reader:
344 reader.close()
345 reader.close()
346 self.assertTrue(reader.closed)
345 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
347 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
346 reader.read(1)
348 reader.read(1)
347
349
@@ -372,10 +374,10 b' class TestDecompressor_stream_reader(uni'
372 self.assertEqual(reader.tell(), len(source))
374 self.assertEqual(reader.tell(), len(source))
373
375
374 # Read after EOF should return empty bytes.
376 # Read after EOF should return empty bytes.
375 self.assertEqual(reader.read(), b'')
377 self.assertEqual(reader.read(1), b'')
376 self.assertEqual(reader.tell(), len(result))
378 self.assertEqual(reader.tell(), len(result))
377
379
378 self.assertTrue(reader.closed())
380 self.assertTrue(reader.closed)
379
381
380 def test_read_buffer_small_chunks(self):
382 def test_read_buffer_small_chunks(self):
381 cctx = zstd.ZstdCompressor()
383 cctx = zstd.ZstdCompressor()
@@ -408,8 +410,11 b' class TestDecompressor_stream_reader(uni'
408 chunk = reader.read(8192)
410 chunk = reader.read(8192)
409 self.assertEqual(chunk, source)
411 self.assertEqual(chunk, source)
410 self.assertEqual(reader.tell(), len(source))
412 self.assertEqual(reader.tell(), len(source))
411 self.assertEqual(reader.read(), b'')
413 self.assertEqual(reader.read(1), b'')
412 self.assertEqual(reader.tell(), len(source))
414 self.assertEqual(reader.tell(), len(source))
415 self.assertFalse(reader.closed)
416
417 self.assertTrue(reader.closed)
413
418
414 def test_read_stream_small_chunks(self):
419 def test_read_stream_small_chunks(self):
415 cctx = zstd.ZstdCompressor()
420 cctx = zstd.ZstdCompressor()
@@ -440,7 +445,9 b' class TestDecompressor_stream_reader(uni'
440 while reader.read(16):
445 while reader.read(16):
441 pass
446 pass
442
447
443 with self.assertRaisesRegexp(zstd.ZstdError, 'read\(\) must be called from an active'):
448 self.assertTrue(reader.closed)
449
450 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
444 reader.read(10)
451 reader.read(10)
445
452
446 def test_illegal_seeks(self):
453 def test_illegal_seeks(self):
@@ -474,8 +481,7 b' class TestDecompressor_stream_reader(uni'
474 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
481 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
475 reader.seek(4, os.SEEK_SET)
482 reader.seek(4, os.SEEK_SET)
476
483
477 with self.assertRaisesRegexp(
484 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
478 zstd.ZstdError, 'seek\(\) must be called from an active context'):
479 reader.seek(0)
485 reader.seek(0)
480
486
481 def test_seek(self):
487 def test_seek(self):
@@ -492,6 +498,39 b' class TestDecompressor_stream_reader(uni'
492 reader.seek(4, os.SEEK_CUR)
498 reader.seek(4, os.SEEK_CUR)
493 self.assertEqual(reader.read(2), b'ar')
499 self.assertEqual(reader.read(2), b'ar')
494
500
501 def test_no_context_manager(self):
502 source = b'foobar' * 60
503 cctx = zstd.ZstdCompressor()
504 frame = cctx.compress(source)
505
506 dctx = zstd.ZstdDecompressor()
507 reader = dctx.stream_reader(frame)
508
509 self.assertEqual(reader.read(6), b'foobar')
510 self.assertEqual(reader.read(18), b'foobar' * 3)
511 self.assertFalse(reader.closed)
512
513 # Calling close prevents subsequent use.
514 reader.close()
515 self.assertTrue(reader.closed)
516
517 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
518 reader.read(6)
519
520 def test_read_after_error(self):
521 source = io.BytesIO(b'')
522 dctx = zstd.ZstdDecompressor()
523
524 reader = dctx.stream_reader(source)
525
526 with reader:
527 with self.assertRaises(TypeError):
528 reader.read()
529
530 with reader:
531 with self.assertRaisesRegexp(ValueError, 'stream is closed'):
532 reader.read(100)
533
495
534
496 @make_cffi
535 @make_cffi
497 class TestDecompressor_decompressobj(unittest.TestCase):
536 class TestDecompressor_decompressobj(unittest.TestCase):
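test_seek and test_no_context_manager together describe the decompressing reader: forward seeks are implemented by decompressing and discarding output, and rewinding is not supported. Roughly:

    import os
    import zstandard as zstd

    source = b'foobar' * 60
    frame = zstd.ZstdCompressor().compress(source)

    dctx = zstd.ZstdDecompressor()
    reader = dctx.stream_reader(frame)

    assert reader.read(6) == b'foobar'
    # Skip 4 decompressed bytes; tell() reports the decompressed offset.
    reader.seek(4, os.SEEK_CUR)
    assert reader.read(2) == b'ar'
    assert reader.tell() == 12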
@@ -12,7 +12,9 b' from . common import ('
12 @make_cffi
12 @make_cffi
13 class TestModuleAttributes(unittest.TestCase):
13 class TestModuleAttributes(unittest.TestCase):
14 def test_version(self):
14 def test_version(self):
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 4))
15 self.assertEqual(zstd.ZSTD_VERSION, (1, 3, 6))
16
17 self.assertEqual(zstd.__version__, '0.10.1')
16
18
17 def test_constants(self):
19 def test_constants(self):
18 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
20 self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22)
@@ -27,6 +29,8 b' class TestModuleAttributes(unittest.Test'
27 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
29 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
28 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
30 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
29 'MAGIC_NUMBER',
31 'MAGIC_NUMBER',
32 'BLOCKSIZELOG_MAX',
33 'BLOCKSIZE_MAX',
30 'WINDOWLOG_MIN',
34 'WINDOWLOG_MIN',
31 'WINDOWLOG_MAX',
35 'WINDOWLOG_MAX',
32 'CHAINLOG_MIN',
36 'CHAINLOG_MIN',
@@ -39,6 +43,7 b' class TestModuleAttributes(unittest.Test'
39 'SEARCHLENGTH_MIN',
43 'SEARCHLENGTH_MIN',
40 'SEARCHLENGTH_MAX',
44 'SEARCHLENGTH_MAX',
41 'TARGETLENGTH_MIN',
45 'TARGETLENGTH_MIN',
46 'TARGETLENGTH_MAX',
42 'LDM_MINMATCH_MIN',
47 'LDM_MINMATCH_MIN',
43 'LDM_MINMATCH_MAX',
48 'LDM_MINMATCH_MAX',
44 'LDM_BUCKETSIZELOG_MAX',
49 'LDM_BUCKETSIZELOG_MAX',
@@ -57,7 +57,8 b' class TestTrainDictionary(unittest.TestC'
57 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
57 d = zstd.train_dictionary(8192, generate_samples(), threads=-1, steps=1,
58 d=16)
58 d=16)
59
59
60 self.assertEqual(d.k, 50)
60 # This varies by platform.
61 self.assertIn(d.k, (50, 2000))
61 self.assertEqual(d.d, 16)
62 self.assertEqual(d.d, 16)
62
63
63 @make_cffi
64 @make_cffi
@@ -60,3 +60,6 b" elif _module_policy == 'cffi':"
60 else:
60 else:
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
61 raise ImportError('unknown module import policy: %s; use default, cffi_fallback, '
62 'cext, or cffi' % _module_policy)
62 'cext, or cffi' % _module_policy)
63
64 # Keep this in sync with python-zstandard.h.
65 __version__ = '0.10.1'
@@ -182,6 +182,7 b' void compressobj_module_init(PyObject* m'
182 void compressor_module_init(PyObject* mod);
182 void compressor_module_init(PyObject* mod);
183 void compressionparams_module_init(PyObject* mod);
183 void compressionparams_module_init(PyObject* mod);
184 void constants_module_init(PyObject* mod);
184 void constants_module_init(PyObject* mod);
185 void compressionchunker_module_init(PyObject* mod);
185 void compressiondict_module_init(PyObject* mod);
186 void compressiondict_module_init(PyObject* mod);
186 void compressionreader_module_init(PyObject* mod);
187 void compressionreader_module_init(PyObject* mod);
187 void compressionwriter_module_init(PyObject* mod);
188 void compressionwriter_module_init(PyObject* mod);
@@ -209,7 +210,7 b' void zstd_module_init(PyObject* m) {'
209 We detect this mismatch here and refuse to load the module if this
210 We detect this mismatch here and refuse to load the module if this
210 scenario is detected.
211 scenario is detected.
211 */
212 */
212 if (ZSTD_VERSION_NUMBER != 10304 || ZSTD_versionNumber() != 10304) {
213 if (ZSTD_VERSION_NUMBER != 10306 || ZSTD_versionNumber() != 10306) {
213 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
214 PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version");
214 return;
215 return;
215 }
216 }
@@ -219,6 +220,7 b' void zstd_module_init(PyObject* m) {'
219 compressiondict_module_init(m);
220 compressiondict_module_init(m);
220 compressobj_module_init(m);
221 compressobj_module_init(m);
221 compressor_module_init(m);
222 compressor_module_init(m);
223 compressionchunker_module_init(m);
222 compressionreader_module_init(m);
224 compressionreader_module_init(m);
223 compressionwriter_module_init(m);
225 compressionwriter_module_init(m);
224 compressoriterator_module_init(m);
226 compressoriterator_module_init(m);
@@ -1,8 +1,7 b''
1 /* ******************************************************************
1 /* ******************************************************************
2 bitstream
2 bitstream
3 Part of FSE library
3 Part of FSE library
4 header file (to include)
4 Copyright (C) 2013-present, Yann Collet.
5 Copyright (C) 2013-2017, Yann Collet.
6
5
7 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8
7
@@ -49,21 +48,10 b' extern "C" {'
49 * Dependencies
48 * Dependencies
50 ******************************************/
49 ******************************************/
51 #include "mem.h" /* unaligned access routines */
50 #include "mem.h" /* unaligned access routines */
51 #include "debug.h" /* assert(), DEBUGLOG(), RAWLOG() */
52 #include "error_private.h" /* error codes and messages */
52 #include "error_private.h" /* error codes and messages */
53
53
54
54
55 /*-*************************************
56 * Debug
57 ***************************************/
58 #if defined(BIT_DEBUG) && (BIT_DEBUG>=1)
59 # include <assert.h>
60 #else
61 # ifndef assert
62 # define assert(condition) ((void)0)
63 # endif
64 #endif
65
66
67 /*=========================================
55 /*=========================================
68 * Target specific
56 * Target specific
69 =========================================*/
57 =========================================*/
@@ -83,8 +71,7 b' extern "C" {'
83 * A critical property of these streams is that they encode and decode in **reverse** direction.
71 * A critical property of these streams is that they encode and decode in **reverse** direction.
84 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
72 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
85 */
73 */
86 typedef struct
74 typedef struct {
87 {
88 size_t bitContainer;
75 size_t bitContainer;
89 unsigned bitPos;
76 unsigned bitPos;
90 char* startPtr;
77 char* startPtr;
@@ -118,8 +105,7 b' MEM_STATIC size_t BIT_closeCStream(BIT_C'
118 /*-********************************************
105 /*-********************************************
119 * bitStream decoding API (read backward)
106 * bitStream decoding API (read backward)
120 **********************************************/
107 **********************************************/
121 typedef struct
108 typedef struct {
122 {
123 size_t bitContainer;
109 size_t bitContainer;
124 unsigned bitsConsumed;
110 unsigned bitsConsumed;
125 const char* ptr;
111 const char* ptr;
@@ -236,7 +222,8 b' MEM_STATIC void BIT_addBits(BIT_CStream_'
236 }
222 }
237
223
238 /*! BIT_addBitsFast() :
224 /*! BIT_addBitsFast() :
239 * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
225 * works only if `value` is _clean_,
226 * meaning all high bits above nbBits are 0 */
240 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
227 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC,
241 size_t value, unsigned nbBits)
228 size_t value, unsigned nbBits)
242 {
229 {
@@ -77,9 +77,9 b''
77 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
77 * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
78 */
78 */
79 #ifndef DYNAMIC_BMI2
79 #ifndef DYNAMIC_BMI2
80 #if (defined(__clang__) && __has_attribute(__target__)) \
80 #if ((defined(__clang__) && __has_attribute(__target__)) \
81 || (defined(__GNUC__) \
81 || (defined(__GNUC__) \
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))) \
82 && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
83 && (defined(__x86_64__) || defined(_M_X86)) \
83 && (defined(__x86_64__) || defined(_M_X86)) \
84 && !defined(__BMI2__)
84 && !defined(__BMI2__)
85 # define DYNAMIC_BMI2 1
85 # define DYNAMIC_BMI2 1
@@ -88,15 +88,37 b''
88 #endif
88 #endif
89 #endif
89 #endif
90
90
91 /* prefetch */
91 /* prefetch
92 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
92 * can be disabled, by declaring NO_PREFETCH macro
93 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
93 * All prefetch invocations use a single default locality 2,
94 # define PREFETCH(ptr) _mm_prefetch((const char*)ptr, _MM_HINT_T0)
94 * generating instruction prefetcht1,
95 #elif defined(__GNUC__)
95 * which, according to Intel, means "load data into L2 cache".
96 # define PREFETCH(ptr) __builtin_prefetch(ptr, 0, 0)
96 * This is a good enough "middle ground" for the time being,
97 * though in theory, it would be better to specialize locality depending on data being prefetched.
98 * Tests could not determine any sensible difference based on locality value. */
99 #if defined(NO_PREFETCH)
100 # define PREFETCH(ptr) (void)(ptr) /* disabled */
97 #else
101 #else
98 # define PREFETCH(ptr) /* disabled */
102 # if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_I86)) /* _mm_prefetch() is not defined outside of x86/x64 */
99 #endif
103 # include <mmintrin.h> /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
104 # define PREFETCH(ptr) _mm_prefetch((const char*)(ptr), _MM_HINT_T1)
105 # elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
106 # define PREFETCH(ptr) __builtin_prefetch((ptr), 0 /* rw==read */, 2 /* locality */)
107 # else
108 # define PREFETCH(ptr) (void)(ptr) /* disabled */
109 # endif
110 #endif /* NO_PREFETCH */
111
112 #define CACHELINE_SIZE 64
113
114 #define PREFETCH_AREA(p, s) { \
115 const char* const _ptr = (const char*)(p); \
116 size_t const _size = (size_t)(s); \
117 size_t _pos; \
118 for (_pos=0; _pos<_size; _pos+=CACHELINE_SIZE) { \
119 PREFETCH(_ptr + _pos); \
120 } \
121 }
100
122
101 /* disable warnings */
123 /* disable warnings */
102 #ifdef _MSC_VER /* Visual Studio */
124 #ifdef _MSC_VER /* Visual Studio */
@@ -36,7 +36,7 b' MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void)'
36 U32 f1d = 0;
36 U32 f1d = 0;
37 U32 f7b = 0;
37 U32 f7b = 0;
38 U32 f7c = 0;
38 U32 f7c = 0;
39 #ifdef _MSC_VER
39 #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
40 int reg[4];
40 int reg[4];
41 __cpuid((int*)reg, 0);
41 __cpuid((int*)reg, 0);
42 {
42 {
@@ -72,8 +72,7 b' MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void)'
72 "cpuid\n\t"
72 "cpuid\n\t"
73 "popl %%ebx\n\t"
73 "popl %%ebx\n\t"
74 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
74 : "=a"(f1a), "=c"(f1c), "=d"(f1d)
75 : "a"(1)
75 : "a"(1));
76 :);
77 }
76 }
78 if (n >= 7) {
77 if (n >= 7) {
79 __asm__(
78 __asm__(
@@ -72,7 +72,21 b' size_t FSE_readNCount (short* normalized'
72 unsigned charnum = 0;
72 unsigned charnum = 0;
73 int previous0 = 0;
73 int previous0 = 0;
74
74
75 if (hbSize < 4) return ERROR(srcSize_wrong);
75 if (hbSize < 4) {
76 /* This function only works when hbSize >= 4 */
77 char buffer[4];
78 memset(buffer, 0, sizeof(buffer));
79 memcpy(buffer, headerBuffer, hbSize);
80 { size_t const countSize = FSE_readNCount(normalizedCounter, maxSVPtr, tableLogPtr,
81 buffer, sizeof(buffer));
82 if (FSE_isError(countSize)) return countSize;
83 if (countSize > hbSize) return ERROR(corruption_detected);
84 return countSize;
85 } }
86 assert(hbSize >= 4);
87
88 /* init */
89 memset(normalizedCounter, 0, (*maxSVPtr+1) * sizeof(normalizedCounter[0])); /* all symbols not present in NCount have a frequency of 0 */
76 bitStream = MEM_readLE32(ip);
90 bitStream = MEM_readLE32(ip);
77 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
91 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
78 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
92 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
@@ -105,6 +119,7 b' size_t FSE_readNCount (short* normalized'
105 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
119 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
106 while (charnum < n0) normalizedCounter[charnum++] = 0;
120 while (charnum < n0) normalizedCounter[charnum++] = 0;
107 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
121 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
122 assert((bitCount >> 3) <= 3); /* For first condition to work */
108 ip += bitCount>>3;
123 ip += bitCount>>3;
109 bitCount &= 7;
124 bitCount &= 7;
110 bitStream = MEM_readLE32(ip) >> bitCount;
125 bitStream = MEM_readLE32(ip) >> bitCount;
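The new hbSize < 4 path zero-pads the header to the parser's minimum 4-byte read, re-runs, then rejects a result that claims more bytes than the caller actually supplied. The control flow paraphrased in Python; read_table_header and its low-nibble encoding are invented stand-ins, not the real FSE format:

    import struct

    def read_table_header(header: bytes) -> int:
        if len(header) < 4:
            # Bootstrap needs a full 32-bit read: pad, retry, re-validate.
            padded = header + b'\x00' * (4 - len(header))
            consumed = read_table_header(padded)
            if consumed > len(header):
                raise ValueError('corruption detected')
            return consumed
        (word,) = struct.unpack('<I', header[:4])
        # Stand-in parse: pretend the low 4 bits encode the header size.
        return word & 0xF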
@@ -72,6 +72,7 b' extern "C" {'
72 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
72 #define FSE_VERSION_NUMBER (FSE_VERSION_MAJOR *100*100 + FSE_VERSION_MINOR *100 + FSE_VERSION_RELEASE)
73 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
73 FSE_PUBLIC_API unsigned FSE_versionNumber(void); /**< library version number; to be used when checking dll version */
74
74
75
75 /*-****************************************
76 /*-****************************************
76 * FSE simple functions
77 * FSE simple functions
77 ******************************************/
78 ******************************************/
@@ -129,7 +130,7 b' FSE_PUBLIC_API size_t FSE_compress2 (voi'
129 ******************************************/
130 ******************************************/
130 /*!
131 /*!
131 FSE_compress() does the following:
132 FSE_compress() does the following:
132 1. count symbol occurrence from source[] into table count[]
133 1. count symbol occurrence from source[] into table count[] (see hist.h)
133 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
134 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
134 3. save normalized counters to memory buffer using writeNCount()
135 3. save normalized counters to memory buffer using writeNCount()
135 4. build encoding table 'CTable' from normalized counters
136 4. build encoding table 'CTable' from normalized counters
@@ -147,15 +148,6 b' or to save and provide normalized distri'
147
148
148 /* *** COMPRESSION *** */
149 /* *** COMPRESSION *** */
149
150
150 /*! FSE_count():
151 Provides the precise count of each byte within a table 'count'.
152 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
153 *maxSymbolValuePtr will be updated if detected smaller than initial value.
154 @return : the count of the most frequent symbol (which is not identified).
155 if return == srcSize, there is only one symbol.
156 Can also return an error code, which can be tested with FSE_isError(). */
157 FSE_PUBLIC_API size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
158
159 /*! FSE_optimalTableLog():
151 /*! FSE_optimalTableLog():
160 dynamically downsize 'tableLog' when conditions are met.
152 dynamically downsize 'tableLog' when conditions are met.
161 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
153 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
@@ -167,7 +159,8 b' FSE_PUBLIC_API unsigned FSE_optimalTable'
167 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
159 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
168 @return : tableLog,
160 @return : tableLog,
169 or an errorCode, which can be tested using FSE_isError() */
161 or an errorCode, which can be tested using FSE_isError() */
170 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
162 FSE_PUBLIC_API size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog,
163 const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
171
164
172 /*! FSE_NCountWriteBound():
165 /*! FSE_NCountWriteBound():
173 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
166 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
@@ -178,8 +171,9 b' FSE_PUBLIC_API size_t FSE_NCountWriteBou'
178 Compactly save 'normalizedCounter' into 'buffer'.
171 Compactly save 'normalizedCounter' into 'buffer'.
179 @return : size of the compressed table,
172 @return : size of the compressed table,
180 or an errorCode, which can be tested using FSE_isError(). */
173 or an errorCode, which can be tested using FSE_isError(). */
181 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
174 FSE_PUBLIC_API size_t FSE_writeNCount (void* buffer, size_t bufferSize,
182
175 const short* normalizedCounter,
176 unsigned maxSymbolValue, unsigned tableLog);
183
177
184 /*! Constructor and Destructor of FSE_CTable.
178 /*! Constructor and Destructor of FSE_CTable.
185 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
179 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
@@ -250,7 +244,9 b' If there is an error, the function will '
250 @return : size read from 'rBuffer',
244 @return : size read from 'rBuffer',
251 or an errorCode, which can be tested using FSE_isError().
245 or an errorCode, which can be tested using FSE_isError().
252 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
246 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
253 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
247 FSE_PUBLIC_API size_t FSE_readNCount (short* normalizedCounter,
248 unsigned* maxSymbolValuePtr, unsigned* tableLogPtr,
249 const void* rBuffer, size_t rBuffSize);
254
250
255 /*! Constructor and Destructor of FSE_DTable.
251 /*! Constructor and Destructor of FSE_DTable.
256 Note that its size depends on 'tableLog' */
252 Note that its size depends on 'tableLog' */
@@ -325,33 +321,8 b' If there is an error, the function will '
325
321
326
322
327 /* *****************************************
323 /* *****************************************
328 * FSE advanced API
324 * FSE advanced API
329 *******************************************/
325 ***************************************** */
330 /* FSE_count_wksp() :
331 * Same as FSE_count(), but using an externally provided scratch buffer.
332 * `workSpace` size must be table of >= `1024` unsigned
333 */
334 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
335 const void* source, size_t sourceSize, unsigned* workSpace);
336
337 /** FSE_countFast() :
338 * same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr
339 */
340 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
341
342 /* FSE_countFast_wksp() :
343 * Same as FSE_countFast(), but using an externally provided scratch buffer.
344 * `workSpace` must be a table of minimum `1024` unsigned
345 */
346 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned* workSpace);
347
348 /*! FSE_count_simple() :
349 * Same as FSE_countFast(), but does not use any additional memory (not even on stack).
350 * This function is unsafe, and will segfault if any value within `src` is `> *maxSymbolValuePtr` (presuming it's also the size of `count`).
351 */
352 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
353
354
355
326
356 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
327 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
357 /**< same as FSE_optimalTableLog(), which used `minus==2` */
328 /**< same as FSE_optimalTableLog(), which used `minus==2` */
@@ -576,6 +547,39 b' MEM_STATIC void FSE_flushCState(BIT_CStr'
576 }
547 }
577
548
578
549
550 /* FSE_getMaxNbBits() :
551 * Approximate maximum cost of a symbol, in bits.
552 * Fractional values get rounded up (i.e. a symbol with a normalized frequency of 3 gives the same result as a frequency of 2)
553 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
554 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
555 MEM_STATIC U32 FSE_getMaxNbBits(const void* symbolTTPtr, U32 symbolValue)
556 {
557 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
558 return (symbolTT[symbolValue].deltaNbBits + ((1<<16)-1)) >> 16;
559 }
560
561 /* FSE_bitCost() :
562 * Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits)
563 * note 1 : assume symbolValue is valid (<= maxSymbolValue)
564 * note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits */
565 MEM_STATIC U32 FSE_bitCost(const void* symbolTTPtr, U32 tableLog, U32 symbolValue, U32 accuracyLog)
566 {
567 const FSE_symbolCompressionTransform* symbolTT = (const FSE_symbolCompressionTransform*) symbolTTPtr;
568 U32 const minNbBits = symbolTT[symbolValue].deltaNbBits >> 16;
569 U32 const threshold = (minNbBits+1) << 16;
570 assert(tableLog < 16);
571 assert(accuracyLog < 31-tableLog); /* ensure enough room for renormalization double shift */
572 { U32 const tableSize = 1 << tableLog;
573 U32 const deltaFromThreshold = threshold - (symbolTT[symbolValue].deltaNbBits + tableSize);
574 U32 const normalizedDeltaFromThreshold = (deltaFromThreshold << accuracyLog) >> tableLog; /* linear interpolation (very approximate) */
575 U32 const bitMultiplier = 1 << accuracyLog;
576 assert(symbolTT[symbolValue].deltaNbBits + tableSize <= threshold);
577 assert(normalizedDeltaFromThreshold <= bitMultiplier);
578 return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold;
579 }
580 }
581
582
579 /* ====== Decompression ====== */
583 /* ====== Decompression ====== */
580
584
581 typedef struct {
585 typedef struct {
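FSE_bitCost() returns a fixed-point cost with accuracyLog fractional bits, interpolating between minNbBits and minNbBits + 1 according to how far deltaNbBits sits below the threshold. The same arithmetic transcribed to Python for readability (delta_nb_bits would come from the symbol transform table):

    def fse_max_nb_bits(delta_nb_bits: int) -> int:
        # Ceiling of the fractional cost: round the 16.16 value up.
        return (delta_nb_bits + (1 << 16) - 1) >> 16

    def fse_bit_cost(delta_nb_bits: int, table_log: int, accuracy_log: int) -> int:
        min_nb_bits = delta_nb_bits >> 16
        threshold = (min_nb_bits + 1) << 16
        table_size = 1 << table_log
        delta_from_threshold = threshold - (delta_nb_bits + table_size)
        # Linear interpolation between min_nb_bits and min_nb_bits + 1.
        normalized = (delta_from_threshold << accuracy_log) >> table_log
        bit_multiplier = 1 << accuracy_log
        return (min_nb_bits + 1) * bit_multiplier - normalized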
@@ -49,7 +49,7 b''
49 * Error Management
49 * Error Management
50 ****************************************************************/
50 ****************************************************************/
51 #define FSE_isError ERR_isError
51 #define FSE_isError ERR_isError
52 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
52 #define FSE_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
53
53
54 /* check and forward error code */
54 /* check and forward error code */
55 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
55 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
@@ -1,7 +1,7 b''
1 /* ******************************************************************
1 /* ******************************************************************
2 Huffman coder, part of New Generation Entropy library
2 huff0 huffman codec,
3 header file
3 part of Finite State Entropy library
4 Copyright (C) 2013-2016, Yann Collet.
4 Copyright (C) 2013-present, Yann Collet.
5
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
7
@@ -163,25 +163,25 b' HUF_PUBLIC_API size_t HUF_compress4X_wks'
163 /* static allocation of HUF's DTable */
163 /* static allocation of HUF's DTable */
164 typedef U32 HUF_DTable;
164 typedef U32 HUF_DTable;
165 #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
165 #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
166 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
166 #define HUF_CREATE_STATIC_DTABLEX1(DTable, maxTableLog) \
167 HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
167 HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1) * 0x01000001) }
168 #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
168 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
169 HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
169 HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog) * 0x01000001) }
170
170
171
171
172 /* ****************************************
172 /* ****************************************
173 * Advanced decompression functions
173 * Advanced decompression functions
174 ******************************************/
174 ******************************************/
175 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
175 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
176 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
176 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
177
177
178 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
178 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
179 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
179 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
180 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
180 size_t HUF_decompress4X_hufOnly_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< considers RLE and uncompressed as errors */
181 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
181 size_t HUF_decompress4X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
182 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
182 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
183 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
183 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
184 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
184 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
185
185
186
186
187 /* ****************************************
187 /* ****************************************
@@ -208,7 +208,7 b' size_t HUF_compress4X_usingCTable(void* '
208 typedef enum {
208 typedef enum {
209 HUF_repeat_none, /**< Cannot use the previous table */
209 HUF_repeat_none, /**< Cannot use the previous table */
210 HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
210 HUF_repeat_check, /**< Can use the previous table but it must be checked. Note : The previous table must have been constructed by HUF_compress{1, 4}X_repeat */
211 HUF_repeat_valid /**< Can use the previous table and it is asumed to be valid */
211 HUF_repeat_valid /**< Can use the previous table and it is assumed to be valid */
212 } HUF_repeat;
212 } HUF_repeat;
213 /** HUF_compress4X_repeat() :
213 /** HUF_compress4X_repeat() :
214 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
214 * Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
@@ -227,7 +227,9 b' size_t HUF_compress4X_repeat(void* dst, '
227 */
227 */
228 #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
228 #define HUF_CTABLE_WORKSPACE_SIZE_U32 (2*HUF_SYMBOLVALUE_MAX +1 +1)
229 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
229 #define HUF_CTABLE_WORKSPACE_SIZE (HUF_CTABLE_WORKSPACE_SIZE_U32 * sizeof(unsigned))
230 size_t HUF_buildCTable_wksp (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize);
230 size_t HUF_buildCTable_wksp (HUF_CElt* tree,
231 const U32* count, U32 maxSymbolValue, U32 maxNbBits,
232 void* workSpace, size_t wkspSize);
231
233
232 /*! HUF_readStats() :
234 /*! HUF_readStats() :
233 * Read compact Huffman tree, saved by HUF_writeCTable().
235 * Read compact Huffman tree, saved by HUF_writeCTable().
@@ -242,10 +244,15 b' size_t HUF_readStats(BYTE* huffWeight, s'
242 * Loading a CTable saved with HUF_writeCTable() */
244 * Loading a CTable saved with HUF_writeCTable() */
243 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
245 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
244
246
247 /** HUF_getNbBits() :
248 * Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
249 * Note 1 : is not inlined, as HUF_CElt definition is private
250 * Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
251 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
245
252
246 /*
253 /*
247 * HUF_decompress() does the following:
254 * HUF_decompress() does the following:
248 * 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
255 * 1. select the decompression algorithm (X1, X2) based on pre-computed heuristics
249 * 2. build Huffman table from save, using HUF_readDTableX?()
256 * 2. build Huffman table from save, using HUF_readDTableX?()
250 * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
257 * 3. decode 1 or 4 segments in parallel using HUF_decompress?X?_usingDTable()
251 */
258 */
@@ -253,13 +260,13 b' size_t HUF_readCTable (HUF_CElt* CTable,'
253 /** HUF_selectDecoder() :
260 /** HUF_selectDecoder() :
254 * Tells which decoder is likely to decode faster,
261 * Tells which decoder is likely to decode faster,
255 * based on a set of pre-computed metrics.
262 * based on a set of pre-computed metrics.
256 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
263 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
257 * Assumption : 0 < dstSize <= 128 KB */
264 * Assumption : 0 < dstSize <= 128 KB */
258 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
265 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
259
266
260 /**
267 /**
261 * The minimum workspace size for the `workSpace` used in
268 * The minimum workspace size for the `workSpace` used in
262 * HUF_readDTableX2_wksp() and HUF_readDTableX4_wksp().
269 * HUF_readDTableX1_wksp() and HUF_readDTableX2_wksp().
263 *
270 *
264 * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
271 * The space used depends on HUF_TABLELOG_MAX, ranging from ~1500 bytes when
265 * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
272 * HUF_TABLE_LOG_MAX=12 to ~1850 bytes when HUF_TABLE_LOG_MAX=15.
@@ -270,14 +277,14 b' U32 HUF_selectDecoder (size_t dstSize, s'
270 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
277 #define HUF_DECOMPRESS_WORKSPACE_SIZE (2 << 10)
271 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
278 #define HUF_DECOMPRESS_WORKSPACE_SIZE_U32 (HUF_DECOMPRESS_WORKSPACE_SIZE / sizeof(U32))
272
279
280 size_t HUF_readDTableX1 (HUF_DTable* DTable, const void* src, size_t srcSize);
281 size_t HUF_readDTableX1_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
273 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
282 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
274 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
283 size_t HUF_readDTableX2_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
275 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
276 size_t HUF_readDTableX4_wksp (HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize);
277
284
278 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
285 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
286 size_t HUF_decompress4X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
279 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
287 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
280 size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
281
288
282
289
283 /* ====================== */
290 /* ====================== */
@@ -298,25 +305,25 b' size_t HUF_compress1X_repeat(void* dst, '
298 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
305 void* workSpace, size_t wkspSize, /**< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
299 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
306 HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
300
307
301 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
308 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
302 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
309 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
303
310
304 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
311 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
305 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
312 size_t HUF_decompress1X_DCtx_wksp (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize);
306 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
313 size_t HUF_decompress1X1_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
307 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
314 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< single-symbol decoder */
308 size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
315 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
309 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
316 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize); /**< double-symbols decoder */
310
317
311 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
318 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable); /**< automatic selection of single or double symbol decoder, based on DTable */
319 size_t HUF_decompress1X1_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
312 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
320 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
313 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
314
321
315 /* BMI2 variants.
322 /* BMI2 variants.
316 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
323 * If the CPU has BMI2 support, pass bmi2=1, otherwise pass bmi2=0.
317 */
324 */
318 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
325 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
319 size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
326 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
320 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
327 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2);
321 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
328 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2);
322
329
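A sketch of how the bmi2 flag is meant to be supplied, mirroring what this diff itself does later via the internal cpu.h helper in ZSTD_createCCtx_advanced() (in-tree callers only; dtable as built in the sketch above):

    int const bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());  /* 1 iff the CPU supports BMI2 */
    size_t const dSize = HUF_decompress1X_usingDTable_bmi2(
                             dst, dstCapacity, cSrc, cSrcSize, dtable, bmi2);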
@@ -57,11 +57,23 b' MEM_STATIC void MEM_check(void) { MEM_ST'
57 typedef uint64_t U64;
57 typedef uint64_t U64;
58 typedef int64_t S64;
58 typedef int64_t S64;
59 #else
59 #else
60 # include <limits.h>
61 #if CHAR_BIT != 8
62 # error "this implementation requires char to be exactly 8-bit type"
63 #endif
60 typedef unsigned char BYTE;
64 typedef unsigned char BYTE;
65 #if USHRT_MAX != 65535
66 # error "this implementation requires short to be exactly 16-bit type"
67 #endif
61 typedef unsigned short U16;
68 typedef unsigned short U16;
62 typedef signed short S16;
69 typedef signed short S16;
70 #if UINT_MAX != 4294967295
71 # error "this implementation requires int to be exactly 32-bit type"
72 #endif
63 typedef unsigned int U32;
73 typedef unsigned int U32;
64 typedef signed int S32;
74 typedef signed int S32;
75 /* note : there are no limits defined for long long type in C90.
76 * limits exist in C99, but in that case <stdint.h> is preferred */
65 typedef unsigned long long U64;
77 typedef unsigned long long U64;
66 typedef signed long long S64;
78 typedef signed long long S64;
67 #endif
79 #endif
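For comparison, a C11 build could express the same width guarantees with static assertions instead of the C90 preprocessor #error pattern above (a sketch only; this codebase deliberately stays C90-compatible):

    #include <limits.h>
    _Static_assert(CHAR_BIT == 8, "char must be exactly 8 bits");
    _Static_assert(USHRT_MAX == 65535, "short must be exactly 16 bits");
    _Static_assert(UINT_MAX == 4294967295u, "int must be exactly 32 bits");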
@@ -10,9 +10,10 b''
10
10
11
11
12 /* ====== Dependencies ======= */
12 /* ====== Dependencies ======= */
13 #include <stddef.h> /* size_t */
13 #include <stddef.h> /* size_t */
14 #include "debug.h" /* assert */
15 #include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
14 #include "pool.h"
16 #include "pool.h"
15 #include "zstd_internal.h" /* ZSTD_malloc, ZSTD_free */
16
17
17 /* ====== Compiler specifics ====== */
18 /* ====== Compiler specifics ====== */
18 #if defined(_MSC_VER)
19 #if defined(_MSC_VER)
@@ -33,8 +34,9 b' typedef struct POOL_job_s {'
33 struct POOL_ctx_s {
34 struct POOL_ctx_s {
34 ZSTD_customMem customMem;
35 ZSTD_customMem customMem;
35 /* Keep track of the threads */
36 /* Keep track of the threads */
36 ZSTD_pthread_t *threads;
37 ZSTD_pthread_t* threads;
37 size_t numThreads;
38 size_t threadCapacity;
39 size_t threadLimit;
38
40
39 /* The queue is a circular buffer */
41 /* The queue is a circular buffer */
40 POOL_job *queue;
42 POOL_job *queue;
@@ -58,10 +60,10 b' struct POOL_ctx_s {'
58 };
60 };
59
61
60 /* POOL_thread() :
62 /* POOL_thread() :
61 Worker thread for the thread pool.
63 * Worker thread for the thread pool.
62 Waits for jobs and executes them.
64 * Waits for jobs and executes them.
63 @returns : NULL on failure, non-NULL otherwise.
65 * @returns : NULL on failure, non-NULL otherwise.
64 */
66 */
65 static void* POOL_thread(void* opaque) {
67 static void* POOL_thread(void* opaque) {
66 POOL_ctx* const ctx = (POOL_ctx*)opaque;
68 POOL_ctx* const ctx = (POOL_ctx*)opaque;
67 if (!ctx) { return NULL; }
69 if (!ctx) { return NULL; }
@@ -69,14 +71,17 b' static void* POOL_thread(void* opaque) {'
69 /* Lock the mutex and wait for a non-empty queue or until shutdown */
71 /* Lock the mutex and wait for a non-empty queue or until shutdown */
70 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
72 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
71
73
72 while (ctx->queueEmpty && !ctx->shutdown) {
74 while ( ctx->queueEmpty
75 || (ctx->numThreadsBusy >= ctx->threadLimit) ) {
76 if (ctx->shutdown) {
77 /* even if !queueEmpty, (possible if numThreadsBusy >= threadLimit),
78 * a few threads will be shutdown while !queueEmpty,
79 * but enough threads will remain active to finish the queue */
80 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
81 return opaque;
82 }
73 ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
83 ZSTD_pthread_cond_wait(&ctx->queuePopCond, &ctx->queueMutex);
74 }
84 }
75 /* empty => shutting down: so stop */
76 if (ctx->queueEmpty) {
77 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
78 return opaque;
79 }
80 /* Pop a job off the queue */
85 /* Pop a job off the queue */
81 { POOL_job const job = ctx->queue[ctx->queueHead];
86 { POOL_job const job = ctx->queue[ctx->queueHead];
82 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
87 ctx->queueHead = (ctx->queueHead + 1) % ctx->queueSize;
@@ -89,30 +94,32 b' static void* POOL_thread(void* opaque) {'
89 job.function(job.opaque);
94 job.function(job.opaque);
90
95
91 /* If the intended queue size was 0, signal after finishing job */
96 /* If the intended queue size was 0, signal after finishing job */
97 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
98 ctx->numThreadsBusy--;
92 if (ctx->queueSize == 1) {
99 if (ctx->queueSize == 1) {
93 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
94 ctx->numThreadsBusy--;
95 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
96 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
100 ZSTD_pthread_cond_signal(&ctx->queuePushCond);
97 } }
101 }
102 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
103 }
98 } /* for (;;) */
104 } /* for (;;) */
99 /* Unreachable */
105 assert(0); /* Unreachable */
100 }
106 }
101
107
102 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
108 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize) {
103 return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
109 return POOL_create_advanced(numThreads, queueSize, ZSTD_defaultCMem);
104 }
110 }
105
111
106 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem) {
112 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
113 ZSTD_customMem customMem) {
107 POOL_ctx* ctx;
114 POOL_ctx* ctx;
108 /* Check the parameters */
115 /* Check parameters */
109 if (!numThreads) { return NULL; }
116 if (!numThreads) { return NULL; }
110 /* Allocate the context and zero initialize */
117 /* Allocate the context and zero initialize */
111 ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
118 ctx = (POOL_ctx*)ZSTD_calloc(sizeof(POOL_ctx), customMem);
112 if (!ctx) { return NULL; }
119 if (!ctx) { return NULL; }
113 /* Initialize the job queue.
120 /* Initialize the job queue.
114 * It needs one extra space since one space is wasted to differentiate empty
121 * It needs one extra space since one space is wasted to differentiate
115 * and full queues.
122 * empty and full queues.
116 */
123 */
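    /* Why the extra slot : with capacity C == queueSize and indices head
     * (next pop) / tail (next push), the two states are distinguished as
     *   empty : head == tail
     *   full  : (tail + 1) % C == head
     * Without the spare slot, head == tail would be ambiguous
     * (see isQueueFull() below). */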
117 ctx->queueSize = queueSize + 1;
124 ctx->queueSize = queueSize + 1;
118 ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
125 ctx->queue = (POOL_job*)ZSTD_malloc(ctx->queueSize * sizeof(POOL_job), customMem);
@@ -126,7 +133,7 b' POOL_ctx* POOL_create_advanced(size_t nu'
126 ctx->shutdown = 0;
133 ctx->shutdown = 0;
127 /* Allocate space for the thread handles */
134 /* Allocate space for the thread handles */
128 ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
135 ctx->threads = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), customMem);
129 ctx->numThreads = 0;
136 ctx->threadCapacity = 0;
130 ctx->customMem = customMem;
137 ctx->customMem = customMem;
131 /* Check for errors */
138 /* Check for errors */
132 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
139 if (!ctx->threads || !ctx->queue) { POOL_free(ctx); return NULL; }
@@ -134,11 +141,12 b' POOL_ctx* POOL_create_advanced(size_t nu'
134 { size_t i;
141 { size_t i;
135 for (i = 0; i < numThreads; ++i) {
142 for (i = 0; i < numThreads; ++i) {
136 if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
143 if (ZSTD_pthread_create(&ctx->threads[i], NULL, &POOL_thread, ctx)) {
137 ctx->numThreads = i;
144 ctx->threadCapacity = i;
138 POOL_free(ctx);
145 POOL_free(ctx);
139 return NULL;
146 return NULL;
140 } }
147 } }
141 ctx->numThreads = numThreads;
148 ctx->threadCapacity = numThreads;
149 ctx->threadLimit = numThreads;
142 }
150 }
143 return ctx;
151 return ctx;
144 }
152 }
@@ -156,8 +164,8 b' static void POOL_join(POOL_ctx* ctx) {'
156 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
164 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
157 /* Join all of the threads */
165 /* Join all of the threads */
158 { size_t i;
166 { size_t i;
159 for (i = 0; i < ctx->numThreads; ++i) {
167 for (i = 0; i < ctx->threadCapacity; ++i) {
160 ZSTD_pthread_join(ctx->threads[i], NULL);
168 ZSTD_pthread_join(ctx->threads[i], NULL); /* note : could fail */
161 } }
169 } }
162 }
170 }
163
171
@@ -172,24 +180,68 b' void POOL_free(POOL_ctx *ctx) {'
172 ZSTD_free(ctx, ctx->customMem);
180 ZSTD_free(ctx, ctx->customMem);
173 }
181 }
174
182
183
184
175 size_t POOL_sizeof(POOL_ctx *ctx) {
185 size_t POOL_sizeof(POOL_ctx *ctx) {
176 if (ctx==NULL) return 0; /* supports sizeof NULL */
186 if (ctx==NULL) return 0; /* supports sizeof NULL */
177 return sizeof(*ctx)
187 return sizeof(*ctx)
178 + ctx->queueSize * sizeof(POOL_job)
188 + ctx->queueSize * sizeof(POOL_job)
179 + ctx->numThreads * sizeof(ZSTD_pthread_t);
189 + ctx->threadCapacity * sizeof(ZSTD_pthread_t);
190 }
191
192
193 /* @return : 0 on success, 1 on error */
194 static int POOL_resize_internal(POOL_ctx* ctx, size_t numThreads)
195 {
196 if (numThreads <= ctx->threadCapacity) {
197 if (!numThreads) return 1;
198 ctx->threadLimit = numThreads;
199 return 0;
200 }
201 /* numThreads > threadCapacity */
202 { ZSTD_pthread_t* const threadPool = (ZSTD_pthread_t*)ZSTD_malloc(numThreads * sizeof(ZSTD_pthread_t), ctx->customMem);
203 if (!threadPool) return 1;
204 /* replace existing thread pool */
205 memcpy(threadPool, ctx->threads, ctx->threadCapacity * sizeof(*threadPool));
206 ZSTD_free(ctx->threads, ctx->customMem);
207 ctx->threads = threadPool;
208 /* Initialize additional threads */
209 { size_t threadId;
210 for (threadId = ctx->threadCapacity; threadId < numThreads; ++threadId) {
211 if (ZSTD_pthread_create(&threadPool[threadId], NULL, &POOL_thread, ctx)) {
212 ctx->threadCapacity = threadId;
213 return 1;
214 } }
215 } }
216 /* successfully expanded */
217 ctx->threadCapacity = numThreads;
218 ctx->threadLimit = numThreads;
219 return 0;
220 }
221
222 /* @return : 0 on success, 1 on error */
223 int POOL_resize(POOL_ctx* ctx, size_t numThreads)
224 {
225 int result;
226 if (ctx==NULL) return 1;
227 ZSTD_pthread_mutex_lock(&ctx->queueMutex);
228 result = POOL_resize_internal(ctx, numThreads);
229 ZSTD_pthread_cond_broadcast(&ctx->queuePopCond);
230 ZSTD_pthread_mutex_unlock(&ctx->queueMutex);
231 return result;
180 }
232 }
181
233
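    /* Note (follows from POOL_resize_internal() above) : shrinking only
     * lowers threadLimit; surplus threads keep sleeping in POOL_thread()'s
     * wait loop, and their resources are reclaimed only when POOL_free()
     * joins all threadCapacity threads. */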
182 /**
234 /**
183 * Returns 1 if the queue is full and 0 otherwise.
235 * Returns 1 if the queue is full and 0 otherwise.
184 *
236 *
185 * If the queueSize is 1 (the pool was created with an intended queueSize of 0),
237 * When queueSize is 1 (pool was created with an intended queueSize of 0),
186 * then a queue is empty if there is a thread free and no job is waiting.
238 * then a queue is empty if there is a thread free _and_ no job is waiting.
187 */
239 */
188 static int isQueueFull(POOL_ctx const* ctx) {
240 static int isQueueFull(POOL_ctx const* ctx) {
189 if (ctx->queueSize > 1) {
241 if (ctx->queueSize > 1) {
190 return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
242 return ctx->queueHead == ((ctx->queueTail + 1) % ctx->queueSize);
191 } else {
243 } else {
192 return ctx->numThreadsBusy == ctx->numThreads ||
244 return (ctx->numThreadsBusy == ctx->threadLimit) ||
193 !ctx->queueEmpty;
245 !ctx->queueEmpty;
194 }
246 }
195 }
247 }
@@ -263,6 +315,11 b' void POOL_free(POOL_ctx* ctx) {'
263 (void)ctx;
315 (void)ctx;
264 }
316 }
265
317
318 int POOL_resize(POOL_ctx* ctx, size_t numThreads) {
319 (void)ctx; (void)numThreads;
320 return 0;
321 }
322
266 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
323 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque) {
267 (void)ctx;
324 (void)ctx;
268 function(opaque);
325 function(opaque);
@@ -30,40 +30,50 b' typedef struct POOL_ctx_s POOL_ctx;'
30 */
30 */
31 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
31 POOL_ctx* POOL_create(size_t numThreads, size_t queueSize);
32
32
33 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize, ZSTD_customMem customMem);
33 POOL_ctx* POOL_create_advanced(size_t numThreads, size_t queueSize,
34 ZSTD_customMem customMem);
34
35
35 /*! POOL_free() :
36 /*! POOL_free() :
36 Free a thread pool returned by POOL_create().
37 * Free a thread pool returned by POOL_create().
37 */
38 */
38 void POOL_free(POOL_ctx* ctx);
39 void POOL_free(POOL_ctx* ctx);
39
40
41 /*! POOL_resize() :
42 * Expands or shrinks the pool's number of threads.
43 * This is more efficient than releasing + creating a new context,
44 * since it tries to preserve and re-use existing threads.
45 * `numThreads` must be at least 1.
46 * @return : 0 when resize was successful,
47 * !0 (typically 1) if there is an error.
48 * note : only numThreads can be resized, queueSize remains unchanged.
49 */
50 int POOL_resize(POOL_ctx* ctx, size_t numThreads);
51
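A minimal usage sketch (assumed caller code, not part of this diff):

    POOL_ctx* const pool = POOL_create(2 /*numThreads*/, 8 /*queueSize*/);
    if (pool != NULL) {
        if (POOL_resize(pool, 6)) {
            /* non-zero means the resize failed;
             * the pool keeps its previous thread count */
        }
        POOL_free(pool);
    }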
40 /*! POOL_sizeof() :
52 /*! POOL_sizeof() :
41 return memory usage of pool returned by POOL_create().
53 * @return threadpool memory usage
42 */
54 * note : compatible with NULL (returns 0 in this case)
55 */
43 size_t POOL_sizeof(POOL_ctx* ctx);
56 size_t POOL_sizeof(POOL_ctx* ctx);
44
57
45 /*! POOL_function :
58 /*! POOL_function :
46 The function type that can be added to a thread pool.
59 * The function type that can be added to a thread pool.
47 */
60 */
48 typedef void (*POOL_function)(void*);
61 typedef void (*POOL_function)(void*);
49 /*! POOL_add_function :
50 The function type for a generic thread pool add function.
51 */
52 typedef void (*POOL_add_function)(void*, POOL_function, void*);
53
62
54 /*! POOL_add() :
63 /*! POOL_add() :
55 Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
64 * Add the job `function(opaque)` to the thread pool. `ctx` must be valid.
56 Possibly blocks until there is room in the queue.
65 * Possibly blocks until there is room in the queue.
57 Note : The function may be executed asynchronously, so `opaque` must live until the function has been completed.
66 * Note : The function may be executed asynchronously,
58 */
67 * therefore, `opaque` must live until the function has completed.
68 */
59 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
69 void POOL_add(POOL_ctx* ctx, POOL_function function, void* opaque);
60
70
61
71
62 /*! POOL_tryAdd() :
72 /*! POOL_tryAdd() :
63 Add the job `function(opaque)` to the thread pool if a worker is available.
73 * Add the job `function(opaque)` to the thread pool _if_ a worker is available.
64 return immediately otherwise.
74 * Returns immediately even if not (does not block).
65 @return : 1 if successful, 0 if not.
75 * @return : 1 if successful, 0 if not.
66 */
76 */
67 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
77 int POOL_tryAdd(POOL_ctx* ctx, POOL_function function, void* opaque);
68
78
69
79
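A short sketch contrasting the two submission calls (doWork is a hypothetical job function; pool as created above):

    static void doWork(void* opaque) { (void)opaque; /* job body */ }

    POOL_add(pool, doWork, NULL);            /* may block until the queue has room */
    if (!POOL_tryAdd(pool, doWork, NULL)) {  /* never blocks; 0 means not queued */
        doWork(NULL);                        /* e.g. fall back to running inline */
    }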
@@ -98,6 +98,7 b''
98 /* Modify the local functions below should you wish to use some other memory routines */
98 /* Modify the local functions below should you wish to use some other memory routines */
99 /* for malloc(), free() */
99 /* for malloc(), free() */
100 #include <stdlib.h>
100 #include <stdlib.h>
101 #include <stddef.h> /* size_t */
101 static void* XXH_malloc(size_t s) { return malloc(s); }
102 static void* XXH_malloc(size_t s) { return malloc(s); }
102 static void XXH_free (void* p) { free(p); }
103 static void XXH_free (void* p) { free(p); }
103 /* for memcpy() */
104 /* for memcpy() */
@@ -46,11 +46,6 b' ZSTD_ErrorCode ZSTD_getErrorCode(size_t '
46 * provides error code string from enum */
46 * provides error code string from enum */
47 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
47 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorString(code); }
48
48
49 /*! g_debuglog_enable :
50 * turn on/off debug traces (global switch) */
51 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 2)
52 int g_debuglog_enable = 1;
53 #endif
54
49
55
50
56 /*=**************************************************************
51 /*=**************************************************************
@@ -21,6 +21,7 b''
21 ***************************************/
21 ***************************************/
22 #include "compiler.h"
22 #include "compiler.h"
23 #include "mem.h"
23 #include "mem.h"
24 #include "debug.h" /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
24 #include "error_private.h"
25 #include "error_private.h"
25 #define ZSTD_STATIC_LINKING_ONLY
26 #define ZSTD_STATIC_LINKING_ONLY
26 #include "zstd.h"
27 #include "zstd.h"
@@ -38,43 +39,8 b''
38 extern "C" {
39 extern "C" {
39 #endif
40 #endif
40
41
41
42 /* ---- static assert (debug) --- */
42 /*-*************************************
43 #define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
43 * Debug
44 ***************************************/
45 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
46 # include <assert.h>
47 #else
48 # ifndef assert
49 # define assert(condition) ((void)0)
50 # endif
51 #endif
52
53 #define ZSTD_STATIC_ASSERT(c) { enum { ZSTD_static_assert = 1/(int)(!!(c)) }; }
54
55 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
56 # include <stdio.h>
57 extern int g_debuglog_enable;
58 /* recommended values for ZSTD_DEBUG display levels :
59 * 1 : no display, enables assert() only
60 * 2 : reserved for currently active debug path
61 * 3 : events once per object lifetime (CCtx, CDict, etc.)
62 * 4 : events once per frame
63 * 5 : events once per block
64 * 6 : events once per sequence (*very* verbose) */
65 # define RAWLOG(l, ...) { \
66 if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
67 fprintf(stderr, __VA_ARGS__); \
68 } }
69 # define DEBUGLOG(l, ...) { \
70 if ((g_debuglog_enable) & (l<=ZSTD_DEBUG)) { \
71 fprintf(stderr, __FILE__ ": " __VA_ARGS__); \
72 fprintf(stderr, " \n"); \
73 } }
74 #else
75 # define RAWLOG(l, ...) {} /* disabled */
76 # define DEBUGLOG(l, ...) {} /* disabled */
77 #endif
78
44
79
45
80 /*-*************************************
46 /*-*************************************
@@ -113,8 +79,7 b' static const U32 repStartValue[ZSTD_REP_'
113 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
79 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
114 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
80 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
115
81
116 #define ZSTD_FRAMEIDSIZE 4
82 #define ZSTD_FRAMEIDSIZE 4 /* magic number size */
117 static const size_t ZSTD_frameIdSize = ZSTD_FRAMEIDSIZE; /* magic number size */
118
83
119 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
84 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
120 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
85 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
@@ -227,6 +192,8 b' typedef struct {'
227 BYTE* llCode;
192 BYTE* llCode;
228 BYTE* mlCode;
193 BYTE* mlCode;
229 BYTE* ofCode;
194 BYTE* ofCode;
195 size_t maxNbSeq;
196 size_t maxNbLit;
230 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
197 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
231 U32 longLengthPos;
198 U32 longLengthPos;
232 } seqStore_t;
199 } seqStore_t;
@@ -1,6 +1,6 b''
1 /* ******************************************************************
1 /* ******************************************************************
2 FSE : Finite State Entropy encoder
2 FSE : Finite State Entropy encoder
3 Copyright (C) 2013-2015, Yann Collet.
3 Copyright (C) 2013-present, Yann Collet.
4
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
6
@@ -37,9 +37,11 b''
37 ****************************************************************/
37 ****************************************************************/
38 #include <stdlib.h> /* malloc, free, qsort */
38 #include <stdlib.h> /* malloc, free, qsort */
39 #include <string.h> /* memcpy, memset */
39 #include <string.h> /* memcpy, memset */
40 #include <stdio.h> /* printf (debug) */
40 #include "compiler.h"
41 #include "mem.h" /* U32, U16, etc. */
42 #include "debug.h" /* assert, DEBUGLOG */
43 #include "hist.h" /* HIST_count_wksp */
41 #include "bitstream.h"
44 #include "bitstream.h"
42 #include "compiler.h"
43 #define FSE_STATIC_LINKING_ONLY
45 #define FSE_STATIC_LINKING_ONLY
44 #include "fse.h"
46 #include "fse.h"
45 #include "error_private.h"
47 #include "error_private.h"
@@ -49,7 +51,6 b''
49 * Error Management
51 * Error Management
50 ****************************************************************/
52 ****************************************************************/
51 #define FSE_isError ERR_isError
53 #define FSE_isError ERR_isError
52 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
53
54
54
55
55 /* **************************************************************
56 /* **************************************************************
@@ -82,7 +83,9 b''
82 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
83 * wkspSize should be sized to handle worst case situation, which is `1<<max_tableLog * sizeof(FSE_FUNCTION_TYPE)`
83 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
84 * workSpace must also be properly aligned with FSE_FUNCTION_TYPE requirements
84 */
85 */
85 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize)
86 size_t FSE_buildCTable_wksp(FSE_CTable* ct,
87 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
88 void* workSpace, size_t wkspSize)
86 {
89 {
87 U32 const tableSize = 1 << tableLog;
90 U32 const tableSize = 1 << tableLog;
88 U32 const tableMask = tableSize - 1;
91 U32 const tableMask = tableSize - 1;
@@ -100,9 +103,14 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
100 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
103 if (((size_t)1 << tableLog) * sizeof(FSE_FUNCTION_TYPE) > wkspSize) return ERROR(tableLog_tooLarge);
101 tableU16[-2] = (U16) tableLog;
104 tableU16[-2] = (U16) tableLog;
102 tableU16[-1] = (U16) maxSymbolValue;
105 tableU16[-1] = (U16) maxSymbolValue;
106 assert(tableLog < 16); /* required for threshold strategy to work */
103
107
104 /* For explanations on how to distribute symbol values over the table :
108 /* For explanations on how to distribute symbol values over the table :
105 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
109 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
110
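    /* Worked micro-example of the spreading loop below, assuming the usual
     * FSE_TABLESTEP formula ((tableSize>>1) + (tableSize>>3) + 3) :
     * for tableLog=5, tableSize=32 and step = 16+4+3 = 23. Since 23 is odd,
     * it is coprime with 32, so position = (position + 23) & 31 visits all
     * 32 slots exactly once before returning to 0. */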
111 #ifdef __clang_analyzer__
112 memset(tableSymbol, 0, sizeof(*tableSymbol) * tableSize); /* useless initialization, just to keep scan-build happy */
113 #endif
106
114
107 /* symbol start positions */
115 /* symbol start positions */
108 { U32 u;
116 { U32 u;
@@ -122,13 +130,15 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
122 U32 symbol;
130 U32 symbol;
123 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
131 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
124 int nbOccurences;
132 int nbOccurences;
125 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
133 int const freq = normalizedCounter[symbol];
134 for (nbOccurences=0; nbOccurences<freq; nbOccurences++) {
126 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
135 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
127 position = (position + step) & tableMask;
136 position = (position + step) & tableMask;
128 while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
137 while (position > highThreshold)
138 position = (position + step) & tableMask; /* Low proba area */
129 } }
139 } }
130
140
131 if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */
141 assert(position==0); /* Must have initialized all positions */
132 }
142 }
133
143
134 /* Build table */
144 /* Build table */
@@ -143,7 +153,10 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
143 for (s=0; s<=maxSymbolValue; s++) {
153 for (s=0; s<=maxSymbolValue; s++) {
144 switch (normalizedCounter[s])
154 switch (normalizedCounter[s])
145 {
155 {
146 case 0: break;
156 case 0:
157 /* filling nonetheless, for compatibility with FSE_getMaxNbBits() */
158 symbolTT[s].deltaNbBits = ((tableLog+1) << 16) - (1<<tableLog);
159 break;
147
160
148 case -1:
161 case -1:
149 case 1:
162 case 1:
@@ -160,6 +173,18 b' size_t FSE_buildCTable_wksp(FSE_CTable* '
160 total += normalizedCounter[s];
173 total += normalizedCounter[s];
161 } } } }
174 } } } }
162
175
176 #if 0 /* debug : symbol costs */
177 DEBUGLOG(5, "\n --- table statistics : ");
178 { U32 symbol;
179 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
180 DEBUGLOG(5, "%3u: w=%3i, maxBits=%u, fracBits=%.2f",
181 symbol, normalizedCounter[symbol],
182 FSE_getMaxNbBits(symbolTT, symbol),
183 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
184 }
185 }
186 #endif
187
163 return 0;
188 return 0;
164 }
189 }
165
190
@@ -174,8 +199,9 b' size_t FSE_buildCTable(FSE_CTable* ct, c'
174
199
175 #ifndef FSE_COMMONDEFS_ONLY
200 #ifndef FSE_COMMONDEFS_ONLY
176
201
202
177 /*-**************************************************************
203 /*-**************************************************************
178 * FSE NCount encoding-decoding
204 * FSE NCount encoding
179 ****************************************************************/
205 ****************************************************************/
180 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
206 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
181 {
207 {
@@ -183,9 +209,10 b' size_t FSE_NCountWriteBound(unsigned max'
183 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
209 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
184 }
210 }
185
211
186 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
212 static size_t
187 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
213 FSE_writeNCount_generic (void* header, size_t headerBufferSize,
188 unsigned writeIsSafe)
214 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
215 unsigned writeIsSafe)
189 {
216 {
190 BYTE* const ostart = (BYTE*) header;
217 BYTE* const ostart = (BYTE*) header;
191 BYTE* out = ostart;
218 BYTE* out = ostart;
@@ -194,13 +221,12 b' static size_t FSE_writeNCount_generic (v'
194 const int tableSize = 1 << tableLog;
221 const int tableSize = 1 << tableLog;
195 int remaining;
222 int remaining;
196 int threshold;
223 int threshold;
197 U32 bitStream;
224 U32 bitStream = 0;
198 int bitCount;
225 int bitCount = 0;
199 unsigned charnum = 0;
226 unsigned symbol = 0;
200 int previous0 = 0;
227 unsigned const alphabetSize = maxSymbolValue + 1;
228 int previousIs0 = 0;
201
229
202 bitStream = 0;
203 bitCount = 0;
204 /* Table Size */
230 /* Table Size */
205 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
231 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
206 bitCount += 4;
232 bitCount += 4;
@@ -210,48 +236,53 b' static size_t FSE_writeNCount_generic (v'
210 threshold = tableSize;
236 threshold = tableSize;
211 nbBits = tableLog+1;
237 nbBits = tableLog+1;
212
238
213 while (remaining>1) { /* stops at 1 */
239 while ((symbol < alphabetSize) && (remaining>1)) { /* stops at 1 */
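    /* Format recap : counts are packed LSB-first into the bitStream/bitCount
     * accumulator and flushed two bytes at a time. After a zero count,
     * previousIs0 triggers run-length coding of subsequent zeros :
     * one 0xFFFF word per 24 zeros (eight 2-bit '3' repeat tokens),
     * then a 2-bit '3' per remaining group of 3, then a final 2-bit 0-2. */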
214 if (previous0) {
240 if (previousIs0) {
215 unsigned start = charnum;
241 unsigned start = symbol;
216 while (!normalizedCounter[charnum]) charnum++;
242 while ((symbol < alphabetSize) && !normalizedCounter[symbol]) symbol++;
217 while (charnum >= start+24) {
243 if (symbol == alphabetSize) break; /* incorrect distribution */
244 while (symbol >= start+24) {
218 start+=24;
245 start+=24;
219 bitStream += 0xFFFFU << bitCount;
246 bitStream += 0xFFFFU << bitCount;
220 if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
247 if ((!writeIsSafe) && (out > oend-2))
248 return ERROR(dstSize_tooSmall); /* Buffer overflow */
221 out[0] = (BYTE) bitStream;
249 out[0] = (BYTE) bitStream;
222 out[1] = (BYTE)(bitStream>>8);
250 out[1] = (BYTE)(bitStream>>8);
223 out+=2;
251 out+=2;
224 bitStream>>=16;
252 bitStream>>=16;
225 }
253 }
226 while (charnum >= start+3) {
254 while (symbol >= start+3) {
227 start+=3;
255 start+=3;
228 bitStream += 3 << bitCount;
256 bitStream += 3 << bitCount;
229 bitCount += 2;
257 bitCount += 2;
230 }
258 }
231 bitStream += (charnum-start) << bitCount;
259 bitStream += (symbol-start) << bitCount;
232 bitCount += 2;
260 bitCount += 2;
233 if (bitCount>16) {
261 if (bitCount>16) {
234 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
262 if ((!writeIsSafe) && (out > oend - 2))
263 return ERROR(dstSize_tooSmall); /* Buffer overflow */
235 out[0] = (BYTE)bitStream;
264 out[0] = (BYTE)bitStream;
236 out[1] = (BYTE)(bitStream>>8);
265 out[1] = (BYTE)(bitStream>>8);
237 out += 2;
266 out += 2;
238 bitStream >>= 16;
267 bitStream >>= 16;
239 bitCount -= 16;
268 bitCount -= 16;
240 } }
269 } }
241 { int count = normalizedCounter[charnum++];
270 { int count = normalizedCounter[symbol++];
242 int const max = (2*threshold-1)-remaining;
271 int const max = (2*threshold-1) - remaining;
243 remaining -= count < 0 ? -count : count;
272 remaining -= count < 0 ? -count : count;
244 count++; /* +1 for extra accuracy */
273 count++; /* +1 for extra accuracy */
245 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
274 if (count>=threshold)
275 count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
246 bitStream += count << bitCount;
276 bitStream += count << bitCount;
247 bitCount += nbBits;
277 bitCount += nbBits;
248 bitCount -= (count<max);
278 bitCount -= (count<max);
249 previous0 = (count==1);
279 previousIs0 = (count==1);
250 if (remaining<1) return ERROR(GENERIC);
280 if (remaining<1) return ERROR(GENERIC);
251 while (remaining<threshold) { nbBits--; threshold>>=1; }
281 while (remaining<threshold) { nbBits--; threshold>>=1; }
252 }
282 }
253 if (bitCount>16) {
283 if (bitCount>16) {
254 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
284 if ((!writeIsSafe) && (out > oend - 2))
285 return ERROR(dstSize_tooSmall); /* Buffer overflow */
255 out[0] = (BYTE)bitStream;
286 out[0] = (BYTE)bitStream;
256 out[1] = (BYTE)(bitStream>>8);
287 out[1] = (BYTE)(bitStream>>8);
257 out += 2;
288 out += 2;
@@ -259,19 +290,23 b' static size_t FSE_writeNCount_generic (v'
259 bitCount -= 16;
290 bitCount -= 16;
260 } }
291 } }
261
292
293 if (remaining != 1)
294 return ERROR(GENERIC); /* incorrect normalized distribution */
295 assert(symbol <= alphabetSize);
296
262 /* flush remaining bitStream */
297 /* flush remaining bitStream */
263 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
298 if ((!writeIsSafe) && (out > oend - 2))
299 return ERROR(dstSize_tooSmall); /* Buffer overflow */
264 out[0] = (BYTE)bitStream;
300 out[0] = (BYTE)bitStream;
265 out[1] = (BYTE)(bitStream>>8);
301 out[1] = (BYTE)(bitStream>>8);
266 out+= (bitCount+7) /8;
302 out+= (bitCount+7) /8;
267
303
268 if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
269
270 return (out-ostart);
304 return (out-ostart);
271 }
305 }
272
306
273
307
274 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
308 size_t FSE_writeNCount (void* buffer, size_t bufferSize,
309 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
275 {
310 {
276 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
311 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported */
277 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
312 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
@@ -279,179 +314,13 b' size_t FSE_writeNCount (void* buffer, si'
279 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
314 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
280 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
315 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
281
316
282 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
317 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1 /* write in buffer is safe */);
283 }
284
285
286
287 /*-**************************************************************
288 * Counting histogram
289 ****************************************************************/
290 /*! FSE_count_simple
291 This function counts byte values within `src`, and stores the histogram into table `count`.
292 It doesn't use any additional memory.
293 But this function is unsafe : it doesn't check that all values within `src` can fit into `count`.
294 For this reason, prefer using a table `count` with 256 elements.
295 @return : count of most numerous element.
296 */
297 size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
298 const void* src, size_t srcSize)
299 {
300 const BYTE* ip = (const BYTE*)src;
301 const BYTE* const end = ip + srcSize;
302 unsigned maxSymbolValue = *maxSymbolValuePtr;
303 unsigned max=0;
304
305 memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
306 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
307
308 while (ip<end) {
309 assert(*ip <= maxSymbolValue);
310 count[*ip++]++;
311 }
312
313 while (!count[maxSymbolValue]) maxSymbolValue--;
314 *maxSymbolValuePtr = maxSymbolValue;
315
316 { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
317
318 return (size_t)max;
319 }
318 }
320
319
321
320
322 /* FSE_count_parallel_wksp() :
323 * Same as FSE_count_parallel(), but using an externally provided scratch buffer.
324 * `workSpace` size must be a minimum of `1024 * sizeof(unsigned)`.
325 * @return : largest histogram frequency, or an error code (notably when histogram would be larger than *maxSymbolValuePtr). */
326 static size_t FSE_count_parallel_wksp(
327 unsigned* count, unsigned* maxSymbolValuePtr,
328 const void* source, size_t sourceSize,
329 unsigned checkMax, unsigned* const workSpace)
330 {
331 const BYTE* ip = (const BYTE*)source;
332 const BYTE* const iend = ip+sourceSize;
333 unsigned maxSymbolValue = *maxSymbolValuePtr;
334 unsigned max=0;
335 U32* const Counting1 = workSpace;
336 U32* const Counting2 = Counting1 + 256;
337 U32* const Counting3 = Counting2 + 256;
338 U32* const Counting4 = Counting3 + 256;
339
340 memset(workSpace, 0, 4*256*sizeof(unsigned));
341
342 /* safety checks */
343 if (!sourceSize) {
344 memset(count, 0, maxSymbolValue + 1);
345 *maxSymbolValuePtr = 0;
346 return 0;
347 }
348 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
349
350 /* by stripes of 16 bytes */
351 { U32 cached = MEM_read32(ip); ip += 4;
352 while (ip < iend-15) {
353 U32 c = cached; cached = MEM_read32(ip); ip += 4;
354 Counting1[(BYTE) c ]++;
355 Counting2[(BYTE)(c>>8) ]++;
356 Counting3[(BYTE)(c>>16)]++;
357 Counting4[ c>>24 ]++;
358 c = cached; cached = MEM_read32(ip); ip += 4;
359 Counting1[(BYTE) c ]++;
360 Counting2[(BYTE)(c>>8) ]++;
361 Counting3[(BYTE)(c>>16)]++;
362 Counting4[ c>>24 ]++;
363 c = cached; cached = MEM_read32(ip); ip += 4;
364 Counting1[(BYTE) c ]++;
365 Counting2[(BYTE)(c>>8) ]++;
366 Counting3[(BYTE)(c>>16)]++;
367 Counting4[ c>>24 ]++;
368 c = cached; cached = MEM_read32(ip); ip += 4;
369 Counting1[(BYTE) c ]++;
370 Counting2[(BYTE)(c>>8) ]++;
371 Counting3[(BYTE)(c>>16)]++;
372 Counting4[ c>>24 ]++;
373 }
374 ip-=4;
375 }
376
377 /* finish last symbols */
378 while (ip<iend) Counting1[*ip++]++;
379
380 if (checkMax) { /* verify stats will fit into destination table */
381 U32 s; for (s=255; s>maxSymbolValue; s--) {
382 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
383 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
384 } }
385
386 { U32 s;
387 if (maxSymbolValue > 255) maxSymbolValue = 255;
388 for (s=0; s<=maxSymbolValue; s++) {
389 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
390 if (count[s] > max) max = count[s];
391 } }
392
393 while (!count[maxSymbolValue]) maxSymbolValue--;
394 *maxSymbolValuePtr = maxSymbolValue;
395 return (size_t)max;
396 }
397
398 /* FSE_countFast_wksp() :
399 * Same as FSE_countFast(), but using an externally provided scratch buffer.
400 * `workSpace` size must be table of >= `1024` unsigned */
401 size_t FSE_countFast_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
402 const void* source, size_t sourceSize,
403 unsigned* workSpace)
404 {
405 if (sourceSize < 1500) /* heuristic threshold */
406 return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
407 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 0, workSpace);
408 }
409
410 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
411 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
412 const void* source, size_t sourceSize)
413 {
414 unsigned tmpCounters[1024];
415 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, tmpCounters);
416 }
417
418 /* FSE_count_wksp() :
419 * Same as FSE_count(), but using an externally provided scratch buffer.
420 * `workSpace` size must be table of >= `1024` unsigned */
421 size_t FSE_count_wksp(unsigned* count, unsigned* maxSymbolValuePtr,
422 const void* source, size_t sourceSize, unsigned* workSpace)
423 {
424 if (*maxSymbolValuePtr < 255)
425 return FSE_count_parallel_wksp(count, maxSymbolValuePtr, source, sourceSize, 1, workSpace);
426 *maxSymbolValuePtr = 255;
427 return FSE_countFast_wksp(count, maxSymbolValuePtr, source, sourceSize, workSpace);
428 }
429
430 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
431 const void* src, size_t srcSize)
432 {
433 unsigned tmpCounters[1024];
434 return FSE_count_wksp(count, maxSymbolValuePtr, src, srcSize, tmpCounters);
435 }
436
437
438
439 /*-**************************************************************
321 /*-**************************************************************
440 * FSE Compression Code
322 * FSE Compression Code
441 ****************************************************************/
323 ****************************************************************/
442 /*! FSE_sizeof_CTable() :
443 FSE_CTable is a variable size structure which contains :
444 `U16 tableLog;`
445 `U16 maxSymbolValue;`
446 `U16 nextStateNumber[1 << tableLog];` // This size is variable
447 `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
448 Allocation is manual (C standard does not support variable-size structures).
449 */
450 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
451 {
452 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
453 return FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
454 }
455
324
456 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
325 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
457 {
326 {
@@ -466,7 +335,7 b' void FSE_freeCTable (FSE_CTable* ct) { f'
466 /* provides the minimum logSize to safely represent a distribution */
335 /* provides the minimum logSize to safely represent a distribution */
467 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
336 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
468 {
337 {
469 U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
338 U32 minBitsSrc = BIT_highbit32((U32)(srcSize)) + 1;
470 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
339 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
471 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
340 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
472 assert(srcSize > 1); /* Not supported, RLE should be used instead */
341 assert(srcSize > 1); /* Not supported, RLE should be used instead */
@@ -529,6 +398,9 b' static size_t FSE_normalizeM2(short* nor'
529 }
398 }
530 ToDistribute = (1 << tableLog) - distributed;
399 ToDistribute = (1 << tableLog) - distributed;
531
400
401 if (ToDistribute == 0)
402 return 0;
403
532 if ((total / ToDistribute) > lowOne) {
404 if ((total / ToDistribute) > lowOne) {
533 /* risk of rounding to zero */
405 /* risk of rounding to zero */
534 lowOne = (U32)((total * 3) / (ToDistribute * 2));
406 lowOne = (U32)((total * 3) / (ToDistribute * 2));
@@ -629,11 +501,11 b' size_t FSE_normalizeCount (short* normal'
629 U32 s;
501 U32 s;
630 U32 nTotal = 0;
502 U32 nTotal = 0;
631 for (s=0; s<=maxSymbolValue; s++)
503 for (s=0; s<=maxSymbolValue; s++)
632 printf("%3i: %4i \n", s, normalizedCounter[s]);
504 RAWLOG(2, "%3i: %4i \n", s, normalizedCounter[s]);
633 for (s=0; s<=maxSymbolValue; s++)
505 for (s=0; s<=maxSymbolValue; s++)
634 nTotal += abs(normalizedCounter[s]);
506 nTotal += abs(normalizedCounter[s]);
635 if (nTotal != (1U<<tableLog))
507 if (nTotal != (1U<<tableLog))
636 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
508 RAWLOG(2, "Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
637 getchar();
509 getchar();
638 }
510 }
639 #endif
511 #endif
@@ -800,7 +672,7 b' size_t FSE_compress_wksp (void* dst, siz'
800 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
672 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
801
673
802 /* Scan input and build symbol stats */
674 /* Scan input and build symbol stats */
803 { CHECK_V_F(maxCount, FSE_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
675 { CHECK_V_F(maxCount, HIST_count_wksp(count, &maxSymbolValue, src, srcSize, (unsigned*)scratchBuffer) );
804 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
676 if (maxCount == srcSize) return 1; /* only a single symbol in src : rle */
805 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
677 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
806 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
678 if (maxCount < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
@@ -835,7 +707,7 b' typedef struct {'
835 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
707 size_t FSE_compress2 (void* dst, size_t dstCapacity, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
836 {
708 {
837 fseWkspMax_t scratchBuffer;
709 fseWkspMax_t scratchBuffer;
838 FSE_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here mean scratchBuffer is not large enough */
710 DEBUG_STATIC_ASSERT(sizeof(scratchBuffer) >= FSE_WKSP_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)); /* compilation failures here mean scratchBuffer is not large enough */
839 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
711 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
840 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
712 return FSE_compress_wksp(dst, dstCapacity, src, srcSize, maxSymbolValue, tableLog, &scratchBuffer, sizeof(scratchBuffer));
841 }
713 }
@@ -45,8 +45,9 b''
45 ****************************************************************/
45 ****************************************************************/
46 #include <string.h> /* memcpy, memset */
46 #include <string.h> /* memcpy, memset */
47 #include <stdio.h> /* printf (debug) */
47 #include <stdio.h> /* printf (debug) */
48 #include "compiler.h"
48 #include "bitstream.h"
49 #include "bitstream.h"
49 #include "compiler.h"
50 #include "hist.h"
50 #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
51 #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
51 #include "fse.h" /* header compression */
52 #include "fse.h" /* header compression */
52 #define HUF_STATIC_LINKING_ONLY
53 #define HUF_STATIC_LINKING_ONLY
@@ -58,7 +59,7 b''
58 * Error Management
59 * Error Management
59 ****************************************************************/
60 ****************************************************************/
60 #define HUF_isError ERR_isError
61 #define HUF_isError ERR_isError
61 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
62 #define HUF_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c) /* use only *after* variable declarations */
62 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
63 #define CHECK_V_F(e, f) size_t const e = f; if (ERR_isError(e)) return e
63 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
64 #define CHECK_F(f) { CHECK_V_F(_var_err__, f); }
64
65
@@ -81,7 +82,7 b' unsigned HUF_optimalTableLog(unsigned ma'
81 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
82 * Note : all elements within weightTable are supposed to be <= HUF_TABLELOG_MAX.
82 */
83 */
83 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
84 #define MAX_FSE_TABLELOG_FOR_HUFF_HEADER 6
84 size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
85 static size_t HUF_compressWeights (void* dst, size_t dstSize, const void* weightTable, size_t wtSize)
85 {
86 {
86 BYTE* const ostart = (BYTE*) dst;
87 BYTE* const ostart = (BYTE*) dst;
87 BYTE* op = ostart;
88 BYTE* op = ostart;
@@ -100,9 +101,9 b' size_t HUF_compressWeights (void* dst, s'
100 if (wtSize <= 1) return 0; /* Not compressible */
101 if (wtSize <= 1) return 0; /* Not compressible */
101
102
102 /* Scan input and build symbol stats */
103 /* Scan input and build symbol stats */
103 { CHECK_V_F(maxCount, FSE_count_simple(count, &maxSymbolValue, weightTable, wtSize) );
104 { unsigned const maxCount = HIST_count_simple(count, &maxSymbolValue, weightTable, wtSize); /* never fails */
104 if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
105 if (maxCount == wtSize) return 1; /* only a single symbol in src : rle */
105 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
106 if (maxCount == 1) return 0; /* each symbol present maximum once => not compressible */
106 }
107 }
107
108
108 tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
109 tableLog = FSE_optimalTableLog(tableLog, wtSize, maxSymbolValue);
@@ -216,6 +217,13 b' size_t HUF_readCTable (HUF_CElt* CTable,'
216 return readSize;
217 return readSize;
217 }
218 }
218
219
220 U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
221 {
222 const HUF_CElt* table = (const HUF_CElt*)symbolTable;
223 assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
224 return table[symbolValue].nbBits;
225 }
226
219
227
220 typedef struct nodeElt_s {
228 typedef struct nodeElt_s {
221 U32 count;
229 U32 count;
@@ -660,9 +668,9 b' static size_t HUF_compress_internal ('
660 }
668 }
661
669
662 /* Scan input and build symbol stats */
670 /* Scan input and build symbol stats */
663 { CHECK_V_F(largest, FSE_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
671 { CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->count) );
664 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
672 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
665 if (largest <= (srcSize >> 7)+1) return 0; /* heuristic : probably not compressible enough */
673 if (largest <= (srcSize >> 7)+4) return 0; /* heuristic : probably not compressible enough */
666 }
674 }
667
675
668 /* Check validity of previous table */
676 /* Check validity of previous table */
This diff has been collapsed as it changes many lines (1413 lines changed; not shown).
@@ -8,21 +8,13 b''
8 * You may select, at your option, one of the above-listed licenses.
8 * You may select, at your option, one of the above-listed licenses.
9 */
9 */
10
10
11
12 /*-*************************************
13 * Tuning parameters
14 ***************************************/
15 #ifndef ZSTD_CLEVEL_DEFAULT
16 # define ZSTD_CLEVEL_DEFAULT 3
17 #endif
18
19
20 /*-*************************************
11 /*-*************************************
21 * Dependencies
12 * Dependencies
22 ***************************************/
13 ***************************************/
23 #include <string.h> /* memset */
14 #include <string.h> /* memset */
24 #include "cpu.h"
15 #include "cpu.h"
25 #include "mem.h"
16 #include "mem.h"
17 #include "hist.h" /* HIST_countFast_wksp */
26 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
18 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
27 #include "fse.h"
19 #include "fse.h"
28 #define HUF_STATIC_LINKING_ONLY
20 #define HUF_STATIC_LINKING_ONLY
@@ -54,7 +46,6 b' struct ZSTD_CDict_s {'
54 size_t workspaceSize;
46 size_t workspaceSize;
55 ZSTD_matchState_t matchState;
47 ZSTD_matchState_t matchState;
56 ZSTD_compressedBlockState_t cBlockState;
48 ZSTD_compressedBlockState_t cBlockState;
57 ZSTD_compressionParameters cParams;
58 ZSTD_customMem customMem;
49 ZSTD_customMem customMem;
59 U32 dictID;
50 U32 dictID;
60 }; /* typedef'd to ZSTD_CDict within "zstd.h" */
51 }; /* typedef'd to ZSTD_CDict within "zstd.h" */
@@ -64,17 +55,26 b' ZSTD_CCtx* ZSTD_createCCtx(void)'
64 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
55 return ZSTD_createCCtx_advanced(ZSTD_defaultCMem);
65 }
56 }
66
57
58 static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
59 {
60 assert(cctx != NULL);
61 memset(cctx, 0, sizeof(*cctx));
62 cctx->customMem = memManager;
63 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
64 { size_t const err = ZSTD_CCtx_resetParameters(cctx);
65 assert(!ZSTD_isError(err));
66 (void)err;
67 }
68 }
69
67 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
70 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
68 {
71 {
69 ZSTD_STATIC_ASSERT(zcss_init==0);
72 ZSTD_STATIC_ASSERT(zcss_init==0);
70 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
73 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN==(0ULL - 1));
71 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
74 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
72 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_calloc(sizeof(ZSTD_CCtx), customMem);
75 { ZSTD_CCtx* const cctx = (ZSTD_CCtx*)ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
73 if (!cctx) return NULL;
76 if (!cctx) return NULL;
74 cctx->customMem = customMem;
77 ZSTD_initCCtx(cctx, customMem);
75 cctx->requestedParams.compressionLevel = ZSTD_CLEVEL_DEFAULT;
76 cctx->requestedParams.fParams.contentSizeFlag = 1;
77 cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
78 return cctx;
78 return cctx;
79 }
79 }
80 }
80 }
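A usage sketch for the advanced constructor (assumed caller code; ZSTD_customMem is the { customAlloc, customFree, opaque } struct from the static-linking-only API):

    #include <stdlib.h>   /* malloc, free */

    static void* myAlloc(void* opaque, size_t size) { (void)opaque; return malloc(size); }
    static void  myFree (void* opaque, void* addr)  { (void)opaque; free(addr); }

    ZSTD_customMem const cmem = { myAlloc, myFree, NULL };
    ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(cmem);
    /* note the guard above : customAlloc and customFree must both be set
     * (or both left NULL), otherwise NULL is returned */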
@@ -102,17 +102,24 @@ ZSTD_CCtx* ZSTD_initStaticCCtx(void *wor
     return cctx;
 }
 
-size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx)
 {
-    if (cctx==NULL) return 0;   /* support free on NULL */
-    if (cctx->staticSize) return ERROR(memory_allocation);   /* not compatible with static CCtx */
+    assert(cctx != NULL);
+    assert(cctx->staticSize == 0);
     ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL;
     ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL;
 #ifdef ZSTD_MULTITHREAD
     ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL;
 #endif
+}
+
+size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
+{
+    if (cctx==NULL) return 0;   /* support free on NULL */
+    if (cctx->staticSize) return ERROR(memory_allocation);   /* not compatible with static CCtx */
+    ZSTD_freeCCtxContent(cctx);
     ZSTD_free(cctx, cctx->customMem);
-    return 0;   /* reserved as a potential error code in the future */
+    return 0;
 }
 
 
@@ -143,21 +150,6 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CS
 /* private API call, for dictBuilder only */
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
 
-ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
-        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
-{
-    ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
-    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
-    if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
-    if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
-    if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
-    if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
-    if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength;
-    if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
-    if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
-    return cParams;
-}
-
 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
         ZSTD_compressionParameters cParams)
 {
@@ -251,7 +243,6 @@ static int ZSTD_isUpdateAuthorized(ZSTD_
     case ZSTD_p_minMatch:
     case ZSTD_p_targetLength:
     case ZSTD_p_compressionStrategy:
-    case ZSTD_p_compressLiterals:
         return 1;
 
     case ZSTD_p_format:
@@ -268,6 +259,7 @@ static int ZSTD_isUpdateAuthorized(ZSTD_
     case ZSTD_p_ldmMinMatch:
     case ZSTD_p_ldmBucketSizeLog:
     case ZSTD_p_ldmHashEveryLog:
+    case ZSTD_p_forceAttachDict:
     default:
         return 0;
     }
@@ -302,7 +294,6 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*
         if (cctx->cdict) return ERROR(stage_wrong);
         return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
 
-    case ZSTD_p_compressLiterals:
     case ZSTD_p_contentSizeFlag:
     case ZSTD_p_checksumFlag:
     case ZSTD_p_dictIDFlag:
@@ -313,6 +304,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*
                  * default : 0 when using a CDict, 1 when using a Prefix */
         return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
 
+    case ZSTD_p_forceAttachDict:
+        return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value);
+
     case ZSTD_p_nbWorkers:
         if ((value>0) && cctx->staticSize) {
             return ERROR(parameter_unsupported);  /* MT not compatible with static alloc */
@@ -351,7 +345,6 @@ size_t ZSTD_CCtxParam_setParameter(
         int cLevel = (int)value;  /* cast expected to restore negative sign */
         if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel();
         if (cLevel) {  /* 0 : does not change current level */
-            CCtxParams->disableLiteralCompression = (cLevel<0);  /* negative levels disable huffman */
             CCtxParams->compressionLevel = cLevel;
         }
         if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel;
@@ -399,10 +392,6 @@ size_t ZSTD_CCtxParam_setParameter(
         CCtxParams->cParams.strategy = (ZSTD_strategy)value;
         return (size_t)CCtxParams->cParams.strategy;
 
-    case ZSTD_p_compressLiterals:
-        CCtxParams->disableLiteralCompression = !value;
-        return !CCtxParams->disableLiteralCompression;
-
     case ZSTD_p_contentSizeFlag :
         /* Content size written in frame header _when known_ (default:1) */
         DEBUGLOG(4, "set content size flag = %u", (value>0));
@@ -423,6 +412,12 @@ size_t ZSTD_CCtxParam_setParameter(
         CCtxParams->forceWindow = (value > 0);
         return CCtxParams->forceWindow;
 
+    case ZSTD_p_forceAttachDict :
+        CCtxParams->attachDictPref = value ?
+                                    (value > 0 ? ZSTD_dictForceAttach : ZSTD_dictForceCopy) :
+                                     ZSTD_dictDefaultAttach;
+        return CCtxParams->attachDictPref;
+
     case ZSTD_p_nbWorkers :
 #ifndef ZSTD_MULTITHREAD
         if (value>0) return ERROR(parameter_unsupported);
@@ -477,6 +472,98 @@ size_t ZSTD_CCtxParam_setParameter(
     }
 }
 
+size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value)
+{
+    return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value);
+}
+
+size_t ZSTD_CCtxParam_getParameter(
+        ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, unsigned* value)
+{
+    switch(param)
+    {
+    case ZSTD_p_format :
+        *value = CCtxParams->format;
+        break;
+    case ZSTD_p_compressionLevel :
+        *value = CCtxParams->compressionLevel;
+        break;
+    case ZSTD_p_windowLog :
+        *value = CCtxParams->cParams.windowLog;
+        break;
+    case ZSTD_p_hashLog :
+        *value = CCtxParams->cParams.hashLog;
+        break;
+    case ZSTD_p_chainLog :
+        *value = CCtxParams->cParams.chainLog;
+        break;
+    case ZSTD_p_searchLog :
+        *value = CCtxParams->cParams.searchLog;
+        break;
+    case ZSTD_p_minMatch :
+        *value = CCtxParams->cParams.searchLength;
+        break;
+    case ZSTD_p_targetLength :
+        *value = CCtxParams->cParams.targetLength;
+        break;
+    case ZSTD_p_compressionStrategy :
+        *value = (unsigned)CCtxParams->cParams.strategy;
+        break;
+    case ZSTD_p_contentSizeFlag :
+        *value = CCtxParams->fParams.contentSizeFlag;
+        break;
+    case ZSTD_p_checksumFlag :
+        *value = CCtxParams->fParams.checksumFlag;
+        break;
+    case ZSTD_p_dictIDFlag :
+        *value = !CCtxParams->fParams.noDictIDFlag;
+        break;
+    case ZSTD_p_forceMaxWindow :
+        *value = CCtxParams->forceWindow;
+        break;
+    case ZSTD_p_forceAttachDict :
+        *value = CCtxParams->attachDictPref;
+        break;
+    case ZSTD_p_nbWorkers :
+#ifndef ZSTD_MULTITHREAD
+        assert(CCtxParams->nbWorkers == 0);
+#endif
+        *value = CCtxParams->nbWorkers;
+        break;
+    case ZSTD_p_jobSize :
+#ifndef ZSTD_MULTITHREAD
+        return ERROR(parameter_unsupported);
+#else
+        *value = CCtxParams->jobSize;
+        break;
+#endif
+    case ZSTD_p_overlapSizeLog :
+#ifndef ZSTD_MULTITHREAD
+        return ERROR(parameter_unsupported);
+#else
+        *value = CCtxParams->overlapSizeLog;
+        break;
+#endif
+    case ZSTD_p_enableLongDistanceMatching :
+        *value = CCtxParams->ldmParams.enableLdm;
+        break;
+    case ZSTD_p_ldmHashLog :
+        *value = CCtxParams->ldmParams.hashLog;
+        break;
+    case ZSTD_p_ldmMinMatch :
+        *value = CCtxParams->ldmParams.minMatchLength;
+        break;
+    case ZSTD_p_ldmBucketSizeLog :
+        *value = CCtxParams->ldmParams.bucketSizeLog;
+        break;
+    case ZSTD_p_ldmHashEveryLog :
+        *value = CCtxParams->ldmParams.hashEveryLog;
+        break;
+    default: return ERROR(parameter_unsupported);
+    }
+    return 0;
+}
+
 /** ZSTD_CCtx_setParametersUsingCCtxParams() :
  *  just applies `params` into `cctx`
  *  no action is performed, parameters are merely stored.
@@ -487,6 +574,7 @@ size_t ZSTD_CCtxParam_setParameter(
 size_t ZSTD_CCtx_setParametersUsingCCtxParams(
         ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params)
 {
+    DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams");
     if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
     if (cctx->cdict) return ERROR(stage_wrong);
 
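The getter added above makes every advanced parameter round-trippable before compression starts. A hedged usage sketch (ZSTD_CCtx_getParameter is an advanced API gated behind ZSTD_STATIC_LINKING_ONLY in this release; error handling elided):

    static unsigned roundTripWindowLog(ZSTD_CCtx* cctx)
    {
        unsigned wlog = 0;
        ZSTD_CCtx_setParameter(cctx, ZSTD_p_windowLog, 23);
        ZSTD_CCtx_getParameter(cctx, ZSTD_p_windowLog, &wlog);   /* reads back 23 */
        return wlog;
    }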
@@ -565,18 +653,19 @@ size_t ZSTD_CCtx_refPrefix_advanced(
     return 0;
 }
 
-static void ZSTD_startNewCompression(ZSTD_CCtx* cctx)
+/*! ZSTD_CCtx_reset() :
+ *  Also dumps dictionary */
+void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
 {
     cctx->streamStage = zcss_init;
     cctx->pledgedSrcSizePlusOne = 0;
 }
 
-/*! ZSTD_CCtx_reset() :
- *  Also dumps dictionary */
-void ZSTD_CCtx_reset(ZSTD_CCtx* cctx)
+size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx)
 {
-    ZSTD_startNewCompression(cctx);
+    if (cctx->streamStage != zcss_init) return ERROR(stage_wrong);
     cctx->cdict = NULL;
+    return ZSTD_CCtxParams_reset(&cctx->requestedParams);
 }
 
 /** ZSTD_checkCParams() :
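The old ZSTD_startNewCompression() helper is folded into ZSTD_CCtx_reset(), and parameter wiping moves to the new ZSTD_CCtx_resetParameters(). A sketch of the resulting two-level reset, assuming the semantics shown in the hunk above:

    ZSTD_CCtx_reset(cctx);   /* abort any frame in progress; requested parameters survive */
    {   size_t const r = ZSTD_CCtx_resetParameters(cctx);   /* also drop the cdict and restore defaults */
        (void)r;   /* r reports stage_wrong if called mid-frame */
    }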
@@ -589,8 +678,9 @@ size_t ZSTD_checkCParams(ZSTD_compressio
     CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
     CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
     CLAMPCHECK(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
-    if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN)
-        return ERROR(parameter_unsupported);
+    ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
+    if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
+        return ERROR(parameter_outOfBound);
     if ((U32)(cParams.strategy) > (U32)ZSTD_btultra)
         return ERROR(parameter_unsupported);
     return 0;
@@ -599,7 +689,8 @@ size_t ZSTD_checkCParams(ZSTD_compressio
 /** ZSTD_clampCParams() :
  *  make CParam values within valid range.
  *  @return : valid CParams */
-static ZSTD_compressionParameters ZSTD_clampCParams(ZSTD_compressionParameters cParams)
+static ZSTD_compressionParameters
+ZSTD_clampCParams(ZSTD_compressionParameters cParams)
 {
 #   define CLAMP(val,min,max) {   \
         if (val<min) val=min;     \
@@ -610,8 +701,10 @@ static ZSTD_compressionParameters ZSTD_c
     CLAMP(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
     CLAMP(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
     CLAMP(cParams.searchLength, ZSTD_SEARCHLENGTH_MIN, ZSTD_SEARCHLENGTH_MAX);
-    if ((U32)(cParams.targetLength) < ZSTD_TARGETLENGTH_MIN) cParams.targetLength = ZSTD_TARGETLENGTH_MIN;
-    if ((U32)(cParams.strategy) > (U32)ZSTD_btultra) cParams.strategy = ZSTD_btultra;
+    ZSTD_STATIC_ASSERT(ZSTD_TARGETLENGTH_MIN == 0);
+    if (cParams.targetLength > ZSTD_TARGETLENGTH_MAX)
+        cParams.targetLength = ZSTD_TARGETLENGTH_MAX;
+    CLAMP(cParams.strategy, ZSTD_fast, ZSTD_btultra);
     return cParams;
 }
 
@@ -627,8 +720,11 @@ static U32 ZSTD_cycleLog(U32 hashLog, ZS
     optimize `cPar` for a given input (`srcSize` and `dictSize`).
     mostly downsizing to reduce memory consumption and initialization latency.
     Both `srcSize` and `dictSize` are optional (use 0 if unknown).
-    Note : cPar is considered validated at this stage. Use ZSTD_checkCParams() to ensure that condition. */
-ZSTD_compressionParameters ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
+    Note : cPar is assumed validated. Use ZSTD_checkCParams() to ensure this condition. */
+static ZSTD_compressionParameters
+ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
+                            unsigned long long srcSize,
+                            size_t dictSize)
 {
     static const U64 minSrcSize = 513; /* (1<<9) + 1 */
     static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1);
@@ -648,7 +744,7 @@ ZSTD_compressionParameters ZSTD_adjustCP
                             ZSTD_highbit32(tSize-1) + 1;
         if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
     }
-    if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
+    if (cPar.hashLog > cPar.windowLog+1) cPar.hashLog = cPar.windowLog+1;
     {   U32 const cycleLog = ZSTD_cycleLog(cPar.chainLog, cPar.strategy);
         if (cycleLog > cPar.windowLog)
             cPar.chainLog -= (cycleLog - cPar.windowLog);
@@ -660,13 +756,34 @@ ZSTD_compressionParameters ZSTD_adjustCP
     return cPar;
 }
 
-ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
+ZSTD_compressionParameters
+ZSTD_adjustCParams(ZSTD_compressionParameters cPar,
+                   unsigned long long srcSize,
+                   size_t dictSize)
 {
     cPar = ZSTD_clampCParams(cPar);
     return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize);
 }
 
-static size_t ZSTD_sizeof_matchState(ZSTD_compressionParameters const* cParams, const U32 forCCtx)
+ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
+        const ZSTD_CCtx_params* CCtxParams, U64 srcSizeHint, size_t dictSize)
+{
+    ZSTD_compressionParameters cParams = ZSTD_getCParams(CCtxParams->compressionLevel, srcSizeHint, dictSize);
+    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+    if (CCtxParams->cParams.windowLog) cParams.windowLog = CCtxParams->cParams.windowLog;
+    if (CCtxParams->cParams.hashLog) cParams.hashLog = CCtxParams->cParams.hashLog;
+    if (CCtxParams->cParams.chainLog) cParams.chainLog = CCtxParams->cParams.chainLog;
+    if (CCtxParams->cParams.searchLog) cParams.searchLog = CCtxParams->cParams.searchLog;
+    if (CCtxParams->cParams.searchLength) cParams.searchLength = CCtxParams->cParams.searchLength;
+    if (CCtxParams->cParams.targetLength) cParams.targetLength = CCtxParams->cParams.targetLength;
+    if (CCtxParams->cParams.strategy) cParams.strategy = CCtxParams->cParams.strategy;
+    assert(!ZSTD_checkCParams(cParams));
+    return ZSTD_adjustCParams_internal(cParams, srcSizeHint, dictSize);
+}
+
+static size_t
+ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const U32 forCCtx)
 {
     size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
     size_t const hSize = ((size_t)1) << cParams->hashLog;
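ZSTD_getCParamsFromCCtxParams() moved below ZSTD_adjustCParams_internal() so it can finish with the adjustment pass. Its override rule is "non-zero wins": each cParam field the caller set explicitly replaces the level-derived default, and only then is the result downsized for the actual input. For instance, under this rule:

    /* level 3 supplies all defaults; only windowLog is pinned by the caller,
     * and the final cParams are still shrunk to fit srcSizeHint/dictSize */
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_compressionLevel, 3);
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_windowLog, 25);   /* 0 would mean "derive from level" */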
@@ -693,7 +810,7 @@ size_t ZSTD_estimateCCtxSize_usingCCtxPa
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog);
         U32 const divider = (cParams.searchLength==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
-        size_t const tokenSpace = blockSize + 11*maxNbSeq;
+        size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
         size_t const entropySpace = HUF_WORKSPACE_SIZE;
         size_t const blockStateSpace = 2 * sizeof(ZSTD_compressedBlockState_t);
         size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 1);
@@ -752,12 +869,14 @@ size_t ZSTD_estimateCStreamSize_usingCPa
     return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
 }
 
-static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel) {
+static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
+{
     ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, 0, 0);
     return ZSTD_estimateCStreamSize_usingCParams(cParams);
 }
 
-size_t ZSTD_estimateCStreamSize(int compressionLevel) {
+size_t ZSTD_estimateCStreamSize(int compressionLevel)
+{
     int level;
     size_t memBudget = 0;
     for (level=1; level<=compressionLevel; level++) {
@@ -786,9 +905,27 @@ ZSTD_frameProgression ZSTD_getFrameProgr
         fp.ingested = cctx->consumedSrcSize + buffered;
         fp.consumed = cctx->consumedSrcSize;
         fp.produced = cctx->producedCSize;
+        fp.flushed  = cctx->producedCSize;   /* simplified; some data might still be left within streaming output buffer */
+        fp.currentJobID = 0;
+        fp.nbActiveWorkers = 0;
         return fp;
 }   }
 
+/*! ZSTD_toFlushNow()
+ *  Only useful for multithreading scenarios currently (nbWorkers >= 1).
+ */
+size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx)
+{
+#ifdef ZSTD_MULTITHREAD
+    if (cctx->appliedParams.nbWorkers > 0) {
+        return ZSTDMT_toFlushNow(cctx->mtctx);
+    }
+#endif
+    (void)cctx;
+    return 0;   /* over-simplification; could also check if context is currently running in streaming mode, and in which case, report how many bytes are left to be flushed within output buffer */
+}
+
+
 
 static U32 ZSTD_equivalentCParams(ZSTD_compressionParameters cParams1,
                                   ZSTD_compressionParameters cParams2)
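Together with ZSTD_getFrameProgression(), the new ZSTD_toFlushNow() lets a caller poll a long-running (typically multi-worker) compression. A hedged monitoring sketch using the fields populated above (both are advanced APIs in this release):

    #include <stdio.h>

    static void reportProgress(ZSTD_CCtx* cctx)
    {
        ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);
        size_t const flushable = ZSTD_toFlushNow(cctx);   /* 0 in single-threaded mode, per the stub above */
        printf("ingested=%llu consumed=%llu produced=%llu flushable=%zu\n",
               (unsigned long long)fp.ingested, (unsigned long long)fp.consumed,
               (unsigned long long)fp.produced, flushable);
    }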
@@ -799,6 +936,20 @@ static U32 ZSTD_equivalentCParams(ZSTD_c
          & ((cParams1.searchLength==3) == (cParams2.searchLength==3));  /* hashlog3 space */
 }
 
+static void ZSTD_assertEqualCParams(ZSTD_compressionParameters cParams1,
+                                    ZSTD_compressionParameters cParams2)
+{
+    (void)cParams1;
+    (void)cParams2;
+    assert(cParams1.windowLog    == cParams2.windowLog);
+    assert(cParams1.chainLog     == cParams2.chainLog);
+    assert(cParams1.hashLog      == cParams2.hashLog);
+    assert(cParams1.searchLog    == cParams2.searchLog);
+    assert(cParams1.searchLength == cParams2.searchLength);
+    assert(cParams1.targetLength == cParams2.targetLength);
+    assert(cParams1.strategy     == cParams2.strategy);
+}
+
 /** The parameters are equivalent if ldm is not enabled in both sets or
   * all the parameters are equivalent. */
 static U32 ZSTD_equivalentLdmParams(ldmParams_t ldmParams1,
@@ -817,33 +968,51 @@ typedef enum { ZSTDb_not_buffered, ZSTDb
 /* ZSTD_sufficientBuff() :
  * check internal buffers exist for streaming if buffPol == ZSTDb_buffered .
  * Note : they are assumed to be correctly sized if ZSTD_equivalentCParams()==1 */
-static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t blockSize1,
+static U32 ZSTD_sufficientBuff(size_t bufferSize1, size_t maxNbSeq1,
+                            size_t maxNbLit1,
                             ZSTD_buffered_policy_e buffPol2,
                             ZSTD_compressionParameters cParams2,
                             U64 pledgedSrcSize)
 {
     size_t const windowSize2 = MAX(1, (size_t)MIN(((U64)1 << cParams2.windowLog), pledgedSrcSize));
     size_t const blockSize2 = MIN(ZSTD_BLOCKSIZE_MAX, windowSize2);
+    size_t const maxNbSeq2 = blockSize2 / ((cParams2.searchLength == 3) ? 3 : 4);
+    size_t const maxNbLit2 = blockSize2;
     size_t const neededBufferSize2 = (buffPol2==ZSTDb_buffered) ? windowSize2 + blockSize2 : 0;
-    DEBUGLOG(4, "ZSTD_sufficientBuff: is windowSize2=%u <= wlog1=%u",
-                (U32)windowSize2, cParams2.windowLog);
-    DEBUGLOG(4, "ZSTD_sufficientBuff: is blockSize2=%u <= blockSize1=%u",
-                (U32)blockSize2, (U32)blockSize1);
-    return (blockSize2 <= blockSize1) /* seqStore space depends on blockSize */
+    DEBUGLOG(4, "ZSTD_sufficientBuff: is neededBufferSize2=%u <= bufferSize1=%u",
+                (U32)neededBufferSize2, (U32)bufferSize1);
+    DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbSeq2=%u <= maxNbSeq1=%u",
+                (U32)maxNbSeq2, (U32)maxNbSeq1);
+    DEBUGLOG(4, "ZSTD_sufficientBuff: is maxNbLit2=%u <= maxNbLit1=%u",
+                (U32)maxNbLit2, (U32)maxNbLit1);
+    return (maxNbLit2 <= maxNbLit1)
+         & (maxNbSeq2 <= maxNbSeq1)
          & (neededBufferSize2 <= bufferSize1);
 }
 
 /** Equivalence for resetCCtx purposes */
 static U32 ZSTD_equivalentParams(ZSTD_CCtx_params params1,
                                  ZSTD_CCtx_params params2,
-                                 size_t buffSize1, size_t blockSize1,
+                                 size_t buffSize1,
+                                 size_t maxNbSeq1, size_t maxNbLit1,
                                  ZSTD_buffered_policy_e buffPol2,
                                  U64 pledgedSrcSize)
 {
     DEBUGLOG(4, "ZSTD_equivalentParams: pledgedSrcSize=%u", (U32)pledgedSrcSize);
-    return ZSTD_equivalentCParams(params1.cParams, params2.cParams) &&
-           ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams) &&
-           ZSTD_sufficientBuff(buffSize1, blockSize1, buffPol2, params2.cParams, pledgedSrcSize);
+    if (!ZSTD_equivalentCParams(params1.cParams, params2.cParams)) {
+        DEBUGLOG(4, "ZSTD_equivalentCParams() == 0");
+        return 0;
+    }
+    if (!ZSTD_equivalentLdmParams(params1.ldmParams, params2.ldmParams)) {
+        DEBUGLOG(4, "ZSTD_equivalentLdmParams() == 0");
+        return 0;
+    }
+    if (!ZSTD_sufficientBuff(buffSize1, maxNbSeq1, maxNbLit1, buffPol2,
+                             params2.cParams, pledgedSrcSize)) {
+        DEBUGLOG(4, "ZSTD_sufficientBuff() == 0");
+        return 0;
+    }
+    return 1;
 }
 
 static void ZSTD_reset_compressedBlockState(ZSTD_compressedBlockState_t* bs)
@@ -851,10 +1020,10 @@ static void ZSTD_reset_compressedBlockSt
     int i;
     for (i = 0; i < ZSTD_REP_NUM; ++i)
         bs->rep[i] = repStartValue[i];
-    bs->entropy.hufCTable_repeatMode = HUF_repeat_none;
-    bs->entropy.offcode_repeatMode = FSE_repeat_none;
-    bs->entropy.matchlength_repeatMode = FSE_repeat_none;
-    bs->entropy.litlength_repeatMode = FSE_repeat_none;
+    bs->entropy.huf.repeatMode = HUF_repeat_none;
+    bs->entropy.fse.offcode_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.matchlength_repeatMode = FSE_repeat_none;
+    bs->entropy.fse.litlength_repeatMode = FSE_repeat_none;
 }
 
 /*! ZSTD_invalidateMatchState()
@@ -866,8 +1035,10 @@ static void ZSTD_invalidateMatchState(ZS
     ZSTD_window_clear(&ms->window);
 
     ms->nextToUpdate = ms->window.dictLimit + 1;
+    ms->nextToUpdate3 = ms->window.dictLimit + 1;
     ms->loadedDictEnd = 0;
     ms->opt.litLengthSum = 0;  /* force reset of btopt stats */
+    ms->dictMatchState = NULL;
 }
 
 /*! ZSTD_continueCCtx() :
@@ -880,6 +1051,7 @@ static size_t ZSTD_continueCCtx(ZSTD_CCt
 
     cctx->blockSize = blockSize;   /* previous block size could be different even for same windowLog, due to pledgedSrcSize */
     cctx->appliedParams = params;
+    cctx->blockState.matchState.cParams = params.cParams;
     cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1;
     cctx->consumedSrcSize = 0;
     cctx->producedCSize = 0;
@@ -900,7 +1072,11 @@ static size_t ZSTD_continueCCtx(ZSTD_CCt
 
 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e;
 
-static void* ZSTD_reset_matchState(ZSTD_matchState_t* ms, void* ptr, ZSTD_compressionParameters const* cParams, ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
+static void*
+ZSTD_reset_matchState(ZSTD_matchState_t* ms,
+                      void* ptr,
+                      const ZSTD_compressionParameters* cParams,
+                      ZSTD_compResetPolicy_e const crp, U32 const forCCtx)
 {
     size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
     size_t const hSize = ((size_t)1) << cParams->hashLog;
@@ -912,6 +1088,9 @@ static void* ZSTD_reset_matchState(ZSTD_
 
     ms->hashLog3 = hashLog3;
     memset(&ms->window, 0, sizeof(ms->window));
+    ms->window.dictLimit = 1;    /* start from 1, so that 1st position is valid */
+    ms->window.lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
+    ms->window.nextSrc = ms->window.base + 1;   /* see issue #1241 */
     ZSTD_invalidateMatchState(ms);
 
     /* opt parser space */
@@ -937,14 +1116,24 @@ static void* ZSTD_reset_matchState(ZSTD_
         ms->hashTable3 = ms->chainTable + chainSize;
         ptr = ms->hashTable3 + h3Size;
 
+    ms->cParams = *cParams;
+
     assert(((size_t)ptr & 3) == 0);
     return ptr;
 }
 
+#define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */
+#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128  /* when workspace is continuously too large
+                                         * during at least this number of times,
+                                         * context's memory usage is considered wasteful,
+                                         * because it's sized to handle a worst case scenario which rarely happens.
+                                         * In which case, resize it down to free some memory */
+
 /*! ZSTD_resetCCtx_internal() :
     note : `params` are assumed fully validated at this stage */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-                                      ZSTD_CCtx_params params, U64 pledgedSrcSize,
+                                      ZSTD_CCtx_params params,
+                                      U64 pledgedSrcSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
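The two constants above implement a lazy shrinking policy: a workspace more than three times larger than currently needed is tolerated for up to 128 consecutive uses, then reallocated downward. A standalone model of the counter logic, with illustrative names rather than the library's:

    typedef struct { size_t wsSize; unsigned oversizedDuration; } WsState;

    static int shouldShrink(WsState* s, size_t neededSpace)
    {
        int const tooLarge = s->wsSize > 3 * neededSpace;            /* ZSTD_WORKSPACETOOLARGE_FACTOR */
        s->oversizedDuration = tooLarge ? s->oversizedDuration + 1 : 0;
        return tooLarge && (s->oversizedDuration > 128);             /* ZSTD_WORKSPACETOOLARGE_MAXDURATION */
    }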
@@ -954,34 +1143,35 @@ static size_t ZSTD_resetCCtx_internal(ZS
 
     if (crp == ZSTDcrp_continue) {
         if (ZSTD_equivalentParams(zc->appliedParams, params,
-                                  zc->inBuffSize, zc->blockSize,
-                                  zbuff, pledgedSrcSize)) {
-            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%u)",
-                        zc->appliedParams.cParams.windowLog, (U32)zc->blockSize);
-            return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
+                                  zc->inBuffSize,
+                                  zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit,
+                                  zbuff, pledgedSrcSize)) {
+            DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode (wLog1=%u, blockSize1=%zu)",
+                        zc->appliedParams.cParams.windowLog, zc->blockSize);
+            zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0);   /* if it was too large, it still is */
+            if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION)
+                return ZSTD_continueCCtx(zc, params, pledgedSrcSize);
     }   }
     DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx");
 
     if (params.ldmParams.enableLdm) {
         /* Adjust long distance matching parameters */
-        params.ldmParams.windowLog = params.cParams.windowLog;
         ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
         assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
         assert(params.ldmParams.hashEveryLog < 32);
-        zc->ldmState.hashPower =
-                ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
+        zc->ldmState.hashPower = ZSTD_ldm_getHashPower(params.ldmParams.minMatchLength);
     }
 
     {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
         U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
-        size_t const tokenSpace = blockSize + 11*maxNbSeq;
+        size_t const tokenSpace = WILDCOPY_OVERLENGTH + blockSize + 11*maxNbSeq;
         size_t const buffOutSize = (zbuff==ZSTDb_buffered) ? ZSTD_compressBound(blockSize)+1 : 0;
         size_t const buffInSize = (zbuff==ZSTDb_buffered) ? windowSize + blockSize : 0;
         size_t const matchStateSize = ZSTD_sizeof_matchState(&params.cParams, /* forCCtx */ 1);
         size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
-        void* ptr;
+        void* ptr;   /* used to partition workSpace */
 
         /* Check if workSpace is large enough, alloc a new one if needed */
         {   size_t const entropySpace = HUF_WORKSPACE_SIZE;
@@ -993,14 +1183,20 @@ static size_t ZSTD_resetCCtx_internal(ZS
             size_t const neededSpace = entropySpace + blockStateSpace + ldmSpace +
                                        ldmSeqSpace + matchStateSize + tokenSpace +
                                        bufferSpace;
-            DEBUGLOG(4, "Need %uKB workspace, including %uKB for match state, and %uKB for buffers",
-                        (U32)(neededSpace>>10), (U32)(matchStateSize>>10), (U32)(bufferSpace>>10));
-            DEBUGLOG(4, "windowSize: %u - blockSize: %u", (U32)windowSize, (U32)blockSize);
-
-            if (zc->workSpaceSize < neededSpace) {  /* too small : resize */
-                DEBUGLOG(4, "Need to update workSpaceSize from %uK to %uK",
-                            (unsigned)(zc->workSpaceSize>>10),
-                            (unsigned)(neededSpace>>10));
+
+            int const workSpaceTooSmall = zc->workSpaceSize < neededSpace;
+            int const workSpaceTooLarge = zc->workSpaceSize > ZSTD_WORKSPACETOOLARGE_FACTOR * neededSpace;
+            int const workSpaceWasteful = workSpaceTooLarge && (zc->workSpaceOversizedDuration > ZSTD_WORKSPACETOOLARGE_MAXDURATION);
+            zc->workSpaceOversizedDuration = workSpaceTooLarge ? zc->workSpaceOversizedDuration+1 : 0;
+
+            DEBUGLOG(4, "Need %zuKB workspace, including %zuKB for match state, and %zuKB for buffers",
+                        neededSpace>>10, matchStateSize>>10, bufferSpace>>10);
+            DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
+
+            if (workSpaceTooSmall || workSpaceWasteful) {
+                DEBUGLOG(4, "Need to resize workSpaceSize from %zuKB to %zuKB",
+                            zc->workSpaceSize >> 10,
+                            neededSpace >> 10);
                 /* static cctx : no resize, error out */
                 if (zc->staticSize) return ERROR(memory_allocation);
 
@@ -1009,9 +1205,11 @@ static size_t ZSTD_resetCCtx_internal(ZS
                 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
                 if (zc->workSpace == NULL) return ERROR(memory_allocation);
                 zc->workSpaceSize = neededSpace;
-                ptr = zc->workSpace;
+                zc->workSpaceOversizedDuration = 0;
 
-                /* Statically sized space. entropyWorkspace never moves (but prev/next block swap places) */
+                /* Statically sized space.
+                 * entropyWorkspace never moves,
+                 * though prev/next block swap places */
                 assert(((size_t)zc->workSpace & 3) == 0);   /* ensure correct alignment */
                 assert(zc->workSpaceSize >= 2 * sizeof(ZSTD_compressedBlockState_t));
                 zc->blockState.prevCBlock = (ZSTD_compressedBlockState_t*)zc->workSpace;
@@ -1022,6 +1220,7 @@ static size_t ZSTD_resetCCtx_internal(ZS
 
         /* init params */
         zc->appliedParams = params;
+        zc->blockState.matchState.cParams = params.cParams;
         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
         zc->consumedSrcSize = 0;
         zc->producedCSize = 0;
@@ -1058,13 +1257,18 @@ static size_t ZSTD_resetCCtx_internal(ZS
         ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1);
 
         /* sequences storage */
+        zc->seqStore.maxNbSeq = maxNbSeq;
         zc->seqStore.sequencesStart = (seqDef*)ptr;
         ptr = zc->seqStore.sequencesStart + maxNbSeq;
         zc->seqStore.llCode = (BYTE*) ptr;
         zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
         zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
         zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
-        ptr = zc->seqStore.litStart + blockSize;
+        /* ZSTD_wildcopy() is used to copy into the literals buffer,
+         * so we have to oversize the buffer by WILDCOPY_OVERLENGTH bytes.
+         */
+        zc->seqStore.maxNbLit = blockSize;
+        ptr = zc->seqStore.litStart + blockSize + WILDCOPY_OVERLENGTH;
 
         /* ldm bucketOffsets table */
         if (params.ldmParams.enableLdm) {
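The WILDCOPY_OVERLENGTH slack added to the literals buffer exists because wildcopy-style loops copy in fixed-width chunks and may write past the requested length. A simplified model of the idea, not the library's actual ZSTD_wildcopy:

    #include <string.h>

    /* copies `length` bytes but may write (and read) up to 7 bytes beyond the end,
     * so both buffers need that much headroom */
    static void wildcopy8(void* dst, const void* src, size_t length)
    {
        char* d = (char*)dst;
        const char* s = (const char*)src;
        char* const end = d + length;
        do { memcpy(d, s, 8); d += 8; s += 8; } while (d < end);
    }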
@@ -1098,28 +1302,110 @@ void ZSTD_invalidateRepCodes(ZSTD_CCtx* 
     assert(!ZSTD_window_hasExtDict(cctx->blockState.matchState.window));
 }
 
-static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+/* These are the approximate sizes for each strategy past which copying the
+ * dictionary tables into the working context is faster than using them
+ * in-place.
+ */
+static const size_t attachDictSizeCutoffs[(unsigned)ZSTD_btultra+1] = {
+    8 KB,  /* unused */
+    8 KB,  /* ZSTD_fast */
+    16 KB, /* ZSTD_dfast */
+    32 KB, /* ZSTD_greedy */
+    32 KB, /* ZSTD_lazy */
+    32 KB, /* ZSTD_lazy2 */
+    32 KB, /* ZSTD_btlazy2 */
+    32 KB, /* ZSTD_btopt */
+    8 KB   /* ZSTD_btultra */
+};
+
+static int ZSTD_shouldAttachDict(const ZSTD_CDict* cdict,
+                                 ZSTD_CCtx_params params,
+                                 U64 pledgedSrcSize)
+{
+    size_t cutoff = attachDictSizeCutoffs[cdict->matchState.cParams.strategy];
+    return ( pledgedSrcSize <= cutoff
+          || pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN
+          || params.attachDictPref == ZSTD_dictForceAttach )
+        && params.attachDictPref != ZSTD_dictForceCopy
+        && !params.forceWindow; /* dictMatchState isn't correctly
+                                 * handled in _enforceMaxDist */
+}
+
+static size_t ZSTD_resetCCtx_byAttachingCDict(
+    ZSTD_CCtx* cctx,
+    const ZSTD_CDict* cdict,
+    ZSTD_CCtx_params params,
+    U64 pledgedSrcSize,
+    ZSTD_buffered_policy_e zbuff)
+{
+    {
+        const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+        unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
+        /* Resize working context table params for input only, since the dict
+         * has its own tables. */
+        params.cParams = ZSTD_adjustCParams_internal(*cdict_cParams, pledgedSrcSize, 0);
+        params.cParams.windowLog = windowLog;
+        ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                ZSTDcrp_continue, zbuff);
+        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
+    }
+
+    {
+        const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc
+                                  - cdict->matchState.window.base);
+        const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit;
+        if (cdictLen == 0) {
+            /* don't even attach dictionaries with no contents */
+            DEBUGLOG(4, "skipping attaching empty dictionary");
+        } else {
+            DEBUGLOG(4, "attaching dictionary into context");
+            cctx->blockState.matchState.dictMatchState = &cdict->matchState;
+
+            /* prep working match state so dict matches never have negative indices
+             * when they are translated to the working context's index space. */
+            if (cctx->blockState.matchState.window.dictLimit < cdictEnd) {
+                cctx->blockState.matchState.window.nextSrc =
+                    cctx->blockState.matchState.window.base + cdictEnd;
+                ZSTD_window_clear(&cctx->blockState.matchState.window);
+            }
+            cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit;
+        }
+    }
+
+    cctx->dictID = cdict->dictID;
+
+    /* copy block state */
+    memcpy(cctx->blockState.prevCBlock, &cdict->cBlockState, sizeof(cdict->cBlockState));
+
+    return 0;
+}
+
+static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
                                         const ZSTD_CDict* cdict,
-                                        unsigned windowLog,
-                                        ZSTD_frameParameters fParams,
+                                        ZSTD_CCtx_params params,
                                         U64 pledgedSrcSize,
                                         ZSTD_buffered_policy_e zbuff)
 {
-    {   ZSTD_CCtx_params params = cctx->requestedParams;
+    const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
+
+    DEBUGLOG(4, "copying dictionary into context");
+
+    {   unsigned const windowLog = params.cParams.windowLog;
+        assert(windowLog != 0);
         /* Copy only compression parameters related to tables. */
-        params.cParams = cdict->cParams;
-        if (windowLog) params.cParams.windowLog = windowLog;
-        params.fParams = fParams;
+        params.cParams = *cdict_cParams;
+        params.cParams.windowLog = windowLog;
         ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
                                 ZSTDcrp_noMemset, zbuff);
-        assert(cctx->appliedParams.cParams.strategy == cdict->cParams.strategy);
-        assert(cctx->appliedParams.cParams.hashLog == cdict->cParams.hashLog);
-        assert(cctx->appliedParams.cParams.chainLog == cdict->cParams.chainLog);
+        assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
+        assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
+        assert(cctx->appliedParams.cParams.chainLog == cdict_cParams->chainLog);
     }
 
     /* copy tables */
-    {   size_t const chainSize = (cdict->cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict->cParams.chainLog);
-        size_t const hSize =  (size_t)1 << cdict->cParams.hashLog;
+    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+        size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
         size_t const tableSpace = (chainSize + hSize) * sizeof(U32);
         assert((U32*)cctx->blockState.matchState.chainTable == (U32*)cctx->blockState.matchState.hashTable + hSize);  /* chainTable must follow hashTable */
         assert((U32*)cctx->blockState.matchState.hashTable3 == (U32*)cctx->blockState.matchState.chainTable + chainSize);
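The attach-vs-copy decision is automatic (small or unknown pledged sizes attach, large inputs copy), but callers can pin it through the ZSTD_p_forceAttachDict parameter introduced earlier in this diff. A hedged sketch, assuming ZSTD_dictForceAttach maps to a positive setter value as the switch case above implies:

    ZSTD_CCtx_setParameter(cctx, ZSTD_p_forceAttachDict, ZSTD_dictForceAttach);
    ZSTD_CCtx_refCDict(cctx, cdict);   /* tables will be referenced in place, never copied */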
@@ -1127,6 +1413,7 @@ static size_t ZSTD_resetCCtx_usingCDict(
         assert((U32*)cdict->matchState.hashTable3 == (U32*)cdict->matchState.chainTable + chainSize);
         memcpy(cctx->blockState.matchState.hashTable, cdict->matchState.hashTable, tableSpace);   /* presumes all tables follow each other */
     }
+
     /* Zero the hashTable3, since the cdict never fills it */
     {   size_t const h3Size = (size_t)1 << cctx->blockState.matchState.hashLog3;
         assert(cdict->matchState.hashLog3 == 0);
@@ -1134,14 +1421,14 @@ static size_t ZSTD_resetCCtx_usingCDict(
     }
 
     /* copy dictionary offsets */
-    {
-        ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
+    {   ZSTD_matchState_t const* srcMatchState = &cdict->matchState;
         ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
         dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3;
         dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd;
     }
+
     cctx->dictID = cdict->dictID;
 
     /* copy block state */
@@ -1150,6 +1437,27 @@ static size_t ZSTD_resetCCtx_usingCDict(
     return 0;
 }
 
+/* We have a choice between copying the dictionary context into the working
+ * context, or referencing the dictionary context from the working context
+ * in-place. We decide here which strategy to use. */
+static size_t ZSTD_resetCCtx_usingCDict(ZSTD_CCtx* cctx,
+                                        const ZSTD_CDict* cdict,
+                                        ZSTD_CCtx_params params,
+                                        U64 pledgedSrcSize,
+                                        ZSTD_buffered_policy_e zbuff)
+{
+
+    DEBUGLOG(4, "ZSTD_resetCCtx_usingCDict (pledgedSrcSize=%u)", (U32)pledgedSrcSize);
+
+    if (ZSTD_shouldAttachDict(cdict, params, pledgedSrcSize)) {
+        return ZSTD_resetCCtx_byAttachingCDict(
+            cctx, cdict, params, pledgedSrcSize, zbuff);
+    } else {
+        return ZSTD_resetCCtx_byCopyingCDict(
+            cctx, cdict, params, pledgedSrcSize, zbuff);
+    }
+}
+
 /*! ZSTD_copyCCtx_internal() :
  *  Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
  *  Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
@@ -1192,7 +1500,7 @@ static size_t ZSTD_copyCCtx_internal(ZST
 
     /* copy dictionary offsets */
     {
-        ZSTD_matchState_t const* srcMatchState = &srcCCtx->blockState.matchState;
+        const ZSTD_matchState_t* srcMatchState = &srcCCtx->blockState.matchState;
         ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState;
         dstMatchState->window       = srcMatchState->window;
         dstMatchState->nextToUpdate = srcMatchState->nextToUpdate;
@@ -1294,15 +1602,15 @@ static void ZSTD_reduceIndex (ZSTD_CCtx*
 
 /* See doc/zstd_compression_format.md for detailed format description */
 
-size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
+static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock)
 {
+    U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3);
     if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
+    MEM_writeLE24(dst, cBlockHeader24);
     memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
-    MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
-    return ZSTD_blockHeaderSize+srcSize;
+    return ZSTD_blockHeaderSize + srcSize;
 }
 
-
 static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
 {
     BYTE* const ostart = (BYTE* const)dst;
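The rewritten raw-block header packs three fields into 24 bits: bit 0 is the last-block flag, bits 1-2 the block type (bt_raw here), and bits 3-23 the size, matching `srcSize << 3` above. For illustration, a small decoder for that layout:

    typedef struct { unsigned lastBlock, blockType, blockSize; } BlockHeader;

    static BlockHeader readBlockHeader24(unsigned h24)   /* h24: the little-endian 24-bit value */
    {
        BlockHeader bh;
        bh.lastBlock = h24 & 1;
        bh.blockType = (h24 >> 1) & 3;   /* 0=raw, 1=RLE, 2=compressed */
        bh.blockSize = h24 >> 3;
        return bh;
    }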
@@ -1356,16 +1664,24 @@ static size_t ZSTD_compressRleLiteralsBl
 }
 
 
-static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
-
-static size_t ZSTD_compressLiterals (ZSTD_entropyCTables_t const* prevEntropy,
-                                     ZSTD_entropyCTables_t* nextEntropy,
+/* ZSTD_minGain() :
+ * minimum compression required
+ * to generate a compress block or a compressed literals section.
+ * note : use same formula for both situations */
+static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
+{
+    U32 const minlog = (strat==ZSTD_btultra) ? 7 : 6;
+    return (srcSize >> minlog) + 2;
+}
+
+static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
+                                     ZSTD_hufCTables_t* nextHuf,
                                      ZSTD_strategy strategy, int disableLiteralCompression,
                                      void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      U32* workspace, const int bmi2)
 {
-    size_t const minGain = ZSTD_minGain(srcSize);
+    size_t const minGain = ZSTD_minGain(srcSize, strategy);
     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
     BYTE* const ostart = (BYTE*)dst;
     U32   singleStream = srcSize < 256;
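The strategy-dependent shift makes ZSTD_btultra accept smaller savings: for a full 128 KB block, minGain is (131072 >> 6) + 2 = 2050 bytes under most strategies but (131072 >> 7) + 2 = 1026 bytes under btultra, so btultra keeps compressed representations that other strategies would reject as not worth the decode cost.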
@@ -1376,27 +1692,25 @@ static size_t ZSTD_compressLiterals (ZST
                 disableLiteralCompression);
 
     /* Prepare nextEntropy assuming reusing the existing table */
-    nextEntropy->hufCTable_repeatMode = prevEntropy->hufCTable_repeatMode;
-    memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable,
-           sizeof(prevEntropy->hufCTable));
+    memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
 
     if (disableLiteralCompression)
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
 
     /* small ? don't even attempt compression (speed opt) */
 #   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevEntropy->hufCTable_repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
         if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
 
     if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall);   /* not enough space for compression */
-    {   HUF_repeat repeat = prevEntropy->hufCTable_repeatMode;
+    {   HUF_repeat repeat = prevHuf->repeatMode;
         int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0;
         if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
         cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2)
+                                      workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2)
                                 : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11,
-                                      workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextEntropy->hufCTable, &repeat, preferRepeat, bmi2);
+                                      workspace, HUF_WORKSPACE_SIZE, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
         if (repeat != HUF_repeat_none) {
             /* reused the existing table */
             hType = set_repeat;
@@ -1404,17 +1718,17 @@ static size_t ZSTD_compressLiterals (ZST
     }
 
     if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
-        memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
+        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
     if (cLitSize==1) {
-        memcpy(nextEntropy->hufCTable, prevEntropy->hufCTable, sizeof(prevEntropy->hufCTable));
+        memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
     }
 
     if (hType == set_compressed) {
         /* using a newly constructed table */
-        nextEntropy->hufCTable_repeatMode = HUF_repeat_check;
+        nextHuf->repeatMode = HUF_repeat_check;
     }
 
     /* Build header */
@@ -1451,6 +1765,7 @@ void ZSTD_seqToCodes(const seqStore_t* s
     BYTE* const mlCodeTable = seqStorePtr->mlCode;
     U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     U32 u;
+    assert(nbSeq <= seqStorePtr->maxNbSeq);
     for (u=0; u<nbSeq; u++) {
         U32 const llv = sequences[u].litLength;
         U32 const mlv = sequences[u].matchLength;
@@ -1464,61 +1779,234 b' void ZSTD_seqToCodes(const seqStore_t* s'
1464 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
1779 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
1465 }
1780 }
1466
1781
1782
1783 /**
1784 * -log2(x / 256) lookup table for x in [0, 256).
1785 * If x == 0: Return 0
1786 * Else: Return floor(-log2(x / 256) * 256)
1787 */
1788 static unsigned const kInverseProbabiltyLog256[256] = {
1789 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162,
1790 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889,
1791 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734,
1792 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626,
1793 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542,
1794 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473,
1795 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415,
1796 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366,
1797 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322,
1798 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282,
1799 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247,
1800 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215,
1801 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185,
1802 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157,
1803 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132,
1804 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108,
1805 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85,
1806 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64,
1807 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44,
1808 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25,
1809 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7,
1810 5, 4, 2, 1,
1811 };
1812
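A quick sanity check on the table above: the entries can be regenerated from the documented formula. Standalone sketch, not part of the vendored file (C99, link with -lm); index 0 is pinned to 0 exactly as the docblock specifies.

#include <math.h>
#include <stdio.h>

int main(void) {
    unsigned x;
    for (x = 0; x < 256; ++x) {
        /* x == 0 carries no probability mass; the table stores 0 there */
        unsigned const v = (x == 0)
            ? 0
            : (unsigned)floor(-log2((double)x / 256.0) * 256.0);
        printf("%4u%s", v, (x % 12 == 11) ? ",\n" : ", ");
    }
    putchar('\n');
    return 0;
}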
1813
1814 /**
1815 * Returns the cost in bits of encoding the distribution described by count
1816 * using the entropy bound.
1817 */
1818 static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total)
1819 {
1820 unsigned cost = 0;
1821 unsigned s;
1822 for (s = 0; s <= max; ++s) {
1823 unsigned norm = (unsigned)((256 * count[s]) / total);
1824 if (count[s] != 0 && norm == 0)
1825 norm = 1;
1826 assert(count[s] < total);
1827 cost += count[s] * kInverseProbabiltyLog256[norm];
1828 }
1829 return cost >> 8;
1830 }
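A tiny worked example of the bound computed above, assuming ZSTD_entropyCost and the lookup table are in scope (the histogram values are illustrative only):

/* Symbol 0 seen 6 times, symbol 1 twice, symbol 2 never, out of 8
 * events. norm becomes {192, 64, 0}, so the fixed-point sum is
 * 6*106 + 2*512 + 0 = 1660, and 1660 >> 8 == 6 bits. The exact
 * Shannon bound is ~6.49 bits; the estimate rounds down. */
static size_t demo_entropyCost(void) {
    unsigned const count[3] = { 6, 2, 0 };
    return ZSTD_entropyCost(count, 2 /* max */, 8 /* total */);  /* == 6 */
}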
1831
1832
1833 /**
1834 * Returns the cost in bits of encoding the distribution in count using the
1835 * table described by norm. The max symbol supported by norm is assumed >= max.
1836 * norm must be valid for every symbol with non-zero probability in count.
1837 */
1838 static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog,
1839 unsigned const* count, unsigned const max)
1840 {
1841 unsigned const shift = 8 - accuracyLog;
1842 size_t cost = 0;
1843 unsigned s;
1844 assert(accuracyLog <= 8);
1845 for (s = 0; s <= max; ++s) {
1846 unsigned const normAcc = norm[s] != -1 ? norm[s] : 1;
1847 unsigned const norm256 = normAcc << shift;
1848 assert(norm256 > 0);
1849 assert(norm256 < 256);
1850 cost += count[s] * kInverseProbabiltyLog256[norm256];
1851 }
1852 return cost >> 8;
1853 }
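The cross-entropy variant admits the same spot check: each term charges count[s] * -log2(norm[s] / 2^accuracyLog) bits, i.e. the cost of coding the observed counts under the normalized table rather than under their own empirical distribution (a norm entry of -1 is treated as probability 1 / 2^accuracyLog). A hedged sketch reusing the toy histogram:

/* accuracyLog == 2, so the table has 4 slots: P(0)=3/4, P(1)=1/4.
 * That matches the 6:2 data exactly, so the answer is again 6 bits
 * (shift = 6; norm256 = {192, 64}; 6*106 + 2*512 = 1660 >> 8). */
static size_t demo_crossEntropyCost(void) {
    short const norm[2] = { 3, 1 };
    unsigned const count[2] = { 6, 2 };
    return ZSTD_crossEntropyCost(norm, 2, count, 1 /* max */);
}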
1854
1855
1856 static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) {
1857 void const* ptr = ctable;
1858 U16 const* u16ptr = (U16 const*)ptr;
1859 U32 const maxSymbolValue = MEM_read16(u16ptr + 1);
1860 return maxSymbolValue;
1861 }
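This peek into the CTable leans on FSE's in-memory layout, which the diff itself does not restate; per the fse sources it is assumed to begin with two 16-bit header fields:

/* Assumed FSE_CTable prefix (state-transition and symbol records
 * follow after the header):
 *
 *   offset 0 : U16 tableLog
 *   offset 2 : U16 maxSymbolValue
 *
 * hence u16ptr + 1 above lands on maxSymbolValue. */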
1862
1863
1864 /**
1865 * Returns the cost in bits of encoding the distribution in count using ctable.
1866 * Returns an error if ctable cannot represent all the symbols in count.
1867 */
1868 static size_t ZSTD_fseBitCost(
1869 FSE_CTable const* ctable,
1870 unsigned const* count,
1871 unsigned const max)
1872 {
1873 unsigned const kAccuracyLog = 8;
1874 size_t cost = 0;
1875 unsigned s;
1876 FSE_CState_t cstate;
1877 FSE_initCState(&cstate, ctable);
1878 if (ZSTD_getFSEMaxSymbolValue(ctable) < max) {
1879 DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u",
1880 ZSTD_getFSEMaxSymbolValue(ctable), max);
1881 return ERROR(GENERIC);
1882 }
1883 for (s = 0; s <= max; ++s) {
1884 unsigned const tableLog = cstate.stateLog;
1885 unsigned const badCost = (tableLog + 1) << kAccuracyLog;
1886 unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog);
1887 if (count[s] == 0)
1888 continue;
1889 if (bitCost >= badCost) {
1890 DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s);
1891 return ERROR(GENERIC);
1892 }
1893 cost += count[s] * bitCost;
1894 }
1895 return cost >> kAccuracyLog;
1896 }
1897
1898 /**
1899 * Returns the cost in bytes of encoding the normalized count header.
1900 * Returns an error if any of the helper functions return an error.
1901 */
1902 static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max,
1903 size_t const nbSeq, unsigned const FSELog)
1904 {
1905 BYTE wksp[FSE_NCOUNTBOUND];
1906 S16 norm[MaxSeq + 1];
1907 const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max);
1908 CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max));
1909 return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog);
1910 }
1911
1912
1467 typedef enum {
1913 typedef enum {
1468 ZSTD_defaultDisallowed = 0,
1914 ZSTD_defaultDisallowed = 0,
1469 ZSTD_defaultAllowed = 1
1915 ZSTD_defaultAllowed = 1
1470 } ZSTD_defaultPolicy_e;
1916 } ZSTD_defaultPolicy_e;
1471
1917
1472 MEM_STATIC
1918 MEM_STATIC symbolEncodingType_e
1473 symbolEncodingType_e ZSTD_selectEncodingType(
1919 ZSTD_selectEncodingType(
1474 FSE_repeat* repeatMode, size_t const mostFrequent, size_t nbSeq,
1920 FSE_repeat* repeatMode, unsigned const* count, unsigned const max,
1475 U32 defaultNormLog, ZSTD_defaultPolicy_e const isDefaultAllowed)
1921 size_t const mostFrequent, size_t nbSeq, unsigned const FSELog,
1922 FSE_CTable const* prevCTable,
1923 short const* defaultNorm, U32 defaultNormLog,
1924 ZSTD_defaultPolicy_e const isDefaultAllowed,
1925 ZSTD_strategy const strategy)
1476 {
1926 {
1477 #define MIN_SEQ_FOR_DYNAMIC_FSE 64
1478 #define MAX_SEQ_FOR_STATIC_FSE 1000
1479 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
1927 ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0);
1480 if ((mostFrequent == nbSeq) && (!isDefaultAllowed || nbSeq > 2)) {
1928 if (mostFrequent == nbSeq) {
1929 *repeatMode = FSE_repeat_none;
1930 if (isDefaultAllowed && nbSeq <= 2) {
1931 /* Prefer set_basic over set_rle when there are 2 or fewer symbols,
1932 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
1933 * If basic encoding isn't possible, always choose RLE.
1934 */
1935 DEBUGLOG(5, "Selected set_basic");
1936 return set_basic;
1937 }
1481 DEBUGLOG(5, "Selected set_rle");
1938 DEBUGLOG(5, "Selected set_rle");
1482 /* Prefer set_basic over set_rle when there are 2 or less symbols,
1483 * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol.
1484 * If basic encoding isn't possible, always choose RLE.
1485 */
1486 *repeatMode = FSE_repeat_check;
1487 return set_rle;
1939 return set_rle;
1488 }
1940 }
1489 if ( isDefaultAllowed
1941 if (strategy < ZSTD_lazy) {
1490 && (*repeatMode == FSE_repeat_valid) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
1942 if (isDefaultAllowed) {
1491 DEBUGLOG(5, "Selected set_repeat");
1943 size_t const staticFse_nbSeq_max = 1000;
1492 return set_repeat;
1944 size_t const mult = 10 - strategy;
1493 }
1945 size_t const baseLog = 3;
1494 if ( isDefaultAllowed
1946 size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */
1495 && ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (defaultNormLog-1)))) ) {
1947 assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */
1496 DEBUGLOG(5, "Selected set_basic");
1948 assert(mult <= 9 && mult >= 7);
1497 /* The format allows default tables to be repeated, but it isn't useful.
1949 if ( (*repeatMode == FSE_repeat_valid)
1498 * When using simple heuristics to select encoding type, we don't want
1950 && (nbSeq < staticFse_nbSeq_max) ) {
1499 * to confuse these tables with dictionaries. When running more careful
1951 DEBUGLOG(5, "Selected set_repeat");
1500 * analysis, we don't need to waste time checking both repeating tables
1952 return set_repeat;
1501 * and default tables.
1953 }
1502 */
1954 if ( (nbSeq < dynamicFse_nbSeq_min)
1503 *repeatMode = FSE_repeat_none;
1955 || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) {
1504 return set_basic;
1956 DEBUGLOG(5, "Selected set_basic");
1957 /* The format allows default tables to be repeated, but it isn't useful.
1958 * When using simple heuristics to select encoding type, we don't want
1959 * to confuse these tables with dictionaries. When running more careful
1960 * analysis, we don't need to waste time checking both repeating tables
1961 * and default tables.
1962 */
1963 *repeatMode = FSE_repeat_none;
1964 return set_basic;
1965 }
1966 }
1967 } else {
1968 size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC);
1969 size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC);
1970 size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog);
1971 size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq);
1972
1973 if (isDefaultAllowed) {
1974 assert(!ZSTD_isError(basicCost));
1975 assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost)));
1976 }
1977 assert(!ZSTD_isError(NCountCost));
1978 assert(compressedCost < ERROR(maxCode));
1979 DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u",
1980 (U32)basicCost, (U32)repeatCost, (U32)compressedCost);
1981 if (basicCost <= repeatCost && basicCost <= compressedCost) {
1982 DEBUGLOG(5, "Selected set_basic");
1983 assert(isDefaultAllowed);
1984 *repeatMode = FSE_repeat_none;
1985 return set_basic;
1986 }
1987 if (repeatCost <= compressedCost) {
1988 DEBUGLOG(5, "Selected set_repeat");
1989 assert(!ZSTD_isError(repeatCost));
1990 return set_repeat;
1991 }
1992 assert(compressedCost < basicCost && compressedCost < repeatCost);
1505 }
1993 }
1506 DEBUGLOG(5, "Selected set_compressed");
1994 DEBUGLOG(5, "Selected set_compressed");
1507 *repeatMode = FSE_repeat_check;
1995 *repeatMode = FSE_repeat_check;
1508 return set_compressed;
1996 return set_compressed;
1509 }
1997 }
1510
1998
1511 MEM_STATIC
1999 MEM_STATIC size_t
1512 size_t ZSTD_buildCTable(void* dst, size_t dstCapacity,
2000 ZSTD_buildCTable(void* dst, size_t dstCapacity,
1513 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
2001 FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type,
1514 U32* count, U32 max,
2002 U32* count, U32 max,
1515 BYTE const* codeTable, size_t nbSeq,
2003 const BYTE* codeTable, size_t nbSeq,
1516 S16 const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
2004 const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax,
1517 FSE_CTable const* prevCTable, size_t prevCTableSize,
2005 const FSE_CTable* prevCTable, size_t prevCTableSize,
1518 void* workspace, size_t workspaceSize)
2006 void* workspace, size_t workspaceSize)
1519 {
2007 {
1520 BYTE* op = (BYTE*)dst;
2008 BYTE* op = (BYTE*)dst;
1521 BYTE const* const oend = op + dstCapacity;
2009 const BYTE* const oend = op + dstCapacity;
1522
2010
1523 switch (type) {
2011 switch (type) {
1524 case set_rle:
2012 case set_rle:
@@ -1674,7 +2162,7 b' ZSTD_encodeSequences_bmi2('
1674
2162
1675 #endif
2163 #endif
1676
2164
1677 size_t ZSTD_encodeSequences(
2165 static size_t ZSTD_encodeSequences(
1678 void* dst, size_t dstCapacity,
2166 void* dst, size_t dstCapacity,
1679 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
2167 FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
1680 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
2168 FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable,
@@ -1706,10 +2194,11 b' MEM_STATIC size_t ZSTD_compressSequences'
1706 const int bmi2)
2194 const int bmi2)
1707 {
2195 {
1708 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
2196 const int longOffsets = cctxParams->cParams.windowLog > STREAM_ACCUMULATOR_MIN;
2197 ZSTD_strategy const strategy = cctxParams->cParams.strategy;
1709 U32 count[MaxSeq+1];
2198 U32 count[MaxSeq+1];
1710 FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
2199 FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
1711 FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
2200 FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
1712 FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
2201 FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
1713 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
2202 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
1714 const seqDef* const sequences = seqStorePtr->sequencesStart;
2203 const seqDef* const sequences = seqStorePtr->sequencesStart;
1715 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
2204 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
@@ -1720,15 +2209,17 b' MEM_STATIC size_t ZSTD_compressSequences'
1720 BYTE* op = ostart;
2209 BYTE* op = ostart;
1721 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
2210 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
1722 BYTE* seqHead;
2211 BYTE* seqHead;
2212 BYTE* lastNCount = NULL;
1723
2213
1724 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2214 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
1725
2215
1726 /* Compress literals */
2216 /* Compress literals */
1727 { const BYTE* const literals = seqStorePtr->litStart;
2217 { const BYTE* const literals = seqStorePtr->litStart;
1728 size_t const litSize = seqStorePtr->lit - literals;
2218 size_t const litSize = seqStorePtr->lit - literals;
2219 int const disableLiteralCompression = (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
1729 size_t const cSize = ZSTD_compressLiterals(
2220 size_t const cSize = ZSTD_compressLiterals(
1730 prevEntropy, nextEntropy,
2221 &prevEntropy->huf, &nextEntropy->huf,
1731 cctxParams->cParams.strategy, cctxParams->disableLiteralCompression,
2222 cctxParams->cParams.strategy, disableLiteralCompression,
1732 op, dstCapacity,
2223 op, dstCapacity,
1733 literals, litSize,
2224 literals, litSize,
1734 workspace, bmi2);
2225 workspace, bmi2);
@@ -1747,13 +2238,9 b' MEM_STATIC size_t ZSTD_compressSequences'
1747 else
2238 else
1748 op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
2239 op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
1749 if (nbSeq==0) {
2240 if (nbSeq==0) {
1750 memcpy(nextEntropy->litlengthCTable, prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable));
2241 /* Copy the old tables over as if we repeated them */
1751 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
2242 memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
1752 memcpy(nextEntropy->offcodeCTable, prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable));
2243 return op - ostart;
1753 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
1754 memcpy(nextEntropy->matchlengthCTable, prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable));
1755 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
1756 return op - ostart;
1757 }
2244 }
1758
2245
1759 /* seqHead : flags for FSE encoding type */
2246 /* seqHead : flags for FSE encoding type */
@@ -1763,43 +2250,53 b' MEM_STATIC size_t ZSTD_compressSequences'
1763 ZSTD_seqToCodes(seqStorePtr);
2250 ZSTD_seqToCodes(seqStorePtr);
1764 /* build CTable for Literal Lengths */
2251 /* build CTable for Literal Lengths */
1765 { U32 max = MaxLL;
2252 { U32 max = MaxLL;
1766 size_t const mostFrequent = FSE_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace);
2253 size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, workspace); /* can't fail */
1767 DEBUGLOG(5, "Building LL table");
2254 DEBUGLOG(5, "Building LL table");
1768 nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
2255 nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
1769 LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode, mostFrequent, nbSeq, LL_defaultNormLog, ZSTD_defaultAllowed);
2256 LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode, count, max, mostFrequent, nbSeq, LLFSELog, prevEntropy->fse.litlengthCTable, LL_defaultNorm, LL_defaultNormLog, ZSTD_defaultAllowed, strategy);
2257 assert(set_basic < set_compressed && set_rle < set_compressed);
2258 assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
1770 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
2259 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
1771 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
2260 count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
1772 prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
2261 prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable),
1773 workspace, HUF_WORKSPACE_SIZE);
2262 workspace, HUF_WORKSPACE_SIZE);
1774 if (ZSTD_isError(countSize)) return countSize;
2263 if (ZSTD_isError(countSize)) return countSize;
2264 if (LLtype == set_compressed)
2265 lastNCount = op;
1775 op += countSize;
2266 op += countSize;
1776 } }
2267 } }
1777 /* build CTable for Offsets */
2268 /* build CTable for Offsets */
1778 { U32 max = MaxOff;
2269 { U32 max = MaxOff;
1779 size_t const mostFrequent = FSE_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace);
2270 size_t const mostFrequent = HIST_countFast_wksp(count, &max, ofCodeTable, nbSeq, workspace); /* can't fail */
1780 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
2271 /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
1781 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
2272 ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
1782 DEBUGLOG(5, "Building OF table");
2273 DEBUGLOG(5, "Building OF table");
1783 nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
2274 nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
1784 Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode, mostFrequent, nbSeq, OF_defaultNormLog, defaultPolicy);
2275 Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode, count, max, mostFrequent, nbSeq, OffFSELog, prevEntropy->fse.offcodeCTable, OF_defaultNorm, OF_defaultNormLog, defaultPolicy, strategy);
2276 assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
1785 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
2277 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
1786 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
2278 count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
1787 prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
2279 prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable),
1788 workspace, HUF_WORKSPACE_SIZE);
2280 workspace, HUF_WORKSPACE_SIZE);
1789 if (ZSTD_isError(countSize)) return countSize;
2281 if (ZSTD_isError(countSize)) return countSize;
2282 if (Offtype == set_compressed)
2283 lastNCount = op;
1790 op += countSize;
2284 op += countSize;
1791 } }
2285 } }
1792 /* build CTable for MatchLengths */
2286 /* build CTable for MatchLengths */
1793 { U32 max = MaxML;
2287 { U32 max = MaxML;
1794 size_t const mostFrequent = FSE_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace);
2288 size_t const mostFrequent = HIST_countFast_wksp(count, &max, mlCodeTable, nbSeq, workspace); /* can't fail */
1795 DEBUGLOG(5, "Building ML table");
2289 DEBUGLOG(5, "Building ML table");
1796 nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
2290 nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
1797 MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode, mostFrequent, nbSeq, ML_defaultNormLog, ZSTD_defaultAllowed);
2291 MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode, count, max, mostFrequent, nbSeq, MLFSELog, prevEntropy->fse.matchlengthCTable, ML_defaultNorm, ML_defaultNormLog, ZSTD_defaultAllowed, strategy);
2292 assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
1798 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
2293 { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
1799 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
2294 count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
1800 prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
2295 prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable),
1801 workspace, HUF_WORKSPACE_SIZE);
2296 workspace, HUF_WORKSPACE_SIZE);
1802 if (ZSTD_isError(countSize)) return countSize;
2297 if (ZSTD_isError(countSize)) return countSize;
2298 if (MLtype == set_compressed)
2299 lastNCount = op;
1803 op += countSize;
2300 op += countSize;
1804 } }
2301 } }
1805
2302
@@ -1814,21 +2311,37 b' MEM_STATIC size_t ZSTD_compressSequences'
1814 longOffsets, bmi2);
2311 longOffsets, bmi2);
1815 if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
2312 if (ZSTD_isError(bitstreamSize)) return bitstreamSize;
1816 op += bitstreamSize;
2313 op += bitstreamSize;
2314 /* zstd versions <= 1.3.4 mistakenly report corruption when
2315 * FSE_readNCount() receives a buffer < 4 bytes.
2316 * Fixed by https://github.com/facebook/zstd/pull/1146.
2317 * This can happen when the last set_compressed table present is 2
2318 * bytes and the bitstream is only one byte.
2319 * In this exceedingly rare case, we will simply emit an uncompressed
2320 * block, since it isn't worth optimizing.
2321 */
2322 if (lastNCount && (op - lastNCount) < 4) {
2323 /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
2324 assert(op - lastNCount == 3);
2325 DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
2326 "emitting an uncompressed block.");
2327 return 0;
2328 }
1817 }
2329 }
1818
2330
1819 return op - ostart;
2331 return op - ostart;
1820 }
2332 }
1821
2333
1822 MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
2334 MEM_STATIC size_t ZSTD_compressSequences(seqStore_t* seqStorePtr,
1823 ZSTD_entropyCTables_t const* prevEntropy,
2335 const ZSTD_entropyCTables_t* prevEntropy,
1824 ZSTD_entropyCTables_t* nextEntropy,
2336 ZSTD_entropyCTables_t* nextEntropy,
1825 ZSTD_CCtx_params const* cctxParams,
2337 const ZSTD_CCtx_params* cctxParams,
1826 void* dst, size_t dstCapacity,
2338 void* dst, size_t dstCapacity,
1827 size_t srcSize, U32* workspace, int bmi2)
2339 size_t srcSize, U32* workspace, int bmi2)
1828 {
2340 {
1829 size_t const cSize = ZSTD_compressSequences_internal(
2341 size_t const cSize = ZSTD_compressSequences_internal(
1830 seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
2342 seqStorePtr, prevEntropy, nextEntropy, cctxParams, dst, dstCapacity,
1831 workspace, bmi2);
2343 workspace, bmi2);
2344 if (cSize == 0) return 0;
1832 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
2345 /* When srcSize <= dstCapacity, there is enough space to write a raw uncompressed block.
1833 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
2346 * Since we ran out of space, block must be not compressible, so fall back to raw uncompressed block.
1834 */
2347 */
@@ -1837,40 +2350,55 b' MEM_STATIC size_t ZSTD_compressSequences'
1837 if (ZSTD_isError(cSize)) return cSize;
2350 if (ZSTD_isError(cSize)) return cSize;
1838
2351
1839 /* Check compressibility */
2352 /* Check compressibility */
1840 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize); /* note : fixed formula, maybe should depend on compression level, or strategy */
2353 { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
1841 if (cSize >= maxCSize) return 0; /* block not compressed */
2354 if (cSize >= maxCSize) return 0; /* block not compressed */
1842 }
2355 }
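ZSTD_minGain is defined in the internal header rather than in this hunk; as an assumption about this zstd release (verify against zstd_compress_internal.h), the strategy-aware version is roughly:

/* Hedged sketch, not the vendored definition: the required saving
 * scales with srcSize, and btultra raises the bar. */
static size_t sketch_minGain(size_t srcSize, ZSTD_strategy strat) {
    unsigned const minlog = (strat == ZSTD_btultra) ? 7 : 6;
    return (srcSize >> minlog) + 2;
}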
1843
2356
1844 /* We check that dictionaries have offset codes available for the first
1845 * block. After the first block, the offcode table might not have large
1846 * enough codes to represent the offsets in the data.
1847 */
1848 if (nextEntropy->offcode_repeatMode == FSE_repeat_valid)
1849 nextEntropy->offcode_repeatMode = FSE_repeat_check;
1850
1851 return cSize;
2357 return cSize;
1852 }
2358 }
1853
2359
1854 /* ZSTD_selectBlockCompressor() :
2360 /* ZSTD_selectBlockCompressor() :
1855 * Not static, but internal use only (used by long distance matcher)
2361 * Not static, but internal use only (used by long distance matcher)
1856 * assumption : strat is a valid strategy */
2362 * assumption : strat is a valid strategy */
1857 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
2363 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
1858 {
2364 {
1859 static const ZSTD_blockCompressor blockCompressor[2][(unsigned)ZSTD_btultra+1] = {
2365 static const ZSTD_blockCompressor blockCompressor[3][(unsigned)ZSTD_btultra+1] = {
1860 { ZSTD_compressBlock_fast /* default for 0 */,
2366 { ZSTD_compressBlock_fast /* default for 0 */,
1861 ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy,
2367 ZSTD_compressBlock_fast,
1862 ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2,
2368 ZSTD_compressBlock_doubleFast,
1863 ZSTD_compressBlock_btopt, ZSTD_compressBlock_btultra },
2369 ZSTD_compressBlock_greedy,
2370 ZSTD_compressBlock_lazy,
2371 ZSTD_compressBlock_lazy2,
2372 ZSTD_compressBlock_btlazy2,
2373 ZSTD_compressBlock_btopt,
2374 ZSTD_compressBlock_btultra },
1864 { ZSTD_compressBlock_fast_extDict /* default for 0 */,
2375 { ZSTD_compressBlock_fast_extDict /* default for 0 */,
1865 ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict,
2376 ZSTD_compressBlock_fast_extDict,
1866 ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict,
2377 ZSTD_compressBlock_doubleFast_extDict,
1867 ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btultra_extDict }
2378 ZSTD_compressBlock_greedy_extDict,
2379 ZSTD_compressBlock_lazy_extDict,
2380 ZSTD_compressBlock_lazy2_extDict,
2381 ZSTD_compressBlock_btlazy2_extDict,
2382 ZSTD_compressBlock_btopt_extDict,
2383 ZSTD_compressBlock_btultra_extDict },
2384 { ZSTD_compressBlock_fast_dictMatchState /* default for 0 */,
2385 ZSTD_compressBlock_fast_dictMatchState,
2386 ZSTD_compressBlock_doubleFast_dictMatchState,
2387 ZSTD_compressBlock_greedy_dictMatchState,
2388 ZSTD_compressBlock_lazy_dictMatchState,
2389 ZSTD_compressBlock_lazy2_dictMatchState,
2390 ZSTD_compressBlock_btlazy2_dictMatchState,
2391 ZSTD_compressBlock_btopt_dictMatchState,
2392 ZSTD_compressBlock_btultra_dictMatchState }
1868 };
2393 };
2394 ZSTD_blockCompressor selectedCompressor;
1869 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
2395 ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
1870
2396
1871 assert((U32)strat >= (U32)ZSTD_fast);
2397 assert((U32)strat >= (U32)ZSTD_fast);
1872 assert((U32)strat <= (U32)ZSTD_btultra);
2398 assert((U32)strat <= (U32)ZSTD_btultra);
1873 return blockCompressor[extDict!=0][(U32)strat];
2399 selectedCompressor = blockCompressor[(int)dictMode][(U32)strat];
2400 assert(selectedCompressor != NULL);
2401 return selectedCompressor;
1874 }
2402 }
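The selection itself is a plain two-level function-pointer dispatch: dictMode picks a row (noDict, extDict, and the newly added dictMatchState), strategy picks a column. The same pattern in miniature, with hypothetical names:

#include <stddef.h>

typedef size_t (*demo_blockCompressor)(const void* src, size_t srcSize);

static size_t demo_fast(const void* src, size_t n) { (void)src; return n; }
static size_t demo_lazy(const void* src, size_t n) { (void)src; return n; }

typedef enum { demo_noDict = 0, demo_extDict = 1, demo_dictMatchState = 2 } demo_dictMode_e;

static const demo_blockCompressor demo_table[3][2] = {
    { demo_fast, demo_lazy },   /* no dictionary */
    { demo_fast, demo_lazy },   /* external-dictionary window */
    { demo_fast, demo_lazy },   /* attached dictionary match state */
};

/* usage: demo_table[demo_extDict][1](src, srcSize) */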
1875
2403
1876 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
2404 static void ZSTD_storeLastLiterals(seqStore_t* seqStorePtr,
@@ -1880,7 +2408,7 b' static void ZSTD_storeLastLiterals(seqSt'
1880 seqStorePtr->lit += lastLLSize;
2408 seqStorePtr->lit += lastLLSize;
1881 }
2409 }
1882
2410
1883 static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
2411 void ZSTD_resetSeqStore(seqStore_t* ssPtr)
1884 {
2412 {
1885 ssPtr->lit = ssPtr->litStart;
2413 ssPtr->lit = ssPtr->litStart;
1886 ssPtr->sequences = ssPtr->sequencesStart;
2414 ssPtr->sequences = ssPtr->sequencesStart;
@@ -1892,24 +2420,38 b' static size_t ZSTD_compressBlock_interna'
1892 const void* src, size_t srcSize)
2420 const void* src, size_t srcSize)
1893 {
2421 {
1894 ZSTD_matchState_t* const ms = &zc->blockState.matchState;
2422 ZSTD_matchState_t* const ms = &zc->blockState.matchState;
1895 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
2423 size_t cSize;
1896 (U32)dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
2424 DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%zu, dictLimit=%u, nextToUpdate=%u)",
2425 dstCapacity, ms->window.dictLimit, ms->nextToUpdate);
2426 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
2427
2428 /* Assert that we have correctly flushed the ctx params into the ms's copy */
2429 ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams);
2430
1897 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
2431 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) {
1898 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
2432 ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.searchLength);
1899 return 0; /* don't even attempt compression below a certain srcSize */
2433 cSize = 0;
2434 goto out; /* don't even attempt compression below a certain srcSize */
1900 }
2435 }
1901 ZSTD_resetSeqStore(&(zc->seqStore));
2436 ZSTD_resetSeqStore(&(zc->seqStore));
2437 ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; /* required for optimal parser to read stats from dictionary */
2438
2439 /* a gap between an attached dict and the current window is not safe,
2440 * they must remain adjacent, and when that stops being the case, the dict
2441 * must be unset */
2442 assert(ms->dictMatchState == NULL || ms->loadedDictEnd == ms->window.dictLimit);
1902
2443
1903 /* limited update after a very long match */
2444 /* limited update after a very long match */
1904 { const BYTE* const base = ms->window.base;
2445 { const BYTE* const base = ms->window.base;
1905 const BYTE* const istart = (const BYTE*)src;
2446 const BYTE* const istart = (const BYTE*)src;
1906 const U32 current = (U32)(istart-base);
2447 const U32 current = (U32)(istart-base);
2448 if (sizeof(ptrdiff_t)==8) assert(istart - base < (ptrdiff_t)(U32)(-1)); /* ensure no overflow */
1907 if (current > ms->nextToUpdate + 384)
2449 if (current > ms->nextToUpdate + 384)
1908 ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
2450 ms->nextToUpdate = current - MIN(192, (U32)(current - ms->nextToUpdate - 384));
1909 }
2451 }
1910
2452
1911 /* select and store sequences */
2453 /* select and store sequences */
1912 { U32 const extDict = ZSTD_window_hasExtDict(ms->window);
2454 { ZSTD_dictMode_e const dictMode = ZSTD_matchState_dictMode(ms);
1913 size_t lastLLSize;
2455 size_t lastLLSize;
1914 { int i;
2456 { int i;
1915 for (i = 0; i < ZSTD_REP_NUM; ++i)
2457 for (i = 0; i < ZSTD_REP_NUM; ++i)
@@ -1922,8 +2464,7 b' static size_t ZSTD_compressBlock_interna'
1922 ZSTD_ldm_blockCompress(&zc->externSeqStore,
2464 ZSTD_ldm_blockCompress(&zc->externSeqStore,
1923 ms, &zc->seqStore,
2465 ms, &zc->seqStore,
1924 zc->blockState.nextCBlock->rep,
2466 zc->blockState.nextCBlock->rep,
1925 &zc->appliedParams.cParams,
2467 src, srcSize);
1926 src, srcSize, extDict);
1927 assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
2468 assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
1928 } else if (zc->appliedParams.ldmParams.enableLdm) {
2469 } else if (zc->appliedParams.ldmParams.enableLdm) {
1929 rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
2470 rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
@@ -1939,31 +2480,38 b' static size_t ZSTD_compressBlock_interna'
1939 ZSTD_ldm_blockCompress(&ldmSeqStore,
2480 ZSTD_ldm_blockCompress(&ldmSeqStore,
1940 ms, &zc->seqStore,
2481 ms, &zc->seqStore,
1941 zc->blockState.nextCBlock->rep,
2482 zc->blockState.nextCBlock->rep,
1942 &zc->appliedParams.cParams,
2483 src, srcSize);
1943 src, srcSize, extDict);
1944 assert(ldmSeqStore.pos == ldmSeqStore.size);
2484 assert(ldmSeqStore.pos == ldmSeqStore.size);
1945 } else { /* not long range mode */
2485 } else { /* not long range mode */
1946 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
2486 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
1947 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
2487 lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
1948 }
2488 }
1949 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
2489 { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize;
1950 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
2490 ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize);
1951 } }
2491 } }
1952
2492
1953 /* encode sequences and literals */
2493 /* encode sequences and literals */
1954 { size_t const cSize = ZSTD_compressSequences(&zc->seqStore,
2494 cSize = ZSTD_compressSequences(&zc->seqStore,
1955 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
2495 &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
1956 &zc->appliedParams,
2496 &zc->appliedParams,
1957 dst, dstCapacity,
2497 dst, dstCapacity,
1958 srcSize, zc->entropyWorkspace, zc->bmi2);
2498 srcSize, zc->entropyWorkspace, zc->bmi2);
1959 if (ZSTD_isError(cSize) || cSize == 0) return cSize;
2499
1960 /* confirm repcodes and entropy tables */
2500 out:
1961 { ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
2501 if (!ZSTD_isError(cSize) && cSize != 0) {
1962 zc->blockState.prevCBlock = zc->blockState.nextCBlock;
2502 /* confirm repcodes and entropy tables when emitting a compressed block */
1963 zc->blockState.nextCBlock = tmp;
2503 ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
1964 }
2504 zc->blockState.prevCBlock = zc->blockState.nextCBlock;
1965 return cSize;
2505 zc->blockState.nextCBlock = tmp;
1966 }
2506 }
2507 /* We check that dictionaries have offset codes available for the first
2508 * block. After the first block, the offcode table might not have large
2509 * enough codes to represent the offsets in the data.
2510 */
2511 if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
2512 zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
2513
2514 return cSize;
1967 }
2515 }
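The restructured exit path makes the commit semantics explicit: entropy state is double-buffered, and only a successfully emitted compressed block promotes nextCBlock by swapping pointers, so a raw or errored block leaves the previous statistics current. The pattern in isolation (illustrative type):

typedef struct { int entropyTables; } demo_blockState;

/* Swap roles on commit; nothing is copied. On failure the caller
 * skips the swap and the last good state stays live. */
static void demo_commitBlockState(demo_blockState** prev, demo_blockState** next) {
    demo_blockState* const tmp = *prev;
    *prev = *next;
    *next = tmp;
}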
1968
2516
1969
2517
@@ -2005,13 +2553,13 b' static size_t ZSTD_compress_frameChunk ('
2005 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
2553 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
2006 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
2554 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
2007 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2555 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2008
2009 ZSTD_reduceIndex(cctx, correction);
2556 ZSTD_reduceIndex(cctx, correction);
2010 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
2557 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
2011 else ms->nextToUpdate -= correction;
2558 else ms->nextToUpdate -= correction;
2012 ms->loadedDictEnd = 0;
2559 ms->loadedDictEnd = 0;
2560 ms->dictMatchState = NULL;
2013 }
2561 }
2014 ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd);
2562 ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
2015 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
2563 if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
2016
2564
2017 { size_t cSize = ZSTD_compressBlock_internal(cctx,
2565 { size_t cSize = ZSTD_compressBlock_internal(cctx,
@@ -2020,11 +2568,8 b' static size_t ZSTD_compress_frameChunk ('
2020 if (ZSTD_isError(cSize)) return cSize;
2568 if (ZSTD_isError(cSize)) return cSize;
2021
2569
2022 if (cSize == 0) { /* block is not compressible */
2570 if (cSize == 0) { /* block is not compressible */
2023 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
2571 cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock);
2024 if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
2572 if (ZSTD_isError(cSize)) return cSize;
2025 MEM_writeLE32(op, cBlockHeader24); /* 4th byte will be overwritten */
2026 memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
2027 cSize = ZSTD_blockHeaderSize + blockSize;
2028 } else {
2573 } else {
2029 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2574 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2030 MEM_writeLE24(op, cBlockHeader24);
2575 MEM_writeLE24(op, cBlockHeader24);
@@ -2060,6 +2605,7 b' static size_t ZSTD_writeFrameHeader(void'
2060 BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
2605 BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
2061 size_t pos=0;
2606 size_t pos=0;
2062
2607
2608 assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN));
2063 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
2609 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
2064 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
2610 DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u",
2065 !params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
2611 !params.fParams.noDictIDFlag, dictID, dictIDSizeCode);
@@ -2122,7 +2668,7 b' static size_t ZSTD_compressContinue_inte'
2122 const void* src, size_t srcSize,
2668 const void* src, size_t srcSize,
2123 U32 frame, U32 lastFrameChunk)
2669 U32 frame, U32 lastFrameChunk)
2124 {
2670 {
2125 ZSTD_matchState_t* ms = &cctx->blockState.matchState;
2671 ZSTD_matchState_t* const ms = &cctx->blockState.matchState;
2126 size_t fhSize = 0;
2672 size_t fhSize = 0;
2127
2673
2128 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
2674 DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u",
@@ -2143,8 +2689,25 b' static size_t ZSTD_compressContinue_inte'
2143 if (!ZSTD_window_update(&ms->window, src, srcSize)) {
2689 if (!ZSTD_window_update(&ms->window, src, srcSize)) {
2144 ms->nextToUpdate = ms->window.dictLimit;
2690 ms->nextToUpdate = ms->window.dictLimit;
2145 }
2691 }
2146 if (cctx->appliedParams.ldmParams.enableLdm)
2692 if (cctx->appliedParams.ldmParams.enableLdm) {
2147 ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
2693 ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
2694 }
2695
2696 if (!frame) {
2697 /* overflow check and correction for block mode */
2698 if (ZSTD_window_needOverflowCorrection(ms->window, (const char*)src + srcSize)) {
2699 U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy);
2700 U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src);
2701 ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
2702 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
2703 ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31);
2704 ZSTD_reduceIndex(cctx, correction);
2705 if (ms->nextToUpdate < correction) ms->nextToUpdate = 0;
2706 else ms->nextToUpdate -= correction;
2707 ms->loadedDictEnd = 0;
2708 ms->dictMatchState = NULL;
2709 }
2710 }
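Block-mode callers previously depended on the frame loop for this; the new guard rebases every 32-bit match index before the window position can wrap. The core idea, sketched independently of zstd's actual table layout (ZSTD_reduceIndex applies it to the real hash and chain tables):

#include <stddef.h>
#include <stdint.h>

/* Subtract a common correction from each stored index; entries older
 * than the correction clamp to 0, i.e. are forgotten. */
static void demo_rebaseIndices(uint32_t* table, size_t tableSize,
                               uint32_t correction) {
    size_t i;
    for (i = 0; i < tableSize; ++i)
        table[i] = (table[i] < correction) ? 0 : table[i] - correction;
}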
2148
2711
2149 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
2712 DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (U32)cctx->blockSize);
2150 { size_t const cSize = frame ?
2713 { size_t const cSize = frame ?
@@ -2153,7 +2716,9 b' static size_t ZSTD_compressContinue_inte'
2153 if (ZSTD_isError(cSize)) return cSize;
2716 if (ZSTD_isError(cSize)) return cSize;
2154 cctx->consumedSrcSize += srcSize;
2717 cctx->consumedSrcSize += srcSize;
2155 cctx->producedCSize += (cSize + fhSize);
2718 cctx->producedCSize += (cSize + fhSize);
2156 if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
2719 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
2720 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
2721 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
2157 if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
2722 if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) {
2158 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
2723 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u",
2159 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
2724 (U32)cctx->pledgedSrcSizePlusOne-1, (U32)cctx->consumedSrcSize);
@@ -2184,44 +2749,50 b' size_t ZSTD_compressBlock(ZSTD_CCtx* cct'
2184 {
2749 {
2185 size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
2750 size_t const blockSizeMax = ZSTD_getBlockSize(cctx);
2186 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
2751 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
2752
2187 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
2753 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */);
2188 }
2754 }
2189
2755
2190 /*! ZSTD_loadDictionaryContent() :
2756 /*! ZSTD_loadDictionaryContent() :
2191 * @return : 0, or an error code
2757 * @return : 0, or an error code
2192 */
2758 */
2193 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* src, size_t srcSize)
2759 static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
2760 ZSTD_CCtx_params const* params,
2761 const void* src, size_t srcSize,
2762 ZSTD_dictTableLoadMethod_e dtlm)
2194 {
2763 {
2195 const BYTE* const ip = (const BYTE*) src;
2764 const BYTE* const ip = (const BYTE*) src;
2196 const BYTE* const iend = ip + srcSize;
2765 const BYTE* const iend = ip + srcSize;
2197 ZSTD_compressionParameters const* cParams = &params->cParams;
2198
2766
2199 ZSTD_window_update(&ms->window, src, srcSize);
2767 ZSTD_window_update(&ms->window, src, srcSize);
2200 ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
2768 ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
2201
2769
2770 /* Assert that the ms params match the params we're being given */
2771 ZSTD_assertEqualCParams(params->cParams, ms->cParams);
2772
2202 if (srcSize <= HASH_READ_SIZE) return 0;
2773 if (srcSize <= HASH_READ_SIZE) return 0;
2203
2774
2204 switch(params->cParams.strategy)
2775 switch(params->cParams.strategy)
2205 {
2776 {
2206 case ZSTD_fast:
2777 case ZSTD_fast:
2207 ZSTD_fillHashTable(ms, cParams, iend);
2778 ZSTD_fillHashTable(ms, iend, dtlm);
2208 break;
2779 break;
2209 case ZSTD_dfast:
2780 case ZSTD_dfast:
2210 ZSTD_fillDoubleHashTable(ms, cParams, iend);
2781 ZSTD_fillDoubleHashTable(ms, iend, dtlm);
2211 break;
2782 break;
2212
2783
2213 case ZSTD_greedy:
2784 case ZSTD_greedy:
2214 case ZSTD_lazy:
2785 case ZSTD_lazy:
2215 case ZSTD_lazy2:
2786 case ZSTD_lazy2:
2216 if (srcSize >= HASH_READ_SIZE)
2787 if (srcSize >= HASH_READ_SIZE)
2217 ZSTD_insertAndFindFirstIndex(ms, cParams, iend-HASH_READ_SIZE);
2788 ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
2218 break;
2789 break;
2219
2790
2220 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
2791 case ZSTD_btlazy2: /* we want the dictionary table fully sorted */
2221 case ZSTD_btopt:
2792 case ZSTD_btopt:
2222 case ZSTD_btultra:
2793 case ZSTD_btultra:
2223 if (srcSize >= HASH_READ_SIZE)
2794 if (srcSize >= HASH_READ_SIZE)
2224 ZSTD_updateTree(ms, cParams, iend-HASH_READ_SIZE, iend);
2795 ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
2225 break;
2796 break;
2226
2797
2227 default:
2798 default:
@@ -2256,7 +2827,12 b' static size_t ZSTD_checkDictNCount(short'
2256 * assumptions : magic number assumed already checked
2827 * assumptions : magic number assumed already checked
2257 * dictSize assumed > 8
2828 * dictSize assumed > 8
2258 */
2829 */
2259 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const void* dict, size_t dictSize, void* workspace)
2830 static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
2831 ZSTD_matchState_t* ms,
2832 ZSTD_CCtx_params const* params,
2833 const void* dict, size_t dictSize,
2834 ZSTD_dictTableLoadMethod_e dtlm,
2835 void* workspace)
2260 {
2836 {
2261 const BYTE* dictPtr = (const BYTE*)dict;
2837 const BYTE* dictPtr = (const BYTE*)dict;
2262 const BYTE* const dictEnd = dictPtr + dictSize;
2838 const BYTE* const dictEnd = dictPtr + dictSize;
@@ -2265,13 +2841,15 b' static size_t ZSTD_loadZstdDictionary(ZS'
2265 size_t dictID;
2841 size_t dictID;
2266
2842
2267 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2843 ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
2844 assert(dictSize > 8);
2845 assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
2268
2846
2269 dictPtr += 4; /* skip magic number */
2847 dictPtr += 4; /* skip magic number */
2270 dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
2848 dictID = params->fParams.noDictIDFlag ? 0 : MEM_readLE32(dictPtr);
2271 dictPtr += 4;
2849 dictPtr += 4;
2272
2850
2273 { unsigned maxSymbolValue = 255;
2851 { unsigned maxSymbolValue = 255;
2274 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.hufCTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
2852 size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr);
2275 if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
2853 if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
2276 if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
2854 if (maxSymbolValue < 255) return ERROR(dictionary_corrupted);
2277 dictPtr += hufHeaderSize;
2855 dictPtr += hufHeaderSize;
@@ -2282,7 +2860,8 b' static size_t ZSTD_loadZstdDictionary(ZS'
2282 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2860 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2283 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2861 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2284 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
2862 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
2285 CHECK_E( FSE_buildCTable_wksp(bs->entropy.offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
2863 /* fill all offset symbols to avoid garbage at end of table */
2864 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, offcodeNCount, MaxOff, offcodeLog, workspace, HUF_WORKSPACE_SIZE),
2286 dictionary_corrupted);
2865 dictionary_corrupted);
2287 dictPtr += offcodeHeaderSize;
2866 dictPtr += offcodeHeaderSize;
2288 }
2867 }
@@ -2294,7 +2873,7 b' static size_t ZSTD_loadZstdDictionary(ZS'
2294 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2873 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2295 /* Every match length code must have non-zero probability */
2874 /* Every match length code must have non-zero probability */
2296 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
2875 CHECK_F( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
2297 CHECK_E( FSE_buildCTable_wksp(bs->entropy.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
2876 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog, workspace, HUF_WORKSPACE_SIZE),
2298 dictionary_corrupted);
2877 dictionary_corrupted);
2299 dictPtr += matchlengthHeaderSize;
2878 dictPtr += matchlengthHeaderSize;
2300 }
2879 }
@@ -2306,7 +2885,7 b' static size_t ZSTD_loadZstdDictionary(ZS'
2306 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2885 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2307 /* Every literal length code must have non-zero probability */
2886 /* Every literal length code must have non-zero probability */
2308 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
2887 CHECK_F( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
2309 CHECK_E( FSE_buildCTable_wksp(bs->entropy.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
2888 CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog, workspace, HUF_WORKSPACE_SIZE),
2310 dictionary_corrupted);
2889 dictionary_corrupted);
2311 dictPtr += litlengthHeaderSize;
2890 dictPtr += litlengthHeaderSize;
2312 }
2891 }
@@ -2332,22 +2911,25 b' static size_t ZSTD_loadZstdDictionary(ZS'
2332 if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
2911 if (bs->rep[u] > dictContentSize) return ERROR(dictionary_corrupted);
2333 } }
2912 } }
2334
2913
2335 bs->entropy.hufCTable_repeatMode = HUF_repeat_valid;
2914 bs->entropy.huf.repeatMode = HUF_repeat_valid;
2336 bs->entropy.offcode_repeatMode = FSE_repeat_valid;
2915 bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid;
2337 bs->entropy.matchlength_repeatMode = FSE_repeat_valid;
2916 bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid;
2338 bs->entropy.litlength_repeatMode = FSE_repeat_valid;
2917 bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid;
2339 CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize));
2918 CHECK_F(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm));
2340 return dictID;
2919 return dictID;
2341 }
2920 }
2342 }
2921 }
2343
2922
2344 /** ZSTD_compress_insertDictionary() :
2923 /** ZSTD_compress_insertDictionary() :
2345 * @return : dictID, or an error code */
2924 * @return : dictID, or an error code */
2346 static size_t ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs, ZSTD_matchState_t* ms,
2925 static size_t
2347 ZSTD_CCtx_params const* params,
2926 ZSTD_compress_insertDictionary(ZSTD_compressedBlockState_t* bs,
2348 const void* dict, size_t dictSize,
2927 ZSTD_matchState_t* ms,
2349 ZSTD_dictContentType_e dictContentType,
2928 const ZSTD_CCtx_params* params,
2350 void* workspace)
2929 const void* dict, size_t dictSize,
2930 ZSTD_dictContentType_e dictContentType,
2931 ZSTD_dictTableLoadMethod_e dtlm,
2932 void* workspace)
2351 {
2933 {
2352 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
2934 DEBUGLOG(4, "ZSTD_compress_insertDictionary (dictSize=%u)", (U32)dictSize);
2353 if ((dict==NULL) || (dictSize<=8)) return 0;
2935 if ((dict==NULL) || (dictSize<=8)) return 0;
@@ -2356,12 +2938,12 b' static size_t ZSTD_compress_insertDictio'
2356
2938
2357 /* dict restricted modes */
2939 /* dict restricted modes */
2358 if (dictContentType == ZSTD_dct_rawContent)
2940 if (dictContentType == ZSTD_dct_rawContent)
2359 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
2941 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
2360
2942
2361 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
2943 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) {
2362 if (dictContentType == ZSTD_dct_auto) {
2944 if (dictContentType == ZSTD_dct_auto) {
2363 DEBUGLOG(4, "raw content dictionary detected");
2945 DEBUGLOG(4, "raw content dictionary detected");
2364 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize);
2946 return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm);
2365 }
2947 }
2366 if (dictContentType == ZSTD_dct_fullDict)
2948 if (dictContentType == ZSTD_dct_fullDict)
2367 return ERROR(dictionary_wrong);
2949 return ERROR(dictionary_wrong);
@@ -2369,17 +2951,18 b' static size_t ZSTD_compress_insertDictio'
2369 }
2951 }
2370
2952
2371 /* dict as full zstd dictionary */
2953 /* dict as full zstd dictionary */
2372 return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, workspace);
2954 return ZSTD_loadZstdDictionary(bs, ms, params, dict, dictSize, dtlm, workspace);
2373 }
2955 }
2374
2956
2375 /*! ZSTD_compressBegin_internal() :
2957 /*! ZSTD_compressBegin_internal() :
2376 * @return : 0, or an error code */
2958 * @return : 0, or an error code */
2377 size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2959 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2378 const void* dict, size_t dictSize,
2960 const void* dict, size_t dictSize,
2379 ZSTD_dictContentType_e dictContentType,
2961 ZSTD_dictContentType_e dictContentType,
2380 const ZSTD_CDict* cdict,
2962 ZSTD_dictTableLoadMethod_e dtlm,
2381 ZSTD_CCtx_params params, U64 pledgedSrcSize,
2963 const ZSTD_CDict* cdict,
2382 ZSTD_buffered_policy_e zbuff)
2964 ZSTD_CCtx_params params, U64 pledgedSrcSize,
2965 ZSTD_buffered_policy_e zbuff)
2383 {
2966 {
2384 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
2967 DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params.cParams.windowLog);
2385 /* params are supposed to be fully validated at this point */
2968 /* params are supposed to be fully validated at this point */
@@ -2387,9 +2970,7 b' size_t ZSTD_compressBegin_internal(ZSTD_'
2387 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2970 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2388
2971
2389 if (cdict && cdict->dictContentSize>0) {
2972 if (cdict && cdict->dictContentSize>0) {
2390 cctx->requestedParams = params;
2973 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
2391 return ZSTD_resetCCtx_usingCDict(cctx, cdict, params.cParams.windowLog,
2392 params.fParams, pledgedSrcSize, zbuff);
2393 }
2974 }
2394
2975
2395 CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
2976 CHECK_F( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
@@ -2397,7 +2978,7 b' size_t ZSTD_compressBegin_internal(ZSTD_'
2397 {
2978 {
2398 size_t const dictID = ZSTD_compress_insertDictionary(
2979 size_t const dictID = ZSTD_compress_insertDictionary(
2399 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
2980 cctx->blockState.prevCBlock, &cctx->blockState.matchState,
2400 &params, dict, dictSize, dictContentType, cctx->entropyWorkspace);
2981 &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
2401 if (ZSTD_isError(dictID)) return dictID;
2982 if (ZSTD_isError(dictID)) return dictID;
2402 assert(dictID <= (size_t)(U32)-1);
2983 assert(dictID <= (size_t)(U32)-1);
2403 cctx->dictID = (U32)dictID;
2984 cctx->dictID = (U32)dictID;
@@ -2408,6 +2989,7 b' size_t ZSTD_compressBegin_internal(ZSTD_'
2408 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
2989 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
2409 const void* dict, size_t dictSize,
2990 const void* dict, size_t dictSize,
2410 ZSTD_dictContentType_e dictContentType,
2991 ZSTD_dictContentType_e dictContentType,
2992 ZSTD_dictTableLoadMethod_e dtlm,
2411 const ZSTD_CDict* cdict,
2993 const ZSTD_CDict* cdict,
2412 ZSTD_CCtx_params params,
2994 ZSTD_CCtx_params params,
2413 unsigned long long pledgedSrcSize)
2995 unsigned long long pledgedSrcSize)
@@ -2416,7 +2998,7 b' size_t ZSTD_compressBegin_advanced_inter'
2416 /* compression parameters verification and optimization */
2998 /* compression parameters verification and optimization */
2417 CHECK_F( ZSTD_checkCParams(params.cParams) );
2999 CHECK_F( ZSTD_checkCParams(params.cParams) );
2418 return ZSTD_compressBegin_internal(cctx,
3000 return ZSTD_compressBegin_internal(cctx,
2419 dict, dictSize, dictContentType,
3001 dict, dictSize, dictContentType, dtlm,
2420 cdict,
3002 cdict,
2421 params, pledgedSrcSize,
3003 params, pledgedSrcSize,
2422 ZSTDb_not_buffered);
3004 ZSTDb_not_buffered);
@@ -2431,7 +3013,7 b' size_t ZSTD_compressBegin_advanced(ZSTD_'
2431 ZSTD_CCtx_params const cctxParams =
3013 ZSTD_CCtx_params const cctxParams =
2432 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3014 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2433 return ZSTD_compressBegin_advanced_internal(cctx,
3015 return ZSTD_compressBegin_advanced_internal(cctx,
2434 dict, dictSize, ZSTD_dct_auto,
3016 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast,
2435 NULL /*cdict*/,
3017 NULL /*cdict*/,
2436 cctxParams, pledgedSrcSize);
3018 cctxParams, pledgedSrcSize);
2437 }
3019 }
@@ -2442,7 +3024,7 b' size_t ZSTD_compressBegin_usingDict(ZSTD'
2442 ZSTD_CCtx_params const cctxParams =
3024 ZSTD_CCtx_params const cctxParams =
2443 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3025 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2444 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
3026 DEBUGLOG(4, "ZSTD_compressBegin_usingDict (dictSize=%u)", (U32)dictSize);
2445 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
3027 return ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
2446 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
3028 cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, ZSTDb_not_buffered);
2447 }
3029 }
2448
3030
@@ -2505,7 +3087,9 b' size_t ZSTD_compressEnd (ZSTD_CCtx* cctx'
2505 if (ZSTD_isError(cSize)) return cSize;
3087 if (ZSTD_isError(cSize)) return cSize;
2506 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
3088 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
2507 if (ZSTD_isError(endResult)) return endResult;
3089 if (ZSTD_isError(endResult)) return endResult;
2508 if (cctx->appliedParams.fParams.contentSizeFlag) { /* control src size */
3090 assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0));
3091 if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */
3092 ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1);
2509 DEBUGLOG(4, "end of frame : controlling src size");
3093 DEBUGLOG(4, "end of frame : controlling src size");
2510 if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
3094 if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) {
2511 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u",
3095 DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u",
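The assert added here documents the pledgedSrcSizePlusOne encoding: the context stores the pledged size plus one, so ZSTD_CONTENTSIZE_UNKNOWN, defined as (unsigned long long)-1 as the static assert checks, wraps to 0, while every known size, including an empty input, maps to a non-zero value. A one-line sketch of the encoding:

    /* 0 <=> unknown; any known size N (even N == 0) is stored as N + 1 */
    unsigned long long const pledgedSrcSizePlusOne = pledgedSrcSize + 1;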
@@ -2517,22 +3101,22 b' size_t ZSTD_compressEnd (ZSTD_CCtx* cctx'
2517
3101
2518
3102
2519 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
3103 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
2520 void* dst, size_t dstCapacity,
3104 void* dst, size_t dstCapacity,
2521 const void* src, size_t srcSize,
3105 const void* src, size_t srcSize,
2522 const void* dict,size_t dictSize,
3106 const void* dict,size_t dictSize,
2523 ZSTD_parameters params)
3107 ZSTD_parameters params)
2524 {
3108 {
2525 ZSTD_CCtx_params const cctxParams =
3109 ZSTD_CCtx_params const cctxParams =
2526 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3110 ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2527 DEBUGLOG(4, "ZSTD_compress_internal");
3111 DEBUGLOG(4, "ZSTD_compress_internal");
2528 return ZSTD_compress_advanced_internal(cctx,
3112 return ZSTD_compress_advanced_internal(cctx,
2529 dst, dstCapacity,
3113 dst, dstCapacity,
2530 src, srcSize,
3114 src, srcSize,
2531 dict, dictSize,
3115 dict, dictSize,
2532 cctxParams);
3116 cctxParams);
2533 }
3117 }
2534
3118
2535 size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
3119 size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
2536 void* dst, size_t dstCapacity,
3120 void* dst, size_t dstCapacity,
2537 const void* src, size_t srcSize,
3121 const void* src, size_t srcSize,
2538 const void* dict,size_t dictSize,
3122 const void* dict,size_t dictSize,
@@ -2540,7 +3124,11 b' size_t ZSTD_compress_advanced (ZSTD_CCtx'
2540 {
3124 {
2541 DEBUGLOG(4, "ZSTD_compress_advanced");
3125 DEBUGLOG(4, "ZSTD_compress_advanced");
2542 CHECK_F(ZSTD_checkCParams(params.cParams));
3126 CHECK_F(ZSTD_checkCParams(params.cParams));
2543 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
3127 return ZSTD_compress_internal(cctx,
3128 dst, dstCapacity,
3129 src, srcSize,
3130 dict, dictSize,
3131 params);
2544 }
3132 }
2545
3133
2546 /* Internal */
3134 /* Internal */
@@ -2551,37 +3139,44 b' size_t ZSTD_compress_advanced_internal('
2551 const void* dict,size_t dictSize,
3139 const void* dict,size_t dictSize,
2552 ZSTD_CCtx_params params)
3140 ZSTD_CCtx_params params)
2553 {
3141 {
2554 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)",
3142 DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (U32)srcSize);
2555 (U32)srcSize);
3143 CHECK_F( ZSTD_compressBegin_internal(cctx,
2556 CHECK_F( ZSTD_compressBegin_internal(cctx, dict, dictSize, ZSTD_dct_auto, NULL,
3144 dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL,
2557 params, srcSize, ZSTDb_not_buffered) );
3145 params, srcSize, ZSTDb_not_buffered) );
2558 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
3146 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2559 }
3147 }
2560
3148
2561 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize,
3149 size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
2562 const void* dict, size_t dictSize, int compressionLevel)
3150 void* dst, size_t dstCapacity,
3151 const void* src, size_t srcSize,
3152 const void* dict, size_t dictSize,
3153 int compressionLevel)
2563 {
3154 {
2564 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize ? srcSize : 1, dict ? dictSize : 0);
3155 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, srcSize + (!srcSize), dict ? dictSize : 0);
2565 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
3156 ZSTD_CCtx_params cctxParams = ZSTD_assignParamsToCCtxParams(cctx->requestedParams, params);
2566 assert(params.fParams.contentSizeFlag == 1);
3157 assert(params.fParams.contentSizeFlag == 1);
2567 ZSTD_CCtxParam_setParameter(&cctxParams, ZSTD_p_compressLiterals, compressionLevel>=0);
2568 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
3158 return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, cctxParams);
2569 }
3159 }
2570
3160
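The switch from "srcSize ? srcSize : 1" to "srcSize + (!srcSize)" is a branchless equivalent: !srcSize is 1 exactly when srcSize is 0, so an empty input is promoted to a size hint of 1 and every other size passes through unchanged. For instance:

    size_t const hintEmpty = 0 + !(size_t)0;      /* == 1   : promoted  */
    size_t const hintFull  = 100 + !(size_t)100;  /* == 100 : unchanged */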
2571 size_t ZSTD_compressCCtx (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
3161 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
3162 void* dst, size_t dstCapacity,
3163 const void* src, size_t srcSize,
3164 int compressionLevel)
2572 {
3165 {
2573 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize);
3166 DEBUGLOG(4, "ZSTD_compressCCtx (srcSize=%u)", (U32)srcSize);
3167 assert(cctx != NULL);
2574 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
3168 return ZSTD_compress_usingDict(cctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
2575 }
3169 }
2576
3170
2577 size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
3171 size_t ZSTD_compress(void* dst, size_t dstCapacity,
3172 const void* src, size_t srcSize,
3173 int compressionLevel)
2578 {
3174 {
2579 size_t result;
3175 size_t result;
2580 ZSTD_CCtx ctxBody;
3176 ZSTD_CCtx ctxBody;
2581 memset(&ctxBody, 0, sizeof(ctxBody));
3177 ZSTD_initCCtx(&ctxBody, ZSTD_defaultCMem);
2582 ctxBody.customMem = ZSTD_defaultCMem;
2583 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
3178 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
2584 ZSTD_free(ctxBody.workSpace, ZSTD_defaultCMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
3179 ZSTD_freeCCtxContent(&ctxBody); /* can't free ctxBody itself, as it's on stack; free only heap content */
2585 return result;
3180 return result;
2586 }
3181 }
2587
3182
@@ -2619,9 +3214,9 b' static size_t ZSTD_initCDict_internal('
2619 ZSTD_dictContentType_e dictContentType,
3214 ZSTD_dictContentType_e dictContentType,
2620 ZSTD_compressionParameters cParams)
3215 ZSTD_compressionParameters cParams)
2621 {
3216 {
2622 DEBUGLOG(3, "ZSTD_initCDict_internal, dictContentType %u", (U32)dictContentType);
3217 DEBUGLOG(3, "ZSTD_initCDict_internal (dictContentType:%u)", (U32)dictContentType);
2623 assert(!ZSTD_checkCParams(cParams));
3218 assert(!ZSTD_checkCParams(cParams));
2624 cdict->cParams = cParams;
3219 cdict->matchState.cParams = cParams;
2625 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
3220 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
2626 cdict->dictBuffer = NULL;
3221 cdict->dictBuffer = NULL;
2627 cdict->dictContent = dictBuffer;
3222 cdict->dictContent = dictBuffer;
@@ -2654,7 +3249,7 b' static size_t ZSTD_initCDict_internal('
2654 { size_t const dictID = ZSTD_compress_insertDictionary(
3249 { size_t const dictID = ZSTD_compress_insertDictionary(
2655 &cdict->cBlockState, &cdict->matchState, &params,
3250 &cdict->cBlockState, &cdict->matchState, &params,
2656 cdict->dictContent, cdict->dictContentSize,
3251 cdict->dictContent, cdict->dictContentSize,
2657 dictContentType, cdict->workspace);
3252 dictContentType, ZSTD_dtlm_full, cdict->workspace);
2658 if (ZSTD_isError(dictID)) return dictID;
3253 if (ZSTD_isError(dictID)) return dictID;
2659 assert(dictID <= (size_t)(U32)-1);
3254 assert(dictID <= (size_t)(U32)-1);
2660 cdict->dictID = (U32)dictID;
3255 cdict->dictID = (U32)dictID;
@@ -2775,7 +3370,7 b' const ZSTD_CDict* ZSTD_initStaticCDict('
2775 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
3370 ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict)
2776 {
3371 {
2777 assert(cdict != NULL);
3372 assert(cdict != NULL);
2778 return cdict->cParams;
3373 return cdict->matchState.cParams;
2779 }
3374 }
2780
3375
2781 /* ZSTD_compressBegin_usingCDict_advanced() :
3376 /* ZSTD_compressBegin_usingCDict_advanced() :
@@ -2799,7 +3394,7 b' size_t ZSTD_compressBegin_usingCDict_adv'
2799 }
3394 }
2800 params.fParams = fParams;
3395 params.fParams = fParams;
2801 return ZSTD_compressBegin_internal(cctx,
3396 return ZSTD_compressBegin_internal(cctx,
2802 NULL, 0, ZSTD_dct_auto,
3397 NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast,
2803 cdict,
3398 cdict,
2804 params, pledgedSrcSize,
3399 params, pledgedSrcSize,
2805 ZSTDb_not_buffered);
3400 ZSTDb_not_buffered);
@@ -2813,7 +3408,7 b' size_t ZSTD_compressBegin_usingCDict(ZST'
2813 {
3408 {
2814 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
3409 ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
2815 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
3410 DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
2816 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, 0);
3411 return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
2817 }
3412 }
2818
3413
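ZSTD_compressBegin_usingCDict now pledges ZSTD_CONTENTSIZE_UNKNOWN explicitly instead of relying on 0 being read as "unknown", matching the transition notes elsewhere in this file. For reference, the usual CDict round trip served by these entry points, using only public API (names are placeholders):

    ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuf, dictSize, 3 /* level */);
    ZSTD_CCtx*  const cctx  = ZSTD_createCCtx();
    size_t const cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity,
                                                  src, srcSize, cdict);
    /* check ZSTD_isError(cSize) before using the output */
    ZSTD_freeCCtx(cctx);
    ZSTD_freeCDict(cdict);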
2819 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
3414 size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
@@ -2880,16 +3475,17 b' size_t ZSTD_CStreamOutSize(void)'
2880 static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
3475 static size_t ZSTD_resetCStream_internal(ZSTD_CStream* cctx,
2881 const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
3476 const void* const dict, size_t const dictSize, ZSTD_dictContentType_e const dictContentType,
2882 const ZSTD_CDict* const cdict,
3477 const ZSTD_CDict* const cdict,
2883 ZSTD_CCtx_params const params, unsigned long long const pledgedSrcSize)
3478 ZSTD_CCtx_params params, unsigned long long const pledgedSrcSize)
2884 {
3479 {
2885 DEBUGLOG(4, "ZSTD_resetCStream_internal (disableLiteralCompression=%i)",
3480 DEBUGLOG(4, "ZSTD_resetCStream_internal");
2886 params.disableLiteralCompression);
3481 /* Finalize the compression parameters */
3482 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
2887 /* params are supposed to be fully validated at this point */
3483 /* params are supposed to be fully validated at this point */
2888 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
3484 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
2889 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3485 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2890
3486
2891 CHECK_F( ZSTD_compressBegin_internal(cctx,
3487 CHECK_F( ZSTD_compressBegin_internal(cctx,
2892 dict, dictSize, dictContentType,
3488 dict, dictSize, dictContentType, ZSTD_dtlm_fast,
2893 cdict,
3489 cdict,
2894 params, pledgedSrcSize,
3490 params, pledgedSrcSize,
2895 ZSTDb_buffered) );
3491 ZSTDb_buffered) );
@@ -2912,7 +3508,6 b' size_t ZSTD_resetCStream(ZSTD_CStream* z'
2912 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize);
3508 DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (U32)pledgedSrcSize);
2913 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
3509 if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN;
2914 params.fParams.contentSizeFlag = 1;
3510 params.fParams.contentSizeFlag = 1;
2915 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, 0);
2916 return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
3511 return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize);
2917 }
3512 }
2918
3513
@@ -2925,6 +3520,7 b' size_t ZSTD_initCStream_internal(ZSTD_CS'
2925 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
3520 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize)
2926 {
3521 {
2927 DEBUGLOG(4, "ZSTD_initCStream_internal");
3522 DEBUGLOG(4, "ZSTD_initCStream_internal");
3523 params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize);
2928 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
3524 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
2929 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
3525 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
2930
3526
@@ -2991,25 +3587,21 b' size_t ZSTD_initCStream_advanced(ZSTD_CS'
2991 (U32)pledgedSrcSize, params.fParams.contentSizeFlag);
3587 (U32)pledgedSrcSize, params.fParams.contentSizeFlag);
2992 CHECK_F( ZSTD_checkCParams(params.cParams) );
3588 CHECK_F( ZSTD_checkCParams(params.cParams) );
2993 if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */
3589 if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */
2994 { ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3590 zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
2995 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, cctxParams, pledgedSrcSize);
3591 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, zcs->requestedParams, pledgedSrcSize);
2996 }
2997 }
3592 }
2998
3593
2999 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
3594 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
3000 {
3595 {
3001 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
3596 ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel);
3002 ZSTD_CCtx_params const cctxParams =
3597 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, zcs->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN);
3003 ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3004 return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, cctxParams, ZSTD_CONTENTSIZE_UNKNOWN);
3005 }
3598 }
3006
3599
3007 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
3600 size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss)
3008 {
3601 {
3009 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */
3602 U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */
3010 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, pledgedSrcSize, 0);
3603 ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel);
3011 ZSTD_CCtx_params const cctxParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params);
3604 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, zcs->requestedParams, pledgedSrcSize);
3012 return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, cctxParams, pledgedSrcSize);
3013 }
3605 }
3014
3606
3015 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
3607 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
@@ -3073,7 +3665,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3073 ip = iend;
3665 ip = iend;
3074 op += cSize;
3666 op += cSize;
3075 zcs->frameEnded = 1;
3667 zcs->frameEnded = 1;
3076 ZSTD_startNewCompression(zcs);
3668 ZSTD_CCtx_reset(zcs);
3077 someMoreWork = 0; break;
3669 someMoreWork = 0; break;
3078 }
3670 }
3079 /* complete loading into inBuffer */
3671 /* complete loading into inBuffer */
@@ -3126,7 +3718,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3126 if (zcs->frameEnded) {
3718 if (zcs->frameEnded) {
3127 DEBUGLOG(5, "Frame completed directly in outBuffer");
3719 DEBUGLOG(5, "Frame completed directly in outBuffer");
3128 someMoreWork = 0;
3720 someMoreWork = 0;
3129 ZSTD_startNewCompression(zcs);
3721 ZSTD_CCtx_reset(zcs);
3130 }
3722 }
3131 break;
3723 break;
3132 }
3724 }
@@ -3154,7 +3746,7 b' size_t ZSTD_compressStream_generic(ZSTD_'
3154 if (zcs->frameEnded) {
3746 if (zcs->frameEnded) {
3155 DEBUGLOG(5, "Frame completed on flush");
3747 DEBUGLOG(5, "Frame completed on flush");
3156 someMoreWork = 0;
3748 someMoreWork = 0;
3157 ZSTD_startNewCompression(zcs);
3749 ZSTD_CCtx_reset(zcs);
3158 break;
3750 break;
3159 }
3751 }
3160 zcs->streamStage = zcss_load;
3752 zcs->streamStage = zcss_load;
@@ -3207,19 +3799,16 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3207 params.cParams = ZSTD_getCParamsFromCCtxParams(
3799 params.cParams = ZSTD_getCParamsFromCCtxParams(
3208 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
3800 &cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, 0 /*dictSize*/);
3209
3801
3802
3210 #ifdef ZSTD_MULTITHREAD
3803 #ifdef ZSTD_MULTITHREAD
3211 if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
3804 if ((cctx->pledgedSrcSizePlusOne-1) <= ZSTDMT_JOBSIZE_MIN) {
3212 params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
3805 params.nbWorkers = 0; /* do not invoke multi-threading when src size is too small */
3213 }
3806 }
3214 if (params.nbWorkers > 0) {
3807 if (params.nbWorkers > 0) {
3215 /* mt context creation */
3808 /* mt context creation */
3216 if (cctx->mtctx == NULL || (params.nbWorkers != ZSTDMT_getNbWorkers(cctx->mtctx))) {
3809 if (cctx->mtctx == NULL) {
3217 DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u",
3810 DEBUGLOG(4, "ZSTD_compress_generic: creating new mtctx for nbWorkers=%u",
3218 params.nbWorkers);
3811 params.nbWorkers);
3219 if (cctx->mtctx != NULL)
3220 DEBUGLOG(4, "ZSTD_compress_generic: previous nbWorkers was %u",
3221 ZSTDMT_getNbWorkers(cctx->mtctx));
3222 ZSTDMT_freeCCtx(cctx->mtctx);
3223 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
3812 cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem);
3224 if (cctx->mtctx == NULL) return ERROR(memory_allocation);
3813 if (cctx->mtctx == NULL) return ERROR(memory_allocation);
3225 }
3814 }
@@ -3251,8 +3840,9 b' size_t ZSTD_compress_generic (ZSTD_CCtx*'
3251 { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
3840 { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp);
3252 if ( ZSTD_isError(flushMin)
3841 if ( ZSTD_isError(flushMin)
3253 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
3842 || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */
3254 ZSTD_startNewCompression(cctx);
3843 ZSTD_CCtx_reset(cctx);
3255 }
3844 }
3845 DEBUGLOG(5, "completed ZSTD_compress_generic delegating to ZSTDMT_compressStream_generic");
3256 return flushMin;
3846 return flushMin;
3257 } }
3847 } }
3258 #endif
3848 #endif
@@ -3308,82 +3898,83 b' size_t ZSTD_endStream(ZSTD_CStream* zcs,'
3308
3898
3309 #define ZSTD_MAX_CLEVEL 22
3899 #define ZSTD_MAX_CLEVEL 22
3310 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
3900 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
3901 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
3311
3902
3312 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
3903 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
3313 { /* "default" - guarantees a monotonically increasing memory budget */
3904 { /* "default" - guarantees a monotonically increasing memory budget */
3314 /* W, C, H, S, L, TL, strat */
3905 /* W, C, H, S, L, TL, strat */
3315 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
3906 { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */
3316 { 19, 13, 14, 1, 7, 1, ZSTD_fast }, /* level 1 */
3907 { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */
3317 { 19, 15, 16, 1, 6, 1, ZSTD_fast }, /* level 2 */
3908 { 19, 15, 16, 1, 6, 0, ZSTD_fast }, /* level 2 */
3318 { 20, 16, 17, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3909 { 20, 16, 17, 1, 5, 1, ZSTD_dfast }, /* level 3 */
3319 { 20, 17, 18, 1, 5, 8, ZSTD_dfast }, /* level 4 */
3910 { 20, 18, 18, 1, 5, 1, ZSTD_dfast }, /* level 4 */
3320 { 20, 17, 18, 2, 5, 16, ZSTD_greedy }, /* level 5 */
3911 { 20, 18, 18, 2, 5, 2, ZSTD_greedy }, /* level 5 */
3321 { 21, 17, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
3912 { 21, 18, 19, 2, 5, 4, ZSTD_lazy }, /* level 6 */
3322 { 21, 18, 19, 3, 5, 16, ZSTD_lazy }, /* level 7 */
3913 { 21, 18, 19, 3, 5, 8, ZSTD_lazy2 }, /* level 7 */
3323 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3914 { 21, 19, 19, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3324 { 21, 19, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
3915 { 21, 19, 20, 4, 5, 16, ZSTD_lazy2 }, /* level 9 */
3325 { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3916 { 21, 20, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3326 { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3917 { 21, 21, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3327 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3918 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3328 { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
3919 { 22, 21, 22, 4, 5, 32, ZSTD_btlazy2 }, /* level 13 */
3329 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
3920 { 22, 21, 22, 5, 5, 32, ZSTD_btlazy2 }, /* level 14 */
3330 { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
3921 { 22, 22, 22, 6, 5, 32, ZSTD_btlazy2 }, /* level 15 */
3331 { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */
3922 { 22, 21, 22, 4, 5, 48, ZSTD_btopt }, /* level 16 */
3332 { 23, 22, 22, 4, 4, 48, ZSTD_btopt }, /* level 17 */
3923 { 23, 22, 22, 4, 4, 64, ZSTD_btopt }, /* level 17 */
3333 { 23, 22, 22, 5, 3, 64, ZSTD_btopt }, /* level 18 */
3924 { 23, 23, 22, 6, 3,256, ZSTD_btopt }, /* level 18 */
3334 { 23, 23, 22, 7, 3,128, ZSTD_btopt }, /* level 19 */
3925 { 23, 24, 22, 7, 3,256, ZSTD_btultra }, /* level 19 */
3335 { 25, 25, 23, 7, 3,128, ZSTD_btultra }, /* level 20 */
3926 { 25, 25, 23, 7, 3,256, ZSTD_btultra }, /* level 20 */
3336 { 26, 26, 24, 7, 3,256, ZSTD_btultra }, /* level 21 */
3927 { 26, 26, 24, 7, 3,512, ZSTD_btultra }, /* level 21 */
3337 { 27, 27, 25, 9, 3,512, ZSTD_btultra }, /* level 22 */
3928 { 27, 27, 25, 9, 3,999, ZSTD_btultra }, /* level 22 */
3338 },
3929 },
3339 { /* for srcSize <= 256 KB */
3930 { /* for srcSize <= 256 KB */
3340 /* W, C, H, S, L, T, strat */
3931 /* W, C, H, S, L, T, strat */
3341 { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3932 { 18, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3342 { 18, 13, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
3933 { 18, 13, 14, 1, 6, 0, ZSTD_fast }, /* level 1 */
3343 { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
3934 { 18, 14, 14, 1, 5, 1, ZSTD_dfast }, /* level 2 */
3344 { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3935 { 18, 16, 16, 1, 4, 1, ZSTD_dfast }, /* level 3 */
3345 { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
3936 { 18, 16, 17, 2, 5, 2, ZSTD_greedy }, /* level 4.*/
3346 { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
3937 { 18, 18, 18, 3, 5, 2, ZSTD_greedy }, /* level 5.*/
3347 { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
3938 { 18, 18, 19, 3, 5, 4, ZSTD_lazy }, /* level 6.*/
3348 { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
3939 { 18, 18, 19, 4, 4, 4, ZSTD_lazy }, /* level 7 */
3349 { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3940 { 18, 18, 19, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3350 { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3941 { 18, 18, 19, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3351 { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3942 { 18, 18, 19, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3352 { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
3943 { 18, 18, 19, 5, 4, 16, ZSTD_btlazy2 }, /* level 11.*/
3353 { 18, 18, 17, 5, 4, 8, ZSTD_btlazy2 }, /* level 12.*/
3944 { 18, 19, 19, 6, 4, 16, ZSTD_btlazy2 }, /* level 12.*/
3354 { 18, 19, 17, 7, 4, 8, ZSTD_btlazy2 }, /* level 13 */
3945 { 18, 19, 19, 8, 4, 16, ZSTD_btlazy2 }, /* level 13 */
3355 { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
3946 { 18, 18, 19, 4, 4, 24, ZSTD_btopt }, /* level 14.*/
3356 { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
3947 { 18, 18, 19, 4, 3, 24, ZSTD_btopt }, /* level 15.*/
3357 { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
3948 { 18, 19, 19, 6, 3, 64, ZSTD_btopt }, /* level 16.*/
3358 { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
3949 { 18, 19, 19, 8, 3,128, ZSTD_btopt }, /* level 17.*/
3359 { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
3950 { 18, 19, 19, 10, 3,256, ZSTD_btopt }, /* level 18.*/
3360 { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
3951 { 18, 19, 19, 10, 3,256, ZSTD_btultra }, /* level 19.*/
3361 { 18, 19, 18, 11, 3,512, ZSTD_btultra }, /* level 20.*/
3952 { 18, 19, 19, 11, 3,512, ZSTD_btultra }, /* level 20.*/
3362 { 18, 19, 18, 12, 3,512, ZSTD_btultra }, /* level 21.*/
3953 { 18, 19, 19, 12, 3,512, ZSTD_btultra }, /* level 21.*/
3363 { 18, 19, 18, 13, 3,512, ZSTD_btultra }, /* level 22.*/
3954 { 18, 19, 19, 13, 3,999, ZSTD_btultra }, /* level 22.*/
3364 },
3955 },
3365 { /* for srcSize <= 128 KB */
3956 { /* for srcSize <= 128 KB */
3366 /* W, C, H, S, L, T, strat */
3957 /* W, C, H, S, L, T, strat */
3367 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* level 0 - not used */
3958 { 17, 12, 12, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3368 { 17, 12, 13, 1, 6, 1, ZSTD_fast }, /* level 1 */
3959 { 17, 12, 13, 1, 6, 0, ZSTD_fast }, /* level 1 */
3369 { 17, 13, 16, 1, 5, 1, ZSTD_fast }, /* level 2 */
3960 { 17, 13, 15, 1, 5, 0, ZSTD_fast }, /* level 2 */
3370 { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
3961 { 17, 15, 16, 2, 5, 1, ZSTD_dfast }, /* level 3 */
3371 { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
3962 { 17, 17, 17, 2, 4, 1, ZSTD_dfast }, /* level 4 */
3372 { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
3963 { 17, 16, 17, 3, 4, 2, ZSTD_greedy }, /* level 5 */
3373 { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
3964 { 17, 17, 17, 3, 4, 4, ZSTD_lazy }, /* level 6 */
3374 { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
3965 { 17, 17, 17, 3, 4, 8, ZSTD_lazy2 }, /* level 7 */
3375 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3966 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3376 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3967 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3377 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3968 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3378 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
3969 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
3379 { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
3970 { 17, 18, 17, 6, 4, 16, ZSTD_btlazy2 }, /* level 12 */
3380 { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
3971 { 17, 18, 17, 8, 4, 16, ZSTD_btlazy2 }, /* level 13.*/
3381 { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
3972 { 17, 18, 17, 4, 4, 32, ZSTD_btopt }, /* level 14.*/
3382 { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
3973 { 17, 18, 17, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3383 { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
3974 { 17, 18, 17, 7, 3,128, ZSTD_btopt }, /* level 16.*/
3384 { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
3975 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 17.*/
3385 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
3976 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 18.*/
3386 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
3977 { 17, 18, 17, 8, 3,256, ZSTD_btultra }, /* level 19.*/
3387 { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/
3978 { 17, 18, 17, 9, 3,256, ZSTD_btultra }, /* level 20.*/
3388 { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/
3979 { 17, 18, 17, 10, 3,256, ZSTD_btultra }, /* level 21.*/
3389 { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/
3980 { 17, 18, 17, 11, 3,512, ZSTD_btultra }, /* level 22.*/
@@ -3391,28 +3982,28 b' static const ZSTD_compressionParameters '
3391 { /* for srcSize <= 16 KB */
3982 { /* for srcSize <= 16 KB */
3392 /* W, C, H, S, L, T, strat */
3983 /* W, C, H, S, L, T, strat */
3393 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3984 { 14, 12, 13, 1, 5, 1, ZSTD_fast }, /* base for negative levels */
3394 { 14, 14, 14, 1, 6, 1, ZSTD_fast }, /* level 1 */
3985 { 14, 14, 15, 1, 5, 0, ZSTD_fast }, /* level 1 */
3395 { 14, 14, 14, 1, 4, 1, ZSTD_fast }, /* level 2 */
3986 { 14, 14, 15, 1, 4, 0, ZSTD_fast }, /* level 2 */
3396 { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
3987 { 14, 14, 14, 2, 4, 1, ZSTD_dfast }, /* level 3.*/
3397 { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
3988 { 14, 14, 14, 4, 4, 2, ZSTD_greedy }, /* level 4.*/
3398 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
3989 { 14, 14, 14, 3, 4, 4, ZSTD_lazy }, /* level 5.*/
3399 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
3990 { 14, 14, 14, 4, 4, 8, ZSTD_lazy2 }, /* level 6 */
3400 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
3991 { 14, 14, 14, 6, 4, 8, ZSTD_lazy2 }, /* level 7 */
3401 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
3992 { 14, 14, 14, 8, 4, 8, ZSTD_lazy2 }, /* level 8.*/
3402 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
3993 { 14, 15, 14, 5, 4, 8, ZSTD_btlazy2 }, /* level 9.*/
3403 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
3994 { 14, 15, 14, 9, 4, 8, ZSTD_btlazy2 }, /* level 10.*/
3404 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
3995 { 14, 15, 14, 3, 4, 12, ZSTD_btopt }, /* level 11.*/
3405 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
3996 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
3406 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
3997 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
3407 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
3998 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
3408 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3999 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3409 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
4000 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
3410 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
4001 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
3411 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
4002 { 14, 15, 15, 8, 3,256, ZSTD_btopt }, /* level 18.*/
3412 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
4003 { 14, 15, 15, 6, 3,256, ZSTD_btultra }, /* level 19.*/
3413 { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
4004 { 14, 15, 15, 8, 3,256, ZSTD_btultra }, /* level 20.*/
3414 { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
4005 { 14, 15, 15, 9, 3,256, ZSTD_btultra }, /* level 21.*/
3415 { 14, 15, 15, 10, 3,256, ZSTD_btultra }, /* level 22.*/
4006 { 14, 15, 15, 10, 3,512, ZSTD_btultra }, /* level 22.*/
3416 },
4007 },
3417 };
4008 };
3418
4009
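Each row of these tables is a ZSTD_compressionParameters initializer; the header comment "W, C, H, S, L, TL, strat" maps to windowLog, chainLog, hashLog, searchLog, searchLength, targetLength and strategy. As a worked reading of the updated default-table level 19 row:

    /* { 23, 24, 22, 7, 3,256, ZSTD_btultra } expands to: */
    ZSTD_compressionParameters const cp19 = {
        23,            /* windowLog    : 8 MB window */
        24,            /* chainLog     : used as the binary-tree log by bt* strategies */
        22,            /* hashLog */
        7,             /* searchLog */
        3,             /* searchLength : minimum match length */
        256,           /* targetLength */
        ZSTD_btultra,  /* strategy */
    };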
@@ -27,6 +27,7 b''
27 extern "C" {
27 extern "C" {
28 #endif
28 #endif
29
29
30
30 /*-*************************************
31 /*-*************************************
31 * Constants
32 * Constants
32 ***************************************/
33 ***************************************/
@@ -37,7 +38,8 b' extern "C" {'
37 It's not a big deal though : candidate will just be sorted again.
38 It's not a big deal though : candidate will just be sorted again.
38 Additionally, candidate position 1 will be lost.
39 Additionally, candidate position 1 will be lost.
39 But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
40 But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss.
40 The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy */
41 The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy
42 Constant required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */
41
43
42
44
43 /*-*************************************
45 /*-*************************************
@@ -46,6 +48,12 b' extern "C" {'
46 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
48 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
47 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
49 typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;
48
50
51 typedef enum {
52 ZSTD_dictDefaultAttach = 0,
53 ZSTD_dictForceAttach = 1,
54 ZSTD_dictForceCopy = -1,
55 } ZSTD_dictAttachPref_e;
56
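This preference controls whether a CDict's match state is referenced in place ("attached") or its tables are copied into the CCtx. The decision logic itself is not part of this hunk, so the helper below is only a hypothetical illustration of the three values:

    /* Hypothetical sketch; attachIsCheap stands in for whatever heuristic
     * the reset path applies (e.g. small dictionary, compatible params). */
    static int useAttach(ZSTD_dictAttachPref_e pref, int attachIsCheap)
    {
        if (pref == ZSTD_dictForceAttach) return 1; /* always reference in place */
        if (pref == ZSTD_dictForceCopy)   return 0; /* always copy the tables   */
        return attachIsCheap;  /* ZSTD_dictDefaultAttach: heuristic decides */
    }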
49 typedef struct ZSTD_prefixDict_s {
57 typedef struct ZSTD_prefixDict_s {
50 const void* dict;
58 const void* dict;
51 size_t dictSize;
59 size_t dictSize;
@@ -53,14 +61,22 b' typedef struct ZSTD_prefixDict_s {'
53 } ZSTD_prefixDict;
61 } ZSTD_prefixDict;
54
62
55 typedef struct {
63 typedef struct {
56 U32 hufCTable[HUF_CTABLE_SIZE_U32(255)];
64 U32 CTable[HUF_CTABLE_SIZE_U32(255)];
65 HUF_repeat repeatMode;
66 } ZSTD_hufCTables_t;
67
68 typedef struct {
57 FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
69 FSE_CTable offcodeCTable[FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
58 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
70 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
59 FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
71 FSE_CTable litlengthCTable[FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
60 HUF_repeat hufCTable_repeatMode;
61 FSE_repeat offcode_repeatMode;
72 FSE_repeat offcode_repeatMode;
62 FSE_repeat matchlength_repeatMode;
73 FSE_repeat matchlength_repeatMode;
63 FSE_repeat litlength_repeatMode;
74 FSE_repeat litlength_repeatMode;
75 } ZSTD_fseCTables_t;
76
77 typedef struct {
78 ZSTD_hufCTables_t huf;
79 ZSTD_fseCTables_t fse;
64 } ZSTD_entropyCTables_t;
80 } ZSTD_entropyCTables_t;
65
81
66 typedef struct {
82 typedef struct {
@@ -76,26 +92,27 b' typedef struct {'
76 U32 rep[ZSTD_REP_NUM];
92 U32 rep[ZSTD_REP_NUM];
77 } ZSTD_optimal_t;
93 } ZSTD_optimal_t;
78
94
95 typedef enum { zop_dynamic=0, zop_predef } ZSTD_OptPrice_e;
96
79 typedef struct {
97 typedef struct {
80 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
98 /* All tables are allocated inside cctx->workspace by ZSTD_resetCCtx_internal() */
81 U32* litFreq; /* table of literals statistics, of size 256 */
99 U32* litFreq; /* table of literals statistics, of size 256 */
82 U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
100 U32* litLengthFreq; /* table of litLength statistics, of size (MaxLL+1) */
83 U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
101 U32* matchLengthFreq; /* table of matchLength statistics, of size (MaxML+1) */
84 U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
102 U32* offCodeFreq; /* table of offCode statistics, of size (MaxOff+1) */
85 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
103 ZSTD_match_t* matchTable; /* list of found matches, of size ZSTD_OPT_NUM+1 */
86 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
104 ZSTD_optimal_t* priceTable; /* All positions tracked by optimal parser, of size ZSTD_OPT_NUM+1 */
87
105
88 U32 litSum; /* nb of literals */
106 U32 litSum; /* nb of literals */
89 U32 litLengthSum; /* nb of litLength codes */
107 U32 litLengthSum; /* nb of litLength codes */
90 U32 matchLengthSum; /* nb of matchLength codes */
108 U32 matchLengthSum; /* nb of matchLength codes */
91 U32 offCodeSum; /* nb of offset codes */
109 U32 offCodeSum; /* nb of offset codes */
92 /* begin updated by ZSTD_setLog2Prices */
110 U32 litSumBasePrice; /* to compare to log2(litfreq) */
93 U32 log2litSum; /* pow2 to compare log2(litfreq) to */
111 U32 litLengthSumBasePrice; /* to compare to log2(llfreq) */
94 U32 log2litLengthSum; /* pow2 to compare log2(llfreq) to */
112 U32 matchLengthSumBasePrice;/* to compare to log2(mlfreq) */
95 U32 log2matchLengthSum; /* pow2 to compare log2(mlfreq) to */
113 U32 offCodeSumBasePrice; /* to compare to log2(offreq) */
96 U32 log2offCodeSum; /* pow2 to compare log2(offreq) to */
114 ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */
97 /* end : updated by ZSTD_setLog2Prices */
115 const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */
98 U32 staticPrices; /* prices follow a pre-defined cost structure, statistics are irrelevant */
99 } optState_t;
116 } optState_t;
100
117
101 typedef struct {
118 typedef struct {
@@ -111,17 +128,20 b' typedef struct {'
111 U32 lowLimit; /* below that point, no more data */
128 U32 lowLimit; /* below that point, no more data */
112 } ZSTD_window_t;
129 } ZSTD_window_t;
113
130
114 typedef struct {
131 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
115 ZSTD_window_t window; /* State for window round buffer management */
132 struct ZSTD_matchState_t {
116 U32 loadedDictEnd; /* index of end of dictionary */
133 ZSTD_window_t window; /* State for window round buffer management */
117 U32 nextToUpdate; /* index from which to continue table update */
134 U32 loadedDictEnd; /* index of end of dictionary */
118 U32 nextToUpdate3; /* index from which to continue table update */
135 U32 nextToUpdate; /* index from which to continue table update */
119 U32 hashLog3; /* dispatch table : larger == faster, more memory */
136 U32 nextToUpdate3; /* index from which to continue table update */
137 U32 hashLog3; /* dispatch table : larger == faster, more memory */
120 U32* hashTable;
138 U32* hashTable;
121 U32* hashTable3;
139 U32* hashTable3;
122 U32* chainTable;
140 U32* chainTable;
123 optState_t opt; /* optimal parser state */
141 optState_t opt; /* optimal parser state */
124 } ZSTD_matchState_t;
142 const ZSTD_matchState_t *dictMatchState;
143 ZSTD_compressionParameters cParams;
144 };
125
145
126 typedef struct {
146 typedef struct {
127 ZSTD_compressedBlockState_t* prevCBlock;
147 ZSTD_compressedBlockState_t* prevCBlock;
@@ -161,7 +181,7 b' typedef struct {'
161 rawSeq* seq; /* The start of the sequences */
181 rawSeq* seq; /* The start of the sequences */
162 size_t pos; /* The position where reading stopped. <= size. */
182 size_t pos; /* The position where reading stopped. <= size. */
163 size_t size; /* The number of sequences. <= capacity. */
183 size_t size; /* The number of sequences. <= capacity. */
164 size_t capacity; /* The capacity of the `seq` pointer */
184 size_t capacity; /* The capacity starting from `seq` pointer */
165 } rawSeqStore_t;
185 } rawSeqStore_t;
166
186
167 struct ZSTD_CCtx_params_s {
187 struct ZSTD_CCtx_params_s {
@@ -170,10 +190,11 b' struct ZSTD_CCtx_params_s {'
170 ZSTD_frameParameters fParams;
190 ZSTD_frameParameters fParams;
171
191
172 int compressionLevel;
192 int compressionLevel;
173 int disableLiteralCompression;
174 int forceWindow; /* force back-references to respect limit of
193 int forceWindow; /* force back-references to respect limit of
175 * 1<<wLog, even for dictionary */
194 * 1<<wLog, even for dictionary */
176
195
196 ZSTD_dictAttachPref_e attachDictPref;
197
177 /* Multithreading: used to pass parameters to mtctx */
198 /* Multithreading: used to pass parameters to mtctx */
178 unsigned nbWorkers;
199 unsigned nbWorkers;
179 unsigned jobSize;
200 unsigned jobSize;
@@ -193,6 +214,8 b' struct ZSTD_CCtx_s {'
193 ZSTD_CCtx_params requestedParams;
214 ZSTD_CCtx_params requestedParams;
194 ZSTD_CCtx_params appliedParams;
215 ZSTD_CCtx_params appliedParams;
195 U32 dictID;
216 U32 dictID;
217
218 int workSpaceOversizedDuration;
196 void* workSpace;
219 void* workSpace;
197 size_t workSpaceSize;
220 size_t workSpaceSize;
198 size_t blockSize;
221 size_t blockSize;
@@ -235,11 +258,15 b' struct ZSTD_CCtx_s {'
235 #endif
258 #endif
236 };
259 };
237
260
261 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
262
263 typedef enum { ZSTD_noDict = 0, ZSTD_extDict = 1, ZSTD_dictMatchState = 2 } ZSTD_dictMode_e;
264
238
265
239 typedef size_t (*ZSTD_blockCompressor) (
266 typedef size_t (*ZSTD_blockCompressor) (
240 ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
267 ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
241 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
268 void const* src, size_t srcSize);
242 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict);
269 ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
243
270
244
271
245 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
272 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -280,16 +307,18 b' MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)'
280 */
307 */
281 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
308 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t mlBase)
282 {
309 {
283 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG >= 6)
310 #if defined(DEBUGLEVEL) && (DEBUGLEVEL >= 6)
284 static const BYTE* g_start = NULL;
311 static const BYTE* g_start = NULL;
285 if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
312 if (g_start==NULL) g_start = (const BYTE*)literals; /* note : index only works for compression within a single segment */
286 { U32 const pos = (U32)((const BYTE*)literals - g_start);
313 { U32 const pos = (U32)((const BYTE*)literals - g_start);
287 DEBUGLOG(6, "Cpos%7u :%3u literals, match%3u bytes at dist.code%7u",
314 DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
288 pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
315 pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offsetCode);
289 }
316 }
290 #endif
317 #endif
318 assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
291 /* copy Literals */
319 /* copy Literals */
292 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + 128 KB);
320 assert(seqStorePtr->maxNbLit <= 128 KB);
321 assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit);
293 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
322 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
294 seqStorePtr->lit += litLength;
323 seqStorePtr->lit += litLength;
295
324
@@ -420,6 +449,11 b' ZSTD_count_2segments(const BYTE* ip, con'
420 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
449 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
421 size_t const matchLength = ZSTD_count(ip, match, vEnd);
450 size_t const matchLength = ZSTD_count(ip, match, vEnd);
422 if (match + matchLength != mEnd) return matchLength;
451 if (match + matchLength != mEnd) return matchLength;
453 DEBUGLOG(7, "distance from match beginning to end of dictionary = %zi", mEnd - match);
454 DEBUGLOG(7, "distance from current pos to end of buffer = %zi", iEnd - ip);
454 DEBUGLOG(7, "distance from current pos to end buffer = %zi", iEnd - ip);
455 DEBUGLOG(7, "next byte : ip==%02X, istart==%02X", ip[matchLength], *iStart);
456 DEBUGLOG(7, "final match length = %zu", matchLength + ZSTD_count(ip+matchLength, iStart, iEnd));
423 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
457 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
424 }
458 }
425
459
@@ -497,6 +531,20 b' MEM_STATIC U32 ZSTD_window_hasExtDict(ZS'
497 }
531 }
498
532
499 /**
533 /**
534 * ZSTD_matchState_dictMode():
535 * Inspects the provided matchState and figures out what dictMode should be
536 * passed to the compressor.
537 */
538 MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
539 {
540 return ZSTD_window_hasExtDict(ms->window) ?
541 ZSTD_extDict :
542 ms->dictMatchState != NULL ?
543 ZSTD_dictMatchState :
544 ZSTD_noDict;
545 }
546
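With this helper, picking a block compressor reduces to two calls: derive the dict mode from the match state, then index the strategy/dictMode dispatch via ZSTD_selectBlockCompressor, whose widened signature appears earlier in this header. Illustrative use:

    ZSTD_blockCompressor const blockCompressor =
        ZSTD_selectBlockCompressor(ms->cParams.strategy,
                                   ZSTD_matchState_dictMode(ms));
    /* then: blockCompressor(ms, seqStore, rep, src, srcSize); */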
547 /**
500 * ZSTD_window_needOverflowCorrection():
548 * ZSTD_window_needOverflowCorrection():
501 * Returns non-zero if the indices are getting too large and need overflow
549 * Returns non-zero if the indices are getting too large and need overflow
502 * protection.
550 * protection.
@@ -563,31 +611,41 b' MEM_STATIC U32 ZSTD_window_correctOverfl'
563 * ZSTD_window_enforceMaxDist():
611 * ZSTD_window_enforceMaxDist():
564 * Updates lowLimit so that:
612 * Updates lowLimit so that:
565 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
613 * (srcEnd - base) - lowLimit == maxDist + loadedDictEnd
614 *
566 * This allows a simple check that index >= lowLimit to see if index is valid.
615 * This allows a simple check that index >= lowLimit to see if index is valid.
567 * This must be called before a block compression call, with srcEnd as the block
616 * This must be called before a block compression call, with srcEnd as the block
568 * source end.
617 * source end.
618 *
569 * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
619 * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit.
570 * This is because dictionaries are allowed to be referenced as long as the last
620 * This is because dictionaries are allowed to be referenced as long as the last
571 * byte of the dictionary is in the window, but once they are out of range,
621 * byte of the dictionary is in the window, but once they are out of range,
572 * they cannot be referenced. If loadedDictEndPtr is NULL, we use
622 * they cannot be referenced. If loadedDictEndPtr is NULL, we use
573 * loadedDictEnd == 0.
623 * loadedDictEnd == 0.
624 *
625 * In normal dict mode, the dict is between lowLimit and dictLimit. In
626 * dictMatchState mode, lowLimit and dictLimit are the same, and the dictionary
627 * is below them. forceWindow and dictMatchState are therefore incompatible.
574 */
628 */
575 MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
629 MEM_STATIC void ZSTD_window_enforceMaxDist(ZSTD_window_t* window,
576 void const* srcEnd, U32 maxDist,
630 void const* srcEnd, U32 maxDist,
577 U32* loadedDictEndPtr)
631 U32* loadedDictEndPtr,
632 const ZSTD_matchState_t** dictMatchStatePtr)
578 {
633 {
579 U32 const current = (U32)((BYTE const*)srcEnd - window->base);
634 U32 const current = (U32)((BYTE const*)srcEnd - window->base);
580 U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
635 U32 loadedDictEnd = loadedDictEndPtr != NULL ? *loadedDictEndPtr : 0;
636 DEBUGLOG(5, "ZSTD_window_enforceMaxDist: current=%u, maxDist=%u", current, maxDist);
581 if (current > maxDist + loadedDictEnd) {
637 if (current > maxDist + loadedDictEnd) {
582 U32 const newLowLimit = current - maxDist;
638 U32 const newLowLimit = current - maxDist;
583 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
639 if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit;
584 if (window->dictLimit < window->lowLimit) {
640 if (window->dictLimit < window->lowLimit) {
585 DEBUGLOG(5, "Update dictLimit from %u to %u", window->dictLimit,
641 DEBUGLOG(5, "Update dictLimit to match lowLimit, from %u to %u",
586 window->lowLimit);
642 window->dictLimit, window->lowLimit);
587 window->dictLimit = window->lowLimit;
643 window->dictLimit = window->lowLimit;
588 }
644 }
589 if (loadedDictEndPtr)
645 if (loadedDictEndPtr)
590 *loadedDictEndPtr = 0;
646 *loadedDictEndPtr = 0;
647 if (dictMatchStatePtr)
648 *dictMatchStatePtr = NULL;
591 }
649 }
592 }
650 }
593
651
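A worked instance of the guard above: with current = 100000 (bytes past window->base), maxDist = 1<<16 = 65536 and loadedDictEnd = 0, the condition fires (100000 > 65536) and lowLimit rises to 100000 - 65536 = 34464, after which "index >= lowLimit" is the entire in-window validity check:

    /* current = 100000, maxDist = 65536, loadedDictEnd = 0 :
     *   100000 > 65536 + 0  =>  lowLimit = 100000 - 65536 = 34464
     * positions more than 65536 bytes behind the block end are now invalid. */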
@@ -603,12 +661,12 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w'
603 {
661 {
604 BYTE const* const ip = (BYTE const*)src;
662 BYTE const* const ip = (BYTE const*)src;
605 U32 contiguous = 1;
663 U32 contiguous = 1;
664 DEBUGLOG(5, "ZSTD_window_update");
606 /* Check if blocks follow each other */
665 /* Check if blocks follow each other */
607 if (src != window->nextSrc) {
666 if (src != window->nextSrc) {
608 /* not contiguous */
667 /* not contiguous */
609 size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
668 size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
610 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u",
669 DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
611 window->dictLimit);
612 window->lowLimit = window->dictLimit;
670 window->lowLimit = window->dictLimit;
613 assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
671 assert(distanceFromBase == (size_t)(U32)distanceFromBase); /* should never overflow */
614 window->dictLimit = (U32)distanceFromBase;
672 window->dictLimit = (U32)distanceFromBase;
@@ -625,10 +683,38 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w'
625 ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
683 ptrdiff_t const highInputIdx = (ip + srcSize) - window->dictBase;
626 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
684 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)window->dictLimit) ? window->dictLimit : (U32)highInputIdx;
627 window->lowLimit = lowLimitMax;
685 window->lowLimit = lowLimitMax;
686 DEBUGLOG(5, "Overlapping extDict and input : new lowLimit = %u", window->lowLimit);
628 }
687 }
629 return contiguous;
688 return contiguous;
630 }
689 }
631
690
691
692 /* debug functions */
693
694 MEM_STATIC double ZSTD_fWeight(U32 rawStat)
695 {
696 U32 const fp_accuracy = 8;
697 U32 const fp_multiplier = (1 << fp_accuracy);
698 U32 const stat = rawStat + 1;
699 U32 const hb = ZSTD_highbit32(stat);
700 U32 const BWeight = hb * fp_multiplier;
701 U32 const FWeight = (stat << fp_accuracy) >> hb;
702 U32 const weight = BWeight + FWeight;
703 assert(hb + fp_accuracy < 31);
704 return (double)weight / fp_multiplier;
705 }
706
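ZSTD_fWeight is a fixed-point log2 with fp_accuracy = 8 fractional bits: BWeight supplies the integer part (the high bit position) and FWeight linearly interpolates between powers of two, so the result approximates log2(stat) + 1. Worked example for rawStat = 15:

    /* stat = 16, hb = ZSTD_highbit32(16) = 4, fp_multiplier = 256
     * BWeight = 4 * 256        = 1024
     * FWeight = (16 << 8) >> 4 = 256
     * weight  = 1280 ; 1280 / 256.0 = 5.0 == log2(16) + 1 */

The constant +1 cancels in ZSTD_debugTable below, which only prints the difference ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]).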
707 MEM_STATIC void ZSTD_debugTable(const U32* table, U32 max)
708 {
709 unsigned u, sum;
710 for (u=0, sum=0; u<=max; u++) sum += table[u];
711 DEBUGLOG(2, "total nb elts: %u", sum);
712 for (u=0; u<=max; u++) {
713 DEBUGLOG(2, "%2u: %5u (%.2f)",
714 u, table[u], ZSTD_fWeight(sum) - ZSTD_fWeight(table[u]) );
715 }
716 }
717
632 #if defined (__cplusplus)
718 #if defined (__cplusplus)
633 }
719 }
634 #endif
720 #endif
@@ -640,7 +726,7 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w'
640 * ============================================================== */
726 * ============================================================== */
641
727
642 /* ZSTD_getCParamsFromCCtxParams() :
728 /* ZSTD_getCParamsFromCCtxParams() :
643 * cParams are built depending on compressionLevel, src size hints,
729 * cParams are built depending on compressionLevel, src size hints,
644 * LDM and manually set compression parameters.
730 * LDM and manually set compression parameters.
645 */
731 */
646 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
732 ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
@@ -656,6 +742,8 b' size_t ZSTD_initCStream_internal(ZSTD_CS'
656 const ZSTD_CDict* cdict,
742 const ZSTD_CDict* cdict,
657 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
743 ZSTD_CCtx_params params, unsigned long long pledgedSrcSize);
658
744
745 void ZSTD_resetSeqStore(seqStore_t* ssPtr);
746
659 /*! ZSTD_compressStream_generic() :
747 /*! ZSTD_compressStream_generic() :
660 * Private use only. To be called from zstdmt_compress.c in single-thread mode. */
748 * Private use only. To be called from zstdmt_compress.c in single-thread mode. */
661 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
749 size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
@@ -672,6 +760,7 b' ZSTD_compressionParameters ZSTD_getCPara'
672 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
760 size_t ZSTD_compressBegin_advanced_internal(ZSTD_CCtx* cctx,
673 const void* dict, size_t dictSize,
761 const void* dict, size_t dictSize,
674 ZSTD_dictContentType_e dictContentType,
762 ZSTD_dictContentType_e dictContentType,
763 ZSTD_dictTableLoadMethod_e dtlm,
675 const ZSTD_CDict* cdict,
764 const ZSTD_CDict* cdict,
676 ZSTD_CCtx_params params,
765 ZSTD_CCtx_params params,
677 unsigned long long pledgedSrcSize);
766 unsigned long long pledgedSrcSize);
@@ -13,9 +13,9 b''
13
13
14
14
15 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
15 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
16 ZSTD_compressionParameters const* cParams,
16 void const* end, ZSTD_dictTableLoadMethod_e dtlm)
17 void const* end)
18 {
17 {
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
19 U32* const hashLarge = ms->hashTable;
19 U32* const hashLarge = ms->hashTable;
20 U32 const hBitsL = cParams->hashLog;
20 U32 const hBitsL = cParams->hashLog;
21 U32 const mls = cParams->searchLength;
21 U32 const mls = cParams->searchLength;
@@ -40,6 +40,9 b' void ZSTD_fillDoubleHashTable(ZSTD_match'
40 hashSmall[smHash] = current + i;
40 hashSmall[smHash] = current + i;
41 if (i == 0 || hashLarge[lgHash] == 0)
41 if (i == 0 || hashLarge[lgHash] == 0)
42 hashLarge[lgHash] = current + i;
42 hashLarge[lgHash] = current + i;
43 /* Only load extra positions for ZSTD_dtlm_full */
44 if (dtlm == ZSTD_dtlm_fast)
45 break;
43 }
46 }
44 }
47 }
45 }
48 }
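The dtlm knob shows its effect here: ZSTD_dtlm_fast breaks out after inserting only the first position of each fill step (cheap enough to redo on every compression that loads a dictionary), while ZSTD_dtlm_full keeps iterating and indexes every position, the variant CDict creation requests earlier in this patch since that cost is paid once. Schematically (fillStep stands in for the function's actual step constant):

    for (i = 0; i < fillStep; ++i) {   /* candidate positions current+0 .. */
        /* ... insert current + i into the hash table(s) ... */
        if (dtlm == ZSTD_dtlm_fast)
            break;                     /* fast mode: first position only */
    }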
@@ -48,9 +51,10 b' void ZSTD_fillDoubleHashTable(ZSTD_match'
48 FORCE_INLINE_TEMPLATE
51 FORCE_INLINE_TEMPLATE
49 size_t ZSTD_compressBlock_doubleFast_generic(
52 size_t ZSTD_compressBlock_doubleFast_generic(
50 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
53 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
51 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
54 void const* src, size_t srcSize,
52 U32 const mls /* template */)
55 U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
53 {
56 {
57 ZSTD_compressionParameters const* cParams = &ms->cParams;
54 U32* const hashLong = ms->hashTable;
58 U32* const hashLong = ms->hashTable;
55 const U32 hBitsL = cParams->hashLog;
59 const U32 hBitsL = cParams->hashLog;
56 U32* const hashSmall = ms->chainTable;
60 U32* const hashSmall = ms->chainTable;
@@ -59,70 +63,188 @@ size_t ZSTD_compressBlock_doubleFast_gen
59 const BYTE* const istart = (const BYTE*)src;
63 const BYTE* const istart = (const BYTE*)src;
60 const BYTE* ip = istart;
64 const BYTE* ip = istart;
61 const BYTE* anchor = istart;
65 const BYTE* anchor = istart;
62 const U32 lowestIndex = ms->window.dictLimit;
66 const U32 prefixLowestIndex = ms->window.dictLimit;
63 const BYTE* const lowest = base + lowestIndex;
67 const BYTE* const prefixLowest = base + prefixLowestIndex;
64 const BYTE* const iend = istart + srcSize;
68 const BYTE* const iend = istart + srcSize;
65 const BYTE* const ilimit = iend - HASH_READ_SIZE;
69 const BYTE* const ilimit = iend - HASH_READ_SIZE;
66 U32 offset_1=rep[0], offset_2=rep[1];
70 U32 offset_1=rep[0], offset_2=rep[1];
67 U32 offsetSaved = 0;
71 U32 offsetSaved = 0;
68
72
73 const ZSTD_matchState_t* const dms = ms->dictMatchState;
74 const ZSTD_compressionParameters* const dictCParams =
75 dictMode == ZSTD_dictMatchState ?
76 &dms->cParams : NULL;
77 const U32* const dictHashLong = dictMode == ZSTD_dictMatchState ?
78 dms->hashTable : NULL;
79 const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
80 dms->chainTable : NULL;
81 const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
82 dms->window.dictLimit : 0;
83 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
84 dms->window.base : NULL;
85 const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
86 dictBase + dictStartIndex : NULL;
87 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
88 dms->window.nextSrc : NULL;
89 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
90 prefixLowestIndex - (U32)(dictEnd - dictBase) :
91 0;
92 const U32 dictHBitsL = dictMode == ZSTD_dictMatchState ?
93 dictCParams->hashLog : hBitsL;
94 const U32 dictHBitsS = dictMode == ZSTD_dictMatchState ?
95 dictCParams->chainLog : hBitsS;
96 const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart);
97
98 assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
99
69 /* init */
100 /* init */
70 ip += (ip==lowest);
101 ip += (dictAndPrefixLength == 0);
71 { U32 const maxRep = (U32)(ip-lowest);
102 if (dictMode == ZSTD_noDict) {
103 U32 const maxRep = (U32)(ip - prefixLowest);
72 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
104 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
73 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
105 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
74 }
106 }
107 if (dictMode == ZSTD_dictMatchState) {
108 /* dictMatchState repCode checks don't currently handle repCode == 0
109 * disabling. */
110 assert(offset_1 <= dictAndPrefixLength);
111 assert(offset_2 <= dictAndPrefixLength);
112 }
75
113
76 /* Main Search Loop */
114 /* Main Search Loop */
77 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
115 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
78 size_t mLength;
116 size_t mLength;
117 U32 offset;
79 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
118 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
80 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
119 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
120 size_t const dictHL = ZSTD_hashPtr(ip, dictHBitsL, 8);
121 size_t const dictHS = ZSTD_hashPtr(ip, dictHBitsS, mls);
81 U32 const current = (U32)(ip-base);
122 U32 const current = (U32)(ip-base);
82 U32 const matchIndexL = hashLong[h2];
123 U32 const matchIndexL = hashLong[h2];
83 U32 const matchIndexS = hashSmall[h];
124 U32 matchIndexS = hashSmall[h];
84 const BYTE* matchLong = base + matchIndexL;
125 const BYTE* matchLong = base + matchIndexL;
85 const BYTE* match = base + matchIndexS;
126 const BYTE* match = base + matchIndexS;
127 const U32 repIndex = current + 1 - offset_1;
128 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
129 && repIndex < prefixLowestIndex) ?
130 dictBase + (repIndex - dictIndexDelta) :
131 base + repIndex;
86 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
132 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
87
133
88 assert(offset_1 <= current); /* supposed guaranteed by construction */
134 /* check dictMatchState repcode */
89 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
135 if (dictMode == ZSTD_dictMatchState
90 /* favor repcode */
136 && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
137 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
138 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
139 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
140 ip++;
141 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
142 goto _match_stored;
143 }
144
145 /* check noDict repcode */
146 if ( dictMode == ZSTD_noDict
147 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
91 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
148 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
92 ip++;
149 ip++;
93 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
150 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
94 } else {
151 goto _match_stored;
95 U32 offset;
152 }
96 if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
153
154 if (matchIndexL > prefixLowestIndex) {
155 /* check prefix long match */
156 if (MEM_read64(matchLong) == MEM_read64(ip)) {
97 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
157 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
98 offset = (U32)(ip-matchLong);
158 offset = (U32)(ip-matchLong);
99 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
159 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
100 } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
160 goto _match_found;
101 size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
161 }
102 U32 const matchIndexL3 = hashLong[hl3];
162 } else if (dictMode == ZSTD_dictMatchState) {
103 const BYTE* matchL3 = base + matchIndexL3;
163 /* check dictMatchState long match */
104 hashLong[hl3] = current + 1;
164 U32 const dictMatchIndexL = dictHashLong[dictHL];
105 if ( (matchIndexL3 > lowestIndex) && (MEM_read64(matchL3) == MEM_read64(ip+1)) ) {
165 const BYTE* dictMatchL = dictBase + dictMatchIndexL;
166 assert(dictMatchL < dictEnd);
167
168 if (dictMatchL > dictStart && MEM_read64(dictMatchL) == MEM_read64(ip)) {
169 mLength = ZSTD_count_2segments(ip+8, dictMatchL+8, iend, dictEnd, prefixLowest) + 8;
170 offset = (U32)(current - dictMatchIndexL - dictIndexDelta);
171 while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */
172 goto _match_found;
173 }
174 }
175
176 if (matchIndexS > prefixLowestIndex) {
177 /* check prefix short match */
178 if (MEM_read32(match) == MEM_read32(ip)) {
179 goto _search_next_long;
180 }
181 } else if (dictMode == ZSTD_dictMatchState) {
182 /* check dictMatchState short match */
183 U32 const dictMatchIndexS = dictHashSmall[dictHS];
184 match = dictBase + dictMatchIndexS;
185 matchIndexS = dictMatchIndexS + dictIndexDelta;
186
187 if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) {
188 goto _search_next_long;
189 }
190 }
191
192 ip += ((ip-anchor) >> kSearchStrength) + 1;
193 continue;
194
195 _search_next_long:
196
197 {
198 size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
199 size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8);
200 U32 const matchIndexL3 = hashLong[hl3];
201 const BYTE* matchL3 = base + matchIndexL3;
202 hashLong[hl3] = current + 1;
203
204 /* check prefix long +1 match */
205 if (matchIndexL3 > prefixLowestIndex) {
206 if (MEM_read64(matchL3) == MEM_read64(ip+1)) {
106 mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
207 mLength = ZSTD_count(ip+9, matchL3+8, iend) + 8;
107 ip++;
208 ip++;
108 offset = (U32)(ip-matchL3);
209 offset = (U32)(ip-matchL3);
109 while (((ip>anchor) & (matchL3>lowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
210 while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
110 } else {
211 goto _match_found;
111 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
112 offset = (U32)(ip-match);
113 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
114 }
212 }
115 } else {
213 } else if (dictMode == ZSTD_dictMatchState) {
116 ip += ((ip-anchor) >> kSearchStrength) + 1;
214 /* check dict long +1 match */
117 continue;
215 U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
216 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
217 assert(dictMatchL3 < dictEnd);
218 if (dictMatchL3 > dictStart && MEM_read64(dictMatchL3) == MEM_read64(ip+1)) {
219 mLength = ZSTD_count_2segments(ip+1+8, dictMatchL3+8, iend, dictEnd, prefixLowest) + 8;
220 ip++;
221 offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta);
222 while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */
223 goto _match_found;
224 }
118 }
225 }
119
120 offset_2 = offset_1;
121 offset_1 = offset;
122
123 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
124 }
226 }
125
227
228 /* if no long +1 match, explore the short match we found */
229 if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
230 mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
231 offset = (U32)(current - matchIndexS);
232 while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
233 } else {
234 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
235 offset = (U32)(ip - match);
236 while (((ip>anchor) & (match>prefixLowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
237 }
238
239 /* fall-through */
240
241 _match_found:
242 offset_2 = offset_1;
243 offset_1 = offset;
244
245 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
246
247 _match_stored:
126 /* match found */
248 /* match found */
127 ip += mLength;
249 ip += mLength;
128 anchor = ip;
250 anchor = ip;
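
Most of the new dictMatchState code above hinges on one piece of arithmetic: entries in the dictionary's tables use the dictionary's own index space, and dictIndexDelta = prefixLowestIndex - (dictEnd - dictBase) maps them into the current window, so offsets fall out of ordinary subtraction (e.g. `offset = current - dictMatchIndexL - dictIndexDelta` above). A minimal worked example with made-up numbers:

    #include <assert.h>

    int main(void)
    {
        /* hypothetical layout: 1000 bytes of dictionary, prefix starts at index 4096 */
        unsigned const prefixLowestIndex = 4096;
        unsigned const dictContentSize   = 1000;   /* dictEnd - dictBase */
        unsigned const dictIndexDelta    = prefixLowestIndex - dictContentSize;

        unsigned const dictMatchIndex = 250;       /* index inside the dictionary */
        unsigned const current        = 5000;      /* current position, local space */

        /* translate, then compute the offset exactly as for a local match */
        unsigned const matchIndex = dictMatchIndex + dictIndexDelta;
        unsigned const offset     = current - matchIndex;
        assert(offset == 5000 - (250 + 3096));     /* == 1654 */
        return 0;
    }
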
@@ -135,19 +257,44 @@ size_t ZSTD_compressBlock_doubleFast_gen
135 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
257 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
136
258
137 /* check immediate repcode */
259 /* check immediate repcode */
138 while ( (ip <= ilimit)
260 if (dictMode == ZSTD_dictMatchState) {
139 && ( (offset_2>0)
261 while (ip <= ilimit) {
140 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
262 U32 const current2 = (U32)(ip-base);
141 /* store sequence */
263 U32 const repIndex2 = current2 - offset_2;
142 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
264 const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
143 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
265 && repIndex2 < prefixLowestIndex ?
144 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
266 dictBase - dictIndexDelta + repIndex2 :
145 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
267 base + repIndex2;
146 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
268 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
147 ip += rLength;
269 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
148 anchor = ip;
270 const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
149 continue; /* faster when present ... (?) */
271 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
150 } } }
272 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
273 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
274 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
275 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
276 ip += repLength2;
277 anchor = ip;
278 continue;
279 }
280 break;
281 }
282 }
283
284 if (dictMode == ZSTD_noDict) {
285 while ( (ip <= ilimit)
286 && ( (offset_2>0)
287 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
288 /* store sequence */
289 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
290 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
291 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
292 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
293 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
294 ip += rLength;
295 anchor = ip;
296 continue; /* faster when present ... (?) */
297 } } } }
151
298
152 /* save reps for next block */
299 /* save reps for next block */
153 rep[0] = offset_1 ? offset_1 : offsetSaved;
300 rep[0] = offset_1 ? offset_1 : offsetSaved;
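
The `rep[i] = offset_i ? offset_i : offsetSaved` lines close a loop opened in the init code: a carried-in repcode larger than what the block can reference is parked in offsetSaved and zeroed for the duration of the block, and if the block never replaced that slot, the parked value is restored so repcode history survives into the next block. Paraphrased as a helper (the function itself is only a sketch of the lines above):

    typedef unsigned U32;

    static void save_reps(U32 rep[2], U32 offset_1, U32 offset_2, U32 offsetSaved)
    {
        /* a slot still 0 here was disabled at block entry; give it back
         * the value parked in offsetSaved */
        rep[0] = offset_1 ? offset_1 : offsetSaved;
        rep[1] = offset_2 ? offset_2 : offsetSaved;
    }
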
@@ -160,102 +307,126 @@ size_t ZSTD_compressBlock_doubleFast_gen
160
307
161 size_t ZSTD_compressBlock_doubleFast(
308 size_t ZSTD_compressBlock_doubleFast(
162 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
309 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
163 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
310 void const* src, size_t srcSize)
164 {
311 {
165 const U32 mls = cParams->searchLength;
312 const U32 mls = ms->cParams.searchLength;
166 switch(mls)
313 switch(mls)
167 {
314 {
168 default: /* includes case 3 */
315 default: /* includes case 3 */
169 case 4 :
316 case 4 :
170 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
317 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
171 case 5 :
318 case 5 :
172 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
319 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
173 case 6 :
320 case 6 :
174 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
321 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
175 case 7 :
322 case 7 :
176 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
323 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
324 }
325 }
326
327
328 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
329 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
330 void const* src, size_t srcSize)
331 {
332 const U32 mls = ms->cParams.searchLength;
333 switch(mls)
334 {
335 default: /* includes case 3 */
336 case 4 :
337 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
338 case 5 :
339 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
340 case 6 :
341 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
342 case 7 :
343 return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
177 }
344 }
178 }
345 }
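
These switch-on-mls wrappers are zstd's usual specialization pattern: the _generic worker is FORCE_INLINE_TEMPLATE, so each call site with literal arguments (4..7, ZSTD_noDict or ZSTD_dictMatchState) instantiates a copy in which mls and dictMode are compile-time constants and every `if (dictMode == ...)` test above folds away. The shape in miniature (a GCC/Clang attribute stands in for FORCE_INLINE_TEMPLATE; the work itself is a placeholder):

    #include <stddef.h>

    typedef enum { noDict, dictMatchState } mode_e;

    static inline __attribute__((always_inline))
    size_t work_generic(const unsigned char* src, size_t n,
                        unsigned mls, mode_e mode)
    {
        (void)src;
        /* with literal mls/mode at the call site, this test folds away */
        if (mode == dictMatchState) return n / mls + 1;
        return n / mls;
    }

    size_t work(const unsigned char* src, size_t n, unsigned mls)
    {
        switch (mls) {
        default: /* includes case 3 */
        case 4: return work_generic(src, n, 4, noDict);
        case 5: return work_generic(src, n, 5, noDict);
        case 6: return work_generic(src, n, 6, noDict);
        case 7: return work_generic(src, n, 7, noDict);
        }
    }
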
179
346
180
347
181 static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
348 static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
182 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
349 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
183 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
350 void const* src, size_t srcSize,
184 U32 const mls /* template */)
351 U32 const mls /* template */)
185 {
352 {
353 ZSTD_compressionParameters const* cParams = &ms->cParams;
186 U32* const hashLong = ms->hashTable;
354 U32* const hashLong = ms->hashTable;
187 U32 const hBitsL = cParams->hashLog;
355 U32 const hBitsL = cParams->hashLog;
188 U32* const hashSmall = ms->chainTable;
356 U32* const hashSmall = ms->chainTable;
189 U32 const hBitsS = cParams->chainLog;
357 U32 const hBitsS = cParams->chainLog;
190 const BYTE* const base = ms->window.base;
191 const BYTE* const dictBase = ms->window.dictBase;
192 const BYTE* const istart = (const BYTE*)src;
358 const BYTE* const istart = (const BYTE*)src;
193 const BYTE* ip = istart;
359 const BYTE* ip = istart;
194 const BYTE* anchor = istart;
360 const BYTE* anchor = istart;
195 const U32 lowestIndex = ms->window.lowLimit;
196 const BYTE* const dictStart = dictBase + lowestIndex;
197 const U32 dictLimit = ms->window.dictLimit;
198 const BYTE* const lowPrefixPtr = base + dictLimit;
199 const BYTE* const dictEnd = dictBase + dictLimit;
200 const BYTE* const iend = istart + srcSize;
361 const BYTE* const iend = istart + srcSize;
201 const BYTE* const ilimit = iend - 8;
362 const BYTE* const ilimit = iend - 8;
363 const U32 prefixStartIndex = ms->window.dictLimit;
364 const BYTE* const base = ms->window.base;
365 const BYTE* const prefixStart = base + prefixStartIndex;
366 const U32 dictStartIndex = ms->window.lowLimit;
367 const BYTE* const dictBase = ms->window.dictBase;
368 const BYTE* const dictStart = dictBase + dictStartIndex;
369 const BYTE* const dictEnd = dictBase + prefixStartIndex;
202 U32 offset_1=rep[0], offset_2=rep[1];
370 U32 offset_1=rep[0], offset_2=rep[1];
203
371
372 DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize);
373
204 /* Search Loop */
374 /* Search Loop */
205 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
375 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
206 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
376 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
207 const U32 matchIndex = hashSmall[hSmall];
377 const U32 matchIndex = hashSmall[hSmall];
208 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
378 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
209 const BYTE* match = matchBase + matchIndex;
379 const BYTE* match = matchBase + matchIndex;
210
380
211 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
381 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
212 const U32 matchLongIndex = hashLong[hLong];
382 const U32 matchLongIndex = hashLong[hLong];
213 const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
383 const BYTE* const matchLongBase = matchLongIndex < prefixStartIndex ? dictBase : base;
214 const BYTE* matchLong = matchLongBase + matchLongIndex;
384 const BYTE* matchLong = matchLongBase + matchLongIndex;
215
385
216 const U32 current = (U32)(ip-base);
386 const U32 current = (U32)(ip-base);
217 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
387 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
218 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
388 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
219 const BYTE* repMatch = repBase + repIndex;
389 const BYTE* const repMatch = repBase + repIndex;
220 size_t mLength;
390 size_t mLength;
221 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
391 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
222
392
223 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
393 if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
224 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
394 & (repIndex > dictStartIndex))
225 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
395 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
226 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
396 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
397 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
227 ip++;
398 ip++;
228 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
399 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
229 } else {
400 } else {
230 if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
401 if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
231 const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
402 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
232 const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
403 const BYTE* const lowMatchPtr = matchLongIndex < prefixStartIndex ? dictStart : prefixStart;
233 U32 offset;
404 U32 offset;
234 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
405 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, prefixStart) + 8;
235 offset = current - matchLongIndex;
406 offset = current - matchLongIndex;
236 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
407 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
237 offset_2 = offset_1;
408 offset_2 = offset_1;
238 offset_1 = offset;
409 offset_1 = offset;
239 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
410 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
240
411
241 } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
412 } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
242 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
413 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
243 U32 const matchIndex3 = hashLong[h3];
414 U32 const matchIndex3 = hashLong[h3];
244 const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
415 const BYTE* const match3Base = matchIndex3 < prefixStartIndex ? dictBase : base;
245 const BYTE* match3 = match3Base + matchIndex3;
416 const BYTE* match3 = match3Base + matchIndex3;
246 U32 offset;
417 U32 offset;
247 hashLong[h3] = current + 1;
418 hashLong[h3] = current + 1;
248 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
419 if ( (matchIndex3 > dictStartIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
249 const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
420 const BYTE* const matchEnd = matchIndex3 < prefixStartIndex ? dictEnd : iend;
250 const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
421 const BYTE* const lowMatchPtr = matchIndex3 < prefixStartIndex ? dictStart : prefixStart;
251 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
422 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, prefixStart) + 8;
252 ip++;
423 ip++;
253 offset = current+1 - matchIndex3;
424 offset = current+1 - matchIndex3;
254 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
425 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
255 } else {
426 } else {
256 const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
427 const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
257 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
428 const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
258 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
429 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
259 offset = current - matchIndex;
430 offset = current - matchIndex;
260 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
431 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
261 }
432 }
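
ZSTD_count_2segments() is what lets these extDict matches run across the segment boundary: it first counts matching bytes against the old segment, and when the match reaches dictEnd it resumes the comparison at prefixStart in the current window. A byte-at-a-time model of that behaviour (the real implementation compares word-sized chunks):

    #include <stddef.h>

    static size_t count_bytes(const unsigned char* a, const unsigned char* b, size_t max)
    {
        size_t n = 0;
        while (n < max && a[n] == b[n]) n++;
        return n;
    }

    /* model of ZSTD_count_2segments(ip, match, iEnd, mEnd, iStart) */
    static size_t count_2segments(const unsigned char* ip, const unsigned char* match,
                                  const unsigned char* iEnd, const unsigned char* mEnd,
                                  const unsigned char* iStart)
    {
        size_t const seg1Max = (size_t)(mEnd - match) < (size_t)(iEnd - ip)
                             ? (size_t)(mEnd - match) : (size_t)(iEnd - ip);
        size_t const n = count_bytes(ip, match, seg1Max);
        if (match + n != mEnd) return n;   /* mismatch inside the old segment */
        /* match reached dictEnd: continue against the prefix of the window */
        return n + count_bytes(ip + n, iStart, (size_t)(iEnd - (ip + n)));
    }

    int main(void)
    {
        const unsigned char seg1[] = "abcd";      /* tail of the old segment */
        const unsigned char cur[]  = "efgh";      /* current window, prefixStart */
        const unsigned char ip[]   = "abcdefgh";  /* input spanning both */
        return count_2segments(ip, seg1, ip + 8, seg1 + 4, cur) == 8 ? 0 : 1;
    }
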
@@ -282,12 +453,13 @@ static size_t ZSTD_compressBlock_doubleF
282 while (ip <= ilimit) {
453 while (ip <= ilimit) {
283 U32 const current2 = (U32)(ip-base);
454 U32 const current2 = (U32)(ip-base);
284 U32 const repIndex2 = current2 - offset_2;
455 U32 const repIndex2 = current2 - offset_2;
285 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
456 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
286 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
457 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
287 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
458 & (repIndex2 > dictStartIndex))
288 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
459 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
289 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
460 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
290 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
461 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
462 U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
291 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
463 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
292 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
464 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
293 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
465 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
@@ -309,19 +481,19 @@ static size_t ZSTD_compressBlock_doubleF
309
481
310 size_t ZSTD_compressBlock_doubleFast_extDict(
482 size_t ZSTD_compressBlock_doubleFast_extDict(
311 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
483 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
312 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
484 void const* src, size_t srcSize)
313 {
485 {
314 U32 const mls = cParams->searchLength;
486 U32 const mls = ms->cParams.searchLength;
315 switch(mls)
487 switch(mls)
316 {
488 {
317 default: /* includes case 3 */
489 default: /* includes case 3 */
318 case 4 :
490 case 4 :
319 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 4);
491 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
320 case 5 :
492 case 5 :
321 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 5);
493 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
322 case 6 :
494 case 6 :
323 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 6);
495 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
324 case 7 :
496 case 7 :
325 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 7);
497 return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
326 }
498 }
327 }
499 }
@@ -19,14 +19,16 @@ extern "C" {
19 #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
19 #include "zstd_compress_internal.h" /* ZSTD_CCtx, size_t */
20
20
21 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
21 void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
22 ZSTD_compressionParameters const* cParams,
22 void const* end, ZSTD_dictTableLoadMethod_e dtlm);
23 void const* end);
24 size_t ZSTD_compressBlock_doubleFast(
23 size_t ZSTD_compressBlock_doubleFast(
25 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
24 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
25 void const* src, size_t srcSize);
26 size_t ZSTD_compressBlock_doubleFast_dictMatchState(
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 void const* src, size_t srcSize);
27 size_t ZSTD_compressBlock_doubleFast_extDict(
29 size_t ZSTD_compressBlock_doubleFast_extDict(
28 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
30 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
31 void const* src, size_t srcSize);
30
32
31
33
32 #if defined (__cplusplus)
34 #if defined (__cplusplus)
@@ -13,9 +13,9 @@
13
13
14
14
15 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
15 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
16 ZSTD_compressionParameters const* cParams,
16 void const* end, ZSTD_dictTableLoadMethod_e dtlm)
17 void const* end)
18 {
17 {
18 const ZSTD_compressionParameters* const cParams = &ms->cParams;
19 U32* const hashTable = ms->hashTable;
19 U32* const hashTable = ms->hashTable;
20 U32 const hBits = cParams->hashLog;
20 U32 const hBits = cParams->hashLog;
21 U32 const mls = cParams->searchLength;
21 U32 const mls = cParams->searchLength;
@@ -34,6 +34,9 @@ void ZSTD_fillHashTable(ZSTD_matchState_
34 size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
34 size_t const hash = ZSTD_hashPtr(ip + i, hBits, mls);
35 if (i == 0 || hashTable[hash] == 0)
35 if (i == 0 || hashTable[hash] == 0)
36 hashTable[hash] = current + i;
36 hashTable[hash] = current + i;
37 /* Only load extra positions for ZSTD_dtlm_full */
38 if (dtlm == ZSTD_dtlm_fast)
39 break;
37 }
40 }
38 }
41 }
39 }
42 }
@@ -42,26 +45,65 @@ FORCE_INLINE_TEMPLATE
42 size_t ZSTD_compressBlock_fast_generic(
45 size_t ZSTD_compressBlock_fast_generic(
43 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
46 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
44 void const* src, size_t srcSize,
47 void const* src, size_t srcSize,
45 U32 const hlog, U32 const stepSize, U32 const mls)
48 U32 const mls, ZSTD_dictMode_e const dictMode)
46 {
49 {
50 const ZSTD_compressionParameters* const cParams = &ms->cParams;
47 U32* const hashTable = ms->hashTable;
51 U32* const hashTable = ms->hashTable;
52 U32 const hlog = cParams->hashLog;
53 /* support stepSize of 0 */
54 U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
48 const BYTE* const base = ms->window.base;
55 const BYTE* const base = ms->window.base;
49 const BYTE* const istart = (const BYTE*)src;
56 const BYTE* const istart = (const BYTE*)src;
50 const BYTE* ip = istart;
57 const BYTE* ip = istart;
51 const BYTE* anchor = istart;
58 const BYTE* anchor = istart;
52 const U32 lowestIndex = ms->window.dictLimit;
59 const U32 prefixStartIndex = ms->window.dictLimit;
53 const BYTE* const lowest = base + lowestIndex;
60 const BYTE* const prefixStart = base + prefixStartIndex;
54 const BYTE* const iend = istart + srcSize;
61 const BYTE* const iend = istart + srcSize;
55 const BYTE* const ilimit = iend - HASH_READ_SIZE;
62 const BYTE* const ilimit = iend - HASH_READ_SIZE;
56 U32 offset_1=rep[0], offset_2=rep[1];
63 U32 offset_1=rep[0], offset_2=rep[1];
57 U32 offsetSaved = 0;
64 U32 offsetSaved = 0;
58
65
66 const ZSTD_matchState_t* const dms = ms->dictMatchState;
67 const ZSTD_compressionParameters* const dictCParams =
68 dictMode == ZSTD_dictMatchState ?
69 &dms->cParams : NULL;
70 const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ?
71 dms->hashTable : NULL;
72 const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ?
73 dms->window.dictLimit : 0;
74 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
75 dms->window.base : NULL;
76 const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ?
77 dictBase + dictStartIndex : NULL;
78 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
79 dms->window.nextSrc : NULL;
80 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
81 prefixStartIndex - (U32)(dictEnd - dictBase) :
82 0;
83 const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart);
84 const U32 dictHLog = dictMode == ZSTD_dictMatchState ?
85 dictCParams->hashLog : hlog;
86
87 assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
88
89 /* otherwise, we would get index underflow when translating a dict index
90 * into a local index */
91 assert(dictMode != ZSTD_dictMatchState
92 || prefixStartIndex >= (U32)(dictEnd - dictBase));
93
59 /* init */
94 /* init */
60 ip += (ip==lowest);
95 ip += (dictAndPrefixLength == 0);
61 { U32 const maxRep = (U32)(ip-lowest);
96 if (dictMode == ZSTD_noDict) {
97 U32 const maxRep = (U32)(ip - prefixStart);
62 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
98 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
63 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
99 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
64 }
100 }
101 if (dictMode == ZSTD_dictMatchState) {
102 /* dictMatchState repCode checks don't currently handle repCode == 0
103 * disabling. */
104 assert(offset_1 <= dictAndPrefixLength);
105 assert(offset_2 <= dictAndPrefixLength);
106 }
65
107
66 /* Main Search Loop */
108 /* Main Search Loop */
67 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
109 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
@@ -70,26 +112,67 @@ size_t ZSTD_compressBlock_fast_generic(
70 U32 const current = (U32)(ip-base);
112 U32 const current = (U32)(ip-base);
71 U32 const matchIndex = hashTable[h];
113 U32 const matchIndex = hashTable[h];
72 const BYTE* match = base + matchIndex;
114 const BYTE* match = base + matchIndex;
115 const U32 repIndex = current + 1 - offset_1;
116 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
117 && repIndex < prefixStartIndex) ?
118 dictBase + (repIndex - dictIndexDelta) :
119 base + repIndex;
73 hashTable[h] = current; /* update hash table */
120 hashTable[h] = current; /* update hash table */
74
121
75 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
122 if ( (dictMode == ZSTD_dictMatchState)
123 && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */
124 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
125 const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
126 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
127 ip++;
128 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
129 } else if ( dictMode == ZSTD_noDict
130 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
76 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
131 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
77 ip++;
132 ip++;
78 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
133 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
79 } else {
134 } else if ( (matchIndex <= prefixStartIndex) ) {
80 if ( (matchIndex <= lowestIndex)
135 if (dictMode == ZSTD_dictMatchState) {
81 || (MEM_read32(match) != MEM_read32(ip)) ) {
136 size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
137 U32 const dictMatchIndex = dictHashTable[dictHash];
138 const BYTE* dictMatch = dictBase + dictMatchIndex;
139 if (dictMatchIndex <= dictStartIndex ||
140 MEM_read32(dictMatch) != MEM_read32(ip)) {
141 assert(stepSize >= 1);
142 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
143 continue;
144 } else {
145 /* found a dict match */
146 U32 const offset = (U32)(current-dictMatchIndex-dictIndexDelta);
147 mLength = ZSTD_count_2segments(ip+4, dictMatch+4, iend, dictEnd, prefixStart) + 4;
148 while (((ip>anchor) & (dictMatch>dictStart))
149 && (ip[-1] == dictMatch[-1])) {
150 ip--; dictMatch--; mLength++;
151 } /* catch up */
152 offset_2 = offset_1;
153 offset_1 = offset;
154 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
155 }
156 } else {
82 assert(stepSize >= 1);
157 assert(stepSize >= 1);
83 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
158 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
84 continue;
159 continue;
85 }
160 }
161 } else if (MEM_read32(match) != MEM_read32(ip)) {
162 /* it's not a match, and we're not going to check the dictionary */
163 assert(stepSize >= 1);
164 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
165 continue;
166 } else {
167 /* found a regular match */
168 U32 const offset = (U32)(ip-match);
86 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
169 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
87 { U32 const offset = (U32)(ip-match);
170 while (((ip>anchor) & (match>prefixStart))
88 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
171 && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
89 offset_2 = offset_1;
172 offset_2 = offset_1;
90 offset_1 = offset;
173 offset_1 = offset;
91 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
174 ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
92 } }
175 }
93
176
94 /* match found */
177 /* match found */
95 ip += mLength;
178 ip += mLength;
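
The miss path above, `ip += ((ip-anchor) >> kSearchStrength) + stepSize`, deserves a note: the skip grows with the distance since the last emitted sequence, so long incompressible stretches are scanned progressively faster while data that keeps producing matches is still probed densely. With kSearchStrength = 8 and stepSize = 1 (values assumed here for illustration):

    /* bytes skipped per failed probe, as a function of distance since anchor */
    static unsigned skip(unsigned distanceSinceAnchor)
    {
        return (distanceSinceAnchor >> 8) + 1;  /* 0..255 -> 1, 256..511 -> 2, ... */
    }
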
@@ -97,21 +180,46 @@ size_t ZSTD_compressBlock_fast_generic(
97
180
98 if (ip <= ilimit) {
181 if (ip <= ilimit) {
99 /* Fill Table */
182 /* Fill Table */
183 assert(base+current+2 > istart); /* check base overflow */
100 hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
184 hashTable[ZSTD_hashPtr(base+current+2, hlog, mls)] = current+2; /* here because current+2 could be > iend-8 */
101 hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
185 hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base);
186
102 /* check immediate repcode */
187 /* check immediate repcode */
103 while ( (ip <= ilimit)
188 if (dictMode == ZSTD_dictMatchState) {
104 && ( (offset_2>0)
189 while (ip <= ilimit) {
105 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
190 U32 const current2 = (U32)(ip-base);
106 /* store sequence */
191 U32 const repIndex2 = current2 - offset_2;
107 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
192 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ?
108 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
193 dictBase - dictIndexDelta + repIndex2 :
109 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
194 base + repIndex2;
110 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
195 if ( ((U32)((prefixStartIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
111 ip += rLength;
196 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
112 anchor = ip;
197 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
113 continue; /* faster when present ... (?) */
198 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
114 } } }
199 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
200 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
201 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
202 ip += repLength2;
203 anchor = ip;
204 continue;
205 }
206 break;
207 }
208 }
209
210 if (dictMode == ZSTD_noDict) {
211 while ( (ip <= ilimit)
212 && ( (offset_2>0)
213 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
214 /* store sequence */
215 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
216 U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */
217 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base);
218 ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH);
219 ip += rLength;
220 anchor = ip;
221 continue; /* faster when present ... (?) */
222 } } } }
115
223
116 /* save reps for next block */
224 /* save reps for next block */
117 rep[0] = offset_1 ? offset_1 : offsetSaved;
225 rep[0] = offset_1 ? offset_1 : offsetSaved;
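
One detail of this refactor: stepSize is no longer threaded through the dispatcher but derived inside the worker as `cParams->targetLength + !(cParams->targetLength)`. The `x + !x` idiom maps 0 to 1 and leaves positive values untouched, which is what backs the `assert(stepSize >= 1)` in the search loop: with a step of 0 and ip == anchor, the miss path would stop advancing. A two-line check of the idiom:

    #include <assert.h>

    int main(void)
    {
        unsigned t0 = 0, t3 = 3;
        assert(t0 + !t0 == 1);  /* 0 promoted to the minimum step */
        assert(t3 + !t3 == 3);  /* positive values pass through */
        return 0;
    }
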
@@ -124,42 +232,66 @@ size_t ZSTD_compressBlock_fast_generic(
124
232
125 size_t ZSTD_compressBlock_fast(
233 size_t ZSTD_compressBlock_fast(
126 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
234 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
127 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
235 void const* src, size_t srcSize)
128 {
236 {
129 U32 const hlog = cParams->hashLog;
237 ZSTD_compressionParameters const* cParams = &ms->cParams;
130 U32 const mls = cParams->searchLength;
238 U32 const mls = cParams->searchLength;
131 U32 const stepSize = cParams->targetLength;
239 assert(ms->dictMatchState == NULL);
132 switch(mls)
240 switch(mls)
133 {
241 {
134 default: /* includes case 3 */
242 default: /* includes case 3 */
135 case 4 :
243 case 4 :
136 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
244 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
137 case 5 :
245 case 5 :
138 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
246 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
139 case 6 :
247 case 6 :
140 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
248 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
141 case 7 :
249 case 7 :
142 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
250 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
251 }
252 }
253
254 size_t ZSTD_compressBlock_fast_dictMatchState(
255 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
256 void const* src, size_t srcSize)
257 {
258 ZSTD_compressionParameters const* cParams = &ms->cParams;
259 U32 const mls = cParams->searchLength;
260 assert(ms->dictMatchState != NULL);
261 switch(mls)
262 {
263 default: /* includes case 3 */
264 case 4 :
265 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
266 case 5 :
267 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
268 case 6 :
269 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
270 case 7 :
271 return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
143 }
272 }
144 }
273 }
145
274
146
275
147 static size_t ZSTD_compressBlock_fast_extDict_generic(
276 static size_t ZSTD_compressBlock_fast_extDict_generic(
148 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
277 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
149 void const* src, size_t srcSize,
278 void const* src, size_t srcSize, U32 const mls)
150 U32 const hlog, U32 const stepSize, U32 const mls)
151 {
279 {
152 U32* hashTable = ms->hashTable;
280 const ZSTD_compressionParameters* const cParams = &ms->cParams;
281 U32* const hashTable = ms->hashTable;
282 U32 const hlog = cParams->hashLog;
283 /* support stepSize of 0 */
284 U32 const stepSize = cParams->targetLength + !(cParams->targetLength);
153 const BYTE* const base = ms->window.base;
285 const BYTE* const base = ms->window.base;
154 const BYTE* const dictBase = ms->window.dictBase;
286 const BYTE* const dictBase = ms->window.dictBase;
155 const BYTE* const istart = (const BYTE*)src;
287 const BYTE* const istart = (const BYTE*)src;
156 const BYTE* ip = istart;
288 const BYTE* ip = istart;
157 const BYTE* anchor = istart;
289 const BYTE* anchor = istart;
158 const U32 lowestIndex = ms->window.lowLimit;
290 const U32 dictStartIndex = ms->window.lowLimit;
159 const BYTE* const dictStart = dictBase + lowestIndex;
291 const BYTE* const dictStart = dictBase + dictStartIndex;
160 const U32 dictLimit = ms->window.dictLimit;
292 const U32 prefixStartIndex = ms->window.dictLimit;
161 const BYTE* const lowPrefixPtr = base + dictLimit;
293 const BYTE* const prefixStart = base + prefixStartIndex;
162 const BYTE* const dictEnd = dictBase + dictLimit;
294 const BYTE* const dictEnd = dictBase + prefixStartIndex;
163 const BYTE* const iend = istart + srcSize;
295 const BYTE* const iend = istart + srcSize;
164 const BYTE* const ilimit = iend - 8;
296 const BYTE* const ilimit = iend - 8;
165 U32 offset_1=rep[0], offset_2=rep[1];
297 U32 offset_1=rep[0], offset_2=rep[1];
@@ -167,33 +299,34 @@ static size_t ZSTD_compressBlock_fast_ex
167 /* Search Loop */
299 /* Search Loop */
168 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
300 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
169 const size_t h = ZSTD_hashPtr(ip, hlog, mls);
301 const size_t h = ZSTD_hashPtr(ip, hlog, mls);
170 const U32 matchIndex = hashTable[h];
302 const U32 matchIndex = hashTable[h];
171 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
303 const BYTE* const matchBase = matchIndex < prefixStartIndex ? dictBase : base;
172 const BYTE* match = matchBase + matchIndex;
304 const BYTE* match = matchBase + matchIndex;
173 const U32 current = (U32)(ip-base);
305 const U32 current = (U32)(ip-base);
174 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
306 const U32 repIndex = current + 1 - offset_1;
175 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
307 const BYTE* const repBase = repIndex < prefixStartIndex ? dictBase : base;
176 const BYTE* repMatch = repBase + repIndex;
308 const BYTE* const repMatch = repBase + repIndex;
177 size_t mLength;
309 size_t mLength;
178 hashTable[h] = current; /* update hash table */
310 hashTable[h] = current; /* update hash table */
311 assert(offset_1 <= current +1); /* check repIndex */
179
312
180 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
313 if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
181 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
314 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
182 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
315 const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
183 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
316 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
184 ip++;
317 ip++;
185 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
318 ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH);
186 } else {
319 } else {
187 if ( (matchIndex < lowestIndex) ||
320 if ( (matchIndex < dictStartIndex) ||
188 (MEM_read32(match) != MEM_read32(ip)) ) {
321 (MEM_read32(match) != MEM_read32(ip)) ) {
189 assert(stepSize >= 1);
322 assert(stepSize >= 1);
190 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
323 ip += ((ip-anchor) >> kSearchStrength) + stepSize;
191 continue;
324 continue;
192 }
325 }
193 { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
326 { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend;
194 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
327 const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart;
195 U32 offset;
328 U32 offset;
196 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
329 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
197 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
330 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
198 offset = current - matchIndex;
331 offset = current - matchIndex;
199 offset_2 = offset_1;
332 offset_2 = offset_1;
@@ -213,11 +346,11 @@ static size_t ZSTD_compressBlock_fast_ex
213 while (ip <= ilimit) {
346 while (ip <= ilimit) {
214 U32 const current2 = (U32)(ip-base);
347 U32 const current2 = (U32)(ip-base);
215 U32 const repIndex2 = current2 - offset_2;
348 U32 const repIndex2 = current2 - offset_2;
216 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
349 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
217 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional overflow */
350 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
218 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
351 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
219 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
352 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
220 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, lowPrefixPtr) + 4;
353 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
221 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
354 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
222 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
355 ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH);
223 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
356 hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
@@ -239,21 +372,20 @@ static size_t ZSTD_compressBlock_fast_ex
239
372
240 size_t ZSTD_compressBlock_fast_extDict(
373 size_t ZSTD_compressBlock_fast_extDict(
241 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
374 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
242 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
375 void const* src, size_t srcSize)
243 {
376 {
244 U32 const hlog = cParams->hashLog;
377 ZSTD_compressionParameters const* cParams = &ms->cParams;
245 U32 const mls = cParams->searchLength;
378 U32 const mls = cParams->searchLength;
246 U32 const stepSize = cParams->targetLength;
247 switch(mls)
379 switch(mls)
248 {
380 {
249 default: /* includes case 3 */
381 default: /* includes case 3 */
250 case 4 :
382 case 4 :
251 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 4);
383 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
252 case 5 :
384 case 5 :
253 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 5);
385 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
254 case 6 :
386 case 6 :
255 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 6);
387 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
256 case 7 :
388 case 7 :
257 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, hlog, stepSize, 7);
389 return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
258 }
390 }
259 }
391 }
@@ -19,14 +19,16 @@ extern "C" {
19 #include "zstd_compress_internal.h"
19 #include "zstd_compress_internal.h"
20
20
21 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
21 void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
22 ZSTD_compressionParameters const* cParams,
22 void const* end, ZSTD_dictTableLoadMethod_e dtlm);
23 void const* end);
24 size_t ZSTD_compressBlock_fast(
23 size_t ZSTD_compressBlock_fast(
25 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
24 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
25 void const* src, size_t srcSize);
26 size_t ZSTD_compressBlock_fast_dictMatchState(
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 void const* src, size_t srcSize);
27 size_t ZSTD_compressBlock_fast_extDict(
29 size_t ZSTD_compressBlock_fast_extDict(
28 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
30 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
31 void const* src, size_t srcSize);
30
32
31 #if defined (__cplusplus)
33 #if defined (__cplusplus)
32 }
34 }
(522 lines changed in the following file)
@@ -16,11 +16,12 @@
16 * Binary Tree search
16 * Binary Tree search
17 ***************************************/
17 ***************************************/
18
18
19 void ZSTD_updateDUBT(
19 static void
20 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
20 ZSTD_updateDUBT(ZSTD_matchState_t* ms,
21 const BYTE* ip, const BYTE* iend,
21 const BYTE* ip, const BYTE* iend,
22 U32 mls)
22 U32 mls)
23 {
23 {
24 const ZSTD_compressionParameters* const cParams = &ms->cParams;
24 U32* const hashTable = ms->hashTable;
25 U32* const hashTable = ms->hashTable;
25 U32 const hashLog = cParams->hashLog;
26 U32 const hashLog = cParams->hashLog;
26
27
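
For orientation in the binary-tree hunks below: zstd's DUBT match finder stores, per hash bucket, a tree of earlier positions packed inside chainTable, two U32 links per position, with slot 2*(index & btMask) pointing at the smaller-suffix subtree and the adjacent slot at the larger one; fresh candidates are appended unsorted, and ZSTD_insertDUBT1() sorts one of them per call, which keeps the update step cheap. A sketch of just the node addressing (an assumption-level model, not the library's code):

    typedef unsigned U32;

    /* two links per indexed position, packed into the chain table */
    static U32* node_links(U32* bt, U32 btLog, U32 index)
    {
        U32 const btMask = (1u << btLog) - 1;
        return bt + 2 * (index & btMask);  /* [0] = smaller suffixes, [1] = larger */
    }
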
@@ -59,11 +60,12 @@ void ZSTD_updateDUBT(
59 * sort one already inserted but unsorted position
60 * sort one already inserted but unsorted position
60 * assumption : current >= btlow == (current - btmask)
61 * assumption : current >= btlow == (current - btmask)
61 * doesn't fail */
62 * doesn't fail */
62 static void ZSTD_insertDUBT1(
63 static void
63 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
64 ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
64 U32 current, const BYTE* inputEnd,
65 U32 current, const BYTE* inputEnd,
65 U32 nbCompares, U32 btLow, int extDict)
66 U32 nbCompares, U32 btLow, const ZSTD_dictMode_e dictMode)
66 {
67 {
68 const ZSTD_compressionParameters* const cParams = &ms->cParams;
67 U32* const bt = ms->chainTable;
69 U32* const bt = ms->chainTable;
68 U32 const btLog = cParams->chainLog - 1;
70 U32 const btLog = cParams->chainLog - 1;
69 U32 const btMask = (1 << btLog) - 1;
71 U32 const btMask = (1 << btLog) - 1;
@@ -92,10 +94,12 @@ static void ZSTD_insertDUBT1(
92 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
94 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
93 assert(matchIndex < current);
95 assert(matchIndex < current);
94
96
95 if ( (!extDict)
97 if ( (dictMode != ZSTD_extDict)
96 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
98 || (matchIndex+matchLength >= dictLimit) /* both in current segment*/
97 || (current < dictLimit) /* both in extDict */) {
99 || (current < dictLimit) /* both in extDict */) {
98 const BYTE* const mBase = !extDict || ((matchIndex+matchLength) >= dictLimit) ? base : dictBase;
100 const BYTE* const mBase = ( (dictMode != ZSTD_extDict)
101 || (matchIndex+matchLength >= dictLimit)) ?
102 base : dictBase;
99 assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
103 assert( (matchIndex+matchLength >= dictLimit) /* might be wrong if extDict is incorrectly set to 0 */
100 || (current < dictLimit) );
104 || (current < dictLimit) );
101 match = mBase + matchIndex;
105 match = mBase + matchIndex;
@@ -138,13 +142,95 @@ static void ZSTD_insertDUBT1(
138 }
142 }
139
143
140
144
141 static size_t ZSTD_DUBT_findBestMatch (
145 static size_t
142 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
146 ZSTD_DUBT_findBetterDictMatch (
143 const BYTE* const ip, const BYTE* const iend,
147 ZSTD_matchState_t* ms,
144 size_t* offsetPtr,
148 const BYTE* const ip, const BYTE* const iend,
145 U32 const mls,
149 size_t* offsetPtr,
146 U32 const extDict)
150 U32 nbCompares,
151 U32 const mls,
152 const ZSTD_dictMode_e dictMode)
147 {
153 {
154 const ZSTD_matchState_t * const dms = ms->dictMatchState;
155 const ZSTD_compressionParameters* const dmsCParams = &dms->cParams;
156 const U32 * const dictHashTable = dms->hashTable;
157 U32 const hashLog = dmsCParams->hashLog;
158 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
159 U32 dictMatchIndex = dictHashTable[h];
160
161 const BYTE* const base = ms->window.base;
162 const BYTE* const prefixStart = base + ms->window.dictLimit;
163 U32 const current = (U32)(ip-base);
164 const BYTE* const dictBase = dms->window.base;
165 const BYTE* const dictEnd = dms->window.nextSrc;
166 U32 const dictHighLimit = (U32)(dms->window.nextSrc - dms->window.base);
167 U32 const dictLowLimit = dms->window.lowLimit;
168 U32 const dictIndexDelta = ms->window.lowLimit - dictHighLimit;
169
170 U32* const dictBt = dms->chainTable;
171 U32 const btLog = dmsCParams->chainLog - 1;
172 U32 const btMask = (1 << btLog) - 1;
173 U32 const btLow = (btMask >= dictHighLimit - dictLowLimit) ? dictLowLimit : dictHighLimit - btMask;
174
175 size_t commonLengthSmaller=0, commonLengthLarger=0, bestLength=0;
176 U32 matchEndIdx = current+8+1;
177
178 (void)dictMode;
179 assert(dictMode == ZSTD_dictMatchState);
180
181 while (nbCompares-- && (dictMatchIndex > dictLowLimit)) {
182 U32* const nextPtr = dictBt + 2*(dictMatchIndex & btMask);
183 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
184 const BYTE* match = dictBase + dictMatchIndex;
185 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
186 if (dictMatchIndex+matchLength >= dictHighLimit)
187 match = base + dictMatchIndex + dictIndexDelta; /* to prepare for next usage of match[matchLength] */
188
189 if (matchLength > bestLength) {
190 U32 matchIndex = dictMatchIndex + dictIndexDelta;
191 if (matchLength > matchEndIdx - matchIndex)
192 matchEndIdx = matchIndex + (U32)matchLength;
193 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
194 DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
195 current, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + current - matchIndex, dictMatchIndex, matchIndex);
196 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
197 }
198 if (ip+matchLength == iend) { /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
199 break; /* drop, to guarantee consistency (miss a little bit of compression) */
200 }
201 }
202
203 DEBUGLOG(2, "matchLength:%6zu, match:%p, prefixStart:%p, ip:%p", matchLength, match, prefixStart, ip);
204 if (match[matchLength] < ip[matchLength]) {
205 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
206 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
207 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
208 } else {
209 /* match is larger than current */
210 if (dictMatchIndex <= btLow) { break; } /* beyond tree size, stop the search */
211 commonLengthLarger = matchLength;
212 dictMatchIndex = nextPtr[0];
213 }
214 }
215
216 if (bestLength >= MINMATCH) {
217 U32 const mIndex = current - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
218 DEBUGLOG(2, "ZSTD_DUBT_findBestDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
219 current, (U32)bestLength, (U32)*offsetPtr, mIndex);
220 }
221 return bestLength;
222
223 }
224
225
226 static size_t
227 ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
228 const BYTE* const ip, const BYTE* const iend,
229 size_t* offsetPtr,
230 U32 const mls,
231 const ZSTD_dictMode_e dictMode)
232 {
233 const ZSTD_compressionParameters* const cParams = &ms->cParams;
148 U32* const hashTable = ms->hashTable;
234 U32* const hashTable = ms->hashTable;
149 U32 const hashLog = cParams->hashLog;
235 U32 const hashLog = cParams->hashLog;
150 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
236 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
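
ZSTD_DUBT_findBetterDictMatch, added above, searches the attached dictionary's binary tree read-only and maps any hit back into the current window's index space through dictIndexDelta = ms->window.lowLimit - dictHighLimit, so the usual offset formula current - matchIndex keeps working across the boundary. A toy illustration of the remapping (values invented; the real code performs this in modular U32 arithmetic rather than signed ints):

    #include <stdio.h>

    int main(void)
    {
        /* dictionary indices live in [0, dictHighLimit) in the dict's own
         * space; the current window starts at lowLimit in its own space */
        int const dictHighLimit  = 1000;
        int const lowLimit       = 0;
        int const dictIndexDelta = lowLimit - dictHighLimit;      /* -1000 */

        int const dictMatchIndex = 640;  /* hit found in dictionary space */
        int const current        = 300;  /* position in current space     */

        /* the remapped index sits "before" the window, so the offset
         * naturally spans the dictionary boundary */
        int const matchIndex = dictMatchIndex + dictIndexDelta;   /* -360 */
        printf("match is %d bytes back\n", current - matchIndex); /* 660  */
        return 0;
    }
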
@@ -195,8 +281,8 b' static size_t ZSTD_DUBT_findBestMatch ('
195 while (matchIndex) { /* will end on matchIndex == 0 */
281 while (matchIndex) { /* will end on matchIndex == 0 */
196 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
282 U32* const nextCandidateIdxPtr = bt + 2*(matchIndex&btMask) + 1;
197 U32 const nextCandidateIdx = *nextCandidateIdxPtr;
283 U32 const nextCandidateIdx = *nextCandidateIdxPtr;
198 ZSTD_insertDUBT1(ms, cParams, matchIndex, iend,
284 ZSTD_insertDUBT1(ms, matchIndex, iend,
199 nbCandidates, unsortLimit, extDict);
285 nbCandidates, unsortLimit, dictMode);
200 matchIndex = nextCandidateIdx;
286 matchIndex = nextCandidateIdx;
201 nbCandidates++;
287 nbCandidates++;
202 }
288 }
@@ -221,7 +307,7 b' static size_t ZSTD_DUBT_findBestMatch ('
221 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
307 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
222 const BYTE* match;
308 const BYTE* match;
223
309
224 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
310 if ((dictMode != ZSTD_extDict) || (matchIndex+matchLength >= dictLimit)) {
225 match = base + matchIndex;
311 match = base + matchIndex;
226 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
312 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
227 } else {
313 } else {
@@ -259,6 +345,10 b' static size_t ZSTD_DUBT_findBestMatch ('
259
345
260 *smallerPtr = *largerPtr = 0;
346 *smallerPtr = *largerPtr = 0;
261
347
348 if (dictMode == ZSTD_dictMatchState && nbCompares) {
349 bestLength = ZSTD_DUBT_findBetterDictMatch(ms, ip, iend, offsetPtr, nbCompares, mls, dictMode);
350 }
351
262 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
352 assert(matchEndIdx > current+8); /* ensure nextToUpdate is increased */
263 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
353 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
264 if (bestLength >= MINMATCH) {
354 if (bestLength >= MINMATCH) {
@@ -272,61 +362,64 b' static size_t ZSTD_DUBT_findBestMatch ('
272
362
273
363
274 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
364 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
275 static size_t ZSTD_BtFindBestMatch (
365 FORCE_INLINE_TEMPLATE size_t
276 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
366 ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
277 const BYTE* const ip, const BYTE* const iLimit,
367 const BYTE* const ip, const BYTE* const iLimit,
278 size_t* offsetPtr,
368 size_t* offsetPtr,
279 const U32 mls /* template */)
369 const U32 mls /* template */,
370 const ZSTD_dictMode_e dictMode)
280 {
371 {
281 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
372 DEBUGLOG(7, "ZSTD_BtFindBestMatch");
282 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
373 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
283 ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
374 ZSTD_updateDUBT(ms, ip, iLimit, mls);
284 return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 0);
375 return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
285 }
376 }
286
377
287
378
288 static size_t ZSTD_BtFindBestMatch_selectMLS (
379 static size_t
289 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
380 ZSTD_BtFindBestMatch_selectMLS ( ZSTD_matchState_t* ms,
290 const BYTE* ip, const BYTE* const iLimit,
381 const BYTE* ip, const BYTE* const iLimit,
291 size_t* offsetPtr)
382 size_t* offsetPtr)
292 {
383 {
293 switch(cParams->searchLength)
384 switch(ms->cParams.searchLength)
294 {
385 {
295 default : /* includes case 3 */
386 default : /* includes case 3 */
296 case 4 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 4);
387 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
297 case 5 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 5);
388 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
298 case 7 :
389 case 7 :
299 case 6 : return ZSTD_BtFindBestMatch(ms, cParams, ip, iLimit, offsetPtr, 6);
390 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
300 }
391 }
301 }
392 }
302
393
303
394
304 /** Tree updater, providing best match */
395 static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
305 static size_t ZSTD_BtFindBestMatch_extDict (
396 ZSTD_matchState_t* ms,
306 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
397 const BYTE* ip, const BYTE* const iLimit,
307 const BYTE* const ip, const BYTE* const iLimit,
398 size_t* offsetPtr)
308 size_t* offsetPtr,
309 const U32 mls)
310 {
399 {
311 DEBUGLOG(7, "ZSTD_BtFindBestMatch_extDict");
400 switch(ms->cParams.searchLength)
312 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
401 {
313 ZSTD_updateDUBT(ms, cParams, ip, iLimit, mls);
402 default : /* includes case 3 */
314 return ZSTD_DUBT_findBestMatch(ms, cParams, ip, iLimit, offsetPtr, mls, 1);
403 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
404 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
405 case 7 :
406 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
407 }
315 }
408 }
316
409
317
410
318 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
411 static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
319 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
412 ZSTD_matchState_t* ms,
320 const BYTE* ip, const BYTE* const iLimit,
413 const BYTE* ip, const BYTE* const iLimit,
321 size_t* offsetPtr)
414 size_t* offsetPtr)
322 {
415 {
323 switch(cParams->searchLength)
416 switch(ms->cParams.searchLength)
324 {
417 {
325 default : /* includes case 3 */
418 default : /* includes case 3 */
326 case 4 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 4);
419 case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
327 case 5 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 5);
420 case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
328 case 7 :
421 case 7 :
329 case 6 : return ZSTD_BtFindBestMatch_extDict(ms, cParams, ip, iLimit, offsetPtr, 6);
422 case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
330 }
423 }
331 }
424 }
332
425
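
ZSTD_BtFindBestMatch becomes FORCE_INLINE_TEMPLATE and takes both mls and dictMode as constants fed in by thin _selectMLS wrappers: C's usual substitute for C++ templates, where a switch over cParams.searchLength picks a fully specialized instantiation and trades code size for branch-free inner loops. A condensed sketch of the dispatch idiom:

    /* 'template' body: with mls a literal at the call site and the
     * function force-inlined, the compiler emits one copy per value
     * with the length test folded away */
    static inline unsigned count_common(const unsigned char *a,
                                        const unsigned char *b, unsigned mls)
    {
        unsigned n = 0;
        while (n < mls && a[n] == b[n]) n++;
        return n;
    }

    unsigned count_selectMLS(const unsigned char *a, const unsigned char *b,
                             unsigned searchLength)
    {
        switch (searchLength)
        {
        default : /* includes case 3 */
        case 4 : return count_common(a, b, 4);
        case 5 : return count_common(a, b, 5);
        case 7 :
        case 6 : return count_common(a, b, 6);
        }
    }
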
@@ -340,7 +433,8 b' static size_t ZSTD_BtFindBestMatch_selec'
340 /* Update chains up to ip (excluded)
433 /* Update chains up to ip (excluded)
341 Assumption : always within prefix (i.e. not within extDict) */
434 Assumption : always within prefix (i.e. not within extDict) */
342 static U32 ZSTD_insertAndFindFirstIndex_internal(
435 static U32 ZSTD_insertAndFindFirstIndex_internal(
343 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
436 ZSTD_matchState_t* ms,
437 const ZSTD_compressionParameters* const cParams,
344 const BYTE* ip, U32 const mls)
438 const BYTE* ip, U32 const mls)
345 {
439 {
346 U32* const hashTable = ms->hashTable;
440 U32* const hashTable = ms->hashTable;
@@ -362,22 +456,21 b' static U32 ZSTD_insertAndFindFirstIndex_'
362 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
456 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
363 }
457 }
364
458
365 U32 ZSTD_insertAndFindFirstIndex(
459 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
366 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
460 const ZSTD_compressionParameters* const cParams = &ms->cParams;
367 const BYTE* ip)
461 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.searchLength);
368 {
369 return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, cParams->searchLength);
370 }
462 }
371
463
372
464
373 /* inlining is important to hardwire a hot branch (template emulation) */
465 /* inlining is important to hardwire a hot branch (template emulation) */
374 FORCE_INLINE_TEMPLATE
466 FORCE_INLINE_TEMPLATE
375 size_t ZSTD_HcFindBestMatch_generic (
467 size_t ZSTD_HcFindBestMatch_generic (
376 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
468 ZSTD_matchState_t* ms,
377 const BYTE* const ip, const BYTE* const iLimit,
469 const BYTE* const ip, const BYTE* const iLimit,
378 size_t* offsetPtr,
470 size_t* offsetPtr,
379 const U32 mls, const U32 extDict)
471 const U32 mls, const ZSTD_dictMode_e dictMode)
380 {
472 {
473 const ZSTD_compressionParameters* const cParams = &ms->cParams;
381 U32* const chainTable = ms->chainTable;
474 U32* const chainTable = ms->chainTable;
382 const U32 chainSize = (1 << cParams->chainLog);
475 const U32 chainSize = (1 << cParams->chainLog);
383 const U32 chainMask = chainSize-1;
476 const U32 chainMask = chainSize-1;
@@ -397,7 +490,7 b' size_t ZSTD_HcFindBestMatch_generic ('
397
490
398 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
491 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
399 size_t currentMl=0;
492 size_t currentMl=0;
400 if ((!extDict) || matchIndex >= dictLimit) {
493 if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
401 const BYTE* const match = base + matchIndex;
494 const BYTE* const match = base + matchIndex;
402 if (match[ml] == ip[ml]) /* potentially better */
495 if (match[ml] == ip[ml]) /* potentially better */
403 currentMl = ZSTD_count(ip, match, iLimit);
496 currentMl = ZSTD_count(ip, match, iLimit);
@@ -419,38 +512,87 b' size_t ZSTD_HcFindBestMatch_generic ('
419 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
512 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
420 }
513 }
421
514
515 if (dictMode == ZSTD_dictMatchState) {
516 const ZSTD_matchState_t* const dms = ms->dictMatchState;
517 const U32* const dmsChainTable = dms->chainTable;
518 const U32 dmsChainSize = (1 << dms->cParams.chainLog);
519 const U32 dmsChainMask = dmsChainSize - 1;
520 const U32 dmsLowestIndex = dms->window.dictLimit;
521 const BYTE* const dmsBase = dms->window.base;
522 const BYTE* const dmsEnd = dms->window.nextSrc;
523 const U32 dmsSize = (U32)(dmsEnd - dmsBase);
524 const U32 dmsIndexDelta = dictLimit - dmsSize;
525 const U32 dmsMinChain = dmsSize > dmsChainSize ? dmsSize - dmsChainSize : 0;
526
527 matchIndex = dms->hashTable[ZSTD_hashPtr(ip, dms->cParams.hashLog, mls)];
528
529 for ( ; (matchIndex>dmsLowestIndex) & (nbAttempts>0) ; nbAttempts--) {
530 size_t currentMl=0;
531 const BYTE* const match = dmsBase + matchIndex;
532 assert(match+4 <= dmsEnd);
533 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
534 currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
535
536 /* save best solution */
537 if (currentMl > ml) {
538 ml = currentMl;
539 *offsetPtr = current - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
540 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
541 }
542
543 if (matchIndex <= dmsMinChain) break;
544 matchIndex = dmsChainTable[matchIndex & dmsChainMask];
545 }
546 }
547
422 return ml;
548 return ml;
423 }
549 }
424
550
425
551
426 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
552 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
427 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
553 ZSTD_matchState_t* ms,
428 const BYTE* ip, const BYTE* const iLimit,
554 const BYTE* ip, const BYTE* const iLimit,
429 size_t* offsetPtr)
555 size_t* offsetPtr)
430 {
556 {
431 switch(cParams->searchLength)
557 switch(ms->cParams.searchLength)
432 {
558 {
433 default : /* includes case 3 */
559 default : /* includes case 3 */
434 case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 0);
560 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
435 case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 0);
561 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
436 case 7 :
562 case 7 :
437 case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 0);
563 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
564 }
565 }
566
567
568 static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
569 ZSTD_matchState_t* ms,
570 const BYTE* ip, const BYTE* const iLimit,
571 size_t* offsetPtr)
572 {
573 switch(ms->cParams.searchLength)
574 {
575 default : /* includes case 3 */
576 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
577 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
578 case 7 :
579 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
438 }
580 }
439 }
581 }
440
582
441
583
442 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
584 FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
443 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
585 ZSTD_matchState_t* ms,
444 const BYTE* ip, const BYTE* const iLimit,
586 const BYTE* ip, const BYTE* const iLimit,
445 size_t* const offsetPtr)
587 size_t* offsetPtr)
446 {
588 {
447 switch(cParams->searchLength)
589 switch(ms->cParams.searchLength)
448 {
590 {
449 default : /* includes case 3 */
591 default : /* includes case 3 */
450 case 4 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 4, 1);
592 case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
451 case 5 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 5, 1);
593 case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
452 case 7 :
594 case 7 :
453 case 6 : return ZSTD_HcFindBestMatch_generic(ms, cParams, ip, iLimit, offsetPtr, 6, 1);
595 case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
454 }
596 }
455 }
597 }
456
598
@@ -462,30 +604,55 b' FORCE_INLINE_TEMPLATE'
462 size_t ZSTD_compressBlock_lazy_generic(
604 size_t ZSTD_compressBlock_lazy_generic(
463 ZSTD_matchState_t* ms, seqStore_t* seqStore,
605 ZSTD_matchState_t* ms, seqStore_t* seqStore,
464 U32 rep[ZSTD_REP_NUM],
606 U32 rep[ZSTD_REP_NUM],
465 ZSTD_compressionParameters const* cParams,
466 const void* src, size_t srcSize,
607 const void* src, size_t srcSize,
467 const U32 searchMethod, const U32 depth)
608 const U32 searchMethod, const U32 depth,
609 ZSTD_dictMode_e const dictMode)
468 {
610 {
469 const BYTE* const istart = (const BYTE*)src;
611 const BYTE* const istart = (const BYTE*)src;
470 const BYTE* ip = istart;
612 const BYTE* ip = istart;
471 const BYTE* anchor = istart;
613 const BYTE* anchor = istart;
472 const BYTE* const iend = istart + srcSize;
614 const BYTE* const iend = istart + srcSize;
473 const BYTE* const ilimit = iend - 8;
615 const BYTE* const ilimit = iend - 8;
474 const BYTE* const base = ms->window.base + ms->window.dictLimit;
616 const BYTE* const base = ms->window.base;
617 const U32 prefixLowestIndex = ms->window.dictLimit;
618 const BYTE* const prefixLowest = base + prefixLowestIndex;
475
619
476 typedef size_t (*searchMax_f)(
620 typedef size_t (*searchMax_f)(
477 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
621 ZSTD_matchState_t* ms,
478 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
622 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
479 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
623 searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ?
624 (searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) :
625 (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS);
480 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
626 U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
481
627
628 const ZSTD_matchState_t* const dms = ms->dictMatchState;
629 const U32 dictLowestIndex = dictMode == ZSTD_dictMatchState ?
630 dms->window.dictLimit : 0;
631 const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ?
632 dms->window.base : NULL;
633 const BYTE* const dictLowest = dictMode == ZSTD_dictMatchState ?
634 dictBase + dictLowestIndex : NULL;
635 const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ?
636 dms->window.nextSrc : NULL;
637 const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ?
638 prefixLowestIndex - (U32)(dictEnd - dictBase) :
639 0;
640 const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictLowest);
641
482 /* init */
642 /* init */
483 ip += (ip==base);
643 ip += (dictAndPrefixLength == 0);
484 ms->nextToUpdate3 = ms->nextToUpdate;
644 ms->nextToUpdate3 = ms->nextToUpdate;
485 { U32 const maxRep = (U32)(ip-base);
645 if (dictMode == ZSTD_noDict) {
646 U32 const maxRep = (U32)(ip - prefixLowest);
486 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
647 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
487 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
648 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
488 }
649 }
650 if (dictMode == ZSTD_dictMatchState) {
651 /* dictMatchState repCode checks don't currently handle repCode == 0
652 * disabling. */
653 assert(offset_1 <= dictAndPrefixLength);
654 assert(offset_2 <= dictAndPrefixLength);
655 }
489
656
490 /* Match Loop */
657 /* Match Loop */
491 while (ip < ilimit) {
658 while (ip < ilimit) {
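
searchMax is now chosen from four candidates, {hash chain, binary tree} x {plain, dictMatchState}, once per block, so the hot loop pays only an indirect call. The nested ternaries above amount to a small 2x2 function-pointer table; a hedged restatement with trivial stand-in searchers:

    #include <stddef.h>

    typedef size_t (*search_fn)(const unsigned char *ip, size_t *offsetPtr);

    /* stand-ins for the four concrete searchers */
    static size_t hc(const unsigned char *p, size_t *o)     { (void)p; *o = 0; return 0; }
    static size_t hc_dms(const unsigned char *p, size_t *o) { (void)p; *o = 0; return 1; }
    static size_t bt(const unsigned char *p, size_t *o)     { (void)p; *o = 0; return 2; }
    static size_t bt_dms(const unsigned char *p, size_t *o) { (void)p; *o = 0; return 3; }

    static search_fn select_search(int searchMethod /* 1 = binary tree */,
                                   int dictMatchState)
    {
        static const search_fn table[2][2] = {
            { hc, hc_dms },   /* hash chain  */
            { bt, bt_dms },   /* binary tree */
        };
        return table[!!searchMethod][!!dictMatchState];
    }

    int main(void)
    {
        size_t off;
        return select_search(1, 0)(NULL, &off) == 2 ? 0 : 1;
    }
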
@@ -494,15 +661,28 b' size_t ZSTD_compressBlock_lazy_generic('
494 const BYTE* start=ip+1;
661 const BYTE* start=ip+1;
495
662
496 /* check repCode */
663 /* check repCode */
497 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
664 if (dictMode == ZSTD_dictMatchState) {
498 /* repcode : we take it */
665 const U32 repIndex = (U32)(ip - base) + 1 - offset_1;
666 const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
667 && repIndex < prefixLowestIndex) ?
668 dictBase + (repIndex - dictIndexDelta) :
669 base + repIndex;
670 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
671 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
672 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
673 matchLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
674 if (depth==0) goto _storeSequence;
675 }
676 }
677 if ( dictMode == ZSTD_noDict
678 && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
499 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
679 matchLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
500 if (depth==0) goto _storeSequence;
680 if (depth==0) goto _storeSequence;
501 }
681 }
502
682
503 /* first search (depth 0) */
683 /* first search (depth 0) */
504 { size_t offsetFound = 99999999;
684 { size_t offsetFound = 999999999;
505 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
685 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
506 if (ml2 > matchLength)
686 if (ml2 > matchLength)
507 matchLength = ml2, start = ip, offset=offsetFound;
687 matchLength = ml2, start = ip, offset=offsetFound;
508 }
688 }
@@ -516,15 +696,31 b' size_t ZSTD_compressBlock_lazy_generic('
516 if (depth>=1)
696 if (depth>=1)
517 while (ip<ilimit) {
697 while (ip<ilimit) {
518 ip ++;
698 ip ++;
519 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
699 if ( (dictMode == ZSTD_noDict)
700 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
520 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
701 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
521 int const gain2 = (int)(mlRep * 3);
702 int const gain2 = (int)(mlRep * 3);
522 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
703 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
523 if ((mlRep >= 4) && (gain2 > gain1))
704 if ((mlRep >= 4) && (gain2 > gain1))
524 matchLength = mlRep, offset = 0, start = ip;
705 matchLength = mlRep, offset = 0, start = ip;
525 }
706 }
526 { size_t offset2=99999999;
707 if (dictMode == ZSTD_dictMatchState) {
527 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
708 const U32 repIndex = (U32)(ip - base) - offset_1;
709 const BYTE* repMatch = repIndex < prefixLowestIndex ?
710 dictBase + (repIndex - dictIndexDelta) :
711 base + repIndex;
712 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
713 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
714 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
715 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
716 int const gain2 = (int)(mlRep * 3);
717 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
718 if ((mlRep >= 4) && (gain2 > gain1))
719 matchLength = mlRep, offset = 0, start = ip;
720 }
721 }
722 { size_t offset2=999999999;
723 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
528 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
724 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
529 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
725 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
530 if ((ml2 >= 4) && (gain2 > gain1)) {
726 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -535,15 +731,31 b' size_t ZSTD_compressBlock_lazy_generic('
535 /* let's find an even better one */
731 /* let's find an even better one */
536 if ((depth==2) && (ip<ilimit)) {
732 if ((depth==2) && (ip<ilimit)) {
537 ip ++;
733 ip ++;
538 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
734 if ( (dictMode == ZSTD_noDict)
539 size_t const ml2 = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
735 && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
540 int const gain2 = (int)(ml2 * 4);
736 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
737 int const gain2 = (int)(mlRep * 4);
541 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
738 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
542 if ((ml2 >= 4) && (gain2 > gain1))
739 if ((mlRep >= 4) && (gain2 > gain1))
543 matchLength = ml2, offset = 0, start = ip;
740 matchLength = mlRep, offset = 0, start = ip;
544 }
741 }
545 { size_t offset2=99999999;
742 if (dictMode == ZSTD_dictMatchState) {
546 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
743 const U32 repIndex = (U32)(ip - base) - offset_1;
744 const BYTE* repMatch = repIndex < prefixLowestIndex ?
745 dictBase + (repIndex - dictIndexDelta) :
746 base + repIndex;
747 if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
748 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
749 const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
750 size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
751 int const gain2 = (int)(mlRep * 4);
752 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
753 if ((mlRep >= 4) && (gain2 > gain1))
754 matchLength = mlRep, offset = 0, start = ip;
755 }
756 }
757 { size_t offset2=999999999;
758 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
547 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
759 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
548 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
760 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
549 if ((ml2 >= 4) && (gain2 > gain1)) {
761 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -560,9 +772,17 b' size_t ZSTD_compressBlock_lazy_generic('
560 */
772 */
561 /* catch up */
773 /* catch up */
562 if (offset) {
774 if (offset) {
563 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > base))
775 if (dictMode == ZSTD_noDict) {
564 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
776 while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
565 { start--; matchLength++; }
777 && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) ) /* only search for offset within prefix */
778 { start--; matchLength++; }
779 }
780 if (dictMode == ZSTD_dictMatchState) {
781 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
782 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
783 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
784 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
785 }
566 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
786 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
567 }
787 }
568 /* store sequence */
788 /* store sequence */
@@ -573,16 +793,39 b' size_t ZSTD_compressBlock_lazy_generic('
573 }
793 }
574
794
575 /* check immediate repcode */
795 /* check immediate repcode */
576 while ( ((ip <= ilimit) & (offset_2>0))
796 if (dictMode == ZSTD_dictMatchState) {
577 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
797 while (ip <= ilimit) {
578 /* store sequence */
798 U32 const current2 = (U32)(ip-base);
579 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
799 U32 const repIndex = current2 - offset_2;
580 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
800 const BYTE* repMatch = dictMode == ZSTD_dictMatchState
581 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
801 && repIndex < prefixLowestIndex ?
582 ip += matchLength;
802 dictBase - dictIndexDelta + repIndex :
583 anchor = ip;
803 base + repIndex;
584 continue; /* faster when present ... (?) */
804 if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex) >= 3 /* intentional overflow */)
585 } }
805 && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
806 const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
807 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
808 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset_2 <=> offset_1 */
809 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
810 ip += matchLength;
811 anchor = ip;
812 continue;
813 }
814 break;
815 }
816 }
817
818 if (dictMode == ZSTD_noDict) {
819 while ( ((ip <= ilimit) & (offset_2>0))
820 && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
821 /* store sequence */
822 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
823 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
824 ZSTD_storeSeq(seqStore, 0, anchor, 0, matchLength-MINMATCH);
825 ip += matchLength;
826 anchor = ip;
827 continue; /* faster when present ... (?) */
828 } } }
586
829
587 /* Save reps for next block */
830 /* Save reps for next block */
588 rep[0] = offset_1 ? offset_1 : savedOffset;
831 rep[0] = offset_1 ? offset_1 : savedOffset;
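
The guard (U32)((prefixLowestIndex-1) - repIndex) >= 3, repeated in every dictMatchState repcode check, leans on deliberate unsigned wraparound: one subtraction rejects exactly the three indices just below the prefix start, where a 4-byte read would straddle the dictionary/prefix discontinuity, while indices at or above prefixLowestIndex underflow to huge values and pass. A self-checking restatement:

    #include <assert.h>
    #include <stdint.h>

    /* nonzero if a 4-byte read at repIndex cannot straddle the
     * discontinuity just below prefixLowestIndex */
    static int rep_index_valid(uint32_t prefixLowestIndex, uint32_t repIndex)
    {
        return (uint32_t)((prefixLowestIndex - 1) - repIndex) >= 3;
    }

    int main(void)
    {
        uint32_t const pli = 100;
        assert( rep_index_valid(pli,  96));   /* fully inside the dictionary */
        assert(!rep_index_valid(pli,  97));   /* read would straddle the gap */
        assert(!rep_index_valid(pli,  98));
        assert(!rep_index_valid(pli,  99));
        assert( rep_index_valid(pli, 100));   /* wraps around: in-prefix, ok */
        assert( rep_index_valid(pli, 500));
        return 0;
    }
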
@@ -595,30 +838,58 b' size_t ZSTD_compressBlock_lazy_generic('
595
838
596 size_t ZSTD_compressBlock_btlazy2(
839 size_t ZSTD_compressBlock_btlazy2(
597 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
840 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
598 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
841 void const* src, size_t srcSize)
599 {
842 {
600 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
843 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_noDict);
601 }
844 }
602
845
603 size_t ZSTD_compressBlock_lazy2(
846 size_t ZSTD_compressBlock_lazy2(
604 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
847 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
605 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
848 void const* src, size_t srcSize)
606 {
849 {
607 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
850 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_noDict);
608 }
851 }
609
852
610 size_t ZSTD_compressBlock_lazy(
853 size_t ZSTD_compressBlock_lazy(
611 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
854 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
612 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
855 void const* src, size_t srcSize)
613 {
856 {
614 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
857 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_noDict);
615 }
858 }
616
859
617 size_t ZSTD_compressBlock_greedy(
860 size_t ZSTD_compressBlock_greedy(
618 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
861 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
619 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
862 void const* src, size_t srcSize)
863 {
864 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_noDict);
865 }
866
867 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
868 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
869 void const* src, size_t srcSize)
870 {
871 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 1, 2, ZSTD_dictMatchState);
872 }
873
874 size_t ZSTD_compressBlock_lazy2_dictMatchState(
875 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
876 void const* src, size_t srcSize)
620 {
877 {
621 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
878 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 2, ZSTD_dictMatchState);
879 }
880
881 size_t ZSTD_compressBlock_lazy_dictMatchState(
882 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
883 void const* src, size_t srcSize)
884 {
885 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 1, ZSTD_dictMatchState);
886 }
887
888 size_t ZSTD_compressBlock_greedy_dictMatchState(
889 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
890 void const* src, size_t srcSize)
891 {
892 return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, 0, 0, ZSTD_dictMatchState);
622 }
893 }
623
894
624
895
@@ -626,7 +897,6 b' FORCE_INLINE_TEMPLATE'
626 size_t ZSTD_compressBlock_lazy_extDict_generic(
897 size_t ZSTD_compressBlock_lazy_extDict_generic(
627 ZSTD_matchState_t* ms, seqStore_t* seqStore,
898 ZSTD_matchState_t* ms, seqStore_t* seqStore,
628 U32 rep[ZSTD_REP_NUM],
899 U32 rep[ZSTD_REP_NUM],
629 ZSTD_compressionParameters const* cParams,
630 const void* src, size_t srcSize,
900 const void* src, size_t srcSize,
631 const U32 searchMethod, const U32 depth)
901 const U32 searchMethod, const U32 depth)
632 {
902 {
@@ -644,9 +914,9 b' size_t ZSTD_compressBlock_lazy_extDict_g'
644 const BYTE* const dictStart = dictBase + lowestIndex;
914 const BYTE* const dictStart = dictBase + lowestIndex;
645
915
646 typedef size_t (*searchMax_f)(
916 typedef size_t (*searchMax_f)(
647 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
917 ZSTD_matchState_t* ms,
648 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
918 const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
649 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
919 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
650
920
651 U32 offset_1 = rep[0], offset_2 = rep[1];
921 U32 offset_1 = rep[0], offset_2 = rep[1];
652
922
@@ -674,8 +944,8 b' size_t ZSTD_compressBlock_lazy_extDict_g'
674 } }
944 } }
675
945
676 /* first search (depth 0) */
946 /* first search (depth 0) */
677 { size_t offsetFound = 99999999;
947 { size_t offsetFound = 999999999;
678 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offsetFound);
948 size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
679 if (ml2 > matchLength)
949 if (ml2 > matchLength)
680 matchLength = ml2, start = ip, offset=offsetFound;
950 matchLength = ml2, start = ip, offset=offsetFound;
681 }
951 }
@@ -707,8 +977,8 b' size_t ZSTD_compressBlock_lazy_extDict_g'
707 } }
977 } }
708
978
709 /* search match, depth 1 */
979 /* search match, depth 1 */
710 { size_t offset2=99999999;
980 { size_t offset2=999999999;
711 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
981 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
712 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
982 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
713 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
983 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
714 if ((ml2 >= 4) && (gain2 > gain1)) {
984 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -737,8 +1007,8 b' size_t ZSTD_compressBlock_lazy_extDict_g'
737 } }
1007 } }
738
1008
739 /* search match, depth 2 */
1009 /* search match, depth 2 */
740 { size_t offset2=99999999;
1010 { size_t offset2=999999999;
741 size_t const ml2 = searchMax(ms, cParams, ip, iend, &offset2);
1011 size_t const ml2 = searchMax(ms, ip, iend, &offset2);
742 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1012 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
743 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1013 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
744 if ((ml2 >= 4) && (gain2 > gain1)) {
1014 if ((ml2 >= 4) && (gain2 > gain1)) {
@@ -794,31 +1064,31 b' size_t ZSTD_compressBlock_lazy_extDict_g'
794
1064
795 size_t ZSTD_compressBlock_greedy_extDict(
1065 size_t ZSTD_compressBlock_greedy_extDict(
796 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1066 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
797 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1067 void const* src, size_t srcSize)
798 {
1068 {
799 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 0);
1069 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 0);
800 }
1070 }
801
1071
802 size_t ZSTD_compressBlock_lazy_extDict(
1072 size_t ZSTD_compressBlock_lazy_extDict(
803 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1073 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
804 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1074 void const* src, size_t srcSize)
805
1075
806 {
1076 {
807 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 1);
1077 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 1);
808 }
1078 }
809
1079
810 size_t ZSTD_compressBlock_lazy2_extDict(
1080 size_t ZSTD_compressBlock_lazy2_extDict(
811 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1081 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
812 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1082 void const* src, size_t srcSize)
813
1083
814 {
1084 {
815 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 0, 2);
1085 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 0, 2);
816 }
1086 }
817
1087
818 size_t ZSTD_compressBlock_btlazy2_extDict(
1088 size_t ZSTD_compressBlock_btlazy2_extDict(
819 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1089 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
820 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1090 void const* src, size_t srcSize)
821
1091
822 {
1092 {
823 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, cParams, src, srcSize, 1, 2);
1093 return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, 1, 2);
824 }
1094 }
@@ -17,37 +17,48 b' extern "C" {'
17
17
18 #include "zstd_compress_internal.h"
18 #include "zstd_compress_internal.h"
19
19
20 U32 ZSTD_insertAndFindFirstIndex(
20 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
21 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
22 const BYTE* ip);
23
21
24 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
22 void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre-emptively increase value of ZSTD_DUBT_UNSORTED_MARK */
25
23
26 size_t ZSTD_compressBlock_btlazy2(
24 size_t ZSTD_compressBlock_btlazy2(
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
25 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
26 void const* src, size_t srcSize);
29 size_t ZSTD_compressBlock_lazy2(
27 size_t ZSTD_compressBlock_lazy2(
30 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
28 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
31 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
29 void const* src, size_t srcSize);
32 size_t ZSTD_compressBlock_lazy(
30 size_t ZSTD_compressBlock_lazy(
33 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
31 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
34 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
32 void const* src, size_t srcSize);
35 size_t ZSTD_compressBlock_greedy(
33 size_t ZSTD_compressBlock_greedy(
36 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
34 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
37 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
35 void const* src, size_t srcSize);
36
37 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
38 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
39 void const* src, size_t srcSize);
40 size_t ZSTD_compressBlock_lazy2_dictMatchState(
41 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
42 void const* src, size_t srcSize);
43 size_t ZSTD_compressBlock_lazy_dictMatchState(
44 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
45 void const* src, size_t srcSize);
46 size_t ZSTD_compressBlock_greedy_dictMatchState(
47 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
48 void const* src, size_t srcSize);
38
49
39 size_t ZSTD_compressBlock_greedy_extDict(
50 size_t ZSTD_compressBlock_greedy_extDict(
40 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
51 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
41 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
52 void const* src, size_t srcSize);
42 size_t ZSTD_compressBlock_lazy_extDict(
53 size_t ZSTD_compressBlock_lazy_extDict(
43 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
54 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
44 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
55 void const* src, size_t srcSize);
45 size_t ZSTD_compressBlock_lazy2_extDict(
56 size_t ZSTD_compressBlock_lazy2_extDict(
46 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
57 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
47 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
58 void const* src, size_t srcSize);
48 size_t ZSTD_compressBlock_btlazy2_extDict(
59 size_t ZSTD_compressBlock_btlazy2_extDict(
49 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
60 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
50 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
61 void const* src, size_t srcSize);
51
62
52 #if defined (__cplusplus)
63 #if defined (__cplusplus)
53 }
64 }
@@ -9,6 +9,7 b''
9
9
10 #include "zstd_ldm.h"
10 #include "zstd_ldm.h"
11
11
12 #include "debug.h"
12 #include "zstd_fast.h" /* ZSTD_fillHashTable() */
13 #include "zstd_fast.h" /* ZSTD_fillHashTable() */
13 #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
14 #include "zstd_double_fast.h" /* ZSTD_fillDoubleHashTable() */
14
15
@@ -20,7 +21,7 b''
20 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
21 void ZSTD_ldm_adjustParameters(ldmParams_t* params,
21 ZSTD_compressionParameters const* cParams)
22 ZSTD_compressionParameters const* cParams)
22 {
23 {
23 U32 const windowLog = cParams->windowLog;
24 params->windowLog = cParams->windowLog;
24 ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
25 ZSTD_STATIC_ASSERT(LDM_BUCKET_SIZE_LOG <= ZSTD_LDM_BUCKETSIZELOG_MAX);
25 DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
26 DEBUGLOG(4, "ZSTD_ldm_adjustParameters");
26 if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
27 if (!params->bucketSizeLog) params->bucketSizeLog = LDM_BUCKET_SIZE_LOG;
@@ -33,12 +34,13 b' void ZSTD_ldm_adjustParameters(ldmParams'
33 params->minMatchLength = minMatch;
34 params->minMatchLength = minMatch;
34 }
35 }
35 if (params->hashLog == 0) {
36 if (params->hashLog == 0) {
36 params->hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG);
37 params->hashLog = MAX(ZSTD_HASHLOG_MIN, params->windowLog - LDM_HASH_RLOG);
37 assert(params->hashLog <= ZSTD_HASHLOG_MAX);
38 assert(params->hashLog <= ZSTD_HASHLOG_MAX);
38 }
39 }
39 if (params->hashEveryLog == 0) {
40 if (params->hashEveryLog == 0) {
40 params->hashEveryLog =
41 params->hashEveryLog = params->windowLog < params->hashLog
41 windowLog < params->hashLog ? 0 : windowLog - params->hashLog;
42 ? 0
43 : params->windowLog - params->hashLog;
42 }
44 }
43 params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
45 params->bucketSizeLog = MIN(params->bucketSizeLog, params->hashLog);
44 }
46 }
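
ZSTD_ldm_adjustParameters now records windowLog in the params struct and derives every unset field from it: hashLog = MAX(ZSTD_HASHLOG_MIN, windowLog - LDM_HASH_RLOG), and hashEveryLog = windowLog - hashLog, clamped at 0, which makes table density track window size. A worked instance (the two constants are assumed values, not taken from this diff):

    #include <stdio.h>

    #define MAX(a,b) ((a) > (b) ? (a) : (b))
    #define HASHLOG_MIN 6    /* assumed value of ZSTD_HASHLOG_MIN */
    #define HASH_RLOG   7    /* assumed value of LDM_HASH_RLOG    */

    int main(void)
    {
        unsigned const windowLog = 27;   /* e.g. a long-distance-mode window */
        unsigned const hashLog = MAX(HASHLOG_MIN, windowLog - HASH_RLOG);   /* 20 */
        unsigned const hashEveryLog =
            windowLog < hashLog ? 0 : windowLog - hashLog;                  /* 7 */
        /* roughly one in 2^hashEveryLog positions enters the LDM hash table */
        printf("hashLog=%u hashEveryLog=%u\n", hashLog, hashEveryLog);
        return 0;
    }
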
@@ -216,21 +218,18 b' static size_t ZSTD_ldm_countBackwardsMat'
216 * The tables for the other strategies are filled within their
218 * The tables for the other strategies are filled within their
217 * block compressors. */
219 * block compressors. */
218 static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
220 static size_t ZSTD_ldm_fillFastTables(ZSTD_matchState_t* ms,
219 ZSTD_compressionParameters const* cParams,
220 void const* end)
221 void const* end)
221 {
222 {
222 const BYTE* const iend = (const BYTE*)end;
223 const BYTE* const iend = (const BYTE*)end;
223
224
224 switch(cParams->strategy)
225 switch(ms->cParams.strategy)
225 {
226 {
226 case ZSTD_fast:
227 case ZSTD_fast:
227 ZSTD_fillHashTable(ms, cParams, iend);
228 ZSTD_fillHashTable(ms, iend, ZSTD_dtlm_fast);
228 ms->nextToUpdate = (U32)(iend - ms->window.base);
229 break;
229 break;
230
230
231 case ZSTD_dfast:
231 case ZSTD_dfast:
232 ZSTD_fillDoubleHashTable(ms, cParams, iend);
232 ZSTD_fillDoubleHashTable(ms, iend, ZSTD_dtlm_fast);
233 ms->nextToUpdate = (U32)(iend - ms->window.base);
234 break;
233 break;
235
234
236 case ZSTD_greedy:
235 case ZSTD_greedy:
@@ -508,7 +507,7 b' size_t ZSTD_ldm_generateSequences('
508 * * Try invalidation after the sequence generation and test the
507 * * Try invalidation after the sequence generation and test the
509 * the offset against maxDist directly.
508 * the offset against maxDist directly.
510 */
509 */
511 ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL);
510 ZSTD_window_enforceMaxDist(&ldmState->window, chunkEnd, maxDist, NULL, NULL);
512 /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
511 /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
513 newLeftoverSize = ZSTD_ldm_generateSequences_internal(
512 newLeftoverSize = ZSTD_ldm_generateSequences_internal(
514 ldmState, sequences, params, chunkStart, chunkSize);
513 ldmState, sequences, params, chunkStart, chunkSize);
@@ -591,19 +590,19 b' static rawSeq maybeSplitSequence(rawSeqS'
591
590
592 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
591 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
593 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
592 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
594 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
593 void const* src, size_t srcSize)
595 int const extDict)
596 {
594 {
595 const ZSTD_compressionParameters* const cParams = &ms->cParams;
597 unsigned const minMatch = cParams->searchLength;
596 unsigned const minMatch = cParams->searchLength;
598 ZSTD_blockCompressor const blockCompressor =
597 ZSTD_blockCompressor const blockCompressor =
599 ZSTD_selectBlockCompressor(cParams->strategy, extDict);
598 ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
600 BYTE const* const base = ms->window.base;
601 /* Input bounds */
599 /* Input bounds */
602 BYTE const* const istart = (BYTE const*)src;
600 BYTE const* const istart = (BYTE const*)src;
603 BYTE const* const iend = istart + srcSize;
601 BYTE const* const iend = istart + srcSize;
604 /* Input positions */
602 /* Input positions */
605 BYTE const* ip = istart;
603 BYTE const* ip = istart;
606
604
605 DEBUGLOG(5, "ZSTD_ldm_blockCompress: srcSize=%zu", srcSize);
607 assert(rawSeqStore->pos <= rawSeqStore->size);
606 assert(rawSeqStore->pos <= rawSeqStore->size);
608 assert(rawSeqStore->size <= rawSeqStore->capacity);
607 assert(rawSeqStore->size <= rawSeqStore->capacity);
609 /* Loop through each sequence and apply the block compressor to the lits */
608 /* Loop through each sequence and apply the block compressor to the lits */
@@ -621,14 +620,13 b' size_t ZSTD_ldm_blockCompress(rawSeqStor'
621
620
622 /* Fill tables for block compressor */
621 /* Fill tables for block compressor */
623 ZSTD_ldm_limitTableUpdate(ms, ip);
622 ZSTD_ldm_limitTableUpdate(ms, ip);
624 ZSTD_ldm_fillFastTables(ms, cParams, ip);
623 ZSTD_ldm_fillFastTables(ms, ip);
625 /* Run the block compressor */
624 /* Run the block compressor */
625 DEBUGLOG(5, "calling block compressor on segment of size %u", sequence.litLength);
626 {
626 {
627 size_t const newLitLength =
627 size_t const newLitLength =
628 blockCompressor(ms, seqStore, rep, cParams, ip,
628 blockCompressor(ms, seqStore, rep, ip, sequence.litLength);
629 sequence.litLength);
630 ip += sequence.litLength;
629 ip += sequence.litLength;
631 ms->nextToUpdate = (U32)(ip - base);
632 /* Update the repcodes */
630 /* Update the repcodes */
633 for (i = ZSTD_REP_NUM - 1; i > 0; i--)
631 for (i = ZSTD_REP_NUM - 1; i > 0; i--)
634 rep[i] = rep[i-1];
632 rep[i] = rep[i-1];
@@ -642,12 +640,7 b' size_t ZSTD_ldm_blockCompress(rawSeqStor'
642 }
640 }
643 /* Fill the tables for the block compressor */
641 /* Fill the tables for the block compressor */
644 ZSTD_ldm_limitTableUpdate(ms, ip);
642 ZSTD_ldm_limitTableUpdate(ms, ip);
645 ZSTD_ldm_fillFastTables(ms, cParams, ip);
643 ZSTD_ldm_fillFastTables(ms, ip);
646 /* Compress the last literals */
644 /* Compress the last literals */
647 {
645 return blockCompressor(ms, seqStore, rep, ip, iend - ip);
648 size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
649 ip, iend - ip);
650 ms->nextToUpdate = (U32)(iend - base);
651 return lastLiterals;
652 }
653 }
646 }
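
After this change ZSTD_ldm_blockCompress returns the trailing literals' result directly and leaves nextToUpdate maintenance to the block compressors it calls. Its overall shape: for each long-distance sequence, run the regular block compressor over the literal gap, emit the LDM match verbatim, then finish with the trailing literals. A schematic of that interleaving (types and helpers reduced to a hypothetical minimum):

    #include <stddef.h>

    typedef struct { size_t litLength, matchLength, offset; } raw_seq_t;

    /* stand-ins for the selected block compressor and the sequence store */
    static size_t block_compress(const unsigned char *lits, size_t n)
    { (void)lits; return n; /* pretend every literal is left over */ }
    static void store_seq(size_t lits, size_t offset, size_t mlen)
    { (void)lits; (void)offset; (void)mlen; }

    static size_t ldm_block_compress(const raw_seq_t *seq, size_t nbSeq,
                                     const unsigned char *ip,
                                     const unsigned char *iend)
    {
        size_t i;
        for (i = 0; i < nbSeq; i++) {
            /* 1. the regular match finder consumes the literal gap */
            size_t const lastLits = block_compress(ip, seq[i].litLength);
            ip += seq[i].litLength;
            /* 2. the long-distance match is stored directly (the real code
             * also rotates the repcode history here) */
            store_seq(lastLits, seq[i].offset, seq[i].matchLength);
            ip += seq[i].matchLength;
        }
        return block_compress(ip, (size_t)(iend - ip));  /* trailing literals */
    }

    int main(void)
    {
        unsigned char buf[32] = {0};
        raw_seq_t const s = { 4, 8, 100 };
        return ldm_block_compress(&s, 1, buf, buf + sizeof buf) == 20 ? 0 : 1;
    }
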
@@ -61,9 +61,7 b' size_t ZSTD_ldm_generateSequences('
61 */
61 */
62 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
62 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
63 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
63 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
64 ZSTD_compressionParameters const* cParams,
64 void const* src, size_t srcSize);
65 void const* src, size_t srcSize,
66 int const extDict);
67
65
68 /**
66 /**
69 * ZSTD_ldm_skipSequences():
67 * ZSTD_ldm_skipSequences():
@@ -9,10 +9,11 b''
9 */
9 */
10
10
11 #include "zstd_compress_internal.h"
11 #include "zstd_compress_internal.h"
12 #include "hist.h"
12 #include "zstd_opt.h"
13 #include "zstd_opt.h"
13
14
14
15
15 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats. Also used for matchSum (?) */
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
16 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */
17 #define ZSTD_MAX_PRICE (1<<30)
18 #define ZSTD_MAX_PRICE (1<<30)
18
19
@@ -20,128 +21,210 b''
20 /*-*************************************
21 /*-*************************************
21 * Price functions for optimal parser
22 * Price functions for optimal parser
22 ***************************************/
23 ***************************************/
23 static void ZSTD_setLog2Prices(optState_t* optPtr)
24
25 #if 0 /* approximation at bit level */
26 # define BITCOST_ACCURACY 0
27 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
28 # define WEIGHT(stat) ((void)opt, ZSTD_bitWeight(stat))
29 #elif 0 /* fractional bit accuracy */
30 # define BITCOST_ACCURACY 8
31 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
32 # define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
33 #else /* opt==approx, ultra==accurate */
34 # define BITCOST_ACCURACY 8
35 # define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
36 # define WEIGHT(stat,opt) (opt ? ZSTD_fracWeight(stat) : ZSTD_bitWeight(stat))
37 #endif
38
39 MEM_STATIC U32 ZSTD_bitWeight(U32 stat)
40 {
41 return (ZSTD_highbit32(stat+1) * BITCOST_MULTIPLIER);
42 }
43
44 MEM_STATIC U32 ZSTD_fracWeight(U32 rawStat)
24 {
45 {
25 optPtr->log2litSum = ZSTD_highbit32(optPtr->litSum+1);
46 U32 const stat = rawStat + 1;
26 optPtr->log2litLengthSum = ZSTD_highbit32(optPtr->litLengthSum+1);
47 U32 const hb = ZSTD_highbit32(stat);
27 optPtr->log2matchLengthSum = ZSTD_highbit32(optPtr->matchLengthSum+1);
48 U32 const BWeight = hb * BITCOST_MULTIPLIER;
28 optPtr->log2offCodeSum = ZSTD_highbit32(optPtr->offCodeSum+1);
49 U32 const FWeight = (stat << BITCOST_ACCURACY) >> hb;
50 U32 const weight = BWeight + FWeight;
51 assert(hb + BITCOST_ACCURACY < 31);
52 return weight;
53 }
54
55 /* debugging function, @return price in bytes */
56 MEM_STATIC double ZSTD_fCost(U32 price)
57 {
58 return (double)price / (BITCOST_MULTIPLIER*8);
59 }
60
61 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
62 {
63 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel);
64 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel);
65 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel);
66 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel);
29 }
67 }
30
68
31
69
70 static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus)
71 {
72 U32 s, sum=0;
73 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
74 for (s=0; s<=lastEltIndex; s++) {
75 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
76 sum += table[s];
77 }
78 return sum;
79 }
80
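
The block above introduces the new fixed-point cost model: ZSTD_fracWeight extends the old whole-bit price (ZSTD_highbit32) with BITCOST_ACCURACY = 8 fractional bits, so estimated prices track -log2 of a symbol's frequency much more closely, and ZSTD_downscaleStat lets a previous block's statistics decay geometrically instead of restarting from scratch. A self-checking restatement of the weight formula:

    #include <assert.h>
    #include <stdint.h>

    #define BITCOST_ACCURACY   8
    #define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)

    static uint32_t highbit32(uint32_t v)   /* index of the highest set bit */
    {
        uint32_t r = 0;
        while (v >>= 1) r++;
        return r;
    }

    /* weight = hb*256 + (stat / 2^hb)*256 : the high bit supplies the
     * integer part of log2, the shifted mantissa (leading one included,
     * hence a constant +1.0) approximates the fraction */
    static uint32_t frac_weight(uint32_t rawStat)
    {
        uint32_t const stat = rawStat + 1;
        uint32_t const hb = highbit32(stat);
        uint32_t const intBits  = hb * BITCOST_MULTIPLIER;
        uint32_t const fracBits = (stat << BITCOST_ACCURACY) >> hb;
        return intBits + fracBits;
    }

    int main(void)
    {
        /* rawStat 3 -> stat 4 = 2^2 : two whole bits, mantissa exactly 1.0 */
        assert(frac_weight(3) == 2 * BITCOST_MULTIPLIER + BITCOST_MULTIPLIER);
        /* rawStat 5 -> stat 6 = 1.5 * 2^2 : a fraction of 0.5 shows up */
        assert(frac_weight(5) == 2 * BITCOST_MULTIPLIER
                               + BITCOST_MULTIPLIER + BITCOST_MULTIPLIER / 2);
        return 0;
    }
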
32 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
81 static void ZSTD_rescaleFreqs(optState_t* const optPtr,
33 const BYTE* const src, size_t const srcSize)
82 const BYTE* const src, size_t const srcSize,
83 int optLevel)
34 {
84 {
35 optPtr->staticPrices = 0;
85 optPtr->priceType = zop_dynamic;
86
87 if (optPtr->litLengthSum == 0) { /* first block : init */
88 if (srcSize <= 1024) /* heuristic */
89 optPtr->priceType = zop_predef;
90
91 assert(optPtr->symbolCosts != NULL);
92 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */
93 optPtr->priceType = zop_dynamic;
36
94
37 if (optPtr->litLengthSum == 0) { /* first init */
95 assert(optPtr->litFreq != NULL);
38 unsigned u;
96 optPtr->litSum = 0;
39 if (srcSize <= 1024) optPtr->staticPrices = 1;
97 { unsigned lit;
98 for (lit=0; lit<=MaxLit; lit++) {
99 U32 const scaleLog = 11; /* scale to 2K */
100 U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
101 assert(bitCost <= scaleLog);
102 optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
103 optPtr->litSum += optPtr->litFreq[lit];
104 } }
105
106 { unsigned ll;
107 FSE_CState_t llstate;
108 FSE_initCState(&llstate, optPtr->symbolCosts->fse.litlengthCTable);
109 optPtr->litLengthSum = 0;
110 for (ll=0; ll<=MaxLL; ll++) {
111 U32 const scaleLog = 10; /* scale to 1K */
112 U32 const bitCost = FSE_getMaxNbBits(llstate.symbolTT, ll);
113 assert(bitCost < scaleLog);
114 optPtr->litLengthFreq[ll] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
115 optPtr->litLengthSum += optPtr->litLengthFreq[ll];
116 } }
40
117
41 assert(optPtr->litFreq!=NULL);
118 { unsigned ml;
42 for (u=0; u<=MaxLit; u++)
119 FSE_CState_t mlstate;
43 optPtr->litFreq[u] = 0;
120 FSE_initCState(&mlstate, optPtr->symbolCosts->fse.matchlengthCTable);
44 for (u=0; u<srcSize; u++)
121 optPtr->matchLengthSum = 0;
45 optPtr->litFreq[src[u]]++;
122 for (ml=0; ml<=MaxML; ml++) {
46 optPtr->litSum = 0;
123 U32 const scaleLog = 10;
47 for (u=0; u<=MaxLit; u++) {
124 U32 const bitCost = FSE_getMaxNbBits(mlstate.symbolTT, ml);
48 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> ZSTD_FREQ_DIV);
125 assert(bitCost < scaleLog);
49 optPtr->litSum += optPtr->litFreq[u];
126 optPtr->matchLengthFreq[ml] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
127 optPtr->matchLengthSum += optPtr->matchLengthFreq[ml];
128 } }
129
130 { unsigned of;
131 FSE_CState_t ofstate;
132 FSE_initCState(&ofstate, optPtr->symbolCosts->fse.offcodeCTable);
133 optPtr->offCodeSum = 0;
134 for (of=0; of<=MaxOff; of++) {
135 U32 const scaleLog = 10;
136 U32 const bitCost = FSE_getMaxNbBits(ofstate.symbolTT, of);
137 assert(bitCost < scaleLog);
138 optPtr->offCodeFreq[of] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
139 optPtr->offCodeSum += optPtr->offCodeFreq[of];
140 } }
141
142 } else { /* not a dictionary */
143
144 assert(optPtr->litFreq != NULL);
145 { unsigned lit = MaxLit;
146 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */
147 }
148 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
149
150 { unsigned ll;
151 for (ll=0; ll<=MaxLL; ll++)
152 optPtr->litLengthFreq[ll] = 1;
153 }
154 optPtr->litLengthSum = MaxLL+1;
155
156 { unsigned ml;
157 for (ml=0; ml<=MaxML; ml++)
158 optPtr->matchLengthFreq[ml] = 1;
159 }
160 optPtr->matchLengthSum = MaxML+1;
161
162 { unsigned of;
163 for (of=0; of<=MaxOff; of++)
164 optPtr->offCodeFreq[of] = 1;
165 }
166 optPtr->offCodeSum = MaxOff+1;
167
50 }
168 }
51
169
52 for (u=0; u<=MaxLL; u++)
170 } else { /* new block : re-use previous statistics, scaled down */
53 optPtr->litLengthFreq[u] = 1;
54 optPtr->litLengthSum = MaxLL+1;
55 for (u=0; u<=MaxML; u++)
56 optPtr->matchLengthFreq[u] = 1;
57 optPtr->matchLengthSum = MaxML+1;
58 for (u=0; u<=MaxOff; u++)
59 optPtr->offCodeFreq[u] = 1;
60 optPtr->offCodeSum = (MaxOff+1);
61
62 } else {
63 unsigned u;
64
171
65 optPtr->litSum = 0;
172 optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
66 for (u=0; u<=MaxLit; u++) {
173 optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
67 optPtr->litFreq[u] = 1 + (optPtr->litFreq[u] >> (ZSTD_FREQ_DIV+1));
174 optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
68 optPtr->litSum += optPtr->litFreq[u];
175 optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
69 }
70 optPtr->litLengthSum = 0;
71 for (u=0; u<=MaxLL; u++) {
72 optPtr->litLengthFreq[u] = 1 + (optPtr->litLengthFreq[u]>>(ZSTD_FREQ_DIV+1));
73 optPtr->litLengthSum += optPtr->litLengthFreq[u];
74 }
75 optPtr->matchLengthSum = 0;
76 for (u=0; u<=MaxML; u++) {
77 optPtr->matchLengthFreq[u] = 1 + (optPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
78 optPtr->matchLengthSum += optPtr->matchLengthFreq[u];
79 }
80 optPtr->offCodeSum = 0;
81 for (u=0; u<=MaxOff; u++) {
82 optPtr->offCodeFreq[u] = 1 + (optPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
83 optPtr->offCodeSum += optPtr->offCodeFreq[u];
84 }
85 }
176 }
86
177
87 ZSTD_setLog2Prices(optPtr);
178 ZSTD_setBasePrices(optPtr, optLevel);
88 }
179 }
89
180
90
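The dictionary branch of ZSTD_rescaleFreqs above synthesizes frequencies from the dictionary's entropy tables rather than from data: a symbol the table codes in bitCost bits receives frequency 1 << (scaleLog - bitCost), so the totals land near 2^scaleLog and the price model reproduces the table. For example, a literal with a 3-bit Huffman code gets litFreq = 1 << (11-3) = 256 out of litSum of about 2048, and WEIGHT then prices it at about log2(2048/256) = 3 bits, matching the dictionary before any block statistics exist.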
91 /* ZSTD_rawLiteralsCost() :
181 /* ZSTD_rawLiteralsCost() :
92 * cost of literals (only) in given segment (which length can be null)
182 * price of literals (only) in specified segment (which length can be 0).
93 * does not include cost of literalLength symbol */
183 * does not include price of literalLength symbol */
94 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
184 static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
95 const optState_t* const optPtr)
185 const optState_t* const optPtr,
186 int optLevel)
96 {
187 {
97 if (optPtr->staticPrices) return (litLength*6); /* 6 bit per literal - no statistic used */
98 if (litLength == 0) return 0;
188 if (litLength == 0) return 0;
189 if (optPtr->priceType == zop_predef)
190 return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */
99
191
100 /* literals */
192 /* dynamic statistics */
101 { U32 u;
193 { U32 price = litLength * optPtr->litSumBasePrice;
102 U32 cost = litLength * optPtr->log2litSum;
194 U32 u;
103 for (u=0; u < litLength; u++)
195 for (u=0; u < litLength; u++) {
104 cost -= ZSTD_highbit32(optPtr->litFreq[literals[u]]+1);
196 assert(WEIGHT(optPtr->litFreq[literals[u]], optLevel) <= optPtr->litSumBasePrice); /* literal cost should never be negative */
105 return cost;
197 price -= WEIGHT(optPtr->litFreq[literals[u]], optLevel);
198 }
199 return price;
106 }
200 }
107 }
201 }
108
202
109 /* ZSTD_litLengthPrice() :
203 /* ZSTD_litLengthPrice() :
110 * cost of literalLength symbol */
204 * cost of literalLength symbol */
111 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr)
205 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
112 {
206 {
113 if (optPtr->staticPrices) return ZSTD_highbit32((U32)litLength+1);
207 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
114
208
115 /* literal Length */
209 /* dynamic statistics */
116 { U32 const llCode = ZSTD_LLcode(litLength);
210 { U32 const llCode = ZSTD_LLcode(litLength);
117 U32 const price = LL_bits[llCode] + optPtr->log2litLengthSum - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
211 return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel));
118 return price;
119 }
212 }
120 }
213 }
121
214
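Literal and length pricing now share one pattern: price is approximately extraBits*256 + WEIGHT(sum) - WEIGHT(freq), i.e. about 256*(extraBits + log2(sum/freq)). For instance, a literal-length code with 2 extra bits and frequency 8 out of a litLengthSum of 64 prices at roughly 256*(2 + log2(64/8)) = 1280 units, which ZSTD_fCost reports as 1280/(256*8) = 0.625 bytes.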
122 /* ZSTD_litLengthPrice() :
123 * cost of the literal part of a sequence,
124 * including literals themselves, and literalLength symbol */
125 static U32 ZSTD_fullLiteralsCost(const BYTE* const literals, U32 const litLength,
126 const optState_t* const optPtr)
127 {
128 return ZSTD_rawLiteralsCost(literals, litLength, optPtr)
129 + ZSTD_litLengthPrice(litLength, optPtr);
130 }
131
132 /* ZSTD_litLengthContribution() :
215 /* ZSTD_litLengthContribution() :
133 * @return ( cost(litlength) - cost(0) )
216 * @return ( cost(litlength) - cost(0) )
134 * this value can then be added to rawLiteralsCost()
217 * this value can then be added to rawLiteralsCost()
135 * to provide a cost which is directly comparable to a match ending at same position */
218 * to provide a cost which is directly comparable to a match ending at same position */
136 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr)
219 static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel)
137 {
220 {
138 if (optPtr->staticPrices) return ZSTD_highbit32(litLength+1);
221 if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel);
139
222
140 /* literal Length */
223 /* dynamic statistics */
141 { U32 const llCode = ZSTD_LLcode(litLength);
224 { U32 const llCode = ZSTD_LLcode(litLength);
142 int const contribution = LL_bits[llCode]
225 int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER)
143 + ZSTD_highbit32(optPtr->litLengthFreq[0]+1)
226 + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */
144 - ZSTD_highbit32(optPtr->litLengthFreq[llCode]+1);
227 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel);
145 #if 1
228 #if 1
146 return contribution;
229 return contribution;
147 #else
230 #else
@@ -155,10 +238,11 b' static int ZSTD_litLengthContribution(U3'
155 * which can be compared to the ending cost of a match
238 * which can be compared to the ending cost of a match
156 * should a new match start at this position */
239 * should a new match start at this position */
157 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
240 static int ZSTD_literalsContribution(const BYTE* const literals, U32 const litLength,
158 const optState_t* const optPtr)
241 const optState_t* const optPtr,
242 int optLevel)
159 {
243 {
160 int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr)
244 int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel)
161 + ZSTD_litLengthContribution(litLength, optPtr);
245 + ZSTD_litLengthContribution(litLength, optPtr, optLevel);
162 return contribution;
246 return contribution;
163 }
247 }
164
248
@@ -166,31 +250,38 b' static int ZSTD_literalsContribution(con'
166 * Provides the cost of the match part (offset + matchLength) of a sequence
250 * Provides the cost of the match part (offset + matchLength) of a sequence
167 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
251 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
168 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
252 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
169 FORCE_INLINE_TEMPLATE U32 ZSTD_getMatchPrice(
253 FORCE_INLINE_TEMPLATE U32
170 U32 const offset, U32 const matchLength,
254 ZSTD_getMatchPrice(U32 const offset,
171 const optState_t* const optPtr,
255 U32 const matchLength,
172 int const optLevel)
256 const optState_t* const optPtr,
257 int const optLevel)
173 {
258 {
174 U32 price;
259 U32 price;
175 U32 const offCode = ZSTD_highbit32(offset+1);
260 U32 const offCode = ZSTD_highbit32(offset+1);
176 U32 const mlBase = matchLength - MINMATCH;
261 U32 const mlBase = matchLength - MINMATCH;
177 assert(matchLength >= MINMATCH);
262 assert(matchLength >= MINMATCH);
178
263
179 if (optPtr->staticPrices) /* fixed scheme, do not use statistics */
264 if (optPtr->priceType == zop_predef) /* fixed scheme, do not use statistics */
180 return ZSTD_highbit32((U32)mlBase+1) + 16 + offCode;
265 return WEIGHT(mlBase, optLevel) + ((16 + offCode) * BITCOST_MULTIPLIER);
181
266
182 price = offCode + optPtr->log2offCodeSum - ZSTD_highbit32(optPtr->offCodeFreq[offCode]+1);
267 /* dynamic statistics */
183 if ((optLevel<2) /*static*/ && offCode >= 20) price += (offCode-19)*2; /* handicap for long distance offsets, favor decompression speed */
268 price = (offCode * BITCOST_MULTIPLIER) + (optPtr->offCodeSumBasePrice - WEIGHT(optPtr->offCodeFreq[offCode], optLevel));
269 if ((optLevel<2) /*static*/ && offCode >= 20)
270 price += (offCode-19)*2 * BITCOST_MULTIPLIER; /* handicap for long distance offsets, favor decompression speed */
184
271
185 /* match Length */
272 /* match Length */
186 { U32 const mlCode = ZSTD_MLcode(mlBase);
273 { U32 const mlCode = ZSTD_MLcode(mlBase);
187 price += ML_bits[mlCode] + optPtr->log2matchLengthSum - ZSTD_highbit32(optPtr->matchLengthFreq[mlCode]+1);
274 price += (ML_bits[mlCode] * BITCOST_MULTIPLIER) + (optPtr->matchLengthSumBasePrice - WEIGHT(optPtr->matchLengthFreq[mlCode], optLevel));
188 }
275 }
189
276
277 price += BITCOST_MULTIPLIER / 5; /* heuristic : make matches a bit more costly to favor fewer sequences -> faster decompression speed */
278
190 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
279 DEBUGLOG(8, "ZSTD_getMatchPrice(ml:%u) = %u", matchLength, price);
191 return price;
280 return price;
192 }
281 }
193
282
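Since offCode = ZSTD_highbit32(offset+1), the offset part of a match price grows logarithmically with distance; the (offCode-19)*2 handicap at optLevel<2 then penalizes far references (offCode >= 20 means distances of roughly 1 MiB and beyond) to keep decompression cache-friendly. For example, a 16 MiB offset has offCode 24 and pays an extra (24-19)*2 = 10 bits, i.e. 2560 price units, on top of its entropy cost.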
283 /* ZSTD_updateStats() :
284 * assumption : literals + litLength <= iend */
194 static void ZSTD_updateStats(optState_t* const optPtr,
285 static void ZSTD_updateStats(optState_t* const optPtr,
195 U32 litLength, const BYTE* literals,
286 U32 litLength, const BYTE* literals,
196 U32 offsetCode, U32 matchLength)
287 U32 offsetCode, U32 matchLength)
@@ -269,10 +360,11 b' static U32 ZSTD_insertAndFindFirstIndexH'
269 * ip : assumed <= iend-8 .
360 * ip : assumed <= iend-8 .
270 * @return : nb of positions added */
361 * @return : nb of positions added */
271 static U32 ZSTD_insertBt1(
362 static U32 ZSTD_insertBt1(
272 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
363 ZSTD_matchState_t* ms,
273 const BYTE* const ip, const BYTE* const iend,
364 const BYTE* const ip, const BYTE* const iend,
274 U32 const mls, U32 const extDict)
365 U32 const mls, const int extDict)
275 {
366 {
367 const ZSTD_compressionParameters* const cParams = &ms->cParams;
276 U32* const hashTable = ms->hashTable;
368 U32* const hashTable = ms->hashTable;
277 U32 const hashLog = cParams->hashLog;
369 U32 const hashLog = cParams->hashLog;
278 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
370 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
@@ -293,6 +385,7 b' static U32 ZSTD_insertBt1('
293 U32* largerPtr = smallerPtr + 1;
385 U32* largerPtr = smallerPtr + 1;
294 U32 dummy32; /* to be nullified at the end */
386 U32 dummy32; /* to be nullified at the end */
295 U32 const windowLow = ms->window.lowLimit;
387 U32 const windowLow = ms->window.lowLimit;
388 U32 const matchLow = windowLow ? windowLow : 1;
296 U32 matchEndIdx = current+8+1;
389 U32 matchEndIdx = current+8+1;
297 size_t bestLength = 8;
390 size_t bestLength = 8;
298 U32 nbCompares = 1U << cParams->searchLog;
391 U32 nbCompares = 1U << cParams->searchLog;
@@ -308,7 +401,7 b' static U32 ZSTD_insertBt1('
308 assert(ip <= iend-8); /* required for h calculation */
401 assert(ip <= iend-8); /* required for h calculation */
309 hashTable[h] = current; /* Update Hash Table */
402 hashTable[h] = current; /* Update Hash Table */
310
403
311 while (nbCompares-- && (matchIndex > windowLow)) {
404 while (nbCompares-- && (matchIndex >= matchLow)) {
312 U32* const nextPtr = bt + 2*(matchIndex & btMask);
405 U32* const nextPtr = bt + 2*(matchIndex & btMask);
313 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
406 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
314 assert(matchIndex < current);
407 assert(matchIndex < current);
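matchLow replaces the old `matchIndex > windowLow` guard: index 0 never holds a valid position (hash cells start zeroed, so 0 doubles as "empty"), so when windowLow is 0 the search must still stop above index 0, while a non-zero windowLow itself now becomes a searchable position instead of being skipped.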
@@ -334,8 +427,8 b' static U32 ZSTD_insertBt1('
334 }
427 }
335 #endif
428 #endif
336
429
337 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
430 if (!extDict || (matchIndex+matchLength >= dictLimit)) {
338 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if extDict is incorrectly set to 0 */
431 assert(matchIndex+matchLength >= dictLimit); /* might be wrong if actually extDict */
339 match = base + matchIndex;
432 match = base + matchIndex;
340 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
433 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iend);
341 } else {
434 } else {
@@ -379,35 +472,33 b' static U32 ZSTD_insertBt1('
379
472
380 FORCE_INLINE_TEMPLATE
473 FORCE_INLINE_TEMPLATE
381 void ZSTD_updateTree_internal(
474 void ZSTD_updateTree_internal(
382 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
475 ZSTD_matchState_t* ms,
383 const BYTE* const ip, const BYTE* const iend,
476 const BYTE* const ip, const BYTE* const iend,
384 const U32 mls, const U32 extDict)
477 const U32 mls, const ZSTD_dictMode_e dictMode)
385 {
478 {
386 const BYTE* const base = ms->window.base;
479 const BYTE* const base = ms->window.base;
387 U32 const target = (U32)(ip - base);
480 U32 const target = (U32)(ip - base);
388 U32 idx = ms->nextToUpdate;
481 U32 idx = ms->nextToUpdate;
389 DEBUGLOG(7, "ZSTD_updateTree_internal, from %u to %u (extDict:%u)",
482 DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)",
390 idx, target, extDict);
483 idx, target, dictMode);
391
484
392 while(idx < target)
485 while(idx < target)
393 idx += ZSTD_insertBt1(ms, cParams, base+idx, iend, mls, extDict);
486 idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
394 ms->nextToUpdate = target;
487 ms->nextToUpdate = target;
395 }
488 }
396
489
397 void ZSTD_updateTree(
490 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) {
398 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
491 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict);
399 const BYTE* ip, const BYTE* iend)
400 {
401 ZSTD_updateTree_internal(ms, cParams, ip, iend, cParams->searchLength, 0 /*extDict*/);
402 }
492 }
403
493
404 FORCE_INLINE_TEMPLATE
494 FORCE_INLINE_TEMPLATE
405 U32 ZSTD_insertBtAndGetAllMatches (
495 U32 ZSTD_insertBtAndGetAllMatches (
406 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
496 ZSTD_matchState_t* ms,
407 const BYTE* const ip, const BYTE* const iLimit, int const extDict,
497 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode,
408 U32 rep[ZSTD_REP_NUM], U32 const ll0,
498 U32 rep[ZSTD_REP_NUM], U32 const ll0,
409 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
499 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */)
410 {
500 {
501 const ZSTD_compressionParameters* const cParams = &ms->cParams;
411 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
502 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
412 const BYTE* const base = ms->window.base;
503 const BYTE* const base = ms->window.base;
413 U32 const current = (U32)(ip-base);
504 U32 const current = (U32)(ip-base);
@@ -426,6 +517,7 b' U32 ZSTD_insertBtAndGetAllMatches ('
426 const BYTE* const prefixStart = base + dictLimit;
517 const BYTE* const prefixStart = base + dictLimit;
427 U32 const btLow = btMask >= current ? 0 : current - btMask;
518 U32 const btLow = btMask >= current ? 0 : current - btMask;
428 U32 const windowLow = ms->window.lowLimit;
519 U32 const windowLow = ms->window.lowLimit;
520 U32 const matchLow = windowLow ? windowLow : 1;
429 U32* smallerPtr = bt + 2*(current&btMask);
521 U32* smallerPtr = bt + 2*(current&btMask);
430 U32* largerPtr = bt + 2*(current&btMask) + 1;
522 U32* largerPtr = bt + 2*(current&btMask) + 1;
431 U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
523 U32 matchEndIdx = current+8+1; /* farthest referenced position of any match => detects repetitive patterns */
@@ -433,8 +525,21 b' U32 ZSTD_insertBtAndGetAllMatches ('
433 U32 mnum = 0;
525 U32 mnum = 0;
434 U32 nbCompares = 1U << cParams->searchLog;
526 U32 nbCompares = 1U << cParams->searchLog;
435
527
528 const ZSTD_matchState_t* dms = dictMode == ZSTD_dictMatchState ? ms->dictMatchState : NULL;
529 const ZSTD_compressionParameters* const dmsCParams =
530 dictMode == ZSTD_dictMatchState ? &dms->cParams : NULL;
531 const BYTE* const dmsBase = dictMode == ZSTD_dictMatchState ? dms->window.base : NULL;
532 const BYTE* const dmsEnd = dictMode == ZSTD_dictMatchState ? dms->window.nextSrc : NULL;
533 U32 const dmsHighLimit = dictMode == ZSTD_dictMatchState ? (U32)(dmsEnd - dmsBase) : 0;
534 U32 const dmsLowLimit = dictMode == ZSTD_dictMatchState ? dms->window.lowLimit : 0;
535 U32 const dmsIndexDelta = dictMode == ZSTD_dictMatchState ? windowLow - dmsHighLimit : 0;
536 U32 const dmsHashLog = dictMode == ZSTD_dictMatchState ? dmsCParams->hashLog : hashLog;
537 U32 const dmsBtLog = dictMode == ZSTD_dictMatchState ? dmsCParams->chainLog - 1 : btLog;
538 U32 const dmsBtMask = dictMode == ZSTD_dictMatchState ? (1U << dmsBtLog) - 1 : 0;
539 U32 const dmsBtLow = dictMode == ZSTD_dictMatchState && dmsBtMask < dmsHighLimit - dmsLowLimit ? dmsHighLimit - dmsBtMask : dmsLowLimit;
540
436 size_t bestLength = lengthToBeat-1;
541 size_t bestLength = lengthToBeat-1;
437 DEBUGLOG(7, "ZSTD_insertBtAndGetAllMatches");
542 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current);
438
543
439 /* check repCode */
544 /* check repCode */
440 { U32 const lastR = ZSTD_REP_NUM + ll0;
545 { U32 const lastR = ZSTD_REP_NUM + ll0;
@@ -449,18 +554,26 b' U32 ZSTD_insertBtAndGetAllMatches ('
449 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
554 repLen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repOffset, iLimit) + minMatch;
450 }
555 }
451 } else { /* repIndex < dictLimit || repIndex >= current */
556 } else { /* repIndex < dictLimit || repIndex >= current */
452 const BYTE* const repMatch = dictBase + repIndex;
557 const BYTE* const repMatch = dictMode == ZSTD_dictMatchState ?
558 dmsBase + repIndex - dmsIndexDelta :
559 dictBase + repIndex;
453 assert(current >= windowLow);
560 assert(current >= windowLow);
454 if ( extDict /* this case only valid in extDict mode */
561 if ( dictMode == ZSTD_extDict
455 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
562 && ( ((repOffset-1) /*intentional overflow*/ < current - windowLow) /* equivalent to `current > repIndex >= windowLow` */
456 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
563 & (((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */)
457 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
564 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
458 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
565 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dictEnd, prefixStart) + minMatch;
566 }
567 if (dictMode == ZSTD_dictMatchState
568 && ( ((repOffset-1) /*intentional overflow*/ < current - (dmsLowLimit + dmsIndexDelta)) /* equivalent to `current > repIndex >= dmsLowLimit` */
569 & ((U32)((dictLimit-1) - repIndex) >= 3) ) /* intentional overflow : do not test positions overlapping 2 memory segments */
570 && (ZSTD_readMINMATCH(ip, minMatch) == ZSTD_readMINMATCH(repMatch, minMatch)) ) {
571 repLen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iLimit, dmsEnd, prefixStart) + minMatch;
459 } }
572 } }
460 /* save longer solution */
573 /* save longer solution */
461 if (repLen > bestLength) {
574 if (repLen > bestLength) {
462 DEBUGLOG(8, "found rep-match %u of length %u",
575 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
463 repCode - ll0, (U32)repLen);
576 repCode, ll0, repOffset, repLen);
464 bestLength = repLen;
577 bestLength = repLen;
465 matches[mnum].off = repCode - ll0;
578 matches[mnum].off = repCode - ll0;
466 matches[mnum].len = (U32)repLen;
579 matches[mnum].len = (U32)repLen;
@@ -473,10 +586,10 b' U32 ZSTD_insertBtAndGetAllMatches ('
473 /* HC3 match finder */
586 /* HC3 match finder */
474 if ((mls == 3) /*static*/ && (bestLength < mls)) {
587 if ((mls == 3) /*static*/ && (bestLength < mls)) {
475 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
588 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip);
476 if ((matchIndex3 > windowLow)
589 if ((matchIndex3 >= matchLow)
477 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
590 & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) {
478 size_t mlen;
591 size_t mlen;
479 if ((!extDict) /*static*/ || (matchIndex3 >= dictLimit)) {
592 if ((dictMode == ZSTD_noDict) /*static*/ || (dictMode == ZSTD_dictMatchState) /*static*/ || (matchIndex3 >= dictLimit)) {
480 const BYTE* const match = base + matchIndex3;
593 const BYTE* const match = base + matchIndex3;
481 mlen = ZSTD_count(ip, match, iLimit);
594 mlen = ZSTD_count(ip, match, iLimit);
482 } else {
595 } else {
@@ -498,17 +611,21 b' U32 ZSTD_insertBtAndGetAllMatches ('
498 (ip+mlen == iLimit) ) { /* best possible length */
611 (ip+mlen == iLimit) ) { /* best possible length */
499 ms->nextToUpdate = current+1; /* skip insertion */
612 ms->nextToUpdate = current+1; /* skip insertion */
500 return 1;
613 return 1;
501 } } } }
614 }
615 }
616 }
617 /* no dictMatchState lookup: dicts don't have a populated HC3 table */
618 }
502
619
503 hashTable[h] = current; /* Update Hash Table */
620 hashTable[h] = current; /* Update Hash Table */
504
621
505 while (nbCompares-- && (matchIndex > windowLow)) {
622 while (nbCompares-- && (matchIndex >= matchLow)) {
506 U32* const nextPtr = bt + 2*(matchIndex & btMask);
623 U32* const nextPtr = bt + 2*(matchIndex & btMask);
507 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
624 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
508 const BYTE* match;
625 const BYTE* match;
509 assert(current > matchIndex);
626 assert(current > matchIndex);
510
627
511 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
628 if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) {
512 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
629 assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */
513 match = base + matchIndex;
630 match = base + matchIndex;
514 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
631 matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit);
@@ -520,8 +637,8 b' U32 ZSTD_insertBtAndGetAllMatches ('
520 }
637 }
521
638
522 if (matchLength > bestLength) {
639 if (matchLength > bestLength) {
523 DEBUGLOG(8, "found match of length %u at distance %u",
640 DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
524 (U32)matchLength, current - matchIndex);
641 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
525 assert(matchEndIdx > matchIndex);
642 assert(matchEndIdx > matchIndex);
526 if (matchLength > matchEndIdx - matchIndex)
643 if (matchLength > matchEndIdx - matchIndex)
527 matchEndIdx = matchIndex + (U32)matchLength;
644 matchEndIdx = matchIndex + (U32)matchLength;
@@ -529,9 +646,10 b' U32 ZSTD_insertBtAndGetAllMatches ('
529 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
646 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
530 matches[mnum].len = (U32)matchLength;
647 matches[mnum].len = (U32)matchLength;
531 mnum++;
648 mnum++;
532 if (matchLength > ZSTD_OPT_NUM) break;
649 if ( (matchLength > ZSTD_OPT_NUM)
533 if (ip+matchLength == iLimit) { /* equal : no way to know if inf or sup */
650 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
534 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
651 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
652 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
535 }
653 }
536 }
654 }
537
655
@@ -552,6 +670,47 b' U32 ZSTD_insertBtAndGetAllMatches ('
552
670
553 *smallerPtr = *largerPtr = 0;
671 *smallerPtr = *largerPtr = 0;
554
672
673 if (dictMode == ZSTD_dictMatchState && nbCompares) {
674 size_t const dmsH = ZSTD_hashPtr(ip, dmsHashLog, mls);
675 U32 dictMatchIndex = dms->hashTable[dmsH];
676 const U32* const dmsBt = dms->chainTable;
677 commonLengthSmaller = commonLengthLarger = 0;
678 while (nbCompares-- && (dictMatchIndex > dmsLowLimit)) {
679 const U32* const nextPtr = dmsBt + 2*(dictMatchIndex & dmsBtMask);
680 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
681 const BYTE* match = dmsBase + dictMatchIndex;
682 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dmsEnd, prefixStart);
683 if (dictMatchIndex+matchLength >= dmsHighLimit)
684 match = base + dictMatchIndex + dmsIndexDelta; /* to prepare for next usage of match[matchLength] */
685
686 if (matchLength > bestLength) {
687 matchIndex = dictMatchIndex + dmsIndexDelta;
688 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
689 (U32)matchLength, current - matchIndex, current - matchIndex + ZSTD_REP_MOVE);
690 if (matchLength > matchEndIdx - matchIndex)
691 matchEndIdx = matchIndex + (U32)matchLength;
692 bestLength = matchLength;
693 matches[mnum].off = (current - matchIndex) + ZSTD_REP_MOVE;
694 matches[mnum].len = (U32)matchLength;
695 mnum++;
696 if ( (matchLength > ZSTD_OPT_NUM)
697 | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
698 break; /* drop, to guarantee consistency (miss a little bit of compression) */
699 }
700 }
701
702 if (dictMatchIndex <= dmsBtLow) { break; } /* beyond tree size, stop the search */
703 if (match[matchLength] < ip[matchLength]) {
704 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
705 dictMatchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
706 } else {
707 /* match is larger than current */
708 commonLengthLarger = matchLength;
709 dictMatchIndex = nextPtr[0];
710 }
711 }
712 }
713
555 assert(matchEndIdx > current+8);
714 assert(matchEndIdx > current+8);
556 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
715 ms->nextToUpdate = matchEndIdx - 8; /* skip repetitive patterns */
557 return mnum;
716 return mnum;
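The dictMatchState lookup above works in the dictionary's own index space and uses dmsIndexDelta = windowLow - dmsHighLimit to translate: matchIndex = dictMatchIndex + dmsIndexDelta makes current - matchIndex come out as the true byte distance through the dictionary. Worked numbers (hypothetical): with windowLow = 1000 and a 700-byte dictionary (dmsHighLimit = 700), dmsIndexDelta = 300; a dictionary hit at index 650 maps to matchIndex 950, and from current = 1040 the stored distance is 1040 - 950 = 90, i.e. 40 bytes back to the window start plus the last 50 bytes of the dictionary.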
@@ -559,23 +718,24 b' U32 ZSTD_insertBtAndGetAllMatches ('
559
718
560
719
561 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
720 FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
562 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
721 ZSTD_matchState_t* ms,
563 const BYTE* ip, const BYTE* const iHighLimit, int const extDict,
722 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
564 U32 rep[ZSTD_REP_NUM], U32 const ll0,
723 U32 rep[ZSTD_REP_NUM], U32 const ll0,
565 ZSTD_match_t* matches, U32 const lengthToBeat)
724 ZSTD_match_t* matches, U32 const lengthToBeat)
566 {
725 {
726 const ZSTD_compressionParameters* const cParams = &ms->cParams;
567 U32 const matchLengthSearch = cParams->searchLength;
727 U32 const matchLengthSearch = cParams->searchLength;
568 DEBUGLOG(7, "ZSTD_BtGetAllMatches");
728 DEBUGLOG(8, "ZSTD_BtGetAllMatches");
569 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
729 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */
570 ZSTD_updateTree_internal(ms, cParams, ip, iHighLimit, matchLengthSearch, extDict);
730 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
571 switch(matchLengthSearch)
731 switch(matchLengthSearch)
572 {
732 {
573 case 3 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 3);
733 case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 3);
574 default :
734 default :
575 case 4 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 4);
735 case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 4);
576 case 5 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 5);
736 case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 5);
577 case 7 :
737 case 7 :
578 case 6 : return ZSTD_insertBtAndGetAllMatches(ms, cParams, ip, iHighLimit, extDict, rep, ll0, matches, lengthToBeat, 6);
738 case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, matches, lengthToBeat, 6);
579 }
739 }
580 }
740 }
581
741
@@ -587,7 +747,7 b' typedef struct repcodes_s {'
587 U32 rep[3];
747 U32 rep[3];
588 } repcodes_t;
748 } repcodes_t;
589
749
590 repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
750 static repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
591 {
751 {
592 repcodes_t newReps;
752 repcodes_t newReps;
593 if (offset >= ZSTD_REP_NUM) { /* full offset */
753 if (offset >= ZSTD_REP_NUM) { /* full offset */
@@ -609,65 +769,17 b' repcodes_t ZSTD_updateRep(U32 const rep['
609 }
769 }
610
770
611
771
612 typedef struct {
772 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
613 const BYTE* anchor;
614 U32 litlen;
615 U32 rawLitCost;
616 } cachedLiteralPrice_t;
617
618 static U32 ZSTD_rawLiteralsCost_cached(
619 cachedLiteralPrice_t* const cachedLitPrice,
620 const BYTE* const anchor, U32 const litlen,
621 const optState_t* const optStatePtr)
622 {
773 {
623 U32 startCost;
774 return sol.litlen + sol.mlen;
624 U32 remainingLength;
625 const BYTE* startPosition;
626
627 if (anchor == cachedLitPrice->anchor) {
628 startCost = cachedLitPrice->rawLitCost;
629 startPosition = anchor + cachedLitPrice->litlen;
630 assert(litlen >= cachedLitPrice->litlen);
631 remainingLength = litlen - cachedLitPrice->litlen;
632 } else {
633 startCost = 0;
634 startPosition = anchor;
635 remainingLength = litlen;
636 }
637
638 { U32 const rawLitCost = startCost + ZSTD_rawLiteralsCost(startPosition, remainingLength, optStatePtr);
639 cachedLitPrice->anchor = anchor;
640 cachedLitPrice->litlen = litlen;
641 cachedLitPrice->rawLitCost = rawLitCost;
642 return rawLitCost;
643 }
644 }
775 }
645
776
646 static U32 ZSTD_fullLiteralsCost_cached(
777 FORCE_INLINE_TEMPLATE size_t
647 cachedLiteralPrice_t* const cachedLitPrice,
778 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
648 const BYTE* const anchor, U32 const litlen,
779 seqStore_t* seqStore,
649 const optState_t* const optStatePtr)
780 U32 rep[ZSTD_REP_NUM],
650 {
781 const void* src, size_t srcSize,
651 return ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
782 const int optLevel, const ZSTD_dictMode_e dictMode)
652 + ZSTD_litLengthPrice(litlen, optStatePtr);
653 }
654
655 static int ZSTD_literalsContribution_cached(
656 cachedLiteralPrice_t* const cachedLitPrice,
657 const BYTE* const anchor, U32 const litlen,
658 const optState_t* const optStatePtr)
659 {
660 int const contribution = ZSTD_rawLiteralsCost_cached(cachedLitPrice, anchor, litlen, optStatePtr)
661 + ZSTD_litLengthContribution(litlen, optStatePtr);
662 return contribution;
663 }
664
665 FORCE_INLINE_TEMPLATE
666 size_t ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,seqStore_t* seqStore,
667 U32 rep[ZSTD_REP_NUM],
668 ZSTD_compressionParameters const* cParams,
669 const void* src, size_t srcSize,
670 const int optLevel, const int extDict)
671 {
783 {
672 optState_t* const optStatePtr = &ms->opt;
784 optState_t* const optStatePtr = &ms->opt;
673 const BYTE* const istart = (const BYTE*)src;
785 const BYTE* const istart = (const BYTE*)src;
@@ -677,72 +789,76 b' size_t ZSTD_compressBlock_opt_generic(ZS'
677 const BYTE* const ilimit = iend - 8;
789 const BYTE* const ilimit = iend - 8;
678 const BYTE* const base = ms->window.base;
790 const BYTE* const base = ms->window.base;
679 const BYTE* const prefixStart = base + ms->window.dictLimit;
791 const BYTE* const prefixStart = base + ms->window.dictLimit;
792 const ZSTD_compressionParameters* const cParams = &ms->cParams;
680
793
681 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
794 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
682 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
795 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4;
683
796
684 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
797 ZSTD_optimal_t* const opt = optStatePtr->priceTable;
685 ZSTD_match_t* const matches = optStatePtr->matchTable;
798 ZSTD_match_t* const matches = optStatePtr->matchTable;
686 cachedLiteralPrice_t cachedLitPrice;
799 ZSTD_optimal_t lastSequence;
687
800
688 /* init */
801 /* init */
689 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
802 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic");
803 assert(optLevel <= 2);
690 ms->nextToUpdate3 = ms->nextToUpdate;
804 ms->nextToUpdate3 = ms->nextToUpdate;
691 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize);
805 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel);
692 ip += (ip==prefixStart);
806 ip += (ip==prefixStart);
693 memset(&cachedLitPrice, 0, sizeof(cachedLitPrice));
694
807
695 /* Match Loop */
808 /* Match Loop */
696 while (ip < ilimit) {
809 while (ip < ilimit) {
697 U32 cur, last_pos = 0;
810 U32 cur, last_pos = 0;
698 U32 best_mlen, best_off;
699
811
700 /* find first match */
812 /* find first match */
701 { U32 const litlen = (U32)(ip - anchor);
813 { U32 const litlen = (U32)(ip - anchor);
702 U32 const ll0 = !litlen;
814 U32 const ll0 = !litlen;
703 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, ip, iend, extDict, rep, ll0, matches, minMatch);
815 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, matches, minMatch);
704 if (!nbMatches) { ip++; continue; }
816 if (!nbMatches) { ip++; continue; }
705
817
706 /* initialize opt[0] */
818 /* initialize opt[0] */
707 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
819 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
708 opt[0].mlen = 1;
820 opt[0].mlen = 0; /* means is_a_literal */
709 opt[0].litlen = litlen;
821 opt[0].litlen = litlen;
822 opt[0].price = ZSTD_literalsContribution(anchor, litlen, optStatePtr, optLevel);
710
823
711 /* large match -> immediate encoding */
824 /* large match -> immediate encoding */
712 { U32 const maxML = matches[nbMatches-1].len;
825 { U32 const maxML = matches[nbMatches-1].len;
713 DEBUGLOG(7, "found %u matches of maxLength=%u and offset=%u at cPos=%u => start new serie",
826 U32 const maxOffset = matches[nbMatches-1].off;
714 nbMatches, maxML, matches[nbMatches-1].off, (U32)(ip-prefixStart));
827 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie",
828 nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
715
829
716 if (maxML > sufficient_len) {
830 if (maxML > sufficient_len) {
717 best_mlen = maxML;
831 lastSequence.litlen = litlen;
718 best_off = matches[nbMatches-1].off;
832 lastSequence.mlen = maxML;
719 DEBUGLOG(7, "large match (%u>%u), immediate encoding",
833 lastSequence.off = maxOffset;
720 best_mlen, sufficient_len);
834 DEBUGLOG(6, "large match (%u>%u), immediate encoding",
835 maxML, sufficient_len);
721 cur = 0;
836 cur = 0;
722 last_pos = 1;
837 last_pos = ZSTD_totalLen(lastSequence);
723 goto _shortestPath;
838 goto _shortestPath;
724 } }
839 } }
725
840
726 /* set prices for first matches starting position == 0 */
841 /* set prices for first matches starting position == 0 */
727 { U32 const literalsPrice = ZSTD_fullLiteralsCost_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
842 { U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
728 U32 pos;
843 U32 pos;
729 U32 matchNb;
844 U32 matchNb;
730 for (pos = 0; pos < minMatch; pos++) {
845 for (pos = 1; pos < minMatch; pos++) {
731 opt[pos].mlen = 1;
846 opt[pos].price = ZSTD_MAX_PRICE; /* mlen, litlen and price will be fixed during forward scanning */
732 opt[pos].price = ZSTD_MAX_PRICE;
733 }
847 }
734 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
848 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
735 U32 const offset = matches[matchNb].off;
849 U32 const offset = matches[matchNb].off;
736 U32 const end = matches[matchNb].len;
850 U32 const end = matches[matchNb].len;
737 repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
851 repcodes_t const repHistory = ZSTD_updateRep(rep, offset, ll0);
738 for ( ; pos <= end ; pos++ ) {
852 for ( ; pos <= end ; pos++ ) {
739 U32 const matchPrice = literalsPrice + ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
853 U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
740 DEBUGLOG(7, "rPos:%u => set initial price : %u",
854 U32 const sequencePrice = literalsPrice + matchPrice;
741 pos, matchPrice);
855 DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
856 pos, ZSTD_fCost(sequencePrice));
742 opt[pos].mlen = pos;
857 opt[pos].mlen = pos;
743 opt[pos].off = offset;
858 opt[pos].off = offset;
744 opt[pos].litlen = litlen;
859 opt[pos].litlen = litlen;
745 opt[pos].price = matchPrice;
860 opt[pos].price = sequencePrice;
861 ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
746 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
862 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
747 } }
863 } }
748 last_pos = pos-1;
864 last_pos = pos-1;
@@ -753,55 +869,67 b' size_t ZSTD_compressBlock_opt_generic(ZS'
753 for (cur = 1; cur <= last_pos; cur++) {
869 for (cur = 1; cur <= last_pos; cur++) {
754 const BYTE* const inr = ip + cur;
870 const BYTE* const inr = ip + cur;
755 assert(cur < ZSTD_OPT_NUM);
871 assert(cur < ZSTD_OPT_NUM);
872 DEBUGLOG(7, "cPos:%zi==rPos:%u", inr-istart, cur)
756
873
757 /* Fix current position with one literal if cheaper */
874 /* Fix current position with one literal if cheaper */
758 { U32 const litlen = (opt[cur-1].mlen == 1) ? opt[cur-1].litlen + 1 : 1;
875 { U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
759 int price; /* note : contribution can be negative */
876 int const price = opt[cur-1].price
760 if (cur > litlen) {
877 + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
761 price = opt[cur - litlen].price + ZSTD_literalsContribution(inr-litlen, litlen, optStatePtr);
878 + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
762 } else {
879 - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
763 price = ZSTD_literalsContribution_cached(&cachedLitPrice, anchor, litlen, optStatePtr);
764 }
765 assert(price < 1000000000); /* overflow check */
880 assert(price < 1000000000); /* overflow check */
766 if (price <= opt[cur].price) {
881 if (price <= opt[cur].price) {
767 DEBUGLOG(7, "rPos:%u : better price (%u<%u) using literal",
882 DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
768 cur, price, opt[cur].price);
883 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price), litlen,
769 opt[cur].mlen = 1;
884 opt[cur-1].rep[0], opt[cur-1].rep[1], opt[cur-1].rep[2]);
885 opt[cur].mlen = 0;
770 opt[cur].off = 0;
886 opt[cur].off = 0;
771 opt[cur].litlen = litlen;
887 opt[cur].litlen = litlen;
772 opt[cur].price = price;
888 opt[cur].price = price;
773 memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
889 memcpy(opt[cur].rep, opt[cur-1].rep, sizeof(opt[cur].rep));
774 } }
890 } else {
891 DEBUGLOG(7, "cPos:%zi==rPos:%u : literal would cost more (%.2f>%.2f) (hist:%u,%u,%u)",
892 inr-istart, cur, ZSTD_fCost(price), ZSTD_fCost(opt[cur].price),
893 opt[cur].rep[0], opt[cur].rep[1], opt[cur].rep[2]);
894 }
895 }
775
896
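The replacement above computes the literal candidate incrementally: price(cur) = price(cur-1) + rawCost(one literal) + llPrice(litlen) - llPrice(litlen-1), so extending a literal run pays only the marginal byte plus the change in the literal-length code, where the removed *_cached helpers had to re-price the whole run from the anchor.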
776 /* last match must start at a minimum distance of 8 from oend */
897 /* last match must start at a minimum distance of 8 from oend */
777 if (inr > ilimit) continue;
898 if (inr > ilimit) continue;
778
899
779 if (cur == last_pos) break;
900 if (cur == last_pos) break;
780
901
781 if ( (optLevel==0) /*static*/
902 if ( (optLevel==0) /*static_test*/
782 && (opt[cur+1].price <= opt[cur].price) )
903 && (opt[cur+1].price <= opt[cur].price + (BITCOST_MULTIPLIER/2)) ) {
904 DEBUGLOG(7, "move to next rPos:%u : price is <=", cur+1);
783 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
905 continue; /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
906 }
784
907
785 { U32 const ll0 = (opt[cur].mlen != 1);
908 { U32 const ll0 = (opt[cur].mlen != 0);
786 U32 const litlen = (opt[cur].mlen == 1) ? opt[cur].litlen : 0;
909 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
787 U32 const previousPrice = (cur > litlen) ? opt[cur-litlen].price : 0;
910 U32 const previousPrice = opt[cur].price;
788 U32 const basePrice = previousPrice + ZSTD_fullLiteralsCost(inr-litlen, litlen, optStatePtr);
911 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
789 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, cParams, inr, iend, extDict, opt[cur].rep, ll0, matches, minMatch);
912 U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, matches, minMatch);
790 U32 matchNb;
913 U32 matchNb;
791 if (!nbMatches) continue;
914 if (!nbMatches) {
915 DEBUGLOG(7, "rPos:%u : no match found", cur);
916 continue;
917 }
792
918
793 { U32 const maxML = matches[nbMatches-1].len;
919 { U32 const maxML = matches[nbMatches-1].len;
794 DEBUGLOG(7, "rPos:%u, found %u matches, of maxLength=%u",
920 DEBUGLOG(7, "cPos:%zi==rPos:%u, found %u matches, of maxLength=%u",
795 cur, nbMatches, maxML);
921 inr-istart, cur, nbMatches, maxML);
796
922
797 if ( (maxML > sufficient_len)
923 if ( (maxML > sufficient_len)
798 | (cur + maxML >= ZSTD_OPT_NUM) ) {
924 || (cur + maxML >= ZSTD_OPT_NUM) ) {
799 best_mlen = maxML;
925 lastSequence.mlen = maxML;
800 best_off = matches[nbMatches-1].off;
926 lastSequence.off = matches[nbMatches-1].off;
801 last_pos = cur + 1;
927 lastSequence.litlen = litlen;
928 cur -= (opt[cur].mlen==0) ? opt[cur].litlen : 0; /* last sequence is actually only literals, fix cur to last match - note : may underflow, in which case, it's first sequence, and it's okay */
929 last_pos = cur + ZSTD_totalLen(lastSequence);
930 if (cur > ZSTD_OPT_NUM) cur = 0; /* underflow => first match */
802 goto _shortestPath;
931 goto _shortestPath;
803 }
932 } }
804 }
805
933
806 /* set prices using matches found at position == cur */
934 /* set prices using matches found at position == cur */
807 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
935 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
@@ -811,81 +939,97 b' size_t ZSTD_compressBlock_opt_generic(ZS'
811 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
939 U32 const startML = (matchNb>0) ? matches[matchNb-1].len+1 : minMatch;
812 U32 mlen;
940 U32 mlen;
813
941
814 DEBUGLOG(7, "testing match %u => offCode=%u, mlen=%u, llen=%u",
942 DEBUGLOG(7, "testing match %u => offCode=%4u, mlen=%2u, llen=%2u",
815 matchNb, matches[matchNb].off, lastML, litlen);
943 matchNb, matches[matchNb].off, lastML, litlen);
816
944
817 for (mlen = lastML; mlen >= startML; mlen--) {
945 for (mlen = lastML; mlen >= startML; mlen--) { /* scan downward */
818 U32 const pos = cur + mlen;
946 U32 const pos = cur + mlen;
819 int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
947 int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
820
948
821 if ((pos > last_pos) || (price < opt[pos].price)) {
949 if ((pos > last_pos) || (price < opt[pos].price)) {
822 DEBUGLOG(7, "rPos:%u => new better price (%u<%u)",
950 DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
823 pos, price, opt[pos].price);
951 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
824 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; }
952 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } /* fill empty positions */
825 opt[pos].mlen = mlen;
953 opt[pos].mlen = mlen;
826 opt[pos].off = offset;
954 opt[pos].off = offset;
827 opt[pos].litlen = litlen;
955 opt[pos].litlen = litlen;
828 opt[pos].price = price;
956 opt[pos].price = price;
957 ZSTD_STATIC_ASSERT(sizeof(opt[pos].rep) == sizeof(repHistory));
829 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
958 memcpy(opt[pos].rep, &repHistory, sizeof(repHistory));
830 } else {
959 } else {
831 if (optLevel==0) break; /* gets ~+10% speed for about -0.01 ratio loss */
960 DEBUGLOG(7, "rPos:%u (ml=%2u) => new price is worse (%.2f>=%.2f)",
961 pos, mlen, ZSTD_fCost(price), ZSTD_fCost(opt[pos].price));
962 if (optLevel==0) break; /* early update abort; gets ~+10% speed for about -0.01 ratio loss */
832 }
963 }
833 } } }
964 } } }
834 } /* for (cur = 1; cur <= last_pos; cur++) */
965 } /* for (cur = 1; cur <= last_pos; cur++) */
835
966
836 best_mlen = opt[last_pos].mlen;
967 lastSequence = opt[last_pos];
837 best_off = opt[last_pos].off;
968 cur = last_pos > ZSTD_totalLen(lastSequence) ? last_pos - ZSTD_totalLen(lastSequence) : 0; /* single sequence, and it starts before `ip` */
838 cur = last_pos - best_mlen;
969 assert(cur < ZSTD_OPT_NUM); /* control overflow*/
839
970
840 _shortestPath: /* cur, last_pos, best_mlen, best_off have to be set */
971 _shortestPath: /* cur, last_pos, lastSequence have to be set */
841 assert(opt[0].mlen == 1);
972 assert(opt[0].mlen == 0);
973
974 { U32 const storeEnd = cur + 1;
975 U32 storeStart = storeEnd;
976 U32 seqPos = cur;
842
977
843 /* reverse traversal */
978 DEBUGLOG(6, "start reverse traversal (last_pos:%u, cur:%u)",
844 DEBUGLOG(7, "start reverse traversal (last_pos:%u, cur:%u)",
979 last_pos, cur); (void)last_pos;
845 last_pos, cur);
980 assert(storeEnd < ZSTD_OPT_NUM);
846 { U32 selectedMatchLength = best_mlen;
981 DEBUGLOG(6, "last sequence copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
847 U32 selectedOffset = best_off;
982 storeEnd, lastSequence.litlen, lastSequence.mlen, lastSequence.off);
848 U32 pos = cur;
983 opt[storeEnd] = lastSequence;
849 while (1) {
984 while (seqPos > 0) {
850 U32 const mlen = opt[pos].mlen;
985 U32 const backDist = ZSTD_totalLen(opt[seqPos]);
851 U32 const off = opt[pos].off;
986 storeStart--;
852 opt[pos].mlen = selectedMatchLength;
987 DEBUGLOG(6, "sequence from rPos=%u copied into pos=%u (llen=%u,mlen=%u,ofc=%u)",
853 opt[pos].off = selectedOffset;
988 seqPos, storeStart, opt[seqPos].litlen, opt[seqPos].mlen, opt[seqPos].off);
854 selectedMatchLength = mlen;
989 opt[storeStart] = opt[seqPos];
855 selectedOffset = off;
990 seqPos = (seqPos > backDist) ? seqPos - backDist : 0;
856 if (mlen > pos) break;
991 }
857 pos -= mlen;
858 } }
859
992
860 /* save sequences */
993 /* save sequences */
861 { U32 pos;
994 DEBUGLOG(6, "sending selected sequences into seqStore")
862 for (pos=0; pos < last_pos; ) {
995 { U32 storePos;
863 U32 const llen = (U32)(ip - anchor);
996 for (storePos=storeStart; storePos <= storeEnd; storePos++) {
864 U32 const mlen = opt[pos].mlen;
997 U32 const llen = opt[storePos].litlen;
865 U32 const offset = opt[pos].off;
998 U32 const mlen = opt[storePos].mlen;
866 if (mlen == 1) { ip++; pos++; continue; } /* literal position => move on */
999 U32 const offCode = opt[storePos].off;
867 pos += mlen; ip += mlen;
1000 U32 const advance = llen + mlen;
1001 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u",
1002 anchor - istart, llen, mlen);
1003
1004 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */
1005 assert(storePos == storeEnd); /* must be last sequence */
1006 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */
1007 continue; /* will finish */
1008 }
868
1009
869 /* repcodes update : like ZSTD_updateRep(), but update in place */
1010 /* repcodes update : like ZSTD_updateRep(), but update in place */
870 if (offset >= ZSTD_REP_NUM) { /* full offset */
1011 if (offCode >= ZSTD_REP_NUM) { /* full offset */
871 rep[2] = rep[1];
1012 rep[2] = rep[1];
872 rep[1] = rep[0];
873 rep[0] = offset - ZSTD_REP_MOVE;
874 } else { /* repcode */
875 U32 const repCode = offset + (llen==0);
876 if (repCode) { /* note : if repCode==0, no change */
877 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
878 if (repCode >= 2) rep[2] = rep[1];
879 rep[1] = rep[0];
1013 rep[1] = rep[0];
880 rep[0] = currentOffset;
1014 rep[0] = offCode - ZSTD_REP_MOVE;
881 }
1015 } else { /* repcode */
882 }
1016 U32 const repCode = offCode + (llen==0);
1017 if (repCode) { /* note : if repCode==0, no change */
1018 U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
1019 if (repCode >= 2) rep[2] = rep[1];
1020 rep[1] = rep[0];
1021 rep[0] = currentOffset;
1022 } }
883
1023
884 ZSTD_updateStats(optStatePtr, llen, anchor, offset, mlen);
1024 assert(anchor + llen <= iend);
885 ZSTD_storeSeq(seqStore, llen, anchor, offset, mlen-MINMATCH);
1025 ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
886 anchor = ip;
1026 ZSTD_storeSeq(seqStore, llen, anchor, offCode, mlen-MINMATCH);
887 } }
1027 anchor += advance;
888 ZSTD_setLog2Prices(optStatePtr);
1028 ip = anchor;
1029 } }
1030 ZSTD_setBasePrices(optStatePtr, optLevel);
1031 }
1032
889 } /* while (ip < ilimit) */
1033 } /* while (ip < ilimit) */
890
1034
891 /* Return the last literals size */
1035 /* Return the last literals size */
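Throughout the save-sequences loop above, off/offCode values live in a merged code space: 0..2 select a repcode (shifted by one when litLength==0, where code ZSTD_REP_NUM means rep[0]-1), and values >= ZSTD_REP_NUM encode a real distance as distance + ZSTD_REP_MOVE. A standalone sketch of the in-place history update (constants mirror the vendored code; not part of the diff):

    #include <stdint.h>
    #include <stdio.h>

    #define ZSTD_REP_NUM  3
    #define ZSTD_REP_MOVE (ZSTD_REP_NUM - 1)

    static void updateRep(uint32_t rep[3], uint32_t offCode, uint32_t llen)
    {   if (offCode >= ZSTD_REP_NUM) {   /* real offset : shift history */
            rep[2] = rep[1];
            rep[1] = rep[0];
            rep[0] = offCode - ZSTD_REP_MOVE;
        } else {                         /* repcode */
            uint32_t const repCode = offCode + (llen==0);
            if (repCode) {               /* repCode==0 : history unchanged */
                uint32_t const cur = (repCode==ZSTD_REP_NUM) ? rep[0] - 1 : rep[repCode];
                if (repCode >= 2) rep[2] = rep[1];
                rep[1] = rep[0];
                rep[0] = cur;
            }
        }
    }

    int main(void)
    {   uint32_t rep[3] = { 1, 4, 8 };
        updateRep(rep, 100 + ZSTD_REP_MOVE, 5);        /* real distance 100 */
        printf("%u %u %u\n", rep[0], rep[1], rep[2]);  /* 100 1 4 */
        updateRep(rep, 1, 3);                          /* repcode 1 -> promote rep[1] */
        printf("%u %u %u\n", rep[0], rep[1], rep[2]);  /* 1 100 4 */
        return 0;
    }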
@@ -895,29 +1039,94 b' size_t ZSTD_compressBlock_opt_generic(ZS'
895
1039
896 size_t ZSTD_compressBlock_btopt(
1040 size_t ZSTD_compressBlock_btopt(
897 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1041 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
898 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1042 const void* src, size_t srcSize)
899 {
1043 {
900 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
1044 DEBUGLOG(5, "ZSTD_compressBlock_btopt");
901 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 0 /*extDict*/);
1045 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
1046 }
1047
1048
1049 /* used in 2-pass strategy */
1050 static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus)
1051 {
1052 U32 s, sum=0;
1053 assert(ZSTD_FREQ_DIV+bonus > 0);
1054 for (s=0; s<=lastEltIndex; s++) {
1055 table[s] <<= ZSTD_FREQ_DIV+bonus;
1056 table[s]--;
1057 sum += table[s];
1058 }
1059 return sum;
1060 }
1061
1062 /* used in 2-pass strategy */
1063 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
1064 {
1065 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
1066 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1);
1067 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1);
1068 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1);
902 }
1069 }
903
1070
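ZSTD_upscaleStat is the approximate inverse of ZSTD_downscaleStat: each count becomes (count << (ZSTD_FREQ_DIV+bonus)) - 1, so a later downscale with the same shift lands back on the original value, e.g. (21 << 5) - 1 = 671, and 1 + (671 >> 5) = 21 again. The disabled 2-pass path below uses this to hand first-pass statistics to the real pass at full weight while still decaying normally on subsequent blocks.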
904 size_t ZSTD_compressBlock_btultra(
1071 size_t ZSTD_compressBlock_btultra(
905 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1072 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
906 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1073 const void* src, size_t srcSize)
907 {
1074 {
908 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 0 /*extDict*/);
1075 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
1076 #if 0
1077 /* 2-pass strategy (disabled)
1078 * this strategy makes a first pass over first block to collect statistics
1079 * and seed next round's statistics with it.
1080 * The compression ratio gain is generally small (~0.5% on first block),
1081 * the cost is 2x cpu time on first block. */
1082 assert(srcSize <= ZSTD_BLOCKSIZE_MAX);
1083 if ( (ms->opt.litLengthSum==0) /* first block */
1084 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */
1085 && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */
1086 U32 tmpRep[ZSTD_REP_NUM];
1087 DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics");
1088 assert(ms->nextToUpdate >= ms->window.dictLimit
1089 && ms->nextToUpdate <= ms->window.dictLimit + 1);
1090 memcpy(tmpRep, rep, sizeof(tmpRep));
1091 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/
1092 ZSTD_resetSeqStore(seqStore);
1093 /* invalidate first scan from history */
1094 ms->window.base -= srcSize;
1095 ms->window.dictLimit += (U32)srcSize;
1096 ms->window.lowLimit = ms->window.dictLimit;
1097 ms->nextToUpdate = ms->window.dictLimit;
1098 ms->nextToUpdate3 = ms->window.dictLimit;
1099 /* reinforce weight of collected statistics */
1100 ZSTD_upscaleStats(&ms->opt);
1101 }
1102 #endif
1103 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
1104 }
1105
1106 size_t ZSTD_compressBlock_btopt_dictMatchState(
1107 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1108 const void* src, size_t srcSize)
1109 {
1110 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
1111 }
1112
1113 size_t ZSTD_compressBlock_btultra_dictMatchState(
1114 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1115 const void* src, size_t srcSize)
1116 {
1117 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
909 }
1118 }
910
1119
911 size_t ZSTD_compressBlock_btopt_extDict(
1120 size_t ZSTD_compressBlock_btopt_extDict(
912 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1121 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
913 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1122 const void* src, size_t srcSize)
914 {
1123 {
915 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 0 /*optLevel*/, 1 /*extDict*/);
1124 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
916 }
1125 }
917
1126
918 size_t ZSTD_compressBlock_btultra_extDict(
1127 size_t ZSTD_compressBlock_btultra_extDict(
919 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
1128 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
920 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize)
1129 const void* src, size_t srcSize)
921 {
1130 {
922 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, cParams, src, srcSize, 2 /*optLevel*/, 1 /*extDict*/);
1131 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
923 }
1132 }
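ZSTD_upscaleStat() above re-weights a frequency table before its statistics seed the next block: each count is multiplied by 2^(ZSTD_FREQ_DIV+bonus), then decremented. A minimal standalone sketch of that arithmetic (ZSTD_FREQ_DIV is internal to zstd_opt.c; the value 4 below is assumed purely for the worked numbers):

    /* standalone sketch of the ZSTD_upscaleStat() loop body */
    typedef unsigned U32;

    static U32 upscale_one(U32 freq, int freqDiv /* assumed 4 */, int bonus)
    {
        freq <<= (freqDiv + bonus);   /* scale up : freq * 2^(freqDiv+bonus) */
        freq -= 1;                    /* then subtract 1, exactly as table[s]-- above */
        return freq;
    }
    /* example : freq=3, freqDiv=4, bonus=1  ->  (3 << 5) - 1 = 95 */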
@@ -17,23 +17,29 b' extern "C" {'
17
17
18 #include "zstd_compress_internal.h"
18 #include "zstd_compress_internal.h"
19
19
20 void ZSTD_updateTree(
20 /* used in ZSTD_loadDictionaryContent() */
21 ZSTD_matchState_t* ms, ZSTD_compressionParameters const* cParams,
21 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend);
22 const BYTE* ip, const BYTE* iend); /* used in ZSTD_loadDictionaryContent() */
23
22
24 size_t ZSTD_compressBlock_btopt(
23 size_t ZSTD_compressBlock_btopt(
25 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
24 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
26 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
25 void const* src, size_t srcSize);
27 size_t ZSTD_compressBlock_btultra(
26 size_t ZSTD_compressBlock_btultra(
28 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
27 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
29 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
28 void const* src, size_t srcSize);
29
30 size_t ZSTD_compressBlock_btopt_dictMatchState(
31 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
32 void const* src, size_t srcSize);
33 size_t ZSTD_compressBlock_btultra_dictMatchState(
34 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
35 void const* src, size_t srcSize);
30
36
31 size_t ZSTD_compressBlock_btopt_extDict(
37 size_t ZSTD_compressBlock_btopt_extDict(
32 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
38 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
33 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
39 void const* src, size_t srcSize);
34 size_t ZSTD_compressBlock_btultra_extDict(
40 size_t ZSTD_compressBlock_btultra_extDict(
35 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
41 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
36 ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize);
42 void const* src, size_t srcSize);
37
43
38 #if defined (__cplusplus)
44 #if defined (__cplusplus)
39 }
45 }
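These prototype changes drop the explicit cParams argument (the match state is now expected to carry the parameters) and split each strategy into noDict / dictMatchState / extDict entry points. A hedged sketch of how a caller could dispatch on dictionary mode under the new signatures (illustrative only, not the library's actual block-compressor selector; assumes zstd_compress_internal.h for the types and the ZSTD_dictMode_e values used in this diff):

    /* illustrative dispatcher over the btopt-family entry points declared above */
    static size_t compressBlock_btopt_byMode(
            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
            const void* src, size_t srcSize, ZSTD_dictMode_e mode)
    {
        switch (mode) {
        case ZSTD_extDict:        return ZSTD_compressBlock_btopt_extDict(ms, seqStore, rep, src, srcSize);
        case ZSTD_dictMatchState: return ZSTD_compressBlock_btopt_dictMatchState(ms, seqStore, rep, src, srcSize);
        default:                  return ZSTD_compressBlock_btopt(ms, seqStore, rep, src, srcSize);
        }
    }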
@@ -37,18 +37,19 b''
37 #define ZSTD_RESIZE_SEQPOOL 0
37 #define ZSTD_RESIZE_SEQPOOL 0
38
38
39 /* ====== Debug ====== */
39 /* ====== Debug ====== */
40 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=2)
40 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=2) \
41 && !defined(_MSC_VER) \
42 && !defined(__MINGW32__)
41
43
42 # include <stdio.h>
44 # include <stdio.h>
43 # include <unistd.h>
45 # include <unistd.h>
44 # include <sys/times.h>
46 # include <sys/times.h>
45 # define DEBUGLOGRAW(l, ...) if (l<=ZSTD_DEBUG) { fprintf(stderr, __VA_ARGS__); }
46
47
47 # define DEBUG_PRINTHEX(l,p,n) { \
48 # define DEBUG_PRINTHEX(l,p,n) { \
48 unsigned debug_u; \
49 unsigned debug_u; \
49 for (debug_u=0; debug_u<(n); debug_u++) \
50 for (debug_u=0; debug_u<(n); debug_u++) \
50 DEBUGLOGRAW(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
51 RAWLOG(l, "%02X ", ((const unsigned char*)(p))[debug_u]); \
51 DEBUGLOGRAW(l, " \n"); \
52 RAWLOG(l, " \n"); \
52 }
53 }
53
54
54 static unsigned long long GetCurrentClockTimeMicroseconds(void)
55 static unsigned long long GetCurrentClockTimeMicroseconds(void)
@@ -62,7 +63,7 b' static unsigned long long GetCurrentCloc'
62
63
63 #define MUTEX_WAIT_TIME_DLEVEL 6
64 #define MUTEX_WAIT_TIME_DLEVEL 6
64 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
65 #define ZSTD_PTHREAD_MUTEX_LOCK(mutex) { \
65 if (ZSTD_DEBUG >= MUTEX_WAIT_TIME_DLEVEL) { \
66 if (DEBUGLEVEL >= MUTEX_WAIT_TIME_DLEVEL) { \
66 unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
67 unsigned long long const beforeTime = GetCurrentClockTimeMicroseconds(); \
67 ZSTD_pthread_mutex_lock(mutex); \
68 ZSTD_pthread_mutex_lock(mutex); \
68 { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
69 { unsigned long long const afterTime = GetCurrentClockTimeMicroseconds(); \
@@ -160,6 +161,25 b' static void ZSTDMT_setBufferSize(ZSTDMT_'
160 ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
161 ZSTD_pthread_mutex_unlock(&bufPool->poolMutex);
161 }
162 }
162
163
164
165 static ZSTDMT_bufferPool* ZSTDMT_expandBufferPool(ZSTDMT_bufferPool* srcBufPool, U32 nbWorkers)
166 {
167 unsigned const maxNbBuffers = 2*nbWorkers + 3;
168 if (srcBufPool==NULL) return NULL;
169 if (srcBufPool->totalBuffers >= maxNbBuffers) /* good enough */
170 return srcBufPool;
171 /* need a larger buffer pool */
172 { ZSTD_customMem const cMem = srcBufPool->cMem;
173 size_t const bSize = srcBufPool->bufferSize; /* forward parameters */
174 ZSTDMT_bufferPool* newBufPool;
175 ZSTDMT_freeBufferPool(srcBufPool);
176 newBufPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
177 if (newBufPool==NULL) return newBufPool;
178 ZSTDMT_setBufferSize(newBufPool, bSize);
179 return newBufPool;
180 }
181 }
182
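ZSTDMT_expandBufferPool() grows by recreating: it saves cMem and bufferSize, frees the old pool (dropping any cached buffers), and allocates a larger one. Because cached buffers are discarded, this is only safe while no worker holds a pool buffer; in this diff it is reached through ZSTDMT_resize(), between compressions. The same pattern in isolation, with hypothetical stand-in names:

    /* sketch of the grow-by-recreate pattern; Pool and its helpers are stand-ins */
    typedef struct Pool Pool;
    Pool*    pool_create(unsigned capacity);
    void     pool_destroy(Pool* p);
    unsigned pool_capacity(const Pool* p);

    static Pool* pool_expand(Pool* p, unsigned wanted)
    {
        if (p == NULL) return NULL;
        if (pool_capacity(p) >= wanted) return p;  /* large enough : keep the existing pool */
        pool_destroy(p);                           /* cached entries are lost at this point */
        return pool_create(wanted);                /* may be NULL : caller must check, as ZSTDMT_resize() does */
    }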
163 /** ZSTDMT_getBuffer() :
183 /** ZSTDMT_getBuffer() :
164 * assumption : bufPool must be valid
184 * assumption : bufPool must be valid
165 * @return : a buffer, with start pointer and size
185 * @return : a buffer, with start pointer and size
@@ -229,8 +249,8 b' static buffer_t ZSTDMT_resizeBuffer(ZSTD'
229 /* store buffer for later re-use, up to pool capacity */
249 /* store buffer for later re-use, up to pool capacity */
230 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
250 static void ZSTDMT_releaseBuffer(ZSTDMT_bufferPool* bufPool, buffer_t buf)
231 {
251 {
252 DEBUGLOG(5, "ZSTDMT_releaseBuffer");
232 if (buf.start == NULL) return; /* compatible with release on NULL */
253 if (buf.start == NULL) return; /* compatible with release on NULL */
233 DEBUGLOG(5, "ZSTDMT_releaseBuffer");
234 ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
254 ZSTD_pthread_mutex_lock(&bufPool->poolMutex);
235 if (bufPool->nbBuffers < bufPool->totalBuffers) {
255 if (bufPool->nbBuffers < bufPool->totalBuffers) {
236 bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
256 bufPool->bTable[bufPool->nbBuffers++] = buf; /* stored for later use */
@@ -300,7 +320,8 b' static void ZSTDMT_setNbSeq(ZSTDMT_seqPo'
300
320
301 static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
321 static ZSTDMT_seqPool* ZSTDMT_createSeqPool(unsigned nbWorkers, ZSTD_customMem cMem)
302 {
322 {
303 ZSTDMT_seqPool* seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
323 ZSTDMT_seqPool* const seqPool = ZSTDMT_createBufferPool(nbWorkers, cMem);
324 if (seqPool == NULL) return NULL;
304 ZSTDMT_setNbSeq(seqPool, 0);
325 ZSTDMT_setNbSeq(seqPool, 0);
305 return seqPool;
326 return seqPool;
306 }
327 }
@@ -310,6 +331,10 b' static void ZSTDMT_freeSeqPool(ZSTDMT_se'
310 ZSTDMT_freeBufferPool(seqPool);
331 ZSTDMT_freeBufferPool(seqPool);
311 }
332 }
312
333
334 static ZSTDMT_seqPool* ZSTDMT_expandSeqPool(ZSTDMT_seqPool* pool, U32 nbWorkers)
335 {
336 return ZSTDMT_expandBufferPool(pool, nbWorkers);
337 }
313
338
314
339
315 /* ===== CCtx Pool ===== */
340 /* ===== CCtx Pool ===== */
@@ -355,6 +380,18 b' static ZSTDMT_CCtxPool* ZSTDMT_createCCt'
355 return cctxPool;
380 return cctxPool;
356 }
381 }
357
382
383 static ZSTDMT_CCtxPool* ZSTDMT_expandCCtxPool(ZSTDMT_CCtxPool* srcPool,
384 unsigned nbWorkers)
385 {
386 if (srcPool==NULL) return NULL;
387 if (nbWorkers <= srcPool->totalCCtx) return srcPool; /* good enough */
388 /* need a larger cctx pool */
389 { ZSTD_customMem const cMem = srcPool->cMem;
390 ZSTDMT_freeCCtxPool(srcPool);
391 return ZSTDMT_createCCtxPool(nbWorkers, cMem);
392 }
393 }
394
358 /* only works during initialization phase, not during compression */
395 /* only works during initialization phase, not during compression */
359 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
396 static size_t ZSTDMT_sizeof_CCtxPool(ZSTDMT_CCtxPool* cctxPool)
360 {
397 {
@@ -425,12 +462,11 b' typedef struct {'
425 ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
462 ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */
426 } serialState_t;
463 } serialState_t;
427
464
428 static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params)
465 static int ZSTDMT_serialState_reset(serialState_t* serialState, ZSTDMT_seqPool* seqPool, ZSTD_CCtx_params params, size_t jobSize)
429 {
466 {
430 /* Adjust parameters */
467 /* Adjust parameters */
431 if (params.ldmParams.enableLdm) {
468 if (params.ldmParams.enableLdm) {
432 DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
469 DEBUGLOG(4, "LDM window size = %u KB", (1U << params.cParams.windowLog) >> 10);
433 params.ldmParams.windowLog = params.cParams.windowLog;
434 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
470 ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
435 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
471 assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
436 assert(params.ldmParams.hashEveryLog < 32);
472 assert(params.ldmParams.hashEveryLog < 32);
@@ -453,7 +489,7 b' static int ZSTDMT_serialState_reset(seri'
453 serialState->params.ldmParams.hashLog -
489 serialState->params.ldmParams.hashLog -
454 serialState->params.ldmParams.bucketSizeLog;
490 serialState->params.ldmParams.bucketSizeLog;
455 /* Size the seq pool tables */
491 /* Size the seq pool tables */
456 ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, params.jobSize));
492 ZSTDMT_setNbSeq(seqPool, ZSTD_ldm_getMaxNbSeq(params.ldmParams, jobSize));
457 /* Reset the window */
493 /* Reset the window */
458 ZSTD_window_clear(&serialState->ldmState.window);
494 ZSTD_window_clear(&serialState->ldmState.window);
459 serialState->ldmWindow = serialState->ldmState.window;
495 serialState->ldmWindow = serialState->ldmState.window;
@@ -473,6 +509,7 b' static int ZSTDMT_serialState_reset(seri'
473 memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
509 memset(serialState->ldmState.bucketOffsets, 0, bucketSize);
474 }
510 }
475 serialState->params = params;
511 serialState->params = params;
512 serialState->params.jobSize = (U32)jobSize;
476 return 0;
513 return 0;
477 }
514 }
478
515
@@ -505,6 +542,7 b' static void ZSTDMT_serialState_update(se'
505 /* Wait for our turn */
542 /* Wait for our turn */
506 ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
543 ZSTD_PTHREAD_MUTEX_LOCK(&serialState->mutex);
507 while (serialState->nextJobID < jobID) {
544 while (serialState->nextJobID < jobID) {
545 DEBUGLOG(5, "wait for serialState->cond");
508 ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
546 ZSTD_pthread_cond_wait(&serialState->cond, &serialState->mutex);
509 }
547 }
510 /* A future job may error and skip our job */
548 /* A future job may error and skip our job */
@@ -514,6 +552,7 b' static void ZSTDMT_serialState_update(se'
514 size_t error;
552 size_t error;
515 assert(seqStore.seq != NULL && seqStore.pos == 0 &&
553 assert(seqStore.seq != NULL && seqStore.pos == 0 &&
516 seqStore.size == 0 && seqStore.capacity > 0);
554 seqStore.size == 0 && seqStore.capacity > 0);
555 assert(src.size <= serialState->params.jobSize);
517 ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
556 ZSTD_window_update(&serialState->ldmState.window, src.start, src.size);
518 error = ZSTD_ldm_generateSequences(
557 error = ZSTD_ldm_generateSequences(
519 &serialState->ldmState, &seqStore,
558 &serialState->ldmState, &seqStore,
@@ -593,14 +632,32 b' typedef struct {'
593 unsigned frameChecksumNeeded; /* used only by mtctx */
632 unsigned frameChecksumNeeded; /* used only by mtctx */
594 } ZSTDMT_jobDescription;
633 } ZSTDMT_jobDescription;
595
634
635 #define JOB_ERROR(e) { \
636 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex); \
637 job->cSize = e; \
638 ZSTD_pthread_mutex_unlock(&job->job_mutex); \
639 goto _endJob; \
640 }
641
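JOB_ERROR() packages this file's error protocol: publish the error code through job->cSize under the job mutex, then jump to the shared cleanup label. For example, the guard a few lines below expands to roughly:

    if (cctx==NULL) {
        ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);   /* job->cSize is read concurrently by the mt context */
        job->cSize = ERROR(memory_allocation);      /* an error code doubles as the job result */
        ZSTD_pthread_mutex_unlock(&job->job_mutex);
        goto _endJob;                               /* release resources and signal completion */
    }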
596 /* ZSTDMT_compressionJob() is a POOL_function type */
642 /* ZSTDMT_compressionJob() is a POOL_function type */
597 void ZSTDMT_compressionJob(void* jobDescription)
643 static void ZSTDMT_compressionJob(void* jobDescription)
598 {
644 {
599 ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
645 ZSTDMT_jobDescription* const job = (ZSTDMT_jobDescription*)jobDescription;
600 ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
646 ZSTD_CCtx_params jobParams = job->params; /* do not modify job->params ! copy it, modify the copy */
601 ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
647 ZSTD_CCtx* const cctx = ZSTDMT_getCCtx(job->cctxPool);
602 rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
648 rawSeqStore_t rawSeqStore = ZSTDMT_getSeq(job->seqPool);
603 buffer_t dstBuff = job->dstBuff;
649 buffer_t dstBuff = job->dstBuff;
650 size_t lastCBlockSize = 0;
651
652 /* resources */
653 if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation));
654 if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
655 dstBuff = ZSTDMT_getBuffer(job->bufPool);
656 if (dstBuff.start==NULL) JOB_ERROR(ERROR(memory_allocation));
657 job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
658 }
659 if (jobParams.ldmParams.enableLdm && rawSeqStore.seq == NULL)
660 JOB_ERROR(ERROR(memory_allocation));
604
661
605 /* Don't compute the checksum for chunks, since we compute it externally,
662 /* Don't compute the checksum for chunks, since we compute it externally,
606 * but write it in the header.
663 * but write it in the header.
@@ -609,47 +666,31 b' void ZSTDMT_compressionJob(void* jobDesc'
609 /* Don't run LDM for the chunks, since we handle it externally */
666 /* Don't run LDM for the chunks, since we handle it externally */
610 jobParams.ldmParams.enableLdm = 0;
667 jobParams.ldmParams.enableLdm = 0;
611
668
612 /* resources */
613 if (cctx==NULL) {
614 job->cSize = ERROR(memory_allocation);
615 goto _endJob;
616 }
617 if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */
618 dstBuff = ZSTDMT_getBuffer(job->bufPool);
619 if (dstBuff.start==NULL) {
620 job->cSize = ERROR(memory_allocation);
621 goto _endJob;
622 }
623 job->dstBuff = dstBuff; /* this value can be read in ZSTDMT_flush, when it copies the whole job */
624 }
625
669
626 /* init */
670 /* init */
627 if (job->cdict) {
671 if (job->cdict) {
628 size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, job->cdict, jobParams, job->fullFrameSize);
672 size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, NULL, 0, ZSTD_dct_auto, ZSTD_dtlm_fast, job->cdict, jobParams, job->fullFrameSize);
629 assert(job->firstJob); /* only allowed for first job */
673 assert(job->firstJob); /* only allowed for first job */
630 if (ZSTD_isError(initError)) { job->cSize = initError; goto _endJob; }
674 if (ZSTD_isError(initError)) JOB_ERROR(initError);
631 } else { /* srcStart points at reloaded section */
675 } else { /* srcStart points at reloaded section */
632 U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
676 U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size;
633 { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
677 { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_p_forceMaxWindow, !job->firstJob);
634 if (ZSTD_isError(forceWindowError)) {
678 if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError);
635 job->cSize = forceWindowError;
679 }
636 goto _endJob;
637 } }
638 { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
680 { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx,
639 job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
681 job->prefix.start, job->prefix.size, ZSTD_dct_rawContent, /* load dictionary in "content-only" mode (no header analysis) */
682 ZSTD_dtlm_fast,
640 NULL, /*cdict*/
683 NULL, /*cdict*/
641 jobParams, pledgedSrcSize);
684 jobParams, pledgedSrcSize);
642 if (ZSTD_isError(initError)) {
685 if (ZSTD_isError(initError)) JOB_ERROR(initError);
643 job->cSize = initError;
686 } }
644 goto _endJob;
645 } } }
646
687
647 /* Perform serial step as early as possible, but after CCtx initialization */
688 /* Perform serial step as early as possible, but after CCtx initialization */
648 ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
689 ZSTDMT_serialState_update(job->serial, cctx, rawSeqStore, job->src, job->jobID);
649
690
650 if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
691 if (!job->firstJob) { /* flush and overwrite frame header when it's not first job */
651 size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
692 size_t const hSize = ZSTD_compressContinue(cctx, dstBuff.start, dstBuff.capacity, job->src.start, 0);
652 if (ZSTD_isError(hSize)) { job->cSize = hSize; /* save error code */ goto _endJob; }
693 if (ZSTD_isError(hSize)) JOB_ERROR(hSize);
653 DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
694 DEBUGLOG(5, "ZSTDMT_compressionJob: flush and overwrite %u bytes of frame header (not first job)", (U32)hSize);
654 ZSTD_invalidateRepCodes(cctx);
695 ZSTD_invalidateRepCodes(cctx);
655 }
696 }
@@ -667,7 +708,7 b' void ZSTDMT_compressionJob(void* jobDesc'
667 assert(job->cSize == 0);
708 assert(job->cSize == 0);
668 for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
709 for (chunkNb = 1; chunkNb < nbChunks; chunkNb++) {
669 size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
710 size_t const cSize = ZSTD_compressContinue(cctx, op, oend-op, ip, chunkSize);
670 if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
711 if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
671 ip += chunkSize;
712 ip += chunkSize;
672 op += cSize; assert(op < oend);
713 op += cSize; assert(op < oend);
673 /* stats */
714 /* stats */
@@ -680,18 +721,16 b' void ZSTDMT_compressionJob(void* jobDesc'
680 ZSTD_pthread_mutex_unlock(&job->job_mutex);
721 ZSTD_pthread_mutex_unlock(&job->job_mutex);
681 }
722 }
682 /* last block */
723 /* last block */
683 assert(chunkSize > 0); assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
724 assert(chunkSize > 0);
725 assert((chunkSize & (chunkSize - 1)) == 0); /* chunkSize must be power of 2 for mask==(chunkSize-1) to work */
684 if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
726 if ((nbChunks > 0) | job->lastJob /*must output a "last block" flag*/ ) {
685 size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
727 size_t const lastBlockSize1 = job->src.size & (chunkSize-1);
686 size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
728 size_t const lastBlockSize = ((lastBlockSize1==0) & (job->src.size>=chunkSize)) ? chunkSize : lastBlockSize1;
687 size_t const cSize = (job->lastJob) ?
729 size_t const cSize = (job->lastJob) ?
688 ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
730 ZSTD_compressEnd (cctx, op, oend-op, ip, lastBlockSize) :
689 ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
731 ZSTD_compressContinue(cctx, op, oend-op, ip, lastBlockSize);
690 if (ZSTD_isError(cSize)) { job->cSize = cSize; goto _endJob; }
732 if (ZSTD_isError(cSize)) JOB_ERROR(cSize);
691 /* stats */
733 lastCBlockSize = cSize;
692 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
693 job->cSize += cSize;
694 ZSTD_pthread_mutex_unlock(&job->job_mutex);
695 } }
734 } }
696
735
697 _endJob:
736 _endJob:
@@ -704,7 +743,9 b' void ZSTDMT_compressionJob(void* jobDesc'
704 ZSTDMT_releaseCCtx(job->cctxPool, cctx);
743 ZSTDMT_releaseCCtx(job->cctxPool, cctx);
705 /* report */
744 /* report */
706 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
745 ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
707 job->consumed = job->src.size;
746 if (ZSTD_isError(job->cSize)) assert(lastCBlockSize == 0);
747 job->cSize += lastCBlockSize;
748 job->consumed = job->src.size; /* when job->consumed == job->src.size , compression job is presumed completed */
708 ZSTD_pthread_cond_signal(&job->job_cond);
749 ZSTD_pthread_cond_signal(&job->job_cond);
709 ZSTD_pthread_mutex_unlock(&job->job_mutex);
750 ZSTD_pthread_mutex_unlock(&job->job_mutex);
710 }
751 }
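With lastCBlockSize, the worker defers the final block's byte count and folds it into job->cSize only at _endJob, under the job mutex, at the same moment consumed is set to src.size. The flushing side can therefore test completion with one locked read; a sketch of that check (field names from this diff, mirroring the condition used in ZSTDMT_flushProduced() further down):

    /* sketch : has this job finished compressing and been fully flushed? */
    static int ZSTDMT_jobFullyFlushed(ZSTDMT_jobDescription* job)
    {
        int done;
        ZSTD_PTHREAD_MUTEX_LOCK(&job->job_mutex);
        done = (job->consumed == job->src.size)    /* worker reached _endJob */
            && !ZSTD_isError(job->cSize)           /* and did not fail */
            && (job->dstFlushed == job->cSize);    /* and all produced bytes were flushed */
        ZSTD_pthread_mutex_unlock(&job->job_mutex);
        return done;
    }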
@@ -745,9 +786,9 b' struct ZSTDMT_CCtx_s {'
745 ZSTD_CCtx_params params;
786 ZSTD_CCtx_params params;
746 size_t targetSectionSize;
787 size_t targetSectionSize;
747 size_t targetPrefixSize;
788 size_t targetPrefixSize;
748 roundBuff_t roundBuff;
789 int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create a new job. */
749 inBuff_t inBuff;
790 inBuff_t inBuff;
750 int jobReady; /* 1 => one job is already prepared, but pool has shortage of workers. Don't create another one. */
791 roundBuff_t roundBuff;
751 serialState_t serial;
792 serialState_t serial;
752 unsigned singleBlockingThread;
793 unsigned singleBlockingThread;
753 unsigned jobIDMask;
794 unsigned jobIDMask;
@@ -798,6 +839,20 b' static ZSTDMT_jobDescription* ZSTDMT_cre'
798 return jobTable;
839 return jobTable;
799 }
840 }
800
841
842 static size_t ZSTDMT_expandJobsTable (ZSTDMT_CCtx* mtctx, U32 nbWorkers) {
843 U32 nbJobs = nbWorkers + 2;
844 if (nbJobs > mtctx->jobIDMask+1) { /* need more job capacity */
845 ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
846 mtctx->jobIDMask = 0;
847 mtctx->jobs = ZSTDMT_createJobsTable(&nbJobs, mtctx->cMem);
848 if (mtctx->jobs==NULL) return ERROR(memory_allocation);
849 assert((nbJobs != 0) && ((nbJobs & (nbJobs - 1)) == 0)); /* ensure nbJobs is a power of 2 */
850 mtctx->jobIDMask = nbJobs - 1;
851 }
852 return 0;
853 }
854
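ZSTDMT_expandJobsTable() keeps the job table a power of two so that a monotonically increasing job number maps to a ring slot with a single mask, the `jobNb & mtctx->jobIDMask` idiom used throughout this file. A worked sketch of the indexing:

    static void jobRingDemo(void)
    {
        unsigned const nbJobs    = 8;              /* nbWorkers+2 rounded up to a power of 2 */
        unsigned const jobIDMask = nbJobs - 1;     /* 0x7 : valid only because nbJobs is a power of 2 */
        unsigned const slotOfJob9  = 9  & jobIDMask;   /* == 1 */
        unsigned const slotOfJob16 = 16 & jobIDMask;   /* == 0 : slot 0 is reused once job 8 is flushed */
        (void)slotOfJob9; (void)slotOfJob16;
    }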
855
801 /* ZSTDMT_CCtxParam_setNbWorkers():
856 /* ZSTDMT_CCtxParam_setNbWorkers():
802 * Internal use only */
857 * Internal use only */
803 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
858 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers)
@@ -875,7 +930,7 b' static void ZSTDMT_waitForAllJobsComplet'
875 unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
930 unsigned const jobID = mtctx->doneJobID & mtctx->jobIDMask;
876 ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
931 ZSTD_PTHREAD_MUTEX_LOCK(&mtctx->jobs[jobID].job_mutex);
877 while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
932 while (mtctx->jobs[jobID].consumed < mtctx->jobs[jobID].src.size) {
878 DEBUGLOG(5, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */
933 DEBUGLOG(4, "waiting for jobCompleted signal from job %u", mtctx->doneJobID); /* we want to block when waiting for data to flush */
879 ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
934 ZSTD_pthread_cond_wait(&mtctx->jobs[jobID].job_cond, &mtctx->jobs[jobID].job_mutex);
880 }
935 }
881 ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
936 ZSTD_pthread_mutex_unlock(&mtctx->jobs[jobID].job_mutex);
@@ -924,6 +979,8 b' size_t ZSTDMT_CCtxParam_setMTCtxParamete'
924 if ( (value > 0) /* value==0 => automatic job size */
979 if ( (value > 0) /* value==0 => automatic job size */
925 & (value < ZSTDMT_JOBSIZE_MIN) )
980 & (value < ZSTDMT_JOBSIZE_MIN) )
926 value = ZSTDMT_JOBSIZE_MIN;
981 value = ZSTDMT_JOBSIZE_MIN;
982 if (value > ZSTDMT_JOBSIZE_MAX)
983 value = ZSTDMT_JOBSIZE_MAX;
927 params->jobSize = value;
984 params->jobSize = value;
928 return value;
985 return value;
929 case ZSTDMT_p_overlapSectionLog :
986 case ZSTDMT_p_overlapSectionLog :
@@ -950,6 +1007,21 b' size_t ZSTDMT_setMTCtxParameter(ZSTDMT_C'
950 }
1007 }
951 }
1008 }
952
1009
1010 size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value)
1011 {
1012 switch (parameter) {
1013 case ZSTDMT_p_jobSize:
1014 *value = mtctx->params.jobSize;
1015 break;
1016 case ZSTDMT_p_overlapSectionLog:
1017 *value = mtctx->params.overlapSizeLog;
1018 break;
1019 default:
1020 return ERROR(parameter_unsupported);
1021 }
1022 return 0;
1023 }
1024
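ZSTDMT_getMTCtxParameter() is the read-side counterpart of ZSTDMT_setMTCtxParameter(). A minimal usage sketch (error checks elided; the value read back reflects the clamping applied in the setter above):

    ZSTDMT_CCtx* const mtctx = ZSTDMT_createCCtx(4 /* nbWorkers */);
    unsigned jobSize = 0;

    ZSTDMT_setMTCtxParameter(mtctx, ZSTDMT_p_jobSize, 5u << 20);   /* request 5 MB jobs */
    ZSTDMT_getMTCtxParameter(mtctx, ZSTDMT_p_jobSize, &jobSize);   /* read back the (possibly clamped) value */
    ZSTDMT_freeCCtx(mtctx);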
953 /* Sets parameters relevant to the compression job,
1025 /* Sets parameters relevant to the compression job,
954 * initializing others to default values. */
1026 * initializing others to default values. */
955 static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
1027 static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params)
@@ -960,13 +1032,30 b' static ZSTD_CCtx_params ZSTDMT_initJobCC'
960 jobParams.cParams = params.cParams;
1032 jobParams.cParams = params.cParams;
961 jobParams.fParams = params.fParams;
1033 jobParams.fParams = params.fParams;
962 jobParams.compressionLevel = params.compressionLevel;
1034 jobParams.compressionLevel = params.compressionLevel;
963 jobParams.disableLiteralCompression = params.disableLiteralCompression;
964
1035
965 return jobParams;
1036 return jobParams;
966 }
1037 }
967
1038
1039
1040 /* ZSTDMT_resize() :
1041 * @return : error code if fails, 0 on success */
1042 static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers)
1043 {
1044 if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation);
1045 CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbWorkers) );
1046 mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers);
1047 if (mtctx->bufPool == NULL) return ERROR(memory_allocation);
1048 mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers);
1049 if (mtctx->cctxPool == NULL) return ERROR(memory_allocation);
1050 mtctx->seqPool = ZSTDMT_expandSeqPool(mtctx->seqPool, nbWorkers);
1051 if (mtctx->seqPool == NULL) return ERROR(memory_allocation);
1052 ZSTDMT_CCtxParam_setNbWorkers(&mtctx->params, nbWorkers);
1053 return 0;
1054 }
1055
1056
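ZSTDMT_resize() makes the worker count a per-frame choice: the thread pool, job table, and all three resource pools grow on demand, and ZSTDMT_initCStream_internal() below calls it whenever params.nbWorkers differs from the current value. From the public API this is reached simply by re-initializing with a different worker count; a hedged sketch (ZSTD_p_nbWorkers is assumed to be the advanced-API knob of this zstd generation):

    ZSTD_CCtx* const cctx = ZSTD_createCCtx();

    ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, 2);  /* frame 1 : 2 workers */
    /* ... stream a first frame ... */
    ZSTD_CCtx_setParameter(cctx, ZSTD_p_nbWorkers, 8);  /* next init sees 8 != 2 and triggers ZSTDMT_resize() */
    /* ... stream the next frame ... */
    ZSTD_freeCCtx(cctx);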
968 /*! ZSTDMT_updateCParams_whileCompressing() :
1057 /*! ZSTDMT_updateCParams_whileCompressing() :
969 * Updates only a selected set of compression parameters, to remain compatible with current frame.
1058 * Updates a selected set of compression parameters, remaining compatible with currently active frame.
970 * New parameters will be applied to next compression job. */
1059 * New parameters will be applied to next compression job. */
971 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
1060 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams)
972 {
1061 {
@@ -981,38 +1070,36 b' void ZSTDMT_updateCParams_whileCompressi'
981 }
1070 }
982 }
1071 }
983
1072
984 /* ZSTDMT_getNbWorkers():
985 * @return nb threads currently active in mtctx.
986 * mtctx must be valid */
987 unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx)
988 {
989 assert(mtctx != NULL);
990 return mtctx->params.nbWorkers;
991 }
992
993 /* ZSTDMT_getFrameProgression():
1073 /* ZSTDMT_getFrameProgression():
994 * tells how much data has been consumed (input) and produced (output) for current frame.
1074 * tells how much data has been consumed (input) and produced (output) for current frame.
995 * able to count progression inside worker threads.
1075 * able to count progression inside worker threads.
996 * Note : mutex will be acquired during statistics collection. */
1076 * Note : mutex will be acquired during statistics collection inside workers. */
997 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
1077 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx)
998 {
1078 {
999 ZSTD_frameProgression fps;
1079 ZSTD_frameProgression fps;
1000 DEBUGLOG(6, "ZSTDMT_getFrameProgression");
1080 DEBUGLOG(5, "ZSTDMT_getFrameProgression");
1081 fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
1001 fps.consumed = mtctx->consumed;
1082 fps.consumed = mtctx->consumed;
1002 fps.produced = mtctx->produced;
1083 fps.produced = fps.flushed = mtctx->produced;
1003 fps.ingested = mtctx->consumed + mtctx->inBuff.filled;
1084 fps.currentJobID = mtctx->nextJobID;
1085 fps.nbActiveWorkers = 0;
1004 { unsigned jobNb;
1086 { unsigned jobNb;
1005 unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
1087 unsigned lastJobNb = mtctx->nextJobID + mtctx->jobReady; assert(mtctx->jobReady <= 1);
1006 DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
1088 DEBUGLOG(6, "ZSTDMT_getFrameProgression: jobs: from %u to <%u (jobReady:%u)",
1007 mtctx->doneJobID, lastJobNb, mtctx->jobReady)
1089 mtctx->doneJobID, lastJobNb, mtctx->jobReady)
1008 for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
1090 for (jobNb = mtctx->doneJobID ; jobNb < lastJobNb ; jobNb++) {
1009 unsigned const wJobID = jobNb & mtctx->jobIDMask;
1091 unsigned const wJobID = jobNb & mtctx->jobIDMask;
1010 ZSTD_pthread_mutex_lock(&mtctx->jobs[wJobID].job_mutex);
1092 ZSTDMT_jobDescription* jobPtr = &mtctx->jobs[wJobID];
1011 { size_t const cResult = mtctx->jobs[wJobID].cSize;
1093 ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
1094 { size_t const cResult = jobPtr->cSize;
1012 size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
1095 size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
1013 fps.consumed += mtctx->jobs[wJobID].consumed;
1096 size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
1014 fps.ingested += mtctx->jobs[wJobID].src.size;
1097 assert(flushed <= produced);
1098 fps.ingested += jobPtr->src.size;
1099 fps.consumed += jobPtr->consumed;
1015 fps.produced += produced;
1100 fps.produced += produced;
1101 fps.flushed += flushed;
1102 fps.nbActiveWorkers += (jobPtr->consumed < jobPtr->src.size);
1016 }
1103 }
1017 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1104 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1018 }
1105 }
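The reworked loop also feeds the two fields this release adds to ZSTD_frameProgression, flushed and nbActiveWorkers, taking each in-flight job's mutex once. A hedged monitoring sketch from another thread, via the public wrapper (assumes this release's experimental API and <stdio.h>):

    ZSTD_frameProgression const fp = ZSTD_getFrameProgression(cctx);  /* cctx : the compressing ZSTD_CCtx* */
    fprintf(stderr, "ingested=%llu consumed=%llu produced=%llu flushed=%llu activeWorkers=%u\n",
            fp.ingested, fp.consumed, fp.produced, fp.flushed, fp.nbActiveWorkers);
    /* invariants visible in the code above : fp.flushed <= fp.produced, fp.consumed <= fp.ingested */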
@@ -1021,6 +1108,34 b' ZSTD_frameProgression ZSTDMT_getFramePro'
1021 }
1108 }
1022
1109
1023
1110
1111 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx)
1112 {
1113 size_t toFlush;
1114 unsigned const jobID = mtctx->doneJobID;
1115 assert(jobID <= mtctx->nextJobID);
1116 if (jobID == mtctx->nextJobID) return 0; /* no active job => nothing to flush */
1117
1118 /* look into oldest non-fully-flushed job */
1119 { unsigned const wJobID = jobID & mtctx->jobIDMask;
1120 ZSTDMT_jobDescription* const jobPtr = &mtctx->jobs[wJobID];
1121 ZSTD_pthread_mutex_lock(&jobPtr->job_mutex);
1122 { size_t const cResult = jobPtr->cSize;
1123 size_t const produced = ZSTD_isError(cResult) ? 0 : cResult;
1124 size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed;
1125 assert(flushed <= produced);
1126 toFlush = produced - flushed;
1127 if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) {
1128 /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */
1129 assert(jobPtr->consumed < jobPtr->src.size);
1130 }
1131 }
1132 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1133 }
1134
1135 return toFlush;
1136 }
1137
1138
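ZSTDMT_toFlushNow() probes only the oldest not-fully-flushed job, so zero means "nothing flushable at this instant", not "stream done". A hedged caller sketch (assuming ZSTD_toFlushNow(), this release's public wrapper, forwards here in multi-threaded mode):

    size_t const flushable = ZSTD_toFlushNow(cctx);   /* cctx : the compressing ZSTD_CCtx* */
    if (flushable > 0) {
        /* produced-but-unflushed bytes exist : draining output now makes progress,
         * e.g. via ZSTD_compress_generic(cctx, &output, &input, ZSTD_e_flush) */
    } else {
        /* oldest job still busy : a blocking flush would wait on its compression speed */
    }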
1024 /* ------------------------------------------ */
1139 /* ------------------------------------------ */
1025 /* ===== Multi-threaded compression ===== */
1140 /* ===== Multi-threaded compression ===== */
1026 /* ------------------------------------------ */
1141 /* ------------------------------------------ */
@@ -1087,18 +1202,10 b' static size_t ZSTDMT_compress_advanced_i'
1087
1202
1088 assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
1203 assert(avgJobSize >= 256 KB); /* condition for ZSTD_compressBound(A) + ZSTD_compressBound(B) <= ZSTD_compressBound(A+B), required to compress directly into Dst (no additional buffer) */
1089 ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
1204 ZSTDMT_setBufferSize(mtctx->bufPool, ZSTD_compressBound(avgJobSize) );
1090 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
1205 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize))
1091 return ERROR(memory_allocation);
1206 return ERROR(memory_allocation);
1092
1207
1093 if (nbJobs > mtctx->jobIDMask+1) { /* enlarge job table */
1208 CHECK_F( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */
1094 U32 jobsTableSize = nbJobs;
1095 ZSTDMT_freeJobsTable(mtctx->jobs, mtctx->jobIDMask+1, mtctx->cMem);
1096 mtctx->jobIDMask = 0;
1097 mtctx->jobs = ZSTDMT_createJobsTable(&jobsTableSize, mtctx->cMem);
1098 if (mtctx->jobs==NULL) return ERROR(memory_allocation);
1099 assert((jobsTableSize != 0) && ((jobsTableSize & (jobsTableSize - 1)) == 0)); /* ensure jobsTableSize is a power of 2 */
1100 mtctx->jobIDMask = jobsTableSize - 1;
1101 }
1102
1209
1103 { unsigned u;
1210 { unsigned u;
1104 for (u=0; u<nbJobs; u++) {
1211 for (u=0; u<nbJobs; u++) {
@@ -1221,17 +1328,18 b' size_t ZSTDMT_initCStream_internal('
1221 const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
1328 const ZSTD_CDict* cdict, ZSTD_CCtx_params params,
1222 unsigned long long pledgedSrcSize)
1329 unsigned long long pledgedSrcSize)
1223 {
1330 {
1224 DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u, disableLiteralCompression=%i)",
1331 DEBUGLOG(4, "ZSTDMT_initCStream_internal (pledgedSrcSize=%u, nbWorkers=%u, cctxPool=%u)",
1225 (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx, params.disableLiteralCompression);
1332 (U32)pledgedSrcSize, params.nbWorkers, mtctx->cctxPool->totalCCtx);
1226 /* params are supposed to be fully validated at this point */
1333
1334 /* params are supposed to be at least partially validated at this point */
1227 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1335 assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
1228 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
1336 assert(!((dict) && (cdict))); /* either dict or cdict, not both */
1229 assert(mtctx->cctxPool->totalCCtx == params.nbWorkers);
1230
1337
1231 /* init */
1338 /* init */
1232 if (params.jobSize == 0) {
1339 if (params.nbWorkers != mtctx->params.nbWorkers)
1233 params.jobSize = 1U << ZSTDMT_computeTargetJobLog(params);
1340 CHECK_F( ZSTDMT_resize(mtctx, params.nbWorkers) );
1234 }
1341
1342 if (params.jobSize > 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN;
1235 if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1343 if (params.jobSize > ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX;
1236
1344
1237 mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
1345 mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */
@@ -1270,7 +1378,9 b' size_t ZSTDMT_initCStream_internal('
1270 mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1378 mtctx->targetPrefixSize = (size_t)1 << ZSTDMT_computeOverlapLog(params);
1271 DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
1379 DEBUGLOG(4, "overlapLog=%u => %u KB", params.overlapSizeLog, (U32)(mtctx->targetPrefixSize>>10));
1272 mtctx->targetSectionSize = params.jobSize;
1380 mtctx->targetSectionSize = params.jobSize;
1273 if (mtctx->targetSectionSize < ZSTDMT_JOBSIZE_MIN) mtctx->targetSectionSize = ZSTDMT_JOBSIZE_MIN;
1381 if (mtctx->targetSectionSize == 0) {
1382 mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params);
1383 }
1274 if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
1384 if (mtctx->targetSectionSize < mtctx->targetPrefixSize) mtctx->targetSectionSize = mtctx->targetPrefixSize; /* job size must be >= overlap size */
1275 DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
1385 DEBUGLOG(4, "Job Size : %u KB (note : set to %u)", (U32)(mtctx->targetSectionSize>>10), params.jobSize);
1276 DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
1386 DEBUGLOG(4, "inBuff Size : %u KB", (U32)(mtctx->targetSectionSize>>10));
@@ -1312,7 +1422,7 b' size_t ZSTDMT_initCStream_internal('
1312 mtctx->allJobsCompleted = 0;
1422 mtctx->allJobsCompleted = 0;
1313 mtctx->consumed = 0;
1423 mtctx->consumed = 0;
1314 mtctx->produced = 0;
1424 mtctx->produced = 0;
1315 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params))
1425 if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, mtctx->targetSectionSize))
1316 return ERROR(memory_allocation);
1426 return ERROR(memory_allocation);
1317 return 0;
1427 return 0;
1318 }
1428 }
@@ -1420,7 +1530,7 b' static size_t ZSTDMT_createCompressionJo'
1420 mtctx->jobs[jobID].jobID = mtctx->nextJobID;
1530 mtctx->jobs[jobID].jobID = mtctx->nextJobID;
1421 mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
1531 mtctx->jobs[jobID].firstJob = (mtctx->nextJobID==0);
1422 mtctx->jobs[jobID].lastJob = endFrame;
1532 mtctx->jobs[jobID].lastJob = endFrame;
1423 mtctx->jobs[jobID].frameChecksumNeeded = endFrame && (mtctx->nextJobID>0) && mtctx->params.fParams.checksumFlag;
1533 mtctx->jobs[jobID].frameChecksumNeeded = mtctx->params.fParams.checksumFlag && endFrame && (mtctx->nextJobID>0);
1424 mtctx->jobs[jobID].dstFlushed = 0;
1534 mtctx->jobs[jobID].dstFlushed = 0;
1425
1535
1426 /* Update the round buffer pos and clear the input buffer to be reset */
1536 /* Update the round buffer pos and clear the input buffer to be reset */
@@ -1468,6 +1578,8 b' static size_t ZSTDMT_createCompressionJo'
1468
1578
1469
1579
1470 /*! ZSTDMT_flushProduced() :
1580 /*! ZSTDMT_flushProduced() :
1581 * flush whatever data has been produced but not yet flushed in current job.
1582 * move to next job if current one is fully flushed.
1471 * `output` : `pos` will be updated with amount of data flushed .
1583 * `output` : `pos` will be updated with amount of data flushed .
1472 * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
1584 * `blockToFlush` : if >0, the function will block and wait if there is no data available to flush .
1473 * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
1585 * @return : amount of data remaining within internal buffer, 0 if no more, 1 if unknown but > 0, or an error code */
@@ -1496,7 +1608,7 b' static size_t ZSTDMT_flushProduced(ZSTDM'
1496 /* try to flush something */
1608 /* try to flush something */
1497 { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */
1609 { size_t cSize = mtctx->jobs[wJobID].cSize; /* shared */
1498 size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */
1610 size_t const srcConsumed = mtctx->jobs[wJobID].consumed; /* shared */
1499 size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */
1611 size_t const srcSize = mtctx->jobs[wJobID].src.size; /* read-only, could be done after mutex lock, but no-declaration-after-statement */
1500 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1612 ZSTD_pthread_mutex_unlock(&mtctx->jobs[wJobID].job_mutex);
1501 if (ZSTD_isError(cSize)) {
1613 if (ZSTD_isError(cSize)) {
1502 DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
1614 DEBUGLOG(5, "ZSTDMT_flushProduced: job %u : compression error detected : %s",
@@ -1516,6 +1628,7 b' static size_t ZSTDMT_flushProduced(ZSTDM'
1516 mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */
1628 mtctx->jobs[wJobID].cSize += 4; /* can write this shared value, as worker is no longer active */
1517 mtctx->jobs[wJobID].frameChecksumNeeded = 0;
1629 mtctx->jobs[wJobID].frameChecksumNeeded = 0;
1518 }
1630 }
1631
1519 if (cSize > 0) { /* compression is ongoing or completed */
1632 if (cSize > 0) { /* compression is ongoing or completed */
1520 size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
1633 size_t const toFlush = MIN(cSize - mtctx->jobs[wJobID].dstFlushed, output->size - output->pos);
1521 DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
1634 DEBUGLOG(5, "ZSTDMT_flushProduced: Flushing %u bytes from job %u (completion:%u/%u, generated:%u)",
@@ -1529,11 +1642,12 b' static size_t ZSTDMT_flushProduced(ZSTDM'
1529 output->pos += toFlush;
1642 output->pos += toFlush;
1530 mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
1643 mtctx->jobs[wJobID].dstFlushed += toFlush; /* can write : this value is only used by mtctx */
1531
1644
1532 if ( (srcConsumed == srcSize) /* job completed */
1645 if ( (srcConsumed == srcSize) /* job is completed */
1533 && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */
1646 && (mtctx->jobs[wJobID].dstFlushed == cSize) ) { /* output buffer fully flushed => free this job position */
1534 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
1647 DEBUGLOG(5, "Job %u completed (%u bytes), moving to next one",
1535 mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
1648 mtctx->doneJobID, (U32)mtctx->jobs[wJobID].dstFlushed);
1536 ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
1649 ZSTDMT_releaseBuffer(mtctx->bufPool, mtctx->jobs[wJobID].dstBuff);
1650 DEBUGLOG(5, "dstBuffer released");
1537 mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
1651 mtctx->jobs[wJobID].dstBuff = g_nullBuffer;
1538 mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */
1652 mtctx->jobs[wJobID].cSize = 0; /* ensure this job slot is considered "not started" in future check */
1539 mtctx->consumed += srcSize;
1653 mtctx->consumed += srcSize;
@@ -1610,6 +1724,7 b' static int ZSTDMT_doesOverlapWindow(buff'
1610 range_t extDict;
1724 range_t extDict;
1611 range_t prefix;
1725 range_t prefix;
1612
1726
1727 DEBUGLOG(5, "ZSTDMT_doesOverlapWindow");
1613 extDict.start = window.dictBase + window.lowLimit;
1728 extDict.start = window.dictBase + window.lowLimit;
1614 extDict.size = window.dictLimit - window.lowLimit;
1729 extDict.size = window.dictLimit - window.lowLimit;
1615
1730
@@ -1630,12 +1745,13 b' static void ZSTDMT_waitForLdmComplete(ZS'
1630 {
1745 {
1631 if (mtctx->params.ldmParams.enableLdm) {
1746 if (mtctx->params.ldmParams.enableLdm) {
1632 ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
1747 ZSTD_pthread_mutex_t* mutex = &mtctx->serial.ldmWindowMutex;
1748 DEBUGLOG(5, "ZSTDMT_waitForLdmComplete");
1633 DEBUGLOG(5, "source [0x%zx, 0x%zx)",
1749 DEBUGLOG(5, "source [0x%zx, 0x%zx)",
1634 (size_t)buffer.start,
1750 (size_t)buffer.start,
1635 (size_t)buffer.start + buffer.capacity);
1751 (size_t)buffer.start + buffer.capacity);
1636 ZSTD_PTHREAD_MUTEX_LOCK(mutex);
1752 ZSTD_PTHREAD_MUTEX_LOCK(mutex);
1637 while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
1753 while (ZSTDMT_doesOverlapWindow(buffer, mtctx->serial.ldmWindow)) {
1638 DEBUGLOG(6, "Waiting for LDM to finish...");
1754 DEBUGLOG(5, "Waiting for LDM to finish...");
1639 ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
1755 ZSTD_pthread_cond_wait(&mtctx->serial.ldmWindowCond, mutex);
1640 }
1756 }
1641 DEBUGLOG(6, "Done waiting for LDM to finish");
1757 DEBUGLOG(6, "Done waiting for LDM to finish");
@@ -1655,6 +1771,7 b' static int ZSTDMT_tryGetInputRange(ZSTDM'
1655 size_t const target = mtctx->targetSectionSize;
1771 size_t const target = mtctx->targetSectionSize;
1656 buffer_t buffer;
1772 buffer_t buffer;
1657
1773
1774 DEBUGLOG(5, "ZSTDMT_tryGetInputRange");
1658 assert(mtctx->inBuff.buffer.start == NULL);
1775 assert(mtctx->inBuff.buffer.start == NULL);
1659 assert(mtctx->roundBuff.capacity >= target);
1776 assert(mtctx->roundBuff.capacity >= target);
1660
1777
@@ -1668,7 +1785,7 b' static int ZSTDMT_tryGetInputRange(ZSTDM'
1668 buffer.start = start;
1785 buffer.start = start;
1669 buffer.capacity = prefixSize;
1786 buffer.capacity = prefixSize;
1670 if (ZSTDMT_isOverlapped(buffer, inUse)) {
1787 if (ZSTDMT_isOverlapped(buffer, inUse)) {
1671 DEBUGLOG(6, "Waiting for buffer...");
1788 DEBUGLOG(5, "Waiting for buffer...");
1672 return 0;
1789 return 0;
1673 }
1790 }
1674 ZSTDMT_waitForLdmComplete(mtctx, buffer);
1791 ZSTDMT_waitForLdmComplete(mtctx, buffer);
@@ -1680,7 +1797,7 b' static int ZSTDMT_tryGetInputRange(ZSTDM'
1680 buffer.capacity = target;
1797 buffer.capacity = target;
1681
1798
1682 if (ZSTDMT_isOverlapped(buffer, inUse)) {
1799 if (ZSTDMT_isOverlapped(buffer, inUse)) {
1683 DEBUGLOG(6, "Waiting for buffer...");
1800 DEBUGLOG(5, "Waiting for buffer...");
1684 return 0;
1801 return 0;
1685 }
1802 }
1686 assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
1803 assert(!ZSTDMT_isOverlapped(buffer, mtctx->inBuff.prefix));
@@ -1753,8 +1870,10 b' size_t ZSTDMT_compressStream_generic(ZST'
1753 /* It is only possible for this operation to fail if there are
1870 /* It is only possible for this operation to fail if there are
1754 * still compression jobs ongoing.
1871 * still compression jobs ongoing.
1755 */
1872 */
1873 DEBUGLOG(5, "ZSTDMT_tryGetInputRange failed");
1756 assert(mtctx->doneJobID != mtctx->nextJobID);
1874 assert(mtctx->doneJobID != mtctx->nextJobID);
1757 }
1875 } else
1876 DEBUGLOG(5, "ZSTDMT_tryGetInputRange completed successfully : mtctx->inBuff.buffer.start = %p", mtctx->inBuff.buffer.start);
1758 }
1877 }
1759 if (mtctx->inBuff.buffer.start != NULL) {
1878 if (mtctx->inBuff.buffer.start != NULL) {
1760 size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
1879 size_t const toLoad = MIN(input->size - input->pos, mtctx->targetSectionSize - mtctx->inBuff.filled);
@@ -1782,6 +1901,7 b' size_t ZSTDMT_compressStream_generic(ZST'
1782 /* check for potential compressed data ready to be flushed */
1901 /* check for potential compressed data ready to be flushed */
1783 { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
1902 { size_t const remainingToFlush = ZSTDMT_flushProduced(mtctx, output, !forwardInputProgress, endOp); /* block if there was no forward input progress */
1784 if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */
1903 if (input->pos < input->size) return MAX(remainingToFlush, 1); /* input not consumed : do not end flush yet */
1904 DEBUGLOG(5, "end of ZSTDMT_compressStream_generic: remainingToFlush = %u", (U32)remainingToFlush);
1785 return remainingToFlush;
1905 return remainingToFlush;
1786 }
1906 }
1787 }
1907 }
@@ -95,6 +95,11 b' typedef enum {'
95 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
95 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
96 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
96 ZSTDLIB_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned value);
97
97
98 /* ZSTDMT_getMTCtxParameter() :
99 * Query the ZSTDMT_CCtx for a parameter value.
100 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
101 ZSTDLIB_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, unsigned* value);
102
98
103
99 /*! ZSTDMT_compressStream_generic() :
104 /*! ZSTDMT_compressStream_generic() :
100 * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
105 * Combines ZSTDMT_compressStream() with optional ZSTDMT_flushStream() or ZSTDMT_endStream()
@@ -114,11 +119,21 b' ZSTDLIB_API size_t ZSTDMT_compressStream'
114 * === Not exposed in libzstd. Never invoke directly ===
119 * === Not exposed in libzstd. Never invoke directly ===
115 * ======================================================== */
120 * ======================================================== */
116
121
122 /*! ZSTDMT_toFlushNow()
123 * Tell how many bytes are ready to be flushed immediately.
124 * Probe the oldest active job (not yet entirely flushed) and check its output buffer.
125 * A return value of 0 means either that there is no active job,
126 * or that the oldest job is still active but everything it has produced has already been flushed,
127 * so flushing speed is limited by the speed of the oldest job. */
128 size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mtctx);
129
130 /*! ZSTDMT_CCtxParam_setMTCtxParameter()
131 * like ZSTDMT_setMTCtxParameter(), but into a ZSTD_CCtx_Params */
117 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
132 size_t ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_CCtx_params* params, ZSTDMT_parameter parameter, unsigned value);
118
133
119 /* ZSTDMT_CCtxParam_setNbWorkers()
134 /*! ZSTDMT_CCtxParam_setNbWorkers()
120 * Set nbWorkers, and clamp it.
135 * Set nbWorkers, and clamp it.
121 * Also reset jobSize and overlapLog */
136 * Also reset jobSize and overlapLog */
122 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
137 size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers);
123
138
124 /*! ZSTDMT_updateCParams_whileCompressing() :
139 /*! ZSTDMT_updateCParams_whileCompressing() :
@@ -126,14 +141,9 b' size_t ZSTDMT_CCtxParam_setNbWorkers(ZST'
126 * New parameters will be applied to next compression job. */
141 * New parameters will be applied to next compression job. */
127 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
142 void ZSTDMT_updateCParams_whileCompressing(ZSTDMT_CCtx* mtctx, const ZSTD_CCtx_params* cctxParams);
128
143
129 /* ZSTDMT_getNbWorkers():
144 /*! ZSTDMT_getFrameProgression():
130 * @return nb threads currently active in mtctx.
145 * tells how much data has been consumed (input) and produced (output) for current frame.
131 * mtctx must be valid */
146 * able to count progression inside worker threads.
132 unsigned ZSTDMT_getNbWorkers(const ZSTDMT_CCtx* mtctx);
133
134 /* ZSTDMT_getFrameProgression():
135 * tells how much data has been consumed (input) and produced (output) for current frame.
136 * able to count progression inside worker threads.
137 */
147 */
138 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
148 ZSTD_frameProgression ZSTDMT_getFrameProgression(ZSTDMT_CCtx* mtctx);
139
149
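The huf_decompress.c hunks that follow apply one systematic rename: the single-symbol decoder family moves from the X2 suffix to X1 and, inferring from the same release's pattern, the double-symbol family from X4 to X2, so the suffix now states how many symbols one table cell carries. The two cell layouts after the rename:

    typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;                  /* X1 : one symbol per cell */
    typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* X2 : up to two symbols per cell (the former HUF_DEltX4) */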
@@ -1,6 +1,7 b''
1 /* ******************************************************************
1 /* ******************************************************************
2 Huffman decoder, part of New Generation Entropy library
2 huff0 huffman decoder,
3 Copyright (C) 2013-2016, Yann Collet.
3 part of Finite State Entropy library
4 Copyright (C) 2013-present, Yann Collet.
4
5
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7
@@ -29,16 +30,15 b''
29
30
30 You can contact the author at :
31 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
33 ****************************************************************** */
34
34
35 /* **************************************************************
35 /* **************************************************************
36 * Dependencies
36 * Dependencies
37 ****************************************************************/
37 ****************************************************************/
38 #include <string.h> /* memcpy, memset */
38 #include <string.h> /* memcpy, memset */
39 #include "compiler.h"
39 #include "bitstream.h" /* BIT_* */
40 #include "bitstream.h" /* BIT_* */
40 #include "compiler.h"
41 #include "fse.h" /* to compress headers */
41 #include "fse.h" /* header compression */
42 #define HUF_STATIC_LINKING_ONLY
42 #define HUF_STATIC_LINKING_ONLY
43 #include "huf.h"
43 #include "huf.h"
44 #include "error_private.h"
44 #include "error_private.h"
@@ -48,7 +48,6 b''
48 * Error Management
48 * Error Management
49 ****************************************************************/
49 ****************************************************************/
50 #define HUF_isError ERR_isError
50 #define HUF_isError ERR_isError
51 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
52 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
51 #define CHECK_F(f) { size_t const err_ = (f); if (HUF_isError(err_)) return err_; }
53
52
54
53
@@ -75,15 +74,15 b' static DTableDesc HUF_getDTableDesc(cons'
75 /*-***************************/
74 /*-***************************/
76 /* single-symbol decoding */
75 /* single-symbol decoding */
77 /*-***************************/
76 /*-***************************/
78 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
77 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1; /* single-symbol decoding */
79
78
80 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
79 size_t HUF_readDTableX1_wksp(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize)
81 {
80 {
82 U32 tableLog = 0;
81 U32 tableLog = 0;
83 U32 nbSymbols = 0;
82 U32 nbSymbols = 0;
84 size_t iSize;
83 size_t iSize;
85 void* const dtPtr = DTable + 1;
84 void* const dtPtr = DTable + 1;
86 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
85 HUF_DEltX1* const dt = (HUF_DEltX1*)dtPtr;
87
86
88 U32* rankVal;
87 U32* rankVal;
89 BYTE* huffWeight;
88 BYTE* huffWeight;
@@ -96,7 +95,7 b' size_t HUF_readDTableX2_wksp(HUF_DTable*'
96
95
97 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
96 if ((spaceUsed32 << 2) > wkspSize) return ERROR(tableLog_tooLarge);
98
97
99 HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
98 DEBUG_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
100 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
99 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
101
100
102 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
101 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
@@ -124,7 +123,7 b' size_t HUF_readDTableX2_wksp(HUF_DTable*'
124 U32 const w = huffWeight[n];
123 U32 const w = huffWeight[n];
125 U32 const length = (1 << w) >> 1;
124 U32 const length = (1 << w) >> 1;
126 U32 u;
125 U32 u;
127 HUF_DEltX2 D;
126 HUF_DEltX1 D;
128 D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
127 D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
129 for (u = rankVal[w]; u < rankVal[w] + length; u++)
128 for (u = rankVal[w]; u < rankVal[w] + length; u++)
130 dt[u] = D;
129 dt[u] = D;
@@ -134,17 +133,15 b' size_t HUF_readDTableX2_wksp(HUF_DTable*'
134 return iSize;
133 return iSize;
135 }
134 }
136
135
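To make the fill loop in HUF_readDTableX1_wksp concrete, a worked example (added for illustration; the values are arbitrary): a symbol of weight w decodes in nbBits = tableLog + 1 - w bits and occupies length = (1 << w) >> 1 consecutive slots. With tableLog = 6 and w = 4, that is 3 bits and 8 of the 64 table entries, so every 6-bit lookup whose top 3 bits match the symbol's code resolves to the same {byte, nbBits} element.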
137 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
136 size_t HUF_readDTableX1(HUF_DTable* DTable, const void* src, size_t srcSize)
138 {
137 {
139 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
138 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
140 return HUF_readDTableX2_wksp(DTable, src, srcSize,
139 return HUF_readDTableX1_wksp(DTable, src, srcSize,
141 workSpace, sizeof(workSpace));
140 workSpace, sizeof(workSpace));
142 }
141 }
143
142
144 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
145
146 FORCE_INLINE_TEMPLATE BYTE
143 FORCE_INLINE_TEMPLATE BYTE
147 HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
144 HUF_decodeSymbolX1(BIT_DStream_t* Dstream, const HUF_DEltX1* dt, const U32 dtLog)
148 {
145 {
149 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
146 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
150 BYTE const c = dt[val].byte;
147 BYTE const c = dt[val].byte;
@@ -152,44 +149,44 b' HUF_decodeSymbolX2(BIT_DStream_t* Dstrea'
152 return c;
149 return c;
153 }
150 }
154
151
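The key property of this lookup, spelled out for illustration (not from the patch): BIT_lookBitsFast peeks a full dtLog bits, but BIT_skipBits then consumes only dt[val].nbBits of them, which is what lets shorter codes share multiple table entries while keeping the hot path to a single table access.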
155 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
152 #define HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr) \
156 *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
153 *ptr++ = HUF_decodeSymbolX1(DStreamPtr, dt, dtLog)
157
154
158 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
155 #define HUF_DECODE_SYMBOLX1_1(ptr, DStreamPtr) \
159 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
156 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
160 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
157 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
161
158
162 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
159 #define HUF_DECODE_SYMBOLX1_2(ptr, DStreamPtr) \
163 if (MEM_64bits()) \
160 if (MEM_64bits()) \
164 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
161 HUF_DECODE_SYMBOLX1_0(ptr, DStreamPtr)
165
162
166 HINT_INLINE size_t
163 HINT_INLINE size_t
167 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
164 HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX1* const dt, const U32 dtLog)
168 {
165 {
169 BYTE* const pStart = p;
166 BYTE* const pStart = p;
170
167
171 /* up to 4 symbols at a time */
168 /* up to 4 symbols at a time */
172 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
169 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
173 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
170 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
174 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
171 HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
175 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
172 HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
176 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
173 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
177 }
174 }
178
175
179 /* [0-3] symbols remaining */
176 /* [0-3] symbols remaining */
180 if (MEM_32bits())
177 if (MEM_32bits())
181 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
178 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd))
182 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
179 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
183
180
184 /* no more data to retrieve from bitstream, no need to reload */
181 /* no more data to retrieve from bitstream, no need to reload */
185 while (p < pEnd)
182 while (p < pEnd)
186 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
183 HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
187
184
188 return pEnd-pStart;
185 return pEnd-pStart;
189 }
186 }
190
187
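The _0/_1/_2 macro variants above encode how many lookups are safe between reloads. A sketch of the arithmetic, under the assumption (about BIT_reloadDStream, not stated in this hunk) that a reload leaves at most 7 already-consumed bits in the container:

    /* Illustrative sketch, not part of the patch. */
    static unsigned symbols_per_reload(unsigned containerBits, unsigned maxTableLog)
    {
        return (containerBits - 7) / maxTableLog;  /* 57/12 = 4 on 64-bit, 25/12 = 2 on 32-bit */
    }

This matches the unrolled loop: four decodes per reload on 64-bit targets, two on 32-bit ones.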
191 FORCE_INLINE_TEMPLATE size_t
188 FORCE_INLINE_TEMPLATE size_t
192 HUF_decompress1X2_usingDTable_internal_body(
189 HUF_decompress1X1_usingDTable_internal_body(
193 void* dst, size_t dstSize,
190 void* dst, size_t dstSize,
194 const void* cSrc, size_t cSrcSize,
191 const void* cSrc, size_t cSrcSize,
195 const HUF_DTable* DTable)
192 const HUF_DTable* DTable)
@@ -197,14 +194,14 b' HUF_decompress1X2_usingDTable_internal_b'
197 BYTE* op = (BYTE*)dst;
194 BYTE* op = (BYTE*)dst;
198 BYTE* const oend = op + dstSize;
195 BYTE* const oend = op + dstSize;
199 const void* dtPtr = DTable + 1;
196 const void* dtPtr = DTable + 1;
200 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
197 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
201 BIT_DStream_t bitD;
198 BIT_DStream_t bitD;
202 DTableDesc const dtd = HUF_getDTableDesc(DTable);
199 DTableDesc const dtd = HUF_getDTableDesc(DTable);
203 U32 const dtLog = dtd.tableLog;
200 U32 const dtLog = dtd.tableLog;
204
201
205 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
202 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
206
203
207 HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
204 HUF_decodeStreamX1(op, &bitD, oend, dt, dtLog);
208
205
209 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
206 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
210
207
@@ -212,7 +209,7 b' HUF_decompress1X2_usingDTable_internal_b'
212 }
209 }
213
210
214 FORCE_INLINE_TEMPLATE size_t
211 FORCE_INLINE_TEMPLATE size_t
215 HUF_decompress4X2_usingDTable_internal_body(
212 HUF_decompress4X1_usingDTable_internal_body(
216 void* dst, size_t dstSize,
213 void* dst, size_t dstSize,
217 const void* cSrc, size_t cSrcSize,
214 const void* cSrc, size_t cSrcSize,
218 const HUF_DTable* DTable)
215 const HUF_DTable* DTable)
@@ -224,7 +221,7 b' HUF_decompress4X2_usingDTable_internal_b'
224 BYTE* const ostart = (BYTE*) dst;
221 BYTE* const ostart = (BYTE*) dst;
225 BYTE* const oend = ostart + dstSize;
222 BYTE* const oend = ostart + dstSize;
226 const void* const dtPtr = DTable + 1;
223 const void* const dtPtr = DTable + 1;
227 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
224 const HUF_DEltX1* const dt = (const HUF_DEltX1*)dtPtr;
228
225
229 /* Init */
226 /* Init */
230 BIT_DStream_t bitD1;
227 BIT_DStream_t bitD1;
@@ -260,22 +257,22 b' HUF_decompress4X2_usingDTable_internal_b'
260 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
257 /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
261 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
258 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
262 while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
259 while ( (endSignal==BIT_DStream_unfinished) && (op4<(oend-3)) ) {
263 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
260 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
264 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
261 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
265 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
262 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
266 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
263 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
267 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
264 HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
268 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
265 HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
269 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
266 HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
270 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
267 HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
271 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
268 HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
272 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
269 HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
273 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
270 HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
274 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
271 HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
275 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
272 HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
276 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
273 HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
277 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
274 HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
278 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
275 HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
279 BIT_reloadDStream(&bitD1);
276 BIT_reloadDStream(&bitD1);
280 BIT_reloadDStream(&bitD2);
277 BIT_reloadDStream(&bitD2);
281 BIT_reloadDStream(&bitD3);
278 BIT_reloadDStream(&bitD3);
@@ -291,191 +288,10 b' HUF_decompress4X2_usingDTable_internal_b'
291 /* note : op4 assumed already verified within main loop */
288 /* note : op4 assumed already verified within main loop */
292
289
293 /* finish bitStreams one by one */
290 /* finish bitStreams one by one */
294 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
291 HUF_decodeStreamX1(op1, &bitD1, opStart2, dt, dtLog);
295 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
292 HUF_decodeStreamX1(op2, &bitD2, opStart3, dt, dtLog);
296 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
293 HUF_decodeStreamX1(op3, &bitD3, opStart4, dt, dtLog);
297 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
294 HUF_decodeStreamX1(op4, &bitD4, oend, dt, dtLog);
298
299 /* check */
300 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
301 if (!endCheck) return ERROR(corruption_detected); }
302
303 /* decoded size */
304 return dstSize;
305 }
306 }
307
308
309 FORCE_INLINE_TEMPLATE U32
310 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
311 {
312 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
313 memcpy(op, dt+val, 2);
314 BIT_skipBits(DStream, dt[val].nbBits);
315 return dt[val].length;
316 }
317
318 FORCE_INLINE_TEMPLATE U32
319 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
320 {
321 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
322 memcpy(op, dt+val, 1);
323 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
324 else {
325 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
326 BIT_skipBits(DStream, dt[val].nbBits);
327 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
328 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
329 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
330 } }
331 return 1;
332 }
333
334 #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
335 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
336
337 #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
338 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
339 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
340
341 #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
342 if (MEM_64bits()) \
343 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
344
345 HINT_INLINE size_t
346 HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
347 const HUF_DEltX4* const dt, const U32 dtLog)
348 {
349 BYTE* const pStart = p;
350
351 /* up to 8 symbols at a time */
352 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
353 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
354 HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
355 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
356 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
357 }
358
359 /* closer to end : up to 2 symbols at a time */
360 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
361 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
362
363 while (p <= pEnd-2)
364 HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
365
366 if (p < pEnd)
367 p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
368
369 return p-pStart;
370 }
371
372 FORCE_INLINE_TEMPLATE size_t
373 HUF_decompress1X4_usingDTable_internal_body(
374 void* dst, size_t dstSize,
375 const void* cSrc, size_t cSrcSize,
376 const HUF_DTable* DTable)
377 {
378 BIT_DStream_t bitD;
379
380 /* Init */
381 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
382
383 /* decode */
384 { BYTE* const ostart = (BYTE*) dst;
385 BYTE* const oend = ostart + dstSize;
386 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
387 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
388 DTableDesc const dtd = HUF_getDTableDesc(DTable);
389 HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
390 }
391
392 /* check */
393 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
394
395 /* decoded size */
396 return dstSize;
397 }
398
399
400 FORCE_INLINE_TEMPLATE size_t
401 HUF_decompress4X4_usingDTable_internal_body(
402 void* dst, size_t dstSize,
403 const void* cSrc, size_t cSrcSize,
404 const HUF_DTable* DTable)
405 {
406 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
407
408 { const BYTE* const istart = (const BYTE*) cSrc;
409 BYTE* const ostart = (BYTE*) dst;
410 BYTE* const oend = ostart + dstSize;
411 const void* const dtPtr = DTable+1;
412 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
413
414 /* Init */
415 BIT_DStream_t bitD1;
416 BIT_DStream_t bitD2;
417 BIT_DStream_t bitD3;
418 BIT_DStream_t bitD4;
419 size_t const length1 = MEM_readLE16(istart);
420 size_t const length2 = MEM_readLE16(istart+2);
421 size_t const length3 = MEM_readLE16(istart+4);
422 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
423 const BYTE* const istart1 = istart + 6; /* jumpTable */
424 const BYTE* const istart2 = istart1 + length1;
425 const BYTE* const istart3 = istart2 + length2;
426 const BYTE* const istart4 = istart3 + length3;
427 size_t const segmentSize = (dstSize+3) / 4;
428 BYTE* const opStart2 = ostart + segmentSize;
429 BYTE* const opStart3 = opStart2 + segmentSize;
430 BYTE* const opStart4 = opStart3 + segmentSize;
431 BYTE* op1 = ostart;
432 BYTE* op2 = opStart2;
433 BYTE* op3 = opStart3;
434 BYTE* op4 = opStart4;
435 U32 endSignal;
436 DTableDesc const dtd = HUF_getDTableDesc(DTable);
437 U32 const dtLog = dtd.tableLog;
438
439 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
440 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
441 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
442 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
443 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
444
445 /* 16-32 symbols per loop (4-8 symbols per stream) */
446 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
447 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
448 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
449 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
450 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
451 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
452 HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
453 HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
454 HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
455 HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
456 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
457 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
458 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
459 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
460 HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
461 HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
462 HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
463 HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
464
465 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
466 }
467
468 /* check corruption */
469 if (op1 > opStart2) return ERROR(corruption_detected);
470 if (op2 > opStart3) return ERROR(corruption_detected);
471 if (op3 > opStart4) return ERROR(corruption_detected);
472 /* note : op4 already verified within main loop */
473
474 /* finish bitStreams one by one */
475 HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
476 HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
477 HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
478 HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
479
295
480 /* check */
296 /* check */
481 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
297 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
@@ -493,7 +309,7 b' typedef size_t (*HUF_decompress_usingDTa'
493 const HUF_DTable *DTable);
309 const HUF_DTable *DTable);
494 #if DYNAMIC_BMI2
310 #if DYNAMIC_BMI2
495
311
496 #define X(fn) \
312 #define HUF_DGEN(fn) \
497 \
313 \
498 static size_t fn##_default( \
314 static size_t fn##_default( \
499 void* dst, size_t dstSize, \
315 void* dst, size_t dstSize, \
@@ -522,7 +338,7 b' typedef size_t (*HUF_decompress_usingDTa'
522
338
523 #else
339 #else
524
340
525 #define X(fn) \
341 #define HUF_DGEN(fn) \
526 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
342 static size_t fn(void* dst, size_t dstSize, void const* cSrc, \
527 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
343 size_t cSrcSize, HUF_DTable const* DTable, int bmi2) \
528 { \
344 { \
@@ -532,112 +348,114 b' typedef size_t (*HUF_decompress_usingDTa'
532
348
533 #endif
349 #endif
534
350
535 X(HUF_decompress1X2_usingDTable_internal)
351 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
536 X(HUF_decompress4X2_usingDTable_internal)
352 HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
537 X(HUF_decompress1X4_usingDTable_internal)
538 X(HUF_decompress4X4_usingDTable_internal)
539
353
540 #undef X
541
354
542
355
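HUF_DGEN, invoked just above, stamps out the BMI2 dispatch boilerplate whose bodies are elided in this hunk. A minimal sketch of the assumed pattern (illustrative only; the real _bmi2 variant is presumably also marked so the compiler may emit BMI2 instructions):

    /* Minimal sketch, not part of the patch. */
    static int work_body(int x) { return x + 1; }

    #define MY_DGEN(fn)                                              \
        static int fn##_default(int x) { return fn##_body(x); }     \
        static int fn##_bmi2(int x)    { return fn##_body(x); }     \
        static int fn(int x, int bmi2)                               \
        { return bmi2 ? fn##_bmi2(x) : fn##_default(x); }

    MY_DGEN(work)  /* defines work_default, work_bmi2 and the dispatcher work() */

Without DYNAMIC_BMI2 the macro collapses to a single body that ignores the bmi2 flag, as the #else branch above shows.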
543 size_t HUF_decompress1X2_usingDTable(
356 size_t HUF_decompress1X1_usingDTable(
357 void* dst, size_t dstSize,
358 const void* cSrc, size_t cSrcSize,
359 const HUF_DTable* DTable)
360 {
361 DTableDesc dtd = HUF_getDTableDesc(DTable);
362 if (dtd.tableType != 0) return ERROR(GENERIC);
363 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
364 }
365
366 size_t HUF_decompress1X1_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
367 const void* cSrc, size_t cSrcSize,
368 void* workSpace, size_t wkspSize)
369 {
370 const BYTE* ip = (const BYTE*) cSrc;
371
372 size_t const hSize = HUF_readDTableX1_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
373 if (HUF_isError(hSize)) return hSize;
374 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
375 ip += hSize; cSrcSize -= hSize;
376
377 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
378 }
379
380
381 size_t HUF_decompress1X1_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
382 const void* cSrc, size_t cSrcSize)
383 {
384 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
385 return HUF_decompress1X1_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
386 workSpace, sizeof(workSpace));
387 }
388
389 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
390 {
391 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
392 return HUF_decompress1X1_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
393 }
394
395 size_t HUF_decompress4X1_usingDTable(
544 void* dst, size_t dstSize,
396 void* dst, size_t dstSize,
545 const void* cSrc, size_t cSrcSize,
397 const void* cSrc, size_t cSrcSize,
546 const HUF_DTable* DTable)
398 const HUF_DTable* DTable)
547 {
399 {
548 DTableDesc dtd = HUF_getDTableDesc(DTable);
400 DTableDesc dtd = HUF_getDTableDesc(DTable);
549 if (dtd.tableType != 0) return ERROR(GENERIC);
401 if (dtd.tableType != 0) return ERROR(GENERIC);
550 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
402 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
551 }
552
553 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
554 const void* cSrc, size_t cSrcSize,
555 void* workSpace, size_t wkspSize)
556 {
557 const BYTE* ip = (const BYTE*) cSrc;
558
559 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize, workSpace, wkspSize);
560 if (HUF_isError(hSize)) return hSize;
561 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
562 ip += hSize; cSrcSize -= hSize;
563
564 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
565 }
403 }
566
404
567
405 static size_t HUF_decompress4X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
568 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
569 const void* cSrc, size_t cSrcSize)
570 {
571 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
572 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
573 workSpace, sizeof(workSpace));
574 }
575
576 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
577 {
578 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
579 return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
580 }
581
582 size_t HUF_decompress4X2_usingDTable(
583 void* dst, size_t dstSize,
584 const void* cSrc, size_t cSrcSize,
585 const HUF_DTable* DTable)
586 {
587 DTableDesc dtd = HUF_getDTableDesc(DTable);
588 if (dtd.tableType != 0) return ERROR(GENERIC);
589 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
590 }
591
592 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
593 const void* cSrc, size_t cSrcSize,
406 const void* cSrc, size_t cSrcSize,
594 void* workSpace, size_t wkspSize, int bmi2)
407 void* workSpace, size_t wkspSize, int bmi2)
595 {
408 {
596 const BYTE* ip = (const BYTE*) cSrc;
409 const BYTE* ip = (const BYTE*) cSrc;
597
410
598 size_t const hSize = HUF_readDTableX2_wksp (dctx, cSrc, cSrcSize,
411 size_t const hSize = HUF_readDTableX1_wksp (dctx, cSrc, cSrcSize,
599 workSpace, wkspSize);
412 workSpace, wkspSize);
600 if (HUF_isError(hSize)) return hSize;
413 if (HUF_isError(hSize)) return hSize;
601 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
414 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
602 ip += hSize; cSrcSize -= hSize;
415 ip += hSize; cSrcSize -= hSize;
603
416
604 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
417 return HUF_decompress4X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
605 }
418 }
606
419
607 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
420 size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
608 const void* cSrc, size_t cSrcSize,
421 const void* cSrc, size_t cSrcSize,
609 void* workSpace, size_t wkspSize)
422 void* workSpace, size_t wkspSize)
610 {
423 {
611 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
424 return HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, 0);
612 }
425 }
613
426
614
427
615 size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
428 size_t HUF_decompress4X1_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
616 {
429 {
617 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
430 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
618 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
431 return HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
619 workSpace, sizeof(workSpace));
432 workSpace, sizeof(workSpace));
620 }
433 }
621 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
434 size_t HUF_decompress4X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
622 {
435 {
623 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
436 HUF_CREATE_STATIC_DTABLEX1(DTable, HUF_TABLELOG_MAX);
624 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
437 return HUF_decompress4X1_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
625 }
438 }
626
439
627
440
628 /* *************************/
441 /* *************************/
629 /* double-symbols decoding */
442 /* double-symbols decoding */
630 /* *************************/
443 /* *************************/
631 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
632
444
633 /* HUF_fillDTableX4Level2() :
445 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2; /* double-symbols decoding */
446 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
447 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
448 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
449
450
451 /* HUF_fillDTableX2Level2() :
634 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
452 * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
635 static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
453 static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
636 const U32* rankValOrigin, const int minWeight,
454 const U32* rankValOrigin, const int minWeight,
637 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
455 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
638 U32 nbBitsBaseline, U16 baseSeq)
456 U32 nbBitsBaseline, U16 baseSeq)
639 {
457 {
640 HUF_DEltX4 DElt;
458 HUF_DEltX2 DElt;
641 U32 rankVal[HUF_TABLELOG_MAX + 1];
459 U32 rankVal[HUF_TABLELOG_MAX + 1];
642
460
643 /* get pre-calculated rankVal */
461 /* get pre-calculated rankVal */
@@ -672,10 +490,8 b' static void HUF_fillDTableX4Level2(HUF_D'
672 } }
490 } }
673 }
491 }
674
492
675 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
676 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
677
493
678 static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
494 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
679 const sortedSymbol_t* sortedList, const U32 sortedListSize,
495 const sortedSymbol_t* sortedList, const U32 sortedListSize,
680 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
496 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
681 const U32 nbBitsBaseline)
497 const U32 nbBitsBaseline)
@@ -700,12 +516,12 b' static void HUF_fillDTableX4(HUF_DEltX4*'
700 int minWeight = nbBits + scaleLog;
516 int minWeight = nbBits + scaleLog;
701 if (minWeight < 1) minWeight = 1;
517 if (minWeight < 1) minWeight = 1;
702 sortedRank = rankStart[minWeight];
518 sortedRank = rankStart[minWeight];
703 HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
519 HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
704 rankValOrigin[nbBits], minWeight,
520 rankValOrigin[nbBits], minWeight,
705 sortedList+sortedRank, sortedListSize-sortedRank,
521 sortedList+sortedRank, sortedListSize-sortedRank,
706 nbBitsBaseline, symbol);
522 nbBitsBaseline, symbol);
707 } else {
523 } else {
708 HUF_DEltX4 DElt;
524 HUF_DEltX2 DElt;
709 MEM_writeLE16(&(DElt.sequence), symbol);
525 MEM_writeLE16(&(DElt.sequence), symbol);
710 DElt.nbBits = (BYTE)(nbBits);
526 DElt.nbBits = (BYTE)(nbBits);
711 DElt.length = 1;
527 DElt.length = 1;
@@ -717,16 +533,16 b' static void HUF_fillDTableX4(HUF_DEltX4*'
717 }
533 }
718 }
534 }
719
535
720 size_t HUF_readDTableX4_wksp(HUF_DTable* DTable, const void* src,
536 size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
721 size_t srcSize, void* workSpace,
537 const void* src, size_t srcSize,
722 size_t wkspSize)
538 void* workSpace, size_t wkspSize)
723 {
539 {
724 U32 tableLog, maxW, sizeOfSort, nbSymbols;
540 U32 tableLog, maxW, sizeOfSort, nbSymbols;
725 DTableDesc dtd = HUF_getDTableDesc(DTable);
541 DTableDesc dtd = HUF_getDTableDesc(DTable);
726 U32 const maxTableLog = dtd.maxTableLog;
542 U32 const maxTableLog = dtd.maxTableLog;
727 size_t iSize;
543 size_t iSize;
728 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
544 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
729 HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
545 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
730 U32 *rankStart;
546 U32 *rankStart;
731
547
732 rankValCol_t* rankVal;
548 rankValCol_t* rankVal;
@@ -752,7 +568,7 b' size_t HUF_readDTableX4_wksp(HUF_DTable*'
752 rankStart = rankStart0 + 1;
568 rankStart = rankStart0 + 1;
753 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
569 memset(rankStats, 0, sizeof(U32) * (2 * HUF_TABLELOG_MAX + 2 + 1));
754
570
755 HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
571 DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(HUF_DTable)); /* if compiler fails here, assertion is wrong */
756 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
572 if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
757 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
573 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
758
574
@@ -806,7 +622,7 b' size_t HUF_readDTableX4_wksp(HUF_DTable*'
806 rankValPtr[w] = rankVal0[w] >> consumed;
622 rankValPtr[w] = rankVal0[w] >> consumed;
807 } } } }
623 } } } }
808
624
809 HUF_fillDTableX4(dt, maxTableLog,
625 HUF_fillDTableX2(dt, maxTableLog,
810 sortedSymbol, sizeOfSort,
626 sortedSymbol, sizeOfSort,
811 rankStart0, rankVal, maxW,
627 rankStart0, rankVal, maxW,
812 tableLog+1);
628 tableLog+1);
@@ -817,112 +633,296 b' size_t HUF_readDTableX4_wksp(HUF_DTable*'
817 return iSize;
633 return iSize;
818 }
634 }
819
635
820 size_t HUF_readDTableX4(HUF_DTable* DTable, const void* src, size_t srcSize)
636 size_t HUF_readDTableX2(HUF_DTable* DTable, const void* src, size_t srcSize)
821 {
637 {
822 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
638 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
823 return HUF_readDTableX4_wksp(DTable, src, srcSize,
639 return HUF_readDTableX2_wksp(DTable, src, srcSize,
824 workSpace, sizeof(workSpace));
640 workSpace, sizeof(workSpace));
825 }
641 }
826
642
827 size_t HUF_decompress1X4_usingDTable(
643
644 FORCE_INLINE_TEMPLATE U32
645 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
646 {
647 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
648 memcpy(op, dt+val, 2);
649 BIT_skipBits(DStream, dt[val].nbBits);
650 return dt[val].length;
651 }
652
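The two-byte memcpy above is safe because of the X2 entry layout introduced earlier. A standalone sketch of the invariant (illustrative; type renamed):

    /* Illustrative sketch, not part of the patch: an X2 entry packs up
     * to two decoded bytes in `sequence`, the bits consumed in `nbBits`,
     * and how many output bytes it yields (1 or 2) in `length`. The
     * decoder may store two bytes unconditionally but only advances the
     * output pointer by `length`. */
    #include <stdint.h>
    #include <string.h>

    typedef struct { uint16_t sequence; uint8_t nbBits; uint8_t length; } DeltX2;

    static size_t emit_x2(uint8_t* op, const DeltX2* e)
    {
        memcpy(op, &e->sequence, 2);  /* writes a spare byte when length == 1 */
        return e->length;             /* caller advances op by this much */
    }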
653 FORCE_INLINE_TEMPLATE U32
654 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
655 {
656 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
657 memcpy(op, dt+val, 1);
658 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
659 else {
660 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
661 BIT_skipBits(DStream, dt[val].nbBits);
662 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
663 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
664 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
665 } }
666 return 1;
667 }
668
669 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
670 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
671
672 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
673 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
674 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
675
676 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
677 if (MEM_64bits()) \
678 ptr += HUF_decodeSymbolX2(ptr, DStreamPtr, dt, dtLog)
679
680 HINT_INLINE size_t
681 HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
682 const HUF_DEltX2* const dt, const U32 dtLog)
683 {
684 BYTE* const pStart = p;
685
686 /* up to 8 symbols at a time */
687 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
688 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
689 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
690 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
691 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
692 }
693
694 /* closer to end : up to 2 symbols at a time */
695 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
696 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
697
698 while (p <= pEnd-2)
699 HUF_DECODE_SYMBOLX2_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
700
701 if (p < pEnd)
702 p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
703
704 return p-pStart;
705 }
706
707 FORCE_INLINE_TEMPLATE size_t
708 HUF_decompress1X2_usingDTable_internal_body(
709 void* dst, size_t dstSize,
710 const void* cSrc, size_t cSrcSize,
711 const HUF_DTable* DTable)
712 {
713 BIT_DStream_t bitD;
714
715 /* Init */
716 CHECK_F( BIT_initDStream(&bitD, cSrc, cSrcSize) );
717
718 /* decode */
719 { BYTE* const ostart = (BYTE*) dst;
720 BYTE* const oend = ostart + dstSize;
721 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
722 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
723 DTableDesc const dtd = HUF_getDTableDesc(DTable);
724 HUF_decodeStreamX2(ostart, &bitD, oend, dt, dtd.tableLog);
725 }
726
727 /* check */
728 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
729
730 /* decoded size */
731 return dstSize;
732 }
733
734
735 FORCE_INLINE_TEMPLATE size_t
736 HUF_decompress4X2_usingDTable_internal_body(
737 void* dst, size_t dstSize,
738 const void* cSrc, size_t cSrcSize,
739 const HUF_DTable* DTable)
740 {
741 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
742
743 { const BYTE* const istart = (const BYTE*) cSrc;
744 BYTE* const ostart = (BYTE*) dst;
745 BYTE* const oend = ostart + dstSize;
746 const void* const dtPtr = DTable+1;
747 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
748
749 /* Init */
750 BIT_DStream_t bitD1;
751 BIT_DStream_t bitD2;
752 BIT_DStream_t bitD3;
753 BIT_DStream_t bitD4;
754 size_t const length1 = MEM_readLE16(istart);
755 size_t const length2 = MEM_readLE16(istart+2);
756 size_t const length3 = MEM_readLE16(istart+4);
757 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
758 const BYTE* const istart1 = istart + 6; /* jumpTable */
759 const BYTE* const istart2 = istart1 + length1;
760 const BYTE* const istart3 = istart2 + length2;
761 const BYTE* const istart4 = istart3 + length3;
762 size_t const segmentSize = (dstSize+3) / 4;
763 BYTE* const opStart2 = ostart + segmentSize;
764 BYTE* const opStart3 = opStart2 + segmentSize;
765 BYTE* const opStart4 = opStart3 + segmentSize;
766 BYTE* op1 = ostart;
767 BYTE* op2 = opStart2;
768 BYTE* op3 = opStart3;
769 BYTE* op4 = opStart4;
770 U32 endSignal;
771 DTableDesc const dtd = HUF_getDTableDesc(DTable);
772 U32 const dtLog = dtd.tableLog;
773
774 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
775 CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
776 CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
777 CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
778 CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
779
780 /* 16-32 symbols per loop (4-8 symbols per stream) */
781 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
782 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
783 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
784 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
785 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
786 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
787 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
788 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
789 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
790 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
791 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
792 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
793 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
794 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
795 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
796 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
797 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
798 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
799
800 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
801 }
802
803 /* check corruption */
804 if (op1 > opStart2) return ERROR(corruption_detected);
805 if (op2 > opStart3) return ERROR(corruption_detected);
806 if (op3 > opStart4) return ERROR(corruption_detected);
807 /* note : op4 already verified within main loop */
808
809 /* finish bitStreams one by one */
810 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
811 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
812 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
813 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
814
815 /* check */
816 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
817 if (!endCheck) return ERROR(corruption_detected); }
818
819 /* decoded size */
820 return dstSize;
821 }
822 }
823
824 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
825 HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
826
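Both four-stream bodies consume the same framing: a 6-byte jump table holding three little-endian 16-bit stream lengths, the fourth length being implied by the total. A standalone sketch of that parse (illustrative; the real code uses MEM_readLE16, while this version assumes a little-endian host for brevity):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* Illustrative sketch, not part of the patch. Returns 0 on success. */
    static int parse_jump_table(const uint8_t* src, size_t srcSize, size_t len[4])
    {
        uint16_t l;
        if (srcSize < 10) return -1;               /* jump table + 1 byte per stream */
        memcpy(&l, src + 0, 2); len[0] = l;
        memcpy(&l, src + 2, 2); len[1] = l;
        memcpy(&l, src + 4, 2); len[2] = l;
        if (len[0] + len[1] + len[2] + 6 > srcSize) return -1;  /* overflow check */
        len[3] = srcSize - (len[0] + len[1] + len[2] + 6);
        return 0;
    }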
827 size_t HUF_decompress1X2_usingDTable(
828 void* dst, size_t dstSize,
828 void* dst, size_t dstSize,
829 const void* cSrc, size_t cSrcSize,
829 const void* cSrc, size_t cSrcSize,
830 const HUF_DTable* DTable)
830 const HUF_DTable* DTable)
831 {
831 {
832 DTableDesc dtd = HUF_getDTableDesc(DTable);
832 DTableDesc dtd = HUF_getDTableDesc(DTable);
833 if (dtd.tableType != 1) return ERROR(GENERIC);
833 if (dtd.tableType != 1) return ERROR(GENERIC);
834 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
834 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
835 }
835 }
836
836
837 size_t HUF_decompress1X4_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
837 size_t HUF_decompress1X2_DCtx_wksp(HUF_DTable* DCtx, void* dst, size_t dstSize,
838 const void* cSrc, size_t cSrcSize,
838 const void* cSrc, size_t cSrcSize,
839 void* workSpace, size_t wkspSize)
839 void* workSpace, size_t wkspSize)
840 {
840 {
841 const BYTE* ip = (const BYTE*) cSrc;
841 const BYTE* ip = (const BYTE*) cSrc;
842
842
843 size_t const hSize = HUF_readDTableX4_wksp(DCtx, cSrc, cSrcSize,
843 size_t const hSize = HUF_readDTableX2_wksp(DCtx, cSrc, cSrcSize,
844 workSpace, wkspSize);
844 workSpace, wkspSize);
845 if (HUF_isError(hSize)) return hSize;
845 if (HUF_isError(hSize)) return hSize;
846 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
846 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
847 ip += hSize; cSrcSize -= hSize;
847 ip += hSize; cSrcSize -= hSize;
848
848
849 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
849 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, DCtx, /* bmi2 */ 0);
850 }
850 }
851
851
852
852
853 size_t HUF_decompress1X4_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
853 size_t HUF_decompress1X2_DCtx(HUF_DTable* DCtx, void* dst, size_t dstSize,
854 const void* cSrc, size_t cSrcSize)
854 const void* cSrc, size_t cSrcSize)
855 {
855 {
856 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
856 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
857 return HUF_decompress1X4_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
857 return HUF_decompress1X2_DCtx_wksp(DCtx, dst, dstSize, cSrc, cSrcSize,
858 workSpace, sizeof(workSpace));
858 workSpace, sizeof(workSpace));
859 }
859 }
860
860
861 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
861 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
862 {
862 {
863 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
863 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
864 return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
864 return HUF_decompress1X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
865 }
865 }
866
866
867 size_t HUF_decompress4X4_usingDTable(
867 size_t HUF_decompress4X2_usingDTable(
868 void* dst, size_t dstSize,
868 void* dst, size_t dstSize,
869 const void* cSrc, size_t cSrcSize,
869 const void* cSrc, size_t cSrcSize,
870 const HUF_DTable* DTable)
870 const HUF_DTable* DTable)
871 {
871 {
872 DTableDesc dtd = HUF_getDTableDesc(DTable);
872 DTableDesc dtd = HUF_getDTableDesc(DTable);
873 if (dtd.tableType != 1) return ERROR(GENERIC);
873 if (dtd.tableType != 1) return ERROR(GENERIC);
874 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
874 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
875 }
875 }
876
876
877 static size_t HUF_decompress4X4_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
877 static size_t HUF_decompress4X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize,
878 const void* cSrc, size_t cSrcSize,
878 const void* cSrc, size_t cSrcSize,
879 void* workSpace, size_t wkspSize, int bmi2)
879 void* workSpace, size_t wkspSize, int bmi2)
880 {
880 {
881 const BYTE* ip = (const BYTE*) cSrc;
881 const BYTE* ip = (const BYTE*) cSrc;
882
882
883 size_t hSize = HUF_readDTableX4_wksp(dctx, cSrc, cSrcSize,
883 size_t hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize,
884 workSpace, wkspSize);
884 workSpace, wkspSize);
885 if (HUF_isError(hSize)) return hSize;
885 if (HUF_isError(hSize)) return hSize;
886 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
886 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
887 ip += hSize; cSrcSize -= hSize;
887 ip += hSize; cSrcSize -= hSize;
888
888
889 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
889 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
890 }
890 }
891
891
892 size_t HUF_decompress4X4_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
892 size_t HUF_decompress4X2_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
893 const void* cSrc, size_t cSrcSize,
893 const void* cSrc, size_t cSrcSize,
894 void* workSpace, size_t wkspSize)
894 void* workSpace, size_t wkspSize)
895 {
895 {
896 return HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
896 return HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, /* bmi2 */ 0);
897 }
897 }
898
898
899
899
900 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
900 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize,
901 const void* cSrc, size_t cSrcSize)
901 const void* cSrc, size_t cSrcSize)
902 {
902 {
903 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
903 U32 workSpace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
904 return HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
904 return HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize,
905 workSpace, sizeof(workSpace));
905 workSpace, sizeof(workSpace));
906 }
906 }
907
907
908 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
908 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
909 {
909 {
910 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
910 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
911 return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
911 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
912 }
912 }
913
913
914
914
915 /* ********************************/
915 /* ***********************************/
916 /* Generic decompression selector */
916 /* Universal decompression selectors */
917 /* ********************************/
917 /* ***********************************/
918
918
919 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
919 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
920 const void* cSrc, size_t cSrcSize,
920 const void* cSrc, size_t cSrcSize,
921 const HUF_DTable* DTable)
921 const HUF_DTable* DTable)
922 {
922 {
923 DTableDesc const dtd = HUF_getDTableDesc(DTable);
923 DTableDesc const dtd = HUF_getDTableDesc(DTable);
924 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
924 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
925 HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
925 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
926 }
926 }
927
927
928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
928 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
@@ -930,8 +930,8 b' size_t HUF_decompress4X_usingDTable(void'
930 const HUF_DTable* DTable)
930 const HUF_DTable* DTable)
931 {
931 {
932 DTableDesc const dtd = HUF_getDTableDesc(DTable);
932 DTableDesc const dtd = HUF_getDTableDesc(DTable);
933 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
933 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0) :
934 HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
934 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, /* bmi2 */ 0);
935 }
935 }
936
936
937
937
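Both universal selectors dispatch on the tableType field that HUF_getDTableDesc decodes from the DTable header: 0 marks a table built by HUF_readDTableX1, 1 one built by HUF_readDTableX2. This is also why the type-specific entry points earlier reject a mismatched descriptor with ERROR(GENERIC) before touching the bitstream.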
@@ -960,12 +960,12 b' static const algo_time_t algoTime[16 /* '
960 /** HUF_selectDecoder() :
960 /** HUF_selectDecoder() :
961 * Tells which decoder is likely to decode faster,
961 * Tells which decoder is likely to decode faster,
962 * based on a set of pre-computed metrics.
962 * based on a set of pre-computed metrics.
963 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
963 * @return : 0==HUF_decompress4X1, 1==HUF_decompress4X2 .
964 * Assumption : 0 < dstSize <= 128 KB */
964 * Assumption : 0 < dstSize <= 128 KB */
965 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
965 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
966 {
966 {
967 assert(dstSize > 0);
967 assert(dstSize > 0);
968 assert(dstSize <= 128 KB);
968 assert(dstSize <= 128*1024);
969 /* decoder timing evaluation */
969 /* decoder timing evaluation */
970 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
970 { U32 const Q = (cSrcSize >= dstSize) ? 15 : (U32)(cSrcSize * 16 / dstSize); /* Q < 16 */
971 U32 const D256 = (U32)(dstSize >> 8);
971 U32 const D256 = (U32)(dstSize >> 8);
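The selector reduces its inputs to two small indices: Q quantizes the ratio cSrcSize/dstSize into 16 buckets and D256 measures output in 256-byte units, after which the pre-computed algoTime table is consulted. A sketch of the quantization (illustrative, mirroring the line above):

    #include <stddef.h>

    /* Illustrative sketch, not part of the patch; Q < 16 by construction. */
    static unsigned quantize_ratio(size_t cSrcSize, size_t dstSize)
    {
        return (cSrcSize >= dstSize) ? 15 : (unsigned)(cSrcSize * 16 / dstSize);
    }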
@@ -980,7 +980,7 b' typedef size_t (*decompressionAlgo)(void'
980
980
981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
981 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
982 {
982 {
983 static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
983 static const decompressionAlgo decompress[2] = { HUF_decompress4X1, HUF_decompress4X2 };
984
984
985 /* validation checks */
985 /* validation checks */
986 if (dstSize == 0) return ERROR(dstSize_tooSmall);
986 if (dstSize == 0) return ERROR(dstSize_tooSmall);
@@ -1002,8 +1002,8 b' size_t HUF_decompress4X_DCtx (HUF_DTable'
1002 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1002 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1003
1003
1004 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1004 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1005 return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1005 return algoNb ? HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
1006 HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1006 HUF_decompress4X1_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
1007 }
1007 }
1008 }
1008 }
1009
1009
@@ -1025,8 +1025,8 b' size_t HUF_decompress4X_hufOnly_wksp(HUF'
1025 if (cSrcSize == 0) return ERROR(corruption_detected);
1025 if (cSrcSize == 0) return ERROR(corruption_detected);
1026
1026
1027 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1027 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1028 return algoNb ? HUF_decompress4X4_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1028 return algoNb ? HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize):
1029 HUF_decompress4X2_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1029 HUF_decompress4X1_DCtx_wksp(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize);
1030 }
1030 }
1031 }
1031 }
1032
1032
@@ -1041,9 +1041,9 b' size_t HUF_decompress1X_DCtx_wksp(HUF_DT'
1041 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1041 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
1042
1042
1043 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1043 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1044 return algoNb ? HUF_decompress1X4_DCtx_wksp(dctx, dst, dstSize, cSrc,
1044 return algoNb ? HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1045 cSrcSize, workSpace, wkspSize):
1045 cSrcSize, workSpace, wkspSize):
1046 HUF_decompress1X2_DCtx_wksp(dctx, dst, dstSize, cSrc,
1046 HUF_decompress1X1_DCtx_wksp(dctx, dst, dstSize, cSrc,
1047 cSrcSize, workSpace, wkspSize);
1047 cSrcSize, workSpace, wkspSize);
1048 }
1048 }
1049 }
1049 }
@@ -1060,27 +1060,27 b' size_t HUF_decompress1X_DCtx(HUF_DTable*'
1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1060 size_t HUF_decompress1X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1061 {
1061 {
1062 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1062 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1063 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1063 return dtd.tableType ? HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1064 HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1064 HUF_decompress1X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1065 }
1065 }
1066
1066
1067 size_t HUF_decompress1X2_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1067 size_t HUF_decompress1X1_DCtx_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1068 {
1068 {
1069 const BYTE* ip = (const BYTE*) cSrc;
1069 const BYTE* ip = (const BYTE*) cSrc;
1070
1070
1071 size_t const hSize = HUF_readDTableX2_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1071 size_t const hSize = HUF_readDTableX1_wksp(dctx, cSrc, cSrcSize, workSpace, wkspSize);
1072 if (HUF_isError(hSize)) return hSize;
1072 if (HUF_isError(hSize)) return hSize;
1073 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1073 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
1074 ip += hSize; cSrcSize -= hSize;
1074 ip += hSize; cSrcSize -= hSize;
1075
1075
1076 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1076 return HUF_decompress1X1_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx, bmi2);
1077 }
1077 }
1078
1078
1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1079 size_t HUF_decompress4X_usingDTable_bmi2(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable, int bmi2)
1080 {
1080 {
1081 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1081 DTableDesc const dtd = HUF_getDTableDesc(DTable);
1082 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1082 return dtd.tableType ? HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2) :
1083 HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1083 HUF_decompress4X1_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable, bmi2);
1084 }
1084 }
1085
1085
1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
1086 size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize, void* workSpace, size_t wkspSize, int bmi2)
@@ -1090,7 +1090,7 b' size_t HUF_decompress4X_hufOnly_wksp_bmi'
1090 if (cSrcSize == 0) return ERROR(corruption_detected);
1090 if (cSrcSize == 0) return ERROR(corruption_detected);
1091
1091
1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1092 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
1093 return algoNb ? HUF_decompress4X4_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1093 return algoNb ? HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2) :
1094 HUF_decompress4X2_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1094 HUF_decompress4X1_DCtx_wksp_bmi2(dctx, dst, dstSize, cSrc, cSrcSize, workSpace, wkspSize, bmi2);
1095 }
1095 }
1096 }
1096 }
@@ -40,12 +40,24 b''
40 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
40 # define ZSTD_MAXWINDOWSIZE_DEFAULT (((U32)1 << ZSTD_WINDOWLOG_DEFAULTMAX) + 1)
41 #endif
41 #endif
42
42
43 /*!
44 * NO_FORWARD_PROGRESS_MAX :
45 * maximum allowed nb of calls to ZSTD_decompressStream() and ZSTD_decompress_generic()
46 * without any forward progress
47 * (defined as: no byte read from input, and no byte flushed to output)
48 * before triggering an error.
49 */
50 #ifndef ZSTD_NO_FORWARD_PROGRESS_MAX
51 # define ZSTD_NO_FORWARD_PROGRESS_MAX 16
52 #endif
53
43
54
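This limit pairs with the noForwardProgress counter added to ZSTD_DCtx_s further down. A sketch of the guard pattern it enables (illustrative only, not the library's actual control flow):

    #include <stddef.h>

    /* Illustrative sketch, not part of the patch: count consecutive calls
     * that neither consume input nor produce output, and fail once the
     * limit is reached instead of spinning forever. */
    #define NO_PROGRESS_MAX 16

    static int track_progress(int* noProgress, size_t consumed, size_t produced)
    {
        if (consumed == 0 && produced == 0) {
            if (++*noProgress >= NO_PROGRESS_MAX) return -1;  /* report error */
        } else {
            *noProgress = 0;  /* any progress resets the counter */
        }
        return 0;
    }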
44 /*-*******************************************************
55 /*-*******************************************************
45 * Dependencies
56 * Dependencies
46 *********************************************************/
57 *********************************************************/
47 #include <string.h> /* memcpy, memmove, memset */
58 #include <string.h> /* memcpy, memmove, memset */
48 #include "cpu.h"
59 #include "compiler.h" /* prefetch */
60 #include "cpu.h" /* bmi2 */
49 #include "mem.h" /* low level memory routines */
61 #include "mem.h" /* low level memory routines */
50 #define FSE_STATIC_LINKING_ONLY
62 #define FSE_STATIC_LINKING_ONLY
51 #include "fse.h"
63 #include "fse.h"
@@ -57,6 +69,9 b''
57 # include "zstd_legacy.h"
69 # include "zstd_legacy.h"
58 #endif
70 #endif
59
71
72 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
73 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
74
60
75
61 /*-*************************************
76 /*-*************************************
62 * Errors
77 * Errors
@@ -99,11 +114,10 b' typedef struct {'
99 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
114 #define SEQSYMBOL_TABLE_SIZE(log) (1 + (1 << (log)))
100
115
101 typedef struct {
116 typedef struct {
102 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)];
117 ZSTD_seqSymbol LLTable[SEQSYMBOL_TABLE_SIZE(LLFSELog)]; /* Note : Space reserved for FSE Tables */
103 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)];
118 ZSTD_seqSymbol OFTable[SEQSYMBOL_TABLE_SIZE(OffFSELog)]; /* is also used as temporary workspace while building hufTable during DDict creation */
104 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)];
119 ZSTD_seqSymbol MLTable[SEQSYMBOL_TABLE_SIZE(MLFSELog)]; /* and therefore must be at least HUF_DECOMPRESS_WORKSPACE_SIZE large */
105 HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
120 HUF_DTable hufTable[HUF_DTABLE_SIZE(HufLog)]; /* can accommodate HUF_decompress4X */
106 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32];
107 U32 rep[ZSTD_REP_NUM];
121 U32 rep[ZSTD_REP_NUM];
108 } ZSTD_entropyDTables_t;
122 } ZSTD_entropyDTables_t;
109
123
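The comments added to this struct encode a space optimization: the three FSE tables sit back to back and are collectively large enough to double as the scratch buffer HUF needs while building its table during DDict creation, which is why the dedicated workspace member moves out of ZSTD_entropyDTables_t (and into ZSTD_DCtx below). A compile-time sketch of the layout guarantee this relies on, with hypothetical demo_ types and sizes, using C11 static_assert in place of ZSTD_STATIC_ASSERT:

    #include <assert.h>   /* static_assert (C11) */
    #include <stddef.h>   /* offsetof */
    #include <stdint.h>

    typedef struct {
        uint32_t LLTable[513];   /* sizes are illustrative, not zstd's */
        uint32_t OFTable[257];
        uint32_t MLTable[513];
    } demo_entropy_t;

    #define DEMO_HUF_WKSP_BYTES (1024 * sizeof(uint32_t))  /* assumed requirement */

    /* Aliasing the three arrays as one workspace is only sound if they are
     * contiguous and together at least as large as the requirement. */
    static_assert(offsetof(demo_entropy_t, OFTable)
                  == offsetof(demo_entropy_t, LLTable) + sizeof(((demo_entropy_t*)0)->LLTable),
                  "OFTable must follow LLTable");
    static_assert(offsetof(demo_entropy_t, MLTable)
                  == offsetof(demo_entropy_t, OFTable) + sizeof(((demo_entropy_t*)0)->OFTable),
                  "MLTable must follow OFTable");
    static_assert(sizeof(demo_entropy_t) >= DEMO_HUF_WKSP_BYTES,
                  "tables must cover the HUF workspace");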
@@ -114,9 +128,10 b' struct ZSTD_DCtx_s'
114 const ZSTD_seqSymbol* OFTptr;
128 const ZSTD_seqSymbol* OFTptr;
115 const HUF_DTable* HUFptr;
129 const HUF_DTable* HUFptr;
116 ZSTD_entropyDTables_t entropy;
130 ZSTD_entropyDTables_t entropy;
131 U32 workspace[HUF_DECOMPRESS_WORKSPACE_SIZE_U32]; /* space needed when building huffman tables */
117 const void* previousDstEnd; /* detect continuity */
132 const void* previousDstEnd; /* detect continuity */
118 const void* base; /* start of current segment */
133 const void* prefixStart; /* start of current segment */
119 const void* vBase; /* virtual start of previous segment if it was just before current one */
134 const void* virtualStart; /* virtual start of previous segment if it was just before current one */
120 const void* dictEnd; /* end of previous segment */
135 const void* dictEnd; /* end of previous segment */
121 size_t expected;
136 size_t expected;
122 ZSTD_frameHeader fParams;
137 ZSTD_frameHeader fParams;
@@ -127,7 +142,6 b' struct ZSTD_DCtx_s'
127 U32 fseEntropy;
142 U32 fseEntropy;
128 XXH64_state_t xxhState;
143 XXH64_state_t xxhState;
129 size_t headerSize;
144 size_t headerSize;
130 U32 dictID;
131 ZSTD_format_e format;
145 ZSTD_format_e format;
132 const BYTE* litPtr;
146 const BYTE* litPtr;
133 ZSTD_customMem customMem;
147 ZSTD_customMem customMem;
@@ -136,9 +150,13 b' struct ZSTD_DCtx_s'
136 size_t staticSize;
150 size_t staticSize;
137 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
151 int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
138
152
139 /* streaming */
153 /* dictionary */
140 ZSTD_DDict* ddictLocal;
154 ZSTD_DDict* ddictLocal;
141 const ZSTD_DDict* ddict;
155 const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */
156 U32 dictID;
157 int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */
158
159 /* streaming */
142 ZSTD_dStreamStage streamStage;
160 ZSTD_dStreamStage streamStage;
143 char* inBuff;
161 char* inBuff;
144 size_t inBuffSize;
162 size_t inBuffSize;
@@ -153,6 +171,7 b' struct ZSTD_DCtx_s'
153 U32 previousLegacyVersion;
171 U32 previousLegacyVersion;
154 U32 legacyVersion;
172 U32 legacyVersion;
155 U32 hostageByte;
173 U32 hostageByte;
174 int noForwardProgress;
156
175
157 /* workspace */
176 /* workspace */
158 BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
177 BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
@@ -173,7 +192,7 b' size_t ZSTD_estimateDCtxSize(void) { ret'
173 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
192 static size_t ZSTD_startingInputLength(ZSTD_format_e format)
174 {
193 {
175 size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
194 size_t const startingInputLength = (format==ZSTD_f_zstd1_magicless) ?
176 ZSTD_frameHeaderSize_prefix - ZSTD_frameIdSize :
195 ZSTD_frameHeaderSize_prefix - ZSTD_FRAMEIDSIZE :
177 ZSTD_frameHeaderSize_prefix;
196 ZSTD_frameHeaderSize_prefix;
178 ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
197 ZSTD_STATIC_ASSERT(ZSTD_FRAMEHEADERSIZE_PREFIX >= ZSTD_FRAMEIDSIZE);
179 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
198 /* only supports formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless */
@@ -188,10 +207,15 b' static void ZSTD_initDCtx_internal(ZSTD_'
188 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
207 dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT;
189 dctx->ddict = NULL;
208 dctx->ddict = NULL;
190 dctx->ddictLocal = NULL;
209 dctx->ddictLocal = NULL;
210 dctx->dictEnd = NULL;
211 dctx->ddictIsCold = 0;
191 dctx->inBuff = NULL;
212 dctx->inBuff = NULL;
192 dctx->inBuffSize = 0;
213 dctx->inBuffSize = 0;
193 dctx->outBuffSize = 0;
214 dctx->outBuffSize = 0;
194 dctx->streamStage = zdss_init;
215 dctx->streamStage = zdss_init;
216 dctx->legacyContext = NULL;
217 dctx->previousLegacyVersion = 0;
218 dctx->noForwardProgress = 0;
195 dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
219 dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
196 }
220 }
197
221
@@ -215,8 +239,6 b' ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD'
215 { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
239 { ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_malloc(sizeof(*dctx), customMem);
216 if (!dctx) return NULL;
240 if (!dctx) return NULL;
217 dctx->customMem = customMem;
241 dctx->customMem = customMem;
218 dctx->legacyContext = NULL;
219 dctx->previousLegacyVersion = 0;
220 ZSTD_initDCtx_internal(dctx);
242 ZSTD_initDCtx_internal(dctx);
221 return dctx;
243 return dctx;
222 }
244 }
@@ -265,7 +287,7 b' void ZSTD_copyDCtx(ZSTD_DCtx* dstDCtx, c'
265 * Note 3 : Skippable Frame Identifiers are considered valid. */
287 * Note 3 : Skippable Frame Identifiers are considered valid. */
266 unsigned ZSTD_isFrame(const void* buffer, size_t size)
288 unsigned ZSTD_isFrame(const void* buffer, size_t size)
267 {
289 {
268 if (size < ZSTD_frameIdSize) return 0;
290 if (size < ZSTD_FRAMEIDSIZE) return 0;
269 { U32 const magic = MEM_readLE32(buffer);
291 { U32 const magic = MEM_readLE32(buffer);
270 if (magic == ZSTD_MAGICNUMBER) return 1;
292 if (magic == ZSTD_MAGICNUMBER) return 1;
271 if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
293 if ((magic & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
@@ -298,25 +320,28 b' static size_t ZSTD_frameHeaderSize_inter'
298
320
299 /** ZSTD_frameHeaderSize() :
321 /** ZSTD_frameHeaderSize() :
300 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
322 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
301 * @return : size of the Frame Header */
323 * @return : size of the Frame Header,
324 * or an error code (if srcSize is too small) */
302 size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
325 size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize)
303 {
326 {
304 return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
327 return ZSTD_frameHeaderSize_internal(src, srcSize, ZSTD_f_zstd1);
305 }
328 }
306
329
307
330
308 /** ZSTD_getFrameHeader_internal() :
331 /** ZSTD_getFrameHeader_advanced() :
309 * decode Frame Header, or require larger `srcSize`.
332 * decode Frame Header, or require larger `srcSize`.
310 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
333 * note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless
311 * @return : 0, `zfhPtr` is correctly filled,
334 * @return : 0, `zfhPtr` is correctly filled,
312 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
335 * >0, `srcSize` is too small, value is wanted `srcSize` amount,
313 * or an error code, which can be tested using ZSTD_isError() */
336 * or an error code, which can be tested using ZSTD_isError() */
314 static size_t ZSTD_getFrameHeader_internal(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
337 size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format)
315 {
338 {
316 const BYTE* ip = (const BYTE*)src;
339 const BYTE* ip = (const BYTE*)src;
317 size_t const minInputSize = ZSTD_startingInputLength(format);
340 size_t const minInputSize = ZSTD_startingInputLength(format);
318
341
342 memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzers do not understand that zfhPtr is only going to be read if the return value is zero, since they are two different signals */
319 if (srcSize < minInputSize) return minInputSize;
343 if (srcSize < minInputSize) return minInputSize;
344 if (src==NULL) return ERROR(GENERIC); /* invalid parameter */
320
345
321 if ( (format != ZSTD_f_zstd1_magicless)
346 if ( (format != ZSTD_f_zstd1_magicless)
322 && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
347 && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) {
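This hunk promotes ZSTD_getFrameHeader_internal to the public static-linking-only name ZSTD_getFrameHeader_advanced; its ZSTD_format_e argument is what lets callers parse frames written without the 4-byte magic, and the added memset guarantees *zfhPtr is initialized on every path. A minimal usage sketch (API as exposed under ZSTD_STATIC_LINKING_ONLY in this zstd version; error handling abbreviated):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdio.h>

    /* Parse the header of a magicless frame; returns 0 on success. */
    static int demo_parse_magicless(const void* src, size_t srcSize)
    {
        ZSTD_frameHeader zfh;
        size_t const r = ZSTD_getFrameHeader_advanced(&zfh, src, srcSize,
                                                      ZSTD_f_zstd1_magicless);
        if (ZSTD_isError(r)) return -1;  /* corrupt header */
        if (r > 0) return -1;            /* need at least r input bytes */
        printf("windowSize=%llu dictID=%u\n",
               (unsigned long long)zfh.windowSize, zfh.dictID);
        return 0;
    }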
@@ -325,7 +350,7 b' static size_t ZSTD_getFrameHeader_intern'
325 if (srcSize < ZSTD_skippableHeaderSize)
350 if (srcSize < ZSTD_skippableHeaderSize)
326 return ZSTD_skippableHeaderSize; /* magic number + frame length */
351 return ZSTD_skippableHeaderSize; /* magic number + frame length */
327 memset(zfhPtr, 0, sizeof(*zfhPtr));
352 memset(zfhPtr, 0, sizeof(*zfhPtr));
328 zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_frameIdSize);
353 zfhPtr->frameContentSize = MEM_readLE32((const char *)src + ZSTD_FRAMEIDSIZE);
329 zfhPtr->frameType = ZSTD_skippableFrame;
354 zfhPtr->frameType = ZSTD_skippableFrame;
330 return 0;
355 return 0;
331 }
356 }
@@ -394,7 +419,7 b' static size_t ZSTD_getFrameHeader_intern'
394 * or an error code, which can be tested using ZSTD_isError() */
419 * or an error code, which can be tested using ZSTD_isError() */
395 size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
420 size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize)
396 {
421 {
397 return ZSTD_getFrameHeader_internal(zfhPtr, src, srcSize, ZSTD_f_zstd1);
422 return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
398 }
423 }
399
424
400
425
@@ -437,7 +462,7 b' unsigned long long ZSTD_findDecompressed'
437 size_t skippableSize;
462 size_t skippableSize;
438 if (srcSize < ZSTD_skippableHeaderSize)
463 if (srcSize < ZSTD_skippableHeaderSize)
439 return ERROR(srcSize_wrong);
464 return ERROR(srcSize_wrong);
440 skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_frameIdSize)
465 skippableSize = MEM_readLE32((const BYTE *)src + ZSTD_FRAMEIDSIZE)
441 + ZSTD_skippableHeaderSize;
466 + ZSTD_skippableHeaderSize;
442 if (srcSize < skippableSize) {
467 if (srcSize < skippableSize) {
443 return ZSTD_CONTENTSIZE_ERROR;
468 return ZSTD_CONTENTSIZE_ERROR;
@@ -491,7 +516,7 b' unsigned long long ZSTD_getDecompressedS'
491 * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
516 * @return : 0 if success, or an error code, which can be tested using ZSTD_isError() */
492 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
517 static size_t ZSTD_decodeFrameHeader(ZSTD_DCtx* dctx, const void* src, size_t headerSize)
493 {
518 {
494 size_t const result = ZSTD_getFrameHeader_internal(&(dctx->fParams), src, headerSize, dctx->format);
519 size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format);
495 if (ZSTD_isError(result)) return result; /* invalid header */
520 if (ZSTD_isError(result)) return result; /* invalid header */
496 if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */
521 if (result>0) return ERROR(srcSize_wrong); /* headerSize too small */
497 if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
522 if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID))
@@ -526,6 +551,7 b' size_t ZSTD_getcBlockSize(const void* sr'
526 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
551 static size_t ZSTD_copyRawBlock(void* dst, size_t dstCapacity,
527 const void* src, size_t srcSize)
552 const void* src, size_t srcSize)
528 {
553 {
554 if (dst==NULL) return ERROR(dstSize_tooSmall);
529 if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
555 if (srcSize > dstCapacity) return ERROR(dstSize_tooSmall);
530 memcpy(dst, src, srcSize);
556 memcpy(dst, src, srcSize);
531 return srcSize;
557 return srcSize;
@@ -542,6 +568,9 b' static size_t ZSTD_setRleBlock(void* dst'
542 return regenSize;
568 return regenSize;
543 }
569 }
544
570
571 /* Hidden declaration for fullbench */
572 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
573 const void* src, size_t srcSize);
545 /*! ZSTD_decodeLiteralsBlock() :
574 /*! ZSTD_decodeLiteralsBlock() :
546 * @return : nb of bytes read from src (< srcSize )
575 * @return : nb of bytes read from src (< srcSize )
547 * note : symbol not declared but exposed for fullbench */
576 * note : symbol not declared but exposed for fullbench */
@@ -558,6 +587,7 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt'
558 case set_repeat:
587 case set_repeat:
559 if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
588 if (dctx->litEntropy==0) return ERROR(dictionary_corrupted);
560 /* fall-through */
589 /* fall-through */
590
561 case set_compressed:
591 case set_compressed:
562 if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
592 if (srcSize < 5) return ERROR(corruption_detected); /* srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3 */
563 { size_t lhSize, litSize, litCSize;
593 { size_t lhSize, litSize, litCSize;
@@ -589,15 +619,20 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt'
589 if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
619 if (litSize > ZSTD_BLOCKSIZE_MAX) return ERROR(corruption_detected);
590 if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
620 if (litCSize + lhSize > srcSize) return ERROR(corruption_detected);
591
621
622 /* prefetch huffman table if cold */
623 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
624 PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
625 }
626
592 if (HUF_isError((litEncType==set_repeat) ?
627 if (HUF_isError((litEncType==set_repeat) ?
593 ( singleStream ?
628 ( singleStream ?
594 HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
629 HUF_decompress1X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) :
595 HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
630 HUF_decompress4X_usingDTable_bmi2(dctx->litBuffer, litSize, istart+lhSize, litCSize, dctx->HUFptr, dctx->bmi2) ) :
596 ( singleStream ?
631 ( singleStream ?
597 HUF_decompress1X2_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
632 HUF_decompress1X1_DCtx_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
598 dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2) :
633 dctx->workspace, sizeof(dctx->workspace), dctx->bmi2) :
599 HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
634 HUF_decompress4X_hufOnly_wksp_bmi2(dctx->entropy.hufTable, dctx->litBuffer, litSize, istart+lhSize, litCSize,
600 dctx->entropy.workspace, sizeof(dctx->entropy.workspace), dctx->bmi2))))
635 dctx->workspace, sizeof(dctx->workspace), dctx->bmi2))))
601 return ERROR(corruption_detected);
636 return ERROR(corruption_detected);
602
637
603 dctx->litPtr = dctx->litBuffer;
638 dctx->litPtr = dctx->litBuffer;
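The new ddictIsCold branch prefetches the Huffman table before it is walked, but only when litSize clears a heuristic threshold, so small blocks do not pay for prefetching a table they barely use. PREFETCH_AREA comes from the newly included compiler.h; a rough equivalent of what such a macro has to do, assuming 64-byte cache lines and GCC/Clang builtins:

    #include <stddef.h>

    #if defined(__GNUC__) || defined(__clang__)
    #  define DEMO_PREFETCH(p) __builtin_prefetch((p), 0 /* read */, 3 /* keep */)
    #else
    #  define DEMO_PREFETCH(p) (void)(p)   /* harmless fallback */
    #endif

    /* Touch every cache line in [ptr, ptr+size) ahead of use; this only
     * approximates zstd's PREFETCH_AREA(), whose exact form lives in
     * compiler.h. */
    static void demo_prefetch_area(const void* ptr, size_t size)
    {
        const char* const base = (const char*)ptr;
        size_t const cacheLine = 64;   /* assumption, not probed at runtime */
        size_t pos;
        for (pos = 0; pos < size; pos += cacheLine)
            DEMO_PREFETCH(base + pos);
    }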
@@ -869,7 +904,8 b' static size_t ZSTD_buildSeqTable(ZSTD_se'
869 symbolEncodingType_e type, U32 max, U32 maxLog,
904 symbolEncodingType_e type, U32 max, U32 maxLog,
870 const void* src, size_t srcSize,
905 const void* src, size_t srcSize,
871 const U32* baseValue, const U32* nbAdditionalBits,
906 const U32* baseValue, const U32* nbAdditionalBits,
872 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable)
907 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
908 int ddictIsCold, int nbSeq)
873 {
909 {
874 switch(type)
910 switch(type)
875 {
911 {
@@ -888,6 +924,12 b' static size_t ZSTD_buildSeqTable(ZSTD_se'
888 return 0;
924 return 0;
889 case set_repeat:
925 case set_repeat:
890 if (!flagRepeatTable) return ERROR(corruption_detected);
926 if (!flagRepeatTable) return ERROR(corruption_detected);
927 /* prefetch FSE table if used */
928 if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
929 const void* const pStart = *DTablePtr;
930 size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
931 PREFETCH_AREA(pStart, pSize);
932 }
891 return 0;
933 return 0;
892 case set_compressed :
934 case set_compressed :
893 { U32 tableLog;
935 { U32 tableLog;
@@ -933,6 +975,9 b' static const U32 ML_base[MaxML+1] = {'
933 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
975 67, 83, 99, 0x83, 0x103, 0x203, 0x403, 0x803,
934 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
976 0x1003, 0x2003, 0x4003, 0x8003, 0x10003 };
935
977
978 /* Hidden declaration for fullbench */
979 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
980 const void* src, size_t srcSize);
936
981
937 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
982 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
938 const void* src, size_t srcSize)
983 const void* src, size_t srcSize)
@@ -940,25 +985,25 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
940 const BYTE* const istart = (const BYTE* const)src;
985 const BYTE* const istart = (const BYTE* const)src;
941 const BYTE* const iend = istart + srcSize;
986 const BYTE* const iend = istart + srcSize;
942 const BYTE* ip = istart;
987 const BYTE* ip = istart;
988 int nbSeq;
943 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
989 DEBUGLOG(5, "ZSTD_decodeSeqHeaders");
944
990
945 /* check */
991 /* check */
946 if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
992 if (srcSize < MIN_SEQUENCES_SIZE) return ERROR(srcSize_wrong);
947
993
948 /* SeqHead */
994 /* SeqHead */
949 { int nbSeq = *ip++;
995 nbSeq = *ip++;
950 if (!nbSeq) { *nbSeqPtr=0; return 1; }
996 if (!nbSeq) { *nbSeqPtr=0; return 1; }
951 if (nbSeq > 0x7F) {
997 if (nbSeq > 0x7F) {
952 if (nbSeq == 0xFF) {
998 if (nbSeq == 0xFF) {
953 if (ip+2 > iend) return ERROR(srcSize_wrong);
999 if (ip+2 > iend) return ERROR(srcSize_wrong);
954 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
1000 nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
955 } else {
1001 } else {
956 if (ip >= iend) return ERROR(srcSize_wrong);
1002 if (ip >= iend) return ERROR(srcSize_wrong);
957 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
1003 nbSeq = ((nbSeq-0x80)<<8) + *ip++;
958 }
959 }
1004 }
960 *nbSeqPtr = nbSeq;
961 }
1005 }
1006 *nbSeqPtr = nbSeq;
962
1007
963 /* FSE table descriptors */
1008 /* FSE table descriptors */
964 if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
1009 if (ip+4 > iend) return ERROR(srcSize_wrong); /* minimum possible size */
@@ -972,7 +1017,8 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
972 LLtype, MaxLL, LLFSELog,
1017 LLtype, MaxLL, LLFSELog,
973 ip, iend-ip,
1018 ip, iend-ip,
974 LL_base, LL_bits,
1019 LL_base, LL_bits,
975 LL_defaultDTable, dctx->fseEntropy);
1020 LL_defaultDTable, dctx->fseEntropy,
1021 dctx->ddictIsCold, nbSeq);
976 if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
1022 if (ZSTD_isError(llhSize)) return ERROR(corruption_detected);
977 ip += llhSize;
1023 ip += llhSize;
978 }
1024 }
@@ -981,7 +1027,8 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
981 OFtype, MaxOff, OffFSELog,
1027 OFtype, MaxOff, OffFSELog,
982 ip, iend-ip,
1028 ip, iend-ip,
983 OF_base, OF_bits,
1029 OF_base, OF_bits,
984 OF_defaultDTable, dctx->fseEntropy);
1030 OF_defaultDTable, dctx->fseEntropy,
1031 dctx->ddictIsCold, nbSeq);
985 if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
1032 if (ZSTD_isError(ofhSize)) return ERROR(corruption_detected);
986 ip += ofhSize;
1033 ip += ofhSize;
987 }
1034 }
@@ -990,12 +1037,23 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* '
990 MLtype, MaxML, MLFSELog,
1037 MLtype, MaxML, MLFSELog,
991 ip, iend-ip,
1038 ip, iend-ip,
992 ML_base, ML_bits,
1039 ML_base, ML_bits,
993 ML_defaultDTable, dctx->fseEntropy);
1040 ML_defaultDTable, dctx->fseEntropy,
1041 dctx->ddictIsCold, nbSeq);
994 if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
1042 if (ZSTD_isError(mlhSize)) return ERROR(corruption_detected);
995 ip += mlhSize;
1043 ip += mlhSize;
996 }
1044 }
997 }
1045 }
998
1046
1047 /* prefetch dictionary content */
1048 if (dctx->ddictIsCold) {
1049 size_t const dictSize = (const char*)dctx->prefixStart - (const char*)dctx->virtualStart;
1050 size_t const psmin = MIN(dictSize, (size_t)(64*nbSeq) /* heuristic */ );
1051 size_t const pSize = MIN(psmin, 128 KB /* protection */ );
1052 const void* const pStart = (const char*)dctx->dictEnd - pSize;
1053 PREFETCH_AREA(pStart, pSize);
1054 dctx->ddictIsCold = 0;
1055 }
1056
999 return ip-istart;
1057 return ip-istart;
1000 }
1058 }
1001
1059
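The dictionary prefetch just added is clamped twice: to 64 bytes per sequence, on the heuristic that few blocks resolve more dictionary bytes than that, and to a hard 128 KB ceiling so a large dictionary cannot evict the working set. A worked instance of the clamping, with hypothetical numbers:

    #include <stddef.h>

    #define DEMO_MIN(a, b) ((a) < (b) ? (a) : (b))

    /* With a 100 KB dictionary and nbSeq == 200, the prefetched window is
     * min(102400, 64*200, 131072) = 12800 bytes at the dictionary's end. */
    static size_t demo_prefetch_window(size_t dictSize, int nbSeq)
    {
        size_t const psmin = DEMO_MIN(dictSize, (size_t)(64 * nbSeq)); /* heuristic */
        return DEMO_MIN(psmin, (size_t)(128 * 1024));                  /* protection */
    }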
@@ -1075,7 +1133,7 b' HINT_INLINE'
1075 size_t ZSTD_execSequence(BYTE* op,
1133 size_t ZSTD_execSequence(BYTE* op,
1076 BYTE* const oend, seq_t sequence,
1134 BYTE* const oend, seq_t sequence,
1077 const BYTE** litPtr, const BYTE* const litLimit,
1135 const BYTE** litPtr, const BYTE* const litLimit,
1078 const BYTE* const base, const BYTE* const vBase, const BYTE* const dictEnd)
1136 const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
1079 {
1137 {
1080 BYTE* const oLitEnd = op + sequence.litLength;
1138 BYTE* const oLitEnd = op + sequence.litLength;
1081 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
1139 size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -1087,7 +1145,7 b' size_t ZSTD_execSequence(BYTE* op,'
1087 /* check */
1145 /* check */
1088 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1146 if (oMatchEnd>oend) return ERROR(dstSize_tooSmall); /* last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend */
1089 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1147 if (iLitEnd > litLimit) return ERROR(corruption_detected); /* over-read beyond lit buffer */
1090 if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, base, vBase, dictEnd);
1148 if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
1091
1149
1092 /* copy Literals */
1150 /* copy Literals */
1093 ZSTD_copy8(op, *litPtr);
1151 ZSTD_copy8(op, *litPtr);
@@ -1097,11 +1155,11 b' size_t ZSTD_execSequence(BYTE* op,'
1097 *litPtr = iLitEnd; /* update for next sequence */
1155 *litPtr = iLitEnd; /* update for next sequence */
1098
1156
1099 /* copy Match */
1157 /* copy Match */
1100 if (sequence.offset > (size_t)(oLitEnd - base)) {
1158 if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
1101 /* offset beyond prefix -> go into extDict */
1159 /* offset beyond prefix -> go into extDict */
1102 if (sequence.offset > (size_t)(oLitEnd - vBase))
1160 if (sequence.offset > (size_t)(oLitEnd - virtualStart))
1103 return ERROR(corruption_detected);
1161 return ERROR(corruption_detected);
1104 match = dictEnd + (match - base);
1162 match = dictEnd + (match - prefixStart);
1105 if (match + sequence.matchLength <= dictEnd) {
1163 if (match + sequence.matchLength <= dictEnd) {
1106 memmove(oLitEnd, match, sequence.matchLength);
1164 memmove(oLitEnd, match, sequence.matchLength);
1107 return sequenceLength;
1165 return sequenceLength;
@@ -1111,7 +1169,7 b' size_t ZSTD_execSequence(BYTE* op,'
1111 memmove(oLitEnd, match, length1);
1169 memmove(oLitEnd, match, length1);
1112 op = oLitEnd + length1;
1170 op = oLitEnd + length1;
1113 sequence.matchLength -= length1;
1171 sequence.matchLength -= length1;
1114 match = base;
1172 match = prefixStart;
1115 if (op > oend_w || sequence.matchLength < MINMATCH) {
1173 if (op > oend_w || sequence.matchLength < MINMATCH) {
1116 U32 i;
1174 U32 i;
1117 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
1175 for (i = 0; i < sequence.matchLength; ++i) op[i] = match[i];
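The base/vBase to prefixStart/virtualStart renames in this function spell out the window model: a match offset is measured backward from the current output position, anything within the prefix is a plain copy, and anything beyond it resolves into the external dictionary, addressed as if the dictionary sat immediately before the prefix. A sketch of that classification, mirroring the two checks above (the names echo the renamed fields; this is not a public API):

    #include <stddef.h>

    typedef enum { DEMO_IN_PREFIX, DEMO_IN_EXTDICT, DEMO_CORRUPT } demo_matchLoc;

    /* Where does a match `offset` bytes behind `oLitEnd` live?
     * virtualStart is where the dictionary would begin if it were
     * contiguous with the decoded prefix. */
    static demo_matchLoc demo_classify(const char* oLitEnd, size_t offset,
                                       const char* prefixStart,
                                       const char* virtualStart)
    {
        if (offset <= (size_t)(oLitEnd - prefixStart))
            return DEMO_IN_PREFIX;    /* plain backward copy */
        if (offset <= (size_t)(oLitEnd - virtualStart))
            return DEMO_IN_EXTDICT;   /* remapped via dictEnd, as above */
        return DEMO_CORRUPT;          /* offset reaches beyond the window */
    }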
@@ -1354,10 +1412,10 b' ZSTD_decompressSequences_body( ZSTD_DCtx'
1354 BYTE* op = ostart;
1412 BYTE* op = ostart;
1355 const BYTE* litPtr = dctx->litPtr;
1413 const BYTE* litPtr = dctx->litPtr;
1356 const BYTE* const litEnd = litPtr + dctx->litSize;
1414 const BYTE* const litEnd = litPtr + dctx->litSize;
1357 const BYTE* const base = (const BYTE*) (dctx->base);
1415 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1358 const BYTE* const vBase = (const BYTE*) (dctx->vBase);
1416 const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
1359 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1417 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1360 DEBUGLOG(5, "ZSTD_decompressSequences");
1418 DEBUGLOG(5, "ZSTD_decompressSequences_body");
1361
1419
1362 /* Regen sequences */
1420 /* Regen sequences */
1363 if (nbSeq) {
1421 if (nbSeq) {
@@ -1372,14 +1430,14 b' ZSTD_decompressSequences_body( ZSTD_DCtx'
1372 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
1430 for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) {
1373 nbSeq--;
1431 nbSeq--;
1374 { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1432 { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
1375 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, base, vBase, dictEnd);
1433 size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
1376 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1434 DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
1377 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1435 if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
1378 op += oneSeqSize;
1436 op += oneSeqSize;
1379 } }
1437 } }
1380
1438
1381 /* check if reached exact end */
1439 /* check if reached exact end */
1382 DEBUGLOG(5, "ZSTD_decompressSequences: after decode loop, remaining nbSeq : %i", nbSeq);
1440 DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
1383 if (nbSeq) return ERROR(corruption_detected);
1441 if (nbSeq) return ERROR(corruption_detected);
1384 /* save reps for next block */
1442 /* save reps for next block */
1385 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
1443 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
@@ -1498,8 +1556,8 b' ZSTD_decompressSequencesLong_body('
1498 BYTE* op = ostart;
1556 BYTE* op = ostart;
1499 const BYTE* litPtr = dctx->litPtr;
1557 const BYTE* litPtr = dctx->litPtr;
1500 const BYTE* const litEnd = litPtr + dctx->litSize;
1558 const BYTE* const litEnd = litPtr + dctx->litSize;
1501 const BYTE* const prefixStart = (const BYTE*) (dctx->base);
1559 const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
1502 const BYTE* const dictStart = (const BYTE*) (dctx->vBase);
1560 const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
1503 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1561 const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
1504
1562
1505 /* Regen sequences */
1563 /* Regen sequences */
@@ -1662,7 +1720,8 b' static size_t ZSTD_decompressBlock_inter'
1662 /* isLongOffset must be true if there are long offsets.
1720 /* isLongOffset must be true if there are long offsets.
1663 * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1721 * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
1664 * We don't expect that to be the case in 64-bit mode.
1722 * We don't expect that to be the case in 64-bit mode.
1665 * In block mode, window size is not known, so we have to be conservative. (note: but it could be evaluated from current-lowLimit)
1723 * In block mode, window size is not known, so we have to be conservative.
1724 * (note: but it could be evaluated from current-lowLimit)
1666 */
1725 */
1667 ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
1726 ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)));
1668 DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
1727 DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize);
@@ -1701,8 +1760,8 b' static void ZSTD_checkContinuity(ZSTD_DC'
1701 {
1760 {
1702 if (dst != dctx->previousDstEnd) { /* not contiguous */
1761 if (dst != dctx->previousDstEnd) { /* not contiguous */
1703 dctx->dictEnd = dctx->previousDstEnd;
1762 dctx->dictEnd = dctx->previousDstEnd;
1704 dctx->vBase = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
1763 dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
1705 dctx->base = dst;
1764 dctx->prefixStart = dst;
1706 dctx->previousDstEnd = dst;
1765 dctx->previousDstEnd = dst;
1707 }
1766 }
1708 }
1767 }
@@ -1729,10 +1788,10 b' ZSTDLIB_API size_t ZSTD_insertBlock(ZSTD'
1729 }
1788 }
1730
1789
1731
1790
1732 static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE byte, size_t length)
1791 static size_t ZSTD_generateNxBytes(void* dst, size_t dstCapacity, BYTE value, size_t length)
1733 {
1792 {
1734 if (length > dstCapacity) return ERROR(dstSize_tooSmall);
1793 if (length > dstCapacity) return ERROR(dstSize_tooSmall);
1735 memset(dst, byte, length);
1794 memset(dst, value, length);
1736 return length;
1795 return length;
1737 }
1796 }
1738
1797
@@ -1749,7 +1808,7 b' size_t ZSTD_findFrameCompressedSize(cons'
1749 #endif
1808 #endif
1750 if ( (srcSize >= ZSTD_skippableHeaderSize)
1809 if ( (srcSize >= ZSTD_skippableHeaderSize)
1751 && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) {
1810 && (MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START ) {
1752 return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize);
1811 return ZSTD_skippableHeaderSize + MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE);
1753 } else {
1812 } else {
1754 const BYTE* ip = (const BYTE*)src;
1813 const BYTE* ip = (const BYTE*)src;
1755 const BYTE* const ipstart = ip;
1814 const BYTE* const ipstart = ip;
@@ -1783,7 +1842,6 b' size_t ZSTD_findFrameCompressedSize(cons'
1783 if (zfh.checksumFlag) { /* Final frame content checksum */
1842 if (zfh.checksumFlag) { /* Final frame content checksum */
1784 if (remainingSize < 4) return ERROR(srcSize_wrong);
1843 if (remainingSize < 4) return ERROR(srcSize_wrong);
1785 ip += 4;
1844 ip += 4;
1786 remainingSize -= 4;
1787 }
1845 }
1788
1846
1789 return ip - ipstart;
1847 return ip - ipstart;
@@ -1871,9 +1929,6 b' static size_t ZSTD_decompressFrame(ZSTD_'
1871 return op-ostart;
1929 return op-ostart;
1872 }
1930 }
1873
1931
1874 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict);
1875 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict);
1876
1877 static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
1932 static size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
1878 void* dst, size_t dstCapacity,
1933 void* dst, size_t dstCapacity,
1879 const void* src, size_t srcSize,
1934 const void* src, size_t srcSize,
@@ -1881,6 +1936,9 b' static size_t ZSTD_decompressMultiFrame('
1881 const ZSTD_DDict* ddict)
1936 const ZSTD_DDict* ddict)
1882 {
1937 {
1883 void* const dststart = dst;
1938 void* const dststart = dst;
1939 int moreThan1Frame = 0;
1940
1941 DEBUGLOG(5, "ZSTD_decompressMultiFrame");
1884 assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
1942 assert(dict==NULL || ddict==NULL); /* either dict or ddict set, not both */
1885
1943
1886 if (ddict) {
1944 if (ddict) {
@@ -1889,7 +1947,6 b' static size_t ZSTD_decompressMultiFrame('
1889 }
1947 }
1890
1948
1891 while (srcSize >= ZSTD_frameHeaderSize_prefix) {
1949 while (srcSize >= ZSTD_frameHeaderSize_prefix) {
1892 U32 magicNumber;
1893
1950
1894 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
1951 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
1895 if (ZSTD_isLegacy(src, srcSize)) {
1952 if (ZSTD_isLegacy(src, srcSize)) {
@@ -1911,24 +1968,21 b' static size_t ZSTD_decompressMultiFrame('
1911 }
1968 }
1912 #endif
1969 #endif
1913
1970
1914 magicNumber = MEM_readLE32(src);
1971 { U32 const magicNumber = MEM_readLE32(src);
1915 DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
1972 DEBUGLOG(4, "reading magic number %08X (expecting %08X)",
1916 (U32)magicNumber, (U32)ZSTD_MAGICNUMBER);
1973 (U32)magicNumber, (U32)ZSTD_MAGICNUMBER);
1917 if (magicNumber != ZSTD_MAGICNUMBER) {
1918 if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
1974 if ((magicNumber & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) {
1919 size_t skippableSize;
1975 size_t skippableSize;
1920 if (srcSize < ZSTD_skippableHeaderSize)
1976 if (srcSize < ZSTD_skippableHeaderSize)
1921 return ERROR(srcSize_wrong);
1977 return ERROR(srcSize_wrong);
1922 skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_frameIdSize)
1978 skippableSize = MEM_readLE32((const BYTE*)src + ZSTD_FRAMEIDSIZE)
1923 + ZSTD_skippableHeaderSize;
1979 + ZSTD_skippableHeaderSize;
1924 if (srcSize < skippableSize) return ERROR(srcSize_wrong);
1980 if (srcSize < skippableSize) return ERROR(srcSize_wrong);
1925
1981
1926 src = (const BYTE *)src + skippableSize;
1982 src = (const BYTE *)src + skippableSize;
1927 srcSize -= skippableSize;
1983 srcSize -= skippableSize;
1928 continue;
1984 continue;
1929 }
1985 } }
1930 return ERROR(prefix_unknown);
1931 }
1932
1986
1933 if (ddict) {
1987 if (ddict) {
1934 /* we were called from ZSTD_decompress_usingDDict */
1988 /* we were called from ZSTD_decompress_usingDDict */
@@ -1942,11 +1996,25 b' static size_t ZSTD_decompressMultiFrame('
1942
1996
1943 { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
1997 { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity,
1944 &src, &srcSize);
1998 &src, &srcSize);
1999 if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown)
2000 && (moreThan1Frame==1) ) {
2001 /* at least one frame successfully completed,
2002 * but following bytes are garbage :
2003 * it's more likely to be a srcSize error,
2004 * specifying more bytes than compressed size of frame(s).
2005 * This error message replaces ERROR(prefix_unknown),
2006 * which would be confusing, as the first header is actually correct.
2007 * Note that one could be unlucky: it might be a corruption error instead,
2008 * happening right at the place where we expect zstd magic bytes.
2009 * But this is _much_ less likely than a srcSize field error. */
2010 return ERROR(srcSize_wrong);
2011 }
1945 if (ZSTD_isError(res)) return res;
2012 if (ZSTD_isError(res)) return res;
1946 /* no need to bound check, ZSTD_decompressFrame already has */
2013 /* no need to bound check, ZSTD_decompressFrame already has */
1947 dst = (BYTE*)dst + res;
2014 dst = (BYTE*)dst + res;
1948 dstCapacity -= res;
2015 dstCapacity -= res;
1949 }
2016 }
2017 moreThan1Frame = 1;
1950 } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
2018 } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */
1951
2019
1952 if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
2020 if (srcSize) return ERROR(srcSize_wrong); /* input not entirely consumed */
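The moreThan1Frame flag fixes a misleading diagnostic: once one frame has decoded cleanly, trailing bytes that do not begin with a zstd magic number are far more likely an over-stated srcSize than a fresh corrupt frame, so prefix_unknown is translated to srcSize_wrong. A sketch of the observable change (hypothetical buffer sizes; checking the exact error code would need zstd's error header):

    #include <zstd.h>
    #include <string.h>

    /* One valid frame followed by garbage: this call used to fail with
     * prefix_unknown and now fails with the more accurate srcSize_wrong. */
    static size_t demo_trailing_garbage(void* dst, size_t dstCap,
                                        const void* frame, size_t frameSize)
    {
        char src[4096];                          /* assume frameSize + 16 fits */
        memcpy(src, frame, frameSize);
        memset(src + frameSize, 0xAA, 16);       /* not a magic number */
        return ZSTD_decompress(dst, dstCap, src, frameSize + 16);
    }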
@@ -1980,6 +2048,7 b' size_t ZSTD_decompress(void* dst, size_t'
1980 return regenSize;
2048 return regenSize;
1981 #else /* stack mode */
2049 #else /* stack mode */
1982 ZSTD_DCtx dctx;
2050 ZSTD_DCtx dctx;
2051 ZSTD_initDCtx_internal(&dctx);
1983 return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
2052 return ZSTD_decompressDCtx(&dctx, dst, dstCapacity, src, srcSize);
1984 #endif
2053 #endif
1985 }
2054 }
@@ -2031,7 +2100,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2031 case ZSTDds_getFrameHeaderSize :
2100 case ZSTDds_getFrameHeaderSize :
2032 assert(src != NULL);
2101 assert(src != NULL);
2033 if (dctx->format == ZSTD_f_zstd1) { /* allows header */
2102 if (dctx->format == ZSTD_f_zstd1) { /* allows header */
2034 assert(srcSize >= ZSTD_frameIdSize); /* to read skippable magic number */
2103 assert(srcSize >= ZSTD_FRAMEIDSIZE); /* to read skippable magic number */
2035 if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2104 if ((MEM_readLE32(src) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2036 memcpy(dctx->headerBuffer, src, srcSize);
2105 memcpy(dctx->headerBuffer, src, srcSize);
2037 dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */
2106 dctx->expected = ZSTD_skippableHeaderSize - srcSize; /* remaining to load to get full skippable frame header */
@@ -2141,7 +2210,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2141 assert(src != NULL);
2210 assert(src != NULL);
2142 assert(srcSize <= ZSTD_skippableHeaderSize);
2211 assert(srcSize <= ZSTD_skippableHeaderSize);
2143 memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */
2212 memcpy(dctx->headerBuffer + (ZSTD_skippableHeaderSize - srcSize), src, srcSize); /* complete skippable header */
2144 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_frameIdSize); /* note : dctx->expected can grow seriously large, beyond local buffer size */
2213 dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
2145 dctx->stage = ZSTDds_skipFrame;
2214 dctx->stage = ZSTDds_skipFrame;
2146 return 0;
2215 return 0;
2147
2216
@@ -2159,27 +2228,33 b' size_t ZSTD_decompressContinue(ZSTD_DCtx'
2159 static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2228 static size_t ZSTD_refDictContent(ZSTD_DCtx* dctx, const void* dict, size_t dictSize)
2160 {
2229 {
2161 dctx->dictEnd = dctx->previousDstEnd;
2230 dctx->dictEnd = dctx->previousDstEnd;
2162 dctx->vBase = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->base));
2231 dctx->virtualStart = (const char*)dict - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
2163 dctx->base = dict;
2232 dctx->prefixStart = dict;
2164 dctx->previousDstEnd = (const char*)dict + dictSize;
2233 dctx->previousDstEnd = (const char*)dict + dictSize;
2165 return 0;
2234 return 0;
2166 }
2235 }
2167
2236
2168 /* ZSTD_loadEntropy() :
2237 /*! ZSTD_loadEntropy() :
2169 * dict : must point at beginning of a valid zstd dictionary
2238 * dict : must point at beginning of a valid zstd dictionary.
2170 * @return : size of entropy tables read */
2239 * @return : size of entropy tables read */
2171 static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy, const void* const dict, size_t const dictSize)
2240 static size_t ZSTD_loadEntropy(ZSTD_entropyDTables_t* entropy,
2241 const void* const dict, size_t const dictSize)
2172 {
2242 {
2173 const BYTE* dictPtr = (const BYTE*)dict;
2243 const BYTE* dictPtr = (const BYTE*)dict;
2174 const BYTE* const dictEnd = dictPtr + dictSize;
2244 const BYTE* const dictEnd = dictPtr + dictSize;
2175
2245
2176 if (dictSize <= 8) return ERROR(dictionary_corrupted);
2246 if (dictSize <= 8) return ERROR(dictionary_corrupted);
2247 assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */
2177 dictPtr += 8; /* skip header = magic + dictID */
2248 dictPtr += 8; /* skip header = magic + dictID */
2178
2249
2179
2250 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, OFTable) == offsetof(ZSTD_entropyDTables_t, LLTable) + sizeof(entropy->LLTable));
2180 { size_t const hSize = HUF_readDTableX4_wksp(
2251 ZSTD_STATIC_ASSERT(offsetof(ZSTD_entropyDTables_t, MLTable) == offsetof(ZSTD_entropyDTables_t, OFTable) + sizeof(entropy->OFTable));
2181 entropy->hufTable, dictPtr, dictEnd - dictPtr,
2252 ZSTD_STATIC_ASSERT(sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable) >= HUF_DECOMPRESS_WORKSPACE_SIZE);
2182 entropy->workspace, sizeof(entropy->workspace));
2253 { void* const workspace = &entropy->LLTable; /* use fse tables as temporary workspace; implies fse tables are grouped together */
2254 size_t const workspaceSize = sizeof(entropy->LLTable) + sizeof(entropy->OFTable) + sizeof(entropy->MLTable);
2255 size_t const hSize = HUF_readDTableX2_wksp(entropy->hufTable,
2256 dictPtr, dictEnd - dictPtr,
2257 workspace, workspaceSize);
2183 if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
2258 if (HUF_isError(hSize)) return ERROR(dictionary_corrupted);
2184 dictPtr += hSize;
2259 dictPtr += hSize;
2185 }
2260 }
@@ -2190,7 +2265,7 b' static size_t ZSTD_loadEntropy(ZSTD_entr'
2190 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2265 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2191 if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
2266 if (offcodeMaxValue > MaxOff) return ERROR(dictionary_corrupted);
2192 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2267 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2193 ZSTD_buildFSETable(entropy->OFTable,
2268 ZSTD_buildFSETable( entropy->OFTable,
2194 offcodeNCount, offcodeMaxValue,
2269 offcodeNCount, offcodeMaxValue,
2195 OF_base, OF_bits,
2270 OF_base, OF_bits,
2196 offcodeLog);
2271 offcodeLog);
@@ -2203,7 +2278,7 b' static size_t ZSTD_loadEntropy(ZSTD_entr'
2203 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
2278 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
2204 if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
2279 if (matchlengthMaxValue > MaxML) return ERROR(dictionary_corrupted);
2205 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2280 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2206 ZSTD_buildFSETable(entropy->MLTable,
2281 ZSTD_buildFSETable( entropy->MLTable,
2207 matchlengthNCount, matchlengthMaxValue,
2282 matchlengthNCount, matchlengthMaxValue,
2208 ML_base, ML_bits,
2283 ML_base, ML_bits,
2209 matchlengthLog);
2284 matchlengthLog);
@@ -2216,7 +2291,7 b' static size_t ZSTD_loadEntropy(ZSTD_entr'
2216 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
2291 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
2217 if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
2292 if (litlengthMaxValue > MaxLL) return ERROR(dictionary_corrupted);
2218 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2293 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2219 ZSTD_buildFSETable(entropy->LLTable,
2294 ZSTD_buildFSETable( entropy->LLTable,
2220 litlengthNCount, litlengthMaxValue,
2295 litlengthNCount, litlengthMaxValue,
2221 LL_base, LL_bits,
2296 LL_base, LL_bits,
2222 litlengthLog);
2297 litlengthLog);
@@ -2242,7 +2317,7 b' static size_t ZSTD_decompress_insertDict'
2242 if (magic != ZSTD_MAGIC_DICTIONARY) {
2317 if (magic != ZSTD_MAGIC_DICTIONARY) {
2243 return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
2318 return ZSTD_refDictContent(dctx, dict, dictSize); /* pure content mode */
2244 } }
2319 } }
2245 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_frameIdSize);
2320 dctx->dictID = MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
2246
2321
2247 /* load entropy tables */
2322 /* load entropy tables */
2248 { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
2323 { size_t const eSize = ZSTD_loadEntropy(&dctx->entropy, dict, dictSize);
@@ -2256,7 +2331,6 b' static size_t ZSTD_decompress_insertDict'
2256 return ZSTD_refDictContent(dctx, dict, dictSize);
2331 return ZSTD_refDictContent(dctx, dict, dictSize);
2257 }
2332 }
2258
2333
2259 /* Note : this function cannot fail */
2260 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
2334 size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
2261 {
2335 {
2262 assert(dctx != NULL);
2336 assert(dctx != NULL);
@@ -2264,8 +2338,8 b' size_t ZSTD_decompressBegin(ZSTD_DCtx* d'
2264 dctx->stage = ZSTDds_getFrameHeaderSize;
2338 dctx->stage = ZSTDds_getFrameHeaderSize;
2265 dctx->decodedSize = 0;
2339 dctx->decodedSize = 0;
2266 dctx->previousDstEnd = NULL;
2340 dctx->previousDstEnd = NULL;
2267 dctx->base = NULL;
2341 dctx->prefixStart = NULL;
2268 dctx->vBase = NULL;
2342 dctx->virtualStart = NULL;
2269 dctx->dictEnd = NULL;
2343 dctx->dictEnd = NULL;
2270 dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
2344 dctx->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */
2271 dctx->litEntropy = dctx->fseEntropy = 0;
2345 dctx->litEntropy = dctx->fseEntropy = 0;
@@ -2302,42 +2376,53 b' struct ZSTD_DDict_s {'
2302
2376
2303 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict)
2377 static const void* ZSTD_DDictDictContent(const ZSTD_DDict* ddict)
2304 {
2378 {
2379 assert(ddict != NULL);
2305 return ddict->dictContent;
2380 return ddict->dictContent;
2306 }
2381 }
2307
2382
2308 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
2383 static size_t ZSTD_DDictDictSize(const ZSTD_DDict* ddict)
2309 {
2384 {
2385 assert(ddict != NULL);
2310 return ddict->dictSize;
2386 return ddict->dictSize;
2311 }
2387 }
2312
2388
2313 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dstDCtx, const ZSTD_DDict* ddict)
2389 size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2314 {
2390 {
2315 CHECK_F( ZSTD_decompressBegin(dstDCtx) );
2391 DEBUGLOG(4, "ZSTD_decompressBegin_usingDDict");
2316 if (ddict) { /* support begin on NULL */
2392 assert(dctx != NULL);
2317 dstDCtx->dictID = ddict->dictID;
2393 if (ddict) {
2318 dstDCtx->base = ddict->dictContent;
2394 dctx->ddictIsCold = (dctx->dictEnd != (const char*)ddict->dictContent + ddict->dictSize);
2319 dstDCtx->vBase = ddict->dictContent;
2395 DEBUGLOG(4, "DDict is %s",
2320 dstDCtx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
2396 dctx->ddictIsCold ? "~cold~" : "hot!");
2321 dstDCtx->previousDstEnd = dstDCtx->dictEnd;
2397 }
2398 CHECK_F( ZSTD_decompressBegin(dctx) );
2399 if (ddict) { /* NULL ddict is equivalent to no dictionary */
2400 dctx->dictID = ddict->dictID;
2401 dctx->prefixStart = ddict->dictContent;
2402 dctx->virtualStart = ddict->dictContent;
2403 dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize;
2404 dctx->previousDstEnd = dctx->dictEnd;
2322 if (ddict->entropyPresent) {
2405 if (ddict->entropyPresent) {
2323 dstDCtx->litEntropy = 1;
2406 dctx->litEntropy = 1;
2324 dstDCtx->fseEntropy = 1;
2407 dctx->fseEntropy = 1;
2325 dstDCtx->LLTptr = ddict->entropy.LLTable;
2408 dctx->LLTptr = ddict->entropy.LLTable;
2326 dstDCtx->MLTptr = ddict->entropy.MLTable;
2409 dctx->MLTptr = ddict->entropy.MLTable;
2327 dstDCtx->OFTptr = ddict->entropy.OFTable;
2410 dctx->OFTptr = ddict->entropy.OFTable;
2328 dstDCtx->HUFptr = ddict->entropy.hufTable;
2411 dctx->HUFptr = ddict->entropy.hufTable;
2329 dstDCtx->entropy.rep[0] = ddict->entropy.rep[0];
2412 dctx->entropy.rep[0] = ddict->entropy.rep[0];
2330 dstDCtx->entropy.rep[1] = ddict->entropy.rep[1];
2413 dctx->entropy.rep[1] = ddict->entropy.rep[1];
2331 dstDCtx->entropy.rep[2] = ddict->entropy.rep[2];
2414 dctx->entropy.rep[2] = ddict->entropy.rep[2];
2332 } else {
2415 } else {
2333 dstDCtx->litEntropy = 0;
2416 dctx->litEntropy = 0;
2334 dstDCtx->fseEntropy = 0;
2417 dctx->fseEntropy = 0;
2335 }
2418 }
2336 }
2419 }
2337 return 0;
2420 return 0;
2338 }
2421 }
2339
2422
2340 static size_t ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict, ZSTD_dictContentType_e dictContentType)
2423 static size_t
2424 ZSTD_loadEntropy_inDDict(ZSTD_DDict* ddict,
2425 ZSTD_dictContentType_e dictContentType)
2341 {
2426 {
2342 ddict->dictID = 0;
2427 ddict->dictID = 0;
2343 ddict->entropyPresent = 0;
2428 ddict->entropyPresent = 0;
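ddictIsCold, computed in ZSTD_decompressBegin_usingDDict above, is what arms the prefetch heuristics earlier in the file: the dictionary is presumed out of cache whenever the context was not already referencing its content, i.e. on the first frame after attaching a DDict or when frames alternate between dictionaries. A usage-level sketch of the pattern that benefits (plain public API; demo_ wrapper is ours):

    #include <zstd.h>

    /* Decompress many frames against one DDict: only the first call sees
     * the dictionary "cold"; later calls find it hot and skip the
     * prefetches. */
    static void demo_ddict_reuse(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict,
                                 void* dst, size_t dstCap,
                                 const void* const* frames,
                                 const size_t* frameSizes, size_t nbFrames)
    {
        size_t i;
        for (i = 0; i < nbFrames; i++) {
            (void)ZSTD_decompress_usingDDict(dctx, dst, dstCap,
                                             frames[i], frameSizes[i], ddict);
        }
    }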
@@ -2355,10 +2440,12 b' static size_t ZSTD_loadEntropy_inDDict(Z'
2355 return 0; /* pure content mode */
2440 return 0; /* pure content mode */
2356 }
2441 }
2357 }
2442 }
2358 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_frameIdSize);
2443 ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
2359
2444
2360 /* load entropy tables */
2445 /* load entropy tables */
2361 CHECK_E( ZSTD_loadEntropy(&ddict->entropy, ddict->dictContent, ddict->dictSize), dictionary_corrupted );
2446 CHECK_E( ZSTD_loadEntropy(&ddict->entropy,
2447 ddict->dictContent, ddict->dictSize),
2448 dictionary_corrupted );
2362 ddict->entropyPresent = 1;
2449 ddict->entropyPresent = 1;
2363 return 0;
2450 return 0;
2364 }
2451 }
@@ -2372,6 +2459,7 b' static size_t ZSTD_initDDict_internal(ZS'
2372 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
2459 if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) {
2373 ddict->dictBuffer = NULL;
2460 ddict->dictBuffer = NULL;
2374 ddict->dictContent = dict;
2461 ddict->dictContent = dict;
2462 if (!dict) dictSize = 0;
2375 } else {
2463 } else {
2376 void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
2464 void* const internalBuffer = ZSTD_malloc(dictSize, ddict->cMem);
2377 ddict->dictBuffer = internalBuffer;
2465 ddict->dictBuffer = internalBuffer;
@@ -2396,14 +2484,15 b' ZSTD_DDict* ZSTD_createDDict_advanced(co'
2396 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
2484 if (!customMem.customAlloc ^ !customMem.customFree) return NULL;
2397
2485
2398 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
2486 { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_malloc(sizeof(ZSTD_DDict), customMem);
2399 if (!ddict) return NULL;
2487 if (ddict == NULL) return NULL;
2400 ddict->cMem = customMem;
2488 ddict->cMem = customMem;
2401
2489 { size_t const initResult = ZSTD_initDDict_internal(ddict,
2402 if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, dictLoadMethod, dictContentType) )) {
2490 dict, dictSize,
2403 ZSTD_freeDDict(ddict);
2491 dictLoadMethod, dictContentType);
2404 return NULL;
2492 if (ZSTD_isError(initResult)) {
2405 }
2493 ZSTD_freeDDict(ddict);
2406
2494 return NULL;
2495 } }
2407 return ddict;
2496 return ddict;
2408 }
2497 }
2409 }
2498 }
@@ -2430,23 +2519,25 b' ZSTD_DDict* ZSTD_createDDict_byReference'
2430
2519
2431
2520
2432 const ZSTD_DDict* ZSTD_initStaticDDict(
2521 const ZSTD_DDict* ZSTD_initStaticDDict(
2433 void* workspace, size_t workspaceSize,
2522 void* sBuffer, size_t sBufferSize,
2434 const void* dict, size_t dictSize,
2523 const void* dict, size_t dictSize,
2435 ZSTD_dictLoadMethod_e dictLoadMethod,
2524 ZSTD_dictLoadMethod_e dictLoadMethod,
2436 ZSTD_dictContentType_e dictContentType)
2525 ZSTD_dictContentType_e dictContentType)
2437 {
2526 {
2438 size_t const neededSpace =
2527 size_t const neededSpace = sizeof(ZSTD_DDict)
2439 sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
2528 + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
2440 ZSTD_DDict* const ddict = (ZSTD_DDict*)workspace;
2529 ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
2441 assert(workspace != NULL);
2530 assert(sBuffer != NULL);
2442 assert(dict != NULL);
2531 assert(dict != NULL);
2443 if ((size_t)workspace & 7) return NULL; /* 8-aligned */
2532 if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */
2444 if (workspaceSize < neededSpace) return NULL;
2533 if (sBufferSize < neededSpace) return NULL;
2445 if (dictLoadMethod == ZSTD_dlm_byCopy) {
2534 if (dictLoadMethod == ZSTD_dlm_byCopy) {
2446 memcpy(ddict+1, dict, dictSize); /* local copy */
2535 memcpy(ddict+1, dict, dictSize); /* local copy */
2447 dict = ddict+1;
2536 dict = ddict+1;
2448 }
2537 }
2449 if (ZSTD_isError( ZSTD_initDDict_internal(ddict, dict, dictSize, ZSTD_dlm_byRef, dictContentType) ))
2538 if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
2539 dict, dictSize,
2540 ZSTD_dlm_byRef, dictContentType) ))
2450 return NULL;
2541 return NULL;
2451 return ddict;
2542 return ddict;
2452 }
2543 }
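ZSTD_initStaticDDict builds a DDict inside caller-owned storage with no allocation; the renamed sBuffer must be 8-byte aligned and large enough for the struct plus, when copying, the dictionary itself. A minimal usage sketch (ZSTD_STATIC_LINKING_ONLY API; ZSTD_estimateDDictSize() is the documented way to size the buffer):

    #define ZSTD_STATIC_LINKING_ONLY
    #include <zstd.h>
    #include <stdlib.h>

    static const ZSTD_DDict* demo_static_ddict(const void* dict, size_t dictSize)
    {
        size_t const need = ZSTD_estimateDDictSize(dictSize, ZSTD_dlm_byCopy);
        void* const sBuffer = malloc(need);  /* malloc is suitably aligned */
        if (sBuffer == NULL) return NULL;
        /* returns NULL (caller still owns sBuffer) if the buffer is too
         * small or misaligned */
        return ZSTD_initStaticDDict(sBuffer, need, dict, dictSize,
                                    ZSTD_dlm_byCopy, ZSTD_dct_auto);
    }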
@@ -2484,7 +2575,7 b' unsigned ZSTD_getDictID_fromDict(const v'
2484 {
2575 {
2485 if (dictSize < 8) return 0;
2576 if (dictSize < 8) return 0;
2486 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
2577 if (MEM_readLE32(dict) != ZSTD_MAGIC_DICTIONARY) return 0;
2487 return MEM_readLE32((const char*)dict + ZSTD_frameIdSize);
2578 return MEM_readLE32((const char*)dict + ZSTD_FRAMEIDSIZE);
2488 }
2579 }
2489
2580
2490 /*! ZSTD_getDictID_fromDDict() :
2581 /*! ZSTD_getDictID_fromDDict() :
@@ -2560,12 +2651,15 b' size_t ZSTD_freeDStream(ZSTD_DStream* zd'
2560 }
2651 }
2561
2652
2562
2653
2563 /* *** Initialization *** */
2654 /* *** Initialization *** */
2564
2655
2565 size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
2656 size_t ZSTD_DStreamInSize(void) { return ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize; }
2566 size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
2657 size_t ZSTD_DStreamOutSize(void) { return ZSTD_BLOCKSIZE_MAX; }
2567
2658
2568 size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType)
2659 size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx,
2660 const void* dict, size_t dictSize,
2661 ZSTD_dictLoadMethod_e dictLoadMethod,
2662 ZSTD_dictContentType_e dictContentType)
2569 {
2663 {
2570 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2664 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2571 ZSTD_freeDDict(dctx->ddictLocal);
2665 ZSTD_freeDDict(dctx->ddictLocal);
@@ -2607,6 +2701,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D'
2607 {
2701 {
2608 DEBUGLOG(4, "ZSTD_initDStream_usingDict");
2702 DEBUGLOG(4, "ZSTD_initDStream_usingDict");
2609 zds->streamStage = zdss_init;
2703 zds->streamStage = zdss_init;
2704 zds->noForwardProgress = 0;
2610 CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
2705 CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) );
2611 return ZSTD_frameHeaderSize_prefix;
2706 return ZSTD_frameHeaderSize_prefix;
2612 }
2707 }
@@ -2618,13 +2713,6 b' size_t ZSTD_initDStream(ZSTD_DStream* zd'
2618 return ZSTD_initDStream_usingDict(zds, NULL, 0);
2713 return ZSTD_initDStream_usingDict(zds, NULL, 0);
2619 }
2714 }
2620
2715
2621 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2622 {
2623 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2624 dctx->ddict = ddict;
2625 return 0;
2626 }
2627
2628 /* ZSTD_initDStream_usingDDict() :
2716 /* ZSTD_initDStream_usingDDict() :
2629 * ddict will just be referenced, and must outlive decompression session
2717 * ddict will just be referenced, and must outlive decompression session
2630 * this function cannot fail */
2718 * this function cannot fail */
@@ -2663,6 +2751,13 b' size_t ZSTD_setDStreamParameter(ZSTD_DSt'
2663 return 0;
2751 return 0;
2664 }
2752 }
2665
2753
2754 size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
2755 {
2756 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2757 dctx->ddict = ddict;
2758 return 0;
2759 }
2760
2666 size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2761 size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize)
2667 {
2762 {
2668 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
2763 if (dctx->streamStage != zdss_init) return ERROR(stage_wrong);
@@ -2767,7 +2862,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2767 return hint;
2862 return hint;
2768 } }
2863 } }
2769 #endif
2864 #endif
2770 { size_t const hSize = ZSTD_getFrameHeader_internal(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
2865 { size_t const hSize = ZSTD_getFrameHeader_advanced(&zds->fParams, zds->headerBuffer, zds->lhSize, zds->format);
2771 DEBUGLOG(5, "header size : %u", (U32)hSize);
2866 DEBUGLOG(5, "header size : %u", (U32)hSize);
2772 if (ZSTD_isError(hSize)) {
2867 if (ZSTD_isError(hSize)) {
2773 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
2868 #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1)
@@ -2828,7 +2923,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2828 CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
2923 CHECK_F(ZSTD_decompressBegin_usingDDict(zds, zds->ddict));
2829
2924
2830 if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2925 if ((MEM_readLE32(zds->headerBuffer) & 0xFFFFFFF0U) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
2831 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_frameIdSize);
2926 zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
2832 zds->stage = ZSTDds_skipFrame;
2927 zds->stage = ZSTDds_skipFrame;
2833 } else {
2928 } else {
2834 CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
2929 CHECK_F(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize));
@@ -2947,8 +3042,18 b' size_t ZSTD_decompressStream(ZSTD_DStrea'
2947 } }
3042 } }
2948
3043
2949 /* result */
3044 /* result */
2950 input->pos += (size_t)(ip-istart);
3045 input->pos = (size_t)(ip - (const char*)(input->src));
2951 output->pos += (size_t)(op-ostart);
3046 output->pos = (size_t)(op - (char*)(output->dst));
3047 if ((ip==istart) && (op==ostart)) { /* no forward progress */
3048 zds->noForwardProgress ++;
3049 if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) {
3050 if (op==oend) return ERROR(dstSize_tooSmall);
3051 if (ip==iend) return ERROR(srcSize_wrong);
3052 assert(0);
3053 }
3054 } else {
3055 zds->noForwardProgress = 0;
3056 }
2952 { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
3057 { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zds);
2953 if (!nextSrcSizeHint) { /* frame fully decoded */
3058 if (!nextSrcSizeHint) { /* frame fully decoded */
2954 if (zds->outEnd == zds->outStart) { /* output fully flushed */
3059 if (zds->outEnd == zds->outStart) { /* output fully flushed */
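
The hunk above makes ZSTD_decompressStream() detect a stalled caller: once ZSTD_NO_FORWARD_PROGRESS_MAX consecutive calls neither consume input nor produce output, it returns dstSize_tooSmall (the output buffer is full) or srcSize_wrong (the input is exhausted mid-frame) instead of looping forever. A caller that drains its output buffer and feeds fresh input between calls never trips the counter. A minimal sketch of such a loop against the public streaming API (the helper name, buffer handling, and I/O are illustrative, not part of this patch):

    #include <stdio.h>
    #include <stdlib.h>
    #include <zstd.h>

    /* Decompress fin to fout with the streaming API; returns 0 on success.
     * Each iteration either feeds fresh input or drains output, so the
     * no-forward-progress counter added above never fires for this caller. */
    static int stream_decompress(FILE* fin, FILE* fout)
    {
        size_t const inCap  = ZSTD_DStreamInSize();
        size_t const outCap = ZSTD_DStreamOutSize();
        void* const inBuf  = malloc(inCap);
        void* const outBuf = malloc(outCap);
        ZSTD_DStream* const ds = ZSTD_createDStream();
        int ret = 1;
        if (!inBuf || !outBuf || !ds) goto cleanup;
        if (ZSTD_isError(ZSTD_initDStream(ds))) goto cleanup;
        for (;;) {
            size_t const readSize = fread(inBuf, 1, inCap, fin);
            ZSTD_inBuffer input = { inBuf, readSize, 0 };
            if (readSize == 0) break;                  /* EOF mid-frame => error */
            while (input.pos < input.size) {
                ZSTD_outBuffer output = { outBuf, outCap, 0 };
                size_t const hint = ZSTD_decompressStream(ds, &output, &input);
                if (ZSTD_isError(hint)) goto cleanup;  /* incl. the new no-progress errors */
                fwrite(outBuf, 1, output.pos, fout);
                if (hint == 0) { ret = 0; goto cleanup; }  /* frame decoded and flushed */
            }
        }
    cleanup:
        ZSTD_freeDStream(ds);
        free(inBuf);
        free(outBuf);
        return ret;
    }
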
@@ -29,6 +29,7 @@
29 #include "mem.h" /* read */
29 #include "mem.h" /* read */
30 #include "pool.h"
30 #include "pool.h"
31 #include "threading.h"
31 #include "threading.h"
32 #include "cover.h"
32 #include "zstd_internal.h" /* includes zstd.h */
33 #include "zstd_internal.h" /* includes zstd.h */
33 #ifndef ZDICT_STATIC_LINKING_ONLY
34 #ifndef ZDICT_STATIC_LINKING_ONLY
34 #define ZDICT_STATIC_LINKING_ONLY
35 #define ZDICT_STATIC_LINKING_ONLY
@@ -39,6 +40,7 @@
39 * Constants
40 * Constants
40 ***************************************/
41 ***************************************/
41 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
42 #define COVER_MAX_SAMPLES_SIZE (sizeof(size_t) == 8 ? ((U32)-1) : ((U32)1 GB))
43 #define DEFAULT_SPLITPOINT 1.0
42
44
43 /*-*************************************
45 /*-*************************************
44 * Console display
46 * Console display
@@ -184,7 +186,7 @@ static void COVER_map_remove(COVER_map_t
184 }
186 }
185
187
186 /**
188 /**
187 * Destroyes a map that is inited with COVER_map_init().
189 * Destroys a map that is inited with COVER_map_init().
188 */
190 */
189 static void COVER_map_destroy(COVER_map_t *map) {
191 static void COVER_map_destroy(COVER_map_t *map) {
190 if (map->data) {
192 if (map->data) {
@@ -203,6 +205,8 @@ typedef struct {
203 size_t *offsets;
205 size_t *offsets;
204 const size_t *samplesSizes;
206 const size_t *samplesSizes;
205 size_t nbSamples;
207 size_t nbSamples;
208 size_t nbTrainSamples;
209 size_t nbTestSamples;
206 U32 *suffix;
210 U32 *suffix;
207 size_t suffixSize;
211 size_t suffixSize;
208 U32 *freqs;
212 U32 *freqs;
@@ -220,9 +224,9 @@ static COVER_ctx_t *g_ctx = NULL;
220 /**
224 /**
221 * Returns the sum of the sample sizes.
225 * Returns the sum of the sample sizes.
222 */
226 */
223 static size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
227 size_t COVER_sum(const size_t *samplesSizes, unsigned nbSamples) {
224 size_t sum = 0;
228 size_t sum = 0;
225 size_t i;
229 unsigned i;
226 for (i = 0; i < nbSamples; ++i) {
230 for (i = 0; i < nbSamples; ++i) {
227 sum += samplesSizes[i];
231 sum += samplesSizes[i];
228 }
232 }
@@ -377,14 +381,6 @@ static void COVER_group(COVER_ctx_t *ctx
377 ctx->suffix[dmerId] = freq;
381 ctx->suffix[dmerId] = freq;
378 }
382 }
379
383
380 /**
381 * A segment is a range in the source as well as the score of the segment.
382 */
383 typedef struct {
384 U32 begin;
385 U32 end;
386 U32 score;
387 } COVER_segment_t;
388
384
389 /**
385 /**
390 * Selects the best segment in an epoch.
386 * Selects the best segment in an epoch.
@@ -494,6 +490,10 @@ static int COVER_checkParameters(ZDICT_c
494 if (parameters.d > parameters.k) {
490 if (parameters.d > parameters.k) {
495 return 0;
491 return 0;
496 }
492 }
493 /* 0 < splitPoint <= 1 */
494 if (parameters.splitPoint <= 0 || parameters.splitPoint > 1){
495 return 0;
496 }
497 return 1;
497 return 1;
498 }
498 }
499
499
@@ -531,9 +531,14 @@ static void COVER_ctx_destroy(COVER_ctx_
531 */
531 */
532 static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
532 static int COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer,
533 const size_t *samplesSizes, unsigned nbSamples,
533 const size_t *samplesSizes, unsigned nbSamples,
534 unsigned d) {
534 unsigned d, double splitPoint) {
535 const BYTE *const samples = (const BYTE *)samplesBuffer;
535 const BYTE *const samples = (const BYTE *)samplesBuffer;
536 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
536 const size_t totalSamplesSize = COVER_sum(samplesSizes, nbSamples);
537 /* Split samples into testing and training sets */
538 const unsigned nbTrainSamples = splitPoint < 1.0 ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
539 const unsigned nbTestSamples = splitPoint < 1.0 ? nbSamples - nbTrainSamples : nbSamples;
540 const size_t trainingSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes, nbTrainSamples) : totalSamplesSize;
541 const size_t testSamplesSize = splitPoint < 1.0 ? COVER_sum(samplesSizes + nbTrainSamples, nbTestSamples) : totalSamplesSize;
537 /* Checks */
542 /* Checks */
538 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
543 if (totalSamplesSize < MAX(d, sizeof(U64)) ||
539 totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
544 totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) {
@@ -541,15 +546,29 @@ static int COVER_ctx_init(COVER_ctx_t *c
541 (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
546 (U32)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20));
542 return 0;
547 return 0;
543 }
548 }
549 /* Check if there are at least 5 training samples */
550 if (nbTrainSamples < 5) {
551 DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples);
552 return 0;
553 }
554 /* Check if there's testing sample */
555 if (nbTestSamples < 1) {
556 DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples);
557 return 0;
558 }
544 /* Zero the context */
559 /* Zero the context */
545 memset(ctx, 0, sizeof(*ctx));
560 memset(ctx, 0, sizeof(*ctx));
546 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbSamples,
561 DISPLAYLEVEL(2, "Training on %u samples of total size %u\n", nbTrainSamples,
547 (U32)totalSamplesSize);
562 (U32)trainingSamplesSize);
563 DISPLAYLEVEL(2, "Testing on %u samples of total size %u\n", nbTestSamples,
564 (U32)testSamplesSize);
548 ctx->samples = samples;
565 ctx->samples = samples;
549 ctx->samplesSizes = samplesSizes;
566 ctx->samplesSizes = samplesSizes;
550 ctx->nbSamples = nbSamples;
567 ctx->nbSamples = nbSamples;
568 ctx->nbTrainSamples = nbTrainSamples;
569 ctx->nbTestSamples = nbTestSamples;
551 /* Partial suffix array */
570 /* Partial suffix array */
552 ctx->suffixSize = totalSamplesSize - MAX(d, sizeof(U64)) + 1;
571 ctx->suffixSize = trainingSamplesSize - MAX(d, sizeof(U64)) + 1;
553 ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
572 ctx->suffix = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
554 /* Maps index to the dmerID */
573 /* Maps index to the dmerID */
555 ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
574 ctx->dmerAt = (U32 *)malloc(ctx->suffixSize * sizeof(U32));
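
Worked through with illustrative numbers, the split logic above behaves as follows: nbSamples = 1000 and splitPoint = 0.75 yield 750 training samples and 250 testing samples, while splitPoint = 1.0 (the legacy behaviour) makes all 1000 samples serve as both sets. A standalone sketch of the same truncating computation:

    #include <stdio.h>

    int main(void)
    {
        unsigned const nbSamples = 1000;   /* illustrative sample count */
        double const splitPoint  = 0.75;   /* fastCover's optimization default */
        /* Mirrors COVER_ctx_init(): the train count truncates, never rounds up. */
        unsigned const nbTrainSamples = splitPoint < 1.0
            ? (unsigned)((double)nbSamples * splitPoint) : nbSamples;
        unsigned const nbTestSamples  = splitPoint < 1.0
            ? nbSamples - nbTrainSamples : nbSamples;
        printf("train=%u test=%u\n", nbTrainSamples, nbTestSamples);
        /* prints: train=750 test=250; with splitPoint = 1.0 both would be 1000 */
        return 0;
    }
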
@@ -563,7 +582,7 @@ static int COVER_ctx_init(COVER_ctx_t *c
563 ctx->freqs = NULL;
582 ctx->freqs = NULL;
564 ctx->d = d;
583 ctx->d = d;
565
584
566 /* Fill offsets from the samlesSizes */
585 /* Fill offsets from the samplesSizes */
567 {
586 {
568 U32 i;
587 U32 i;
569 ctx->offsets[0] = 0;
588 ctx->offsets[0] = 0;
@@ -581,10 +600,17 @@ static int COVER_ctx_init(COVER_ctx_t *c
581 for (i = 0; i < ctx->suffixSize; ++i) {
600 for (i = 0; i < ctx->suffixSize; ++i) {
582 ctx->suffix[i] = i;
601 ctx->suffix[i] = i;
583 }
602 }
584 /* qsort doesn't take an opaque pointer, so pass as a global */
603 /* qsort doesn't take an opaque pointer, so pass as a global.
604 * On OpenBSD qsort() is not guaranteed to be stable, their mergesort() is.
605 */
585 g_ctx = ctx;
606 g_ctx = ctx;
607 #if defined(__OpenBSD__)
608 mergesort(ctx->suffix, ctx->suffixSize, sizeof(U32),
609 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
610 #else
586 qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
611 qsort(ctx->suffix, ctx->suffixSize, sizeof(U32),
587 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
612 (ctx->d <= 8 ? &COVER_strict_cmp8 : &COVER_strict_cmp));
613 #endif
588 }
614 }
589 DISPLAYLEVEL(2, "Computing frequencies\n");
615 DISPLAYLEVEL(2, "Computing frequencies\n");
590 /* For each dmer group (group of positions with the same first d bytes):
616 /* For each dmer group (group of positions with the same first d bytes):
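
The qsort()/mergesort() dispatch above follows the new comment: ISO C does not guarantee qsort() stability, OpenBSD's implementation in particular is not stable, and its BSD-specific mergesort() is the stable replacement, presumably keeping the suffix ordering deterministic across platforms. The same pattern in isolation, as a sketch (the helper name is illustrative):

    #include <stdlib.h>

    /* Dispatch to a stable sort where qsort() stability cannot be relied on.
     * cmp is any qsort-style comparator over U32 indices. */
    static void suffix_sort(unsigned* base, size_t n,
                            int (*cmp)(const void*, const void*))
    {
    #if defined(__OpenBSD__)
        mergesort(base, n, sizeof(unsigned), cmp);  /* BSD extension, stable */
    #else
        qsort(base, n, sizeof(unsigned), cmp);      /* original code path */
    #endif
    }
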
@@ -613,7 +639,7 @@ static size_t COVER_buildDictionary(cons
613 /* Divide the data up into epochs of equal size.
639 /* Divide the data up into epochs of equal size.
614 * We will select at least one segment from each epoch.
640 * We will select at least one segment from each epoch.
615 */
641 */
616 const U32 epochs = (U32)(dictBufferCapacity / parameters.k);
642 const U32 epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4));
617 const U32 epochSize = (U32)(ctx->suffixSize / epochs);
643 const U32 epochSize = (U32)(ctx->suffixSize / epochs);
618 size_t epoch;
644 size_t epoch;
619 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
645 DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", epochs,
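
The new epoch formula above trades many tiny epochs for fewer, larger ones: it budgets roughly four k-sized segments per epoch and clamps at a minimum of one, where the old formula could produce epochs too small to contain a good segment. With illustrative numbers, a sketch of the arithmetic:

    #include <stdio.h>

    int main(void)
    {
        /* Illustrative values only, not taken from the patch. */
        size_t const dictBufferCapacity = 112640;  /* 110 KB dictionary budget */
        unsigned const k = 1024;                   /* segment size */
        size_t const suffixSize = 2700000;         /* hypothetical nb of positions */
        unsigned const raw = (unsigned)(dictBufferCapacity / k / 4);
        unsigned const epochs = raw > 1 ? raw : 1; /* MAX(1, raw) => 27 */
        printf("epochs=%u epochSize=%u\n", epochs, (unsigned)(suffixSize / epochs));
        /* prints: epochs=27 epochSize=100000 (the old formula gave 110 epochs) */
        return 0;
    }
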
@@ -658,7 +684,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffe
658 BYTE* const dict = (BYTE*)dictBuffer;
684 BYTE* const dict = (BYTE*)dictBuffer;
659 COVER_ctx_t ctx;
685 COVER_ctx_t ctx;
660 COVER_map_t activeDmers;
686 COVER_map_t activeDmers;
661
687 parameters.splitPoint = 1.0;
662 /* Initialize global data */
688 /* Initialize global data */
663 g_displayLevel = parameters.zParams.notificationLevel;
689 g_displayLevel = parameters.zParams.notificationLevel;
664 /* Checks */
690 /* Checks */
@@ -677,7 +703,7 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffe
677 }
703 }
678 /* Initialize context and activeDmers */
704 /* Initialize context and activeDmers */
679 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
705 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples,
680 parameters.d)) {
706 parameters.d, parameters.splitPoint)) {
681 return ERROR(GENERIC);
707 return ERROR(GENERIC);
682 }
708 }
683 if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
709 if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) {
@@ -704,28 +730,65 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffe
704 }
730 }
705 }
731 }
706
732
707 /**
733
708 * COVER_best_t is used for two purposes:
734
709 * 1. Synchronizing threads.
735 size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters,
710 * 2. Saving the best parameters and dictionary.
736 const size_t *samplesSizes, const BYTE *samples,
711 *
737 size_t *offsets,
712 * All of the methods except COVER_best_init() are thread safe if zstd is
738 size_t nbTrainSamples, size_t nbSamples,
713 * compiled with multithreaded support.
739 BYTE *const dict, size_t dictBufferCapacity) {
714 */
740 size_t totalCompressedSize = ERROR(GENERIC);
715 typedef struct COVER_best_s {
741 /* Pointers */
716 ZSTD_pthread_mutex_t mutex;
742 ZSTD_CCtx *cctx;
717 ZSTD_pthread_cond_t cond;
743 ZSTD_CDict *cdict;
718 size_t liveJobs;
744 void *dst;
719 void *dict;
745 /* Local variables */
720 size_t dictSize;
746 size_t dstCapacity;
721 ZDICT_cover_params_t parameters;
747 size_t i;
722 size_t compressedSize;
748 /* Allocate dst with enough space to compress the maximum sized sample */
723 } COVER_best_t;
749 {
750 size_t maxSampleSize = 0;
751 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
752 for (; i < nbSamples; ++i) {
753 maxSampleSize = MAX(samplesSizes[i], maxSampleSize);
754 }
755 dstCapacity = ZSTD_compressBound(maxSampleSize);
756 dst = malloc(dstCapacity);
757 }
758 /* Create the cctx and cdict */
759 cctx = ZSTD_createCCtx();
760 cdict = ZSTD_createCDict(dict, dictBufferCapacity,
761 parameters.zParams.compressionLevel);
762 if (!dst || !cctx || !cdict) {
763 goto _compressCleanup;
764 }
765 /* Compress each sample and sum their sizes (or error) */
766 totalCompressedSize = dictBufferCapacity;
767 i = parameters.splitPoint < 1.0 ? nbTrainSamples : 0;
768 for (; i < nbSamples; ++i) {
769 const size_t size = ZSTD_compress_usingCDict(
770 cctx, dst, dstCapacity, samples + offsets[i],
771 samplesSizes[i], cdict);
772 if (ZSTD_isError(size)) {
773 totalCompressedSize = ERROR(GENERIC);
774 goto _compressCleanup;
775 }
776 totalCompressedSize += size;
777 }
778 _compressCleanup:
779 ZSTD_freeCCtx(cctx);
780 ZSTD_freeCDict(cdict);
781 if (dst) {
782 free(dst);
783 }
784 return totalCompressedSize;
785 }
786
724
787
725 /**
788 /**
726 * Initialize the `COVER_best_t`.
789 * Initialize the `COVER_best_t`.
727 */
790 */
728 static void COVER_best_init(COVER_best_t *best) {
791 void COVER_best_init(COVER_best_t *best) {
729 if (best==NULL) return; /* compatible with init on NULL */
792 if (best==NULL) return; /* compatible with init on NULL */
730 (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
793 (void)ZSTD_pthread_mutex_init(&best->mutex, NULL);
731 (void)ZSTD_pthread_cond_init(&best->cond, NULL);
794 (void)ZSTD_pthread_cond_init(&best->cond, NULL);
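
COVER_checkTotalCompressedSize() above, extracted so that cover and fastCover can share it, reduces a candidate dictionary to a single score: the dictionary size plus the summed compressed sizes of the held-out samples (or of every sample when splitPoint is 1.0), so lower is better and the dictionary's own footprint counts against it. A sketch of the same scoring idea using only public API (the helper name and sample layout are illustrative):

    #include <stdlib.h>
    #include <zstd.h>

    /* Score a candidate dictionary: dictSize plus the total compressed size
     * of the test samples. Returns (size_t)-1 on failure; smaller scores win. */
    static size_t score_dictionary(const void* dict, size_t dictSize,
                                   const void* const* samples,
                                   const size_t* sampleSizes, size_t nbSamples,
                                   int level)
    {
        ZSTD_CCtx* const cctx = ZSTD_createCCtx();
        ZSTD_CDict* const cdict = ZSTD_createCDict(dict, dictSize, level);
        size_t score = dictSize;
        size_t i;
        if (!cctx || !cdict) { score = (size_t)-1; goto done; }
        for (i = 0; i < nbSamples; ++i) {
            size_t const cap = ZSTD_compressBound(sampleSizes[i]);
            void* const dst = malloc(cap);
            size_t cSize;
            if (!dst) { score = (size_t)-1; goto done; }
            cSize = ZSTD_compress_usingCDict(cctx, dst, cap,
                                             samples[i], sampleSizes[i], cdict);
            free(dst);
            if (ZSTD_isError(cSize)) { score = (size_t)-1; goto done; }
            score += cSize;
        }
    done:
        ZSTD_freeCCtx(cctx);
        ZSTD_freeCDict(cdict);
        return score;
    }
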
@@ -739,7 +802,7 @@ static void COVER_best_init(COVER_best_t
739 /**
802 /**
740 * Wait until liveJobs == 0.
803 * Wait until liveJobs == 0.
741 */
804 */
742 static void COVER_best_wait(COVER_best_t *best) {
805 void COVER_best_wait(COVER_best_t *best) {
743 if (!best) {
806 if (!best) {
744 return;
807 return;
745 }
808 }
@@ -753,7 +816,7 @@ static void COVER_best_wait(COVER_best_t
753 /**
816 /**
754 * Call COVER_best_wait() and then destroy the COVER_best_t.
817 * Call COVER_best_wait() and then destroy the COVER_best_t.
755 */
818 */
756 static void COVER_best_destroy(COVER_best_t *best) {
819 void COVER_best_destroy(COVER_best_t *best) {
757 if (!best) {
820 if (!best) {
758 return;
821 return;
759 }
822 }
@@ -769,7 +832,7 @@ static void COVER_best_destroy(COVER_bes
769 * Called when a thread is about to be launched.
832 * Called when a thread is about to be launched.
770 * Increments liveJobs.
833 * Increments liveJobs.
771 */
834 */
772 static void COVER_best_start(COVER_best_t *best) {
835 void COVER_best_start(COVER_best_t *best) {
773 if (!best) {
836 if (!best) {
774 return;
837 return;
775 }
838 }
@@ -783,7 +846,7 @@ static void COVER_best_start(COVER_best_
783 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
846 * Decrements liveJobs and signals any waiting threads if liveJobs == 0.
784 * If this dictionary is the best so far save it and its parameters.
847 * If this dictionary is the best so far save it and its parameters.
785 */
848 */
786 static void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
849 void COVER_best_finish(COVER_best_t *best, size_t compressedSize,
787 ZDICT_cover_params_t parameters, void *dict,
850 ZDICT_cover_params_t parameters, void *dict,
788 size_t dictSize) {
851 size_t dictSize) {
789 if (!best) {
852 if (!best) {
@@ -814,10 +877,10 @@ static void COVER_best_finish(COVER_best
814 best->parameters = parameters;
877 best->parameters = parameters;
815 best->compressedSize = compressedSize;
878 best->compressedSize = compressedSize;
816 }
879 }
817 ZSTD_pthread_mutex_unlock(&best->mutex);
818 if (liveJobs == 0) {
880 if (liveJobs == 0) {
819 ZSTD_pthread_cond_broadcast(&best->cond);
881 ZSTD_pthread_cond_broadcast(&best->cond);
820 }
882 }
883 ZSTD_pthread_mutex_unlock(&best->mutex);
821 }
884 }
822 }
885 }
823
886
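
The reordering in COVER_best_finish() above is easy to miss: the broadcast now happens while the mutex is still held. With the old order, the last worker could unlock, get descheduled, and broadcast only after the thread in COVER_best_wait() or COVER_best_destroy() had already observed liveJobs == 0 and torn the synchronization objects down, so the late broadcast could touch a destroyed condition variable. Signaling under the mutex closes that window. The general shape of the safe pattern, as a sketch:

    #include <stddef.h>
    #include <pthread.h>

    typedef struct {
        pthread_mutex_t mutex;
        pthread_cond_t cond;
        size_t liveJobs;
    } jobs_t;

    /* Worker side: mark one job finished; signal while the lock is held. */
    static void job_finish(jobs_t* j)
    {
        pthread_mutex_lock(&j->mutex);
        j->liveJobs--;
        if (j->liveJobs == 0) {
            pthread_cond_broadcast(&j->cond);  /* still under the mutex */
        }
        pthread_mutex_unlock(&j->mutex);
    }

    /* Waiter side: safe to destroy the cond only after this returns. */
    static void jobs_wait(jobs_t* j)
    {
        pthread_mutex_lock(&j->mutex);
        while (j->liveJobs != 0) {
            pthread_cond_wait(&j->cond, &j->mutex);
        }
        pthread_mutex_unlock(&j->mutex);
    }
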
@@ -832,7 +895,7 @@ typedef struct COVER_tryParameters_data_
832 } COVER_tryParameters_data_t;
895 } COVER_tryParameters_data_t;
833
896
834 /**
897 /**
835 * Tries a set of parameters and upates the COVER_best_t with the results.
898 * Tries a set of parameters and updates the COVER_best_t with the results.
836 * This function is thread safe if zstd is compiled with multithreaded support.
899 * This function is thread safe if zstd is compiled with multithreaded support.
837 * It takes its parameters as an *OWNING* opaque pointer to support threading.
900 * It takes its parameters as an *OWNING* opaque pointer to support threading.
838 */
901 */
@@ -863,7 +926,7 @@ static void COVER_tryParameters(void *op
863 dictBufferCapacity, parameters);
926 dictBufferCapacity, parameters);
864 dictBufferCapacity = ZDICT_finalizeDictionary(
927 dictBufferCapacity = ZDICT_finalizeDictionary(
865 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
928 dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail,
866 ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbSamples,
929 ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples,
867 parameters.zParams);
930 parameters.zParams);
868 if (ZDICT_isError(dictBufferCapacity)) {
931 if (ZDICT_isError(dictBufferCapacity)) {
869 DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
932 DISPLAYLEVEL(1, "Failed to finalize dictionary\n");
@@ -871,49 +934,10 @@ static void COVER_tryParameters(void *op
871 }
934 }
872 }
935 }
873 /* Check total compressed size */
936 /* Check total compressed size */
874 {
937 totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes,
875 /* Pointers */
938 ctx->samples, ctx->offsets,
876 ZSTD_CCtx *cctx;
939 ctx->nbTrainSamples, ctx->nbSamples,
877 ZSTD_CDict *cdict;
940 dict, dictBufferCapacity);
878 void *dst;
879 /* Local variables */
880 size_t dstCapacity;
881 size_t i;
882 /* Allocate dst with enough space to compress the maximum sized sample */
883 {
884 size_t maxSampleSize = 0;
885 for (i = 0; i < ctx->nbSamples; ++i) {
886 maxSampleSize = MAX(ctx->samplesSizes[i], maxSampleSize);
887 }
888 dstCapacity = ZSTD_compressBound(maxSampleSize);
889 dst = malloc(dstCapacity);
890 }
891 /* Create the cctx and cdict */
892 cctx = ZSTD_createCCtx();
893 cdict = ZSTD_createCDict(dict, dictBufferCapacity,
894 parameters.zParams.compressionLevel);
895 if (!dst || !cctx || !cdict) {
896 goto _compressCleanup;
897 }
898 /* Compress each sample and sum their sizes (or error) */
899 totalCompressedSize = dictBufferCapacity;
900 for (i = 0; i < ctx->nbSamples; ++i) {
901 const size_t size = ZSTD_compress_usingCDict(
902 cctx, dst, dstCapacity, ctx->samples + ctx->offsets[i],
903 ctx->samplesSizes[i], cdict);
904 if (ZSTD_isError(size)) {
905 totalCompressedSize = ERROR(GENERIC);
906 goto _compressCleanup;
907 }
908 totalCompressedSize += size;
909 }
910 _compressCleanup:
911 ZSTD_freeCCtx(cctx);
912 ZSTD_freeCDict(cdict);
913 if (dst) {
914 free(dst);
915 }
916 }
917
941
918 _cleanup:
942 _cleanup:
919 COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
943 COVER_best_finish(data->best, totalCompressedSize, parameters, dict,
@@ -934,6 +958,8 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainF
934 ZDICT_cover_params_t *parameters) {
958 ZDICT_cover_params_t *parameters) {
935 /* constants */
959 /* constants */
936 const unsigned nbThreads = parameters->nbThreads;
960 const unsigned nbThreads = parameters->nbThreads;
961 const double splitPoint =
962 parameters->splitPoint <= 0.0 ? DEFAULT_SPLITPOINT : parameters->splitPoint;
937 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
963 const unsigned kMinD = parameters->d == 0 ? 6 : parameters->d;
938 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
964 const unsigned kMaxD = parameters->d == 0 ? 8 : parameters->d;
939 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
965 const unsigned kMinK = parameters->k == 0 ? 50 : parameters->k;
@@ -951,6 +977,10 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainF
951 POOL_ctx *pool = NULL;
977 POOL_ctx *pool = NULL;
952
978
953 /* Checks */
979 /* Checks */
980 if (splitPoint <= 0 || splitPoint > 1) {
981 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
982 return ERROR(GENERIC);
983 }
954 if (kMinK < kMaxD || kMaxK < kMinK) {
984 if (kMinK < kMaxD || kMaxK < kMinK) {
955 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
985 LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n");
956 return ERROR(GENERIC);
986 return ERROR(GENERIC);
@@ -981,7 +1011,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainF
981 /* Initialize the context for this value of d */
1011 /* Initialize the context for this value of d */
982 COVER_ctx_t ctx;
1012 COVER_ctx_t ctx;
983 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
1013 LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d);
984 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d)) {
1014 if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) {
985 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
1015 LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n");
986 COVER_best_destroy(&best);
1016 COVER_best_destroy(&best);
987 POOL_free(pool);
1017 POOL_free(pool);
@@ -1006,6 +1036,7 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainF
1006 data->parameters = *parameters;
1036 data->parameters = *parameters;
1007 data->parameters.k = k;
1037 data->parameters.k = k;
1008 data->parameters.d = d;
1038 data->parameters.d = d;
1039 data->parameters.splitPoint = splitPoint;
1009 data->parameters.steps = kSteps;
1040 data->parameters.steps = kSteps;
1010 data->parameters.zParams.notificationLevel = g_displayLevel;
1041 data->parameters.zParams.notificationLevel = g_displayLevel;
1011 /* Check the parameters */
1042 /* Check the parameters */
@@ -1637,7 +1637,7 @@ construct_SA(const unsigned char *T, int
1637 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1637 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638 k = SA + BUCKET_B(c2 = c0, c1);
1638 k = SA + BUCKET_B(c2 = c0, c1);
1639 }
1639 }
1640 assert(k < j);
1640 assert(k < j); assert(k != NULL);
1641 *k-- = s;
1641 *k-- = s;
1642 } else {
1642 } else {
1643 assert(((s == 0) && (T[s] == c1)) || (s < 0));
1643 assert(((s == 0) && (T[s] == c1)) || (s < 0));
@@ -1701,7 +1701,7 @@ construct_BWT(const unsigned char *T, in
1701 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1701 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702 k = SA + BUCKET_B(c2 = c0, c1);
1702 k = SA + BUCKET_B(c2 = c0, c1);
1703 }
1703 }
1704 assert(k < j);
1704 assert(k < j); assert(k != NULL);
1705 *k-- = s;
1705 *k-- = s;
1706 } else if(s != 0) {
1706 } else if(s != 0) {
1707 *j = ~s;
1707 *j = ~s;
@@ -1785,7 +1785,7 @@ construct_BWT_indexes(const unsigned cha
1785 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1785 if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786 k = SA + BUCKET_B(c2 = c0, c1);
1786 k = SA + BUCKET_B(c2 = c0, c1);
1787 }
1787 }
1788 assert(k < j);
1788 assert(k < j); assert(k != NULL);
1789 *k-- = s;
1789 *k-- = s;
1790 } else if(s != 0) {
1790 } else if(s != 0) {
1791 *j = ~s;
1791 *j = ~s;
@@ -293,7 +293,7 @@ static dictItem ZDICT_analyzePos(
293 refinedEnd = refinedStart + selectedCount;
293 refinedEnd = refinedStart + selectedCount;
294 }
294 }
295
295
296 /* evaluate gain based on new ref */
296 /* evaluate gain based on new dict */
297 start = refinedStart;
297 start = refinedStart;
298 pos = suffix[refinedStart];
298 pos = suffix[refinedStart];
299 end = start;
299 end = start;
@@ -341,7 +341,7 @@ static dictItem ZDICT_analyzePos(
341 for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
341 for (i=MINMATCHLENGTH; i<=(int)maxLength; i++)
342 savings[i] = savings[i-1] + (lengthList[i] * (i-3));
342 savings[i] = savings[i-1] + (lengthList[i] * (i-3));
343
343
344 DISPLAYLEVEL(4, "Selected ref at position %u, of length %u : saves %u (ratio: %.2f) \n",
344 DISPLAYLEVEL(4, "Selected dict at position %u, of length %u : saves %u (ratio: %.2f) \n",
345 (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
345 (U32)pos, (U32)maxLength, savings[maxLength], (double)savings[maxLength] / maxLength);
346
346
347 solution.pos = (U32)pos;
347 solution.pos = (U32)pos;
@@ -581,7 +581,7 @@ static void ZDICT_fillNoise(void* buffer
581
581
582 typedef struct
582 typedef struct
583 {
583 {
584 ZSTD_CCtx* ref; /* contains reference to dictionary */
584 ZSTD_CDict* dict; /* dictionary */
585 ZSTD_CCtx* zc; /* working context */
585 ZSTD_CCtx* zc; /* working context */
586 void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
586 void* workPlace; /* must be ZSTD_BLOCKSIZE_MAX allocated */
587 } EStats_ress_t;
587 } EStats_ress_t;
@@ -597,8 +597,9 @@ static void ZDICT_countEStats(EStats_res
597 size_t cSize;
597 size_t cSize;
598
598
599 if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
599 if (srcSize > blockSizeMax) srcSize = blockSizeMax; /* protection vs large samples */
600 { size_t const errorCode = ZSTD_copyCCtx(esr.zc, esr.ref, 0);
600 { size_t const errorCode = ZSTD_compressBegin_usingCDict(esr.zc, esr.dict);
601 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_copyCCtx failed \n"); return; }
601 if (ZSTD_isError(errorCode)) { DISPLAYLEVEL(1, "warning : ZSTD_compressBegin_usingCDict failed \n"); return; }
602
602 }
603 }
603 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
604 cSize = ZSTD_compressBlock(esr.zc, esr.workPlace, ZSTD_BLOCKSIZE_MAX, src, srcSize);
604 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
605 if (ZSTD_isError(cSize)) { DISPLAYLEVEL(3, "warning : could not compress sample size %u \n", (U32)srcSize); return; }
@@ -697,7 +698,7 @@ static size_t ZDICT_analyzeEntropy(void*
697 short litLengthNCount[MaxLL+1];
698 short litLengthNCount[MaxLL+1];
698 U32 repOffset[MAXREPOFFSET];
699 U32 repOffset[MAXREPOFFSET];
699 offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
700 offsetCount_t bestRepOffset[ZSTD_REP_NUM+1];
700 EStats_ress_t esr;
701 EStats_ress_t esr = { NULL, NULL, NULL };
701 ZSTD_parameters params;
702 ZSTD_parameters params;
702 U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
703 U32 u, huffLog = 11, Offlog = OffFSELog, mlLog = MLFSELog, llLog = LLFSELog, total;
703 size_t pos = 0, errorCode;
704 size_t pos = 0, errorCode;
@@ -708,14 +709,6 @@ static size_t ZDICT_analyzeEntropy(void*
708
709
709 /* init */
710 /* init */
710 DEBUGLOG(4, "ZDICT_analyzeEntropy");
711 DEBUGLOG(4, "ZDICT_analyzeEntropy");
711 esr.ref = ZSTD_createCCtx();
712 esr.zc = ZSTD_createCCtx();
713 esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
714 if (!esr.ref || !esr.zc || !esr.workPlace) {
715 eSize = ERROR(memory_allocation);
716 DISPLAYLEVEL(1, "Not enough memory \n");
717 goto _cleanup;
718 }
719 if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
712 if (offcodeMax>OFFCODE_MAX) { eSize = ERROR(dictionaryCreation_failed); goto _cleanup; } /* too large dictionary */
720 for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
713 for (u=0; u<256; u++) countLit[u] = 1; /* any character must be described */
721 for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
714 for (u=0; u<=offcodeMax; u++) offcodeCount[u] = 1;
@@ -724,14 +717,17 @@ static size_t ZDICT_analyzeEntropy(void*
724 memset(repOffset, 0, sizeof(repOffset));
717 memset(repOffset, 0, sizeof(repOffset));
725 repOffset[1] = repOffset[4] = repOffset[8] = 1;
718 repOffset[1] = repOffset[4] = repOffset[8] = 1;
726 memset(bestRepOffset, 0, sizeof(bestRepOffset));
719 memset(bestRepOffset, 0, sizeof(bestRepOffset));
727 if (compressionLevel<=0) compressionLevel = g_compressionLevel_default;
720 if (compressionLevel==0) compressionLevel = g_compressionLevel_default;
728 params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
721 params = ZSTD_getParams(compressionLevel, averageSampleSize, dictBufferSize);
729 { size_t const beginResult = ZSTD_compressBegin_advanced(esr.ref, dictBuffer, dictBufferSize, params, 0);
722
730 if (ZSTD_isError(beginResult)) {
723 esr.dict = ZSTD_createCDict_advanced(dictBuffer, dictBufferSize, ZSTD_dlm_byRef, ZSTD_dct_rawContent, params.cParams, ZSTD_defaultCMem);
731 DISPLAYLEVEL(1, "error : ZSTD_compressBegin_advanced() failed : %s \n", ZSTD_getErrorName(beginResult));
724 esr.zc = ZSTD_createCCtx();
732 eSize = ERROR(GENERIC);
725 esr.workPlace = malloc(ZSTD_BLOCKSIZE_MAX);
733 goto _cleanup;
726 if (!esr.dict || !esr.zc || !esr.workPlace) {
734 } }
727 eSize = ERROR(memory_allocation);
728 DISPLAYLEVEL(1, "Not enough memory \n");
729 goto _cleanup;
730 }
735
731
736 /* collect stats on all samples */
732 /* collect stats on all samples */
737 for (u=0; u<nbFiles; u++) {
733 for (u=0; u<nbFiles; u++) {
@@ -856,7 +852,7 @@ static size_t ZDICT_analyzeEntropy(void*
856 eSize += 12;
852 eSize += 12;
857
853
858 _cleanup:
854 _cleanup:
859 ZSTD_freeCCtx(esr.ref);
855 ZSTD_freeCDict(esr.dict);
860 ZSTD_freeCCtx(esr.zc);
856 ZSTD_freeCCtx(esr.zc);
861 free(esr.workPlace);
857 free(esr.workPlace);
862
858
@@ -867,13 +863,13 @@ static size_t ZDICT_analyzeEntropy(void*
867
863
868 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
864 size_t ZDICT_finalizeDictionary(void* dictBuffer, size_t dictBufferCapacity,
869 const void* customDictContent, size_t dictContentSize,
865 const void* customDictContent, size_t dictContentSize,
870 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
866 const void* samplesBuffer, const size_t* samplesSizes,
871 ZDICT_params_t params)
867 unsigned nbSamples, ZDICT_params_t params)
872 {
868 {
873 size_t hSize;
869 size_t hSize;
874 #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
870 #define HBUFFSIZE 256 /* should prove large enough for all entropy headers */
875 BYTE header[HBUFFSIZE];
871 BYTE header[HBUFFSIZE];
876 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
872 int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
877 U32 const notificationLevel = params.notificationLevel;
873 U32 const notificationLevel = params.notificationLevel;
878
874
879 /* check conditions */
875 /* check conditions */
@@ -914,11 +910,12 @@ size_t ZDICT_finalizeDictionary(void* di
914 }
910 }
915
911
916
912
917 size_t ZDICT_addEntropyTablesFromBuffer_advanced(void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
913 static size_t ZDICT_addEntropyTablesFromBuffer_advanced(
918 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
914 void* dictBuffer, size_t dictContentSize, size_t dictBufferCapacity,
919 ZDICT_params_t params)
915 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
916 ZDICT_params_t params)
920 {
917 {
921 int const compressionLevel = (params.compressionLevel <= 0) ? g_compressionLevel_default : params.compressionLevel;
918 int const compressionLevel = (params.compressionLevel == 0) ? g_compressionLevel_default : params.compressionLevel;
922 U32 const notificationLevel = params.notificationLevel;
919 U32 const notificationLevel = params.notificationLevel;
923 size_t hSize = 8;
920 size_t hSize = 8;
924
921
@@ -947,7 +944,11 @@ size_t ZDICT_addEntropyTablesFromBuffer_
947 return MIN(dictBufferCapacity, hSize+dictContentSize);
944 return MIN(dictBufferCapacity, hSize+dictContentSize);
948 }
945 }
949
946
950
947 /* Hidden declaration for dbio.c */
948 size_t ZDICT_trainFromBuffer_unsafe_legacy(
949 void* dictBuffer, size_t maxDictSize,
950 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
951 ZDICT_legacy_params_t params);
951 /*! ZDICT_trainFromBuffer_unsafe_legacy() :
952 /*! ZDICT_trainFromBuffer_unsafe_legacy() :
952 * Warning : `samplesBuffer` must be followed by noisy guard band.
953 * Warning : `samplesBuffer` must be followed by noisy guard band.
953 * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
954 * @return : size of dictionary, or an error code which can be tested with ZDICT_isError()
@@ -991,8 +992,10 @@ size_t ZDICT_trainFromBuffer_unsafe_lega
991 U32 const pos = dictList[u].pos;
992 U32 const pos = dictList[u].pos;
992 U32 const length = dictList[u].length;
993 U32 const length = dictList[u].length;
993 U32 const printedLength = MIN(40, length);
994 U32 const printedLength = MIN(40, length);
994 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize))
995 if ((pos > samplesBuffSize) || ((pos + length) > samplesBuffSize)) {
996 free(dictList);
995 return ERROR(GENERIC); /* should never happen */
997 return ERROR(GENERIC); /* should never happen */
998 }
996 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
999 DISPLAYLEVEL(3, "%3u:%3u bytes at pos %8u, savings %7u bytes |",
997 u, length, pos, dictList[u].savings);
1000 u, length, pos, dictList[u].savings);
998 ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
1001 ZDICT_printHex((const char*)samplesBuffer+pos, printedLength);
@@ -1082,17 +1085,17 @@ size_t ZDICT_trainFromBuffer_legacy(void
1082 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
1085 size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
1083 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1086 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples)
1084 {
1087 {
1085 ZDICT_cover_params_t params;
1088 ZDICT_fastCover_params_t params;
1086 DEBUGLOG(3, "ZDICT_trainFromBuffer");
1089 DEBUGLOG(3, "ZDICT_trainFromBuffer");
1087 memset(&params, 0, sizeof(params));
1090 memset(&params, 0, sizeof(params));
1088 params.d = 8;
1091 params.d = 8;
1089 params.steps = 4;
1092 params.steps = 4;
1090 /* Default to level 6 since no compression level information is available */
1093 /* Default to level 3 since no compression level information is available */
1091 params.zParams.compressionLevel = 6;
1094 params.zParams.compressionLevel = 3;
1092 #if defined(ZSTD_DEBUG) && (ZSTD_DEBUG>=1)
1095 #if defined(DEBUGLEVEL) && (DEBUGLEVEL>=1)
1093 params.zParams.notificationLevel = ZSTD_DEBUG;
1096 params.zParams.notificationLevel = DEBUGLEVEL;
1094 #endif
1097 #endif
1095 return ZDICT_optimizeTrainFromBuffer_cover(dictBuffer, dictBufferCapacity,
1098 return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictBufferCapacity,
1096 samplesBuffer, samplesSizes, nbSamples,
1099 samplesBuffer, samplesSizes, nbSamples,
1097 &params);
1100 &params);
1098 }
1101 }
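
With this change the stable ZDICT_trainFromBuffer() entry point trains via fastCover at compression level 3 rather than COVER at level 6; the calling convention is untouched, so existing callers only see a speed difference. A minimal sketch of a caller (the synthetic corpus is illustrative, and a small or degenerate corpus may legitimately be rejected, which the error path reports):

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <zdict.h>

    int main(void)
    {
        /* Placeholder corpus: 100 small, similar records stored back to back.
         * Real training wants thousands of representative samples. */
        enum { NB = 100, SAMPLE = 32 };
        size_t sampleSizes[NB];
        char* const samplesBuffer = calloc(NB, SAMPLE);
        size_t const dictCapacity = 4096;
        void* const dictBuffer = malloc(dictCapacity);
        size_t dictSize = 0;
        int i;
        if (!samplesBuffer || !dictBuffer) return 1;
        for (i = 0; i < NB; ++i) {
            snprintf(samplesBuffer + (size_t)i * SAMPLE, SAMPLE,
                     "record=%04d;status=ok;flag=1", i);
            sampleSizes[i] = SAMPLE;   /* fixed-size samples, zero padded */
        }
        dictSize = ZDICT_trainFromBuffer(dictBuffer, dictCapacity,
                                         samplesBuffer, sampleSizes, NB);
        if (ZDICT_isError(dictSize))
            fprintf(stderr, "training failed: %s\n", ZDICT_getErrorName(dictSize));
        else
            printf("dictionary: %u bytes\n", (unsigned)dictSize);
        free(samplesBuffer);
        free(dictBuffer);
        return ZDICT_isError(dictSize) ? 1 : 0;
    }
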
@@ -39,7 +39,8 @@ extern "C" {
39
39
40 /*! ZDICT_trainFromBuffer():
40 /*! ZDICT_trainFromBuffer():
41 * Train a dictionary from an array of samples.
41 * Train a dictionary from an array of samples.
42 * Redirect towards ZDICT_optimizeTrainFromBuffer_cover() single-threaded, with d=8 and steps=4.
42 * Redirect towards ZDICT_optimizeTrainFromBuffer_fastCover() single-threaded, with d=8, steps=4,
43 * f=20, and accel=1.
43 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
44 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
44 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
45 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
45 * The resulting dictionary will be saved into `dictBuffer`.
46 * The resulting dictionary will be saved into `dictBuffer`.
@@ -52,7 +53,8 @@ extern "C" {
52 * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
53 * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
53 */
54 */
54 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
55 ZDICTLIB_API size_t ZDICT_trainFromBuffer(void* dictBuffer, size_t dictBufferCapacity,
55 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples);
56 const void* samplesBuffer,
57 const size_t* samplesSizes, unsigned nbSamples);
56
58
57
59
58 /*====== Helper functions ======*/
60 /*====== Helper functions ======*/
@@ -84,11 +86,22 @@ typedef struct {
84 typedef struct {
86 typedef struct {
85 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
87 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
86 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
88 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
87 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (32) : Higher means more parameters checked */
89 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
88 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
90 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
91 double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */
89 ZDICT_params_t zParams;
92 ZDICT_params_t zParams;
90 } ZDICT_cover_params_t;
93 } ZDICT_cover_params_t;
91
94
95 typedef struct {
96 unsigned k; /* Segment size : constraint: 0 < k : Reasonable range [16, 2048+] */
97 unsigned d; /* dmer size : constraint: 0 < d <= k : Reasonable range [6, 16] */
98 unsigned f; /* log of size of frequency array : constraint: 0 < f <= 31 : 0 means default(20)*/
99 unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */
100 unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */
101 double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used for training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */
102 unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */
103 ZDICT_params_t zParams;
104 } ZDICT_fastCover_params_t;
92
105
93 /*! ZDICT_trainFromBuffer_cover():
106 /*! ZDICT_trainFromBuffer_cover():
94 * Train a dictionary from an array of samples using the COVER algorithm.
107 * Train a dictionary from an array of samples using the COVER algorithm.
@@ -115,9 +128,9 @@ ZDICTLIB_API size_t ZDICT_trainFromBuffe
115 * dictionary constructed with those parameters is stored in `dictBuffer`.
128 * dictionary constructed with those parameters is stored in `dictBuffer`.
116 *
129 *
117 * All of the parameters d, k, steps are optional.
130 * All of the parameters d, k, steps are optional.
118 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8, 10, 12, 14, 16}.
131 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
119 * if steps is zero it defaults to its default value.
132 * If steps is zero, the default value (40) is used.
120 * If k is non-zero then we don't check multiple values of k, otherwise we check steps values in [16, 2048].
133 * If k is non-zero then we don't check multiple values of k, otherwise we check steps values of k in [50, 2000].
121 *
134 *
122 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
135 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
123 * or an error code, which can be tested with ZDICT_isError().
136 * or an error code, which can be tested with ZDICT_isError().
@@ -129,6 +142,48 @@ ZDICTLIB_API size_t ZDICT_optimizeTrainF
129 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
142 const void* samplesBuffer, const size_t* samplesSizes, unsigned nbSamples,
130 ZDICT_cover_params_t* parameters);
143 ZDICT_cover_params_t* parameters);
131
144
145 /*! ZDICT_trainFromBuffer_fastCover():
146 * Train a dictionary from an array of samples using a modified version of COVER algorithm.
147 * Samples must be stored concatenated in a single flat buffer `samplesBuffer`,
148 * supplied with an array of sizes `samplesSizes`, providing the size of each sample, in order.
149 * d and k are required.
150 * All other parameters are optional, and will use default values if not provided.
151 * The resulting dictionary will be saved into `dictBuffer`.
152 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
153 * or an error code, which can be tested with ZDICT_isError().
154 * Note: ZDICT_trainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory.
155 * Tips: In general, a reasonable dictionary has a size of ~ 100 KB.
156 * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`.
157 * In general, it's recommended to provide a few thousands samples, though this can vary a lot.
158 * It's recommended that total size of all samples be about ~x100 times the target size of dictionary.
159 */
160 ZDICTLIB_API size_t ZDICT_trainFromBuffer_fastCover(void *dictBuffer,
161 size_t dictBufferCapacity, const void *samplesBuffer,
162 const size_t *samplesSizes, unsigned nbSamples,
163 ZDICT_fastCover_params_t parameters);
164
165 /*! ZDICT_optimizeTrainFromBuffer_fastCover():
166 * The same requirements as above hold for all the parameters except `parameters`.
167 * This function tries many parameter combinations (specifically, k and d combinations)
168 * and picks the best parameters. `*parameters` is filled with the best parameters found,
169 * dictionary constructed with those parameters is stored in `dictBuffer`.
170 * All of the parameters d, k, steps, f, and accel are optional.
171 * If d is non-zero then we don't check multiple values of d, otherwise we check d = {6, 8}.
172 * If steps is zero, the default value (40) is used.
173 * If k is non-zero then we don't check multiple values of k, otherwise we check steps values of k in [50, 2000].
174 * If f is zero, default value of 20 is used.
175 * If accel is zero, default value of 1 is used.
176 *
177 * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`)
178 * or an error code, which can be tested with ZDICT_isError().
179 * On success `*parameters` contains the parameters selected.
180 * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread.
181 */
182 ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer,
183 size_t dictBufferCapacity, const void* samplesBuffer,
184 const size_t* samplesSizes, unsigned nbSamples,
185 ZDICT_fastCover_params_t* parameters);
186
132 /*! ZDICT_finalizeDictionary():
187 /*! ZDICT_finalizeDictionary():
133 * Given a custom content as a basis for dictionary, and a set of samples,
188 * Given a custom content as a basis for dictionary, and a set of samples,
134 * finalize dictionary by adding headers and statistics.
189 * finalize dictionary by adding headers and statistics.
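
A sketch of driving the new ZDICT_optimizeTrainFromBuffer_fastCover() declared above, leaving k and d zero so the function searches both and relying on the documented defaults for f, steps, splitPoint, and accel (the wrapper name is illustrative):

    #include <string.h>
    #include <zdict.h>

    /* samplesBuffer/samplesSizes/nbSamples laid out as for ZDICT_trainFromBuffer(). */
    static size_t train_fastcover(void* dictBuffer, size_t dictCapacity,
                                  const void* samplesBuffer,
                                  const size_t* samplesSizes, unsigned nbSamples)
    {
        ZDICT_fastCover_params_t params;
        memset(&params, 0, sizeof(params));   /* zeroed fields => documented defaults,
                                               * k = d = 0 => search both */
        params.zParams.compressionLevel = 3;
        /* On success, params.k and params.d hold the winning combination. */
        return ZDICT_optimizeTrainFromBuffer_fastCover(dictBuffer, dictCapacity,
                                                       samplesBuffer, samplesSizes,
                                                       nbSamples, &params);
    }
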
@@ -35,31 +35,43 @@ extern "C" {
35 #endif
35 #endif
36
36
37
37
38 /*******************************************************************************************************
38 /*******************************************************************************
39 Introduction
39 Introduction
40
40
41 zstd, short for Zstandard, is a fast lossless compression algorithm,
41 zstd, short for Zstandard, is a fast lossless compression algorithm, targeting
42 targeting real-time compression scenarios at zlib-level and better compression ratios.
42 real-time compression scenarios at zlib-level and better compression ratios.
43 The zstd compression library provides in-memory compression and decompression functions.
43 The zstd compression library provides in-memory compression and decompression
44 The library supports compression levels from 1 up to ZSTD_maxCLevel() which is currently 22.
44 functions.
45 Levels >= 20, labeled `--ultra`, should be used with caution, as they require more memory.
45
46 The library supports regular compression levels from 1 up to ZSTD_maxCLevel(),
47 which is currently 22. Levels >= 20, labeled `--ultra`, should be used with
48 caution, as they require more memory. The library also offers negative
49 compression levels, which extend the range of speed vs. ratio preferences.
50 The lower the level, the faster the speed (at the cost of compression).
51
46 Compression can be done in:
52 Compression can be done in:
47 - a single step (described as Simple API)
53 - a single step (described as Simple API)
48 - a single step, reusing a context (described as Explicit context)
54 - a single step, reusing a context (described as Explicit context)
49 - unbounded multiple steps (described as Streaming compression)
55 - unbounded multiple steps (described as Streaming compression)
50 The compression ratio achievable on small data can be highly improved using a dictionary in:
56
57 The compression ratio achievable on small data can be highly improved using
58 a dictionary. Dictionary compression can be performed in:
51 - a single step (described as Simple dictionary API)
59 - a single step (described as Simple dictionary API)
52 - a single step, reusing a dictionary (described as Bulk-processing dictionary API)
60 - a single step, reusing a dictionary (described as Bulk-processing
61 dictionary API)
53
62
54 Advanced experimental functions can be accessed using #define ZSTD_STATIC_LINKING_ONLY before including zstd.h.
63 Advanced experimental functions can be accessed using
55 Advanced experimental APIs shall never be used with a dynamic library.
64 `#define ZSTD_STATIC_LINKING_ONLY` before including zstd.h.
56 They are not "stable", their definition may change in the future. Only static linking is allowed.
65
57 *********************************************************************************************************/
66 Advanced experimental APIs should never be used with a dynamically-linked
67 library. They are not "stable"; their definitions or signatures may change in
68 the future. Only static linking is allowed.
69 *******************************************************************************/
58
70
59 /*------ Version ------*/
71 /*------ Version ------*/
60 #define ZSTD_VERSION_MAJOR 1
72 #define ZSTD_VERSION_MAJOR 1
61 #define ZSTD_VERSION_MINOR 3
73 #define ZSTD_VERSION_MINOR 3
62 #define ZSTD_VERSION_RELEASE 4
74 #define ZSTD_VERSION_RELEASE 6
63
75
64 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
76 #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
65 ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */
77 ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< useful to check dll version */
@@ -68,8 +80,14 @@ ZSTDLIB_API unsigned ZSTD_versionNumber(
68 #define ZSTD_QUOTE(str) #str
80 #define ZSTD_QUOTE(str) #str
69 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
81 #define ZSTD_EXPAND_AND_QUOTE(str) ZSTD_QUOTE(str)
70 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
82 #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION)
71 ZSTDLIB_API const char* ZSTD_versionString(void); /* added in v1.3.0 */
83 ZSTDLIB_API const char* ZSTD_versionString(void); /* v1.3.0+ */
72
84
85 /***************************************
86 * Default constant
87 ***************************************/
88 #ifndef ZSTD_CLEVEL_DEFAULT
89 # define ZSTD_CLEVEL_DEFAULT 3
90 #endif
73
91
74 /***************************************
92 /***************************************
75 * Simple API
93 * Simple API
@@ -96,7 +114,7 @@ ZSTDLIB_API size_t ZSTD_decompress( void
96 * `src` should point to the start of a ZSTD encoded frame.
114 * `src` should point to the start of a ZSTD encoded frame.
97 * `srcSize` must be at least as large as the frame header.
115 * `srcSize` must be at least as large as the frame header.
98 * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
116 * hint : any size >= `ZSTD_frameHeaderSize_max` is large enough.
99 * @return : - decompressed size of the frame in `src`, if known
117 * @return : - decompressed size of `src` frame content, if known
100 * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
118 * - ZSTD_CONTENTSIZE_UNKNOWN if the size cannot be determined
101 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
119 * - ZSTD_CONTENTSIZE_ERROR if an error occurred (e.g. invalid magic number, srcSize too small)
102 * note 1 : a 0 return value means the frame is valid but "empty".
120 * note 1 : a 0 return value means the frame is valid but "empty".
@@ -106,7 +124,8 @@ ZSTDLIB_API size_t ZSTD_decompress( void
106 * Optionally, application can rely on some implicit limit,
124 * Optionally, application can rely on some implicit limit,
107 * as ZSTD_decompress() only needs an upper bound of decompressed size.
125 * as ZSTD_decompress() only needs an upper bound of decompressed size.
108 * (For example, data could be necessarily cut into blocks <= 16 KB).
126 * (For example, data could be necessarily cut into blocks <= 16 KB).
109 * note 3 : decompressed size is always present when compression is done with ZSTD_compress()
127 * note 3 : decompressed size is always present when compression is completed using single-pass functions,
128 * such as ZSTD_compress(), ZSTD_compressCCtx() ZSTD_compress_usingDict() or ZSTD_compress_usingCDict().
110 * note 4 : decompressed size can be very large (64-bits value),
129 * note 4 : decompressed size can be very large (64-bits value),
111 * potentially larger than what local system can handle as a single memory segment.
130 * potentially larger than what local system can handle as a single memory segment.
112 * In which case, it's necessary to use streaming mode to decompress data.
131 * In which case, it's necessary to use streaming mode to decompress data.
@@ -123,8 +142,7 @@ ZSTDLIB_API unsigned long long ZSTD_getF
123 * Both functions work the same way, but ZSTD_getDecompressedSize() blends
142 * Both functions work the same way, but ZSTD_getDecompressedSize() blends
124 * "empty", "unknown" and "error" results to the same return value (0),
143 * "empty", "unknown" and "error" results to the same return value (0),
125 * while ZSTD_getFrameContentSize() gives them separate return values.
144 * while ZSTD_getFrameContentSize() gives them separate return values.
126 * `src` is the start of a zstd compressed frame.
145 * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
127 * @return : content size to be decompressed, as a 64-bits value _if known and not empty_, 0 otherwise. */
128 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
146 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
129
147
130
148
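
The tightened wording above matters in practice because ZSTD_getFrameContentSize() distinguishes three outcomes that ZSTD_getDecompressedSize() folds into a single 0. A sketch of the recommended dispatch (the helper name is illustrative):

    #include <stdio.h>
    #include <zstd.h>

    /* Decide how to size an output buffer for one frame. */
    static void inspect_frame(const void* src, size_t srcSize)
    {
        unsigned long long const size = ZSTD_getFrameContentSize(src, srcSize);
        if (size == ZSTD_CONTENTSIZE_ERROR) {
            fprintf(stderr, "not a valid zstd frame\n");
        } else if (size == ZSTD_CONTENTSIZE_UNKNOWN) {
            fprintf(stderr, "size not recorded; use streaming decompression\n");
        } else {
            /* size may be 0 for a valid but empty frame */
            printf("frame decompresses to %llu bytes\n", size);
        }
    }
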
@@ -205,7 +223,8 @@ typedef struct ZSTD_CDict_s ZSTD_CDict;
205 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
223 * When compressing multiple messages / blocks with the same dictionary, it's recommended to load it just once.
206 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
224 * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup delay.
207 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
225 * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only.
208 * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict */
226 * `dictBuffer` can be released after ZSTD_CDict creation, since its content is copied within CDict
227 * Note : A ZSTD_CDict can be created with an empty dictionary, but it is inefficient for small data. */
209 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
228 ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize,
210 int compressionLevel);
229 int compressionLevel);
211
230
@@ -217,7 +236,9 @@ ZSTDLIB_API size_t ZSTD_freeCDict(Z
217 * Compression using a digested Dictionary.
236 * Compression using a digested Dictionary.
218 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
237 * Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
219 * Note that compression level is decided during dictionary creation.
238 * Note that compression level is decided during dictionary creation.
220 * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */
239 * Frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no)
240 * Note : ZSTD_compress_usingCDict() can be used with a ZSTD_CDict created from an empty dictionary.
241 * But it is inefficient for small data, and it is recommended to use ZSTD_compressCCtx(). */
221 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
242 ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
222 void* dst, size_t dstCapacity,
243 void* dst, size_t dstCapacity,
223 const void* src, size_t srcSize,
244 const void* src, size_t srcSize,
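
The new notes above reinforce the intended usage: digest the dictionary into a ZSTD_CDict once, then reuse it across many small inputs, and prefer ZSTD_compressCCtx() when there is no meaningful dictionary. A sketch (the helper name is illustrative):

    #include <zstd.h>

    /* Build the ZSTD_CDict once at startup, then call this per record. */
    static size_t compress_record(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict,
                                  void* dst, size_t dstCap,
                                  const void* src, size_t srcSize)
    {
        /* The cdict is read-only here, so one instance may be shared across
         * threads, as long as each thread uses its own ZSTD_CCtx. */
        return ZSTD_compress_usingCDict(cctx, dst, dstCap, src, srcSize, cdict);
    }
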
@@ -272,39 +293,44 @@ typedef struct ZSTD_outBuffer_s {
272 * since it will play nicer with system's memory, by re-using already allocated memory.
293 * since it will play nicer with system's memory, by re-using already allocated memory.
273 * Use one separate ZSTD_CStream per thread for parallel execution.
294 * Use one separate ZSTD_CStream per thread for parallel execution.
274 *
295 *
275 * Start a new compression by initializing ZSTD_CStream.
296 * Start a new compression by initializing ZSTD_CStream context.
276 * Use ZSTD_initCStream() to start a new compression operation.
297 * Use ZSTD_initCStream() to start a new compression operation.
277 * Use ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for a compression which requires a dictionary (experimental section)
298 * Use variants ZSTD_initCStream_usingDict() or ZSTD_initCStream_usingCDict() for streaming with dictionary (experimental section)
278 *
299 *
279 * Use ZSTD_compressStream() repetitively to consume input stream.
300 * Use ZSTD_compressStream() as many times as necessary to consume input stream.
280 * The function will automatically update both `pos` fields.
301 * The function will automatically update both `pos` fields within `input` and `output`.
281 * Note that it may not consume the entire input, in which case `pos < size`,
302 * Note that the function may not consume the entire input,
282 * and it's up to the caller to present again remaining data.
303 * for example, because the output buffer is already full,
304 * in which case `input.pos < input.size`.
305 * The caller must check if input has been entirely consumed.
306 * If not, the caller must make some room to receive more compressed data,
307 * typically by emptying output buffer, or allocating a new output buffer,
308 * and then present again remaining input data.
283 * @return : a size hint, preferred nb of bytes to use as input for next function call
309 * @return : a size hint, preferred nb of bytes to use as input for next function call
284 * or an error code, which can be tested using ZSTD_isError().
310 * or an error code, which can be tested using ZSTD_isError().
285 * Note 1 : it's just a hint, to help latency a little, any other value will work fine.
311 * Note 1 : it's just a hint, to help latency a little, any other value will work fine.
286 * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
312 * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize()
287 *
313 *
288 * At any moment, it's possible to flush whatever data remains within internal buffer, using ZSTD_flushStream().
314 * At any moment, it's possible to flush whatever data might remain stuck within internal buffer,
289 * `output->pos` will be updated.
315 * using ZSTD_flushStream(). `output->pos` will be updated.
290 * Note that some content might still be left within internal buffer if `output->size` is too small.
316 * Note that, if `output->size` is too small, a single invocation of ZSTD_flushStream() might not be enough (return code > 0).
291 * @return : nb of bytes still present within internal buffer (0 if it's empty)
317 * In which case, make some room to receive more compressed data, and call again ZSTD_flushStream().
318 * @return : 0 if internal buffers are entirely flushed,
319 * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
292 * or an error code, which can be tested using ZSTD_isError().
320 * or an error code, which can be tested using ZSTD_isError().
293 *
321 *
294 * ZSTD_endStream() instructs to finish a frame.
322 * ZSTD_endStream() instructs to finish a frame.
295 * It will perform a flush and write frame epilogue.
323 * It will perform a flush and write frame epilogue.
296 * The epilogue is required for decoders to consider a frame completed.
324 * The epilogue is required for decoders to consider a frame completed.
297 * ZSTD_endStream() may not be able to flush full data if `output->size` is too small.
325 * flush() operation is the same, and follows same rules as ZSTD_flushStream().
298 * In which case, call again ZSTD_endStream() to complete the flush.
299 * @return : 0 if frame fully completed and fully flushed,
326 * @return : 0 if frame fully completed and fully flushed,
300 or >0 if some data is still present within internal buffer
327 * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size),
301 (value is minimum size estimation for remaining data to flush, but it could be more)
302 * or an error code, which can be tested using ZSTD_isError().
328 * or an error code, which can be tested using ZSTD_isError().
303 *
329 *
304 * *******************************************************************/
330 * *******************************************************************/
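The consume/flush/end cycle documented above surfaces in the Python bindings as compressobj(); a minimal sketch using only names that appear elsewhere in this patch:

import zstandard as zstd

cctx = zstd.ZstdCompressor()
cobj = cctx.compressobj()

parts = []
for chunk in (b'a' * 65536, b'b' * 65536):
    parts.append(cobj.compress(chunk))                   # ~ ZSTD_compressStream()
parts.append(cobj.flush(zstd.COMPRESSOBJ_FLUSH_BLOCK))   # ~ ZSTD_flushStream()
parts.append(cobj.flush())                               # ~ ZSTD_endStream(); writes the epilogue
frame = b''.join(parts)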
305
331
306 typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
332 typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */
307 /* Continue to distinguish them for compatibility with versions <= v1.2.0 */
333 /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */
308 /*===== ZSTD_CStream management functions =====*/
334 /*===== ZSTD_CStream management functions =====*/
309 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
335 ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void);
310 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
336 ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs);
@@ -359,21 +385,28 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
359
385
360
386
361
387
388
389 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
390 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
391
362 /****************************************************************************************
392 /****************************************************************************************
363 * START OF ADVANCED AND EXPERIMENTAL FUNCTIONS
393 * ADVANCED AND EXPERIMENTAL FUNCTIONS
394 ****************************************************************************************
364 * The definitions in this section are considered experimental.
395 * The definitions in this section are considered experimental.
365 * They should never be used with a dynamic library, as prototypes may change in the future.
396 * They should never be used with a dynamic library, as prototypes may change in the future.
366 * They are provided for advanced scenarios.
397 * They are provided for advanced scenarios.
367 * Use them only in association with static linking.
398 * Use them only in association with static linking.
368 * ***************************************************************************************/
399 * ***************************************************************************************/
369
400
370 #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
401 ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */
371 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
372
402
373 /* --- Constants ---*/
403 /* --- Constants ---*/
374 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* >= v0.8.0 */
404 #define ZSTD_MAGICNUMBER 0xFD2FB528 /* v0.8+ */
405 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* v0.7+ */
375 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
406 #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50U
376 #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* >= v0.7.0 */
407
408 #define ZSTD_BLOCKSIZELOG_MAX 17
409 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) /* define, for static allocation */
377
410
378 #define ZSTD_WINDOWLOG_MAX_32 30
411 #define ZSTD_WINDOWLOG_MAX_32 30
379 #define ZSTD_WINDOWLOG_MAX_64 31
412 #define ZSTD_WINDOWLOG_MAX_64 31
@@ -390,9 +423,10 b' ZSTDLIB_API size_t ZSTD_DStreamOutSize(v'
390 #define ZSTD_SEARCHLOG_MIN 1
423 #define ZSTD_SEARCHLOG_MIN 1
391 #define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
424 #define ZSTD_SEARCHLENGTH_MAX 7 /* only for ZSTD_fast, other strategies are limited to 6 */
392 #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */
425 #define ZSTD_SEARCHLENGTH_MIN 3 /* only for ZSTD_btopt, other strategies are limited to 4 */
393 #define ZSTD_TARGETLENGTH_MIN 1 /* only used by btopt, btultra and btfast */
426 #define ZSTD_TARGETLENGTH_MAX ZSTD_BLOCKSIZE_MAX
427 #define ZSTD_TARGETLENGTH_MIN 0 /* note : comparing this constant to an unsigned results in a tautological test */
428 #define ZSTD_LDM_MINMATCH_MAX 4096
394 #define ZSTD_LDM_MINMATCH_MIN 4
429 #define ZSTD_LDM_MINMATCH_MIN 4
395 #define ZSTD_LDM_MINMATCH_MAX 4096
396 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
430 #define ZSTD_LDM_BUCKETSIZELOG_MAX 8
397
431
398 #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size to know frame header size */
432 #define ZSTD_FRAMEHEADERSIZE_PREFIX 5 /* minimum input size to know frame header size */
@@ -404,7 +438,8 b' static const size_t ZSTD_frameHeaderSize'
404 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
438 static const size_t ZSTD_skippableHeaderSize = 8; /* magic number + skippable frame length */
405
439
406
440
407 /*--- Advanced types ---*/
441
442 /* --- Advanced types --- */
408 typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
443 typedef enum { ZSTD_fast=1, ZSTD_dfast, ZSTD_greedy, ZSTD_lazy, ZSTD_lazy2,
409 ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; /* from faster to stronger */
444 ZSTD_btlazy2, ZSTD_btopt, ZSTD_btultra } ZSTD_strategy; /* from faster to stronger */
410
445
@@ -480,9 +515,9 b' ZSTDLIB_API size_t ZSTD_findFrameCompres'
480 ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
515 ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
481
516
482 /*! ZSTD_frameHeaderSize() :
517 /*! ZSTD_frameHeaderSize() :
483 * `src` should point to the start of a ZSTD frame
518 * srcSize must be >= ZSTD_frameHeaderSize_prefix.
484 * `srcSize` must be >= ZSTD_frameHeaderSize_prefix.
519 * @return : size of the Frame Header,
485 * @return : size of the Frame Header */
520 * or an error code (if srcSize is too small) */
486 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
521 ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
487
522
488
523
@@ -711,29 +746,48 b' ZSTDLIB_API size_t ZSTD_initCStream_usin'
711
746
712 /*! ZSTD_resetCStream() :
747 /*! ZSTD_resetCStream() :
713 * start a new compression job, using same parameters from previous job.
748 * start a new compression job, using same parameters from previous job.
714 * This is typically useful to skip dictionary loading stage, since it will re-use it in-place..
749 * This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
715 * Note that zcs must be initialized at least once before using ZSTD_resetCStream().
750 * Note that zcs must be initialized at least once before using ZSTD_resetCStream().
716 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
751 * If pledgedSrcSize is not known at reset time, use macro ZSTD_CONTENTSIZE_UNKNOWN.
717 * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
752 * If pledgedSrcSize > 0, its value must be correct, as it will be written in header, and controlled at the end.
718 * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
753 * For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
719 * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
754 * but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
720 * @return : 0, or an error code (which can be tested using ZSTD_isError()) */
755 * @return : 0, or an error code (which can be tested using ZSTD_isError())
756 */
721 ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
757 ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
722
758
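From Python, the pledged source size is passed when the streaming object is created; a sketch using compressobj(size=...), which this patch wires to ZSTD_CCtx_setPledgedSrcSize():

import zstandard as zstd

data = b'x' * 1000000
cctx = zstd.ZstdCompressor()
cobj = cctx.compressobj(size=len(data))   # content size is written into the frame header
frame = cobj.compress(data) + cobj.flush()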
723
759
724 typedef struct {
760 typedef struct {
725 unsigned long long ingested;
761 unsigned long long ingested; /* nb input bytes read and buffered */
726 unsigned long long consumed;
762 unsigned long long consumed; /* nb input bytes actually compressed */
727 unsigned long long produced;
763 unsigned long long produced; /* nb of compressed bytes generated and buffered */
764 unsigned long long flushed; /* nb of compressed bytes flushed : not provided; can be tracked from caller side */
765 unsigned currentJobID; /* MT only : latest started job nb */
766 unsigned nbActiveWorkers; /* MT only : nb of workers actively compressing at probe time */
728 } ZSTD_frameProgression;
767 } ZSTD_frameProgression;
729
768
730 /* ZSTD_getFrameProgression():
769 /* ZSTD_getFrameProgression() :
731 * tells how much data has been ingested (read from input)
770 * tells how much data has been ingested (read from input)
732 * consumed (input actually compressed) and produced (output) for current frame.
771 * consumed (input actually compressed) and produced (output) for current frame.
733 * Therefore, (ingested - consumed) is amount of input data buffered internally, not yet compressed.
772 * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
734 * Can report progression inside worker threads (multi-threading and non-blocking mode).
773 * Aggregates progression inside active worker threads.
735 */
774 */
736 ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
775 ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
776
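The bindings expose this struct through ZstdCompressor.frame_progression(); a sketch, assuming it returns the (ingested, consumed, produced) triple in that order:

import zstandard as zstd

cctx = zstd.ZstdCompressor(threads=2)
cobj = cctx.compressobj()
cobj.compress(b'data' * 100000)
ingested, consumed, produced = cctx.frame_progression()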
777 /*! ZSTD_toFlushNow() :
778 * Tell how many bytes are ready to be flushed immediately.
779 * Useful for multithreading scenarios (nbWorkers >= 1).
780 * Probe the oldest active job, defined as oldest job not yet entirely flushed,
781 * and check its output buffer.
782 * @return : amount of data stored in oldest job and ready to be flushed immediately.
783 * if @return == 0, it means either :
784 * + there is no active job (could be checked with ZSTD_frameProgression()), or
785 * + oldest job is still actively compressing data,
786 * but everything it has produced has also been flushed so far,
787 * therefore flushing speed is currently limited by production speed of oldest job
788 * irrespective of the speed of concurrent newer jobs.
789 */
790 ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
737
791
738
792
739
793
@@ -880,6 +934,11 b' typedef struct {'
880 unsigned dictID;
934 unsigned dictID;
881 unsigned checksumFlag;
935 unsigned checksumFlag;
882 } ZSTD_frameHeader;
936 } ZSTD_frameHeader;
937 /** ZSTD_getFrameHeader() :
938 * decode Frame Header, or indicate that a larger `srcSize` is required.
939 * @return : 0, `zfhPtr` is correctly filled,
940 * >0, `srcSize` is too small, the return value is the wanted `srcSize` amount,
941 * or an error code, which can be tested using ZSTD_isError() */
883 ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
942 ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize); /**< doesn't consume input */
884 ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
943 ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize); /**< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
885
944
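The same header fields are readable from Python through get_frame_parameters(), which wraps ZSTD_getFrameHeader(); a minimal sketch:

import zstandard as zstd

frame = zstd.ZstdCompressor(write_checksum=True).compress(b'payload')
params = zstd.get_frame_parameters(frame)
# params.content_size, params.window_size, params.dict_id, params.has_checksum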
@@ -901,23 +960,15 b' ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne'
901 /** New advanced API (experimental) */
960 /** New advanced API (experimental) */
902 /* ============================================ */
961 /* ============================================ */
903
962
904 /* notes on API design :
963 /* API design :
905 * In this proposal, parameters are pushed one by one into an existing context,
964 * In this advanced API, parameters are pushed one by one into an existing context,
906 * and then applied on all subsequent compression jobs.
965 * using ZSTD_CCtx_set*() functions.
907 * When no parameter is ever provided, CCtx is created with compression level ZSTD_CLEVEL_DEFAULT.
966 * Pushed parameters are sticky : they are applied to next job, and any subsequent job.
967 * It's possible to reset parameters to "default" using ZSTD_CCtx_reset().
968 * Important : "sticky" parameters only work with `ZSTD_compress_generic()` !
969 * For any other entry point, "sticky" parameters are ignored !
908 *
970 *
909 * This API is intended to replace all others advanced / experimental API entry points.
971 * This API is intended to replace all others advanced / experimental API entry points.
910 * But it stands a reasonable chance to become "stable", after a reasonable testing period.
911 */
912
913 /* note on naming convention :
914 * Initially, the API favored names like ZSTD_setCCtxParameter() .
915 * In this proposal, convention is changed towards ZSTD_CCtx_setParameter() .
916 * The main driver is that it identifies more clearly the target object type.
917 * It feels clearer when considering multiple targets :
918 * ZSTD_CDict_setParameter() (rather than ZSTD_setCDictParameter())
919 * ZSTD_CCtxParams_setParameter() (rather than ZSTD_setCCtxParamsParameter() )
920 * etc...
921 */
972 */
922
973
923 /* note on enum design :
974 /* note on enum design :
@@ -947,7 +998,7 b' typedef enum {'
947 /* compression parameters */
998 /* compression parameters */
948 ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
999 ZSTD_p_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table
949 * Default level is ZSTD_CLEVEL_DEFAULT==3.
1000 * Default level is ZSTD_CLEVEL_DEFAULT==3.
950 * Special: value 0 means "do not change cLevel".
1001 * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT.
951 * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
1002 * Note 1 : it's possible to pass a negative compression level by casting it to unsigned type.
952 * Note 2 : setting a level sets all default values of other compression parameters.
1003 * Note 2 : setting a level sets all default values of other compression parameters.
953 * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
1004 * Note 3 : setting compressionLevel automatically updates ZSTD_p_compressLiterals. */
@@ -956,16 +1007,19 b' typedef enum {'
956 * Special: value 0 means "use default windowLog".
1007 * Special: value 0 means "use default windowLog".
957 * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
1008 * Note: Using a window size greater than ZSTD_MAXWINDOWSIZE_DEFAULT (default: 2^27)
958 * requires explicitly allowing such window size during decompression stage. */
1009 * requires explicitly allowing such window size during decompression stage. */
959 ZSTD_p_hashLog, /* Size of the probe table, as a power of 2.
1010 ZSTD_p_hashLog, /* Size of the initial probe table, as a power of 2.
960 * Resulting table size is (1 << (hashLog+2)).
1011 * Resulting table size is (1 << (hashLog+2)).
961 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
1012 * Must be clamped between ZSTD_HASHLOG_MIN and ZSTD_HASHLOG_MAX.
962 * Larger tables improve compression ratio of strategies <= dFast,
1013 * Larger tables improve compression ratio of strategies <= dFast,
963 * and improve speed of strategies > dFast.
1014 * and improve speed of strategies > dFast.
964 * Special: value 0 means "use default hashLog". */
1015 * Special: value 0 means "use default hashLog". */
965 ZSTD_p_chainLog, /* Size of the full-search table, as a power of 2.
1016 ZSTD_p_chainLog, /* Size of the multi-probe search table, as a power of 2.
966 * Resulting table size is (1 << (chainLog+2)).
1017 * Resulting table size is (1 << (chainLog+2)).
1018 * Must be clamped between ZSTD_CHAINLOG_MIN and ZSTD_CHAINLOG_MAX.
967 * Larger tables result in better and slower compression.
1019 * Larger tables result in better and slower compression.
968 * This parameter is useless when using "fast" strategy.
1020 * This parameter is useless when using "fast" strategy.
1021 * Note it's still useful when using "dfast" strategy,
1022 * in which case it defines a secondary probe table.
969 * Special: value 0 means "use default chainLog". */
1023 * Special: value 0 means "use default chainLog". */
970 ZSTD_p_searchLog, /* Number of search attempts, as a power of 2.
1024 ZSTD_p_searchLog, /* Number of search attempts, as a power of 2.
971 * More attempts result in better and slower compression.
1025 * More attempts result in better and slower compression.
@@ -1047,27 +1101,52 b' typedef enum {'
1047 /* experimental parameters - no stability guaranteed */
1101 /* experimental parameters - no stability guaranteed */
1048 /* =================================================================== */
1102 /* =================================================================== */
1049
1103
1050 ZSTD_p_compressLiterals=1000, /* control huffman compression of literals (enabled) by default.
1051 * disabling it improves speed and decreases compression ratio by a large amount.
1052 * note : this setting is automatically updated when changing compression level.
1053 * positive compression levels set ZSTD_p_compressLiterals to 1.
1054 * negative compression levels set ZSTD_p_compressLiterals to 0. */
1055
1056 ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
1104 ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
1057 * even when referencing into Dictionary content (default:0) */
1105 * even when referencing into Dictionary content (default:0) */
1106 ZSTD_p_forceAttachDict, /* ZSTD supports usage of a CDict in-place
1107 * (avoiding having to copy the compression tables
1108 * from the CDict into the working context). Using
1109 * a CDict in this way saves an initial setup step,
1110 * but comes at the cost of more work per byte of
1111 * input. ZSTD has a simple internal heuristic that
1112 * guesses which strategy will be faster. You can
1113 * use this flag to override that guess.
1114 *
1115 * Note that the by-reference, in-place strategy is
1116 * only used when reusing a compression context
1117 * with compatible compression parameters. (If
1118 * incompatible / uninitialized, the working
1119 * context needs to be cleared anyways, which is
1120 * about as expensive as overwriting it with the
1121 * dictionary context, so there's no savings in
1122 * using the CDict by-ref.)
1123 *
1124 * Values greater than 0 force attaching the dict.
1125 * Values less than 0 force copying the dict.
1126 * 0 selects the default heuristic-guided behavior.
1127 */
1058
1128
1059 } ZSTD_cParameter;
1129 } ZSTD_cParameter;
1060
1130
1061
1131
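The attach-versus-copy trade-off described above matters when one compressor is reused across many small frames with the same dictionary. These bindings do not expose ZSTD_p_forceAttachDict as a keyword, but the reuse pattern the heuristic optimizes looks like this sketch:

import zstandard as zstd

samples = [('record %d ' % i).encode('ascii') * 32 for i in range(200)]
dict_data = zstd.train_dictionary(8192, samples)

cctx = zstd.ZstdCompressor(dict_data=dict_data)
# One context, many frames: the library decides per frame whether to attach
# the CDict in place or copy its tables into the working context.
frames = [cctx.compress(b'record 7 ' * 32) for _ in range(100)]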
1062 /*! ZSTD_CCtx_setParameter() :
1132 /*! ZSTD_CCtx_setParameter() :
1063 * Set one compression parameter, selected by enum ZSTD_cParameter.
1133 * Set one compression parameter, selected by enum ZSTD_cParameter.
1064 * Setting a parameter is generally only possible during frame initialization (before starting compression),
1134 * Setting a parameter is generally only possible during frame initialization (before starting compression).
1065 * except for a few exceptions which can be updated during compression: compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
1135 * Exception : when using multi-threading mode (nbThreads >= 1),
1066 * Note : when `value` is an enum, cast it to unsigned for proper type checking.
1136 * following parameters can be updated _during_ compression (within same frame):
1067 * @result : informational value (typically, value being set clamped correctly),
1137 * => compressionLevel, hashLog, chainLog, searchLog, minMatch, targetLength and strategy.
1138 * new parameters will be active on next job, or after a flush().
1139 * Note : when `value` type is not unsigned (int, or enum), cast it to unsigned for proper type checking.
1140 * @result : informational value (typically, value being set, correctly clamped),
1068 * or an error code (which can be tested with ZSTD_isError()). */
1141 * or an error code (which can be tested with ZSTD_isError()). */
1069 ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
1142 ZSTDLIB_API size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
1070
1143
1144 /*! ZSTD_CCtx_getParameter() :
1145 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
1146 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1147 */
1148 ZSTDLIB_API size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned* value);
1149
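In the Python layer, this push-parameters model surfaces as ZstdCompressionParameters, whose fields _make_cctx_params() (later in this patch) applies via ZSTD_CCtxParam_setParameter(). A sketch; the compression_params keyword on ZstdCompressor is an assumption:

import zstandard as zstd

params = zstd.ZstdCompressionParameters(compression_level=10,
                                        window_log=24,
                                        enable_ldm=1,
                                        threads=-1)    # -1 resolves to the CPU count
cctx = zstd.ZstdCompressor(compression_params=params)  # keyword name assumed
frame = cctx.compress(b'data')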
1071 /*! ZSTD_CCtx_setPledgedSrcSize() :
1150 /*! ZSTD_CCtx_setPledgedSrcSize() :
1072 * Total input data size to be compressed as a single frame.
1151 * Total input data size to be compressed as a single frame.
1073 * This value will be controlled at the end, and result in error if not respected.
1152 * This value will be controlled at the end, and result in error if not respected.
@@ -1114,30 +1193,45 b' ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZS'
1114
1193
1115 /*! ZSTD_CCtx_refPrefix() :
1194 /*! ZSTD_CCtx_refPrefix() :
1116 * Reference a prefix (single-usage dictionary) for next compression job.
1195 * Reference a prefix (single-usage dictionary) for next compression job.
1117 * Decompression need same prefix to properly regenerate data.
1196 * Decompression will need same prefix to properly regenerate data.
1118 * Prefix is **only used once**. Tables are discarded at end of compression job.
1197 * Compressing with a prefix is similar in outcome to performing a diff and compressing it,
1119 * Subsequent compression jobs will be done without prefix (if none is explicitly referenced).
1198 * but performs much faster, especially during decompression (compression speed is tunable with compression level).
1120 * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_CDict instead.
1199 * Note that prefix is **only used once**. Tables are discarded at end of compression job (ZSTD_e_end).
1121 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1200 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1122 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
1201 * Special: Adding any prefix (including NULL) invalidates any previous prefix or dictionary
1123 * Note 1 : Prefix buffer is referenced. It must outlive compression job.
1202 * Note 1 : Prefix buffer is referenced. It **must** outlive compression job.
1124 * Note 2 : Referencing a prefix involves building tables, which are dependent on compression parameters.
1203 * Its content must remain unmodified up to end of compression (ZSTD_e_end).
1204 * Note 2 : If the intention is to diff some large src data blob with some prior version of itself,
1205 * ensure that the window size is large enough to contain the entire source.
1206 * See ZSTD_p_windowLog.
1207 * Note 3 : Referencing a prefix involves building tables, which are dependent on compression parameters.
1125 * It's a CPU consuming operation, with non-negligible impact on latency.
1208 * It's a CPU consuming operation, with non-negligible impact on latency.
1126 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1209 * If there is a need to use same prefix multiple times, consider loadDictionary instead.
1210 * Note 4 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1127 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */
1211 * Use ZSTD_CCtx_refPrefix_advanced() to alter dictMode. */
1128 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize);
1212 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
1129 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
1213 const void* prefix, size_t prefixSize);
1214 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx,
1215 const void* prefix, size_t prefixSize,
1216 ZSTD_dictContentType_e dictContentType);
1130
1217
1131 /*! ZSTD_CCtx_reset() :
1218 /*! ZSTD_CCtx_reset() :
1132 * Return a CCtx to clean state.
1219 * Return a CCtx to clean state.
1133 * Useful after an error, or to interrupt an ongoing compression job and start a new one.
1220 * Useful after an error, or to interrupt an ongoing compression job and start a new one.
1134 * Any internal data not yet flushed is cancelled.
1221 * Any internal data not yet flushed is cancelled.
1135 * Dictionary (if any) is dropped.
1222 * The parameters and dictionary are kept unchanged; to reset them, use ZSTD_CCtx_resetParameters().
1136 * All parameters are back to default values.
1137 * It's possible to modify compression parameters after a reset.
1138 */
1223 */
1139 ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
1224 ZSTDLIB_API void ZSTD_CCtx_reset(ZSTD_CCtx* cctx);
1140
1225
1226 /*! ZSTD_CCtx_resetParameters() :
1227 * All parameters are back to default values (compression level is ZSTD_CLEVEL_DEFAULT).
1228 * Dictionary (if any) is dropped.
1229 * Resetting parameters is only possible during frame initialization (before starting compression).
1230 * To reset the context use ZSTD_CCtx_reset().
1231 * @return 0 or an error code (which can be checked with ZSTD_isError()).
1232 */
1233 ZSTDLIB_API size_t ZSTD_CCtx_resetParameters(ZSTD_CCtx* cctx);
1234
1141
1235
1142
1236
1143 typedef enum {
1237 typedef enum {
@@ -1235,6 +1329,13 b' ZSTDLIB_API size_t ZSTD_CCtxParams_init_'
1235 */
1329 */
1236 ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
1330 ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned value);
1237
1331
1332 /*! ZSTD_CCtxParam_getParameter() :
1333 * Similar to ZSTD_CCtx_getParameter.
1334 * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
1335 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1336 */
1337 ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, unsigned* value);
1338
1238 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
1339 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
1239 * Apply a set of ZSTD_CCtx_params to the compression context.
1340 * Apply a set of ZSTD_CCtx_params to the compression context.
1240 * This can be done even after compression is started,
1341 * This can be done even after compression is started,
@@ -1246,10 +1347,13 b' ZSTDLIB_API size_t ZSTD_CCtx_setParamete'
1246 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
1347 ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
1247
1348
1248
1349
1249 /*=== Advanced parameters for decompression API ===*/
1350 /* ==================================== */
1351 /*=== Advanced decompression API ===*/
1352 /* ==================================== */
1250
1353
1251 /* The following parameters must be set after creating a ZSTD_DCtx* (or ZSTD_DStream*) object,
1354 /* The following API works the same way as the advanced compression API :
1252 * but before starting decompression of a frame.
1355 * a context is created, parameters are pushed into it one by one,
1356 * then the context can be used to decompress data using an interface similar to the streaming API.
1253 */
1357 */
1254
1358
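On the Python side the same configure-then-decompress flow is a ZstdDecompressor constructed with its parameters up front; a sketch (the max_window_size keyword, wired to ZSTD_DCtx_setMaxWindowSize(), is an assumption):

import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'data' * 1000)
dctx = zstd.ZstdDecompressor(max_window_size=2**27)  # keyword name assumed
data = dctx.decompress(frame)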
1255 /*! ZSTD_DCtx_loadDictionary() :
1359 /*! ZSTD_DCtx_loadDictionary() :
@@ -1286,17 +1390,25 b' ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZS'
1286
1390
1287 /*! ZSTD_DCtx_refPrefix() :
1391 /*! ZSTD_DCtx_refPrefix() :
1288 * Reference a prefix (single-usage dictionary) for next decompression job.
1392 * Reference a prefix (single-usage dictionary) for next decompression job.
1289 * Prefix is **only used once**. It must be explicitly referenced before each frame.
1393 * This is the reverse operation of ZSTD_CCtx_refPrefix(),
1290 * If there is a need to use same prefix multiple times, consider embedding it into a ZSTD_DDict instead.
1394 * and must use the same prefix as the one used during compression.
1395 * Prefix is **only used once**. Reference is discarded at end of frame.
1396 * End of frame is reached when ZSTD_DCtx_decompress_generic() returns 0.
1291 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1397 * @result : 0, or an error code (which can be tested with ZSTD_isError()).
1292 * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
1398 * Note 1 : Adding any prefix (including NULL) invalidates any previously set prefix or dictionary
1293 * Note 2 : Prefix buffer is referenced. It must outlive compression job.
1399 * Note 2 : Prefix buffer is referenced. It **must** outlive decompression job.
1400 * Prefix buffer must remain unmodified up to the end of frame,
1401 * reached when ZSTD_DCtx_decompress_generic() returns 0.
1294 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1402 * Note 3 : By default, the prefix is treated as raw content (ZSTD_dm_rawContent).
1295 * Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode.
1403 * Use ZSTD_DCtx_refPrefix_advanced() to alter dictMode.
1296 * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
1404 * Note 4 : Referencing a raw content prefix has almost no cpu nor memory cost.
1405 * A fulldict prefix is more costly though.
1297 */
1406 */
1298 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize);
1407 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
1299 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
1408 const void* prefix, size_t prefixSize);
1409 ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx,
1410 const void* prefix, size_t prefixSize,
1411 ZSTD_dictContentType_e dictContentType);
1300
1412
1301
1413
1302 /*! ZSTD_DCtx_setMaxWindowSize() :
1414 /*! ZSTD_DCtx_setMaxWindowSize() :
@@ -1318,6 +1430,13 b' ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindo'
1318 ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
1430 ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
1319
1431
1320
1432
1433 /*! ZSTD_getFrameHeader_advanced() :
1434 * same as ZSTD_getFrameHeader(),
1435 * with added capability to select a format (like ZSTD_f_zstd1_magicless) */
1436 ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr,
1437 const void* src, size_t srcSize, ZSTD_format_e format);
1438
1439
1321 /*! ZSTD_decompress_generic() :
1440 /*! ZSTD_decompress_generic() :
1322 * Behave the same as ZSTD_decompressStream.
1441 * Behave the same as ZSTD_decompressStream.
1323 * Decompression parameters cannot be changed once decompression is started.
1442 * Decompression parameters cannot be changed once decompression is started.
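Streaming decompression through the bindings' decompressobj(), which drives ZSTD_decompress_generic() in the Python code later in this patch; a minimal sketch:

import zstandard as zstd

frame = zstd.ZstdCompressor().compress(b'hello' * 1000)
dobj = zstd.ZstdDecompressor().decompressobj()
data = dobj.decompress(frame)   # input may also be fed in arbitrary slices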
@@ -1383,8 +1502,6 b' ZSTDLIB_API void ZSTD_DCtx_reset(ZSTD_DC'
1383 Use ZSTD_insertBlock() for such a case.
1502 Use ZSTD_insertBlock() for such a case.
1384 */
1503 */
1385
1504
1386 #define ZSTD_BLOCKSIZELOG_MAX 17
1387 #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) /* define, for static allocation */
1388 /*===== Raw zstd block functions =====*/
1505 /*===== Raw zstd block functions =====*/
1389 ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
1506 ZSTDLIB_API size_t ZSTD_getBlockSize (const ZSTD_CCtx* cctx);
1390 ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
1507 ZSTDLIB_API size_t ZSTD_compressBlock (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
@@ -40,6 +40,8 b' from __future__ import absolute_import, '
40 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
40 'DECOMPRESSION_RECOMMENDED_INPUT_SIZE',
41 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
41 'DECOMPRESSION_RECOMMENDED_OUTPUT_SIZE',
42 'MAGIC_NUMBER',
42 'MAGIC_NUMBER',
43 'BLOCKSIZELOG_MAX',
44 'BLOCKSIZE_MAX',
43 'WINDOWLOG_MIN',
45 'WINDOWLOG_MIN',
44 'WINDOWLOG_MAX',
46 'WINDOWLOG_MAX',
45 'CHAINLOG_MIN',
47 'CHAINLOG_MIN',
@@ -52,6 +54,7 b' from __future__ import absolute_import, '
52 'SEARCHLENGTH_MIN',
54 'SEARCHLENGTH_MIN',
53 'SEARCHLENGTH_MAX',
55 'SEARCHLENGTH_MAX',
54 'TARGETLENGTH_MIN',
56 'TARGETLENGTH_MIN',
57 'TARGETLENGTH_MAX',
55 'LDM_MINMATCH_MIN',
58 'LDM_MINMATCH_MIN',
56 'LDM_MINMATCH_MAX',
59 'LDM_MINMATCH_MAX',
57 'LDM_BUCKETSIZELOG_MAX',
60 'LDM_BUCKETSIZELOG_MAX',
@@ -102,6 +105,8 b' CONTENTSIZE_UNKNOWN = lib.ZSTD_CONTENTSI'
102 CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR
105 CONTENTSIZE_ERROR = lib.ZSTD_CONTENTSIZE_ERROR
103 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
106 ZSTD_VERSION = (lib.ZSTD_VERSION_MAJOR, lib.ZSTD_VERSION_MINOR, lib.ZSTD_VERSION_RELEASE)
104
107
108 BLOCKSIZELOG_MAX = lib.ZSTD_BLOCKSIZELOG_MAX
109 BLOCKSIZE_MAX = lib.ZSTD_BLOCKSIZE_MAX
105 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
110 WINDOWLOG_MIN = lib.ZSTD_WINDOWLOG_MIN
106 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
111 WINDOWLOG_MAX = lib.ZSTD_WINDOWLOG_MAX
107 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
112 CHAINLOG_MIN = lib.ZSTD_CHAINLOG_MIN
@@ -114,6 +119,7 b' SEARCHLOG_MAX = lib.ZSTD_SEARCHLOG_MAX'
114 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
119 SEARCHLENGTH_MIN = lib.ZSTD_SEARCHLENGTH_MIN
115 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
120 SEARCHLENGTH_MAX = lib.ZSTD_SEARCHLENGTH_MAX
116 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
121 TARGETLENGTH_MIN = lib.ZSTD_TARGETLENGTH_MIN
122 TARGETLENGTH_MAX = lib.ZSTD_TARGETLENGTH_MAX
117 LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
123 LDM_MINMATCH_MIN = lib.ZSTD_LDM_MINMATCH_MIN
118 LDM_MINMATCH_MAX = lib.ZSTD_LDM_MINMATCH_MAX
124 LDM_MINMATCH_MAX = lib.ZSTD_LDM_MINMATCH_MAX
119 LDM_BUCKETSIZELOG_MAX = lib.ZSTD_LDM_BUCKETSIZELOG_MAX
125 LDM_BUCKETSIZELOG_MAX = lib.ZSTD_LDM_BUCKETSIZELOG_MAX
@@ -191,7 +197,6 b' def _make_cctx_params(params):'
191 (lib.ZSTD_p_nbWorkers, params.threads),
197 (lib.ZSTD_p_nbWorkers, params.threads),
192 (lib.ZSTD_p_jobSize, params.job_size),
198 (lib.ZSTD_p_jobSize, params.job_size),
193 (lib.ZSTD_p_overlapSizeLog, params.overlap_size_log),
199 (lib.ZSTD_p_overlapSizeLog, params.overlap_size_log),
194 (lib.ZSTD_p_compressLiterals, params.compress_literals),
195 (lib.ZSTD_p_forceMaxWindow, params.force_max_window),
200 (lib.ZSTD_p_forceMaxWindow, params.force_max_window),
196 (lib.ZSTD_p_enableLongDistanceMatching, params.enable_ldm),
201 (lib.ZSTD_p_enableLongDistanceMatching, params.enable_ldm),
197 (lib.ZSTD_p_ldmHashLog, params.ldm_hash_log),
202 (lib.ZSTD_p_ldmHashLog, params.ldm_hash_log),
@@ -224,9 +229,6 b' class ZstdCompressionParameters(object):'
224 if arg not in kwargs:
229 if arg not in kwargs:
225 kwargs[arg] = getattr(params, attr)
230 kwargs[arg] = getattr(params, attr)
226
231
227 if 'compress_literals' not in kwargs:
228 kwargs['compress_literals'] = 1 if level >= 0 else 0
229
230 return ZstdCompressionParameters(**kwargs)
232 return ZstdCompressionParameters(**kwargs)
231
233
232 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
234 def __init__(self, format=0, compression_level=0, window_log=0, hash_log=0,
@@ -235,14 +237,11 b' class ZstdCompressionParameters(object):'
235 write_dict_id=0, job_size=0, overlap_size_log=0,
237 write_dict_id=0, job_size=0, overlap_size_log=0,
236 force_max_window=0, enable_ldm=0, ldm_hash_log=0,
238 force_max_window=0, enable_ldm=0, ldm_hash_log=0,
237 ldm_min_match=0, ldm_bucket_size_log=0, ldm_hash_every_log=0,
239 ldm_min_match=0, ldm_bucket_size_log=0, ldm_hash_every_log=0,
238 threads=0, compress_literals=None):
240 threads=0):
239
241
240 if threads < 0:
242 if threads < 0:
241 threads = _cpu_count()
243 threads = _cpu_count()
242
244
243 if compress_literals is None:
244 compress_literals = compression_level >= 0
245
246 self.format = format
245 self.format = format
247 self.compression_level = compression_level
246 self.compression_level = compression_level
248 self.window_log = window_log
247 self.window_log = window_log
@@ -257,7 +256,6 b' class ZstdCompressionParameters(object):'
257 self.write_dict_id = write_dict_id
256 self.write_dict_id = write_dict_id
258 self.job_size = job_size
257 self.job_size = job_size
259 self.overlap_size_log = overlap_size_log
258 self.overlap_size_log = overlap_size_log
260 self.compress_literals = compress_literals
261 self.force_max_window = force_max_window
259 self.force_max_window = force_max_window
262 self.enable_ldm = enable_ldm
260 self.enable_ldm = enable_ldm
263 self.ldm_hash_log = ldm_hash_log
261 self.ldm_hash_log = ldm_hash_log
@@ -411,13 +409,14 b' class ZstdCompressionWriter(object):'
411 raise ZstdError('zstd compress error: %s' %
409 raise ZstdError('zstd compress error: %s' %
412 _zstd_error(zresult))
410 _zstd_error(zresult))
413
411
414 if not out_buffer.pos:
412 if out_buffer.pos:
415 break
413 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
414 total_write += out_buffer.pos
415 self._bytes_compressed += out_buffer.pos
416 out_buffer.pos = 0
416
417
417 self._writer.write(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
418 if not zresult:
418 total_write += out_buffer.pos
419 break
419 self._bytes_compressed += out_buffer.pos
420 out_buffer.pos = 0
421
420
422 return total_write
421 return total_write
423
422
@@ -460,6 +459,14 b' class ZstdCompressionObj(object):'
460 if self._finished:
459 if self._finished:
461 raise ZstdError('compressor object already finished')
460 raise ZstdError('compressor object already finished')
462
461
462 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
463 z_flush_mode = lib.ZSTD_e_flush
464 elif flush_mode == COMPRESSOBJ_FLUSH_FINISH:
465 z_flush_mode = lib.ZSTD_e_end
466 self._finished = True
467 else:
468 raise ZstdError('unhandled flush mode')
469
463 assert self._out.pos == 0
470 assert self._out.pos == 0
464
471
465 in_buffer = ffi.new('ZSTD_inBuffer *')
472 in_buffer = ffi.new('ZSTD_inBuffer *')
@@ -467,35 +474,13 b' class ZstdCompressionObj(object):'
467 in_buffer.size = 0
474 in_buffer.size = 0
468 in_buffer.pos = 0
475 in_buffer.pos = 0
469
476
470 if flush_mode == COMPRESSOBJ_FLUSH_BLOCK:
471 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
472 self._out,
473 in_buffer,
474 lib.ZSTD_e_flush)
475 if lib.ZSTD_isError(zresult):
476 raise ZstdError('zstd compress error: %s' %
477 _zstd_error(zresult))
478
479 # Output buffer is guaranteed to hold full block.
480 assert zresult == 0
481
482 if self._out.pos:
483 result = ffi.buffer(self._out.dst, self._out.pos)[:]
484 self._out.pos = 0
485 return result
486 else:
487 return b''
488
489 assert flush_mode == COMPRESSOBJ_FLUSH_FINISH
490 self._finished = True
491
492 chunks = []
477 chunks = []
493
478
494 while True:
479 while True:
495 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
480 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
496 self._out,
481 self._out,
497 in_buffer,
482 in_buffer,
498 lib.ZSTD_e_end)
483 z_flush_mode)
499 if lib.ZSTD_isError(zresult):
484 if lib.ZSTD_isError(zresult):
500 raise ZstdError('error ending compression stream: %s' %
485 raise ZstdError('error ending compression stream: %s' %
501 _zstd_error(zresult))
486 _zstd_error(zresult))
@@ -510,11 +495,107 b' class ZstdCompressionObj(object):'
510 return b''.join(chunks)
495 return b''.join(chunks)
511
496
512
497
498 class ZstdCompressionChunker(object):
499 def __init__(self, compressor, chunk_size):
500 self._compressor = compressor
501 self._out = ffi.new('ZSTD_outBuffer *')
502 self._dst_buffer = ffi.new('char[]', chunk_size)
503 self._out.dst = self._dst_buffer
504 self._out.size = chunk_size
505 self._out.pos = 0
506
507 self._in = ffi.new('ZSTD_inBuffer *')
508 self._in.src = ffi.NULL
509 self._in.size = 0
510 self._in.pos = 0
511 self._finished = False
512
513 def compress(self, data):
514 if self._finished:
515 raise ZstdError('cannot call compress() after compression finished')
516
517 if self._in.src != ffi.NULL:
518 raise ZstdError('cannot perform operation before consuming output '
519 'from previous operation')
520
521 data_buffer = ffi.from_buffer(data)
522
523 if not len(data_buffer):
524 return
525
526 self._in.src = data_buffer
527 self._in.size = len(data_buffer)
528 self._in.pos = 0
529
530 while self._in.pos < self._in.size:
531 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
532 self._out,
533 self._in,
534 lib.ZSTD_e_continue)
535
536 if self._in.pos == self._in.size:
537 self._in.src = ffi.NULL
538 self._in.size = 0
539 self._in.pos = 0
540
541 if lib.ZSTD_isError(zresult):
542 raise ZstdError('zstd compress error: %s' %
543 _zstd_error(zresult))
544
545 if self._out.pos == self._out.size:
546 yield ffi.buffer(self._out.dst, self._out.pos)[:]
547 self._out.pos = 0
548
549 def flush(self):
550 if self._finished:
551 raise ZstdError('cannot call flush() after compression finished')
552
553 if self._in.src != ffi.NULL:
554 raise ZstdError('cannot call flush() before consuming output from '
555 'previous operation')
556
557 while True:
558 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
559 self._out, self._in,
560 lib.ZSTD_e_flush)
561 if lib.ZSTD_isError(zresult):
562 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
563
564 if self._out.pos:
565 yield ffi.buffer(self._out.dst, self._out.pos)[:]
566 self._out.pos = 0
567
568 if not zresult:
569 return
570
571 def finish(self):
572 if self._finished:
573 raise ZstdError('cannot call finish() after compression finished')
574
575 if self._in.src != ffi.NULL:
576 raise ZstdError('cannot call finish() before consuming output from '
577 'previous operation')
578
579 while True:
580 zresult = lib.ZSTD_compress_generic(self._compressor._cctx,
581 self._out, self._in,
582 lib.ZSTD_e_end)
583 if lib.ZSTD_isError(zresult):
584 raise ZstdError('zstd compress error: %s' % _zstd_error(zresult))
585
586 if self._out.pos:
587 yield ffi.buffer(self._out.dst, self._out.pos)[:]
588 self._out.pos = 0
589
590 if not zresult:
591 self._finished = True
592 return
593
594
513 class CompressionReader(object):
595 class CompressionReader(object):
514 def __init__(self, compressor, source, size, read_size):
596 def __init__(self, compressor, source, read_size):
515 self._compressor = compressor
597 self._compressor = compressor
516 self._source = source
598 self._source = source
517 self._source_size = size
518 self._read_size = read_size
599 self._read_size = read_size
519 self._entered = False
600 self._entered = False
520 self._closed = False
601 self._closed = False
@@ -530,12 +611,6 b' class CompressionReader(object):'
530 if self._entered:
611 if self._entered:
531 raise ValueError('cannot __enter__ multiple times')
612 raise ValueError('cannot __enter__ multiple times')
532
613
533 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._compressor._cctx,
534 self._source_size)
535 if lib.ZSTD_isError(zresult):
536 raise ZstdError('error setting source size: %s' %
537 _zstd_error(zresult))
538
539 self._entered = True
614 self._entered = True
540 return self
615 return self
541
616
@@ -578,6 +653,7 b' class CompressionReader(object):'
578 self._closed = True
653 self._closed = True
579 return None
654 return None
580
655
656 @property
581 def closed(self):
657 def closed(self):
582 return self._closed
658 return self._closed
583
659
@@ -596,9 +672,6 b' class CompressionReader(object):'
596 next = __next__
672 next = __next__
597
673
598 def read(self, size=-1):
674 def read(self, size=-1):
599 if not self._entered:
600 raise ZstdError('read() must be called from an active context manager')
601
602 if self._closed:
675 if self._closed:
603 raise ValueError('stream is closed')
676 raise ValueError('stream is closed')
604
677
@@ -759,16 +832,14 b' class ZstdCompressor(object):'
759 self._dict_data = dict_data
832 self._dict_data = dict_data
760
833
761 # We defer setting up garbage collection until after calling
834 # We defer setting up garbage collection until after calling
762 # _ensure_cctx() to ensure the memory size estimate is more accurate.
835 # _setup_cctx() to ensure the memory size estimate is more accurate.
763 try:
836 try:
764 self._ensure_cctx()
837 self._setup_cctx()
765 finally:
838 finally:
766 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx,
839 self._cctx = ffi.gc(cctx, lib.ZSTD_freeCCtx,
767 size=lib.ZSTD_sizeof_CCtx(cctx))
840 size=lib.ZSTD_sizeof_CCtx(cctx))
768
841
769 def _ensure_cctx(self):
842 def _setup_cctx(self):
770 lib.ZSTD_CCtx_reset(self._cctx)
771
772 zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx,
843 zresult = lib.ZSTD_CCtx_setParametersUsingCCtxParams(self._cctx,
773 self._params)
844 self._params)
774 if lib.ZSTD_isError(zresult):
845 if lib.ZSTD_isError(zresult):
@@ -793,7 +864,7 b' class ZstdCompressor(object):'
793 return lib.ZSTD_sizeof_CCtx(self._cctx)
864 return lib.ZSTD_sizeof_CCtx(self._cctx)
794
865
795 def compress(self, data):
866 def compress(self, data):
796 self._ensure_cctx()
867 lib.ZSTD_CCtx_reset(self._cctx)
797
868
798 data_buffer = ffi.from_buffer(data)
869 data_buffer = ffi.from_buffer(data)
799
870
@@ -830,7 +901,7 b' class ZstdCompressor(object):'
830 return ffi.buffer(out, out_buffer.pos)[:]
901 return ffi.buffer(out, out_buffer.pos)[:]
831
902
832 def compressobj(self, size=-1):
903 def compressobj(self, size=-1):
833 self._ensure_cctx()
904 lib.ZSTD_CCtx_reset(self._cctx)
834
905
835 if size < 0:
906 if size < 0:
836 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
907 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -851,6 +922,19 b' class ZstdCompressor(object):'
851
922
852 return cobj
923 return cobj
853
924
925 def chunker(self, size=-1, chunk_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
926 lib.ZSTD_CCtx_reset(self._cctx)
927
928 if size < 0:
929 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
930
931 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
932 if lib.ZSTD_isError(zresult):
933 raise ZstdError('error setting source size: %s' %
934 _zstd_error(zresult))
935
936 return ZstdCompressionChunker(self, chunk_size=chunk_size)
937
854 def copy_stream(self, ifh, ofh, size=-1,
938 def copy_stream(self, ifh, ofh, size=-1,
855 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
939 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE,
856 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
940 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
@@ -860,7 +944,7 b' class ZstdCompressor(object):'
860 if not hasattr(ofh, 'write'):
944 if not hasattr(ofh, 'write'):
861 raise ValueError('second argument must have a write() method')
945 raise ValueError('second argument must have a write() method')
862
946
863 self._ensure_cctx()
947 lib.ZSTD_CCtx_reset(self._cctx)
864
948
865 if size < 0:
949 if size < 0:
866 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
950 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -927,7 +1011,7 b' class ZstdCompressor(object):'
927
1011
928 def stream_reader(self, source, size=-1,
1012 def stream_reader(self, source, size=-1,
929 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
1013 read_size=COMPRESSION_RECOMMENDED_INPUT_SIZE):
930 self._ensure_cctx()
1014 lib.ZSTD_CCtx_reset(self._cctx)
931
1015
932 try:
1016 try:
933 size = len(source)
1017 size = len(source)
@@ -937,7 +1021,12 b' class ZstdCompressor(object):'
937 if size < 0:
1021 if size < 0:
938 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1022 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
939
1023
940 return CompressionReader(self, source, size, read_size)
1024 zresult = lib.ZSTD_CCtx_setPledgedSrcSize(self._cctx, size)
1025 if lib.ZSTD_isError(zresult):
1026 raise ZstdError('error setting source size: %s' %
1027 _zstd_error(zresult))
1028
1029 return CompressionReader(self, source, read_size)
941
1030
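stream_reader() usage matching the change above, where the pledged source size is now set before the reader is returned; a minimal sketch:

import io
import zstandard as zstd

cctx = zstd.ZstdCompressor()
with cctx.stream_reader(io.BytesIO(b'input' * 100000)) as reader:
    while True:
        chunk = reader.read(8192)   # compressed bytes
        if not chunk:
            break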
942 def stream_writer(self, writer, size=-1,
1031 def stream_writer(self, writer, size=-1,
943 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
1032 write_size=COMPRESSION_RECOMMENDED_OUTPUT_SIZE):
@@ -945,7 +1034,7 b' class ZstdCompressor(object):'
945 if not hasattr(writer, 'write'):
1034 if not hasattr(writer, 'write'):
946 raise ValueError('must pass an object with a write() method')
1035 raise ValueError('must pass an object with a write() method')
947
1036
948 self._ensure_cctx()
1037 lib.ZSTD_CCtx_reset(self._cctx)
949
1038
950 if size < 0:
1039 if size < 0:
951 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1040 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -967,7 +1056,7 b' class ZstdCompressor(object):'
967 raise ValueError('must pass an object with a read() method or '
1056 raise ValueError('must pass an object with a read() method or '
968 'conforms to buffer protocol')
1057 'conforms to buffer protocol')
969
1058
970 self._ensure_cctx()
1059 lib.ZSTD_CCtx_reset(self._cctx)
971
1060
972 if size < 0:
1061 if size < 0:
973 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
1062 size = lib.ZSTD_CONTENTSIZE_UNKNOWN
@@ -1267,7 +1356,7 b' class ZstdDecompressionObj(object):'
1267
1356
1268 chunks = []
1357 chunks = []
1269
1358
1270 while in_buffer.pos < in_buffer.size:
1359 while True:
1271 zresult = lib.ZSTD_decompress_generic(self._decompressor._dctx,
1360 zresult = lib.ZSTD_decompress_generic(self._decompressor._dctx,
1272 out_buffer, in_buffer)
1361 out_buffer, in_buffer)
1273 if lib.ZSTD_isError(zresult):
1362 if lib.ZSTD_isError(zresult):
@@ -1280,7 +1369,12 b' class ZstdDecompressionObj(object):'
1280
1369
1281 if out_buffer.pos:
1370 if out_buffer.pos:
1282 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
1371 chunks.append(ffi.buffer(out_buffer.dst, out_buffer.pos)[:])
1283 out_buffer.pos = 0
1372
1373 if (zresult == 0 or
1374 (in_buffer.pos == in_buffer.size and out_buffer.pos == 0)):
1375 break
1376
1377 out_buffer.pos = 0
1284
1378
1285 return b''.join(chunks)
1379 return b''.join(chunks)
1286
1380
@@ -1303,8 +1397,6 b' class DecompressionReader(object):'
1303 if self._entered:
1397 if self._entered:
1304 raise ValueError('cannot __enter__ multiple times')
1398 raise ValueError('cannot __enter__ multiple times')
1305
1399
1306 self._decompressor._ensure_dctx()
1307
1308 self._entered = True
1400 self._entered = True
1309 return self
1401 return self
1310
1402
@@ -1347,6 +1439,7 b' class DecompressionReader(object):'
1347 self._closed = True
1439 self._closed = True
1348 return None
1440 return None
1349
1441
1442 @property
1350 def closed(self):
1443 def closed(self):
1351 return self._closed
1444 return self._closed
1352
1445
@@ -1364,10 +1457,7 b' class DecompressionReader(object):'
1364
1457
1365 next = __next__
1458 next = __next__
1366
1459
1367 def read(self, size=-1):
1460 def read(self, size):
1368 if not self._entered:
1369 raise ZstdError('read() must be called from an active context manager')
1370
1371 if self._closed:
1461 if self._closed:
1372 raise ValueError('stream is closed')
1462 raise ValueError('stream is closed')
1373
1463
@@ -1442,10 +1532,6 b' class DecompressionReader(object):'
1442 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1532 return ffi.buffer(out_buffer.dst, out_buffer.pos)[:]
1443
1533
1444 def seek(self, pos, whence=os.SEEK_SET):
1534 def seek(self, pos, whence=os.SEEK_SET):
1445 if not self._entered:
1446 raise ZstdError('seek() must be called from an active context '
1447 'manager')
1448
1449 if self._closed:
1535 if self._closed:
1450 raise ValueError('stream is closed')
1536 raise ValueError('stream is closed')
1451
1537
@@ -919,7 +919,9 b' extmodules = ['
919
919
920 sys.path.insert(0, 'contrib/python-zstandard')
920 sys.path.insert(0, 'contrib/python-zstandard')
921 import setup_zstd
921 import setup_zstd
922 extmodules.append(setup_zstd.get_c_extension(name='mercurial.zstd'))
922 extmodules.append(setup_zstd.get_c_extension(
923 name='mercurial.zstd',
924 root=os.path.abspath(os.path.dirname(__file__))))
923
925
924 try:
926 try:
925 from distutils import cygwinccompiler
927 from distutils import cygwinccompiler