zstd: vendor zstd 1.1.1...
Gregory Szorc
r30434:2e484bde default

Note: the requested changes are too large, so the diff content below has been truncated.

@@ -0,0 +1,30 b''
1 BSD License
2
3 For Zstandard software
4
5 Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
6
7 Redistribution and use in source and binary forms, with or without modification,
8 are permitted provided that the following conditions are met:
9
10 * Redistributions of source code must retain the above copyright notice, this
11 list of conditions and the following disclaimer.
12
13 * Redistributions in binary form must reproduce the above copyright notice,
14 this list of conditions and the following disclaimer in the documentation
15 and/or other materials provided with the distribution.
16
17 * Neither the name Facebook nor the names of its contributors may be used to
18 endorse or promote products derived from this software without specific
19 prior written permission.
20
21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
22 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
23 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
25 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
26 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
27 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
28 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@@ -0,0 +1,33 b''
1 Additional Grant of Patent Rights Version 2
2
3 "Software" means the Zstandard software distributed by Facebook, Inc.
4
5 Facebook, Inc. ("Facebook") hereby grants to each recipient of the Software
6 ("you") a perpetual, worldwide, royalty-free, non-exclusive, irrevocable
7 (subject to the termination provision below) license under any Necessary
8 Claims, to make, have made, use, sell, offer to sell, import, and otherwise
9 transfer the Software. For avoidance of doubt, no license is granted under
10 Facebook’s rights in any patent claims that are infringed by (i) modifications
11 to the Software made by you or any third party or (ii) the Software in
12 combination with any software or other technology.
13
14 The license granted hereunder will terminate, automatically and without notice,
15 if you (or any of your subsidiaries, corporate affiliates or agents) initiate
16 directly or indirectly, or take a direct financial interest in, any Patent
17 Assertion: (i) against Facebook or any of its subsidiaries or corporate
18 affiliates, (ii) against any party if such Patent Assertion arises in whole or
19 in part from any software, technology, product or service of Facebook or any of
20 its subsidiaries or corporate affiliates, or (iii) against any party relating
21 to the Software. Notwithstanding the foregoing, if Facebook or any of its
22 subsidiaries or corporate affiliates files a lawsuit alleging patent
23 infringement against you in the first instance, and you respond by filing a
24 patent infringement counterclaim in that lawsuit against that party that is
25 unrelated to the Software, the license granted hereunder will not terminate
26 under section (i) of this paragraph due to such counterclaim.
27
28 A "Necessary Claim" is a claim of a patent owned by Facebook that is
29 necessarily infringed by the Software standing alone.
30
31 A "Patent Assertion" is any lawsuit or other action alleging direct, indirect,
32 or contributory infringement or inducement to infringe any patent, including a
33 cross-claim or counterclaim.
@@ -0,0 +1,414 b''
1 /* ******************************************************************
2 bitstream
3 Part of FSE library
4 header file (to include)
5 Copyright (C) 2013-2016, Yann Collet.
6
7 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
8
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12
13 * Redistributions of source code must retain the above copyright
14 notice, this list of conditions and the following disclaimer.
15 * Redistributions in binary form must reproduce the above
16 copyright notice, this list of conditions and the following disclaimer
17 in the documentation and/or other materials provided with the
18 distribution.
19
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31
32 You can contact the author at :
33 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
34 ****************************************************************** */
35 #ifndef BITSTREAM_H_MODULE
36 #define BITSTREAM_H_MODULE
37
38 #if defined (__cplusplus)
39 extern "C" {
40 #endif
41
42
43 /*
44 * This API consists of small unitary functions, which must be inlined for best performance.
45 * Since link-time-optimization is not available for all compilers,
46 * these functions are defined in this .h file, to be included.
47 */
48
49 /*-****************************************
50 * Dependencies
51 ******************************************/
52 #include "mem.h" /* unaligned access routines */
53 #include "error_private.h" /* error codes and messages */
54
55
56 /*=========================================
57 * Target specific
58 =========================================*/
59 #if defined(__BMI__) && defined(__GNUC__)
60 # include <immintrin.h> /* support for bextr (experimental) */
61 #endif
62
63
64 /*-******************************************
65 * bitStream encoding API (write forward)
66 ********************************************/
67 /* bitStream can mix input from multiple sources.
68 * A critical property of these streams is that they encode and decode in **reverse** direction.
69 * So the first bit sequence you add will be the last to be read, like a LIFO stack.
70 */
71 typedef struct
72 {
73 size_t bitContainer;
74 int bitPos;
75 char* startPtr;
76 char* ptr;
77 char* endPtr;
78 } BIT_CStream_t;
79
80 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* dstBuffer, size_t dstCapacity);
81 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
82 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC);
83 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC);
84
85 /* Start with initCStream, providing the size of buffer to write into.
86 * bitStream will never write outside of this buffer.
87 * `dstCapacity` must be >= sizeof(bitD->bitContainer), otherwise @return will be an error code.
88 *
89 * bits are first added to a local register.
90 * Local register is size_t, hence 64-bits on 64-bits systems, or 32-bits on 32-bits systems.
91 * Writing data into memory is an explicit operation, performed by the flushBits function.
92 * Hence, keep track of how many bits are potentially stored in the local register, to avoid register overflow.
93 * After a flushBits, a maximum of 7 bits might still be stored into local register.
94 *
95 * Avoid storing elements of more than 24 bits if you want compatibility with 32-bits bitstream readers.
96 *
97 * Last operation is to close the bitStream.
98 * The function returns the final size of CStream in bytes.
99 * If data couldn't fit into `dstBuffer`, it will return a 0 ( == not storable)
100 */
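/* Editor's illustrative sketch, not part of the vendored zstd sources :
 * a minimal forward-writing pass over the CStream API declared above.
 * The helper name BIT_example_writeFields() is hypothetical, and error
 * handling is reduced to a 0 return for brevity. */
MEM_STATIC size_t BIT_example_writeFields(void* dst, size_t dstCapacity)
{
    BIT_CStream_t bitC;
    if (ERR_isError(BIT_initCStream(&bitC, dst, dstCapacity))) return 0;
    BIT_addBits(&bitC, 5, 3);        /* store value 5 on 3 bits */
    BIT_addBits(&bitC, 300, 9);      /* store value 300 on 9 bits */
    BIT_flushBits(&bitC);            /* commit the local register to memory */
    return BIT_closeCStream(&bitC);  /* final size in bytes, or 0 if dst was too small */
}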
101
102
103 /*-********************************************
104 * bitStream decoding API (read backward)
105 **********************************************/
106 typedef struct
107 {
108 size_t bitContainer;
109 unsigned bitsConsumed;
110 const char* ptr;
111 const char* start;
112 } BIT_DStream_t;
113
114 typedef enum { BIT_DStream_unfinished = 0,
115 BIT_DStream_endOfBuffer = 1,
116 BIT_DStream_completed = 2,
117 BIT_DStream_overflow = 3 } BIT_DStream_status; /* result of BIT_reloadDStream() */
118 /* 1,2,4,8 would be better for bitmap combinations, but slows down performance a bit ... :( */
119
120 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize);
121 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, unsigned nbBits);
122 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD);
123 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* bitD);
124
125
126 /* Start by invoking BIT_initDStream().
127 * A chunk of the bitStream is then stored into a local register.
128 * Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
129 * You can then retrieve bitFields stored into the local register, **in reverse order**.
130 * Local register is explicitly reloaded from memory by the BIT_reloadDStream() method.
131 * A reload guarantees a minimum of ((8*sizeof(bitD->bitContainer))-7) bits when its result is BIT_DStream_unfinished.
132 * Otherwise, it can be less than that, so proceed accordingly.
133 * Checking if DStream has reached its end can be performed with BIT_endOfDStream().
134 */
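/* Editor's illustrative sketch, not part of the vendored zstd sources :
 * reading back the two fields written by the encoding sketch above,
 * in reverse (LIFO) order. The helper name BIT_example_readFields() is
 * hypothetical. */
MEM_STATIC size_t BIT_example_readFields(const void* src, size_t srcSize)
{
    BIT_DStream_t bitD;
    size_t field1, field2;
    if (ERR_isError(BIT_initDStream(&bitD, src, srcSize))) return 0;
    field2 = BIT_readBits(&bitD, 9);   /* last value written is read first */
    field1 = BIT_readBits(&bitD, 3);
    BIT_reloadDStream(&bitD);          /* refill the local register from memory */
    return BIT_endOfDStream(&bitD) ? field1 + field2 : 0;
}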
135
136
137 /*-****************************************
138 * unsafe API
139 ******************************************/
140 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits);
141 /* faster, but works only if value is "clean", meaning all high bits above nbBits are 0 */
142
143 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC);
144 /* unsafe version; does not check buffer overflow */
145
146 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, unsigned nbBits);
147 /* faster, but works only if nbBits >= 1 */
148
149
150
151 /*-**************************************************************
152 * Internal functions
153 ****************************************************************/
154 MEM_STATIC unsigned BIT_highbit32 (register U32 val)
155 {
156 # if defined(_MSC_VER) /* Visual */
157 unsigned long r=0;
158 _BitScanReverse ( &r, val );
159 return (unsigned) r;
160 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */
161 return 31 - __builtin_clz (val);
162 # else /* Software version */
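 /* Editor's note : the fallback below first propagates the highest set bit
  * downward (v becomes 2^(pos+1) - 1), then uses a De Bruijn-style
  * multiply-and-shift as a perfect hash into the lookup table to recover
  * the bit position. */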
163 static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
164 U32 v = val;
165 v |= v >> 1;
166 v |= v >> 2;
167 v |= v >> 4;
168 v |= v >> 8;
169 v |= v >> 16;
170 return DeBruijnClz[ (U32) (v * 0x07C4ACDDU) >> 27];
171 # endif
172 }
173
174 /*===== Local Constants =====*/
175 static const unsigned BIT_mask[] = { 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0x1FFFF, 0x3FFFF, 0x7FFFF, 0xFFFFF, 0x1FFFFF, 0x3FFFFF, 0x7FFFFF, 0xFFFFFF, 0x1FFFFFF, 0x3FFFFFF }; /* up to 26 bits */
176
177
178 /*-**************************************************************
179 * bitStream encoding
180 ****************************************************************/
181 /*! BIT_initCStream() :
182 * `dstCapacity` must be > sizeof(void*)
183 * @return : 0 if success,
184 otherwise an error code (can be tested using ERR_isError() ) */
185 MEM_STATIC size_t BIT_initCStream(BIT_CStream_t* bitC, void* startPtr, size_t dstCapacity)
186 {
187 bitC->bitContainer = 0;
188 bitC->bitPos = 0;
189 bitC->startPtr = (char*)startPtr;
190 bitC->ptr = bitC->startPtr;
191 bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->ptr);
192 if (dstCapacity <= sizeof(bitC->ptr)) return ERROR(dstSize_tooSmall);
193 return 0;
194 }
195
196 /*! BIT_addBits() :
197 can add up to 26 bits into `bitC`.
198 Does not check for register overflow ! */
199 MEM_STATIC void BIT_addBits(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
200 {
201 bitC->bitContainer |= (value & BIT_mask[nbBits]) << bitC->bitPos;
202 bitC->bitPos += nbBits;
203 }
204
205 /*! BIT_addBitsFast() :
206 * works only if `value` is _clean_, meaning all high bits above nbBits are 0 */
207 MEM_STATIC void BIT_addBitsFast(BIT_CStream_t* bitC, size_t value, unsigned nbBits)
208 {
209 bitC->bitContainer |= value << bitC->bitPos;
210 bitC->bitPos += nbBits;
211 }
212
213 /*! BIT_flushBitsFast() :
214 * unsafe version; does not check buffer overflow */
215 MEM_STATIC void BIT_flushBitsFast(BIT_CStream_t* bitC)
216 {
217 size_t const nbBytes = bitC->bitPos >> 3;
218 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
219 bitC->ptr += nbBytes;
220 bitC->bitPos &= 7;
221 bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
222 }
223
224 /*! BIT_flushBits() :
225 * safe version; checks for buffer overflow, and prevents it.
226 * note : does not signal buffer overflow. This will be revealed later on using BIT_closeCStream() */
227 MEM_STATIC void BIT_flushBits(BIT_CStream_t* bitC)
228 {
229 size_t const nbBytes = bitC->bitPos >> 3;
230 MEM_writeLEST(bitC->ptr, bitC->bitContainer);
231 bitC->ptr += nbBytes;
232 if (bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
233 bitC->bitPos &= 7;
234 bitC->bitContainer >>= nbBytes*8; /* if bitPos >= sizeof(bitContainer)*8 --> undefined behavior */
235 }
236
237 /*! BIT_closeCStream() :
238 * @return : size of CStream, in bytes,
239 or 0 if it could not fit into dstBuffer */
240 MEM_STATIC size_t BIT_closeCStream(BIT_CStream_t* bitC)
241 {
242 BIT_addBitsFast(bitC, 1, 1); /* endMark */
243 BIT_flushBits(bitC);
244
245 if (bitC->ptr >= bitC->endPtr) return 0; /* doesn't fit within authorized budget : cancel */
246
247 return (bitC->ptr - bitC->startPtr) + (bitC->bitPos > 0);
248 }
249
250
251 /*-********************************************************
252 * bitStream decoding
253 **********************************************************/
254 /*! BIT_initDStream() :
255 * Initialize a BIT_DStream_t.
256 * `bitD` : a pointer to an already allocated BIT_DStream_t structure.
257 * `srcSize` must be the *exact* size of the bitStream, in bytes.
258 * @return : size of stream (== srcSize) or an errorCode if a problem is detected
259 */
260 MEM_STATIC size_t BIT_initDStream(BIT_DStream_t* bitD, const void* srcBuffer, size_t srcSize)
261 {
262 if (srcSize < 1) { memset(bitD, 0, sizeof(*bitD)); return ERROR(srcSize_wrong); }
263
264 if (srcSize >= sizeof(bitD->bitContainer)) { /* normal case */
265 bitD->start = (const char*)srcBuffer;
266 bitD->ptr = (const char*)srcBuffer + srcSize - sizeof(bitD->bitContainer);
267 bitD->bitContainer = MEM_readLEST(bitD->ptr);
268 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
269 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
270 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
271 } else {
272 bitD->start = (const char*)srcBuffer;
273 bitD->ptr = bitD->start;
274 bitD->bitContainer = *(const BYTE*)(bitD->start);
275 switch(srcSize)
276 {
277 case 7: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[6]) << (sizeof(bitD->bitContainer)*8 - 16);
278 case 6: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[5]) << (sizeof(bitD->bitContainer)*8 - 24);
279 case 5: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[4]) << (sizeof(bitD->bitContainer)*8 - 32);
280 case 4: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[3]) << 24;
281 case 3: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[2]) << 16;
282 case 2: bitD->bitContainer += (size_t)(((const BYTE*)(srcBuffer))[1]) << 8;
283 default:;
284 }
285 { BYTE const lastByte = ((const BYTE*)srcBuffer)[srcSize-1];
286 bitD->bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
287 if (lastByte == 0) return ERROR(GENERIC); /* endMark not present */ }
288 bitD->bitsConsumed += (U32)(sizeof(bitD->bitContainer) - srcSize)*8;
289 }
290
291 return srcSize;
292 }
293
294 MEM_STATIC size_t BIT_getUpperBits(size_t bitContainer, U32 const start)
295 {
296 return bitContainer >> start;
297 }
298
299 MEM_STATIC size_t BIT_getMiddleBits(size_t bitContainer, U32 const start, U32 const nbBits)
300 {
301 #if defined(__BMI__) && defined(__GNUC__) /* experimental */
302 # if defined(__x86_64__)
303 if (sizeof(bitContainer)==8)
304 return _bextr_u64(bitContainer, start, nbBits);
305 else
306 # endif
307 return _bextr_u32(bitContainer, start, nbBits);
308 #else
309 return (bitContainer >> start) & BIT_mask[nbBits];
310 #endif
311 }
312
313 MEM_STATIC size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
314 {
315 return bitContainer & BIT_mask[nbBits];
316 }
317
318 /*! BIT_lookBits() :
319 * Provides next n bits from local register.
320 * local register is not modified.
321 * On 32-bits, maxNbBits==24.
322 * On 64-bits, maxNbBits==56.
323 * @return : value extracted
324 */
325 MEM_STATIC size_t BIT_lookBits(const BIT_DStream_t* bitD, U32 nbBits)
326 {
327 #if defined(__BMI__) && defined(__GNUC__) /* experimental; fails if bitD->bitsConsumed + nbBits > sizeof(bitD->bitContainer)*8 */
328 return BIT_getMiddleBits(bitD->bitContainer, (sizeof(bitD->bitContainer)*8) - bitD->bitsConsumed - nbBits, nbBits);
329 #else
330 U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
331 return ((bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> 1) >> ((bitMask-nbBits) & bitMask);
332 #endif
333 }
334
335 /*! BIT_lookBitsFast() :
336 * unsafe version; works only if nbBits >= 1 */
337 MEM_STATIC size_t BIT_lookBitsFast(const BIT_DStream_t* bitD, U32 nbBits)
338 {
339 U32 const bitMask = sizeof(bitD->bitContainer)*8 - 1;
340 return (bitD->bitContainer << (bitD->bitsConsumed & bitMask)) >> (((bitMask+1)-nbBits) & bitMask);
341 }
342
343 MEM_STATIC void BIT_skipBits(BIT_DStream_t* bitD, U32 nbBits)
344 {
345 bitD->bitsConsumed += nbBits;
346 }
347
348 /*! BIT_readBits() :
349 * Read (consume) next n bits from local register and update.
350 * Pay attention not to read more bits than the local register contains.
351 * @return : extracted value.
352 */
353 MEM_STATIC size_t BIT_readBits(BIT_DStream_t* bitD, U32 nbBits)
354 {
355 size_t const value = BIT_lookBits(bitD, nbBits);
356 BIT_skipBits(bitD, nbBits);
357 return value;
358 }
359
360 /*! BIT_readBitsFast() :
361 * unsafe version; works only if nbBits >= 1 */
362 MEM_STATIC size_t BIT_readBitsFast(BIT_DStream_t* bitD, U32 nbBits)
363 {
364 size_t const value = BIT_lookBitsFast(bitD, nbBits);
365 BIT_skipBits(bitD, nbBits);
366 return value;
367 }
368
369 /*! BIT_reloadDStream() :
370 * Refill `BIT_DStream_t` from src buffer previously defined (see BIT_initDStream() ).
371 * This function is safe, it guarantees it will not read beyond src buffer.
372 * @return : status of `BIT_DStream_t` internal register.
373 if status == unfinished, internal register is filled with >= (sizeof(bitD->bitContainer)*8 - 7) bits */
374 MEM_STATIC BIT_DStream_status BIT_reloadDStream(BIT_DStream_t* bitD)
375 {
376 if (bitD->bitsConsumed > (sizeof(bitD->bitContainer)*8)) /* should not happen => corruption detected */
377 return BIT_DStream_overflow;
378
379 if (bitD->ptr >= bitD->start + sizeof(bitD->bitContainer)) {
380 bitD->ptr -= bitD->bitsConsumed >> 3;
381 bitD->bitsConsumed &= 7;
382 bitD->bitContainer = MEM_readLEST(bitD->ptr);
383 return BIT_DStream_unfinished;
384 }
385 if (bitD->ptr == bitD->start) {
386 if (bitD->bitsConsumed < sizeof(bitD->bitContainer)*8) return BIT_DStream_endOfBuffer;
387 return BIT_DStream_completed;
388 }
389 { U32 nbBytes = bitD->bitsConsumed >> 3;
390 BIT_DStream_status result = BIT_DStream_unfinished;
391 if (bitD->ptr - nbBytes < bitD->start) {
392 nbBytes = (U32)(bitD->ptr - bitD->start); /* ptr > start */
393 result = BIT_DStream_endOfBuffer;
394 }
395 bitD->ptr -= nbBytes;
396 bitD->bitsConsumed -= nbBytes*8;
397 bitD->bitContainer = MEM_readLEST(bitD->ptr); /* reminder : srcSize > sizeof(bitD) */
398 return result;
399 }
400 }
401
402 /*! BIT_endOfDStream() :
403 * @return Tells if DStream has exactly reached its end (all bits consumed).
404 */
405 MEM_STATIC unsigned BIT_endOfDStream(const BIT_DStream_t* DStream)
406 {
407 return ((DStream->ptr == DStream->start) && (DStream->bitsConsumed == sizeof(DStream->bitContainer)*8));
408 }
409
410 #if defined (__cplusplus)
411 }
412 #endif
413
414 #endif /* BITSTREAM_H_MODULE */
@@ -0,0 +1,225 b''
1 /*
2 Common functions of New Generation Entropy library
3 Copyright (C) 2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 *************************************************************************** */
34
35 /* *************************************
36 * Dependencies
37 ***************************************/
38 #include "mem.h"
39 #include "error_private.h" /* ERR_*, ERROR */
40 #define FSE_STATIC_LINKING_ONLY /* FSE_MIN_TABLELOG */
41 #include "fse.h"
42 #define HUF_STATIC_LINKING_ONLY /* HUF_TABLELOG_ABSOLUTEMAX */
43 #include "huf.h"
44
45
46 /*-****************************************
47 * FSE Error Management
48 ******************************************/
49 unsigned FSE_isError(size_t code) { return ERR_isError(code); }
50
51 const char* FSE_getErrorName(size_t code) { return ERR_getErrorName(code); }
52
53
54 /* **************************************************************
55 * HUF Error Management
56 ****************************************************************/
57 unsigned HUF_isError(size_t code) { return ERR_isError(code); }
58
59 const char* HUF_getErrorName(size_t code) { return ERR_getErrorName(code); }
60
61
62 /*-**************************************************************
63 * FSE NCount encoding-decoding
64 ****************************************************************/
65 static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
66
67 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
68 const void* headerBuffer, size_t hbSize)
69 {
70 const BYTE* const istart = (const BYTE*) headerBuffer;
71 const BYTE* const iend = istart + hbSize;
72 const BYTE* ip = istart;
73 int nbBits;
74 int remaining;
75 int threshold;
76 U32 bitStream;
77 int bitCount;
78 unsigned charnum = 0;
79 int previous0 = 0;
80
81 if (hbSize < 4) return ERROR(srcSize_wrong);
82 bitStream = MEM_readLE32(ip);
83 nbBits = (bitStream & 0xF) + FSE_MIN_TABLELOG; /* extract tableLog */
84 if (nbBits > FSE_TABLELOG_ABSOLUTE_MAX) return ERROR(tableLog_tooLarge);
85 bitStream >>= 4;
86 bitCount = 4;
87 *tableLogPtr = nbBits;
88 remaining = (1<<nbBits)+1;
89 threshold = 1<<nbBits;
90 nbBits++;
91
92 while ((remaining>1) & (charnum<=*maxSVPtr)) {
93 if (previous0) {
94 unsigned n0 = charnum;
95 while ((bitStream & 0xFFFF) == 0xFFFF) {
96 n0 += 24;
97 if (ip < iend-5) {
98 ip += 2;
99 bitStream = MEM_readLE32(ip) >> bitCount;
100 } else {
101 bitStream >>= 16;
102 bitCount += 16;
103 } }
104 while ((bitStream & 3) == 3) {
105 n0 += 3;
106 bitStream >>= 2;
107 bitCount += 2;
108 }
109 n0 += bitStream & 3;
110 bitCount += 2;
111 if (n0 > *maxSVPtr) return ERROR(maxSymbolValue_tooSmall);
112 while (charnum < n0) normalizedCounter[charnum++] = 0;
113 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
114 ip += bitCount>>3;
115 bitCount &= 7;
116 bitStream = MEM_readLE32(ip) >> bitCount;
117 } else {
118 bitStream >>= 2;
119 } }
120 { short const max = (short)((2*threshold-1)-remaining);
121 short count;
122
123 if ((bitStream & (threshold-1)) < (U32)max) {
124 count = (short)(bitStream & (threshold-1));
125 bitCount += nbBits-1;
126 } else {
127 count = (short)(bitStream & (2*threshold-1));
128 if (count >= threshold) count -= max;
129 bitCount += nbBits;
130 }
131
132 count--; /* extra accuracy */
133 remaining -= FSE_abs(count);
134 normalizedCounter[charnum++] = count;
135 previous0 = !count;
136 while (remaining < threshold) {
137 nbBits--;
138 threshold >>= 1;
139 }
140
141 if ((ip <= iend-7) || (ip + (bitCount>>3) <= iend-4)) {
142 ip += bitCount>>3;
143 bitCount &= 7;
144 } else {
145 bitCount -= (int)(8 * (iend - 4 - ip));
146 ip = iend - 4;
147 }
148 bitStream = MEM_readLE32(ip) >> (bitCount & 31);
149 } } /* while ((remaining>1) & (charnum<=*maxSVPtr)) */
150 if (remaining != 1) return ERROR(corruption_detected);
151 if (bitCount > 32) return ERROR(corruption_detected);
152 *maxSVPtr = charnum-1;
153
154 ip += (bitCount+7)>>3;
155 return ip-istart;
156 }
157
158
159 /*! HUF_readStats() :
160 Read compact Huffman tree, saved by HUF_writeCTable().
161 `huffWeight` is destination buffer.
162 @return : size read from `src` , or an error Code .
163 Note : Needed by HUF_readCTable() and HUF_readDTableX?() .
164 */
165 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
166 U32* nbSymbolsPtr, U32* tableLogPtr,
167 const void* src, size_t srcSize)
168 {
169 U32 weightTotal;
170 const BYTE* ip = (const BYTE*) src;
171 size_t iSize;
172 size_t oSize;
173
174 if (!srcSize) return ERROR(srcSize_wrong);
175 iSize = ip[0];
176 /* memset(huffWeight, 0, hwSize); *//* is not necessary, even though some analyzer complain ... */
177
178 if (iSize >= 128) { /* special header */
179 oSize = iSize - 127;
180 iSize = ((oSize+1)/2);
181 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
182 if (oSize >= hwSize) return ERROR(corruption_detected);
183 ip += 1;
184 { U32 n;
185 for (n=0; n<oSize; n+=2) {
186 huffWeight[n] = ip[n/2] >> 4;
187 huffWeight[n+1] = ip[n/2] & 15;
188 } } }
189 else { /* header compressed with FSE (normal case) */
190 if (iSize+1 > srcSize) return ERROR(srcSize_wrong);
191 oSize = FSE_decompress(huffWeight, hwSize-1, ip+1, iSize); /* max (hwSize-1) values decoded, as last one is implied */
192 if (FSE_isError(oSize)) return oSize;
193 }
194
195 /* collect weight stats */
196 memset(rankStats, 0, (HUF_TABLELOG_ABSOLUTEMAX + 1) * sizeof(U32));
197 weightTotal = 0;
198 { U32 n; for (n=0; n<oSize; n++) {
199 if (huffWeight[n] >= HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
200 rankStats[huffWeight[n]]++;
201 weightTotal += (1 << huffWeight[n]) >> 1;
202 } }
203 if (weightTotal == 0) return ERROR(corruption_detected);
204
205 /* get last non-null symbol weight (implied, total must be 2^n) */
206 { U32 const tableLog = BIT_highbit32(weightTotal) + 1;
207 if (tableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(corruption_detected);
208 *tableLogPtr = tableLog;
209 /* determine last weight */
210 { U32 const total = 1 << tableLog;
211 U32 const rest = total - weightTotal;
212 U32 const verif = 1 << BIT_highbit32(rest);
213 U32 const lastWeight = BIT_highbit32(rest) + 1;
214 if (verif != rest) return ERROR(corruption_detected); /* last value must be a clean power of 2 */
215 huffWeight[oSize] = (BYTE)lastWeight;
216 rankStats[lastWeight]++;
217 } }
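 /* Editor's worked example (not part of the vendored sources) for the implied
  * last weight above : if the decoded weights sum to weightTotal == 96, then
  * tableLog = BIT_highbit32(96) + 1 = 7, total = 128, rest = 32 (a clean power
  * of 2), and lastWeight = BIT_highbit32(32) + 1 = 6 ; a weight-6 symbol
  * contributes (1<<6)>>1 = 32, completing the sum to 2^tableLog. */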
218
219 /* check tree construction validity */
220 if ((rankStats[1] < 2) || (rankStats[1] & 1)) return ERROR(corruption_detected); /* by construction : at least 2 elts of rank 1, must be even */
221
222 /* results */
223 *nbSymbolsPtr = (U32)(oSize+1);
224 return iSize+1;
225 }
@@ -0,0 +1,43 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 /* The purpose of this file is to have a single list of error strings embedded in binary */
11
12 #include "error_private.h"
13
14 const char* ERR_getErrorString(ERR_enum code)
15 {
16 static const char* const notErrorCode = "Unspecified error code";
17 switch( code )
18 {
19 case PREFIX(no_error): return "No error detected";
20 case PREFIX(GENERIC): return "Error (generic)";
21 case PREFIX(prefix_unknown): return "Unknown frame descriptor";
22 case PREFIX(version_unsupported): return "Version not supported";
23 case PREFIX(parameter_unknown): return "Unknown parameter type";
24 case PREFIX(frameParameter_unsupported): return "Unsupported frame parameter";
25 case PREFIX(frameParameter_unsupportedBy32bits): return "Frame parameter unsupported in 32-bits mode";
26 case PREFIX(frameParameter_windowTooLarge): return "Frame requires too much memory for decoding";
27 case PREFIX(compressionParameter_unsupported): return "Compression parameter is out of bound";
28 case PREFIX(init_missing): return "Context should be init first";
29 case PREFIX(memory_allocation): return "Allocation error : not enough memory";
30 case PREFIX(stage_wrong): return "Operation not authorized at current processing stage";
31 case PREFIX(dstSize_tooSmall): return "Destination buffer is too small";
32 case PREFIX(srcSize_wrong): return "Src size incorrect";
33 case PREFIX(corruption_detected): return "Corrupted block detected";
34 case PREFIX(checksum_wrong): return "Restored data doesn't match checksum";
35 case PREFIX(tableLog_tooLarge): return "tableLog requires too much memory : unsupported";
36 case PREFIX(maxSymbolValue_tooLarge): return "Unsupported max Symbol Value : too large";
37 case PREFIX(maxSymbolValue_tooSmall): return "Specified maxSymbolValue is too small";
38 case PREFIX(dictionary_corrupted): return "Dictionary is corrupted";
39 case PREFIX(dictionary_wrong): return "Dictionary mismatch";
40 case PREFIX(maxCode):
41 default: return notErrorCode;
42 }
43 }
@@ -0,0 +1,76 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 /* Note : this module is expected to remain private, do not expose it */
11
12 #ifndef ERROR_H_MODULE
13 #define ERROR_H_MODULE
14
15 #if defined (__cplusplus)
16 extern "C" {
17 #endif
18
19
20 /* ****************************************
21 * Dependencies
22 ******************************************/
23 #include <stddef.h> /* size_t */
24 #include "zstd_errors.h" /* enum list */
25
26
27 /* ****************************************
28 * Compiler-specific
29 ******************************************/
30 #if defined(__GNUC__)
31 # define ERR_STATIC static __attribute__((unused))
32 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
33 # define ERR_STATIC static inline
34 #elif defined(_MSC_VER)
35 # define ERR_STATIC static __inline
36 #else
37 # define ERR_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
38 #endif
39
40
41 /*-****************************************
42 * Customization (error_public.h)
43 ******************************************/
44 typedef ZSTD_ErrorCode ERR_enum;
45 #define PREFIX(name) ZSTD_error_##name
46
47
48 /*-****************************************
49 * Error codes handling
50 ******************************************/
51 #ifdef ERROR
52 # undef ERROR /* reported already defined on VS 2015 (Rich Geldreich) */
53 #endif
54 #define ERROR(name) ((size_t)-PREFIX(name))
55
56 ERR_STATIC unsigned ERR_isError(size_t code) { return (code > ERROR(maxCode)); }
57
58 ERR_STATIC ERR_enum ERR_getErrorCode(size_t code) { if (!ERR_isError(code)) return (ERR_enum)0; return (ERR_enum) (0-code); }
59
60
61 /*-****************************************
62 * Error Strings
63 ******************************************/
64
65 const char* ERR_getErrorString(ERR_enum code); /* error_private.c */
66
67 ERR_STATIC const char* ERR_getErrorName(size_t code)
68 {
69 return ERR_getErrorString(ERR_getErrorCode(code));
70 }
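
/* Editor's illustrative sketch, not part of the vendored zstd sources :
 * error codes are small negative enum values cast to size_t, so they sit at
 * the very top of the size_t range and cannot collide with valid sizes.
 * The helper name ERR_example() is hypothetical. */
ERR_STATIC const char* ERR_example(void)
{
    size_t const code = ERROR(srcSize_wrong);           /* == (size_t)-ZSTD_error_srcSize_wrong */
    return ERR_isError(code) ? ERR_getErrorName(code)   /* "Src size incorrect" */
                             : "not an error";
}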
71
72 #if defined (__cplusplus)
73 }
74 #endif
75
76 #endif /* ERROR_H_MODULE */
This diff has been collapsed because it changes many lines (634 lines changed).
@@ -0,0 +1,634 b''
1 /* ******************************************************************
2 FSE : Finite State Entropy codec
3 Public Prototypes declaration
4 Copyright (C) 2013-2016, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34 #ifndef FSE_H
35 #define FSE_H
36
37 #if defined (__cplusplus)
38 extern "C" {
39 #endif
40
41
42 /*-*****************************************
43 * Dependencies
44 ******************************************/
45 #include <stddef.h> /* size_t, ptrdiff_t */
46
47
48 /*-****************************************
49 * FSE simple functions
50 ******************************************/
51 /*! FSE_compress() :
52 Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
53 'dst' buffer must be already allocated. Compression runs faster if dstCapacity >= FSE_compressBound(srcSize).
54 @return : size of compressed data (<= dstCapacity).
55 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
56 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression instead.
57 if FSE_isError(return), compression failed (more details using FSE_getErrorName())
58 */
59 size_t FSE_compress(void* dst, size_t dstCapacity,
60 const void* src, size_t srcSize);
61
62 /*! FSE_decompress():
63 Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
64 into already allocated destination buffer 'dst', of size 'dstCapacity'.
65 @return : size of regenerated data (<= maxDstSize),
66 or an error code, which can be tested using FSE_isError() .
67
68 ** Important ** : FSE_decompress() does not decompress non-compressible nor RLE data !!!
69 Why ? : making this distinction requires a header.
70 Header management is intentionally delegated to the user layer, which can better manage special cases.
71 */
72 size_t FSE_decompress(void* dst, size_t dstCapacity,
73 const void* cSrc, size_t cSrcSize);
74
75
76 /*-*****************************************
77 * Tool functions
78 ******************************************/
79 size_t FSE_compressBound(size_t size); /* maximum compressed size */
80
81 /* Error Management */
82 unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
83 const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
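
/* Editor's illustrative sketch, not part of the vendored zstd/FSE sources :
 * a one-shot round trip through the simple API above. The helper name is
 * hypothetical; the 0 (not compressible) and 1 (RLE) special return values
 * must be handled by the caller, as documented for FSE_compress(). */
static size_t FSE_example_roundTrip(void* dst, size_t dstCapacity,
                                    void* cBuf, size_t cBufCapacity,
                                    const void* src, size_t srcSize)
{
    size_t const cSize = FSE_compress(cBuf, cBufCapacity, src, srcSize);
    if (FSE_isError(cSize)) return cSize;   /* compression failed */
    if (cSize <= 1) return 0;               /* 0 : not compressible, 1 : RLE ; store src another way */
    return FSE_decompress(dst, dstCapacity, cBuf, cSize);   /* == srcSize on success */
}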
84
85
86 /*-*****************************************
87 * FSE advanced functions
88 ******************************************/
89 /*! FSE_compress2() :
90 Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
91 Both parameters can be defined as '0' to mean : use default value
92 @return : size of compressed data
93 Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
94 if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
95 if FSE_isError(return), it's an error code.
96 */
97 size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
98
99
100 /*-*****************************************
101 * FSE detailed API
102 ******************************************/
103 /*!
104 FSE_compress() does the following:
105 1. count symbol occurrence from source[] into table count[]
106 2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
107 3. save normalized counters to memory buffer using writeNCount()
108 4. build encoding table 'CTable' from normalized counters
109 5. encode the data stream using encoding table 'CTable'
110
111 FSE_decompress() does the following:
112 1. read normalized counters with readNCount()
113 2. build decoding table 'DTable' from normalized counters
114 3. decode the data stream using decoding table 'DTable'
115
116 The following API allows targeting specific sub-functions for advanced tasks.
117 For example, it's possible to compress several blocks using the same 'CTable',
118 or to save and provide normalized distribution using external method.
119 */
120
121 /* *** COMPRESSION *** */
122
123 /*! FSE_count():
124 Provides the precise count of each byte within a table 'count'.
125 'count' is a table of unsigned int, of minimum size (*maxSymbolValuePtr+1).
126 *maxSymbolValuePtr will be updated if detected smaller than initial value.
127 @return : the count of the most frequent symbol (which is not identified).
128 if return == srcSize, there is only one symbol.
129 Can also return an error code, which can be tested with FSE_isError(). */
130 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
131
132 /*! FSE_optimalTableLog():
133 dynamically downsize 'tableLog' when conditions are met.
134 It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
135 @return : recommended tableLog (necessarily <= 'maxTableLog') */
136 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
137
138 /*! FSE_normalizeCount():
139 normalize counts so that sum(count[]) == Power_of_2 (2^tableLog)
140 'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
141 @return : tableLog,
142 or an errorCode, which can be tested using FSE_isError() */
143 size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
144
145 /*! FSE_NCountWriteBound():
146 Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'.
147 Typically useful for allocation purpose. */
148 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
149
150 /*! FSE_writeNCount():
151 Compactly save 'normalizedCounter' into 'buffer'.
152 @return : size of the compressed table,
153 or an errorCode, which can be tested using FSE_isError(). */
154 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
155
156
157 /*! Constructor and Destructor of FSE_CTable.
158 Note that FSE_CTable size depends on 'tableLog' and 'maxSymbolValue' */
159 typedef unsigned FSE_CTable; /* don't allocate that. It's only meant to be more restrictive than void* */
160 FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
161 void FSE_freeCTable (FSE_CTable* ct);
162
163 /*! FSE_buildCTable():
164 Builds `ct`, which must be already allocated, using FSE_createCTable().
165 @return : 0, or an errorCode, which can be tested using FSE_isError() */
166 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
167
168 /*! FSE_compress_usingCTable():
169 Compress `src` using `ct` into `dst` which must be already allocated.
170 @return : size of compressed data (<= `dstCapacity`),
171 or 0 if compressed data could not fit into `dst`,
172 or an errorCode, which can be tested using FSE_isError() */
173 size_t FSE_compress_usingCTable (void* dst, size_t dstCapacity, const void* src, size_t srcSize, const FSE_CTable* ct);
174
175 /*!
176 Tutorial :
177 ----------
178 The first step is to count all symbols. FSE_count() does this job very fast.
179 Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
180 'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
181 maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
182 FSE_count() will return the number of occurrences of the most frequent symbol.
183 This can be used to know if there is a single symbol within 'src', and to quickly evaluate its compressibility.
184 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
185
186 The next step is to normalize the frequencies.
187 FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
188 It also guarantees a minimum of 1 to any Symbol with frequency >= 1.
189 You can use 'tableLog'==0 to mean "use default tableLog value".
190 If you are unsure of which tableLog value to use, you can ask FSE_optimalTableLog(),
191 which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
192
193 The result of FSE_normalizeCount() will be saved into a table,
194 called 'normalizedCounter', which is a table of signed short.
195 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
196 The return value is tableLog if everything proceeded as expected.
197 It is 0 if there is a single symbol within distribution.
198 If there is an error (ex: invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
199
200 'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeNCount().
201 'buffer' must be already allocated.
202 For guaranteed success, buffer size must be at least FSE_headerBound().
203 The result of the function is the number of bytes written into 'buffer'.
204 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError(); ex : buffer size too small).
205
206 'normalizedCounter' can then be used to create the compression table 'CTable'.
207 The space required by 'CTable' must be already allocated, using FSE_createCTable().
208 You can then use FSE_buildCTable() to fill 'CTable'.
209 If there is an error, both functions will return an ErrorCode (which can be tested using FSE_isError()).
210
211 'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
212 Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
213 The function returns the size of compressed data (without header), necessarily <= `dstCapacity`.
214 If it returns '0', compressed data could not fit into 'dst'.
215 If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
216 */
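
/* Editor's illustrative sketch, not part of the vendored zstd/FSE sources.
 * It strings together the detailed compression steps described in the
 * tutorial above (count -> normalize -> writeNCount -> buildCTable ->
 * compress_usingCTable). The helper name FSE_example_compress() is
 * hypothetical, and error handling is intentionally minimal. */
static size_t FSE_example_compress(void* dst, size_t dstCapacity,
                                   const void* src, size_t srcSize)
{
    unsigned count[256];
    short    norm[256];
    unsigned maxSymbolValue = 255;
    unsigned tableLog;
    size_t   maxCount, hSize, cSize, errorCode;
    FSE_CTable* ct;

    maxCount = FSE_count(count, &maxSymbolValue, src, srcSize);              /* 1. count symbols */
    if (FSE_isError(maxCount)) return maxCount;
    if (maxCount == srcSize) return 1;                                       /* single symbol : use RLE instead */

    tableLog  = FSE_optimalTableLog(0 /* default */, srcSize, maxSymbolValue);
    errorCode = FSE_normalizeCount(norm, tableLog, count, srcSize, maxSymbolValue);  /* 2. normalize */
    if (FSE_isError(errorCode)) return errorCode;

    hSize = FSE_writeNCount(dst, dstCapacity, norm, maxSymbolValue, tableLog);       /* 3. save NCount header */
    if (FSE_isError(hSize)) return hSize;

    ct = FSE_createCTable(tableLog, maxSymbolValue);
    if (ct == NULL) return (size_t)-1;                                       /* allocation failure : generic error */
    errorCode = FSE_buildCTable(ct, norm, maxSymbolValue, tableLog);         /* 4. build CTable */
    if (FSE_isError(errorCode)) { FSE_freeCTable(ct); return errorCode; }

    cSize = FSE_compress_usingCTable((char*)dst + hSize, dstCapacity - hSize,
                                     src, srcSize, ct);                      /* 5. encode */
    FSE_freeCTable(ct);
    if (FSE_isError(cSize) || (cSize == 0)) return cSize;                    /* 0 : data did not fit into dst */
    return hSize + cSize;
}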
217
218
219 /* *** DECOMPRESSION *** */
220
221 /*! FSE_readNCount():
222 Read compactly saved 'normalizedCounter' from 'rBuffer'.
223 @return : size read from 'rBuffer',
224 or an errorCode, which can be tested using FSE_isError().
225 maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
226 size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
227
228 /*! Constructor and Destructor of FSE_DTable.
229 Note that its size depends on 'tableLog' */
230 typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void* */
231 FSE_DTable* FSE_createDTable(unsigned tableLog);
232 void FSE_freeDTable(FSE_DTable* dt);
233
234 /*! FSE_buildDTable():
235 Builds 'dt', which must be already allocated, using FSE_createDTable().
236 return : 0, or an errorCode, which can be tested using FSE_isError() */
237 size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
238
239 /*! FSE_decompress_usingDTable():
240 Decompress compressed source `cSrc` of size `cSrcSize` using `dt`
241 into `dst` which must be already allocated.
242 @return : size of regenerated data (necessarily <= `dstCapacity`),
243 or an errorCode, which can be tested using FSE_isError() */
244 size_t FSE_decompress_usingDTable(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt);
245
246 /*!
247 Tutorial :
248 ----------
249 (Note : these functions only decompress FSE-compressed blocks.
250 If block is uncompressed, use memcpy() instead
251 If block is a single repeated byte, use memset() instead )
252
253 The first step is to obtain the normalized frequencies of symbols.
254 This can be performed by FSE_readNCount() if it was saved using FSE_writeNCount().
255 'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of signed short.
256 In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
257 or size the table to handle worst case situations (typically 256).
258 FSE_readNCount() will provide 'tableLog' and 'maxSymbolValue'.
259 The result of FSE_readNCount() is the number of bytes read from 'rBuffer'.
260 Note that 'rBufferSize' must be at least 4 bytes, even if useful information is less than that.
261 If there is an error, the function will return an error code, which can be tested using FSE_isError().
262
263 The next step is to build the decompression tables 'FSE_DTable' from 'normalizedCounter'.
264 This is performed by the function FSE_buildDTable().
265 The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
266 If there is an error, the function will return an error code, which can be tested using FSE_isError().
267
268 `FSE_DTable` can then be used to decompress `cSrc`, with FSE_decompress_usingDTable().
269 `cSrcSize` must be strictly correct, otherwise decompression will fail.
270 FSE_decompress_usingDTable() result will tell how many bytes were regenerated (<=`dstCapacity`).
271 If there is an error, the function will return an error code, which can be tested using FSE_isError(). (ex: dst buffer too small)
272 */
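
/* Editor's illustrative sketch, not part of the vendored zstd/FSE sources.
 * It follows the decompression tutorial above (readNCount -> buildDTable ->
 * decompress_usingDTable). The helper name FSE_example_decompress() is
 * hypothetical, and cSrcSize must cover exactly the header plus the payload. */
static size_t FSE_example_decompress(void* dst, size_t dstCapacity,
                                     const void* cSrc, size_t cSrcSize)
{
    short    norm[256];
    unsigned maxSymbolValue = 255;
    unsigned tableLog;
    size_t   hSize, dSize;
    FSE_DTable* dt;

    hSize = FSE_readNCount(norm, &maxSymbolValue, &tableLog, cSrc, cSrcSize);  /* 1. read normalized counters */
    if (FSE_isError(hSize)) return hSize;

    dt = FSE_createDTable(tableLog);
    if (dt == NULL) return (size_t)-1;                                         /* allocation failure : generic error */
    dSize = FSE_buildDTable(dt, norm, maxSymbolValue, tableLog);               /* 2. build DTable */
    if (!FSE_isError(dSize))
        dSize = FSE_decompress_usingDTable(dst, dstCapacity,
                                           (const char*)cSrc + hSize, cSrcSize - hSize, dt);  /* 3. decode */
    FSE_freeDTable(dt);
    return dSize;                                                              /* regenerated size, or an error code */
}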
273
274
275 #ifdef FSE_STATIC_LINKING_ONLY
276
277 /* *** Dependency *** */
278 #include "bitstream.h"
279
280
281 /* *****************************************
282 * Static allocation
283 *******************************************/
284 /* FSE buffer bounds */
285 #define FSE_NCOUNTBOUND 512
286 #define FSE_BLOCKBOUND(size) (size + (size>>7))
287 #define FSE_COMPRESSBOUND(size) (FSE_NCOUNTBOUND + FSE_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
288
289 /* It is possible to statically allocate FSE CTable/DTable as a table of unsigned using below macros */
290 #define FSE_CTABLE_SIZE_U32(maxTableLog, maxSymbolValue) (1 + (1<<(maxTableLog-1)) + ((maxSymbolValue+1)*2))
291 #define FSE_DTABLE_SIZE_U32(maxTableLog) (1 + (1<<maxTableLog))
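
/* Editor's illustrative sketch, not part of the vendored zstd/FSE sources :
 * static allocation of FSE tables with the macros above, sized here for a
 * (hypothetical) tableLog of 12 and maxSymbolValue of 255. */
static FSE_CTable exampleCTable[FSE_CTABLE_SIZE_U32(12, 255)];
static FSE_DTable exampleDTable[FSE_DTABLE_SIZE_U32(12)];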
292
293
294 /* *****************************************
295 * FSE advanced API
296 *******************************************/
297 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize);
298 /**< same as FSE_count(), but blindly trusts that all byte values within src are <= *maxSymbolValuePtr */
299
300 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus);
301 /**< same as FSE_optimalTableLog(), which uses `minus==2` */
302
303 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits);
304 /**< build a fake FSE_CTable, designed to not compress an input, where each symbol uses nbBits */
305
306 size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
307 /**< build a fake FSE_CTable, designed to compress always the same symbolValue */
308
309 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits);
310 /**< build a fake FSE_DTable, designed to read an uncompressed bitstream where each symbol uses nbBits */
311
312 size_t FSE_buildDTable_rle (FSE_DTable* dt, unsigned char symbolValue);
313 /**< build a fake FSE_DTable, designed to always generate the same symbolValue */
314
315
316 /* *****************************************
317 * FSE symbol compression API
318 *******************************************/
319 /*!
320 This API consists of small unitary functions, which highly benefit from being inlined.
321 You will want to enable link-time-optimization to ensure these functions are properly inlined in your binary.
322 Visual seems to do it automatically.
323 For gcc or clang, you'll need to add -flto flag at compilation and linking stages.
324 If none of these solutions is applicable, include "fse.c" directly.
325 */
326 typedef struct
327 {
328 ptrdiff_t value;
329 const void* stateTable;
330 const void* symbolTT;
331 unsigned stateLog;
332 } FSE_CState_t;
333
334 static void FSE_initCState(FSE_CState_t* CStatePtr, const FSE_CTable* ct);
335
336 static void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* CStatePtr, unsigned symbol);
337
338 static void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* CStatePtr);
339
340 /**<
341 These functions are inner components of FSE_compress_usingCTable().
342 They allow the creation of custom streams, mixing multiple tables and bit sources.
343
344 A key property to keep in mind is that encoding and decoding are done **in reverse direction**.
345 So the first symbol you will encode is the last you will decode, like a LIFO stack.
346
347 You will need a few variables to track your CStream. They are :
348
349 FSE_CTable ct; // Provided by FSE_buildCTable()
350 BIT_CStream_t bitStream; // bitStream tracking structure
351 FSE_CState_t state; // State tracking structure (can have several)
352
353
354 The first thing to do is to init bitStream and state.
355 size_t errorCode = BIT_initCStream(&bitStream, dstBuffer, maxDstSize);
356 FSE_initCState(&state, ct);
357
358 Note that BIT_initCStream() can produce an error code, so its result should be tested, using FSE_isError();
359 You can then encode your input data, byte after byte.
360 FSE_encodeSymbol() outputs a maximum of 'tableLog' bits at a time.
361 Remember decoding will be done in reverse direction.
362 FSE_encodeSymbol(&bitStream, &state, symbol);
363
364 At any time, you can also add any bit sequence.
365 Note : maximum allowed nbBits is 25, for compatibility with 32-bits decoders
366 BIT_addBits(&bitStream, bitField, nbBits);
367
368 The above methods don't commit data to memory, they just store it into local register, for speed.
369 Local register size is 64-bits on 64-bits systems, 32-bits on 32-bits systems (size_t).
370 Writing data to memory is a manual operation, performed by the flushBits function.
371 BIT_flushBits(&bitStream);
372
373 Your last FSE encoding operation shall be to flush your last state value(s).
374 FSE_flushCState(&bitStream, &state);
375
376 Finally, you must close the bitStream.
377 The function returns the size of CStream in bytes.
378 If data couldn't fit into dstBuffer, it will return a 0 ( == not compressible)
379 If there is an error, it returns an errorCode (which can be tested using FSE_isError()).
380 size_t size = BIT_closeCStream(&bitStream);
381 */
382
383
384 /* *****************************************
385 * FSE symbol decompression API
386 *******************************************/
387 typedef struct
388 {
389 size_t state;
390 const void* table; /* precise table may vary, depending on U16 */
391 } FSE_DState_t;
392
393
394 static void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt);
395
396 static unsigned char FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
397
398 static unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr);
399
400 /**<
401 Let's now decompose FSE_decompress_usingDTable() into its unitary components.
402 You will decode FSE-encoded symbols from the bitStream,
403 and also any other bitFields you put in, **in reverse order**.
404
405 You will need a few variables to track your bitStream. They are :
406
407 BIT_DStream_t DStream; // Stream context
408 FSE_DState_t DState; // State context. Multiple ones are possible
409 FSE_DTable* DTablePtr; // Decoding table, provided by FSE_buildDTable()
410
411 The first thing to do is to init the bitStream.
412 errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);
413
414 You should then retrieve your initial state(s)
415 (in reverse flushing order if you have several ones) :
416 errorCode = FSE_initDState(&DState, &DStream, DTablePtr);
417
418 You can then decode your data, symbol after symbol.
419 For information the maximum number of bits read by FSE_decodeSymbol() is 'tableLog'.
420 Keep in mind that symbols are decoded in reverse order, like a LIFO stack (last in, first out).
421 unsigned char symbol = FSE_decodeSymbol(&DState, &DStream);
422
423 You can retrieve any bitfield you eventually stored into the bitStream (in reverse order)
424 Note : maximum allowed nbBits is 25, for 32-bits compatibility
425 size_t bitField = BIT_readBits(&DStream, nbBits);
426
427 All above operations only read from local register (which size depends on size_t).
428 Refueling the register from memory is manually performed by the reload method.
429 endSignal = BIT_reloadDStream(&DStream);
430
431 BIT_reloadDStream() result tells if there is still some more data to read from DStream.
432 BIT_DStream_unfinished : there is still some data left into the DStream.
433 BIT_DStream_endOfBuffer : Dstream reached end of buffer. Its container may no longer be completely filled.
434 BIT_DStream_completed : Dstream reached its exact end, corresponding in general to decompression completed.
435 BIT_DStream_overflow : Dstream went too far. Decompression result is corrupted.
436
437 When reaching end of buffer (BIT_DStream_endOfBuffer), progress slowly, notably if you decode multiple symbols per loop,
438 to properly detect the exact end of stream.
439 After each decoded symbol, check if DStream is fully consumed using this simple test :
440 BIT_reloadDStream(&DStream) >= BIT_DStream_completed
441
442 When decoding is done, verify that decompression fully completed, by checking both the DStream and the relevant states.
443 Checking whether the DStream has reached its end is performed by :
444 BIT_endOfDStream(&DStream);
445 Also check the states. There might still be some symbols left there, if some high-probability symbols (>50%) are possible.
446 FSE_endOfDState(&DState);
447 */
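/* Editorial addition (illustrative recap, not part of the original API documentation) :
   a minimal single-state decoding loop combining the calls above. `dst`, `srcBuffer`,
   `srcSize` and `DTablePtr` are placeholders assumed to be provided by the caller.

       BIT_DStream_t DStream;
       FSE_DState_t  DState;
       size_t const errorCode = BIT_initDStream(&DStream, srcBuffer, srcSize);   // test with FSE_isError()
       FSE_initDState(&DState, &DStream, DTablePtr);
       do {
           *dst++ = FSE_decodeSymbol(&DState, &DStream);
       } while (BIT_reloadDStream(&DStream) < BIT_DStream_completed);
       // final checks : both should be true for a fully successful decompression
       // BIT_endOfDStream(&DStream) && FSE_endOfDState(&DState)
*/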
448
449
450 /* *****************************************
451 * FSE unsafe API
452 *******************************************/
453 static unsigned char FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD);
454 /* faster, but works only if nbBits is always >= 1 (otherwise, result will be corrupted) */
455
456
457 /* *****************************************
458 * Implementation of inlined functions
459 *******************************************/
460 typedef struct {
461 int deltaFindState;
462 U32 deltaNbBits;
463 } FSE_symbolCompressionTransform; /* total 8 bytes */
464
465 MEM_STATIC void FSE_initCState(FSE_CState_t* statePtr, const FSE_CTable* ct)
466 {
467 const void* ptr = ct;
468 const U16* u16ptr = (const U16*) ptr;
469 const U32 tableLog = MEM_read16(ptr);
470 statePtr->value = (ptrdiff_t)1<<tableLog;
471 statePtr->stateTable = u16ptr+2;
472 statePtr->symbolTT = ((const U32*)ct + 1 + (tableLog ? (1<<(tableLog-1)) : 1));
473 statePtr->stateLog = tableLog;
474 }
475
476
477 /*! FSE_initCState2() :
478 * Same as FSE_initCState(), but the first symbol to include (which will be the last to be read)
479 * uses the smallest state value possible, saving the cost of this symbol */
480 MEM_STATIC void FSE_initCState2(FSE_CState_t* statePtr, const FSE_CTable* ct, U32 symbol)
481 {
482 FSE_initCState(statePtr, ct);
483 { const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
484 const U16* stateTable = (const U16*)(statePtr->stateTable);
485 U32 nbBitsOut = (U32)((symbolTT.deltaNbBits + (1<<15)) >> 16);
486 statePtr->value = (nbBitsOut << 16) - symbolTT.deltaNbBits;
487 statePtr->value = stateTable[(statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
488 }
489 }
490
491 MEM_STATIC void FSE_encodeSymbol(BIT_CStream_t* bitC, FSE_CState_t* statePtr, U32 symbol)
492 {
493 const FSE_symbolCompressionTransform symbolTT = ((const FSE_symbolCompressionTransform*)(statePtr->symbolTT))[symbol];
494 const U16* const stateTable = (const U16*)(statePtr->stateTable);
495 U32 nbBitsOut = (U32)((statePtr->value + symbolTT.deltaNbBits) >> 16);
496 BIT_addBits(bitC, statePtr->value, nbBitsOut);
497 statePtr->value = stateTable[ (statePtr->value >> nbBitsOut) + symbolTT.deltaFindState];
498 }
499
500 MEM_STATIC void FSE_flushCState(BIT_CStream_t* bitC, const FSE_CState_t* statePtr)
501 {
502 BIT_addBits(bitC, statePtr->value, statePtr->stateLog);
503 BIT_flushBits(bitC);
504 }
505
506
507 /* ====== Decompression ====== */
508
509 typedef struct {
510 U16 tableLog;
511 U16 fastMode;
512 } FSE_DTableHeader; /* sizeof U32 */
513
514 typedef struct
515 {
516 unsigned short newState;
517 unsigned char symbol;
518 unsigned char nbBits;
519 } FSE_decode_t; /* size == U32 */
520
521 MEM_STATIC void FSE_initDState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD, const FSE_DTable* dt)
522 {
523 const void* ptr = dt;
524 const FSE_DTableHeader* const DTableH = (const FSE_DTableHeader*)ptr;
525 DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
526 BIT_reloadDStream(bitD);
527 DStatePtr->table = dt + 1;
528 }
529
530 MEM_STATIC BYTE FSE_peekSymbol(const FSE_DState_t* DStatePtr)
531 {
532 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
533 return DInfo.symbol;
534 }
535
536 MEM_STATIC void FSE_updateState(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
537 {
538 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
539 U32 const nbBits = DInfo.nbBits;
540 size_t const lowBits = BIT_readBits(bitD, nbBits);
541 DStatePtr->state = DInfo.newState + lowBits;
542 }
543
544 MEM_STATIC BYTE FSE_decodeSymbol(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
545 {
546 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
547 U32 const nbBits = DInfo.nbBits;
548 BYTE const symbol = DInfo.symbol;
549 size_t const lowBits = BIT_readBits(bitD, nbBits);
550
551 DStatePtr->state = DInfo.newState + lowBits;
552 return symbol;
553 }
554
555 /*! FSE_decodeSymbolFast() :
556 unsafe, only works if no symbol has a probability > 50% */
557 MEM_STATIC BYTE FSE_decodeSymbolFast(FSE_DState_t* DStatePtr, BIT_DStream_t* bitD)
558 {
559 FSE_decode_t const DInfo = ((const FSE_decode_t*)(DStatePtr->table))[DStatePtr->state];
560 U32 const nbBits = DInfo.nbBits;
561 BYTE const symbol = DInfo.symbol;
562 size_t const lowBits = BIT_readBitsFast(bitD, nbBits);
563
564 DStatePtr->state = DInfo.newState + lowBits;
565 return symbol;
566 }
567
568 MEM_STATIC unsigned FSE_endOfDState(const FSE_DState_t* DStatePtr)
569 {
570 return DStatePtr->state == 0;
571 }
572
573
574
575 #ifndef FSE_COMMONDEFS_ONLY
576
577 /* **************************************************************
578 * Tuning parameters
579 ****************************************************************/
580 /*!MEMORY_USAGE :
581 * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.)
582 * Increasing memory usage improves compression ratio
583 * Reduced memory usage can improve speed, due to cache effect
584 * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */
585 #ifndef FSE_MAX_MEMORY_USAGE
586 # define FSE_MAX_MEMORY_USAGE 14
587 #endif
588 #ifndef FSE_DEFAULT_MEMORY_USAGE
589 # define FSE_DEFAULT_MEMORY_USAGE 13
590 #endif
591
592 /*!FSE_MAX_SYMBOL_VALUE :
593 * Maximum symbol value authorized.
594 * Required for proper stack allocation */
595 #ifndef FSE_MAX_SYMBOL_VALUE
596 # define FSE_MAX_SYMBOL_VALUE 255
597 #endif
598
599 /* **************************************************************
600 * template functions type & suffix
601 ****************************************************************/
602 #define FSE_FUNCTION_TYPE BYTE
603 #define FSE_FUNCTION_EXTENSION
604 #define FSE_DECODE_TYPE FSE_decode_t
605
606
607 #endif /* !FSE_COMMONDEFS_ONLY */
608
609
610 /* ***************************************************************
611 * Constants
612 *****************************************************************/
613 #define FSE_MAX_TABLELOG (FSE_MAX_MEMORY_USAGE-2)
614 #define FSE_MAX_TABLESIZE (1U<<FSE_MAX_TABLELOG)
615 #define FSE_MAXTABLESIZE_MASK (FSE_MAX_TABLESIZE-1)
616 #define FSE_DEFAULT_TABLELOG (FSE_DEFAULT_MEMORY_USAGE-2)
617 #define FSE_MIN_TABLELOG 5
618
619 #define FSE_TABLELOG_ABSOLUTE_MAX 15
620 #if FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX
621 # error "FSE_MAX_TABLELOG > FSE_TABLELOG_ABSOLUTE_MAX is not supported"
622 #endif
623
624 #define FSE_TABLESTEP(tableSize) ((tableSize>>1) + (tableSize>>3) + 3)
625
626
627 #endif /* FSE_STATIC_LINKING_ONLY */
628
629
630 #if defined (__cplusplus)
631 }
632 #endif
633
634 #endif /* FSE_H */
@@ -0,0 +1,329 b''
1 /* ******************************************************************
2 FSE : Finite State Entropy decoder
3 Copyright (C) 2013-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34
35
36 /* **************************************************************
37 * Compiler specifics
38 ****************************************************************/
39 #ifdef _MSC_VER /* Visual Studio */
40 # define FORCE_INLINE static __forceinline
41 # include <intrin.h> /* For Visual 2005 */
42 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
43 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
44 #else
45 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
46 # ifdef __GNUC__
47 # define FORCE_INLINE static inline __attribute__((always_inline))
48 # else
49 # define FORCE_INLINE static inline
50 # endif
51 # else
52 # define FORCE_INLINE static
53 # endif /* __STDC_VERSION__ */
54 #endif
55
56
57 /* **************************************************************
58 * Includes
59 ****************************************************************/
60 #include <stdlib.h> /* malloc, free, qsort */
61 #include <string.h> /* memcpy, memset */
62 #include <stdio.h> /* printf (debug) */
63 #include "bitstream.h"
64 #define FSE_STATIC_LINKING_ONLY
65 #include "fse.h"
66
67
68 /* **************************************************************
69 * Error Management
70 ****************************************************************/
71 #define FSE_isError ERR_isError
72 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
73
74 /* check and forward error code */
75 #define CHECK_F(f) { size_t const e = f; if (FSE_isError(e)) return e; }
76
77
78 /* **************************************************************
79 * Complex types
80 ****************************************************************/
81 typedef U32 DTable_max_t[FSE_DTABLE_SIZE_U32(FSE_MAX_TABLELOG)];
82
83
84 /* **************************************************************
85 * Templates
86 ****************************************************************/
87 /*
88 designed to be included
89 for type-specific functions (template emulation in C)
90 Objective is to write these functions only once, for improved maintenance
91 */
92
93 /* safety checks */
94 #ifndef FSE_FUNCTION_EXTENSION
95 # error "FSE_FUNCTION_EXTENSION must be defined"
96 #endif
97 #ifndef FSE_FUNCTION_TYPE
98 # error "FSE_FUNCTION_TYPE must be defined"
99 #endif
100
101 /* Function names */
102 #define FSE_CAT(X,Y) X##Y
103 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
104 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
105
106
107 /* Function templates */
108 FSE_DTable* FSE_createDTable (unsigned tableLog)
109 {
110 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
111 return (FSE_DTable*)malloc( FSE_DTABLE_SIZE_U32(tableLog) * sizeof (U32) );
112 }
113
114 void FSE_freeDTable (FSE_DTable* dt)
115 {
116 free(dt);
117 }
118
119 size_t FSE_buildDTable(FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
120 {
121 void* const tdPtr = dt+1; /* because *dt is unsigned, 32-bits aligned on 32-bits */
122 FSE_DECODE_TYPE* const tableDecode = (FSE_DECODE_TYPE*) (tdPtr);
123 U16 symbolNext[FSE_MAX_SYMBOL_VALUE+1];
124
125 U32 const maxSV1 = maxSymbolValue + 1;
126 U32 const tableSize = 1 << tableLog;
127 U32 highThreshold = tableSize-1;
128
129 /* Sanity Checks */
130 if (maxSymbolValue > FSE_MAX_SYMBOL_VALUE) return ERROR(maxSymbolValue_tooLarge);
131 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge);
132
133 /* Init, lay down lowprob symbols */
134 { FSE_DTableHeader DTableH;
135 DTableH.tableLog = (U16)tableLog;
136 DTableH.fastMode = 1;
137 { S16 const largeLimit= (S16)(1 << (tableLog-1));
138 U32 s;
139 for (s=0; s<maxSV1; s++) {
140 if (normalizedCounter[s]==-1) {
141 tableDecode[highThreshold--].symbol = (FSE_FUNCTION_TYPE)s;
142 symbolNext[s] = 1;
143 } else {
144 if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
145 symbolNext[s] = normalizedCounter[s];
146 } } }
147 memcpy(dt, &DTableH, sizeof(DTableH));
148 }
149
150 /* Spread symbols */
151 { U32 const tableMask = tableSize-1;
152 U32 const step = FSE_TABLESTEP(tableSize);
153 U32 s, position = 0;
154 for (s=0; s<maxSV1; s++) {
155 int i;
156 for (i=0; i<normalizedCounter[s]; i++) {
157 tableDecode[position].symbol = (FSE_FUNCTION_TYPE)s;
158 position = (position + step) & tableMask;
159 while (position > highThreshold) position = (position + step) & tableMask; /* lowprob area */
160 } }
161 if (position!=0) return ERROR(GENERIC); /* position must reach all cells once, otherwise normalizedCounter is incorrect */
162 }
163
164 /* Build Decoding table */
165 { U32 u;
166 for (u=0; u<tableSize; u++) {
167 FSE_FUNCTION_TYPE const symbol = (FSE_FUNCTION_TYPE)(tableDecode[u].symbol);
168 U16 nextState = symbolNext[symbol]++;
169 tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32 ((U32)nextState) );
170 tableDecode[u].newState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
171 } }
172
173 return 0;
174 }
175
176
177 #ifndef FSE_COMMONDEFS_ONLY
178
179 /*-*******************************************************
180 * Decompression (Byte symbols)
181 *********************************************************/
182 size_t FSE_buildDTable_rle (FSE_DTable* dt, BYTE symbolValue)
183 {
184 void* ptr = dt;
185 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
186 void* dPtr = dt + 1;
187 FSE_decode_t* const cell = (FSE_decode_t*)dPtr;
188
189 DTableH->tableLog = 0;
190 DTableH->fastMode = 0;
191
192 cell->newState = 0;
193 cell->symbol = symbolValue;
194 cell->nbBits = 0;
195
196 return 0;
197 }
198
199
200 size_t FSE_buildDTable_raw (FSE_DTable* dt, unsigned nbBits)
201 {
202 void* ptr = dt;
203 FSE_DTableHeader* const DTableH = (FSE_DTableHeader*)ptr;
204 void* dPtr = dt + 1;
205 FSE_decode_t* const dinfo = (FSE_decode_t*)dPtr;
206 const unsigned tableSize = 1 << nbBits;
207 const unsigned tableMask = tableSize - 1;
208 const unsigned maxSV1 = tableMask+1;
209 unsigned s;
210
211 /* Sanity checks */
212 if (nbBits < 1) return ERROR(GENERIC); /* min size */
213
214 /* Build Decoding Table */
215 DTableH->tableLog = (U16)nbBits;
216 DTableH->fastMode = 1;
217 for (s=0; s<maxSV1; s++) {
218 dinfo[s].newState = 0;
219 dinfo[s].symbol = (BYTE)s;
220 dinfo[s].nbBits = (BYTE)nbBits;
221 }
222
223 return 0;
224 }
225
226 FORCE_INLINE size_t FSE_decompress_usingDTable_generic(
227 void* dst, size_t maxDstSize,
228 const void* cSrc, size_t cSrcSize,
229 const FSE_DTable* dt, const unsigned fast)
230 {
231 BYTE* const ostart = (BYTE*) dst;
232 BYTE* op = ostart;
233 BYTE* const omax = op + maxDstSize;
234 BYTE* const olimit = omax-3;
235
236 BIT_DStream_t bitD;
237 FSE_DState_t state1;
238 FSE_DState_t state2;
239
240 /* Init */
241 CHECK_F(BIT_initDStream(&bitD, cSrc, cSrcSize));
242
243 FSE_initDState(&state1, &bitD, dt);
244 FSE_initDState(&state2, &bitD, dt);
245
246 #define FSE_GETSYMBOL(statePtr) fast ? FSE_decodeSymbolFast(statePtr, &bitD) : FSE_decodeSymbol(statePtr, &bitD)
247
248 /* 4 symbols per loop */
249 for ( ; (BIT_reloadDStream(&bitD)==BIT_DStream_unfinished) & (op<olimit) ; op+=4) {
250 op[0] = FSE_GETSYMBOL(&state1);
251
252 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
253 BIT_reloadDStream(&bitD);
254
255 op[1] = FSE_GETSYMBOL(&state2);
256
257 if (FSE_MAX_TABLELOG*4+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
258 { if (BIT_reloadDStream(&bitD) > BIT_DStream_unfinished) { op+=2; break; } }
259
260 op[2] = FSE_GETSYMBOL(&state1);
261
262 if (FSE_MAX_TABLELOG*2+7 > sizeof(bitD.bitContainer)*8) /* This test must be static */
263 BIT_reloadDStream(&bitD);
264
265 op[3] = FSE_GETSYMBOL(&state2);
266 }
267
268 /* tail */
269 /* note : BIT_reloadDStream(&bitD) >= BIT_DStream_endOfBuffer here; the loop ends at exactly BIT_DStream_completed */
270 while (1) {
271 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
272 *op++ = FSE_GETSYMBOL(&state1);
273 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
274 *op++ = FSE_GETSYMBOL(&state2);
275 break;
276 }
277
278 if (op>(omax-2)) return ERROR(dstSize_tooSmall);
279 *op++ = FSE_GETSYMBOL(&state2);
280 if (BIT_reloadDStream(&bitD)==BIT_DStream_overflow) {
281 *op++ = FSE_GETSYMBOL(&state1);
282 break;
283 } }
284
285 return op-ostart;
286 }
287
288
289 size_t FSE_decompress_usingDTable(void* dst, size_t originalSize,
290 const void* cSrc, size_t cSrcSize,
291 const FSE_DTable* dt)
292 {
293 const void* ptr = dt;
294 const FSE_DTableHeader* DTableH = (const FSE_DTableHeader*)ptr;
295 const U32 fastMode = DTableH->fastMode;
296
297 /* select fast mode (static) */
298 if (fastMode) return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 1);
299 return FSE_decompress_usingDTable_generic(dst, originalSize, cSrc, cSrcSize, dt, 0);
300 }
301
302
303 size_t FSE_decompress(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize)
304 {
305 const BYTE* const istart = (const BYTE*)cSrc;
306 const BYTE* ip = istart;
307 short counting[FSE_MAX_SYMBOL_VALUE+1];
308 DTable_max_t dt; /* Static analyzer seems unable to understand this table will be properly initialized later */
309 unsigned tableLog;
310 unsigned maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
311
312 if (cSrcSize<2) return ERROR(srcSize_wrong); /* too small input size */
313
314 /* normal FSE decoding mode */
315 { size_t const NCountLength = FSE_readNCount (counting, &maxSymbolValue, &tableLog, istart, cSrcSize);
316 if (FSE_isError(NCountLength)) return NCountLength;
317 if (NCountLength >= cSrcSize) return ERROR(srcSize_wrong); /* too small input size */
318 ip += NCountLength;
319 cSrcSize -= NCountLength;
320 }
321
322 CHECK_F( FSE_buildDTable (dt, counting, maxSymbolValue, tableLog) );
323
324 return FSE_decompress_usingDTable (dst, maxDstSize, ip, cSrcSize, dt); /* always return, even if it is an error code */
325 }
326
327
328
329 #endif /* FSE_COMMONDEFS_ONLY */
@@ -0,0 +1,228 b''
1 /* ******************************************************************
2 Huffman coder, part of New Generation Entropy library
3 header file
4 Copyright (C) 2013-2016, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - Source repository : https://github.com/Cyan4973/FiniteStateEntropy
33 ****************************************************************** */
34 #ifndef HUF_H_298734234
35 #define HUF_H_298734234
36
37 #if defined (__cplusplus)
38 extern "C" {
39 #endif
40
41
42 /* *** Dependencies *** */
43 #include <stddef.h> /* size_t */
44
45
46 /* *** simple functions *** */
47 /**
48 HUF_compress() :
49 Compress content from buffer 'src', of size 'srcSize', into buffer 'dst'.
50 'dst' buffer must be already allocated.
51 Compression runs faster if `dstCapacity` >= HUF_compressBound(srcSize).
52 `srcSize` must be <= `HUF_BLOCKSIZE_MAX` == 128 KB.
53 @return : size of compressed data (<= `dstCapacity`).
54 Special values : if return == 0, srcData is not compressible => Nothing is stored within dst !!!
55 if return == 1, srcData is a single repeated byte symbol (RLE compression).
56 if HUF_isError(return), compression failed (more details using HUF_getErrorName())
57 */
58 size_t HUF_compress(void* dst, size_t dstCapacity,
59 const void* src, size_t srcSize);
60
61 /**
62 HUF_decompress() :
63 Decompress HUF data from buffer 'cSrc', of size 'cSrcSize',
64 into already allocated buffer 'dst', of minimum size 'dstSize'.
65 `dstSize` : **must** be the ***exact*** size of original (uncompressed) data.
66 Note : in contrast with FSE, HUF_decompress can regenerate
67 RLE (cSrcSize==1) and uncompressed (cSrcSize==dstSize) data,
68 because it knows size to regenerate.
69 @return : size of regenerated data (== dstSize),
70 or an error code, which can be tested using HUF_isError()
71 */
72 size_t HUF_decompress(void* dst, size_t dstSize,
73 const void* cSrc, size_t cSrcSize);
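/* Editorial addition (illustrative usage sketch, not part of the original documentation) :
   a buffer-to-buffer round trip using only the two declarations above. `src`, `srcSize`
   (<= HUF_BLOCKSIZE_MAX), `dst` and `regen` are placeholders assumed to be allocated by the
   caller, with `dst` sized to at least HUF_compressBound(srcSize).

       size_t const cSize = HUF_compress(dst, HUF_compressBound(srcSize), src, srcSize);
       if (HUF_isError(cSize)) { }            // compression failed
       else if (cSize <= 1) { }               // 0 : not compressible, 1 : RLE; store the input by other means
       else {
           size_t const dSize = HUF_decompress(regen, srcSize, dst, cSize);
           if (HUF_isError(dSize)) { }        // decompression failed
       }
*/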
74
75
76 /* ****************************************
77 * Tool functions
78 ******************************************/
79 #define HUF_BLOCKSIZE_MAX (128 * 1024)
80 size_t HUF_compressBound(size_t size); /**< maximum compressed size (worst case) */
81
82 /* Error Management */
83 unsigned HUF_isError(size_t code); /**< tells if a return value is an error code */
84 const char* HUF_getErrorName(size_t code); /**< provides error code string (useful for debugging) */
85
86
87 /* *** Advanced function *** */
88
89 /** HUF_compress2() :
90 * Same as HUF_compress(), but offers direct control over `maxSymbolValue` and `tableLog` */
91 size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
92
93
94 #ifdef HUF_STATIC_LINKING_ONLY
95
96 /* *** Dependencies *** */
97 #include "mem.h" /* U32 */
98
99
100 /* *** Constants *** */
101 #define HUF_TABLELOG_ABSOLUTEMAX 16 /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
102 #define HUF_TABLELOG_MAX 12 /* max configured tableLog (for static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
103 #define HUF_TABLELOG_DEFAULT 11 /* tableLog by default, when not specified */
104 #define HUF_SYMBOLVALUE_MAX 255
105 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
106 # error "HUF_TABLELOG_MAX is too large !"
107 #endif
108
109
110 /* ****************************************
111 * Static allocation
112 ******************************************/
113 /* HUF buffer bounds */
114 #define HUF_CTABLEBOUND 129
115 #define HUF_BLOCKBOUND(size) (size + (size>>8) + 8) /* only true if incompressible pre-filtered with fast heuristic */
116 #define HUF_COMPRESSBOUND(size) (HUF_CTABLEBOUND + HUF_BLOCKBOUND(size)) /* Macro version, useful for static allocation */
117
118 /* static allocation of HUF's Compression Table */
119 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
120 U32 name##hb[maxSymbolValue+1]; \
121 void* name##hv = &(name##hb); \
122 HUF_CElt* name = (HUF_CElt*)(name##hv) /* no final ; */
123
124 /* static allocation of HUF's DTable */
125 typedef U32 HUF_DTable;
126 #define HUF_DTABLE_SIZE(maxTableLog) (1 + (1<<(maxTableLog)))
127 #define HUF_CREATE_STATIC_DTABLEX2(DTable, maxTableLog) \
128 HUF_DTable DTable[HUF_DTABLE_SIZE((maxTableLog)-1)] = { ((U32)((maxTableLog)-1)*0x1000001) }
129 #define HUF_CREATE_STATIC_DTABLEX4(DTable, maxTableLog) \
130 HUF_DTable DTable[HUF_DTABLE_SIZE(maxTableLog)] = { ((U32)(maxTableLog)*0x1000001) }
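/* Editorial addition (illustrative sketch, not part of the original documentation) :
   decoding with a statically allocated single-symbol DTable. `cSrc`/`cSrcSize` and
   `dst`/`dstSize` are placeholders assumed to be provided by the caller.

       HUF_CREATE_STATIC_DTABLEX2(dtable, HUF_TABLELOG_MAX);
       size_t const hSize = HUF_readDTableX2(dtable, cSrc, cSrcSize);   // reads the serialized table header
       if (!HUF_isError(hSize))
           HUF_decompress4X2_usingDTable(dst, dstSize, (const char*)cSrc + hSize, cSrcSize - hSize, dtable);
*/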
131
132
133 /* ****************************************
134 * Advanced decompression functions
135 ******************************************/
136 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
137 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
138
139 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< decodes RLE and uncompressed */
140 size_t HUF_decompress4X_hufOnly(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< considers RLE and uncompressed as errors */
141 size_t HUF_decompress4X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
142 size_t HUF_decompress4X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
143
144 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
145 size_t HUF_decompress1X2_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< single-symbol decoder */
146 size_t HUF_decompress1X4_DCtx(HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /**< double-symbols decoder */
147
148
149 /* ****************************************
150 * HUF detailed API
151 ******************************************/
152 /*!
153 HUF_compress() does the following:
154 1. count symbol occurrence from source[] into table count[] using FSE_count()
155 2. (optional) refine tableLog using HUF_optimalTableLog()
156 3. build Huffman table from count using HUF_buildCTable()
157 4. save Huffman table to memory buffer using HUF_writeCTable()
158 5. encode the data stream using HUF_compress4X_usingCTable()
159
160 The following API allows targeting specific sub-functions for advanced tasks.
161 For example, it's possible to compress several blocks using the same 'CTable',
162 or to save and regenerate 'CTable' using external methods.
163 */
164 /* FSE_count() : find it within "fse.h" */
165 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue);
166 typedef struct HUF_CElt_s HUF_CElt; /* incomplete type */
167 size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue, unsigned maxNbBits);
168 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
169 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
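/* Editorial addition (illustrative sketch of steps 2-5 above, not part of the original
   documentation) : compressing several blocks with one shared CTable. The symbol histogram
   `count`/`maxSymbolValue` (e.g. produced by FSE_count()) and the destination buffers are
   placeholders assumed to be provided by the caller.

       unsigned const huffLog = HUF_optimalTableLog(HUF_TABLELOG_DEFAULT, srcSize, maxSymbolValue);
       HUF_CREATE_STATIC_CTABLE(ctable, HUF_SYMBOLVALUE_MAX);
       size_t const maxNbBits = HUF_buildCTable(ctable, count, maxSymbolValue, huffLog);   // test with HUF_isError()
       size_t const hSize  = HUF_writeCTable(header, headerCapacity, ctable, maxSymbolValue, (unsigned)maxNbBits);
       size_t const cSize1 = HUF_compress4X_usingCTable(dst1, dst1Capacity, block1, block1Size, ctable);
       size_t const cSize2 = HUF_compress4X_usingCTable(dst2, dst2Capacity, block2, block2Size, ctable);
*/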
170
171
172 /*! HUF_readStats() :
173 Read compact Huffman tree, saved by HUF_writeCTable().
174 `huffWeight` is destination buffer.
175 @return : size read from `src`, or an error code.
176 Note : needed by HUF_readCTable() and HUF_readDTableXn(). */
177 size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
178 U32* nbSymbolsPtr, U32* tableLogPtr,
179 const void* src, size_t srcSize);
180
181 /** HUF_readCTable() :
182 * Loading a CTable saved with HUF_writeCTable() */
183 size_t HUF_readCTable (HUF_CElt* CTable, unsigned maxSymbolValue, const void* src, size_t srcSize);
184
185
186 /*
187 HUF_decompress() does the following:
188 1. select the decompression algorithm (X2, X4) based on pre-computed heuristics
189 2. build Huffman table from the saved representation, using HUF_readDTableXn()
190 3. decode 1 or 4 segments in parallel using HUF_decompressSXn_usingDTable
191 */
192
193 /** HUF_selectDecoder() :
194 * Tells which decoder is likely to decode faster,
195 * based on a set of pre-determined metrics.
196 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
197 * Assumption : 0 < cSrcSize < dstSize <= 128 KB */
198 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize);
199
200 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize);
201 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize);
202
203 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
204 size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
205 size_t HUF_decompress4X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
206
207
208 /* single stream variants */
209
210 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
211 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
212
213 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* single-symbol decoder */
214 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize); /* double-symbol decoder */
215
216 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
217 size_t HUF_decompress1X2_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
218 size_t HUF_decompress1X4_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const HUF_DTable* DTable);
219
220
221 #endif /* HUF_STATIC_LINKING_ONLY */
222
223
224 #if defined (__cplusplus)
225 }
226 #endif
227
228 #endif /* HUF_H_298734234 */
@@ -0,0 +1,370 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 #ifndef MEM_H_MODULE
11 #define MEM_H_MODULE
12
13 #if defined (__cplusplus)
14 extern "C" {
15 #endif
16
17 /*-****************************************
18 * Dependencies
19 ******************************************/
20 #include <stddef.h> /* size_t, ptrdiff_t */
21 #include <string.h> /* memcpy */
22
23
24 /*-****************************************
25 * Compiler specifics
26 ******************************************/
27 #if defined(_MSC_VER) /* Visual Studio */
28 # include <stdlib.h> /* _byteswap_ulong */
29 # include <intrin.h> /* _byteswap_* */
30 #endif
31 #if defined(__GNUC__)
32 # define MEM_STATIC static __inline __attribute__((unused))
33 #elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
34 # define MEM_STATIC static inline
35 #elif defined(_MSC_VER)
36 # define MEM_STATIC static __inline
37 #else
38 # define MEM_STATIC static /* this version may generate warnings for unused static functions; disable the relevant warning */
39 #endif
40
41 /* code only tested on 32 and 64 bits systems */
42 #define MEM_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; }
43 MEM_STATIC void MEM_check(void) { MEM_STATIC_ASSERT((sizeof(size_t)==4) || (sizeof(size_t)==8)); }
44
45
46 /*-**************************************************************
47 * Basic Types
48 *****************************************************************/
49 #if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
50 # include <stdint.h>
51 typedef uint8_t BYTE;
52 typedef uint16_t U16;
53 typedef int16_t S16;
54 typedef uint32_t U32;
55 typedef int32_t S32;
56 typedef uint64_t U64;
57 typedef int64_t S64;
58 #else
59 typedef unsigned char BYTE;
60 typedef unsigned short U16;
61 typedef signed short S16;
62 typedef unsigned int U32;
63 typedef signed int S32;
64 typedef unsigned long long U64;
65 typedef signed long long S64;
66 #endif
67
68
69 /*-**************************************************************
70 * Memory I/O
71 *****************************************************************/
72 /* MEM_FORCE_MEMORY_ACCESS :
73 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
74 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
75 * The switch below allows selecting a different access method for improved performance.
76 * Method 0 (default) : use `memcpy()`. Safe and portable.
77 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
78 * This method is safe if your compiler supports it, and is *generally* as fast as or faster than `memcpy`.
79 * Method 2 : direct access. This method is portable, but it violates the C standard.
80 * It can generate buggy code on targets that depend on alignment.
81 * In some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6)
82 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
83 * Prefer these methods in priority order (0 > 1 > 2)
84 */
85 #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
86 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
87 # define MEM_FORCE_MEMORY_ACCESS 2
88 # elif defined(__INTEL_COMPILER) /*|| defined(_MSC_VER)*/ || \
89 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
90 # define MEM_FORCE_MEMORY_ACCESS 1
91 # endif
92 #endif
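/* Editorial addition (illustrative) : as noted above, the access method can also be forced
   externally, e.g. on the compiler command line (file name is a placeholder; 0, 1 or 2
   selects the corresponding method below) :
       cc -DMEM_FORCE_MEMORY_ACCESS=1 -c mem_user.c
*/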
93
94 MEM_STATIC unsigned MEM_32bits(void) { return sizeof(size_t)==4; }
95 MEM_STATIC unsigned MEM_64bits(void) { return sizeof(size_t)==8; }
96
97 MEM_STATIC unsigned MEM_isLittleEndian(void)
98 {
99 const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
100 return one.c[0];
101 }
102
103 #if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)
104
105 /* violates the C standard, by lying about structure alignment.
106 Only use if there is no other choice to achieve best performance on the target platform */
107 MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
108 MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
109 MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
110 MEM_STATIC U64 MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }
111
112 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
113 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
114 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }
115
116 #elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)
117
118 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
119 /* currently only defined for gcc and icc */
120 #if defined(_MSC_VER) || (defined(__INTEL_COMPILER) && defined(WIN32))
121 __pragma( pack(push, 1) )
122 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } unalign;
123 __pragma( pack(pop) )
124 #else
125 typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;
126 #endif
127
128 MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
129 MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
130 MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
131 MEM_STATIC U64 MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }
132
133 MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
134 MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
135 MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }
136
137 #else
138
139 /* default method, safe and standard.
140 can sometimes prove slower */
141
142 MEM_STATIC U16 MEM_read16(const void* memPtr)
143 {
144 U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
145 }
146
147 MEM_STATIC U32 MEM_read32(const void* memPtr)
148 {
149 U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
150 }
151
152 MEM_STATIC U64 MEM_read64(const void* memPtr)
153 {
154 U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
155 }
156
157 MEM_STATIC size_t MEM_readST(const void* memPtr)
158 {
159 size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
160 }
161
162 MEM_STATIC void MEM_write16(void* memPtr, U16 value)
163 {
164 memcpy(memPtr, &value, sizeof(value));
165 }
166
167 MEM_STATIC void MEM_write32(void* memPtr, U32 value)
168 {
169 memcpy(memPtr, &value, sizeof(value));
170 }
171
172 MEM_STATIC void MEM_write64(void* memPtr, U64 value)
173 {
174 memcpy(memPtr, &value, sizeof(value));
175 }
176
177 #endif /* MEM_FORCE_MEMORY_ACCESS */
178
179 MEM_STATIC U32 MEM_swap32(U32 in)
180 {
181 #if defined(_MSC_VER) /* Visual Studio */
182 return _byteswap_ulong(in);
183 #elif defined (__GNUC__)
184 return __builtin_bswap32(in);
185 #else
186 return ((in << 24) & 0xff000000 ) |
187 ((in << 8) & 0x00ff0000 ) |
188 ((in >> 8) & 0x0000ff00 ) |
189 ((in >> 24) & 0x000000ff );
190 #endif
191 }
192
193 MEM_STATIC U64 MEM_swap64(U64 in)
194 {
195 #if defined(_MSC_VER) /* Visual Studio */
196 return _byteswap_uint64(in);
197 #elif defined (__GNUC__)
198 return __builtin_bswap64(in);
199 #else
200 return ((in << 56) & 0xff00000000000000ULL) |
201 ((in << 40) & 0x00ff000000000000ULL) |
202 ((in << 24) & 0x0000ff0000000000ULL) |
203 ((in << 8) & 0x000000ff00000000ULL) |
204 ((in >> 8) & 0x00000000ff000000ULL) |
205 ((in >> 24) & 0x0000000000ff0000ULL) |
206 ((in >> 40) & 0x000000000000ff00ULL) |
207 ((in >> 56) & 0x00000000000000ffULL);
208 #endif
209 }
210
211 MEM_STATIC size_t MEM_swapST(size_t in)
212 {
213 if (MEM_32bits())
214 return (size_t)MEM_swap32((U32)in);
215 else
216 return (size_t)MEM_swap64((U64)in);
217 }
218
219 /*=== Little endian r/w ===*/
220
221 MEM_STATIC U16 MEM_readLE16(const void* memPtr)
222 {
223 if (MEM_isLittleEndian())
224 return MEM_read16(memPtr);
225 else {
226 const BYTE* p = (const BYTE*)memPtr;
227 return (U16)(p[0] + (p[1]<<8));
228 }
229 }
230
231 MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
232 {
233 if (MEM_isLittleEndian()) {
234 MEM_write16(memPtr, val);
235 } else {
236 BYTE* p = (BYTE*)memPtr;
237 p[0] = (BYTE)val;
238 p[1] = (BYTE)(val>>8);
239 }
240 }
241
242 MEM_STATIC U32 MEM_readLE24(const void* memPtr)
243 {
244 return MEM_readLE16(memPtr) + (((const BYTE*)memPtr)[2] << 16);
245 }
246
247 MEM_STATIC void MEM_writeLE24(void* memPtr, U32 val)
248 {
249 MEM_writeLE16(memPtr, (U16)val);
250 ((BYTE*)memPtr)[2] = (BYTE)(val>>16);
251 }
252
253 MEM_STATIC U32 MEM_readLE32(const void* memPtr)
254 {
255 if (MEM_isLittleEndian())
256 return MEM_read32(memPtr);
257 else
258 return MEM_swap32(MEM_read32(memPtr));
259 }
260
261 MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
262 {
263 if (MEM_isLittleEndian())
264 MEM_write32(memPtr, val32);
265 else
266 MEM_write32(memPtr, MEM_swap32(val32));
267 }
268
269 MEM_STATIC U64 MEM_readLE64(const void* memPtr)
270 {
271 if (MEM_isLittleEndian())
272 return MEM_read64(memPtr);
273 else
274 return MEM_swap64(MEM_read64(memPtr));
275 }
276
277 MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
278 {
279 if (MEM_isLittleEndian())
280 MEM_write64(memPtr, val64);
281 else
282 MEM_write64(memPtr, MEM_swap64(val64));
283 }
284
285 MEM_STATIC size_t MEM_readLEST(const void* memPtr)
286 {
287 if (MEM_32bits())
288 return (size_t)MEM_readLE32(memPtr);
289 else
290 return (size_t)MEM_readLE64(memPtr);
291 }
292
293 MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
294 {
295 if (MEM_32bits())
296 MEM_writeLE32(memPtr, (U32)val);
297 else
298 MEM_writeLE64(memPtr, (U64)val);
299 }
300
301 /*=== Big endian r/w ===*/
302
303 MEM_STATIC U32 MEM_readBE32(const void* memPtr)
304 {
305 if (MEM_isLittleEndian())
306 return MEM_swap32(MEM_read32(memPtr));
307 else
308 return MEM_read32(memPtr);
309 }
310
311 MEM_STATIC void MEM_writeBE32(void* memPtr, U32 val32)
312 {
313 if (MEM_isLittleEndian())
314 MEM_write32(memPtr, MEM_swap32(val32));
315 else
316 MEM_write32(memPtr, val32);
317 }
318
319 MEM_STATIC U64 MEM_readBE64(const void* memPtr)
320 {
321 if (MEM_isLittleEndian())
322 return MEM_swap64(MEM_read64(memPtr));
323 else
324 return MEM_read64(memPtr);
325 }
326
327 MEM_STATIC void MEM_writeBE64(void* memPtr, U64 val64)
328 {
329 if (MEM_isLittleEndian())
330 MEM_write64(memPtr, MEM_swap64(val64));
331 else
332 MEM_write64(memPtr, val64);
333 }
334
335 MEM_STATIC size_t MEM_readBEST(const void* memPtr)
336 {
337 if (MEM_32bits())
338 return (size_t)MEM_readBE32(memPtr);
339 else
340 return (size_t)MEM_readBE64(memPtr);
341 }
342
343 MEM_STATIC void MEM_writeBEST(void* memPtr, size_t val)
344 {
345 if (MEM_32bits())
346 MEM_writeBE32(memPtr, (U32)val);
347 else
348 MEM_writeBE64(memPtr, (U64)val);
349 }
350
351
352 /* function safe only for comparisons */
353 MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
354 {
355 switch (length)
356 {
357 default :
358 case 4 : return MEM_read32(memPtr);
359 case 3 : if (MEM_isLittleEndian())
360 return MEM_read32(memPtr)<<8;
361 else
362 return MEM_read32(memPtr)>>8;
363 }
364 }
365
366 #if defined (__cplusplus)
367 }
368 #endif
369
370 #endif /* MEM_H_MODULE */
@@ -0,0 +1,867 b''
1 /*
2 * xxHash - Fast Hash algorithm
3 * Copyright (C) 2012-2016, Yann Collet
4 *
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - xxHash homepage: http://www.xxhash.com
32 * - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34
35
36 /* *************************************
37 * Tuning parameters
38 ***************************************/
39 /*!XXH_FORCE_MEMORY_ACCESS :
40 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
41 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
42 * The switch below allows selecting a different access method for improved performance.
43 * Method 0 (default) : use `memcpy()`. Safe and portable.
44 * Method 1 : `__packed` statement. It depends on a compiler extension (i.e., not portable).
45 * This method is safe if your compiler supports it, and is *generally* as fast as or faster than `memcpy`.
46 * Method 2 : direct access. This method doesn't depend on the compiler, but it violates the C standard.
47 * It can generate buggy code on targets which do not support unaligned memory accesses.
48 * But in some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6)
49 * See http://stackoverflow.com/a/32095106/646947 for details.
50 * Prefer these methods in priority order (0 > 1 > 2)
51 */
52 #ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
53 # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
54 # define XXH_FORCE_MEMORY_ACCESS 2
55 # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \
56 (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
57 # define XXH_FORCE_MEMORY_ACCESS 1
58 # endif
59 #endif
60
61 /*!XXH_ACCEPT_NULL_INPUT_POINTER :
62 * If the input pointer is a null pointer, xxHash's default behavior is to trigger a memory access error, since it is a bad pointer.
63 * When this option is enabled, xxHash output for a null input pointer will be the same as for a zero-length input.
64 * By default, this option is disabled. To enable it, uncomment the define below :
65 */
66 /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */
67
68 /*!XXH_FORCE_NATIVE_FORMAT :
69 * By default, the xxHash library provides endian-independent hash values, based on the little-endian convention.
70 * Results are therefore identical for little-endian and big-endian CPUs.
71 * This comes at a performance cost for big-endian CPUs, since some swapping is required to emulate the little-endian format.
72 * Should endian-independence be of no importance for your application, you may set the #define below to 1,
73 * to improve speed for big-endian CPUs.
74 * This option has no impact on little-endian CPUs.
75 */
76 #ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
77 # define XXH_FORCE_NATIVE_FORMAT 0
78 #endif
79
80 /*!XXH_FORCE_ALIGN_CHECK :
81 * This is a minor performance trick, only useful with lots of very small keys.
82 * It means : check for aligned/unaligned input.
83 * The check costs one initial branch per hash; set to 0 when the input data
84 * is guaranteed to be aligned.
85 */
86 #ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
87 # if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
88 # define XXH_FORCE_ALIGN_CHECK 0
89 # else
90 # define XXH_FORCE_ALIGN_CHECK 1
91 # endif
92 #endif
93
94
95 /* *************************************
96 * Includes & Memory related functions
97 ***************************************/
98 /* Modify the local functions below should you wish to use some other memory routines */
99 /* for malloc(), free() */
100 #include <stdlib.h>
101 static void* XXH_malloc(size_t s) { return malloc(s); }
102 static void XXH_free (void* p) { free(p); }
103 /* for memcpy() */
104 #include <string.h>
105 static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
106
107 #define XXH_STATIC_LINKING_ONLY
108 #include "xxhash.h"
109
110
111 /* *************************************
112 * Compiler Specific Options
113 ***************************************/
114 #ifdef _MSC_VER /* Visual Studio */
115 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
116 # define FORCE_INLINE static __forceinline
117 #else
118 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
119 # ifdef __GNUC__
120 # define FORCE_INLINE static inline __attribute__((always_inline))
121 # else
122 # define FORCE_INLINE static inline
123 # endif
124 # else
125 # define FORCE_INLINE static
126 # endif /* __STDC_VERSION__ */
127 #endif
128
129
130 /* *************************************
131 * Basic Types
132 ***************************************/
133 #ifndef MEM_MODULE
134 # define MEM_MODULE
135 # if !defined (__VMS) && (defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
136 # include <stdint.h>
137 typedef uint8_t BYTE;
138 typedef uint16_t U16;
139 typedef uint32_t U32;
140 typedef int32_t S32;
141 typedef uint64_t U64;
142 # else
143 typedef unsigned char BYTE;
144 typedef unsigned short U16;
145 typedef unsigned int U32;
146 typedef signed int S32;
147 typedef unsigned long long U64; /* if your compiler doesn't support unsigned long long, replace by another 64-bit type here. Note that xxhash.h will also need to be updated. */
148 # endif
149 #endif
150
151
152 #if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
153
154 /* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
155 static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
156 static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
157
158 #elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
159
160 /* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
161 /* currently only defined for gcc and icc */
162 typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign;
163
164 static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
165 static U64 XXH_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
166
167 #else
168
169 /* portable and safe solution. Generally efficient.
170 * see : http://stackoverflow.com/a/32095106/646947
171 */
172
173 static U32 XXH_read32(const void* memPtr)
174 {
175 U32 val;
176 memcpy(&val, memPtr, sizeof(val));
177 return val;
178 }
179
180 static U64 XXH_read64(const void* memPtr)
181 {
182 U64 val;
183 memcpy(&val, memPtr, sizeof(val));
184 return val;
185 }
186
187 #endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
188
189
190 /* ****************************************
191 * Compiler-specific Functions and Macros
192 ******************************************/
193 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
194
195 /* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
196 #if defined(_MSC_VER)
197 # define XXH_rotl32(x,r) _rotl(x,r)
198 # define XXH_rotl64(x,r) _rotl64(x,r)
199 #else
200 # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
201 # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
202 #endif
203
204 #if defined(_MSC_VER) /* Visual Studio */
205 # define XXH_swap32 _byteswap_ulong
206 # define XXH_swap64 _byteswap_uint64
207 #elif GCC_VERSION >= 403
208 # define XXH_swap32 __builtin_bswap32
209 # define XXH_swap64 __builtin_bswap64
210 #else
211 static U32 XXH_swap32 (U32 x)
212 {
213 return ((x << 24) & 0xff000000 ) |
214 ((x << 8) & 0x00ff0000 ) |
215 ((x >> 8) & 0x0000ff00 ) |
216 ((x >> 24) & 0x000000ff );
217 }
218 static U64 XXH_swap64 (U64 x)
219 {
220 return ((x << 56) & 0xff00000000000000ULL) |
221 ((x << 40) & 0x00ff000000000000ULL) |
222 ((x << 24) & 0x0000ff0000000000ULL) |
223 ((x << 8) & 0x000000ff00000000ULL) |
224 ((x >> 8) & 0x00000000ff000000ULL) |
225 ((x >> 24) & 0x0000000000ff0000ULL) |
226 ((x >> 40) & 0x000000000000ff00ULL) |
227 ((x >> 56) & 0x00000000000000ffULL);
228 }
229 #endif
230
231
232 /* *************************************
233 * Architecture Macros
234 ***************************************/
235 typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
236
237 /* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
238 #ifndef XXH_CPU_LITTLE_ENDIAN
239 static const int g_one = 1;
240 # define XXH_CPU_LITTLE_ENDIAN (*(const char*)(&g_one))
241 #endif
242
243
244 /* ***************************
245 * Memory reads
246 *****************************/
247 typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
248
249 FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
250 {
251 if (align==XXH_unaligned)
252 return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
253 else
254 return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
255 }
256
257 FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
258 {
259 return XXH_readLE32_align(ptr, endian, XXH_unaligned);
260 }
261
262 static U32 XXH_readBE32(const void* ptr)
263 {
264 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
265 }
266
267 FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
268 {
269 if (align==XXH_unaligned)
270 return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
271 else
272 return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
273 }
274
275 FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
276 {
277 return XXH_readLE64_align(ptr, endian, XXH_unaligned);
278 }
279
280 static U64 XXH_readBE64(const void* ptr)
281 {
282 return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
283 }
284
285
286 /* *************************************
287 * Macros
288 ***************************************/
289 #define XXH_STATIC_ASSERT(c) { enum { XXH_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
290
291
292 /* *************************************
293 * Constants
294 ***************************************/
295 static const U32 PRIME32_1 = 2654435761U;
296 static const U32 PRIME32_2 = 2246822519U;
297 static const U32 PRIME32_3 = 3266489917U;
298 static const U32 PRIME32_4 = 668265263U;
299 static const U32 PRIME32_5 = 374761393U;
300
301 static const U64 PRIME64_1 = 11400714785074694791ULL;
302 static const U64 PRIME64_2 = 14029467366897019727ULL;
303 static const U64 PRIME64_3 = 1609587929392839161ULL;
304 static const U64 PRIME64_4 = 9650029242287828579ULL;
305 static const U64 PRIME64_5 = 2870177450012600261ULL;
306
307 XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
308
309
310 /* **************************
311 * Utils
312 ****************************/
313 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dstState, const XXH32_state_t* restrict srcState)
314 {
315 memcpy(dstState, srcState, sizeof(*dstState));
316 }
317
318 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dstState, const XXH64_state_t* restrict srcState)
319 {
320 memcpy(dstState, srcState, sizeof(*dstState));
321 }
322
323
324 /* ***************************
325 * Simple Hash Functions
326 *****************************/
327
328 static U32 XXH32_round(U32 seed, U32 input)
329 {
330 seed += input * PRIME32_2;
331 seed = XXH_rotl32(seed, 13);
332 seed *= PRIME32_1;
333 return seed;
334 }
335
336 FORCE_INLINE U32 XXH32_endian_align(const void* input, size_t len, U32 seed, XXH_endianess endian, XXH_alignment align)
337 {
338 const BYTE* p = (const BYTE*)input;
339 const BYTE* bEnd = p + len;
340 U32 h32;
341 #define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
342
343 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
344 if (p==NULL) {
345 len=0;
346 bEnd=p=(const BYTE*)(size_t)16;
347 }
348 #endif
349
350 if (len>=16) {
351 const BYTE* const limit = bEnd - 16;
352 U32 v1 = seed + PRIME32_1 + PRIME32_2;
353 U32 v2 = seed + PRIME32_2;
354 U32 v3 = seed + 0;
355 U32 v4 = seed - PRIME32_1;
356
357 do {
358 v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
359 v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
360 v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
361 v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
362 } while (p<=limit);
363
364 h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
365 } else {
366 h32 = seed + PRIME32_5;
367 }
368
369 h32 += (U32) len;
370
371 while (p+4<=bEnd) {
372 h32 += XXH_get32bits(p) * PRIME32_3;
373 h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
374 p+=4;
375 }
376
377 while (p<bEnd) {
378 h32 += (*p) * PRIME32_5;
379 h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
380 p++;
381 }
382
383 h32 ^= h32 >> 15;
384 h32 *= PRIME32_2;
385 h32 ^= h32 >> 13;
386 h32 *= PRIME32_3;
387 h32 ^= h32 >> 16;
388
389 return h32;
390 }
391
392
393 XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
394 {
395 #if 0
396 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
397 XXH32_CREATESTATE_STATIC(state);
398 XXH32_reset(state, seed);
399 XXH32_update(state, input, len);
400 return XXH32_digest(state);
401 #else
402 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
403
404 if (XXH_FORCE_ALIGN_CHECK) {
405 if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
406 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
407 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
408 else
409 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
410 } }
411
412 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
413 return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
414 else
415 return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
416 #endif
417 }
418
419
420 static U64 XXH64_round(U64 acc, U64 input)
421 {
422 acc += input * PRIME64_2;
423 acc = XXH_rotl64(acc, 31);
424 acc *= PRIME64_1;
425 return acc;
426 }
427
428 static U64 XXH64_mergeRound(U64 acc, U64 val)
429 {
430 val = XXH64_round(0, val);
431 acc ^= val;
432 acc = acc * PRIME64_1 + PRIME64_4;
433 return acc;
434 }
435
436 FORCE_INLINE U64 XXH64_endian_align(const void* input, size_t len, U64 seed, XXH_endianess endian, XXH_alignment align)
437 {
438 const BYTE* p = (const BYTE*)input;
439 const BYTE* const bEnd = p + len;
440 U64 h64;
441 #define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
442
443 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
444 if (p==NULL) {
445 len=0;
446 bEnd=p=(const BYTE*)(size_t)32;
447 }
448 #endif
449
450 if (len>=32) {
451 const BYTE* const limit = bEnd - 32;
452 U64 v1 = seed + PRIME64_1 + PRIME64_2;
453 U64 v2 = seed + PRIME64_2;
454 U64 v3 = seed + 0;
455 U64 v4 = seed - PRIME64_1;
456
457 do {
458 v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
459 v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
460 v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
461 v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
462 } while (p<=limit);
463
464 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
465 h64 = XXH64_mergeRound(h64, v1);
466 h64 = XXH64_mergeRound(h64, v2);
467 h64 = XXH64_mergeRound(h64, v3);
468 h64 = XXH64_mergeRound(h64, v4);
469
470 } else {
471 h64 = seed + PRIME64_5;
472 }
473
474 h64 += (U64) len;
475
476 while (p+8<=bEnd) {
477 U64 const k1 = XXH64_round(0, XXH_get64bits(p));
478 h64 ^= k1;
479 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
480 p+=8;
481 }
482
483 if (p+4<=bEnd) {
484 h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1;
485 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
486 p+=4;
487 }
488
489 while (p<bEnd) {
490 h64 ^= (*p) * PRIME64_5;
491 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
492 p++;
493 }
494
495 h64 ^= h64 >> 33;
496 h64 *= PRIME64_2;
497 h64 ^= h64 >> 29;
498 h64 *= PRIME64_3;
499 h64 ^= h64 >> 32;
500
501 return h64;
502 }
503
504
505 XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
506 {
507 #if 0
508 /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
509 XXH64_CREATESTATE_STATIC(state);
510 XXH64_reset(state, seed);
511 XXH64_update(state, input, len);
512 return XXH64_digest(state);
513 #else
514 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
515
516 if (XXH_FORCE_ALIGN_CHECK) {
517 if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
518 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
519 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
520 else
521 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
522 } }
523
524 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
525 return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
526 else
527 return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
528 #endif
529 }
530
531
532 /* **************************************************
533 * Advanced Hash Functions
534 ****************************************************/
535
536 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
537 {
538 return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
539 }
540 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
541 {
542 XXH_free(statePtr);
543 return XXH_OK;
544 }
545
546 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
547 {
548 return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
549 }
550 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
551 {
552 XXH_free(statePtr);
553 return XXH_OK;
554 }
555
556
557 /*** Hash feed ***/
558
559 XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
560 {
561 XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
562 memset(&state, 0, sizeof(state)-4); /* do not write into reserved, for future removal */
563 state.v1 = seed + PRIME32_1 + PRIME32_2;
564 state.v2 = seed + PRIME32_2;
565 state.v3 = seed + 0;
566 state.v4 = seed - PRIME32_1;
567 memcpy(statePtr, &state, sizeof(state));
568 return XXH_OK;
569 }
570
571
572 XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
573 {
574 XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
575 memset(&state, 0, sizeof(state)-8); /* do not write into reserved, for future removal */
576 state.v1 = seed + PRIME64_1 + PRIME64_2;
577 state.v2 = seed + PRIME64_2;
578 state.v3 = seed + 0;
579 state.v4 = seed - PRIME64_1;
580 memcpy(statePtr, &state, sizeof(state));
581 return XXH_OK;
582 }
583
584
585 FORCE_INLINE XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
586 {
587 const BYTE* p = (const BYTE*)input;
588 const BYTE* const bEnd = p + len;
589
590 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
591 if (input==NULL) return XXH_ERROR;
592 #endif
593
594 state->total_len_32 += (unsigned)len;
595 state->large_len |= (len>=16) | (state->total_len_32>=16);
596
597 if (state->memsize + len < 16) { /* fill in tmp buffer */
598 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
599 state->memsize += (unsigned)len;
600 return XXH_OK;
601 }
602
603 if (state->memsize) { /* some data left from previous update */
604 XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
605 { const U32* p32 = state->mem32;
606 state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
607 state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
608 state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
609 state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian)); p32++;
610 }
611 p += 16-state->memsize;
612 state->memsize = 0;
613 }
614
615 if (p <= bEnd-16) {
616 const BYTE* const limit = bEnd - 16;
617 U32 v1 = state->v1;
618 U32 v2 = state->v2;
619 U32 v3 = state->v3;
620 U32 v4 = state->v4;
621
622 do {
623 v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
624 v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
625 v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
626 v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
627 } while (p<=limit);
628
629 state->v1 = v1;
630 state->v2 = v2;
631 state->v3 = v3;
632 state->v4 = v4;
633 }
634
635 if (p < bEnd) {
636 XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
637 state->memsize = (unsigned)(bEnd-p);
638 }
639
640 return XXH_OK;
641 }
642
643 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
644 {
645 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
646
647 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
648 return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
649 else
650 return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
651 }
652
653
654
655 FORCE_INLINE U32 XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
656 {
657 const BYTE * p = (const BYTE*)state->mem32;
658 const BYTE* const bEnd = (const BYTE*)(state->mem32) + state->memsize;
659 U32 h32;
660
661 if (state->large_len) {
662 h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
663 } else {
664 h32 = state->v3 /* == seed */ + PRIME32_5;
665 }
666
667 h32 += state->total_len_32;
668
669 while (p+4<=bEnd) {
670 h32 += XXH_readLE32(p, endian) * PRIME32_3;
671 h32 = XXH_rotl32(h32, 17) * PRIME32_4;
672 p+=4;
673 }
674
675 while (p<bEnd) {
676 h32 += (*p) * PRIME32_5;
677 h32 = XXH_rotl32(h32, 11) * PRIME32_1;
678 p++;
679 }
680
681 h32 ^= h32 >> 15;
682 h32 *= PRIME32_2;
683 h32 ^= h32 >> 13;
684 h32 *= PRIME32_3;
685 h32 ^= h32 >> 16;
686
687 return h32;
688 }
689
690
691 XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
692 {
693 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
694
695 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
696 return XXH32_digest_endian(state_in, XXH_littleEndian);
697 else
698 return XXH32_digest_endian(state_in, XXH_bigEndian);
699 }
700
701
702
703 /* **** XXH64 **** */
704
705 FORCE_INLINE XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
706 {
707 const BYTE* p = (const BYTE*)input;
708 const BYTE* const bEnd = p + len;
709
710 #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
711 if (input==NULL) return XXH_ERROR;
712 #endif
713
714 state->total_len += len;
715
716 if (state->memsize + len < 32) { /* fill in tmp buffer */
717 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
718 state->memsize += (U32)len;
719 return XXH_OK;
720 }
721
722 if (state->memsize) { /* tmp buffer is full */
723 XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
724 state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
725 state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
726 state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
727 state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
728 p += 32-state->memsize;
729 state->memsize = 0;
730 }
731
732 if (p+32 <= bEnd) {
733 const BYTE* const limit = bEnd - 32;
734 U64 v1 = state->v1;
735 U64 v2 = state->v2;
736 U64 v3 = state->v3;
737 U64 v4 = state->v4;
738
739 do {
740 v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
741 v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
742 v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
743 v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
744 } while (p<=limit);
745
746 state->v1 = v1;
747 state->v2 = v2;
748 state->v3 = v3;
749 state->v4 = v4;
750 }
751
752 if (p < bEnd) {
753 XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
754 state->memsize = (unsigned)(bEnd-p);
755 }
756
757 return XXH_OK;
758 }
759
760 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
761 {
762 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
763
764 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
765 return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
766 else
767 return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
768 }
769
770
771
772 FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
773 {
774 const BYTE * p = (const BYTE*)state->mem64;
775 const BYTE* const bEnd = (const BYTE*)state->mem64 + state->memsize;
776 U64 h64;
777
778 if (state->total_len >= 32) {
779 U64 const v1 = state->v1;
780 U64 const v2 = state->v2;
781 U64 const v3 = state->v3;
782 U64 const v4 = state->v4;
783
784 h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
785 h64 = XXH64_mergeRound(h64, v1);
786 h64 = XXH64_mergeRound(h64, v2);
787 h64 = XXH64_mergeRound(h64, v3);
788 h64 = XXH64_mergeRound(h64, v4);
789 } else {
790 h64 = state->v3 + PRIME64_5;
791 }
792
793 h64 += (U64) state->total_len;
794
795 while (p+8<=bEnd) {
796 U64 const k1 = XXH64_round(0, XXH_readLE64(p, endian));
797 h64 ^= k1;
798 h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4;
799 p+=8;
800 }
801
802 if (p+4<=bEnd) {
803 h64 ^= (U64)(XXH_readLE32(p, endian)) * PRIME64_1;
804 h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
805 p+=4;
806 }
807
808 while (p<bEnd) {
809 h64 ^= (*p) * PRIME64_5;
810 h64 = XXH_rotl64(h64, 11) * PRIME64_1;
811 p++;
812 }
813
814 h64 ^= h64 >> 33;
815 h64 *= PRIME64_2;
816 h64 ^= h64 >> 29;
817 h64 *= PRIME64_3;
818 h64 ^= h64 >> 32;
819
820 return h64;
821 }
822
823
824 XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
825 {
826 XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
827
828 if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
829 return XXH64_digest_endian(state_in, XXH_littleEndian);
830 else
831 return XXH64_digest_endian(state_in, XXH_bigEndian);
832 }
833
834
835 /* **************************
836 * Canonical representation
837 ****************************/
838
839 /*! Default XXH result types are basic unsigned 32- and 64-bit integers.
840 * The canonical representation follows human-readable write convention, aka big-endian (large digits first).
841 * These functions allow transformation of hash result into and from its canonical format.
842 * This way, hash values can be written into a file or buffer, and remain comparable across different systems and programs.
843 */
844
845 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
846 {
847 XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
848 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
849 memcpy(dst, &hash, sizeof(*dst));
850 }
851
852 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
853 {
854 XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
855 if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
856 memcpy(dst, &hash, sizeof(*dst));
857 }
858
859 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
860 {
861 return XXH_readBE32(src);
862 }
863
864 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
865 {
866 return XXH_readBE64(src);
867 }
@@ -0,0 +1,309 b''
1 /*
2 xxHash - Extremely Fast Hash algorithm
3 Header File
4 Copyright (C) 2012-2016, Yann Collet.
5
6 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
7
8 Redistribution and use in source and binary forms, with or without
9 modification, are permitted provided that the following conditions are
10 met:
11
12 * Redistributions of source code must retain the above copyright
13 notice, this list of conditions and the following disclaimer.
14 * Redistributions in binary form must reproduce the above
15 copyright notice, this list of conditions and the following disclaimer
16 in the documentation and/or other materials provided with the
17 distribution.
18
19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 You can contact the author at :
32 - xxHash source repository : https://github.com/Cyan4973/xxHash
33 */
34
35 /* Notice extracted from xxHash homepage :
36
37 xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
38 It also successfully passes all tests from the SMHasher suite.
39
40 Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
41
42 Name Speed Q.Score Author
43 xxHash 5.4 GB/s 10
44 CrapWow 3.2 GB/s 2 Andrew
45 MurmurHash 3a 2.7 GB/s 10 Austin Appleby
46 SpookyHash 2.0 GB/s 10 Bob Jenkins
47 SBox 1.4 GB/s 9 Bret Mulvey
48 Lookup3 1.2 GB/s 9 Bob Jenkins
49 SuperFastHash 1.2 GB/s 1 Paul Hsieh
50 CityHash64 1.05 GB/s 10 Pike & Alakuijala
51 FNV 0.55 GB/s 5 Fowler, Noll, Vo
52 CRC32 0.43 GB/s 9
53 MD5-32 0.33 GB/s 10 Ronald L. Rivest
54 SHA1-32 0.28 GB/s 10
55
56 Q.Score is a measure of quality of the hash function.
57 It depends on successfully passing SMHasher test set.
58 10 is a perfect score.
59
60 A 64-bit version, named XXH64, is available since r35.
61 It offers much better speed, but only for 64-bit applications.
62 Name Speed on 64 bits Speed on 32 bits
63 XXH64 13.8 GB/s 1.9 GB/s
64 XXH32 6.8 GB/s 6.0 GB/s
65 */
66
67 #ifndef XXHASH_H_5627135585666179
68 #define XXHASH_H_5627135585666179 1
69
70 #if defined (__cplusplus)
71 extern "C" {
72 #endif
73
74 #ifndef XXH_NAMESPACE
75 # define XXH_NAMESPACE ZSTD_ /* Zstandard specific */
76 #endif
77
78
79 /* ****************************
80 * Definitions
81 ******************************/
82 #include <stddef.h> /* size_t */
83 typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
84
85
86 /* ****************************
87 * API modifier
88 ******************************/
89 /** XXH_PRIVATE_API
90 * This is useful if you want to include xxhash functions in `static` mode
91 * in order to inline them, and remove their symbol from the public list.
92 * Methodology :
93 * #define XXH_PRIVATE_API
94 * #include "xxhash.h"
95 * `xxhash.c` is automatically included.
96 * There is then no need to compile and link xxhash.c as a separate module.
97 */
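A client-side sketch of the methodology above (illustrative only, not part of this header); local_checksum is a hypothetical helper name:

#define XXH_PRIVATE_API        /* makes every XXH_* function static (and inline where supported) */
#include "xxhash.h"            /* xxhash.c is pulled in automatically; nothing to link separately */

static unsigned local_checksum(const void* data, size_t size)
{
    return XXH32(data, size, 0);   /* resolved within this translation unit, no exported symbol */
}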
98 #ifdef XXH_PRIVATE_API
99 # ifndef XXH_STATIC_LINKING_ONLY
100 # define XXH_STATIC_LINKING_ONLY
101 # endif
102 # if defined(__GNUC__)
103 # define XXH_PUBLIC_API static __inline __attribute__((unused))
104 # elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
105 # define XXH_PUBLIC_API static inline
106 # elif defined(_MSC_VER)
107 # define XXH_PUBLIC_API static __inline
108 # else
109 # define XXH_PUBLIC_API static /* this version may generate warnings for unused static functions; disable the relevant warning */
110 # endif
111 #else
112 # define XXH_PUBLIC_API /* do nothing */
113 #endif /* XXH_PRIVATE_API */
114
115 /*!XXH_NAMESPACE, aka Namespace Emulation :
116
117 If you want to include _and expose_ xxHash functions from within your own library,
118 but also want to avoid symbol collisions with another library which also includes xxHash,
119
120 you can use XXH_NAMESPACE to automatically prefix any public symbol from the xxHash library
121 with the value of XXH_NAMESPACE (so do not leave it empty, and do not use a purely numeric value).
122
123 Note that no change is required within the calling program as long as it includes `xxhash.h` :
124 regular symbol names are automatically translated by this header.
125 */
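An illustrative client-side sketch, not part of upstream xxHash: with the ZSTD_ prefix defined a few lines above, calling code keeps using the plain names while the linked symbol carries the prefix, which is what prevents collisions when the host application also embeds its own copy of xxHash. frame_checksum is a hypothetical helper:

#include <string.h>
#include "xxhash.h"            /* included with XXH_NAMESPACE already in effect, as in this project */

static unsigned frame_checksum(const char* s)
{
    /* The source says XXH32, but after macro expansion the call (and the exported
       symbol) is ZSTD_XXH32, so two copies of xxHash can coexist in one binary. */
    return XXH32(s, strlen(s), 0);
}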
126 #ifdef XXH_NAMESPACE
127 # define XXH_CAT(A,B) A##B
128 # define XXH_NAME2(A,B) XXH_CAT(A,B)
129 # define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
130 # define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
131 # define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
132 # define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
133 # define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
134 # define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
135 # define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
136 # define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
137 # define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
138 # define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
139 # define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
140 # define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
141 # define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
142 # define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
143 # define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
144 # define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
145 # define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
146 # define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
147 # define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
148 #endif
149
150
151 /* *************************************
152 * Version
153 ***************************************/
154 #define XXH_VERSION_MAJOR 0
155 #define XXH_VERSION_MINOR 6
156 #define XXH_VERSION_RELEASE 2
157 #define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
158 XXH_PUBLIC_API unsigned XXH_versionNumber (void);
159
160
161 /* ****************************
162 * Simple Hash Functions
163 ******************************/
164 typedef unsigned int XXH32_hash_t;
165 typedef unsigned long long XXH64_hash_t;
166
167 XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
168 XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
169
170 /*!
171 XXH32() :
172 Calculate the 32-bit hash of a sequence of "length" bytes stored at memory address "input".
173 The memory between input & input+length must be valid (allocated and read-accessible).
174 "seed" can be used to alter the result predictably.
175 Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
176 XXH64() :
177 Calculate the 64-bit hash of a sequence of "length" bytes stored at memory address "input".
178 "seed" can be used to alter the result predictably.
179 This function runs 2x faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
180 */
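A minimal one-shot sketch of the two calls documented above (illustrative only, not part of the vendored sources); it assumes xxhash.h is on the include path and uses seed 0:

#include <stdio.h>
#include <string.h>
#include "xxhash.h"

int main(void)
{
    const char msg[] = "zstd frame payload";                    /* any readable buffer works */
    unsigned long long const h64 = XXH64(msg, strlen(msg), 0);  /* seed 0 gives the conventional result */
    unsigned int const       h32 = XXH32(msg, strlen(msg), 0);
    printf("XXH64 = %016llx\nXXH32 = %08x\n", h64, h32);
    return 0;
}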
181
182
183 /* ****************************
184 * Streaming Hash Functions
185 ******************************/
186 typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
187 typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
188
189 /*! State allocation, compatible with dynamic libraries */
190
191 XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
192 XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
193
194 XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
195 XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
196
197
198 /* hash streaming */
199
200 XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
201 XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
202 XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
203
204 XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
205 XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
206 XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
207
208 /*
209 These functions generate the xxHash of an input provided in multiple segments.
210 Note that, for small input, they are slower than single-call functions, due to state management.
211 For small input, prefer `XXH32()` and `XXH64()` .
212
213 XXH state must first be allocated, using XXH*_createState() .
214
215 Start a new hash by initializing state with a seed, using XXH*_reset().
216
217 Then, feed the hash state by calling XXH*_update() as many times as necessary.
218 Obviously, input must be allocated and read accessible.
219 The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
220
221 Finally, a hash value can be produced anytime, by using XXH*_digest().
222 This function returns the nn-bits hash as an int or long long.
223
224 It's still possible to continue inserting input into the hash state after a digest,
225 and generate new hashes later on by calling XXH*_digest() again.
226
227 When done, free XXH state space if it was allocated dynamically.
228 */
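A hedged sketch of the streaming sequence described above (create, reset, update loop, digest, free); hash_stream is a hypothetical helper and error handling is kept minimal:

#include <stdio.h>
#include "xxhash.h"

/* Hash a file in fixed-size chunks using the streaming API. */
static unsigned long long hash_stream(FILE* f)
{
    char buf[4096];
    size_t n;
    unsigned long long h;
    XXH64_state_t* const state = XXH64_createState();   /* heap-allocated, safe across DLL boundaries */
    if (state == NULL) return 0;
    XXH64_reset(state, 0);                               /* start a new hash with seed 0 */
    while ((n = fread(buf, 1, sizeof(buf), f)) > 0) {
        if (XXH64_update(state, buf, n) != XXH_OK) break;  /* XXH_OK (0) means success */
    }
    h = XXH64_digest(state);                             /* can be called at any point */
    XXH64_freeState(state);
    return h;
}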
229
230
231 /* **************************
232 * Utils
233 ****************************/
234 #if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) /* ! C99 */
235 # define restrict /* disable restrict */
236 #endif
237
238 XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* restrict dst_state, const XXH32_state_t* restrict src_state);
239 XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* restrict dst_state, const XXH64_state_t* restrict src_state);
240
241
242 /* **************************
243 * Canonical representation
244 ****************************/
245 typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
246 typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
247
248 XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
249 XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
250
251 XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
252 XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
253
254 /* The default result types for XXH functions are primitive unsigned 32- and 64-bit integers.
255 * The canonical representation follows the human-readable write convention, aka big-endian (large digits first).
256 * These functions allow transformation of hash result into and from its canonical format.
257 * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
258 */
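A short sketch of the canonical round-trip described above (illustrative only); store_and_check is a hypothetical helper:

#include <assert.h>
#include "xxhash.h"

/* Round-trip a hash through its canonical (big-endian) byte form, e.g. before
   writing it into a file header that must be readable on any architecture. */
static void store_and_check(unsigned long long h64)
{
    XXH64_canonical_t canon;
    XXH64_canonicalFromHash(&canon, h64);           /* canon.digest[] now holds 8 big-endian bytes */
    /* ... canon.digest would be written to disk or the wire here ... */
    assert(XXH64_hashFromCanonical(&canon) == h64); /* reading it back restores the value */
}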
259
260
261 #ifdef XXH_STATIC_LINKING_ONLY
262
263 /* ================================================================================================
264 This section contains definitions which are not guaranteed to remain stable.
265 They may change in future versions, becoming incompatible with a different version of the library.
266 They shall only be used with static linking.
267 Never use these definitions in association with dynamic linking !
268 =================================================================================================== */
269
270 /* These definitions are only meant to allow allocation of XXH state
271 statically, on stack, or in a struct for example.
272 Do not use members directly. */
273
274 struct XXH32_state_s {
275 unsigned total_len_32;
276 unsigned large_len;
277 unsigned v1;
278 unsigned v2;
279 unsigned v3;
280 unsigned v4;
281 unsigned mem32[4]; /* buffer defined as U32 for alignment */
282 unsigned memsize;
283 unsigned reserved; /* never read nor written, will be removed in a future version */
284 }; /* typedef'd to XXH32_state_t */
285
286 struct XXH64_state_s {
287 unsigned long long total_len;
288 unsigned long long v1;
289 unsigned long long v2;
290 unsigned long long v3;
291 unsigned long long v4;
292 unsigned long long mem64[4]; /* buffer defined as U64 for alignment */
293 unsigned memsize;
294 unsigned reserved[2]; /* never read nor written, will be removed in a future version */
295 }; /* typedef'd to XXH64_state_t */
296
297
298 # ifdef XXH_PRIVATE_API
299 # include "xxhash.c" /* include xxhash functions as `static`, for inlining */
300 # endif
301
302 #endif /* XXH_STATIC_LINKING_ONLY */
303
304
305 #if defined (__cplusplus)
306 }
307 #endif
308
309 #endif /* XXHASH_H_5627135585666179 */
@@ -0,0 +1,191 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 /* ***************************************************************
11 * NOTES/WARNINGS
12 *****************************************************************/
13 /* The streaming API defined here will soon be deprecated by the
14 * new one in 'zstd.h'; consider migrating towards newer streaming
15 * API. See 'lib/README.md'.
16 *****************************************************************/
17
18 #ifndef ZSTD_BUFFERED_H_23987
19 #define ZSTD_BUFFERED_H_23987
20
21 #if defined (__cplusplus)
22 extern "C" {
23 #endif
24
25 /* *************************************
26 * Dependencies
27 ***************************************/
28 #include <stddef.h> /* size_t */
29
30
31 /* ***************************************************************
32 * Compiler specifics
33 *****************************************************************/
34 /* ZSTD_DLL_EXPORT :
35 * Enable exporting of functions when building a Windows DLL */
36 #if defined(_WIN32) && defined(ZSTD_DLL_EXPORT) && (ZSTD_DLL_EXPORT==1)
37 # define ZSTDLIB_API __declspec(dllexport)
38 #else
39 # define ZSTDLIB_API
40 #endif
41
42
43 /* *************************************
44 * Streaming functions
45 ***************************************/
46 /* This is the easier "buffered" streaming API,
47 * using an internal buffer to lift all restrictions on user-provided buffers
48 * which can be any size, any place, for both input and output.
49 * ZBUFF and ZSTD are 100% interoperable,
50 * frames created by one can be decoded by the other one */
51
52 typedef struct ZBUFF_CCtx_s ZBUFF_CCtx;
53 ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx(void);
54 ZSTDLIB_API size_t ZBUFF_freeCCtx(ZBUFF_CCtx* cctx);
55
56 ZSTDLIB_API size_t ZBUFF_compressInit(ZBUFF_CCtx* cctx, int compressionLevel);
57 ZSTDLIB_API size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
58
59 ZSTDLIB_API size_t ZBUFF_compressContinue(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr, const void* src, size_t* srcSizePtr);
60 ZSTDLIB_API size_t ZBUFF_compressFlush(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
61 ZSTDLIB_API size_t ZBUFF_compressEnd(ZBUFF_CCtx* cctx, void* dst, size_t* dstCapacityPtr);
62
63 /*-*************************************************
64 * Streaming compression - howto
65 *
66 * A ZBUFF_CCtx object is required to track streaming operation.
67 * Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
68 * ZBUFF_CCtx objects can be reused multiple times.
69 *
70 * Start by initializing the ZBUFF_CCtx.
71 * Use ZBUFF_compressInit() to start a new compression operation.
72 * Use ZBUFF_compressInitDictionary() for a compression which requires a dictionary.
73 *
74 * Use ZBUFF_compressContinue() repetitively to consume input stream.
75 * *srcSizePtr and *dstCapacityPtr can be any size.
76 * The function will report how many bytes were read or written within *srcSizePtr and *dstCapacityPtr.
77 * Note that it may not consume the entire input, in which case it's up to the caller to present the remaining data again.
78 * The content of `dst` will be overwritten (up to *dstCapacityPtr) at each call, so save its content if it matters, or change `dst`.
79 * @return : a hint to preferred nb of bytes to use as input for next function call (it's just a hint, to improve latency)
80 * or an error code, which can be tested using ZBUFF_isError().
81 *
82 * At any moment, it's possible to flush whatever data remains within buffer, using ZBUFF_compressFlush().
83 * The nb of bytes written into `dst` will be reported into *dstCapacityPtr.
84 * Note that the function cannot output more than *dstCapacityPtr,
85 * therefore, some content might still be left in the internal buffer if *dstCapacityPtr is too small.
86 * @return : nb of bytes still present into internal buffer (0 if it's empty)
87 * or an error code, which can be tested using ZBUFF_isError().
88 *
89 * ZBUFF_compressEnd() instructs to finish a frame.
90 * It will perform a flush and write frame epilogue.
91 * The epilogue is required for decoders to consider a frame completed.
92 * Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
93 * In that case, call ZBUFF_compressFlush() again to complete the flush.
94 * @return : nb of bytes still present into internal buffer (0 if it's empty)
95 * or an error code, which can be tested using ZBUFF_isError().
96 *
97 * Hint : _recommended buffer_ sizes (not compulsory) : ZBUFF_recommendedCInSize() / ZBUFF_recommendedCOutSize()
98 * input : ZBUFF_recommendedCInSize==128 KB block size is the internal unit, use this value to reduce intermediate stages (better latency)
99 * output : ZBUFF_recommendedCOutSize==ZSTD_compressBound(128 KB) + 3 + 3 : ensures it's always possible to write/flush/end a full block. Skip some buffering.
100 * By using both, it ensures that input will be entirely consumed, and output will always contain the result, reducing intermediate buffering.
101 * **************************************************/
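The loop below is a hedged sketch of the call sequence described above, using the recommended buffer sizes; compress_stream is a hypothetical helper and cleanup on the error paths is omitted for brevity:

#include <stdio.h>
#include <stdlib.h>
#include "zbuff.h"

/* Compress fin into fout as a single zstd frame. */
static int compress_stream(FILE* fin, FILE* fout, int level)
{
    size_t const inSize  = ZBUFF_recommendedCInSize();    /* 128 KB */
    size_t const outSize = ZBUFF_recommendedCOutSize();   /* enough to always write/flush/end a full block */
    void* const inBuf  = malloc(inSize);
    void* const outBuf = malloc(outSize);
    ZBUFF_CCtx* const cctx = ZBUFF_createCCtx();
    size_t readBytes;
    if (!inBuf || !outBuf || !cctx) return 1;

    if (ZBUFF_isError(ZBUFF_compressInit(cctx, level))) return 1;

    while ((readBytes = fread(inBuf, 1, inSize, fin)) > 0) {
        size_t consumed = 0;
        while (consumed < readBytes) {                     /* one chunk may need several passes */
            size_t dstCap = outSize;
            size_t srcLeft = readBytes - consumed;
            size_t const hint = ZBUFF_compressContinue(cctx, outBuf, &dstCap,
                                                       (char*)inBuf + consumed, &srcLeft);
            if (ZBUFF_isError(hint)) return 1;
            fwrite(outBuf, 1, dstCap, fout);               /* dstCap now holds bytes produced */
            consumed += srcLeft;                           /* srcLeft now holds bytes consumed */
        }
    }
    {   size_t dstCap = outSize;                           /* flush and write the frame epilogue */
        size_t const remaining = ZBUFF_compressEnd(cctx, outBuf, &dstCap);
        if (ZBUFF_isError(remaining) || remaining != 0) return 1;  /* outSize guarantees nothing is left */
        fwrite(outBuf, 1, dstCap, fout);
    }
    ZBUFF_freeCCtx(cctx); free(inBuf); free(outBuf);
    return 0;
}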
102
103
104 typedef struct ZBUFF_DCtx_s ZBUFF_DCtx;
105 ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx(void);
106 ZSTDLIB_API size_t ZBUFF_freeDCtx(ZBUFF_DCtx* dctx);
107
108 ZSTDLIB_API size_t ZBUFF_decompressInit(ZBUFF_DCtx* dctx);
109 ZSTDLIB_API size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* dctx, const void* dict, size_t dictSize);
110
111 ZSTDLIB_API size_t ZBUFF_decompressContinue(ZBUFF_DCtx* dctx,
112 void* dst, size_t* dstCapacityPtr,
113 const void* src, size_t* srcSizePtr);
114
115 /*-***************************************************************************
116 * Streaming decompression howto
117 *
118 * A ZBUFF_DCtx object is required to track streaming operations.
119 * Use ZBUFF_createDCtx() and ZBUFF_freeDCtx() to create/release resources.
120 * Use ZBUFF_decompressInit() to start a new decompression operation,
121 * or ZBUFF_decompressInitDictionary() if decompression requires a dictionary.
122 * Note that ZBUFF_DCtx objects can be re-init multiple times.
123 *
124 * Use ZBUFF_decompressContinue() repetitively to consume your input.
125 * *srcSizePtr and *dstCapacityPtr can be any size.
126 * The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
127 * Note that it may not consume the entire input, in which case it's up to the caller to present remaining input again.
128 * The content of `dst` will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change `dst`.
129 * @return : 0 when a frame is completely decoded and fully flushed,
130 * 1 when there is still some data left within internal buffer to flush,
131 * >1 when more data is expected, with value being a suggested next input size (it's just a hint, which helps latency),
132 * or an error code, which can be tested using ZBUFF_isError().
133 *
134 * Hint : recommended buffer sizes (not compulsory) : ZBUFF_recommendedDInSize() and ZBUFF_recommendedDOutSize()
135 * output : ZBUFF_recommendedDOutSize== 128 KB block size is the internal unit, it ensures it's always possible to write a full block when decoded.
136 * input : ZBUFF_recommendedDInSize == 128KB + 3;
137 * just follow indications from ZBUFF_decompressContinue() to minimize latency. It should always be <= 128 KB + 3 .
138 * *******************************************************************************/
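A matching decompression sketch of the loop described above (single frame, minimal error handling); decompress_stream is a hypothetical helper:

#include <stdio.h>
#include <stdlib.h>
#include "zbuff.h"

/* Decompress one frame from fin into fout. */
static int decompress_stream(FILE* fin, FILE* fout)
{
    size_t const inSize  = ZBUFF_recommendedDInSize();     /* 128 KB + 3 */
    size_t const outSize = ZBUFF_recommendedDOutSize();    /* 128 KB, always room for a full block */
    void* const inBuf  = malloc(inSize);
    void* const outBuf = malloc(outSize);
    ZBUFF_DCtx* const dctx = ZBUFF_createDCtx();
    size_t readBytes;
    if (!inBuf || !outBuf || !dctx) return 1;

    if (ZBUFF_isError(ZBUFF_decompressInit(dctx))) return 1;

    while ((readBytes = fread(inBuf, 1, inSize, fin)) > 0) {
        size_t consumed = 0;
        while (consumed < readBytes) {
            size_t dstCap = outSize;
            size_t srcLeft = readBytes - consumed;
            size_t const ret = ZBUFF_decompressContinue(dctx, outBuf, &dstCap,
                                                        (char*)inBuf + consumed, &srcLeft);
            if (ZBUFF_isError(ret)) return 1;
            fwrite(outBuf, 1, dstCap, fout);                /* dstCap now holds bytes decoded */
            consumed += srcLeft;                            /* srcLeft now holds bytes consumed */
            if (ret == 0) break;  /* frame fully decoded and flushed; a real tool would handle concatenated frames here */
        }
    }
    ZBUFF_freeDCtx(dctx); free(inBuf); free(outBuf);
    return 0;
}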
139
140
141 /* *************************************
142 * Tool functions
143 ***************************************/
144 ZSTDLIB_API unsigned ZBUFF_isError(size_t errorCode);
145 ZSTDLIB_API const char* ZBUFF_getErrorName(size_t errorCode);
146
147 /** Functions below provide recommended buffer sizes for Compression or Decompression operations.
148 * These sizes are just hints, they tend to offer better latency */
149 ZSTDLIB_API size_t ZBUFF_recommendedCInSize(void);
150 ZSTDLIB_API size_t ZBUFF_recommendedCOutSize(void);
151 ZSTDLIB_API size_t ZBUFF_recommendedDInSize(void);
152 ZSTDLIB_API size_t ZBUFF_recommendedDOutSize(void);
153
154
155 #ifdef ZBUFF_STATIC_LINKING_ONLY
156
157 /* ====================================================================================
158 * The definitions in this section are considered experimental.
159 * They should never be used in association with a dynamic library, as they may change in the future.
160 * They are provided for advanced usages.
161 * Use them only in association with static linking.
162 * ==================================================================================== */
163
164 /*--- Dependency ---*/
165 #define ZSTD_STATIC_LINKING_ONLY /* ZSTD_parameters, ZSTD_customMem */
166 #include "zstd.h"
167
168
169 /*--- Custom memory allocator ---*/
170 /*! ZBUFF_createCCtx_advanced() :
171 * Create a ZBUFF compression context using external alloc and free functions */
172 ZSTDLIB_API ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem);
173
174 /*! ZBUFF_createDCtx_advanced() :
175 * Create a ZBUFF decompression context using external alloc and free functions */
176 ZSTDLIB_API ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem);
177
178
179 /*--- Advanced Streaming Initialization ---*/
180 ZSTDLIB_API size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
181 const void* dict, size_t dictSize,
182 ZSTD_parameters params, unsigned long long pledgedSrcSize);
183
184 #endif /* ZBUFF_STATIC_LINKING_ONLY */
185
186
187 #if defined (__cplusplus)
188 }
189 #endif
190
191 #endif /* ZSTD_BUFFERED_H_23987 */
@@ -0,0 +1,83 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11
12 /*-*************************************
13 * Dependencies
14 ***************************************/
15 #include <stdlib.h> /* malloc */
16 #include "error_private.h"
17 #define ZSTD_STATIC_LINKING_ONLY
18 #include "zstd.h" /* declaration of ZSTD_isError, ZSTD_getErrorName, ZSTD_getErrorCode, ZSTD_getErrorString, ZSTD_versionNumber */
19 #include "zbuff.h" /* declaration of ZBUFF_isError, ZBUFF_getErrorName */
20
21
22 /*-****************************************
23 * Version
24 ******************************************/
25 unsigned ZSTD_versionNumber (void) { return ZSTD_VERSION_NUMBER; }
26
27
28 /*-****************************************
29 * ZSTD Error Management
30 ******************************************/
31 /*! ZSTD_isError() :
32 * tells if a return value is an error code */
33 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
34
35 /*! ZSTD_getErrorName() :
36 * provides error code string from function result (useful for debugging) */
37 const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
38
39 /*! ZSTD_getError() :
40 * convert a `size_t` function result into a proper ZSTD_errorCode enum */
41 ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
42
43 /*! ZSTD_getErrorString() :
44 * provides error code string from enum */
45 const char* ZSTD_getErrorString(ZSTD_ErrorCode code) { return ERR_getErrorName(code); }
46
47
48 /* **************************************************************
49 * ZBUFF Error Management
50 ****************************************************************/
51 unsigned ZBUFF_isError(size_t errorCode) { return ERR_isError(errorCode); }
52
53 const char* ZBUFF_getErrorName(size_t errorCode) { return ERR_getErrorName(errorCode); }
54
55
56
57 /*=**************************************************************
58 * Custom allocator
59 ****************************************************************/
60 /* default uses stdlib */
61 void* ZSTD_defaultAllocFunction(void* opaque, size_t size)
62 {
63 void* address = malloc(size);
64 (void)opaque;
65 return address;
66 }
67
68 void ZSTD_defaultFreeFunction(void* opaque, void* address)
69 {
70 (void)opaque;
71 free(address);
72 }
73
74 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem)
75 {
76 return customMem.customAlloc(customMem.opaque, size);
77 }
78
79 void ZSTD_free(void* ptr, ZSTD_customMem customMem)
80 {
81 if (ptr!=NULL)
82 customMem.customFree(customMem.opaque, ptr);
83 }
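A hedged sketch of plugging user-supplied hooks into the ZSTD_customMem mechanism implemented above; countingAlloc/countingFree are hypothetical helpers, and ZBUFF_createCCtx_advanced() (from the static-linking section of zbuff.h) is used as the entry point that accepts them:

#include <stdlib.h>
#define ZBUFF_STATIC_LINKING_ONLY   /* exposes ZBUFF_createCCtx_advanced() */
#include "zbuff.h"

/* Allocation hook that tracks the total number of bytes requested. */
static void* countingAlloc(void* opaque, size_t size)
{
    *(size_t*)opaque += size;       /* opaque carries the caller's counter */
    return malloc(size);
}

static void countingFree(void* opaque, void* address)
{
    (void)opaque;
    free(address);
}

static ZBUFF_CCtx* createTrackedCCtx(size_t* counter)
{
    /* Field order matches defaultCustomMem: customAlloc, customFree, opaque. */
    ZSTD_customMem const mem = { countingAlloc, countingFree, counter };
    return ZBUFF_createCCtx_advanced(mem);
}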
@@ -0,0 +1,60 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 #ifndef ZSTD_ERRORS_H_398273423
11 #define ZSTD_ERRORS_H_398273423
12
13 #if defined (__cplusplus)
14 extern "C" {
15 #endif
16
17 /*===== dependency =====*/
18 #include <stddef.h> /* size_t */
19
20
21 /*-****************************************
22 * error codes list
23 ******************************************/
24 typedef enum {
25 ZSTD_error_no_error,
26 ZSTD_error_GENERIC,
27 ZSTD_error_prefix_unknown,
28 ZSTD_error_version_unsupported,
29 ZSTD_error_parameter_unknown,
30 ZSTD_error_frameParameter_unsupported,
31 ZSTD_error_frameParameter_unsupportedBy32bits,
32 ZSTD_error_frameParameter_windowTooLarge,
33 ZSTD_error_compressionParameter_unsupported,
34 ZSTD_error_init_missing,
35 ZSTD_error_memory_allocation,
36 ZSTD_error_stage_wrong,
37 ZSTD_error_dstSize_tooSmall,
38 ZSTD_error_srcSize_wrong,
39 ZSTD_error_corruption_detected,
40 ZSTD_error_checksum_wrong,
41 ZSTD_error_tableLog_tooLarge,
42 ZSTD_error_maxSymbolValue_tooLarge,
43 ZSTD_error_maxSymbolValue_tooSmall,
44 ZSTD_error_dictionary_corrupted,
45 ZSTD_error_dictionary_wrong,
46 ZSTD_error_maxCode
47 } ZSTD_ErrorCode;
48
49 /*! ZSTD_getErrorCode() :
50 convert a `size_t` function result into a `ZSTD_ErrorCode` enum type,
51 which can be compared directly against the enum list published in "error_public.h" */
52 ZSTD_ErrorCode ZSTD_getErrorCode(size_t functionResult);
53 const char* ZSTD_getErrorString(ZSTD_ErrorCode code);
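A small sketch of using the two functions declared above to act on specific error codes; report is a hypothetical helper, and ZSTD_isError() comes from zstd.h:

#include <stdio.h>
#include "zstd.h"            /* ZSTD_isError() */
#include "error_public.h"    /* this header: ZSTD_ErrorCode, ZSTD_getErrorCode(), ZSTD_getErrorString() */

static void report(size_t ret)   /* ret: the size_t result of any ZSTD_* call */
{
    if (ZSTD_isError(ret)) {
        ZSTD_ErrorCode const code = ZSTD_getErrorCode(ret);
        if (code == ZSTD_error_dstSize_tooSmall)
            fprintf(stderr, "destination buffer too small\n");
        else
            fprintf(stderr, "zstd error: %s\n", ZSTD_getErrorString(code));
    }
}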
54
55
56 #if defined (__cplusplus)
57 }
58 #endif
59
60 #endif /* ZSTD_ERRORS_H_398273423 */
@@ -0,0 +1,267 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10 #ifndef ZSTD_CCOMMON_H_MODULE
11 #define ZSTD_CCOMMON_H_MODULE
12
13 /*-*******************************************************
14 * Compiler specifics
15 *********************************************************/
16 #ifdef _MSC_VER /* Visual Studio */
17 # define FORCE_INLINE static __forceinline
18 # include <intrin.h> /* For Visual 2005 */
19 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
20 # pragma warning(disable : 4324) /* disable: C4324: padded structure */
21 # pragma warning(disable : 4100) /* disable: C4100: unreferenced formal parameter */
22 #else
23 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
24 # ifdef __GNUC__
25 # define FORCE_INLINE static inline __attribute__((always_inline))
26 # else
27 # define FORCE_INLINE static inline
28 # endif
29 # else
30 # define FORCE_INLINE static
31 # endif /* __STDC_VERSION__ */
32 #endif
33
34 #ifdef _MSC_VER
35 # define FORCE_NOINLINE static __declspec(noinline)
36 #else
37 # ifdef __GNUC__
38 # define FORCE_NOINLINE static __attribute__((__noinline__))
39 # else
40 # define FORCE_NOINLINE static
41 # endif
42 #endif
43
44
45 /*-*************************************
46 * Dependencies
47 ***************************************/
48 #include "mem.h"
49 #include "error_private.h"
50 #define ZSTD_STATIC_LINKING_ONLY
51 #include "zstd.h"
52
53
54 /*-*************************************
55 * shared macros
56 ***************************************/
57 #define MIN(a,b) ((a)<(b) ? (a) : (b))
58 #define MAX(a,b) ((a)>(b) ? (a) : (b))
59 #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */
60 #define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */
61
62
63 /*-*************************************
64 * Common constants
65 ***************************************/
66 #define ZSTD_OPT_NUM (1<<12)
67 #define ZSTD_DICT_MAGIC 0xEC30A437 /* v0.7+ */
68
69 #define ZSTD_REP_NUM 3 /* number of repcodes */
70 #define ZSTD_REP_CHECK (ZSTD_REP_NUM) /* number of repcodes to check by the optimal parser */
71 #define ZSTD_REP_MOVE (ZSTD_REP_NUM-1)
72 #define ZSTD_REP_MOVE_OPT (ZSTD_REP_NUM)
73 static const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
74
75 #define KB *(1 <<10)
76 #define MB *(1 <<20)
77 #define GB *(1U<<30)
78
79 #define BIT7 128
80 #define BIT6 64
81 #define BIT5 32
82 #define BIT4 16
83 #define BIT1 2
84 #define BIT0 1
85
86 #define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
87 static const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
88 static const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };
89
90 #define ZSTD_BLOCKHEADERSIZE 3 /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
91 static const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
92 typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;
93
94 #define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
95 #define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */) /* for a non-null block */
96
97 #define HufLog 12
98 typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;
99
100 #define LONGNBSEQ 0x7F00
101
102 #define MINMATCH 3
103 #define EQUAL_READ32 4
104
105 #define Litbits 8
106 #define MaxLit ((1<<Litbits) - 1)
107 #define MaxML 52
108 #define MaxLL 35
109 #define MaxOff 28
110 #define MaxSeq MAX(MaxLL, MaxML) /* Assumption : MaxOff < MaxLL,MaxML */
111 #define MLFSELog 9
112 #define LLFSELog 9
113 #define OffFSELog 8
114
115 static const U32 LL_bits[MaxLL+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
116 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9,10,11,12,
117 13,14,15,16 };
118 static const S16 LL_defaultNorm[MaxLL+1] = { 4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1,
119 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1,
120 -1,-1,-1,-1 };
121 #define LL_DEFAULTNORMLOG 6 /* for static allocation */
122 static const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
123
124 static const U32 ML_bits[MaxML+1] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
125 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9,10,11,
127 12,13,14,15,16 };
128 static const S16 ML_defaultNorm[MaxML+1] = { 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
129 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
130 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,
131 -1,-1,-1,-1,-1 };
132 #define ML_DEFAULTNORMLOG 6 /* for static allocation */
133 static const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;
134
135 static const S16 OF_defaultNorm[MaxOff+1] = { 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1,
136 1, 1, 1, 1, 1, 1, 1, 1,-1,-1,-1,-1,-1 };
137 #define OF_DEFAULTNORMLOG 5 /* for static allocation */
138 static const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
139
140
141 /*-*******************************************
142 * Shared functions to include for inlining
143 *********************************************/
144 static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); }
145 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
146
147 /*! ZSTD_wildcopy() :
148 * custom version of memcpy(), can copy up to 7 bytes too many (8 bytes if length==0) */
149 #define WILDCOPY_OVERLENGTH 8
150 MEM_STATIC void ZSTD_wildcopy(void* dst, const void* src, size_t length)
151 {
152 const BYTE* ip = (const BYTE*)src;
153 BYTE* op = (BYTE*)dst;
154 BYTE* const oend = op + length;
155 do
156 COPY8(op, ip)
157 while (op < oend);
158 }
159
160 MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platforms */
161 {
162 const BYTE* ip = (const BYTE*)src;
163 BYTE* op = (BYTE*)dst;
164 BYTE* const oend = (BYTE*)dstEnd;
165 do
166 COPY8(op, ip)
167 while (op < oend);
168 }
169
170
171 /*-*******************************************
172 * Private interfaces
173 *********************************************/
174 typedef struct ZSTD_stats_s ZSTD_stats_t;
175
176 typedef struct {
177 U32 off;
178 U32 len;
179 } ZSTD_match_t;
180
181 typedef struct {
182 U32 price;
183 U32 off;
184 U32 mlen;
185 U32 litlen;
186 U32 rep[ZSTD_REP_NUM];
187 } ZSTD_optimal_t;
188
189
190 typedef struct seqDef_s {
191 U32 offset;
192 U16 litLength;
193 U16 matchLength;
194 } seqDef;
195
196
197 typedef struct {
198 seqDef* sequencesStart;
199 seqDef* sequences;
200 BYTE* litStart;
201 BYTE* lit;
202 BYTE* llCode;
203 BYTE* mlCode;
204 BYTE* ofCode;
205 U32 longLengthID; /* 0 == no longLength; 1 == Lit.longLength; 2 == Match.longLength; */
206 U32 longLengthPos;
207 /* opt */
208 ZSTD_optimal_t* priceTable;
209 ZSTD_match_t* matchTable;
210 U32* matchLengthFreq;
211 U32* litLengthFreq;
212 U32* litFreq;
213 U32* offCodeFreq;
214 U32 matchLengthSum;
215 U32 matchSum;
216 U32 litLengthSum;
217 U32 litSum;
218 U32 offCodeSum;
219 U32 log2matchLengthSum;
220 U32 log2matchSum;
221 U32 log2litLengthSum;
222 U32 log2litSum;
223 U32 log2offCodeSum;
224 U32 factor;
225 U32 cachedPrice;
226 U32 cachedLitLength;
227 const BYTE* cachedLiterals;
228 } seqStore_t;
229
230 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);
231 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);
232 int ZSTD_isSkipFrame(ZSTD_DCtx* dctx);
233
234 /* custom memory allocation functions */
235 void* ZSTD_defaultAllocFunction(void* opaque, size_t size);
236 void ZSTD_defaultFreeFunction(void* opaque, void* address);
237 static const ZSTD_customMem defaultCustomMem = { ZSTD_defaultAllocFunction, ZSTD_defaultFreeFunction, NULL };
238 void* ZSTD_malloc(size_t size, ZSTD_customMem customMem);
239 void ZSTD_free(void* ptr, ZSTD_customMem customMem);
240
241
242 /*====== common function ======*/
243
244 MEM_STATIC U32 ZSTD_highbit32(U32 val)
245 {
246 # if defined(_MSC_VER) /* Visual */
247 unsigned long r=0;
248 _BitScanReverse(&r, val);
249 return (unsigned)r;
250 # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */
251 return 31 - __builtin_clz(val);
252 # else /* Software version */
253 static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
254 U32 v = val;
255 int r;
256 v |= v >> 1;
257 v |= v >> 2;
258 v |= v >> 4;
259 v |= v >> 8;
260 v |= v >> 16;
261 r = DeBruijnClz[(U32)(v * 0x07C4ACDDU) >> 27];
262 return r;
263 # endif
264 }
265
266
267 #endif /* ZSTD_CCOMMON_H_MODULE */
@@ -0,0 +1,810 b''
1 /* ******************************************************************
2 FSE : Finite State Entropy encoder
3 Copyright (C) 2013-2015, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34
35 /* **************************************************************
36 * Compiler specifics
37 ****************************************************************/
38 #ifdef _MSC_VER /* Visual Studio */
39 # define FORCE_INLINE static __forceinline
40 # include <intrin.h> /* For Visual 2005 */
41 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
42 # pragma warning(disable : 4214) /* disable: C4214: non-int bitfields */
43 #else
44 # if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
45 # ifdef __GNUC__
46 # define FORCE_INLINE static inline __attribute__((always_inline))
47 # else
48 # define FORCE_INLINE static inline
49 # endif
50 # else
51 # define FORCE_INLINE static
52 # endif /* __STDC_VERSION__ */
53 #endif
54
55
56 /* **************************************************************
57 * Includes
58 ****************************************************************/
59 #include <stdlib.h> /* malloc, free, qsort */
60 #include <string.h> /* memcpy, memset */
61 #include <stdio.h> /* printf (debug) */
62 #include "bitstream.h"
63 #define FSE_STATIC_LINKING_ONLY
64 #include "fse.h"
65
66
67 /* **************************************************************
68 * Error Management
69 ****************************************************************/
70 #define FSE_STATIC_ASSERT(c) { enum { FSE_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
71
72
73 /* **************************************************************
74 * Complex types
75 ****************************************************************/
76 typedef U32 CTable_max_t[FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)];
77
78
79 /* **************************************************************
80 * Templates
81 ****************************************************************/
82 /*
83 designed to be included
84 for type-specific functions (template emulation in C)
85 Objective is to write these functions only once, for improved maintenance
86 */
87
88 /* safety checks */
89 #ifndef FSE_FUNCTION_EXTENSION
90 # error "FSE_FUNCTION_EXTENSION must be defined"
91 #endif
92 #ifndef FSE_FUNCTION_TYPE
93 # error "FSE_FUNCTION_TYPE must be defined"
94 #endif
95
96 /* Function names */
97 #define FSE_CAT(X,Y) X##Y
98 #define FSE_FUNCTION_NAME(X,Y) FSE_CAT(X,Y)
99 #define FSE_TYPE_NAME(X,Y) FSE_CAT(X,Y)
100
101
102 /* Function templates */
103 size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
104 {
105 U32 const tableSize = 1 << tableLog;
106 U32 const tableMask = tableSize - 1;
107 void* const ptr = ct;
108 U16* const tableU16 = ( (U16*) ptr) + 2;
109 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
110 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
111 U32 const step = FSE_TABLESTEP(tableSize);
112 U32 cumul[FSE_MAX_SYMBOL_VALUE+2];
113
114 FSE_FUNCTION_TYPE tableSymbol[FSE_MAX_TABLESIZE]; /* memset() is not necessary, even if a static analyzer complains about it */
115 U32 highThreshold = tableSize-1;
116
117 /* CTable header */
118 tableU16[-2] = (U16) tableLog;
119 tableU16[-1] = (U16) maxSymbolValue;
120
121 /* For explanations on how to distribute symbol values over the table :
122 * http://fastcompression.blogspot.fr/2014/02/fse-distributing-symbol-values.html */
123
124 /* symbol start positions */
125 { U32 u;
126 cumul[0] = 0;
127 for (u=1; u<=maxSymbolValue+1; u++) {
128 if (normalizedCounter[u-1]==-1) { /* Low proba symbol */
129 cumul[u] = cumul[u-1] + 1;
130 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
131 } else {
132 cumul[u] = cumul[u-1] + normalizedCounter[u-1];
133 } }
134 cumul[maxSymbolValue+1] = tableSize+1;
135 }
136
137 /* Spread symbols */
138 { U32 position = 0;
139 U32 symbol;
140 for (symbol=0; symbol<=maxSymbolValue; symbol++) {
141 int nbOccurences;
142 for (nbOccurences=0; nbOccurences<normalizedCounter[symbol]; nbOccurences++) {
143 tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol;
144 position = (position + step) & tableMask;
145 while (position > highThreshold) position = (position + step) & tableMask; /* Low proba area */
146 } }
147
148 if (position!=0) return ERROR(GENERIC); /* Must have gone through all positions */
149 }
150
151 /* Build table */
152 { U32 u; for (u=0; u<tableSize; u++) {
153 FSE_FUNCTION_TYPE s = tableSymbol[u]; /* note : a static analyzer may not understand that tableSymbol is properly initialized */
154 tableU16[cumul[s]++] = (U16) (tableSize+u); /* TableU16 : sorted by symbol order; gives next state value */
155 } }
156
157 /* Build Symbol Transformation Table */
158 { unsigned total = 0;
159 unsigned s;
160 for (s=0; s<=maxSymbolValue; s++) {
161 switch (normalizedCounter[s])
162 {
163 case 0: break;
164
165 case -1:
166 case 1:
167 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
168 symbolTT[s].deltaFindState = total - 1;
169 total ++;
170 break;
171 default :
172 {
173 U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
174 U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
175 symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
176 symbolTT[s].deltaFindState = total - normalizedCounter[s];
177 total += normalizedCounter[s];
178 } } } }
179
180 return 0;
181 }
182
183
184
185 #ifndef FSE_COMMONDEFS_ONLY
186
187 /*-**************************************************************
188 * FSE NCount encoding-decoding
189 ****************************************************************/
190 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
191 {
192 size_t maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
193 return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND; /* maxSymbolValue==0 ? use default */
194 }
195
196 static short FSE_abs(short a) { return (short)(a<0 ? -a : a); }
197
198 static size_t FSE_writeNCount_generic (void* header, size_t headerBufferSize,
199 const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog,
200 unsigned writeIsSafe)
201 {
202 BYTE* const ostart = (BYTE*) header;
203 BYTE* out = ostart;
204 BYTE* const oend = ostart + headerBufferSize;
205 int nbBits;
206 const int tableSize = 1 << tableLog;
207 int remaining;
208 int threshold;
209 U32 bitStream;
210 int bitCount;
211 unsigned charnum = 0;
212 int previous0 = 0;
213
214 bitStream = 0;
215 bitCount = 0;
216 /* Table Size */
217 bitStream += (tableLog-FSE_MIN_TABLELOG) << bitCount;
218 bitCount += 4;
219
220 /* Init */
221 remaining = tableSize+1; /* +1 for extra accuracy */
222 threshold = tableSize;
223 nbBits = tableLog+1;
224
225 while (remaining>1) { /* stops at 1 */
226 if (previous0) {
227 unsigned start = charnum;
228 while (!normalizedCounter[charnum]) charnum++;
229 while (charnum >= start+24) {
230 start+=24;
231 bitStream += 0xFFFFU << bitCount;
232 if ((!writeIsSafe) && (out > oend-2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
233 out[0] = (BYTE) bitStream;
234 out[1] = (BYTE)(bitStream>>8);
235 out+=2;
236 bitStream>>=16;
237 }
238 while (charnum >= start+3) {
239 start+=3;
240 bitStream += 3 << bitCount;
241 bitCount += 2;
242 }
243 bitStream += (charnum-start) << bitCount;
244 bitCount += 2;
245 if (bitCount>16) {
246 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
247 out[0] = (BYTE)bitStream;
248 out[1] = (BYTE)(bitStream>>8);
249 out += 2;
250 bitStream >>= 16;
251 bitCount -= 16;
252 } }
253 { short count = normalizedCounter[charnum++];
254 const short max = (short)((2*threshold-1)-remaining);
255 remaining -= FSE_abs(count);
256 if (remaining<1) return ERROR(GENERIC);
257 count++; /* +1 for extra accuracy */
258 if (count>=threshold) count += max; /* [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ */
259 bitStream += count << bitCount;
260 bitCount += nbBits;
261 bitCount -= (count<max);
262 previous0 = (count==1);
263 while (remaining<threshold) nbBits--, threshold>>=1;
264 }
265 if (bitCount>16) {
266 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
267 out[0] = (BYTE)bitStream;
268 out[1] = (BYTE)(bitStream>>8);
269 out += 2;
270 bitStream >>= 16;
271 bitCount -= 16;
272 } }
273
274 /* flush remaining bitStream */
275 if ((!writeIsSafe) && (out > oend - 2)) return ERROR(dstSize_tooSmall); /* Buffer overflow */
276 out[0] = (BYTE)bitStream;
277 out[1] = (BYTE)(bitStream>>8);
278 out+= (bitCount+7) /8;
279
280 if (charnum > maxSymbolValue + 1) return ERROR(GENERIC);
281
282 return (out-ostart);
283 }
284
285
286 size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog)
287 {
288 if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC); /* Unsupported */
289 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported */
290
291 if (bufferSize < FSE_NCountWriteBound(maxSymbolValue, tableLog))
292 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 0);
293
294 return FSE_writeNCount_generic(buffer, bufferSize, normalizedCounter, maxSymbolValue, tableLog, 1);
295 }
296
297
298
299 /*-**************************************************************
300 * Counting histogram
301 ****************************************************************/
302 /*! FSE_count_simple
303 This function just counts byte values within `src`,
304 and stores the histogram into table `count`.
305 This function is unsafe : it doesn't check that all values within `src` can fit into `count`.
306 For this reason, prefer using a table `count` with 256 elements.
307 @return : count of most numerous element
308 */
309 static size_t FSE_count_simple(unsigned* count, unsigned* maxSymbolValuePtr,
310 const void* src, size_t srcSize)
311 {
312 const BYTE* ip = (const BYTE*)src;
313 const BYTE* const end = ip + srcSize;
314 unsigned maxSymbolValue = *maxSymbolValuePtr;
315 unsigned max=0;
316
317
318 memset(count, 0, (maxSymbolValue+1)*sizeof(*count));
319 if (srcSize==0) { *maxSymbolValuePtr = 0; return 0; }
320
321 while (ip<end) count[*ip++]++;
322
323 while (!count[maxSymbolValue]) maxSymbolValue--;
324 *maxSymbolValuePtr = maxSymbolValue;
325
326 { U32 s; for (s=0; s<=maxSymbolValue; s++) if (count[s] > max) max = count[s]; }
327
328 return (size_t)max;
329 }
330
331
332 static size_t FSE_count_parallel(unsigned* count, unsigned* maxSymbolValuePtr,
333 const void* source, size_t sourceSize,
334 unsigned checkMax)
335 {
336 const BYTE* ip = (const BYTE*)source;
337 const BYTE* const iend = ip+sourceSize;
338 unsigned maxSymbolValue = *maxSymbolValuePtr;
339 unsigned max=0;
340
341
342 U32 Counting1[256] = { 0 };
343 U32 Counting2[256] = { 0 };
344 U32 Counting3[256] = { 0 };
345 U32 Counting4[256] = { 0 };
346
347 /* safety checks */
348 if (!sourceSize) {
349 memset(count, 0, maxSymbolValue + 1);
350 *maxSymbolValuePtr = 0;
351 return 0;
352 }
353 if (!maxSymbolValue) maxSymbolValue = 255; /* 0 == default */
354
355 /* by stripes of 16 bytes */
356 { U32 cached = MEM_read32(ip); ip += 4;
357 while (ip < iend-15) {
358 U32 c = cached; cached = MEM_read32(ip); ip += 4;
359 Counting1[(BYTE) c ]++;
360 Counting2[(BYTE)(c>>8) ]++;
361 Counting3[(BYTE)(c>>16)]++;
362 Counting4[ c>>24 ]++;
363 c = cached; cached = MEM_read32(ip); ip += 4;
364 Counting1[(BYTE) c ]++;
365 Counting2[(BYTE)(c>>8) ]++;
366 Counting3[(BYTE)(c>>16)]++;
367 Counting4[ c>>24 ]++;
368 c = cached; cached = MEM_read32(ip); ip += 4;
369 Counting1[(BYTE) c ]++;
370 Counting2[(BYTE)(c>>8) ]++;
371 Counting3[(BYTE)(c>>16)]++;
372 Counting4[ c>>24 ]++;
373 c = cached; cached = MEM_read32(ip); ip += 4;
374 Counting1[(BYTE) c ]++;
375 Counting2[(BYTE)(c>>8) ]++;
376 Counting3[(BYTE)(c>>16)]++;
377 Counting4[ c>>24 ]++;
378 }
379 ip-=4;
380 }
381
382 /* finish last symbols */
383 while (ip<iend) Counting1[*ip++]++;
384
385 if (checkMax) { /* verify stats will fit into destination table */
386 U32 s; for (s=255; s>maxSymbolValue; s--) {
387 Counting1[s] += Counting2[s] + Counting3[s] + Counting4[s];
388 if (Counting1[s]) return ERROR(maxSymbolValue_tooSmall);
389 } }
390
391 { U32 s; for (s=0; s<=maxSymbolValue; s++) {
392 count[s] = Counting1[s] + Counting2[s] + Counting3[s] + Counting4[s];
393 if (count[s] > max) max = count[s];
394 }}
395
396 while (!count[maxSymbolValue]) maxSymbolValue--;
397 *maxSymbolValuePtr = maxSymbolValue;
398 return (size_t)max;
399 }
400
401 /* fast variant (unsafe : won't check if src contains values beyond count[] limit) */
402 size_t FSE_countFast(unsigned* count, unsigned* maxSymbolValuePtr,
403 const void* source, size_t sourceSize)
404 {
405 if (sourceSize < 1500) return FSE_count_simple(count, maxSymbolValuePtr, source, sourceSize);
406 return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 0);
407 }
408
409 size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr,
410 const void* source, size_t sourceSize)
411 {
412 if (*maxSymbolValuePtr <255)
413 return FSE_count_parallel(count, maxSymbolValuePtr, source, sourceSize, 1);
414 *maxSymbolValuePtr = 255;
415 return FSE_countFast(count, maxSymbolValuePtr, source, sourceSize);
416 }
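As a usage sketch of the histogram entry point above (illustration only, not part of the vendored sources; `src`/`srcSize` stand for the caller's input buffer, and a 256-entry table is used as the doc comment recommends):

/* Illustration only: typical driver for FSE_count(). */
static size_t example_histogram(const void* src, size_t srcSize)
{
    unsigned count[256];               /* one slot per possible byte value */
    unsigned maxSymbolValue = 255;     /* accept the full byte range */
    size_t const largest = FSE_count(count, &maxSymbolValue, src, srcSize);
    if (FSE_isError(largest)) return largest;   /* propagate the error code */
    /* `largest` = frequency of the most common byte;
     * `maxSymbolValue` has been lowered to the highest byte actually present */
    return largest;
}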
417
418
419
420 /*-**************************************************************
421 * FSE Compression Code
422 ****************************************************************/
423 /*! FSE_sizeof_CTable() :
424 FSE_CTable is a variable size structure which contains :
425 `U16 tableLog;`
426 `U16 maxSymbolValue;`
427 `U16 nextStateNumber[1 << tableLog];` // This size is variable
428 `FSE_symbolCompressionTransform symbolTT[maxSymbolValue+1];` // This size is variable
429 Allocation is manual (C standard does not support variable-size structures).
430 */
431
432 size_t FSE_sizeof_CTable (unsigned maxSymbolValue, unsigned tableLog)
433 {
434 size_t size;
435 FSE_STATIC_ASSERT((size_t)FSE_CTABLE_SIZE_U32(FSE_MAX_TABLELOG, FSE_MAX_SYMBOL_VALUE)*4 >= sizeof(CTable_max_t)); /* A compilation error here means FSE_CTABLE_SIZE_U32 is not large enough */
436 if (tableLog > FSE_MAX_TABLELOG) return ERROR(GENERIC);
437 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
438 return size;
439 }
440
441 FSE_CTable* FSE_createCTable (unsigned maxSymbolValue, unsigned tableLog)
442 {
443 size_t size;
444 if (tableLog > FSE_TABLELOG_ABSOLUTE_MAX) tableLog = FSE_TABLELOG_ABSOLUTE_MAX;
445 size = FSE_CTABLE_SIZE_U32 (tableLog, maxSymbolValue) * sizeof(U32);
446 return (FSE_CTable*)malloc(size);
447 }
448
449 void FSE_freeCTable (FSE_CTable* ct) { free(ct); }
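The layout description above implies a manual allocate / build / free cycle for the variable-size CTable. A minimal sketch of that cycle (illustration only; `normalizedCounter`, `maxSymbolValue` and `tableLog` are assumed to come from FSE_normalizeCount() and FSE_optimalTableLog(), as in FSE_compress2() further below):

/* Illustration only: CTable lifecycle. */
static size_t example_ctable_lifecycle(const short* normalizedCounter,
                                       unsigned maxSymbolValue, unsigned tableLog)
{
    FSE_CTable* const ct = FSE_createCTable(maxSymbolValue, tableLog);  /* sized per FSE_sizeof_CTable() */
    size_t err;
    if (ct == NULL) return ERROR(GENERIC);      /* allocation failure */
    err = FSE_buildCTable(ct, normalizedCounter, maxSymbolValue, tableLog);
    FSE_freeCTable(ct);   /* in real use, keep ct around for FSE_compress_usingCTable() */
    return err;
}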
450
451 /* provides the minimum logSize to safely represent a distribution */
452 static unsigned FSE_minTableLog(size_t srcSize, unsigned maxSymbolValue)
453 {
454 U32 minBitsSrc = BIT_highbit32((U32)(srcSize - 1)) + 1;
455 U32 minBitsSymbols = BIT_highbit32(maxSymbolValue) + 2;
456 U32 minBits = minBitsSrc < minBitsSymbols ? minBitsSrc : minBitsSymbols;
457 return minBits;
458 }
459
460 unsigned FSE_optimalTableLog_internal(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue, unsigned minus)
461 {
462 U32 maxBitsSrc = BIT_highbit32((U32)(srcSize - 1)) - minus;
463 U32 tableLog = maxTableLog;
464 U32 minBits = FSE_minTableLog(srcSize, maxSymbolValue);
465 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
466 if (maxBitsSrc < tableLog) tableLog = maxBitsSrc; /* Accuracy can be reduced */
467 if (minBits > tableLog) tableLog = minBits; /* Need a minimum to safely represent all symbol values */
468 if (tableLog < FSE_MIN_TABLELOG) tableLog = FSE_MIN_TABLELOG;
469 if (tableLog > FSE_MAX_TABLELOG) tableLog = FSE_MAX_TABLELOG;
470 return tableLog;
471 }
472
473 unsigned FSE_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
474 {
475 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 2);
476 }
477
478
479 /* Secondary normalization method.
480 To be used when primary method fails. */
481
482 static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
483 {
484 U32 s;
485 U32 distributed = 0;
486 U32 ToDistribute;
487
488 /* Init */
489 U32 lowThreshold = (U32)(total >> tableLog);
490 U32 lowOne = (U32)((total * 3) >> (tableLog + 1));
491
492 for (s=0; s<=maxSymbolValue; s++) {
493 if (count[s] == 0) {
494 norm[s]=0;
495 continue;
496 }
497 if (count[s] <= lowThreshold) {
498 norm[s] = -1;
499 distributed++;
500 total -= count[s];
501 continue;
502 }
503 if (count[s] <= lowOne) {
504 norm[s] = 1;
505 distributed++;
506 total -= count[s];
507 continue;
508 }
509 norm[s]=-2;
510 }
511 ToDistribute = (1 << tableLog) - distributed;
512
513 if ((total / ToDistribute) > lowOne) {
514 /* risk of rounding to zero */
515 lowOne = (U32)((total * 3) / (ToDistribute * 2));
516 for (s=0; s<=maxSymbolValue; s++) {
517 if ((norm[s] == -2) && (count[s] <= lowOne)) {
518 norm[s] = 1;
519 distributed++;
520 total -= count[s];
521 continue;
522 } }
523 ToDistribute = (1 << tableLog) - distributed;
524 }
525
526 if (distributed == maxSymbolValue+1) {
527 /* all values are pretty poor;
528 probably incompressible data (should have already been detected);
529 find max, then give all remaining points to max */
530 U32 maxV = 0, maxC = 0;
531 for (s=0; s<=maxSymbolValue; s++)
532 if (count[s] > maxC) maxV=s, maxC=count[s];
533 norm[maxV] += (short)ToDistribute;
534 return 0;
535 }
536
537 {
538 U64 const vStepLog = 62 - tableLog;
539 U64 const mid = (1ULL << (vStepLog-1)) - 1;
540 U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total; /* scale on remaining */
541 U64 tmpTotal = mid;
542 for (s=0; s<=maxSymbolValue; s++) {
543 if (norm[s]==-2) {
544 U64 end = tmpTotal + (count[s] * rStep);
545 U32 sStart = (U32)(tmpTotal >> vStepLog);
546 U32 sEnd = (U32)(end >> vStepLog);
547 U32 weight = sEnd - sStart;
548 if (weight < 1)
549 return ERROR(GENERIC);
550 norm[s] = (short)weight;
551 tmpTotal = end;
552 } } }
553
554 return 0;
555 }
556
557
558 size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
559 const unsigned* count, size_t total,
560 unsigned maxSymbolValue)
561 {
562 /* Sanity checks */
563 if (tableLog==0) tableLog = FSE_DEFAULT_TABLELOG;
564 if (tableLog < FSE_MIN_TABLELOG) return ERROR(GENERIC); /* Unsupported size */
565 if (tableLog > FSE_MAX_TABLELOG) return ERROR(tableLog_tooLarge); /* Unsupported size */
566 if (tableLog < FSE_minTableLog(total, maxSymbolValue)) return ERROR(GENERIC); /* Too small tableLog, compression potentially impossible */
567
568 { U32 const rtbTable[] = { 0, 473195, 504333, 520860, 550000, 700000, 750000, 830000 };
569
570 U64 const scale = 62 - tableLog;
571 U64 const step = ((U64)1<<62) / total; /* <== here, one division ! */
572 U64 const vStep = 1ULL<<(scale-20);
573 int stillToDistribute = 1<<tableLog;
574 unsigned s;
575 unsigned largest=0;
576 short largestP=0;
577 U32 lowThreshold = (U32)(total >> tableLog);
578
579 for (s=0; s<=maxSymbolValue; s++) {
580 if (count[s] == total) return 0; /* rle special case */
581 if (count[s] == 0) { normalizedCounter[s]=0; continue; }
582 if (count[s] <= lowThreshold) {
583 normalizedCounter[s] = -1;
584 stillToDistribute--;
585 } else {
586 short proba = (short)((count[s]*step) >> scale);
587 if (proba<8) {
588 U64 restToBeat = vStep * rtbTable[proba];
589 proba += (count[s]*step) - ((U64)proba<<scale) > restToBeat;
590 }
591 if (proba > largestP) largestP=proba, largest=s;
592 normalizedCounter[s] = proba;
593 stillToDistribute -= proba;
594 } }
595 if (-stillToDistribute >= (normalizedCounter[largest] >> 1)) {
596 /* corner case, need another normalization method */
597 size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
598 if (FSE_isError(errorCode)) return errorCode;
599 }
600 else normalizedCounter[largest] += (short)stillToDistribute;
601 }
602
603 #if 0
604 { /* Print Table (debug) */
605 U32 s;
606 U32 nTotal = 0;
607 for (s=0; s<=maxSymbolValue; s++)
608 printf("%3i: %4i \n", s, normalizedCounter[s]);
609 for (s=0; s<=maxSymbolValue; s++)
610 nTotal += abs(normalizedCounter[s]);
611 if (nTotal != (1U<<tableLog))
612 printf("Warning !!! Total == %u != %u !!!", nTotal, 1U<<tableLog);
613 getchar();
614 }
615 #endif
616
617 return tableLog;
618 }
619
620
621 /* fake FSE_CTable, for raw (uncompressed) input */
622 size_t FSE_buildCTable_raw (FSE_CTable* ct, unsigned nbBits)
623 {
624 const unsigned tableSize = 1 << nbBits;
625 const unsigned tableMask = tableSize - 1;
626 const unsigned maxSymbolValue = tableMask;
627 void* const ptr = ct;
628 U16* const tableU16 = ( (U16*) ptr) + 2;
629 void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableSize>>1); /* assumption : tableLog >= 1 */
630 FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
631 unsigned s;
632
633 /* Sanity checks */
634 if (nbBits < 1) return ERROR(GENERIC); /* min size */
635
636 /* header */
637 tableU16[-2] = (U16) nbBits;
638 tableU16[-1] = (U16) maxSymbolValue;
639
640 /* Build table */
641 for (s=0; s<tableSize; s++)
642 tableU16[s] = (U16)(tableSize + s);
643
644 /* Build Symbol Transformation Table */
645 { const U32 deltaNbBits = (nbBits << 16) - (1 << nbBits);
646
647 for (s=0; s<=maxSymbolValue; s++) {
648 symbolTT[s].deltaNbBits = deltaNbBits;
649 symbolTT[s].deltaFindState = s-1;
650 } }
651
652
653 return 0;
654 }
655
656 /* fake FSE_CTable, for rle (100% always same symbol) input */
657 size_t FSE_buildCTable_rle (FSE_CTable* ct, BYTE symbolValue)
658 {
659 void* ptr = ct;
660 U16* tableU16 = ( (U16*) ptr) + 2;
661 void* FSCTptr = (U32*)ptr + 2;
662 FSE_symbolCompressionTransform* symbolTT = (FSE_symbolCompressionTransform*) FSCTptr;
663
664 /* header */
665 tableU16[-2] = (U16) 0;
666 tableU16[-1] = (U16) symbolValue;
667
668 /* Build table */
669 tableU16[0] = 0;
670 tableU16[1] = 0; /* just in case */
671
672 /* Build Symbol Transformation Table */
673 symbolTT[symbolValue].deltaNbBits = 0;
674 symbolTT[symbolValue].deltaFindState = 0;
675
676 return 0;
677 }
678
679
680 static size_t FSE_compress_usingCTable_generic (void* dst, size_t dstSize,
681 const void* src, size_t srcSize,
682 const FSE_CTable* ct, const unsigned fast)
683 {
684 const BYTE* const istart = (const BYTE*) src;
685 const BYTE* const iend = istart + srcSize;
686 const BYTE* ip=iend;
687
688
689 BIT_CStream_t bitC;
690 FSE_CState_t CState1, CState2;
691
692 /* init */
693 if (srcSize <= 2) return 0;
694 { size_t const errorCode = BIT_initCStream(&bitC, dst, dstSize);
695 if (FSE_isError(errorCode)) return 0; }
696
697 #define FSE_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
698
699 if (srcSize & 1) {
700 FSE_initCState2(&CState1, ct, *--ip);
701 FSE_initCState2(&CState2, ct, *--ip);
702 FSE_encodeSymbol(&bitC, &CState1, *--ip);
703 FSE_FLUSHBITS(&bitC);
704 } else {
705 FSE_initCState2(&CState2, ct, *--ip);
706 FSE_initCState2(&CState1, ct, *--ip);
707 }
708
709 /* join to mod 4 */
710 srcSize -= 2;
711 if ((sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) && (srcSize & 2)) { /* test bit 2 */
712 FSE_encodeSymbol(&bitC, &CState2, *--ip);
713 FSE_encodeSymbol(&bitC, &CState1, *--ip);
714 FSE_FLUSHBITS(&bitC);
715 }
716
717 /* 2 or 4 encoding per loop */
718 for ( ; ip>istart ; ) {
719
720 FSE_encodeSymbol(&bitC, &CState2, *--ip);
721
722 if (sizeof(bitC.bitContainer)*8 < FSE_MAX_TABLELOG*2+7 ) /* this test must be static */
723 FSE_FLUSHBITS(&bitC);
724
725 FSE_encodeSymbol(&bitC, &CState1, *--ip);
726
727 if (sizeof(bitC.bitContainer)*8 > FSE_MAX_TABLELOG*4+7 ) { /* this test must be static */
728 FSE_encodeSymbol(&bitC, &CState2, *--ip);
729 FSE_encodeSymbol(&bitC, &CState1, *--ip);
730 }
731
732 FSE_FLUSHBITS(&bitC);
733 }
734
735 FSE_flushCState(&bitC, &CState2);
736 FSE_flushCState(&bitC, &CState1);
737 return BIT_closeCStream(&bitC);
738 }
739
740 size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
741 const void* src, size_t srcSize,
742 const FSE_CTable* ct)
743 {
744 const unsigned fast = (dstSize >= FSE_BLOCKBOUND(srcSize));
745
746 if (fast)
747 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 1);
748 else
749 return FSE_compress_usingCTable_generic(dst, dstSize, src, srcSize, ct, 0);
750 }
751
752
753 size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
754
755 size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog)
756 {
757 const BYTE* const istart = (const BYTE*) src;
758 const BYTE* ip = istart;
759
760 BYTE* const ostart = (BYTE*) dst;
761 BYTE* op = ostart;
762 BYTE* const oend = ostart + dstSize;
763
764 U32 count[FSE_MAX_SYMBOL_VALUE+1];
765 S16 norm[FSE_MAX_SYMBOL_VALUE+1];
766 CTable_max_t ct;
767 size_t errorCode;
768
769 /* init conditions */
770 if (srcSize <= 1) return 0; /* Uncompressible */
771 if (!maxSymbolValue) maxSymbolValue = FSE_MAX_SYMBOL_VALUE;
772 if (!tableLog) tableLog = FSE_DEFAULT_TABLELOG;
773
774 /* Scan input and build symbol stats */
775 errorCode = FSE_count (count, &maxSymbolValue, ip, srcSize);
776 if (FSE_isError(errorCode)) return errorCode;
777 if (errorCode == srcSize) return 1;
778 if (errorCode == 1) return 0; /* each symbol only present once */
779 if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
780
781 tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
782 errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
783 if (FSE_isError(errorCode)) return errorCode;
784
785 /* Write table description header */
786 errorCode = FSE_writeNCount (op, oend-op, norm, maxSymbolValue, tableLog);
787 if (FSE_isError(errorCode)) return errorCode;
788 op += errorCode;
789
790 /* Compress */
791 errorCode = FSE_buildCTable (ct, norm, maxSymbolValue, tableLog);
792 if (FSE_isError(errorCode)) return errorCode;
793 errorCode = FSE_compress_usingCTable(op, oend - op, ip, srcSize, ct);
794 if (errorCode == 0) return 0; /* not enough space for compressed data */
795 op += errorCode;
796
797 /* check compressibility */
798 if ( (size_t)(op-ostart) >= srcSize-1 )
799 return 0;
800
801 return op-ostart;
802 }
803
804 size_t FSE_compress (void* dst, size_t dstSize, const void* src, size_t srcSize)
805 {
806 return FSE_compress2(dst, dstSize, src, (U32)srcSize, FSE_MAX_SYMBOL_VALUE, FSE_DEFAULT_TABLELOG);
807 }
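A one-shot usage sketch of the wrapper above, interpreting the return conventions established in FSE_compress2() (illustration only; `dst`, `dstCapacity`, `src`, `srcSize` are caller-provided buffers):

/* Illustration only: one-shot FSE compression. */
static void example_fse_compress(void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize)
{
    size_t const cSize = FSE_compress(dst, dstCapacity, src, srcSize);
    if (FSE_isError(cSize)) { /* handle error */ }
    else if (cSize <= 1)    { /* not compressible (0) or trivial input (1) : store src raw */ }
    else                    { /* cSize bytes of compressed data written to dst */ }
}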
808
809
810 #endif /* FSE_COMMONDEFS_ONLY */
@@ -0,0 +1,533 b''
1 /* ******************************************************************
2 Huffman encoder, part of New Generation Entropy library
3 Copyright (C) 2013-2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34
35 /* **************************************************************
36 * Compiler specifics
37 ****************************************************************/
38 #ifdef _MSC_VER /* Visual Studio */
39 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
40 #endif
41
42
43 /* **************************************************************
44 * Includes
45 ****************************************************************/
46 #include <string.h> /* memcpy, memset */
47 #include <stdio.h> /* printf (debug) */
48 #include "bitstream.h"
49 #define FSE_STATIC_LINKING_ONLY /* FSE_optimalTableLog_internal */
50 #include "fse.h" /* header compression */
51 #define HUF_STATIC_LINKING_ONLY
52 #include "huf.h"
53
54
55 /* **************************************************************
56 * Error Management
57 ****************************************************************/
58 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
59
60
61 /* **************************************************************
62 * Utils
63 ****************************************************************/
64 unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxSymbolValue)
65 {
66 return FSE_optimalTableLog_internal(maxTableLog, srcSize, maxSymbolValue, 1);
67 }
68
69
70 /* *******************************************************
71 * HUF : Huffman block compression
72 *********************************************************/
73 struct HUF_CElt_s {
74 U16 val;
75 BYTE nbBits;
76 }; /* typedef'd to HUF_CElt within "huf.h" */
77
78 typedef struct nodeElt_s {
79 U32 count;
80 U16 parent;
81 BYTE byte;
82 BYTE nbBits;
83 } nodeElt;
84
85 /*! HUF_writeCTable() :
86 `CTable` : huffman tree to save, using huf representation.
87 @return : size of saved CTable */
88 size_t HUF_writeCTable (void* dst, size_t maxDstSize,
89 const HUF_CElt* CTable, U32 maxSymbolValue, U32 huffLog)
90 {
91 BYTE bitsToWeight[HUF_TABLELOG_MAX + 1];
92 BYTE huffWeight[HUF_SYMBOLVALUE_MAX];
93 BYTE* op = (BYTE*)dst;
94 U32 n;
95
96 /* check conditions */
97 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
98
99 /* convert to weight */
100 bitsToWeight[0] = 0;
101 for (n=1; n<huffLog+1; n++)
102 bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
103 for (n=0; n<maxSymbolValue; n++)
104 huffWeight[n] = bitsToWeight[CTable[n].nbBits];
105
106 { size_t const size = FSE_compress(op+1, maxDstSize-1, huffWeight, maxSymbolValue);
107 if (FSE_isError(size)) return size;
108 if ((size>1) & (size < maxSymbolValue/2)) { /* FSE compressed */
109 op[0] = (BYTE)size;
110 return size+1;
111 }
112 }
113
114 /* raw values */
115 if (maxSymbolValue > (256-128)) return ERROR(GENERIC); /* should not happen */
116 if (((maxSymbolValue+1)/2) + 1 > maxDstSize) return ERROR(dstSize_tooSmall); /* not enough space within dst buffer */
117 op[0] = (BYTE)(128 /*special case*/ + (maxSymbolValue-1));
118 huffWeight[maxSymbolValue] = 0; /* to be sure it doesn't cause issue in final combination */
119 for (n=0; n<maxSymbolValue; n+=2)
120 op[(n/2)+1] = (BYTE)((huffWeight[n] << 4) + huffWeight[n+1]);
121 return ((maxSymbolValue+1)/2) + 1;
122
123 }
124
125
126 size_t HUF_readCTable (HUF_CElt* CTable, U32 maxSymbolValue, const void* src, size_t srcSize)
127 {
128 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
129 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
130 U32 tableLog = 0;
131 size_t readSize;
132 U32 nbSymbols = 0;
133 /*memset(huffWeight, 0, sizeof(huffWeight));*/ /* is not necessary, even though some analyzers complain ... */
134
135 /* get symbol weights */
136 readSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize);
137 if (HUF_isError(readSize)) return readSize;
138
139 /* check result */
140 if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
141 if (nbSymbols > maxSymbolValue+1) return ERROR(maxSymbolValue_tooSmall);
142
143 /* Prepare base value per rank */
144 { U32 n, nextRankStart = 0;
145 for (n=1; n<=tableLog; n++) {
146 U32 current = nextRankStart;
147 nextRankStart += (rankVal[n] << (n-1));
148 rankVal[n] = current;
149 } }
150
151 /* fill nbBits */
152 { U32 n; for (n=0; n<nbSymbols; n++) {
153 const U32 w = huffWeight[n];
154 CTable[n].nbBits = (BYTE)(tableLog + 1 - w);
155 } }
156
157 /* fill val */
158 { U16 nbPerRank[HUF_TABLELOG_MAX+2] = {0}; /* support w=0=>n=tableLog+1 */
159 U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
160 { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
161 /* determine starting value per rank */
162 valPerRank[tableLog+1] = 0; /* for w==0 */
163 { U16 min = 0;
164 U32 n; for (n=tableLog; n>0; n--) { /* start at n=tablelog <-> w=1 */
165 valPerRank[n] = min; /* get starting value within each rank */
166 min += nbPerRank[n];
167 min >>= 1;
168 } }
169 /* assign value within rank, symbol order */
170 { U32 n; for (n=0; n<=maxSymbolValue; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
171 }
172
173 return readSize;
174 }
175
176
177 static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
178 {
179 const U32 largestBits = huffNode[lastNonNull].nbBits;
180 if (largestBits <= maxNbBits) return largestBits; /* early exit : no elt > maxNbBits */
181
182 /* there are several too large elements (at least >= 2) */
183 { int totalCost = 0;
184 const U32 baseCost = 1 << (largestBits - maxNbBits);
185 U32 n = lastNonNull;
186
187 while (huffNode[n].nbBits > maxNbBits) {
188 totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits));
189 huffNode[n].nbBits = (BYTE)maxNbBits;
190 n --;
191 } /* n stops at huffNode[n].nbBits <= maxNbBits */
192 while (huffNode[n].nbBits == maxNbBits) n--; /* n end at index of smallest symbol using < maxNbBits */
193
194 /* renorm totalCost */
195 totalCost >>= (largestBits - maxNbBits); /* note : totalCost is necessarily a multiple of baseCost */
196
197 /* repay normalized cost */
198 { U32 const noSymbol = 0xF0F0F0F0;
199 U32 rankLast[HUF_TABLELOG_MAX+2];
200 int pos;
201
202 /* Get pos of last (smallest) symbol per rank */
203 memset(rankLast, 0xF0, sizeof(rankLast));
204 { U32 currentNbBits = maxNbBits;
205 for (pos=n ; pos >= 0; pos--) {
206 if (huffNode[pos].nbBits >= currentNbBits) continue;
207 currentNbBits = huffNode[pos].nbBits; /* < maxNbBits */
208 rankLast[maxNbBits-currentNbBits] = pos;
209 } }
210
211 while (totalCost > 0) {
212 U32 nBitsToDecrease = BIT_highbit32(totalCost) + 1;
213 for ( ; nBitsToDecrease > 1; nBitsToDecrease--) {
214 U32 highPos = rankLast[nBitsToDecrease];
215 U32 lowPos = rankLast[nBitsToDecrease-1];
216 if (highPos == noSymbol) continue;
217 if (lowPos == noSymbol) break;
218 { U32 const highTotal = huffNode[highPos].count;
219 U32 const lowTotal = 2 * huffNode[lowPos].count;
220 if (highTotal <= lowTotal) break;
221 } }
222 /* only triggered when no more rank 1 symbols are left => find the closest one (note : there is necessarily at least one !) */
223 while ((nBitsToDecrease<=HUF_TABLELOG_MAX) && (rankLast[nBitsToDecrease] == noSymbol)) /* HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary */
224 nBitsToDecrease ++;
225 totalCost -= 1 << (nBitsToDecrease-1);
226 if (rankLast[nBitsToDecrease-1] == noSymbol)
227 rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease]; /* this rank is no longer empty */
228 huffNode[rankLast[nBitsToDecrease]].nbBits ++;
229 if (rankLast[nBitsToDecrease] == 0) /* special case, reached largest symbol */
230 rankLast[nBitsToDecrease] = noSymbol;
231 else {
232 rankLast[nBitsToDecrease]--;
233 if (huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease)
234 rankLast[nBitsToDecrease] = noSymbol; /* this rank is now empty */
235 } } /* while (totalCost > 0) */
236
237 while (totalCost < 0) { /* Sometimes, cost correction overshoots */
238 if (rankLast[1] == noSymbol) { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */
239 while (huffNode[n].nbBits == maxNbBits) n--;
240 huffNode[n+1].nbBits--;
241 rankLast[1] = n+1;
242 totalCost++;
243 continue;
244 }
245 huffNode[ rankLast[1] + 1 ].nbBits--;
246 rankLast[1]++;
247 totalCost ++;
248 } } } /* there are several too large elements (at least >= 2) */
249
250 return maxNbBits;
251 }
252
253
254 typedef struct {
255 U32 base;
256 U32 current;
257 } rankPos;
258
259 static void HUF_sort(nodeElt* huffNode, const U32* count, U32 maxSymbolValue)
260 {
261 rankPos rank[32];
262 U32 n;
263
264 memset(rank, 0, sizeof(rank));
265 for (n=0; n<=maxSymbolValue; n++) {
266 U32 r = BIT_highbit32(count[n] + 1);
267 rank[r].base ++;
268 }
269 for (n=30; n>0; n--) rank[n-1].base += rank[n].base;
270 for (n=0; n<32; n++) rank[n].current = rank[n].base;
271 for (n=0; n<=maxSymbolValue; n++) {
272 U32 const c = count[n];
273 U32 const r = BIT_highbit32(c+1) + 1;
274 U32 pos = rank[r].current++;
275 while ((pos > rank[r].base) && (c > huffNode[pos-1].count)) huffNode[pos]=huffNode[pos-1], pos--;
276 huffNode[pos].count = c;
277 huffNode[pos].byte = (BYTE)n;
278 }
279 }
280
281
282 #define STARTNODE (HUF_SYMBOLVALUE_MAX+1)
283 size_t HUF_buildCTable (HUF_CElt* tree, const U32* count, U32 maxSymbolValue, U32 maxNbBits)
284 {
285 nodeElt huffNode0[2*HUF_SYMBOLVALUE_MAX+1 +1];
286 nodeElt* huffNode = huffNode0 + 1;
287 U32 n, nonNullRank;
288 int lowS, lowN;
289 U16 nodeNb = STARTNODE;
290 U32 nodeRoot;
291
292 /* safety checks */
293 if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
294 if (maxSymbolValue > HUF_SYMBOLVALUE_MAX) return ERROR(GENERIC);
295 memset(huffNode0, 0, sizeof(huffNode0));
296
297 /* sort, decreasing order */
298 HUF_sort(huffNode, count, maxSymbolValue);
299
300 /* init for parents */
301 nonNullRank = maxSymbolValue;
302 while(huffNode[nonNullRank].count == 0) nonNullRank--;
303 lowS = nonNullRank; nodeRoot = nodeNb + lowS - 1; lowN = nodeNb;
304 huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count;
305 huffNode[lowS].parent = huffNode[lowS-1].parent = nodeNb;
306 nodeNb++; lowS-=2;
307 for (n=nodeNb; n<=nodeRoot; n++) huffNode[n].count = (U32)(1U<<30);
308 huffNode0[0].count = (U32)(1U<<31);
309
310 /* create parents */
311 while (nodeNb <= nodeRoot) {
312 U32 n1 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
313 U32 n2 = (huffNode[lowS].count < huffNode[lowN].count) ? lowS-- : lowN++;
314 huffNode[nodeNb].count = huffNode[n1].count + huffNode[n2].count;
315 huffNode[n1].parent = huffNode[n2].parent = nodeNb;
316 nodeNb++;
317 }
318
319 /* distribute weights (unlimited tree height) */
320 huffNode[nodeRoot].nbBits = 0;
321 for (n=nodeRoot-1; n>=STARTNODE; n--)
322 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
323 for (n=0; n<=nonNullRank; n++)
324 huffNode[n].nbBits = huffNode[ huffNode[n].parent ].nbBits + 1;
325
326 /* enforce maxTableLog */
327 maxNbBits = HUF_setMaxHeight(huffNode, nonNullRank, maxNbBits);
328
329 /* fill result into tree (val, nbBits) */
330 { U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
331 U16 valPerRank[HUF_TABLELOG_MAX+1] = {0};
332 if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC); /* check fit into table */
333 for (n=0; n<=nonNullRank; n++)
334 nbPerRank[huffNode[n].nbBits]++;
335 /* determine starting value per rank */
336 { U16 min = 0;
337 for (n=maxNbBits; n>0; n--) {
338 valPerRank[n] = min; /* get starting value within each rank */
339 min += nbPerRank[n];
340 min >>= 1;
341 } }
342 for (n=0; n<=maxSymbolValue; n++)
343 tree[huffNode[n].byte].nbBits = huffNode[n].nbBits; /* push nbBits per symbol, symbol order */
344 for (n=0; n<=maxSymbolValue; n++)
345 tree[n].val = valPerRank[tree[n].nbBits]++; /* assign value within rank, symbol order */
346 }
347
348 return maxNbBits;
349 }
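A sketch tying HUF_buildCTable() to HUF_writeCTable(), mirroring the sequence HUF_compress_internal() uses later in this file (illustration only; `count`, `maxSymbolValue`, `huffLog`, `dst` and `dstCapacity` are assumed to be caller-provided, with `count` typically filled by FSE_count()):

/* Illustration only: build a Huffman table from a histogram and serialize its description. */
static size_t example_huf_table(void* dst, size_t dstCapacity,
                                const U32* count, unsigned maxSymbolValue, unsigned huffLog)
{
    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
    size_t const maxBits = HUF_buildCTable(CTable, count, maxSymbolValue, huffLog);
    if (HUF_isError(maxBits)) return maxBits;
    /* maxBits may be lower than the requested huffLog after height limiting */
    return HUF_writeCTable(dst, dstCapacity, CTable, maxSymbolValue, (U32)maxBits);
}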
350
351 static void HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
352 {
353 BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
354 }
355
356 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
357
358 #define HUF_FLUSHBITS(s) (fast ? BIT_flushBitsFast(s) : BIT_flushBits(s))
359
360 #define HUF_FLUSHBITS_1(stream) \
361 if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
362
363 #define HUF_FLUSHBITS_2(stream) \
364 if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
365
366 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
367 {
368 const BYTE* ip = (const BYTE*) src;
369 BYTE* const ostart = (BYTE*)dst;
370 BYTE* const oend = ostart + dstSize;
371 BYTE* op = ostart;
372 size_t n;
373 const unsigned fast = (dstSize >= HUF_BLOCKBOUND(srcSize));
374 BIT_CStream_t bitC;
375
376 /* init */
377 if (dstSize < 8) return 0; /* not enough space to compress */
378 { size_t const errorCode = BIT_initCStream(&bitC, op, oend-op);
379 if (HUF_isError(errorCode)) return 0; }
380
381 n = srcSize & ~3; /* join to mod 4 */
382 switch (srcSize & 3)
383 {
384 case 3 : HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
385 HUF_FLUSHBITS_2(&bitC);
386 case 2 : HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
387 HUF_FLUSHBITS_1(&bitC);
388 case 1 : HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
389 HUF_FLUSHBITS(&bitC);
390 case 0 :
391 default: ;
392 }
393
394 for (; n>0; n-=4) { /* note : n&3==0 at this stage */
395 HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
396 HUF_FLUSHBITS_1(&bitC);
397 HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
398 HUF_FLUSHBITS_2(&bitC);
399 HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
400 HUF_FLUSHBITS_1(&bitC);
401 HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
402 HUF_FLUSHBITS(&bitC);
403 }
404
405 return BIT_closeCStream(&bitC);
406 }
407
408
409 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
410 {
411 size_t const segmentSize = (srcSize+3)/4; /* first 3 segments */
412 const BYTE* ip = (const BYTE*) src;
413 const BYTE* const iend = ip + srcSize;
414 BYTE* const ostart = (BYTE*) dst;
415 BYTE* const oend = ostart + dstSize;
416 BYTE* op = ostart;
417
418 if (dstSize < 6 + 1 + 1 + 1 + 8) return 0; /* minimum space to compress successfully */
419 if (srcSize < 12) return 0; /* no saving possible : too small input */
420 op += 6; /* jumpTable */
421
422 { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
423 if (HUF_isError(cSize)) return cSize;
424 if (cSize==0) return 0;
425 MEM_writeLE16(ostart, (U16)cSize);
426 op += cSize;
427 }
428
429 ip += segmentSize;
430 { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
431 if (HUF_isError(cSize)) return cSize;
432 if (cSize==0) return 0;
433 MEM_writeLE16(ostart+2, (U16)cSize);
434 op += cSize;
435 }
436
437 ip += segmentSize;
438 { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, segmentSize, CTable);
439 if (HUF_isError(cSize)) return cSize;
440 if (cSize==0) return 0;
441 MEM_writeLE16(ostart+4, (U16)cSize);
442 op += cSize;
443 }
444
445 ip += segmentSize;
446 { size_t const cSize = HUF_compress1X_usingCTable(op, oend-op, ip, iend-ip, CTable);
447 if (HUF_isError(cSize)) return cSize;
448 if (cSize==0) return 0;
449 op += cSize;
450 }
451
452 return op-ostart;
453 }
454
455
456 static size_t HUF_compress_internal (
457 void* dst, size_t dstSize,
458 const void* src, size_t srcSize,
459 unsigned maxSymbolValue, unsigned huffLog,
460 unsigned singleStream)
461 {
462 BYTE* const ostart = (BYTE*)dst;
463 BYTE* const oend = ostart + dstSize;
464 BYTE* op = ostart;
465
466 U32 count[HUF_SYMBOLVALUE_MAX+1];
467 HUF_CElt CTable[HUF_SYMBOLVALUE_MAX+1];
468
469 /* checks & inits */
470 if (!srcSize) return 0; /* Uncompressed (note : 1 means rle, so first byte must be correct) */
471 if (!dstSize) return 0; /* cannot fit within dst budget */
472 if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong); /* current block size limit */
473 if (huffLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
474 if (!maxSymbolValue) maxSymbolValue = HUF_SYMBOLVALUE_MAX;
475 if (!huffLog) huffLog = HUF_TABLELOG_DEFAULT;
476
477 /* Scan input and build symbol stats */
478 { size_t const largest = FSE_count (count, &maxSymbolValue, (const BYTE*)src, srcSize);
479 if (HUF_isError(largest)) return largest;
480 if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; } /* single symbol, rle */
481 if (largest <= (srcSize >> 7)+1) return 0; /* Fast heuristic : not compressible enough */
482 }
483
484 /* Build Huffman Tree */
485 huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
486 { size_t const maxBits = HUF_buildCTable (CTable, count, maxSymbolValue, huffLog);
487 if (HUF_isError(maxBits)) return maxBits;
488 huffLog = (U32)maxBits;
489 }
490
491 /* Write table description header */
492 { size_t const hSize = HUF_writeCTable (op, dstSize, CTable, maxSymbolValue, huffLog);
493 if (HUF_isError(hSize)) return hSize;
494 if (hSize + 12 >= srcSize) return 0; /* not useful to try compression */
495 op += hSize;
496 }
497
498 /* Compress */
499 { size_t const cSize = (singleStream) ?
500 HUF_compress1X_usingCTable(op, oend - op, src, srcSize, CTable) : /* single segment */
501 HUF_compress4X_usingCTable(op, oend - op, src, srcSize, CTable);
502 if (HUF_isError(cSize)) return cSize;
503 if (cSize==0) return 0; /* uncompressible */
504 op += cSize;
505 }
506
507 /* check compressibility */
508 if ((size_t)(op-ostart) >= srcSize-1)
509 return 0;
510
511 return op-ostart;
512 }
513
514
515 size_t HUF_compress1X (void* dst, size_t dstSize,
516 const void* src, size_t srcSize,
517 unsigned maxSymbolValue, unsigned huffLog)
518 {
519 return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 1);
520 }
521
522 size_t HUF_compress2 (void* dst, size_t dstSize,
523 const void* src, size_t srcSize,
524 unsigned maxSymbolValue, unsigned huffLog)
525 {
526 return HUF_compress_internal(dst, dstSize, src, srcSize, maxSymbolValue, huffLog, 0);
527 }
528
529
530 size_t HUF_compress (void* dst, size_t maxDstSize, const void* src, size_t srcSize)
531 {
532 return HUF_compress2(dst, maxDstSize, src, (U32)srcSize, 255, HUF_TABLELOG_DEFAULT);
533 }
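A one-shot usage sketch of the public entry point above, interpreting the return conventions established in HUF_compress_internal() (illustration only; buffers are caller-provided):

/* Illustration only: one-shot Huffman compression. */
static void example_huf_compress(void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize)
{
    size_t const cSize = HUF_compress(dst, dstCapacity, src, srcSize);
    if (HUF_isError(cSize)) { /* handle error */ }
    else if (cSize == 0)    { /* not compressible : caller should store src raw */ }
    else if (cSize == 1)    { /* single repeated symbol : first byte of dst holds it */ }
    else                    { /* cSize bytes of compressed data written to dst */ }
}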
@@ -0,0 +1,319 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11
12 /* *************************************
13 * Dependencies
14 ***************************************/
15 #include <stdlib.h>
16 #include "error_private.h"
17 #include "zstd_internal.h" /* MIN, ZSTD_BLOCKHEADERSIZE, defaultCustomMem */
18 #define ZBUFF_STATIC_LINKING_ONLY
19 #include "zbuff.h"
20
21
22 /* *************************************
23 * Constants
24 ***************************************/
25 static size_t const ZBUFF_endFrameSize = ZSTD_BLOCKHEADERSIZE;
26
27
28 /*-***********************************************************
29 * Streaming compression
30 *
31 * A ZBUFF_CCtx object is required to track streaming operation.
32 * Use ZBUFF_createCCtx() and ZBUFF_freeCCtx() to create/release resources.
33 * Use ZBUFF_compressInit() to start a new compression operation.
34 * ZBUFF_CCtx objects can be reused multiple times.
35 *
36 * Use ZBUFF_compressContinue() repeatedly to consume your input.
37 * *srcSizePtr and *dstCapacityPtr can be any size.
38 * The function will report how many bytes were read or written by modifying *srcSizePtr and *dstCapacityPtr.
39 * Note that it may not consume the entire input, in which case it's up to the caller to call the function again with the remaining input.
40 * The content of dst will be overwritten (up to *dstCapacityPtr) at each function call, so save its content if it matters, or change dst.
41 * @return : a hint to preferred nb of bytes to use as input for next function call (it's only a hint, to improve latency)
42 * or an error code, which can be tested using ZBUFF_isError().
43 *
44 * ZBUFF_compressFlush() can be used to instruct ZBUFF to compress and output whatever remains within its buffer.
45 * Note that it will not output more than *dstCapacityPtr.
46 * Therefore, some content might still be left in its internal buffer if the dst buffer is too small.
47 * @return : nb of bytes still present in the internal buffer (0 if it's empty)
48 * or an error code, which can be tested using ZBUFF_isError().
49 *
50 * ZBUFF_compressEnd() instructs to finish a frame.
51 * It will perform a flush and write frame epilogue.
52 * Similar to ZBUFF_compressFlush(), it may not be able to output the entire internal buffer content if *dstCapacityPtr is too small.
53 * @return : nb of bytes still present in the internal buffer (0 if it's empty)
54 * or an error code, which can be tested using ZBUFF_isError().
55 *
56 * Hint : recommended buffer sizes (not compulsory)
57 * input : ZSTD_BLOCKSIZE_MAX (128 KB), internal unit size, it improves latency to use this value.
58 * output : ZSTD_compressBound(ZSTD_BLOCKSIZE_MAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize : ensures it's always possible to write/flush/end a full block at best speed.
59 * ***********************************************************/
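The streaming flow described above can be sketched as follows (illustration only; hypothetical buffer names, no real I/O loop, and only the functions declared in this file are used):

/* Illustration only: init / continue / end calling sequence. */
static size_t example_zbuff_stream(void* outBuff, size_t outSize,
                                   const void* inBuff, size_t inSize)
{
    ZBUFF_CCtx* const zbc = ZBUFF_createCCtx();
    size_t srcSize = inSize;
    size_t written = outSize;    /* in/out : capacity on entry, bytes written on return */
    size_t hint, remaining, endCapacity;
    if (zbc == NULL) return ERROR(memory_allocation);

    hint = ZBUFF_compressInit(zbc, 1 /* compression level */);
    if (!ZBUFF_isError(hint))
        hint = ZBUFF_compressContinue(zbc, outBuff, &written, inBuff, &srcSize);
    /* on return : `written` = bytes produced, `srcSize` = bytes consumed;
     * in real use, call again (typically in a loop) until all input is consumed */
    if (ZBUFF_isError(hint)) { ZBUFF_freeCCtx(zbc); return hint; }

    endCapacity = outSize - written;
    remaining = ZBUFF_compressEnd(zbc, (char*)outBuff + written, &endCapacity);
    ZBUFF_freeCCtx(zbc);
    return remaining;    /* 0 when the frame has been fully flushed */
}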
60
61 typedef enum { ZBUFFcs_init, ZBUFFcs_load, ZBUFFcs_flush, ZBUFFcs_final } ZBUFF_cStage;
62
63 /* *** Resources *** */
64 struct ZBUFF_CCtx_s {
65 ZSTD_CCtx* zc;
66 char* inBuff;
67 size_t inBuffSize;
68 size_t inToCompress;
69 size_t inBuffPos;
70 size_t inBuffTarget;
71 size_t blockSize;
72 char* outBuff;
73 size_t outBuffSize;
74 size_t outBuffContentSize;
75 size_t outBuffFlushedSize;
76 ZBUFF_cStage stage;
77 U32 checksum;
78 U32 frameEnded;
79 ZSTD_customMem customMem;
80 }; /* typedef'd to ZBUFF_CCtx within "zbuff.h" */
81
82 ZBUFF_CCtx* ZBUFF_createCCtx(void)
83 {
84 return ZBUFF_createCCtx_advanced(defaultCustomMem);
85 }
86
87 ZBUFF_CCtx* ZBUFF_createCCtx_advanced(ZSTD_customMem customMem)
88 {
89 ZBUFF_CCtx* zbc;
90
91 if (!customMem.customAlloc && !customMem.customFree)
92 customMem = defaultCustomMem;
93
94 if (!customMem.customAlloc || !customMem.customFree)
95 return NULL;
96
97 zbc = (ZBUFF_CCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_CCtx));
98 if (zbc==NULL) return NULL;
99 memset(zbc, 0, sizeof(ZBUFF_CCtx));
100 memcpy(&zbc->customMem, &customMem, sizeof(ZSTD_customMem));
101 zbc->zc = ZSTD_createCCtx_advanced(customMem);
102 if (zbc->zc == NULL) { ZBUFF_freeCCtx(zbc); return NULL; }
103 return zbc;
104 }
105
106 size_t ZBUFF_freeCCtx(ZBUFF_CCtx* zbc)
107 {
108 if (zbc==NULL) return 0; /* support free on NULL */
109 ZSTD_freeCCtx(zbc->zc);
110 if (zbc->inBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff);
111 if (zbc->outBuff) zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff);
112 zbc->customMem.customFree(zbc->customMem.opaque, zbc);
113 return 0;
114 }
115
116
117 /* ====== Initialization ====== */
118
119 size_t ZBUFF_compressInit_advanced(ZBUFF_CCtx* zbc,
120 const void* dict, size_t dictSize,
121 ZSTD_parameters params, unsigned long long pledgedSrcSize)
122 {
123 /* allocate buffers */
124 { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
125 if (zbc->inBuffSize < neededInBuffSize) {
126 zbc->inBuffSize = neededInBuffSize;
127 zbc->customMem.customFree(zbc->customMem.opaque, zbc->inBuff); /* should not be necessary */
128 zbc->inBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, neededInBuffSize);
129 if (zbc->inBuff == NULL) return ERROR(memory_allocation);
130 }
131 zbc->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
132 }
133 if (zbc->outBuffSize < ZSTD_compressBound(zbc->blockSize)+1) {
134 zbc->outBuffSize = ZSTD_compressBound(zbc->blockSize)+1;
135 zbc->customMem.customFree(zbc->customMem.opaque, zbc->outBuff); /* should not be necessary */
136 zbc->outBuff = (char*)zbc->customMem.customAlloc(zbc->customMem.opaque, zbc->outBuffSize);
137 if (zbc->outBuff == NULL) return ERROR(memory_allocation);
138 }
139
140 { size_t const errorCode = ZSTD_compressBegin_advanced(zbc->zc, dict, dictSize, params, pledgedSrcSize);
141 if (ZSTD_isError(errorCode)) return errorCode; }
142
143 zbc->inToCompress = 0;
144 zbc->inBuffPos = 0;
145 zbc->inBuffTarget = zbc->blockSize;
146 zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
147 zbc->stage = ZBUFFcs_load;
148 zbc->checksum = params.fParams.checksumFlag > 0;
149 zbc->frameEnded = 0;
150 return 0; /* ready to go */
151 }
152
153
154 size_t ZBUFF_compressInitDictionary(ZBUFF_CCtx* zbc, const void* dict, size_t dictSize, int compressionLevel)
155 {
156 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
157 return ZBUFF_compressInit_advanced(zbc, dict, dictSize, params, 0);
158 }
159
160 size_t ZBUFF_compressInit(ZBUFF_CCtx* zbc, int compressionLevel)
161 {
162 return ZBUFF_compressInitDictionary(zbc, NULL, 0, compressionLevel);
163 }
164
165
166 /* internal util function */
167 MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
168 {
169 size_t const length = MIN(dstCapacity, srcSize);
170 memcpy(dst, src, length);
171 return length;
172 }
173
174
175 /* ====== Compression ====== */
176
177 typedef enum { zbf_gather, zbf_flush, zbf_end } ZBUFF_flush_e;
178
179 static size_t ZBUFF_compressContinue_generic(ZBUFF_CCtx* zbc,
180 void* dst, size_t* dstCapacityPtr,
181 const void* src, size_t* srcSizePtr,
182 ZBUFF_flush_e const flush)
183 {
184 U32 someMoreWork = 1;
185 const char* const istart = (const char*)src;
186 const char* const iend = istart + *srcSizePtr;
187 const char* ip = istart;
188 char* const ostart = (char*)dst;
189 char* const oend = ostart + *dstCapacityPtr;
190 char* op = ostart;
191
192 while (someMoreWork) {
193 switch(zbc->stage)
194 {
195 case ZBUFFcs_init: return ERROR(init_missing); /* call ZBUFF_compressInit() first ! */
196
197 case ZBUFFcs_load:
198 /* complete inBuffer */
199 { size_t const toLoad = zbc->inBuffTarget - zbc->inBuffPos;
200 size_t const loaded = ZBUFF_limitCopy(zbc->inBuff + zbc->inBuffPos, toLoad, ip, iend-ip);
201 zbc->inBuffPos += loaded;
202 ip += loaded;
203 if ( (zbc->inBuffPos==zbc->inToCompress) || (!flush && (toLoad != loaded)) ) {
204 someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */
205 } }
206 /* compress current block (note : this stage cannot be stopped in the middle) */
207 { void* cDst;
208 size_t cSize;
209 size_t const iSize = zbc->inBuffPos - zbc->inToCompress;
210 size_t oSize = oend-op;
211 if (oSize >= ZSTD_compressBound(iSize))
212 cDst = op; /* compress directly into output buffer (avoid flush stage) */
213 else
214 cDst = zbc->outBuff, oSize = zbc->outBuffSize;
215 cSize = (flush == zbf_end) ?
216 ZSTD_compressEnd(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize) :
217 ZSTD_compressContinue(zbc->zc, cDst, oSize, zbc->inBuff + zbc->inToCompress, iSize);
218 if (ZSTD_isError(cSize)) return cSize;
219 if (flush == zbf_end) zbc->frameEnded = 1;
220 /* prepare next block */
221 zbc->inBuffTarget = zbc->inBuffPos + zbc->blockSize;
222 if (zbc->inBuffTarget > zbc->inBuffSize)
223 zbc->inBuffPos = 0, zbc->inBuffTarget = zbc->blockSize; /* note : inBuffSize >= blockSize */
224 zbc->inToCompress = zbc->inBuffPos;
225 if (cDst == op) { op += cSize; break; } /* no need to flush */
226 zbc->outBuffContentSize = cSize;
227 zbc->outBuffFlushedSize = 0;
228 zbc->stage = ZBUFFcs_flush; /* continue to flush stage */
229 }
230
231 case ZBUFFcs_flush:
232 { size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
233 size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
234 op += flushed;
235 zbc->outBuffFlushedSize += flushed;
236 if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
237 zbc->outBuffContentSize = zbc->outBuffFlushedSize = 0;
238 zbc->stage = ZBUFFcs_load;
239 break;
240 }
241
242 case ZBUFFcs_final:
243 someMoreWork = 0; /* do nothing */
244 break;
245
246 default:
247 return ERROR(GENERIC); /* impossible */
248 }
249 }
250
251 *srcSizePtr = ip - istart;
252 *dstCapacityPtr = op - ostart;
253 if (zbc->frameEnded) return 0;
254 { size_t hintInSize = zbc->inBuffTarget - zbc->inBuffPos;
255 if (hintInSize==0) hintInSize = zbc->blockSize;
256 return hintInSize;
257 }
258 }
259
260 size_t ZBUFF_compressContinue(ZBUFF_CCtx* zbc,
261 void* dst, size_t* dstCapacityPtr,
262 const void* src, size_t* srcSizePtr)
263 {
264 return ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, src, srcSizePtr, zbf_gather);
265 }
266
267
268
269 /* ====== Finalize ====== */
270
271 size_t ZBUFF_compressFlush(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
272 {
273 size_t srcSize = 0;
274 ZBUFF_compressContinue_generic(zbc, dst, dstCapacityPtr, &srcSize, &srcSize, zbf_flush); /* use a valid src address instead of NULL */
275 return zbc->outBuffContentSize - zbc->outBuffFlushedSize;
276 }
277
278
279 size_t ZBUFF_compressEnd(ZBUFF_CCtx* zbc, void* dst, size_t* dstCapacityPtr)
280 {
281 BYTE* const ostart = (BYTE*)dst;
282 BYTE* const oend = ostart + *dstCapacityPtr;
283 BYTE* op = ostart;
284
285 if (zbc->stage != ZBUFFcs_final) {
286 /* flush whatever remains */
287 size_t outSize = *dstCapacityPtr;
288 size_t srcSize = 0;
289 size_t const notEnded = ZBUFF_compressContinue_generic(zbc, dst, &outSize, &srcSize, &srcSize, zbf_end); /* use a valid address instead of NULL */
290 size_t const remainingToFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
291 op += outSize;
292 if (remainingToFlush) {
293 *dstCapacityPtr = op-ostart;
294 return remainingToFlush + ZBUFF_endFrameSize + (zbc->checksum * 4);
295 }
296 /* create epilogue */
297 zbc->stage = ZBUFFcs_final;
298 zbc->outBuffContentSize = !notEnded ? 0 :
299 ZSTD_compressEnd(zbc->zc, zbc->outBuff, zbc->outBuffSize, NULL, 0); /* write epilogue into outBuff */
300 }
301
302 /* flush epilogue */
303 { size_t const toFlush = zbc->outBuffContentSize - zbc->outBuffFlushedSize;
304 size_t const flushed = ZBUFF_limitCopy(op, oend-op, zbc->outBuff + zbc->outBuffFlushedSize, toFlush);
305 op += flushed;
306 zbc->outBuffFlushedSize += flushed;
307 *dstCapacityPtr = op-ostart;
308 if (toFlush==flushed) zbc->stage = ZBUFFcs_init; /* end reached */
309 return toFlush - flushed;
310 }
311 }
312
313
314
315 /* *************************************
316 * Tool functions
317 ***************************************/
318 size_t ZBUFF_recommendedCInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
319 size_t ZBUFF_recommendedCOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + ZBUFF_endFrameSize; }
@@ -0,0 +1,3264 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /*-*************************************
12 * Dependencies
13 ***************************************/
14 #include <string.h> /* memset */
15 #include "mem.h"
16 #define XXH_STATIC_LINKING_ONLY /* XXH64_state_t */
17 #include "xxhash.h" /* XXH_reset, update, digest */
18 #define FSE_STATIC_LINKING_ONLY /* FSE_encodeSymbol */
19 #include "fse.h"
20 #define HUF_STATIC_LINKING_ONLY
21 #include "huf.h"
22 #include "zstd_internal.h" /* includes zstd.h */
23
24
25 /*-*************************************
26 * Constants
27 ***************************************/
28 static const U32 g_searchStrength = 8; /* control skip over incompressible data */
29 #define HASH_READ_SIZE 8
30 typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
31
32
33 /*-*************************************
34 * Helper functions
35 ***************************************/
36 size_t ZSTD_compressBound(size_t srcSize) { return FSE_compressBound(srcSize) + 12; }
37
38
39 /*-*************************************
40 * Sequence storage
41 ***************************************/
42 static void ZSTD_resetSeqStore(seqStore_t* ssPtr)
43 {
44 ssPtr->lit = ssPtr->litStart;
45 ssPtr->sequences = ssPtr->sequencesStart;
46 ssPtr->longLengthID = 0;
47 }
48
49
50 /*-*************************************
51 * Context memory management
52 ***************************************/
53 struct ZSTD_CCtx_s
54 {
55 const BYTE* nextSrc; /* next block here to continue on current prefix */
56 const BYTE* base; /* All regular indexes relative to this position */
57 const BYTE* dictBase; /* extDict indexes relative to this position */
58 U32 dictLimit; /* below that point, need extDict */
59 U32 lowLimit; /* below that point, no more data */
60 U32 nextToUpdate; /* index from which to continue dictionary update */
61 U32 nextToUpdate3; /* index from which to continue dictionary update */
62 U32 hashLog3; /* dispatch table : larger == faster, more memory */
63 U32 loadedDictEnd;
64 ZSTD_compressionStage_e stage;
65 U32 rep[ZSTD_REP_NUM];
66 U32 savedRep[ZSTD_REP_NUM];
67 U32 dictID;
68 ZSTD_parameters params;
69 void* workSpace;
70 size_t workSpaceSize;
71 size_t blockSize;
72 U64 frameContentSize;
73 XXH64_state_t xxhState;
74 ZSTD_customMem customMem;
75
76 seqStore_t seqStore; /* sequences storage ptrs */
77 U32* hashTable;
78 U32* hashTable3;
79 U32* chainTable;
80 HUF_CElt* hufTable;
81 U32 flagStaticTables;
82 FSE_CTable offcodeCTable [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
83 FSE_CTable matchlengthCTable[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML)];
84 FSE_CTable litlengthCTable [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL)];
85 };
86
87 ZSTD_CCtx* ZSTD_createCCtx(void)
88 {
89 return ZSTD_createCCtx_advanced(defaultCustomMem);
90 }
91
92 ZSTD_CCtx* ZSTD_createCCtx_advanced(ZSTD_customMem customMem)
93 {
94 ZSTD_CCtx* cctx;
95
96 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
97 if (!customMem.customAlloc || !customMem.customFree) return NULL;
98
99 cctx = (ZSTD_CCtx*) ZSTD_malloc(sizeof(ZSTD_CCtx), customMem);
100 if (!cctx) return NULL;
101 memset(cctx, 0, sizeof(ZSTD_CCtx));
102 memcpy(&(cctx->customMem), &customMem, sizeof(customMem));
103 return cctx;
104 }
105
106 size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx)
107 {
108 if (cctx==NULL) return 0; /* support free on NULL */
109 ZSTD_free(cctx->workSpace, cctx->customMem);
110 ZSTD_free(cctx, cctx->customMem);
111 return 0; /* reserved as a potential error code in the future */
112 }
113
114 size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx)
115 {
116 if (cctx==NULL) return 0; /* support sizeof on NULL */
117 return sizeof(*cctx) + cctx->workSpaceSize;
118 }
119
120 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) /* hidden interface */
121 {
122 return &(ctx->seqStore);
123 }
124
125 static ZSTD_parameters ZSTD_getParamsFromCCtx(const ZSTD_CCtx* cctx)
126 {
127 return cctx->params;
128 }
129
130
131 /** ZSTD_checkCParams() :
132 ensure param values remain within authorized range.
133 @return : 0, or an error code if one value is beyond authorized range */
134 size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams)
135 {
136 # define CLAMPCHECK(val,min,max) { if ((val<min) | (val>max)) return ERROR(compressionParameter_unsupported); }
137 CLAMPCHECK(cParams.windowLog, ZSTD_WINDOWLOG_MIN, ZSTD_WINDOWLOG_MAX);
138 CLAMPCHECK(cParams.chainLog, ZSTD_CHAINLOG_MIN, ZSTD_CHAINLOG_MAX);
139 CLAMPCHECK(cParams.hashLog, ZSTD_HASHLOG_MIN, ZSTD_HASHLOG_MAX);
140 CLAMPCHECK(cParams.searchLog, ZSTD_SEARCHLOG_MIN, ZSTD_SEARCHLOG_MAX);
141 { U32 const searchLengthMin = ((cParams.strategy == ZSTD_fast) | (cParams.strategy == ZSTD_greedy)) ? ZSTD_SEARCHLENGTH_MIN+1 : ZSTD_SEARCHLENGTH_MIN;
142 U32 const searchLengthMax = (cParams.strategy == ZSTD_fast) ? ZSTD_SEARCHLENGTH_MAX : ZSTD_SEARCHLENGTH_MAX-1;
143 CLAMPCHECK(cParams.searchLength, searchLengthMin, searchLengthMax); }
144 CLAMPCHECK(cParams.targetLength, ZSTD_TARGETLENGTH_MIN, ZSTD_TARGETLENGTH_MAX);
145 if ((U32)(cParams.strategy) > (U32)ZSTD_btopt2) return ERROR(compressionParameter_unsupported);
146 return 0;
147 }
148
149
150 /** ZSTD_adjustCParams() :
151 optimize `cPar` for a given input (`srcSize` and `dictSize`).
152 mostly downsizing, to reduce memory consumption and initialization time.
153 Both `srcSize` and `dictSize` are optional (use 0 if unknown),
154 but if both are 0, no optimization can be done.
155 Note : cPar is considered validated at this stage. Use ZSTD_checkCParams() to ensure that. */
156 ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize)
157 {
158 if (srcSize+dictSize == 0) return cPar; /* no size information available : no adjustment */
159
160 /* resize params, to use less memory when necessary */
161 { U32 const minSrcSize = (srcSize==0) ? 500 : 0;
162 U64 const rSize = srcSize + dictSize + minSrcSize;
163 if (rSize < ((U64)1<<ZSTD_WINDOWLOG_MAX)) {
164 U32 const srcLog = MAX(ZSTD_HASHLOG_MIN, ZSTD_highbit32((U32)(rSize)-1) + 1);
165 if (cPar.windowLog > srcLog) cPar.windowLog = srcLog;
166 } }
167 if (cPar.hashLog > cPar.windowLog) cPar.hashLog = cPar.windowLog;
168 { U32 const btPlus = (cPar.strategy == ZSTD_btlazy2) | (cPar.strategy == ZSTD_btopt) | (cPar.strategy == ZSTD_btopt2);
169 U32 const maxChainLog = cPar.windowLog+btPlus;
170 if (cPar.chainLog > maxChainLog) cPar.chainLog = maxChainLog; } /* <= ZSTD_CHAINLOG_MAX */
171
172 if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */
173
174 return cPar;
175 }
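/* Worked example (illustrative, not part of the vendored zstd source) :
 * for a 100 KB input (srcSize = 102400) with no dictionary, rSize = 102400,
 * ZSTD_highbit32(102399) = 16 and srcLog = 17, so any windowLog above 17 is
 * clamped to 17 (a 128 KB window); hashLog and chainLog are then capped
 * relative to that windowLog as shown above. */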
176
177
178 size_t ZSTD_estimateCCtxSize(ZSTD_compressionParameters cParams)
179 {
180 size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << cParams.windowLog);
181 U32 const divider = (cParams.searchLength==3) ? 3 : 4;
182 size_t const maxNbSeq = blockSize / divider;
183 size_t const tokenSpace = blockSize + 11*maxNbSeq;
184
185 size_t const chainSize = (cParams.strategy == ZSTD_fast) ? 0 : (1 << cParams.chainLog);
186 size_t const hSize = ((size_t)1) << cParams.hashLog;
187 U32 const hashLog3 = (cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, cParams.windowLog);
188 size_t const h3Size = ((size_t)1) << hashLog3;
189 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
190
191 size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
192 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
193 size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
194 + (((cParams.strategy == ZSTD_btopt) || (cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
195
196 return sizeof(ZSTD_CCtx) + neededSpace;
197 }
198
199
200 static U32 ZSTD_equivalentParams(ZSTD_parameters param1, ZSTD_parameters param2)
201 {
202 return (param1.cParams.hashLog == param2.cParams.hashLog)
203 & (param1.cParams.chainLog == param2.cParams.chainLog)
204 & (param1.cParams.strategy == param2.cParams.strategy)
205 & ((param1.cParams.searchLength==3) == (param2.cParams.searchLength==3));
206 }
207
208 /*! ZSTD_continueCCtx() :
209 reuse CCtx without reset (note : requires no dictionary) */
210 static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_parameters params, U64 frameContentSize)
211 {
212 U32 const end = (U32)(cctx->nextSrc - cctx->base);
213 cctx->params = params;
214 cctx->frameContentSize = frameContentSize;
215 cctx->lowLimit = end;
216 cctx->dictLimit = end;
217 cctx->nextToUpdate = end+1;
218 cctx->stage = ZSTDcs_init;
219 cctx->dictID = 0;
220 cctx->loadedDictEnd = 0;
221 { int i; for (i=0; i<ZSTD_REP_NUM; i++) cctx->rep[i] = repStartValue[i]; }
222 cctx->seqStore.litLengthSum = 0; /* force reset of btopt stats */
223 XXH64_reset(&cctx->xxhState, 0);
224 return 0;
225 }
226
227 typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset, ZSTDcrp_fullReset } ZSTD_compResetPolicy_e;
228
229 /*! ZSTD_resetCCtx_advanced() :
230 note : 'params' must be validated */
231 static size_t ZSTD_resetCCtx_advanced (ZSTD_CCtx* zc,
232 ZSTD_parameters params, U64 frameContentSize,
233 ZSTD_compResetPolicy_e const crp)
234 {
235 if (crp == ZSTDcrp_continue)
236 if (ZSTD_equivalentParams(params, zc->params))
237 return ZSTD_continueCCtx(zc, params, frameContentSize);
238
239 { size_t const blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, (size_t)1 << params.cParams.windowLog);
240 U32 const divider = (params.cParams.searchLength==3) ? 3 : 4;
241 size_t const maxNbSeq = blockSize / divider;
242 size_t const tokenSpace = blockSize + 11*maxNbSeq;
243 size_t const chainSize = (params.cParams.strategy == ZSTD_fast) ? 0 : (1 << params.cParams.chainLog);
244 size_t const hSize = ((size_t)1) << params.cParams.hashLog;
245 U32 const hashLog3 = (params.cParams.searchLength>3) ? 0 : MIN(ZSTD_HASHLOG3_MAX, params.cParams.windowLog);
246 size_t const h3Size = ((size_t)1) << hashLog3;
247 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
248 void* ptr;
249
250 /* Check if workSpace is large enough, alloc a new one if needed */
251 { size_t const optSpace = ((MaxML+1) + (MaxLL+1) + (MaxOff+1) + (1<<Litbits))*sizeof(U32)
252 + (ZSTD_OPT_NUM+1)*(sizeof(ZSTD_match_t) + sizeof(ZSTD_optimal_t));
253 size_t const neededSpace = tableSpace + (256*sizeof(U32)) /* huffTable */ + tokenSpace
254 + (((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) ? optSpace : 0);
255 if (zc->workSpaceSize < neededSpace) {
256 ZSTD_free(zc->workSpace, zc->customMem);
257 zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem);
258 if (zc->workSpace == NULL) return ERROR(memory_allocation);
259 zc->workSpaceSize = neededSpace;
260 } }
261
262 if (crp!=ZSTDcrp_noMemset) memset(zc->workSpace, 0, tableSpace); /* reset tables only */
263 XXH64_reset(&zc->xxhState, 0);
264 zc->hashLog3 = hashLog3;
265 zc->hashTable = (U32*)(zc->workSpace);
266 zc->chainTable = zc->hashTable + hSize;
267 zc->hashTable3 = zc->chainTable + chainSize;
268 ptr = zc->hashTable3 + h3Size;
269 zc->hufTable = (HUF_CElt*)ptr;
270 zc->flagStaticTables = 0;
271 ptr = ((U32*)ptr) + 256; /* note : HUF_CElt* is an incomplete type; its size is simulated using U32 */
272
273 zc->nextToUpdate = 1;
274 zc->nextSrc = NULL;
275 zc->base = NULL;
276 zc->dictBase = NULL;
277 zc->dictLimit = 0;
278 zc->lowLimit = 0;
279 zc->params = params;
280 zc->blockSize = blockSize;
281 zc->frameContentSize = frameContentSize;
282 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = repStartValue[i]; }
283
284 if ((params.cParams.strategy == ZSTD_btopt) || (params.cParams.strategy == ZSTD_btopt2)) {
285 zc->seqStore.litFreq = (U32*)ptr;
286 zc->seqStore.litLengthFreq = zc->seqStore.litFreq + (1<<Litbits);
287 zc->seqStore.matchLengthFreq = zc->seqStore.litLengthFreq + (MaxLL+1);
288 zc->seqStore.offCodeFreq = zc->seqStore.matchLengthFreq + (MaxML+1);
289 ptr = zc->seqStore.offCodeFreq + (MaxOff+1);
290 zc->seqStore.matchTable = (ZSTD_match_t*)ptr;
291 ptr = zc->seqStore.matchTable + ZSTD_OPT_NUM+1;
292 zc->seqStore.priceTable = (ZSTD_optimal_t*)ptr;
293 ptr = zc->seqStore.priceTable + ZSTD_OPT_NUM+1;
294 zc->seqStore.litLengthSum = 0;
295 }
296 zc->seqStore.sequencesStart = (seqDef*)ptr;
297 ptr = zc->seqStore.sequencesStart + maxNbSeq;
298 zc->seqStore.llCode = (BYTE*) ptr;
299 zc->seqStore.mlCode = zc->seqStore.llCode + maxNbSeq;
300 zc->seqStore.ofCode = zc->seqStore.mlCode + maxNbSeq;
301 zc->seqStore.litStart = zc->seqStore.ofCode + maxNbSeq;
302
303 zc->stage = ZSTDcs_init;
304 zc->dictID = 0;
305 zc->loadedDictEnd = 0;
306
307 return 0;
308 }
309 }
310
311
312 /*! ZSTD_copyCCtx() :
313 * Duplicate an existing context `srcCCtx` into another one `dstCCtx`.
314 * Only works during stage ZSTDcs_init (i.e. after creation, but before first call to ZSTD_compressContinue()).
315 * @return : 0, or an error code */
316 size_t ZSTD_copyCCtx(ZSTD_CCtx* dstCCtx, const ZSTD_CCtx* srcCCtx, unsigned long long pledgedSrcSize)
317 {
318 if (srcCCtx->stage!=ZSTDcs_init) return ERROR(stage_wrong);
319
320 memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
321 ZSTD_resetCCtx_advanced(dstCCtx, srcCCtx->params, pledgedSrcSize, ZSTDcrp_noMemset);
322
323 /* copy tables */
324 { size_t const chainSize = (srcCCtx->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << srcCCtx->params.cParams.chainLog);
325 size_t const hSize = ((size_t)1) << srcCCtx->params.cParams.hashLog;
326 size_t const h3Size = (size_t)1 << srcCCtx->hashLog3;
327 size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32);
328 memcpy(dstCCtx->workSpace, srcCCtx->workSpace, tableSpace);
329 }
330
331 /* copy dictionary offsets */
332 dstCCtx->nextToUpdate = srcCCtx->nextToUpdate;
333 dstCCtx->nextToUpdate3= srcCCtx->nextToUpdate3;
334 dstCCtx->nextSrc = srcCCtx->nextSrc;
335 dstCCtx->base = srcCCtx->base;
336 dstCCtx->dictBase = srcCCtx->dictBase;
337 dstCCtx->dictLimit = srcCCtx->dictLimit;
338 dstCCtx->lowLimit = srcCCtx->lowLimit;
339 dstCCtx->loadedDictEnd= srcCCtx->loadedDictEnd;
340 dstCCtx->dictID = srcCCtx->dictID;
341
342 /* copy entropy tables */
343 dstCCtx->flagStaticTables = srcCCtx->flagStaticTables;
344 if (srcCCtx->flagStaticTables) {
345 memcpy(dstCCtx->hufTable, srcCCtx->hufTable, 256*4);
346 memcpy(dstCCtx->litlengthCTable, srcCCtx->litlengthCTable, sizeof(dstCCtx->litlengthCTable));
347 memcpy(dstCCtx->matchlengthCTable, srcCCtx->matchlengthCTable, sizeof(dstCCtx->matchlengthCTable));
348 memcpy(dstCCtx->offcodeCTable, srcCCtx->offcodeCTable, sizeof(dstCCtx->offcodeCTable));
349 }
350
351 return 0;
352 }
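/* Illustrative sketch, not part of the vendored zstd source. ZSTD_copyCCtx() is
 * only legal while srcCCtx is still in the ZSTDcs_init stage, i.e. after a
 * ZSTD_compressBegin*() call but before the first ZSTD_compressContinue(). A
 * typical use is amortizing dictionary loading across many small inputs
 * (assuming the buffer-less streaming API exposed under ZSTD_STATIC_LINKING_ONLY) : */
#if 0   /* example only */
static size_t example_prepareWorkCtx(ZSTD_CCtx* refCCtx, ZSTD_CCtx* workCCtx,
                                     const void* dict, size_t dictSize)
{
    /* load the dictionary once; refCCtx stays in the init stage */
    size_t const initResult = ZSTD_compressBegin_usingDict(refCCtx, dict, dictSize, 3);
    if (ZSTD_isError(initResult)) return initResult;
    /* cheap duplication of the prepared state into the working context */
    return ZSTD_copyCCtx(workCCtx, refCCtx, 0);   /* 0 : content size unknown */
}
#endif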
353
354
355 /*! ZSTD_reduceTable() :
356 * reduce table indexes by `reducerValue` */
357 static void ZSTD_reduceTable (U32* const table, U32 const size, U32 const reducerValue)
358 {
359 U32 u;
360 for (u=0 ; u < size ; u++) {
361 if (table[u] < reducerValue) table[u] = 0;
362 else table[u] -= reducerValue;
363 }
364 }
365
366 /*! ZSTD_reduceIndex() :
367 * rescale all indexes to avoid future overflow (indexes are U32) */
368 static void ZSTD_reduceIndex (ZSTD_CCtx* zc, const U32 reducerValue)
369 {
370 { U32 const hSize = 1 << zc->params.cParams.hashLog;
371 ZSTD_reduceTable(zc->hashTable, hSize, reducerValue); }
372
373 { U32 const chainSize = (zc->params.cParams.strategy == ZSTD_fast) ? 0 : (1 << zc->params.cParams.chainLog);
374 ZSTD_reduceTable(zc->chainTable, chainSize, reducerValue); }
375
376 { U32 const h3Size = (zc->hashLog3) ? 1 << zc->hashLog3 : 0;
377 ZSTD_reduceTable(zc->hashTable3, h3Size, reducerValue); }
378 }
379
380
381 /*-*******************************************************
382 * Block entropic compression
383 *********************************************************/
384
385 /* See doc/zstd_compression_format.md for detailed format description */
386
387 size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
388 {
389 if (srcSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
390 memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize);
391 MEM_writeLE24(dst, (U32)(srcSize << 2) + (U32)bt_raw);
392 return ZSTD_blockHeaderSize+srcSize;
393 }
394
395
396 static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
397 {
398 BYTE* const ostart = (BYTE* const)dst;
399 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
400
401 if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall);
402
403 switch(flSize)
404 {
405 case 1: /* 2 - 1 - 5 */
406 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
407 break;
408 case 2: /* 2 - 2 - 12 */
409 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
410 break;
411 default: /*note : should not be necessary : flSize is within {1,2,3} */
412 case 3: /* 2 - 2 - 20 */
413 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
414 break;
415 }
416
417 memcpy(ostart + flSize, src, srcSize);
418 return srcSize + flSize;
419 }
420
421 static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
422 {
423 BYTE* const ostart = (BYTE* const)dst;
424 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
425
426 (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */
427
428 switch(flSize)
429 {
430 case 1: /* 2 - 1 - 5 */
431 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
432 break;
433 case 2: /* 2 - 2 - 12 */
434 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
435 break;
436 default: /*note : should not be necessary : flSize is necessarily within {1,2,3} */
437 case 3: /* 2 - 2 - 20 */
438 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
439 break;
440 }
441
442 ostart[flSize] = *(const BYTE*)src;
443 return flSize+1;
444 }
445
446
447 static size_t ZSTD_minGain(size_t srcSize) { return (srcSize >> 6) + 2; }
448
449 static size_t ZSTD_compressLiterals (ZSTD_CCtx* zc,
450 void* dst, size_t dstCapacity,
451 const void* src, size_t srcSize)
452 {
453 size_t const minGain = ZSTD_minGain(srcSize);
454 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
455 BYTE* const ostart = (BYTE*)dst;
456 U32 singleStream = srcSize < 256;
457 symbolEncodingType_e hType = set_compressed;
458 size_t cLitSize;
459
460
461 /* small ? don't even attempt compression (speed opt) */
462 # define LITERAL_NOENTROPY 63
463 { size_t const minLitSize = zc->flagStaticTables ? 6 : LITERAL_NOENTROPY;
464 if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
465 }
466
467 if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */
468 if (zc->flagStaticTables && (lhSize==3)) {
469 hType = set_repeat;
470 singleStream = 1;
471 cLitSize = HUF_compress1X_usingCTable(ostart+lhSize, dstCapacity-lhSize, src, srcSize, zc->hufTable);
472 } else {
473 cLitSize = singleStream ? HUF_compress1X(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11)
474 : HUF_compress2 (ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11);
475 }
476
477 if ((cLitSize==0) | (cLitSize >= srcSize - minGain))
478 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
479 if (cLitSize==1)
480 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
481
482 /* Build header */
483 switch(lhSize)
484 {
485 case 3: /* 2 - 2 - 10 - 10 */
486 { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
487 MEM_writeLE24(ostart, lhc);
488 break;
489 }
490 case 4: /* 2 - 2 - 14 - 14 */
491 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
492 MEM_writeLE32(ostart, lhc);
493 break;
494 }
495 default: /* should not be necessary, lhSize is only {3,4,5} */
496 case 5: /* 2 - 2 - 18 - 18 */
497 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
498 MEM_writeLE32(ostart, lhc);
499 ostart[4] = (BYTE)(cLitSize >> 10);
500 break;
501 }
502 }
503 return lhSize+cLitSize;
504 }
505
506 static const BYTE LL_Code[64] = { 0, 1, 2, 3, 4, 5, 6, 7,
507 8, 9, 10, 11, 12, 13, 14, 15,
508 16, 16, 17, 17, 18, 18, 19, 19,
509 20, 20, 20, 20, 21, 21, 21, 21,
510 22, 22, 22, 22, 22, 22, 22, 22,
511 23, 23, 23, 23, 23, 23, 23, 23,
512 24, 24, 24, 24, 24, 24, 24, 24,
513 24, 24, 24, 24, 24, 24, 24, 24 };
514
515 static const BYTE ML_Code[128] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
516 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
517 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37,
518 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39,
519 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40,
520 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41,
521 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42,
522 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42 };
523
524
525 void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
526 {
527 BYTE const LL_deltaCode = 19;
528 BYTE const ML_deltaCode = 36;
529 const seqDef* const sequences = seqStorePtr->sequencesStart;
530 BYTE* const llCodeTable = seqStorePtr->llCode;
531 BYTE* const ofCodeTable = seqStorePtr->ofCode;
532 BYTE* const mlCodeTable = seqStorePtr->mlCode;
533 U32 const nbSeq = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
534 U32 u;
535 for (u=0; u<nbSeq; u++) {
536 U32 const llv = sequences[u].litLength;
537 U32 const mlv = sequences[u].matchLength;
538 llCodeTable[u] = (llv> 63) ? (BYTE)ZSTD_highbit32(llv) + LL_deltaCode : LL_Code[llv];
539 ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
540 mlCodeTable[u] = (mlv>127) ? (BYTE)ZSTD_highbit32(mlv) + ML_deltaCode : ML_Code[mlv];
541 }
542 if (seqStorePtr->longLengthID==1)
543 llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
544 if (seqStorePtr->longLengthID==2)
545 mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
546 }
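/* Worked example (illustrative, not part of the vendored zstd source) :
 * literal lengths up to 63 map directly through LL_Code; longer ones use
 * ZSTD_highbit32(llv) + 19, e.g. llv = 100 gives highbit32 = 6, hence code 25.
 * Match lengths follow the same pattern with ML_Code and a delta of 36,
 * e.g. mlv = 200 gives highbit32 = 7, hence code 43. Offset codes always
 * store just the highest bit position of the offset. */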
547
548
549 size_t ZSTD_compressSequences(ZSTD_CCtx* zc,
550 void* dst, size_t dstCapacity,
551 size_t srcSize)
552 {
553 const seqStore_t* seqStorePtr = &(zc->seqStore);
554 U32 count[MaxSeq+1];
555 S16 norm[MaxSeq+1];
556 FSE_CTable* CTable_LitLength = zc->litlengthCTable;
557 FSE_CTable* CTable_OffsetBits = zc->offcodeCTable;
558 FSE_CTable* CTable_MatchLength = zc->matchlengthCTable;
559 U32 LLtype, Offtype, MLtype; /* compressed, raw or rle */
560 const seqDef* const sequences = seqStorePtr->sequencesStart;
561 const BYTE* const ofCodeTable = seqStorePtr->ofCode;
562 const BYTE* const llCodeTable = seqStorePtr->llCode;
563 const BYTE* const mlCodeTable = seqStorePtr->mlCode;
564 BYTE* const ostart = (BYTE*)dst;
565 BYTE* const oend = ostart + dstCapacity;
566 BYTE* op = ostart;
567 size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
568 BYTE* seqHead;
569
570 /* Compress literals */
571 { const BYTE* const literals = seqStorePtr->litStart;
572 size_t const litSize = seqStorePtr->lit - literals;
573 size_t const cSize = ZSTD_compressLiterals(zc, op, dstCapacity, literals, litSize);
574 if (ZSTD_isError(cSize)) return cSize;
575 op += cSize;
576 }
577
578 /* Sequences Header */
579 if ((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead */) return ERROR(dstSize_tooSmall);
580 if (nbSeq < 0x7F) *op++ = (BYTE)nbSeq;
581 else if (nbSeq < LONGNBSEQ) op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2;
582 else op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3;
583 if (nbSeq==0) goto _check_compressibility;
584
585 /* seqHead : flags for FSE encoding type */
586 seqHead = op++;
587
588 #define MIN_SEQ_FOR_DYNAMIC_FSE 64
589 #define MAX_SEQ_FOR_STATIC_FSE 1000
590
591 /* convert length/distances into codes */
592 ZSTD_seqToCodes(seqStorePtr);
593
594 /* CTable for Literal Lengths */
595 { U32 max = MaxLL;
596 size_t const mostFrequent = FSE_countFast(count, &max, llCodeTable, nbSeq);
597 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
598 *op++ = llCodeTable[0];
599 FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
600 LLtype = set_rle;
601 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
602 LLtype = set_repeat;
603 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (LL_defaultNormLog-1)))) {
604 FSE_buildCTable(CTable_LitLength, LL_defaultNorm, MaxLL, LL_defaultNormLog);
605 LLtype = set_basic;
606 } else {
607 size_t nbSeq_1 = nbSeq;
608 const U32 tableLog = FSE_optimalTableLog(LLFSELog, nbSeq, max);
609 if (count[llCodeTable[nbSeq-1]]>1) { count[llCodeTable[nbSeq-1]]--; nbSeq_1--; }
610 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
611 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
612 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
613 op += NCountSize; }
614 FSE_buildCTable(CTable_LitLength, norm, max, tableLog);
615 LLtype = set_compressed;
616 } }
617
618 /* CTable for Offsets */
619 { U32 max = MaxOff;
620 size_t const mostFrequent = FSE_countFast(count, &max, ofCodeTable, nbSeq);
621 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
622 *op++ = ofCodeTable[0];
623 FSE_buildCTable_rle(CTable_OffsetBits, (BYTE)max);
624 Offtype = set_rle;
625 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
626 Offtype = set_repeat;
627 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (OF_defaultNormLog-1)))) {
628 FSE_buildCTable(CTable_OffsetBits, OF_defaultNorm, MaxOff, OF_defaultNormLog);
629 Offtype = set_basic;
630 } else {
631 size_t nbSeq_1 = nbSeq;
632 const U32 tableLog = FSE_optimalTableLog(OffFSELog, nbSeq, max);
633 if (count[ofCodeTable[nbSeq-1]]>1) { count[ofCodeTable[nbSeq-1]]--; nbSeq_1--; }
634 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
635 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
636 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
637 op += NCountSize; }
638 FSE_buildCTable(CTable_OffsetBits, norm, max, tableLog);
639 Offtype = set_compressed;
640 } }
641
642 /* CTable for MatchLengths */
643 { U32 max = MaxML;
644 size_t const mostFrequent = FSE_countFast(count, &max, mlCodeTable, nbSeq);
645 if ((mostFrequent == nbSeq) && (nbSeq > 2)) {
646 *op++ = *mlCodeTable;
647 FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
648 MLtype = set_rle;
649 } else if ((zc->flagStaticTables) && (nbSeq < MAX_SEQ_FOR_STATIC_FSE)) {
650 MLtype = set_repeat;
651 } else if ((nbSeq < MIN_SEQ_FOR_DYNAMIC_FSE) || (mostFrequent < (nbSeq >> (ML_defaultNormLog-1)))) {
652 FSE_buildCTable(CTable_MatchLength, ML_defaultNorm, MaxML, ML_defaultNormLog);
653 MLtype = set_basic;
654 } else {
655 size_t nbSeq_1 = nbSeq;
656 const U32 tableLog = FSE_optimalTableLog(MLFSELog, nbSeq, max);
657 if (count[mlCodeTable[nbSeq-1]]>1) { count[mlCodeTable[nbSeq-1]]--; nbSeq_1--; }
658 FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max);
659 { size_t const NCountSize = FSE_writeNCount(op, oend-op, norm, max, tableLog); /* overflow protected */
660 if (FSE_isError(NCountSize)) return ERROR(GENERIC);
661 op += NCountSize; }
662 FSE_buildCTable(CTable_MatchLength, norm, max, tableLog);
663 MLtype = set_compressed;
664 } }
665
666 *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
667 zc->flagStaticTables = 0;
668
669 /* Encoding Sequences */
670 { BIT_CStream_t blockStream;
671 FSE_CState_t stateMatchLength;
672 FSE_CState_t stateOffsetBits;
673 FSE_CState_t stateLitLength;
674
675 CHECK_E(BIT_initCStream(&blockStream, op, oend-op), dstSize_tooSmall); /* not enough space remaining */
676
677 /* first symbols */
678 FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]);
679 FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]);
680 FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]);
681 BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
682 if (MEM_32bits()) BIT_flushBits(&blockStream);
683 BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
684 if (MEM_32bits()) BIT_flushBits(&blockStream);
685 BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
686 BIT_flushBits(&blockStream);
687
688 { size_t n;
689 for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */
690 BYTE const llCode = llCodeTable[n];
691 BYTE const ofCode = ofCodeTable[n];
692 BYTE const mlCode = mlCodeTable[n];
693 U32 const llBits = LL_bits[llCode];
694 U32 const ofBits = ofCode; /* 32b*/ /* 64b*/
695 U32 const mlBits = ML_bits[mlCode];
696 /* (7)*/ /* (7)*/
697 FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */
698 FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */
699 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
700 FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */
701 if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog)))
702 BIT_flushBits(&blockStream); /* (7)*/
703 BIT_addBits(&blockStream, sequences[n].litLength, llBits);
704 if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
705 BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
706 if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/
707 BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */
708 BIT_flushBits(&blockStream); /* (7)*/
709 } }
710
711 FSE_flushCState(&blockStream, &stateMatchLength);
712 FSE_flushCState(&blockStream, &stateOffsetBits);
713 FSE_flushCState(&blockStream, &stateLitLength);
714
715 { size_t const streamSize = BIT_closeCStream(&blockStream);
716 if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */
717 op += streamSize;
718 } }
719
720 /* check compressibility */
721 _check_compressibility:
722 { size_t const minGain = ZSTD_minGain(srcSize);
723 size_t const maxCSize = srcSize - minGain;
724 if ((size_t)(op-ostart) >= maxCSize) return 0; }
725
726 /* confirm repcodes */
727 { int i; for (i=0; i<ZSTD_REP_NUM; i++) zc->rep[i] = zc->savedRep[i]; }
728
729 return op - ostart;
730 }
731
732
733 /*! ZSTD_storeSeq() :
734 Store a sequence (literal length, literals, offset code and match length code) into seqStore_t.
735 `offsetCode` : distance to match, or 0 == repCode.
736 `matchCode` : matchLength - MINMATCH
737 */
738 MEM_STATIC void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const void* literals, U32 offsetCode, size_t matchCode)
739 {
740 #if 0 /* for debug */
741 static const BYTE* g_start = NULL;
742 const U32 pos = (U32)(literals - g_start);
743 if (g_start==NULL) g_start = literals;
744 //if ((pos > 1) && (pos < 50000))
745 printf("Cpos %6u :%5u literals & match %3u bytes at distance %6u \n",
746 pos, (U32)litLength, (U32)matchCode+MINMATCH, (U32)offsetCode);
747 #endif
748 /* copy Literals */
749 ZSTD_wildcopy(seqStorePtr->lit, literals, litLength);
750 seqStorePtr->lit += litLength;
751
752 /* literal Length */
753 if (litLength>0xFFFF) { seqStorePtr->longLengthID = 1; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
754 seqStorePtr->sequences[0].litLength = (U16)litLength;
755
756 /* match offset */
757 seqStorePtr->sequences[0].offset = offsetCode + 1;
758
759 /* match Length */
760 if (matchCode>0xFFFF) { seqStorePtr->longLengthID = 2; seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart); }
761 seqStorePtr->sequences[0].matchLength = (U16)matchCode;
762
763 seqStorePtr->sequences++;
764 }
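/* Worked example (illustrative, not part of the vendored zstd source) :
 * the block compressors below pass offsetCode = 0 for a repcode match and
 * offsetCode = distance + ZSTD_REP_MOVE for a regular match, and this function
 * stores offsetCode + 1. Assuming ZSTD_REP_MOVE == ZSTD_REP_NUM-1 (from
 * zstd_internal.h), a repeated-offset match is therefore stored with offset == 1,
 * while a regular match at distance D is stored with offset == D + ZSTD_REP_NUM,
 * matching the offset_value convention of the frame format. */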
765
766
767 /*-*************************************
768 * Match length counter
769 ***************************************/
770 static unsigned ZSTD_NbCommonBytes (register size_t val)
771 {
772 if (MEM_isLittleEndian()) {
773 if (MEM_64bits()) {
774 # if defined(_MSC_VER) && defined(_WIN64)
775 unsigned long r = 0;
776 _BitScanForward64( &r, (U64)val );
777 return (unsigned)(r>>3);
778 # elif defined(__GNUC__) && (__GNUC__ >= 3)
779 return (__builtin_ctzll((U64)val) >> 3);
780 # else
781 static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 };
782 return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
783 # endif
784 } else { /* 32 bits */
785 # if defined(_MSC_VER)
786 unsigned long r=0;
787 _BitScanForward( &r, (U32)val );
788 return (unsigned)(r>>3);
789 # elif defined(__GNUC__) && (__GNUC__ >= 3)
790 return (__builtin_ctz((U32)val) >> 3);
791 # else
792 static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 };
793 return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
794 # endif
795 }
796 } else { /* Big Endian CPU */
797 if (MEM_64bits()) {
798 # if defined(_MSC_VER) && defined(_WIN64)
799 unsigned long r = 0;
800 _BitScanReverse64( &r, val );
801 return (unsigned)(r>>3);
802 # elif defined(__GNUC__) && (__GNUC__ >= 3)
803 return (__builtin_clzll(val) >> 3);
804 # else
805 unsigned r;
806 const unsigned n32 = sizeof(size_t)*4; /* calculated this way because the compiler complains in 32-bit mode */
807 if (!(val>>n32)) { r=4; } else { r=0; val>>=n32; }
808 if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; }
809 r += (!val);
810 return r;
811 # endif
812 } else { /* 32 bits */
813 # if defined(_MSC_VER)
814 unsigned long r = 0;
815 _BitScanReverse( &r, (unsigned long)val );
816 return (unsigned)(r>>3);
817 # elif defined(__GNUC__) && (__GNUC__ >= 3)
818 return (__builtin_clz((U32)val) >> 3);
819 # else
820 unsigned r;
821 if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; }
822 r += (!val);
823 return r;
824 # endif
825 } }
826 }
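/* Worked example (illustrative, not part of the vendored zstd source) :
 * `val` is the XOR of two multi-byte loads, so on little-endian targets its
 * lowest non-zero byte marks the first mismatching byte in memory order.
 * If the first two bytes match and the third differs, bits 0-15 of the XOR
 * are zero, __builtin_ctzll() returns a value in [16,23], and the >> 3
 * yields 2 common bytes. The big-endian branches count from the other end
 * with clz instead. */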
827
828
829 static size_t ZSTD_count(const BYTE* pIn, const BYTE* pMatch, const BYTE* const pInLimit)
830 {
831 const BYTE* const pStart = pIn;
832 const BYTE* const pInLoopLimit = pInLimit - (sizeof(size_t)-1);
833
834 while (pIn < pInLoopLimit) {
835 size_t const diff = MEM_readST(pMatch) ^ MEM_readST(pIn);
836 if (!diff) { pIn+=sizeof(size_t); pMatch+=sizeof(size_t); continue; }
837 pIn += ZSTD_NbCommonBytes(diff);
838 return (size_t)(pIn - pStart);
839 }
840 if (MEM_64bits()) if ((pIn<(pInLimit-3)) && (MEM_read32(pMatch) == MEM_read32(pIn))) { pIn+=4; pMatch+=4; }
841 if ((pIn<(pInLimit-1)) && (MEM_read16(pMatch) == MEM_read16(pIn))) { pIn+=2; pMatch+=2; }
842 if ((pIn<pInLimit) && (*pMatch == *pIn)) pIn++;
843 return (size_t)(pIn - pStart);
844 }
845
846 /** ZSTD_count_2segments() :
847 * can count match length with `ip` & `match` in 2 different segments.
848 * convention : on reaching mEnd, the match count continues, starting from iStart
849 */
850 static size_t ZSTD_count_2segments(const BYTE* ip, const BYTE* match, const BYTE* iEnd, const BYTE* mEnd, const BYTE* iStart)
851 {
852 const BYTE* const vEnd = MIN( ip + (mEnd - match), iEnd);
853 size_t const matchLength = ZSTD_count(ip, match, vEnd);
854 if (match + matchLength != mEnd) return matchLength;
855 return matchLength + ZSTD_count(ip+matchLength, iStart, iEnd);
856 }
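/* Worked example (illustrative, not part of the vendored zstd source) :
 * suppose the extDict segment ends at mEnd with the bytes A B C D, the current
 * prefix starts at iStart with E F G H, and the input at ip reads A B C D E F G X.
 * ZSTD_count() matches 4 bytes up to vEnd (the image of mEnd); since the match
 * reached mEnd, counting resumes at iStart and adds the 3 bytes E F G, so the
 * function returns 7. */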
857
858
859 /*-*************************************
860 * Hashes
861 ***************************************/
862 static const U32 prime3bytes = 506832829U;
863 static U32 ZSTD_hash3(U32 u, U32 h) { return ((u << (32-24)) * prime3bytes) >> (32-h) ; }
864 MEM_STATIC size_t ZSTD_hash3Ptr(const void* ptr, U32 h) { return ZSTD_hash3(MEM_readLE32(ptr), h); } /* only in zstd_opt.h */
865
866 static const U32 prime4bytes = 2654435761U;
867 static U32 ZSTD_hash4(U32 u, U32 h) { return (u * prime4bytes) >> (32-h) ; }
868 static size_t ZSTD_hash4Ptr(const void* ptr, U32 h) { return ZSTD_hash4(MEM_read32(ptr), h); }
869
870 static const U64 prime5bytes = 889523592379ULL;
871 static size_t ZSTD_hash5(U64 u, U32 h) { return (size_t)(((u << (64-40)) * prime5bytes) >> (64-h)) ; }
872 static size_t ZSTD_hash5Ptr(const void* p, U32 h) { return ZSTD_hash5(MEM_readLE64(p), h); }
873
874 static const U64 prime6bytes = 227718039650203ULL;
875 static size_t ZSTD_hash6(U64 u, U32 h) { return (size_t)(((u << (64-48)) * prime6bytes) >> (64-h)) ; }
876 static size_t ZSTD_hash6Ptr(const void* p, U32 h) { return ZSTD_hash6(MEM_readLE64(p), h); }
877
878 static const U64 prime7bytes = 58295818150454627ULL;
879 static size_t ZSTD_hash7(U64 u, U32 h) { return (size_t)(((u << (64-56)) * prime7bytes) >> (64-h)) ; }
880 static size_t ZSTD_hash7Ptr(const void* p, U32 h) { return ZSTD_hash7(MEM_readLE64(p), h); }
881
882 static const U64 prime8bytes = 0xCF1BBCDCB7A56463ULL;
883 static size_t ZSTD_hash8(U64 u, U32 h) { return (size_t)(((u) * prime8bytes) >> (64-h)) ; }
884 static size_t ZSTD_hash8Ptr(const void* p, U32 h) { return ZSTD_hash8(MEM_readLE64(p), h); }
885
886 static size_t ZSTD_hashPtr(const void* p, U32 hBits, U32 mls)
887 {
888 switch(mls)
889 {
890 default:
891 case 4: return ZSTD_hash4Ptr(p, hBits);
892 case 5: return ZSTD_hash5Ptr(p, hBits);
893 case 6: return ZSTD_hash6Ptr(p, hBits);
894 case 7: return ZSTD_hash7Ptr(p, hBits);
895 case 8: return ZSTD_hash8Ptr(p, hBits);
896 }
897 }
898
899
900 /*-*************************************
901 * Fast Scan
902 ***************************************/
903 static void ZSTD_fillHashTable (ZSTD_CCtx* zc, const void* end, const U32 mls)
904 {
905 U32* const hashTable = zc->hashTable;
906 U32 const hBits = zc->params.cParams.hashLog;
907 const BYTE* const base = zc->base;
908 const BYTE* ip = base + zc->nextToUpdate;
909 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
910 const size_t fastHashFillStep = 3;
911
912 while(ip <= iend) {
913 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip - base);
914 ip += fastHashFillStep;
915 }
916 }
917
918
919 FORCE_INLINE
920 void ZSTD_compressBlock_fast_generic(ZSTD_CCtx* cctx,
921 const void* src, size_t srcSize,
922 const U32 mls)
923 {
924 U32* const hashTable = cctx->hashTable;
925 U32 const hBits = cctx->params.cParams.hashLog;
926 seqStore_t* seqStorePtr = &(cctx->seqStore);
927 const BYTE* const base = cctx->base;
928 const BYTE* const istart = (const BYTE*)src;
929 const BYTE* ip = istart;
930 const BYTE* anchor = istart;
931 const U32 lowestIndex = cctx->dictLimit;
932 const BYTE* const lowest = base + lowestIndex;
933 const BYTE* const iend = istart + srcSize;
934 const BYTE* const ilimit = iend - HASH_READ_SIZE;
935 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
936 U32 offsetSaved = 0;
937
938 /* init */
939 ip += (ip==lowest);
940 { U32 const maxRep = (U32)(ip-lowest);
941 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
942 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
943 }
944
945 /* Main Search Loop */
946 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
947 size_t mLength;
948 size_t const h = ZSTD_hashPtr(ip, hBits, mls);
949 U32 const current = (U32)(ip-base);
950 U32 const matchIndex = hashTable[h];
951 const BYTE* match = base + matchIndex;
952 hashTable[h] = current; /* update hash table */
953
954 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
955 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
956 ip++;
957 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
958 } else {
959 U32 offset;
960 if ( (matchIndex <= lowestIndex) || (MEM_read32(match) != MEM_read32(ip)) ) {
961 ip += ((ip-anchor) >> g_searchStrength) + 1;
962 continue;
963 }
964 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
965 offset = (U32)(ip-match);
966 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
967 offset_2 = offset_1;
968 offset_1 = offset;
969
970 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
971 }
972
973 /* match found */
974 ip += mLength;
975 anchor = ip;
976
977 if (ip <= ilimit) {
978 /* Fill Table */
979 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2; /* here because current+2 could be > iend-8 */
980 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
981 /* check immediate repcode */
982 while ( (ip <= ilimit)
983 && ( (offset_2>0)
984 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
985 /* store sequence */
986 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
987 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
988 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = (U32)(ip-base);
989 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
990 ip += rLength;
991 anchor = ip;
992 continue; /* faster when present ... (?) */
993 } } }
994
995 /* save reps for next block */
996 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
997 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
998
999 /* Last Literals */
1000 { size_t const lastLLSize = iend - anchor;
1001 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1002 seqStorePtr->lit += lastLLSize;
1003 }
1004 }
1005
1006
1007 static void ZSTD_compressBlock_fast(ZSTD_CCtx* ctx,
1008 const void* src, size_t srcSize)
1009 {
1010 const U32 mls = ctx->params.cParams.searchLength;
1011 switch(mls)
1012 {
1013 default:
1014 case 4 :
1015 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 4); return;
1016 case 5 :
1017 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 5); return;
1018 case 6 :
1019 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 6); return;
1020 case 7 :
1021 ZSTD_compressBlock_fast_generic(ctx, src, srcSize, 7); return;
1022 }
1023 }
1024
1025
1026 static void ZSTD_compressBlock_fast_extDict_generic(ZSTD_CCtx* ctx,
1027 const void* src, size_t srcSize,
1028 const U32 mls)
1029 {
1030 U32* hashTable = ctx->hashTable;
1031 const U32 hBits = ctx->params.cParams.hashLog;
1032 seqStore_t* seqStorePtr = &(ctx->seqStore);
1033 const BYTE* const base = ctx->base;
1034 const BYTE* const dictBase = ctx->dictBase;
1035 const BYTE* const istart = (const BYTE*)src;
1036 const BYTE* ip = istart;
1037 const BYTE* anchor = istart;
1038 const U32 lowestIndex = ctx->lowLimit;
1039 const BYTE* const dictStart = dictBase + lowestIndex;
1040 const U32 dictLimit = ctx->dictLimit;
1041 const BYTE* const lowPrefixPtr = base + dictLimit;
1042 const BYTE* const dictEnd = dictBase + dictLimit;
1043 const BYTE* const iend = istart + srcSize;
1044 const BYTE* const ilimit = iend - 8;
1045 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1046
1047 /* Search Loop */
1048 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1049 const size_t h = ZSTD_hashPtr(ip, hBits, mls);
1050 const U32 matchIndex = hashTable[h];
1051 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1052 const BYTE* match = matchBase + matchIndex;
1053 const U32 current = (U32)(ip-base);
1054 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1055 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1056 const BYTE* repMatch = repBase + repIndex;
1057 size_t mLength;
1058 hashTable[h] = current; /* update hash table */
1059
1060 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1061 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1062 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1063 mLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repMatchEnd, lowPrefixPtr) + EQUAL_READ32;
1064 ip++;
1065 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1066 } else {
1067 if ( (matchIndex < lowestIndex) ||
1068 (MEM_read32(match) != MEM_read32(ip)) ) {
1069 ip += ((ip-anchor) >> g_searchStrength) + 1;
1070 continue;
1071 }
1072 { const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1073 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1074 U32 offset;
1075 mLength = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iend, matchEnd, lowPrefixPtr) + EQUAL_READ32;
1076 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1077 offset = current - matchIndex;
1078 offset_2 = offset_1;
1079 offset_1 = offset;
1080 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1081 } }
1082
1083 /* found a match : store it */
1084 ip += mLength;
1085 anchor = ip;
1086
1087 if (ip <= ilimit) {
1088 /* Fill Table */
1089 hashTable[ZSTD_hashPtr(base+current+2, hBits, mls)] = current+2;
1090 hashTable[ZSTD_hashPtr(ip-2, hBits, mls)] = (U32)(ip-2-base);
1091 /* check immediate repcode */
1092 while (ip <= ilimit) {
1093 U32 const current2 = (U32)(ip-base);
1094 U32 const repIndex2 = current2 - offset_2;
1095 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1096 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional underflow */
1097 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1098 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
1099 size_t repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
1100 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1101 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1102 hashTable[ZSTD_hashPtr(ip, hBits, mls)] = current2;
1103 ip += repLength2;
1104 anchor = ip;
1105 continue;
1106 }
1107 break;
1108 } } }
1109
1110 /* save reps for next block */
1111 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1112
1113 /* Last Literals */
1114 { size_t const lastLLSize = iend - anchor;
1115 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1116 seqStorePtr->lit += lastLLSize;
1117 }
1118 }
1119
1120
1121 static void ZSTD_compressBlock_fast_extDict(ZSTD_CCtx* ctx,
1122 const void* src, size_t srcSize)
1123 {
1124 U32 const mls = ctx->params.cParams.searchLength;
1125 switch(mls)
1126 {
1127 default:
1128 case 4 :
1129 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 4); return;
1130 case 5 :
1131 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 5); return;
1132 case 6 :
1133 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 6); return;
1134 case 7 :
1135 ZSTD_compressBlock_fast_extDict_generic(ctx, src, srcSize, 7); return;
1136 }
1137 }
1138
1139
1140 /*-*************************************
1141 * Double Fast
1142 ***************************************/
1143 static void ZSTD_fillDoubleHashTable (ZSTD_CCtx* cctx, const void* end, const U32 mls)
1144 {
1145 U32* const hashLarge = cctx->hashTable;
1146 U32 const hBitsL = cctx->params.cParams.hashLog;
1147 U32* const hashSmall = cctx->chainTable;
1148 U32 const hBitsS = cctx->params.cParams.chainLog;
1149 const BYTE* const base = cctx->base;
1150 const BYTE* ip = base + cctx->nextToUpdate;
1151 const BYTE* const iend = ((const BYTE*)end) - HASH_READ_SIZE;
1152 const size_t fastHashFillStep = 3;
1153
1154 while(ip <= iend) {
1155 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip - base);
1156 hashLarge[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip - base);
1157 ip += fastHashFillStep;
1158 }
1159 }
1160
1161
1162 FORCE_INLINE
1163 void ZSTD_compressBlock_doubleFast_generic(ZSTD_CCtx* cctx,
1164 const void* src, size_t srcSize,
1165 const U32 mls)
1166 {
1167 U32* const hashLong = cctx->hashTable;
1168 const U32 hBitsL = cctx->params.cParams.hashLog;
1169 U32* const hashSmall = cctx->chainTable;
1170 const U32 hBitsS = cctx->params.cParams.chainLog;
1171 seqStore_t* seqStorePtr = &(cctx->seqStore);
1172 const BYTE* const base = cctx->base;
1173 const BYTE* const istart = (const BYTE*)src;
1174 const BYTE* ip = istart;
1175 const BYTE* anchor = istart;
1176 const U32 lowestIndex = cctx->dictLimit;
1177 const BYTE* const lowest = base + lowestIndex;
1178 const BYTE* const iend = istart + srcSize;
1179 const BYTE* const ilimit = iend - HASH_READ_SIZE;
1180 U32 offset_1=cctx->rep[0], offset_2=cctx->rep[1];
1181 U32 offsetSaved = 0;
1182
1183 /* init */
1184 ip += (ip==lowest);
1185 { U32 const maxRep = (U32)(ip-lowest);
1186 if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
1187 if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
1188 }
1189
1190 /* Main Search Loop */
1191 while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */
1192 size_t mLength;
1193 size_t const h2 = ZSTD_hashPtr(ip, hBitsL, 8);
1194 size_t const h = ZSTD_hashPtr(ip, hBitsS, mls);
1195 U32 const current = (U32)(ip-base);
1196 U32 const matchIndexL = hashLong[h2];
1197 U32 const matchIndexS = hashSmall[h];
1198 const BYTE* matchLong = base + matchIndexL;
1199 const BYTE* match = base + matchIndexS;
1200 hashLong[h2] = hashSmall[h] = current; /* update hash tables */
1201
1202 if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) { /* note : by construction, offset_1 <= current */
1203 mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
1204 ip++;
1205 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1206 } else {
1207 U32 offset;
1208 if ( (matchIndexL > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip)) ) {
1209 mLength = ZSTD_count(ip+8, matchLong+8, iend) + 8;
1210 offset = (U32)(ip-matchLong);
1211 while (((ip>anchor) & (matchLong>lowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1212 } else if ( (matchIndexS > lowestIndex) && (MEM_read32(match) == MEM_read32(ip)) ) {
1213 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1214 U32 const matchIndex3 = hashLong[h3];
1215 const BYTE* match3 = base + matchIndex3;
1216 hashLong[h3] = current + 1;
1217 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1218 mLength = ZSTD_count(ip+9, match3+8, iend) + 8;
1219 ip++;
1220 offset = (U32)(ip-match3);
1221 while (((ip>anchor) & (match3>lowest)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1222 } else {
1223 mLength = ZSTD_count(ip+4, match+4, iend) + 4;
1224 offset = (U32)(ip-match);
1225 while (((ip>anchor) & (match>lowest)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1226 }
1227 } else {
1228 ip += ((ip-anchor) >> g_searchStrength) + 1;
1229 continue;
1230 }
1231
1232 offset_2 = offset_1;
1233 offset_1 = offset;
1234
1235 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1236 }
1237
1238 /* match found */
1239 ip += mLength;
1240 anchor = ip;
1241
1242 if (ip <= ilimit) {
1243 /* Fill Table */
1244 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] =
1245 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */
1246 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] =
1247 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1248
1249 /* check immediate repcode */
1250 while ( (ip <= ilimit)
1251 && ( (offset_2>0)
1252 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1253 /* store sequence */
1254 size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
1255 { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
1256 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
1257 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
1258 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, rLength-MINMATCH);
1259 ip += rLength;
1260 anchor = ip;
1261 continue; /* faster when present ... (?) */
1262 } } }
1263
1264 /* save reps for next block */
1265 cctx->savedRep[0] = offset_1 ? offset_1 : offsetSaved;
1266 cctx->savedRep[1] = offset_2 ? offset_2 : offsetSaved;
1267
1268 /* Last Literals */
1269 { size_t const lastLLSize = iend - anchor;
1270 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1271 seqStorePtr->lit += lastLLSize;
1272 }
1273 }
1274
1275
1276 static void ZSTD_compressBlock_doubleFast(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1277 {
1278 const U32 mls = ctx->params.cParams.searchLength;
1279 switch(mls)
1280 {
1281 default:
1282 case 4 :
1283 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 4); return;
1284 case 5 :
1285 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 5); return;
1286 case 6 :
1287 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 6); return;
1288 case 7 :
1289 ZSTD_compressBlock_doubleFast_generic(ctx, src, srcSize, 7); return;
1290 }
1291 }
1292
1293
1294 static void ZSTD_compressBlock_doubleFast_extDict_generic(ZSTD_CCtx* ctx,
1295 const void* src, size_t srcSize,
1296 const U32 mls)
1297 {
1298 U32* const hashLong = ctx->hashTable;
1299 U32 const hBitsL = ctx->params.cParams.hashLog;
1300 U32* const hashSmall = ctx->chainTable;
1301 U32 const hBitsS = ctx->params.cParams.chainLog;
1302 seqStore_t* seqStorePtr = &(ctx->seqStore);
1303 const BYTE* const base = ctx->base;
1304 const BYTE* const dictBase = ctx->dictBase;
1305 const BYTE* const istart = (const BYTE*)src;
1306 const BYTE* ip = istart;
1307 const BYTE* anchor = istart;
1308 const U32 lowestIndex = ctx->lowLimit;
1309 const BYTE* const dictStart = dictBase + lowestIndex;
1310 const U32 dictLimit = ctx->dictLimit;
1311 const BYTE* const lowPrefixPtr = base + dictLimit;
1312 const BYTE* const dictEnd = dictBase + dictLimit;
1313 const BYTE* const iend = istart + srcSize;
1314 const BYTE* const ilimit = iend - 8;
1315 U32 offset_1=ctx->rep[0], offset_2=ctx->rep[1];
1316
1317 /* Search Loop */
1318 while (ip < ilimit) { /* < instead of <=, because (ip+1) */
1319 const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls);
1320 const U32 matchIndex = hashSmall[hSmall];
1321 const BYTE* matchBase = matchIndex < dictLimit ? dictBase : base;
1322 const BYTE* match = matchBase + matchIndex;
1323
1324 const size_t hLong = ZSTD_hashPtr(ip, hBitsL, 8);
1325 const U32 matchLongIndex = hashLong[hLong];
1326 const BYTE* matchLongBase = matchLongIndex < dictLimit ? dictBase : base;
1327 const BYTE* matchLong = matchLongBase + matchLongIndex;
1328
1329 const U32 current = (U32)(ip-base);
1330 const U32 repIndex = current + 1 - offset_1; /* offset_1 expected <= current +1 */
1331 const BYTE* repBase = repIndex < dictLimit ? dictBase : base;
1332 const BYTE* repMatch = repBase + repIndex;
1333 size_t mLength;
1334 hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */
1335
1336 if ( (((U32)((dictLimit-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > lowestIndex))
1337 && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
1338 const BYTE* repMatchEnd = repIndex < dictLimit ? dictEnd : iend;
1339 mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, lowPrefixPtr) + 4;
1340 ip++;
1341 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, 0, mLength-MINMATCH);
1342 } else {
1343 if ((matchLongIndex > lowestIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
1344 const BYTE* matchEnd = matchLongIndex < dictLimit ? dictEnd : iend;
1345 const BYTE* lowMatchPtr = matchLongIndex < dictLimit ? dictStart : lowPrefixPtr;
1346 U32 offset;
1347 mLength = ZSTD_count_2segments(ip+8, matchLong+8, iend, matchEnd, lowPrefixPtr) + 8;
1348 offset = current - matchLongIndex;
1349 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
1350 offset_2 = offset_1;
1351 offset_1 = offset;
1352 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1353
1354 } else if ((matchIndex > lowestIndex) && (MEM_read32(match) == MEM_read32(ip))) {
1355 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
1356 U32 const matchIndex3 = hashLong[h3];
1357 const BYTE* const match3Base = matchIndex3 < dictLimit ? dictBase : base;
1358 const BYTE* match3 = match3Base + matchIndex3;
1359 U32 offset;
1360 hashLong[h3] = current + 1;
1361 if ( (matchIndex3 > lowestIndex) && (MEM_read64(match3) == MEM_read64(ip+1)) ) {
1362 const BYTE* matchEnd = matchIndex3 < dictLimit ? dictEnd : iend;
1363 const BYTE* lowMatchPtr = matchIndex3 < dictLimit ? dictStart : lowPrefixPtr;
1364 mLength = ZSTD_count_2segments(ip+9, match3+8, iend, matchEnd, lowPrefixPtr) + 8;
1365 ip++;
1366 offset = current+1 - matchIndex3;
1367 while (((ip>anchor) & (match3>lowMatchPtr)) && (ip[-1] == match3[-1])) { ip--; match3--; mLength++; } /* catch up */
1368 } else {
1369 const BYTE* matchEnd = matchIndex < dictLimit ? dictEnd : iend;
1370 const BYTE* lowMatchPtr = matchIndex < dictLimit ? dictStart : lowPrefixPtr;
1371 mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, lowPrefixPtr) + 4;
1372 offset = current - matchIndex;
1373 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
1374 }
1375 offset_2 = offset_1;
1376 offset_1 = offset;
1377 ZSTD_storeSeq(seqStorePtr, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
1378
1379 } else {
1380 ip += ((ip-anchor) >> g_searchStrength) + 1;
1381 continue;
1382 } }
1383
1384 /* found a match : store it */
1385 ip += mLength;
1386 anchor = ip;
1387
1388 if (ip <= ilimit) {
1389 /* Fill Table */
1390 hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2;
1391 hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2;
1392 hashSmall[ZSTD_hashPtr(ip-2, hBitsS, mls)] = (U32)(ip-2-base);
1393 hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
1394 /* check immediate repcode */
1395 while (ip <= ilimit) {
1396 U32 const current2 = (U32)(ip-base);
1397 U32 const repIndex2 = current2 - offset_2;
1398 const BYTE* repMatch2 = repIndex2 < dictLimit ? dictBase + repIndex2 : base + repIndex2;
1399 if ( (((U32)((dictLimit-1) - repIndex2) >= 3) & (repIndex2 > lowestIndex)) /* intentional underflow */
1400 && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
1401 const BYTE* const repEnd2 = repIndex2 < dictLimit ? dictEnd : iend;
1402 size_t const repLength2 = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch2+EQUAL_READ32, iend, repEnd2, lowPrefixPtr) + EQUAL_READ32;
1403 U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */
1404 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, repLength2-MINMATCH);
1405 hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
1406 hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
1407 ip += repLength2;
1408 anchor = ip;
1409 continue;
1410 }
1411 break;
1412 } } }
1413
1414 /* save reps for next block */
1415 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
1416
1417 /* Last Literals */
1418 { size_t const lastLLSize = iend - anchor;
1419 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1420 seqStorePtr->lit += lastLLSize;
1421 }
1422 }
1423
1424
1425 static void ZSTD_compressBlock_doubleFast_extDict(ZSTD_CCtx* ctx,
1426 const void* src, size_t srcSize)
1427 {
1428 U32 const mls = ctx->params.cParams.searchLength;
1429 switch(mls)
1430 {
1431 default:
1432 case 4 :
1433 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 4); return;
1434 case 5 :
1435 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 5); return;
1436 case 6 :
1437 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 6); return;
1438 case 7 :
1439 ZSTD_compressBlock_doubleFast_extDict_generic(ctx, src, srcSize, 7); return;
1440 }
1441 }
1442
1443
1444 /*-*************************************
1445 * Binary Tree search
1446 ***************************************/
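/* Layout sketch (inferred from the code below) :
   the binary tree lives in chainTable, reinterpreted as a rolling buffer of
   (1 << (chainLog-1)) nodes, with two U32 slots per position :
     bt[2*(pos & btMask)]     = index of the "smaller" child
     bt[2*(pos & btMask) + 1] = index of the "larger" child
   Positions older than (current - btMask) have already been overwritten and are
   treated as outside the tree (see the btLow checks). */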
1447 /** ZSTD_insertBt1() : add one or multiple positions to tree.
1448 * ip : assumed <= iend-8 .
1449 * @return : nb of positions added */
1450 static U32 ZSTD_insertBt1(ZSTD_CCtx* zc, const BYTE* const ip, const U32 mls, const BYTE* const iend, U32 nbCompares,
1451 U32 extDict)
1452 {
1453 U32* const hashTable = zc->hashTable;
1454 U32 const hashLog = zc->params.cParams.hashLog;
1455 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
1456 U32* const bt = zc->chainTable;
1457 U32 const btLog = zc->params.cParams.chainLog - 1;
1458 U32 const btMask = (1 << btLog) - 1;
1459 U32 matchIndex = hashTable[h];
1460 size_t commonLengthSmaller=0, commonLengthLarger=0;
1461 const BYTE* const base = zc->base;
1462 const BYTE* const dictBase = zc->dictBase;
1463 const U32 dictLimit = zc->dictLimit;
1464 const BYTE* const dictEnd = dictBase + dictLimit;
1465 const BYTE* const prefixStart = base + dictLimit;
1466 const BYTE* match;
1467 const U32 current = (U32)(ip-base);
1468 const U32 btLow = btMask >= current ? 0 : current - btMask;
1469 U32* smallerPtr = bt + 2*(current&btMask);
1470 U32* largerPtr = smallerPtr + 1;
1471 U32 dummy32; /* to be nullified at the end */
1472 U32 const windowLow = zc->lowLimit;
1473 U32 matchEndIdx = current+8;
1474 size_t bestLength = 8;
1475 #ifdef ZSTD_C_PREDICT
1476 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0);
1477 U32 predictedLarge = *(bt + 2*((current-1)&btMask) + 1);
1478 predictedSmall += (predictedSmall>0);
1479 predictedLarge += (predictedLarge>0);
1480 #endif /* ZSTD_C_PREDICT */
1481
1482 hashTable[h] = current; /* Update Hash Table */
1483
1484 while (nbCompares-- && (matchIndex > windowLow)) {
1485 U32* nextPtr = bt + 2*(matchIndex & btMask);
1486 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
1487 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */
1488 const U32* predictPtr = bt + 2*((matchIndex-1) & btMask); /* written this way, as bt is a roll buffer */
1489 if (matchIndex == predictedSmall) {
1490 /* no need to check length, result known */
1491 *smallerPtr = matchIndex;
1492 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1493 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
1494 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
1495 predictedSmall = predictPtr[1] + (predictPtr[1]>0);
1496 continue;
1497 }
1498 if (matchIndex == predictedLarge) {
1499 *largerPtr = matchIndex;
1500 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1501 largerPtr = nextPtr;
1502 matchIndex = nextPtr[0];
1503 predictedLarge = predictPtr[0] + (predictPtr[0]>0);
1504 continue;
1505 }
1506 #endif
1507 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
1508 match = base + matchIndex;
1509 if (match[matchLength] == ip[matchLength])
1510 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
1511 } else {
1512 match = dictBase + matchIndex;
1513 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
1514 if (matchIndex+matchLength >= dictLimit)
1515 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
1516 }
1517
1518 if (matchLength > bestLength) {
1519 bestLength = matchLength;
1520 if (matchLength > matchEndIdx - matchIndex)
1521 matchEndIdx = matchIndex + (U32)matchLength;
1522 }
1523
1524 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
1525             break;   /* drop, to guarantee consistency; misses a bit of compression, but other solutions can corrupt the tree */
1526
1527 if (match[matchLength] < ip[matchLength]) { /* necessarily within correct buffer */
1528 /* match is smaller than current */
1529 *smallerPtr = matchIndex; /* update smaller idx */
1530 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
1531 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1532 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
1533 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
1534 } else {
1535 /* match is larger than current */
1536 *largerPtr = matchIndex;
1537 commonLengthLarger = matchLength;
1538 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1539 largerPtr = nextPtr;
1540 matchIndex = nextPtr[0];
1541 } }
1542
1543 *smallerPtr = *largerPtr = 0;
1544 if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */
1545 if (matchEndIdx > current + 8) return matchEndIdx - current - 8;
1546 return 1;
1547 }
1548
1549
1550 static size_t ZSTD_insertBtAndFindBestMatch (
1551 ZSTD_CCtx* zc,
1552 const BYTE* const ip, const BYTE* const iend,
1553 size_t* offsetPtr,
1554 U32 nbCompares, const U32 mls,
1555 U32 extDict)
1556 {
1557 U32* const hashTable = zc->hashTable;
1558 U32 const hashLog = zc->params.cParams.hashLog;
1559 size_t const h = ZSTD_hashPtr(ip, hashLog, mls);
1560 U32* const bt = zc->chainTable;
1561 U32 const btLog = zc->params.cParams.chainLog - 1;
1562 U32 const btMask = (1 << btLog) - 1;
1563 U32 matchIndex = hashTable[h];
1564 size_t commonLengthSmaller=0, commonLengthLarger=0;
1565 const BYTE* const base = zc->base;
1566 const BYTE* const dictBase = zc->dictBase;
1567 const U32 dictLimit = zc->dictLimit;
1568 const BYTE* const dictEnd = dictBase + dictLimit;
1569 const BYTE* const prefixStart = base + dictLimit;
1570 const U32 current = (U32)(ip-base);
1571 const U32 btLow = btMask >= current ? 0 : current - btMask;
1572 const U32 windowLow = zc->lowLimit;
1573 U32* smallerPtr = bt + 2*(current&btMask);
1574 U32* largerPtr = bt + 2*(current&btMask) + 1;
1575 U32 matchEndIdx = current+8;
1576 U32 dummy32; /* to be nullified at the end */
1577 size_t bestLength = 0;
1578
1579 hashTable[h] = current; /* Update Hash Table */
1580
1581 while (nbCompares-- && (matchIndex > windowLow)) {
1582 U32* nextPtr = bt + 2*(matchIndex & btMask);
1583 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
1584 const BYTE* match;
1585
1586 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
1587 match = base + matchIndex;
1588 if (match[matchLength] == ip[matchLength])
1589 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iend) +1;
1590 } else {
1591 match = dictBase + matchIndex;
1592 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iend, dictEnd, prefixStart);
1593 if (matchIndex+matchLength >= dictLimit)
1594 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
1595 }
1596
1597 if (matchLength > bestLength) {
1598 if (matchLength > matchEndIdx - matchIndex)
1599 matchEndIdx = matchIndex + (U32)matchLength;
1600 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(current-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
1601 bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + current - matchIndex;
1602 if (ip+matchLength == iend) /* equal : no way to know if inf or sup */
1603 break; /* drop, to guarantee consistency (miss a little bit of compression) */
1604 }
1605
1606 if (match[matchLength] < ip[matchLength]) {
1607 /* match is smaller than current */
1608 *smallerPtr = matchIndex; /* update smaller idx */
1609 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
1610 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1611 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
1612 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
1613 } else {
1614 /* match is larger than current */
1615 *largerPtr = matchIndex;
1616 commonLengthLarger = matchLength;
1617 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
1618 largerPtr = nextPtr;
1619 matchIndex = nextPtr[0];
1620 } }
1621
1622 *smallerPtr = *largerPtr = 0;
1623
1624 zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
1625 return bestLength;
1626 }
1627
1628
1629 static void ZSTD_updateTree(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
1630 {
1631 const BYTE* const base = zc->base;
1632 const U32 target = (U32)(ip - base);
1633 U32 idx = zc->nextToUpdate;
1634
1635 while(idx < target)
1636 idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 0);
1637 }
1638
1639 /** ZSTD_BtFindBestMatch() : Tree updater, providing best match */
1640 static size_t ZSTD_BtFindBestMatch (
1641 ZSTD_CCtx* zc,
1642 const BYTE* const ip, const BYTE* const iLimit,
1643 size_t* offsetPtr,
1644 const U32 maxNbAttempts, const U32 mls)
1645 {
1646 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
1647 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
1648 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 0);
1649 }
1650
1651
1652 static size_t ZSTD_BtFindBestMatch_selectMLS (
1653 ZSTD_CCtx* zc, /* Index table will be updated */
1654 const BYTE* ip, const BYTE* const iLimit,
1655 size_t* offsetPtr,
1656 const U32 maxNbAttempts, const U32 matchLengthSearch)
1657 {
1658 switch(matchLengthSearch)
1659 {
1660 default :
1661 case 4 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
1662 case 5 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
1663 case 6 : return ZSTD_BtFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
1664 }
1665 }
1666
1667
1668 static void ZSTD_updateTree_extDict(ZSTD_CCtx* zc, const BYTE* const ip, const BYTE* const iend, const U32 nbCompares, const U32 mls)
1669 {
1670 const BYTE* const base = zc->base;
1671 const U32 target = (U32)(ip - base);
1672 U32 idx = zc->nextToUpdate;
1673
1674 while (idx < target) idx += ZSTD_insertBt1(zc, base+idx, mls, iend, nbCompares, 1);
1675 }
1676
1677
1678 /** Tree updater, providing best match */
1679 static size_t ZSTD_BtFindBestMatch_extDict (
1680 ZSTD_CCtx* zc,
1681 const BYTE* const ip, const BYTE* const iLimit,
1682 size_t* offsetPtr,
1683 const U32 maxNbAttempts, const U32 mls)
1684 {
1685 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
1686 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
1687 return ZSTD_insertBtAndFindBestMatch(zc, ip, iLimit, offsetPtr, maxNbAttempts, mls, 1);
1688 }
1689
1690
1691 static size_t ZSTD_BtFindBestMatch_selectMLS_extDict (
1692 ZSTD_CCtx* zc, /* Index table will be updated */
1693 const BYTE* ip, const BYTE* const iLimit,
1694 size_t* offsetPtr,
1695 const U32 maxNbAttempts, const U32 matchLengthSearch)
1696 {
1697 switch(matchLengthSearch)
1698 {
1699 default :
1700 case 4 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4);
1701 case 5 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5);
1702 case 6 : return ZSTD_BtFindBestMatch_extDict(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6);
1703 }
1704 }
1705
1706
1707
1708 /* *********************************
1709 * Hash Chain
1710 ***********************************/
1711 #define NEXT_IN_CHAIN(d, mask) chainTable[(d) & mask]
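/* Chain layout sketch (inferred from the code below) :
   chainTable[pos & chainMask] stores the index of the previous position sharing
   the same hash, so each hashTable bucket heads a singly-linked list threaded
   through the rolling chainTable. ZSTD_insertAndFindFirstIndex() links new
   positions in; ZSTD_HcFindBestMatch_generic() walks the list backwards, for up
   to maxNbAttempts candidates or until indices fall below lowLimit / minChain. */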
1712
1713 /* Update chains up to ip (excluded)
1714 Assumption : always within prefix (ie. not within extDict) */
1715 FORCE_INLINE
1716 U32 ZSTD_insertAndFindFirstIndex (ZSTD_CCtx* zc, const BYTE* ip, U32 mls)
1717 {
1718 U32* const hashTable = zc->hashTable;
1719 const U32 hashLog = zc->params.cParams.hashLog;
1720 U32* const chainTable = zc->chainTable;
1721 const U32 chainMask = (1 << zc->params.cParams.chainLog) - 1;
1722 const BYTE* const base = zc->base;
1723 const U32 target = (U32)(ip - base);
1724 U32 idx = zc->nextToUpdate;
1725
1726 while(idx < target) { /* catch up */
1727 size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
1728 NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
1729 hashTable[h] = idx;
1730 idx++;
1731 }
1732
1733 zc->nextToUpdate = target;
1734 return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
1735 }
1736
1737
1738
1739 FORCE_INLINE /* inlining is important to hardwire a hot branch (template emulation) */
1740 size_t ZSTD_HcFindBestMatch_generic (
1741 ZSTD_CCtx* zc, /* Index table will be updated */
1742 const BYTE* const ip, const BYTE* const iLimit,
1743 size_t* offsetPtr,
1744 const U32 maxNbAttempts, const U32 mls, const U32 extDict)
1745 {
1746 U32* const chainTable = zc->chainTable;
1747 const U32 chainSize = (1 << zc->params.cParams.chainLog);
1748 const U32 chainMask = chainSize-1;
1749 const BYTE* const base = zc->base;
1750 const BYTE* const dictBase = zc->dictBase;
1751 const U32 dictLimit = zc->dictLimit;
1752 const BYTE* const prefixStart = base + dictLimit;
1753 const BYTE* const dictEnd = dictBase + dictLimit;
1754 const U32 lowLimit = zc->lowLimit;
1755 const U32 current = (U32)(ip-base);
1756 const U32 minChain = current > chainSize ? current - chainSize : 0;
1757 int nbAttempts=maxNbAttempts;
1758 size_t ml=EQUAL_READ32-1;
1759
1760 /* HC4 match finder */
1761 U32 matchIndex = ZSTD_insertAndFindFirstIndex (zc, ip, mls);
1762
1763 for ( ; (matchIndex>lowLimit) & (nbAttempts>0) ; nbAttempts--) {
1764 const BYTE* match;
1765 size_t currentMl=0;
1766 if ((!extDict) || matchIndex >= dictLimit) {
1767 match = base + matchIndex;
1768 if (match[ml] == ip[ml]) /* potentially better */
1769 currentMl = ZSTD_count(ip, match, iLimit);
1770 } else {
1771 match = dictBase + matchIndex;
1772 if (MEM_read32(match) == MEM_read32(ip)) /* assumption : matchIndex <= dictLimit-4 (by table construction) */
1773 currentMl = ZSTD_count_2segments(ip+EQUAL_READ32, match+EQUAL_READ32, iLimit, dictEnd, prefixStart) + EQUAL_READ32;
1774 }
1775
1776 /* save best solution */
1777 if (currentMl > ml) { ml = currentMl; *offsetPtr = current - matchIndex + ZSTD_REP_MOVE; if (ip+currentMl == iLimit) break; /* best possible, and avoid read overflow*/ }
1778
1779 if (matchIndex <= minChain) break;
1780 matchIndex = NEXT_IN_CHAIN(matchIndex, chainMask);
1781 }
1782
1783 return ml;
1784 }
1785
1786
1787 FORCE_INLINE size_t ZSTD_HcFindBestMatch_selectMLS (
1788 ZSTD_CCtx* zc,
1789 const BYTE* ip, const BYTE* const iLimit,
1790 size_t* offsetPtr,
1791 const U32 maxNbAttempts, const U32 matchLengthSearch)
1792 {
1793 switch(matchLengthSearch)
1794 {
1795 default :
1796 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 0);
1797 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 0);
1798 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 0);
1799 }
1800 }
1801
1802
1803 FORCE_INLINE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
1804 ZSTD_CCtx* zc,
1805 const BYTE* ip, const BYTE* const iLimit,
1806 size_t* offsetPtr,
1807 const U32 maxNbAttempts, const U32 matchLengthSearch)
1808 {
1809 switch(matchLengthSearch)
1810 {
1811 default :
1812 case 4 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 4, 1);
1813 case 5 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 5, 1);
1814 case 6 : return ZSTD_HcFindBestMatch_generic(zc, ip, iLimit, offsetPtr, maxNbAttempts, 6, 1);
1815 }
1816 }
1817
1818
1819 /* *******************************
1820 * Common parser - lazy strategy
1821 *********************************/
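/* Parameter mapping (as wired by the wrapper functions further below) :
   searchMethod : 0 = hash chain (ZSTD_HcFindBestMatch), 1 = binary tree (ZSTD_BtFindBestMatch)
   depth        : 0 = greedy, 1 = lazy (look 1 position ahead), 2 = lazy2 (look 2 positions ahead)
   At each step the parser weighs a longer candidate match against its offset cost,
   approximated by ZSTD_highbit32(offset+1) bits, and keeps the higher estimated gain;
   repcode matches are treated as having zero offset cost. */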
1822 FORCE_INLINE
1823 void ZSTD_compressBlock_lazy_generic(ZSTD_CCtx* ctx,
1824 const void* src, size_t srcSize,
1825 const U32 searchMethod, const U32 depth)
1826 {
1827 seqStore_t* seqStorePtr = &(ctx->seqStore);
1828 const BYTE* const istart = (const BYTE*)src;
1829 const BYTE* ip = istart;
1830 const BYTE* anchor = istart;
1831 const BYTE* const iend = istart + srcSize;
1832 const BYTE* const ilimit = iend - 8;
1833 const BYTE* const base = ctx->base + ctx->dictLimit;
1834
1835 U32 const maxSearches = 1 << ctx->params.cParams.searchLog;
1836 U32 const mls = ctx->params.cParams.searchLength;
1837
1838 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
1839 size_t* offsetPtr,
1840 U32 maxNbAttempts, U32 matchLengthSearch);
1841 searchMax_f const searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS;
1842 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1], savedOffset=0;
1843
1844 /* init */
1845 ip += (ip==base);
1846 ctx->nextToUpdate3 = ctx->nextToUpdate;
1847 { U32 const maxRep = (U32)(ip-base);
1848 if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0;
1849 if (offset_1 > maxRep) savedOffset = offset_1, offset_1 = 0;
1850 }
1851
1852 /* Match Loop */
1853 while (ip < ilimit) {
1854 size_t matchLength=0;
1855 size_t offset=0;
1856 const BYTE* start=ip+1;
1857
1858 /* check repCode */
1859 if ((offset_1>0) & (MEM_read32(ip+1) == MEM_read32(ip+1 - offset_1))) {
1860 /* repcode : we take it */
1861 matchLength = ZSTD_count(ip+1+EQUAL_READ32, ip+1+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1862 if (depth==0) goto _storeSequence;
1863 }
1864
1865 /* first search (depth 0) */
1866 { size_t offsetFound = 99999999;
1867 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
1868 if (ml2 > matchLength)
1869 matchLength = ml2, start = ip, offset=offsetFound;
1870 }
1871
1872 if (matchLength < EQUAL_READ32) {
1873 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
1874 continue;
1875 }
1876
1877 /* let's try to find a better solution */
1878 if (depth>=1)
1879 while (ip<ilimit) {
1880 ip ++;
1881 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1882 size_t const mlRep = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1883 int const gain2 = (int)(mlRep * 3);
1884 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
1885 if ((mlRep >= EQUAL_READ32) && (gain2 > gain1))
1886 matchLength = mlRep, offset = 0, start = ip;
1887 }
1888 { size_t offset2=99999999;
1889 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1890 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1891 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
1892 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
1893 matchLength = ml2, offset = offset2, start = ip;
1894 continue; /* search a better one */
1895 } }
1896
1897 /* let's find an even better one */
1898 if ((depth==2) && (ip<ilimit)) {
1899 ip ++;
1900 if ((offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
1901 size_t const ml2 = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_1, iend) + EQUAL_READ32;
1902 int const gain2 = (int)(ml2 * 4);
1903 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
1904 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1))
1905 matchLength = ml2, offset = 0, start = ip;
1906 }
1907 { size_t offset2=99999999;
1908 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
1909 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
1910 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
1911 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
1912 matchLength = ml2, offset = offset2, start = ip;
1913 continue;
1914 } } }
1915 break; /* nothing found : store previous solution */
1916 }
1917
1918 /* catch up */
1919 if (offset) {
1920 while ((start>anchor) && (start>base+offset-ZSTD_REP_MOVE) && (start[-1] == start[-1-offset+ZSTD_REP_MOVE])) /* only search for offset within prefix */
1921 { start--; matchLength++; }
1922 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
1923 }
1924
1925 /* store sequence */
1926 _storeSequence:
1927 { size_t const litLength = start - anchor;
1928 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
1929 anchor = ip = start + matchLength;
1930 }
1931
1932 /* check immediate repcode */
1933 while ( (ip <= ilimit)
1934 && ((offset_2>0)
1935 & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
1936 /* store sequence */
1937 matchLength = ZSTD_count(ip+EQUAL_READ32, ip+EQUAL_READ32-offset_2, iend) + EQUAL_READ32;
1938 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
1939 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
1940 ip += matchLength;
1941 anchor = ip;
1942 continue; /* faster when present ... (?) */
1943 } }
1944
1945 /* Save reps for next block */
1946 ctx->savedRep[0] = offset_1 ? offset_1 : savedOffset;
1947 ctx->savedRep[1] = offset_2 ? offset_2 : savedOffset;
1948
1949 /* Last Literals */
1950 { size_t const lastLLSize = iend - anchor;
1951 memcpy(seqStorePtr->lit, anchor, lastLLSize);
1952 seqStorePtr->lit += lastLLSize;
1953 }
1954 }
1955
1956
1957 static void ZSTD_compressBlock_btlazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1958 {
1959 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 1, 2);
1960 }
1961
1962 static void ZSTD_compressBlock_lazy2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1963 {
1964 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 2);
1965 }
1966
1967 static void ZSTD_compressBlock_lazy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1968 {
1969 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 1);
1970 }
1971
1972 static void ZSTD_compressBlock_greedy(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
1973 {
1974 ZSTD_compressBlock_lazy_generic(ctx, src, srcSize, 0, 0);
1975 }
1976
1977
1978 FORCE_INLINE
1979 void ZSTD_compressBlock_lazy_extDict_generic(ZSTD_CCtx* ctx,
1980 const void* src, size_t srcSize,
1981 const U32 searchMethod, const U32 depth)
1982 {
1983 seqStore_t* seqStorePtr = &(ctx->seqStore);
1984 const BYTE* const istart = (const BYTE*)src;
1985 const BYTE* ip = istart;
1986 const BYTE* anchor = istart;
1987 const BYTE* const iend = istart + srcSize;
1988 const BYTE* const ilimit = iend - 8;
1989 const BYTE* const base = ctx->base;
1990 const U32 dictLimit = ctx->dictLimit;
1991 const U32 lowestIndex = ctx->lowLimit;
1992 const BYTE* const prefixStart = base + dictLimit;
1993 const BYTE* const dictBase = ctx->dictBase;
1994 const BYTE* const dictEnd = dictBase + dictLimit;
1995 const BYTE* const dictStart = dictBase + ctx->lowLimit;
1996
1997 const U32 maxSearches = 1 << ctx->params.cParams.searchLog;
1998 const U32 mls = ctx->params.cParams.searchLength;
1999
2000 typedef size_t (*searchMax_f)(ZSTD_CCtx* zc, const BYTE* ip, const BYTE* iLimit,
2001 size_t* offsetPtr,
2002 U32 maxNbAttempts, U32 matchLengthSearch);
2003 searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_selectMLS_extDict : ZSTD_HcFindBestMatch_extDict_selectMLS;
2004
2005 U32 offset_1 = ctx->rep[0], offset_2 = ctx->rep[1];
2006
2007 /* init */
2008 ctx->nextToUpdate3 = ctx->nextToUpdate;
2009 ip += (ip == prefixStart);
2010
2011 /* Match Loop */
2012 while (ip < ilimit) {
2013 size_t matchLength=0;
2014 size_t offset=0;
2015 const BYTE* start=ip+1;
2016 U32 current = (U32)(ip-base);
2017
2018 /* check repCode */
2019 { const U32 repIndex = (U32)(current+1 - offset_1);
2020 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2021 const BYTE* const repMatch = repBase + repIndex;
2022 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2023 if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
2024                     /* repcode detected, we should take it */
2025 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2026 matchLength = ZSTD_count_2segments(ip+1+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2027 if (depth==0) goto _storeSequence;
2028 } }
2029
2030 /* first search (depth 0) */
2031 { size_t offsetFound = 99999999;
2032 size_t const ml2 = searchMax(ctx, ip, iend, &offsetFound, maxSearches, mls);
2033 if (ml2 > matchLength)
2034 matchLength = ml2, start = ip, offset=offsetFound;
2035 }
2036
2037 if (matchLength < EQUAL_READ32) {
2038 ip += ((ip-anchor) >> g_searchStrength) + 1; /* jump faster over incompressible sections */
2039 continue;
2040 }
2041
2042 /* let's try to find a better solution */
2043 if (depth>=1)
2044 while (ip<ilimit) {
2045 ip ++;
2046 current++;
2047 /* check repCode */
2048 if (offset) {
2049 const U32 repIndex = (U32)(current - offset_1);
2050 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2051 const BYTE* const repMatch = repBase + repIndex;
2052 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2053 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2054 /* repcode detected */
2055 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2056 size_t const repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2057 int const gain2 = (int)(repLength * 3);
2058 int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
2059 if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
2060 matchLength = repLength, offset = 0, start = ip;
2061 } }
2062
2063 /* search match, depth 1 */
2064 { size_t offset2=99999999;
2065 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
2066 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2067 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
2068 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
2069 matchLength = ml2, offset = offset2, start = ip;
2070 continue; /* search a better one */
2071 } }
2072
2073 /* let's find an even better one */
2074 if ((depth==2) && (ip<ilimit)) {
2075 ip ++;
2076 current++;
2077 /* check repCode */
2078 if (offset) {
2079 const U32 repIndex = (U32)(current - offset_1);
2080 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2081 const BYTE* const repMatch = repBase + repIndex;
2082 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2083 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2084 /* repcode detected */
2085 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2086 size_t repLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2087 int gain2 = (int)(repLength * 4);
2088 int gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
2089 if ((repLength >= EQUAL_READ32) && (gain2 > gain1))
2090 matchLength = repLength, offset = 0, start = ip;
2091 } }
2092
2093 /* search match, depth 2 */
2094 { size_t offset2=99999999;
2095 size_t const ml2 = searchMax(ctx, ip, iend, &offset2, maxSearches, mls);
2096 int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1)); /* raw approx */
2097 int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
2098 if ((ml2 >= EQUAL_READ32) && (gain2 > gain1)) {
2099 matchLength = ml2, offset = offset2, start = ip;
2100 continue;
2101 } } }
2102 break; /* nothing found : store previous solution */
2103 }
2104
2105 /* catch up */
2106 if (offset) {
2107 U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
2108 const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
2109 const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
2110 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; } /* catch up */
2111 offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
2112 }
2113
2114 /* store sequence */
2115 _storeSequence:
2116 { size_t const litLength = start - anchor;
2117 ZSTD_storeSeq(seqStorePtr, litLength, anchor, (U32)offset, matchLength-MINMATCH);
2118 anchor = ip = start + matchLength;
2119 }
2120
2121 /* check immediate repcode */
2122 while (ip <= ilimit) {
2123 const U32 repIndex = (U32)((ip-base) - offset_2);
2124 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
2125 const BYTE* const repMatch = repBase + repIndex;
2126 if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > lowestIndex)) /* intentional overflow */
2127 if (MEM_read32(ip) == MEM_read32(repMatch)) {
2128                 /* repcode detected, we should take it */
2129 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
2130 matchLength = ZSTD_count_2segments(ip+EQUAL_READ32, repMatch+EQUAL_READ32, iend, repEnd, prefixStart) + EQUAL_READ32;
2131 offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap offset history */
2132 ZSTD_storeSeq(seqStorePtr, 0, anchor, 0, matchLength-MINMATCH);
2133 ip += matchLength;
2134 anchor = ip;
2135 continue; /* faster when present ... (?) */
2136 }
2137 break;
2138 } }
2139
2140 /* Save reps for next block */
2141 ctx->savedRep[0] = offset_1; ctx->savedRep[1] = offset_2;
2142
2143 /* Last Literals */
2144 { size_t const lastLLSize = iend - anchor;
2145 memcpy(seqStorePtr->lit, anchor, lastLLSize);
2146 seqStorePtr->lit += lastLLSize;
2147 }
2148 }
2149
2150
2151 void ZSTD_compressBlock_greedy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2152 {
2153 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 0);
2154 }
2155
2156 static void ZSTD_compressBlock_lazy_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2157 {
2158 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 1);
2159 }
2160
2161 static void ZSTD_compressBlock_lazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2162 {
2163 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 0, 2);
2164 }
2165
2166 static void ZSTD_compressBlock_btlazy2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2167 {
2168 ZSTD_compressBlock_lazy_extDict_generic(ctx, src, srcSize, 1, 2);
2169 }
2170
2171
2172 /* The optimal parser */
2173 #include "zstd_opt.h"
2174
2175 static void ZSTD_compressBlock_btopt(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2176 {
2177 #ifdef ZSTD_OPT_H_91842398743
2178 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 0);
2179 #else
2180 (void)ctx; (void)src; (void)srcSize;
2181 return;
2182 #endif
2183 }
2184
2185 static void ZSTD_compressBlock_btopt2(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2186 {
2187 #ifdef ZSTD_OPT_H_91842398743
2188 ZSTD_compressBlock_opt_generic(ctx, src, srcSize, 1);
2189 #else
2190 (void)ctx; (void)src; (void)srcSize;
2191 return;
2192 #endif
2193 }
2194
2195 static void ZSTD_compressBlock_btopt_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2196 {
2197 #ifdef ZSTD_OPT_H_91842398743
2198 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 0);
2199 #else
2200 (void)ctx; (void)src; (void)srcSize;
2201 return;
2202 #endif
2203 }
2204
2205 static void ZSTD_compressBlock_btopt2_extDict(ZSTD_CCtx* ctx, const void* src, size_t srcSize)
2206 {
2207 #ifdef ZSTD_OPT_H_91842398743
2208 ZSTD_compressBlock_opt_extDict_generic(ctx, src, srcSize, 1);
2209 #else
2210 (void)ctx; (void)src; (void)srcSize;
2211 return;
2212 #endif
2213 }
2214
2215
2216 typedef void (*ZSTD_blockCompressor) (ZSTD_CCtx* ctx, const void* src, size_t srcSize);
2217
2218 static ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, int extDict)
2219 {
2220 static const ZSTD_blockCompressor blockCompressor[2][8] = {
2221 { ZSTD_compressBlock_fast, ZSTD_compressBlock_doubleFast, ZSTD_compressBlock_greedy, ZSTD_compressBlock_lazy, ZSTD_compressBlock_lazy2, ZSTD_compressBlock_btlazy2, ZSTD_compressBlock_btopt, ZSTD_compressBlock_btopt2 },
2222 { ZSTD_compressBlock_fast_extDict, ZSTD_compressBlock_doubleFast_extDict, ZSTD_compressBlock_greedy_extDict, ZSTD_compressBlock_lazy_extDict,ZSTD_compressBlock_lazy2_extDict, ZSTD_compressBlock_btlazy2_extDict, ZSTD_compressBlock_btopt_extDict, ZSTD_compressBlock_btopt2_extDict }
2223 };
2224
2225 return blockCompressor[extDict][(U32)strat];
2226 }
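/* Note : rows are indexed by extDict (0 = single contiguous prefix, 1 = an external
   dictionary segment is present, i.e. lowLimit < dictLimit), and columns follow the
   ZSTD_strategy order handled by the dictionary loader below :
   fast, dfast, greedy, lazy, lazy2, btlazy2, btopt, btopt2. */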
2227
2228
2229 static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2230 {
2231 ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->params.cParams.strategy, zc->lowLimit < zc->dictLimit);
2232 const BYTE* const base = zc->base;
2233 const BYTE* const istart = (const BYTE*)src;
2234 const U32 current = (U32)(istart-base);
2235 if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) return 0; /* don't even attempt compression below a certain srcSize */
2236 ZSTD_resetSeqStore(&(zc->seqStore));
2237     if (current > zc->nextToUpdate + 384)
2238         zc->nextToUpdate = current - MIN(192, (U32)(current - zc->nextToUpdate - 384));   /* the tree is not updated after very long matches; bound how far it has to catch up */
2239 blockCompressor(zc, src, srcSize);
2240 return ZSTD_compressSequences(zc, dst, dstCapacity, srcSize);
2241 }
2242
2243
2244 /*! ZSTD_compress_generic() :
2245 * Compress a chunk of data into one or multiple blocks.
2246 * All blocks will be terminated, all input will be consumed.
2247 * Function will issue an error if there is not enough `dstCapacity` to hold the compressed content.
2248 * The frame is assumed to have already started (header already produced)
2249 * @return : compressed size, or an error code
2250 */
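/* Per-block outline (a summary of the loop below) :
   - each block is at most cctx->blockSize bytes and is preceded by a 3-byte header
     (bit 0 : last-block flag, bits 1-2 : block type, upper bits : block size) ;
   - indices are renormalized ("preemptive overflow correction") once lowLimit
     exceeds 1<<30, so U32 position arithmetic cannot wrap ;
   - lowLimit/dictLimit are advanced to enforce the windowLog maximum match distance ;
   - incompressible blocks fall back to a raw (bt_raw) copy of the input. */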
2251 static size_t ZSTD_compress_generic (ZSTD_CCtx* cctx,
2252 void* dst, size_t dstCapacity,
2253 const void* src, size_t srcSize,
2254 U32 lastFrameChunk)
2255 {
2256 size_t blockSize = cctx->blockSize;
2257 size_t remaining = srcSize;
2258 const BYTE* ip = (const BYTE*)src;
2259 BYTE* const ostart = (BYTE*)dst;
2260 BYTE* op = ostart;
2261 U32 const maxDist = 1 << cctx->params.cParams.windowLog;
2262
2263 if (cctx->params.fParams.checksumFlag && srcSize)
2264 XXH64_update(&cctx->xxhState, src, srcSize);
2265
2266 while (remaining) {
2267 U32 const lastBlock = lastFrameChunk & (blockSize >= remaining);
2268 size_t cSize;
2269
2270 if (dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE) return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */
2271 if (remaining < blockSize) blockSize = remaining;
2272
2273 /* preemptive overflow correction */
2274 if (cctx->lowLimit > (1<<30)) {
2275 U32 const btplus = (cctx->params.cParams.strategy == ZSTD_btlazy2) | (cctx->params.cParams.strategy == ZSTD_btopt) | (cctx->params.cParams.strategy == ZSTD_btopt2);
2276 U32 const chainMask = (1 << (cctx->params.cParams.chainLog - btplus)) - 1;
2277 U32 const supLog = MAX(cctx->params.cParams.chainLog, 17 /* blockSize */);
2278 U32 const newLowLimit = (cctx->lowLimit & chainMask) + (1 << supLog); /* preserve position % chainSize, ensure current-repcode doesn't underflow */
2279 U32 const correction = cctx->lowLimit - newLowLimit;
2280 ZSTD_reduceIndex(cctx, correction);
2281 cctx->base += correction;
2282 cctx->dictBase += correction;
2283 cctx->lowLimit = newLowLimit;
2284 cctx->dictLimit -= correction;
2285 if (cctx->nextToUpdate < correction) cctx->nextToUpdate = 0;
2286 else cctx->nextToUpdate -= correction;
2287 }
2288
2289 if ((U32)(ip+blockSize - cctx->base) > cctx->loadedDictEnd + maxDist) {
2290 /* enforce maxDist */
2291 U32 const newLowLimit = (U32)(ip+blockSize - cctx->base) - maxDist;
2292 if (cctx->lowLimit < newLowLimit) cctx->lowLimit = newLowLimit;
2293 if (cctx->dictLimit < cctx->lowLimit) cctx->dictLimit = cctx->lowLimit;
2294 }
2295
2296 cSize = ZSTD_compressBlock_internal(cctx, op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, ip, blockSize);
2297 if (ZSTD_isError(cSize)) return cSize;
2298
2299 if (cSize == 0) { /* block is not compressible */
2300 U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(blockSize << 3);
2301 if (blockSize + ZSTD_blockHeaderSize > dstCapacity) return ERROR(dstSize_tooSmall);
2302             MEM_writeLE32(op, cBlockHeader24);   /* no problem : the 4th byte will be overwritten by the copied block content */
2303 memcpy(op + ZSTD_blockHeaderSize, ip, blockSize);
2304 cSize = ZSTD_blockHeaderSize+blockSize;
2305 } else {
2306 U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
2307 MEM_writeLE24(op, cBlockHeader24);
2308 cSize += ZSTD_blockHeaderSize;
2309 }
2310
2311 remaining -= blockSize;
2312 dstCapacity -= cSize;
2313 ip += blockSize;
2314 op += cSize;
2315 }
2316
2317 if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending;
2318 return op-ostart;
2319 }
2320
2321
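/* Frame header layout (as produced by the function below) :
   4 bytes         : ZSTD_MAGICNUMBER (little-endian)
   1 byte          : frame header descriptor
                     bits 0-1 : dictID size code, bit 2 : checksum flag,
                     bit 5 : single-segment flag, bits 6-7 : frame content size code
   0-1 byte        : window descriptor (absent in single-segment mode)
   0/1/2/4 bytes   : dictID
   0/1/2/4/8 bytes : frame content size (the 1-byte form only in single-segment mode) */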
2322 static size_t ZSTD_writeFrameHeader(void* dst, size_t dstCapacity,
2323 ZSTD_parameters params, U64 pledgedSrcSize, U32 dictID)
2324 { BYTE* const op = (BYTE*)dst;
2325 U32 const dictIDSizeCode = (dictID>0) + (dictID>=256) + (dictID>=65536); /* 0-3 */
2326 U32 const checksumFlag = params.fParams.checksumFlag>0;
2327 U32 const windowSize = 1U << params.cParams.windowLog;
2328 U32 const singleSegment = params.fParams.contentSizeFlag && (windowSize > (pledgedSrcSize-1));
2329 BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3);
2330 U32 const fcsCode = params.fParams.contentSizeFlag ?
2331 (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : /* 0-3 */
2332 0;
2333 BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) );
2334 size_t pos;
2335
2336 if (dstCapacity < ZSTD_frameHeaderSize_max) return ERROR(dstSize_tooSmall);
2337
2338 MEM_writeLE32(dst, ZSTD_MAGICNUMBER);
2339 op[4] = frameHeaderDecriptionByte; pos=5;
2340 if (!singleSegment) op[pos++] = windowLogByte;
2341 switch(dictIDSizeCode)
2342 {
2343 default: /* impossible */
2344 case 0 : break;
2345 case 1 : op[pos] = (BYTE)(dictID); pos++; break;
2346 case 2 : MEM_writeLE16(op+pos, (U16)dictID); pos+=2; break;
2347 case 3 : MEM_writeLE32(op+pos, dictID); pos+=4; break;
2348 }
2349 switch(fcsCode)
2350 {
2351 default: /* impossible */
2352 case 0 : if (singleSegment) op[pos++] = (BYTE)(pledgedSrcSize); break;
2353 case 1 : MEM_writeLE16(op+pos, (U16)(pledgedSrcSize-256)); pos+=2; break;
2354 case 2 : MEM_writeLE32(op+pos, (U32)(pledgedSrcSize)); pos+=4; break;
2355 case 3 : MEM_writeLE64(op+pos, (U64)(pledgedSrcSize)); pos+=8; break;
2356 }
2357 return pos;
2358 }
2359
2360
2361 static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
2362 void* dst, size_t dstCapacity,
2363 const void* src, size_t srcSize,
2364 U32 frame, U32 lastFrameChunk)
2365 {
2366 const BYTE* const ip = (const BYTE*) src;
2367 size_t fhSize = 0;
2368
2369 if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */
2370
2371 if (frame && (cctx->stage==ZSTDcs_init)) {
2372 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, cctx->frameContentSize, cctx->dictID);
2373 if (ZSTD_isError(fhSize)) return fhSize;
2374 dstCapacity -= fhSize;
2375 dst = (char*)dst + fhSize;
2376 cctx->stage = ZSTDcs_ongoing;
2377 }
2378
2379 /* Check if blocks follow each other */
2380 if (src != cctx->nextSrc) {
2381 /* not contiguous */
2382 ptrdiff_t const delta = cctx->nextSrc - ip;
2383 cctx->lowLimit = cctx->dictLimit;
2384 cctx->dictLimit = (U32)(cctx->nextSrc - cctx->base);
2385 cctx->dictBase = cctx->base;
2386 cctx->base -= delta;
2387 cctx->nextToUpdate = cctx->dictLimit;
2388 if (cctx->dictLimit - cctx->lowLimit < HASH_READ_SIZE) cctx->lowLimit = cctx->dictLimit; /* too small extDict */
2389 }
2390
2391 /* if input and dictionary overlap : reduce dictionary (area presumed modified by input) */
2392 if ((ip+srcSize > cctx->dictBase + cctx->lowLimit) & (ip < cctx->dictBase + cctx->dictLimit)) {
2393 ptrdiff_t const highInputIdx = (ip + srcSize) - cctx->dictBase;
2394 U32 const lowLimitMax = (highInputIdx > (ptrdiff_t)cctx->dictLimit) ? cctx->dictLimit : (U32)highInputIdx;
2395 cctx->lowLimit = lowLimitMax;
2396 }
2397
2398 cctx->nextSrc = ip + srcSize;
2399
2400 { size_t const cSize = frame ?
2401 ZSTD_compress_generic (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) :
2402 ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize);
2403 if (ZSTD_isError(cSize)) return cSize;
2404 return cSize + fhSize;
2405 }
2406 }
2407
2408
2409 size_t ZSTD_compressContinue (ZSTD_CCtx* cctx,
2410 void* dst, size_t dstCapacity,
2411 const void* src, size_t srcSize)
2412 {
2413 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 0);
2414 }
2415
2416
2417 size_t ZSTD_getBlockSizeMax(ZSTD_CCtx* cctx)
2418 {
2419 return MIN (ZSTD_BLOCKSIZE_ABSOLUTEMAX, 1 << cctx->params.cParams.windowLog);
2420 }
2421
2422 size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2423 {
2424 size_t const blockSizeMax = ZSTD_getBlockSizeMax(cctx);
2425 if (srcSize > blockSizeMax) return ERROR(srcSize_wrong);
2426 return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0, 0);
2427 }
2428
2429
2430 static size_t ZSTD_loadDictionaryContent(ZSTD_CCtx* zc, const void* src, size_t srcSize)
2431 {
2432 const BYTE* const ip = (const BYTE*) src;
2433 const BYTE* const iend = ip + srcSize;
2434
2435 /* input becomes current prefix */
2436 zc->lowLimit = zc->dictLimit;
2437 zc->dictLimit = (U32)(zc->nextSrc - zc->base);
2438 zc->dictBase = zc->base;
2439 zc->base += ip - zc->nextSrc;
2440 zc->nextToUpdate = zc->dictLimit;
2441 zc->loadedDictEnd = (U32)(iend - zc->base);
2442
2443 zc->nextSrc = iend;
2444 if (srcSize <= HASH_READ_SIZE) return 0;
2445
2446 switch(zc->params.cParams.strategy)
2447 {
2448 case ZSTD_fast:
2449 ZSTD_fillHashTable (zc, iend, zc->params.cParams.searchLength);
2450 break;
2451
2452 case ZSTD_dfast:
2453 ZSTD_fillDoubleHashTable (zc, iend, zc->params.cParams.searchLength);
2454 break;
2455
2456 case ZSTD_greedy:
2457 case ZSTD_lazy:
2458 case ZSTD_lazy2:
2459 ZSTD_insertAndFindFirstIndex (zc, iend-HASH_READ_SIZE, zc->params.cParams.searchLength);
2460 break;
2461
2462 case ZSTD_btlazy2:
2463 case ZSTD_btopt:
2464 case ZSTD_btopt2:
2465 ZSTD_updateTree(zc, iend-HASH_READ_SIZE, iend, 1 << zc->params.cParams.searchLog, zc->params.cParams.searchLength);
2466 break;
2467
2468 default:
2469 return ERROR(GENERIC); /* strategy doesn't exist; impossible */
2470 }
2471
2472 zc->nextToUpdate = zc->loadedDictEnd;
2473 return 0;
2474 }
2475
2476
2477 /* Dictionaries that assign zero probability to symbols that do show up cause problems
2478    during FSE encoding. Refuse dictionaries that assign zero probability to symbols
2479 that we may encounter during compression.
2480 NOTE: This behavior is not standard and could be improved in the future. */
2481 static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) {
2482 U32 s;
2483 if (dictMaxSymbolValue < maxSymbolValue) return ERROR(dictionary_corrupted);
2484 for (s = 0; s <= maxSymbolValue; ++s) {
2485 if (normalizedCounter[s] == 0) return ERROR(dictionary_corrupted);
2486 }
2487 return 0;
2488 }
2489
2490
2491 /* Dictionary format :
2492 Magic == ZSTD_DICT_MAGIC (4 bytes)
2493 HUF_writeCTable(256)
2494 FSE_writeNCount(off)
2495 FSE_writeNCount(ml)
2496 FSE_writeNCount(ll)
2497 RepOffsets
2498 Dictionary content
2499 */
2500 /*! ZSTD_loadDictEntropyStats() :
2501 @return : size read from dictionary
2502     note : the magic number is assumed to have been checked already */
2503 static size_t ZSTD_loadDictEntropyStats(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
2504 {
2505 const BYTE* dictPtr = (const BYTE*)dict;
2506 const BYTE* const dictEnd = dictPtr + dictSize;
2507 short offcodeNCount[MaxOff+1];
2508 unsigned offcodeMaxValue = MaxOff;
2509
2510 { size_t const hufHeaderSize = HUF_readCTable(cctx->hufTable, 255, dict, dictSize);
2511 if (HUF_isError(hufHeaderSize)) return ERROR(dictionary_corrupted);
2512 dictPtr += hufHeaderSize;
2513 }
2514
2515 { unsigned offcodeLog;
2516 size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr);
2517 if (FSE_isError(offcodeHeaderSize)) return ERROR(dictionary_corrupted);
2518 if (offcodeLog > OffFSELog) return ERROR(dictionary_corrupted);
2519 /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */
2520 CHECK_E (FSE_buildCTable(cctx->offcodeCTable, offcodeNCount, offcodeMaxValue, offcodeLog), dictionary_corrupted);
2521 dictPtr += offcodeHeaderSize;
2522 }
2523
2524 { short matchlengthNCount[MaxML+1];
2525 unsigned matchlengthMaxValue = MaxML, matchlengthLog;
2526 size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr);
2527 if (FSE_isError(matchlengthHeaderSize)) return ERROR(dictionary_corrupted);
2528 if (matchlengthLog > MLFSELog) return ERROR(dictionary_corrupted);
2529 /* Every match length code must have non-zero probability */
2530 CHECK_F (ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML));
2531 CHECK_E (FSE_buildCTable(cctx->matchlengthCTable, matchlengthNCount, matchlengthMaxValue, matchlengthLog), dictionary_corrupted);
2532 dictPtr += matchlengthHeaderSize;
2533 }
2534
2535 { short litlengthNCount[MaxLL+1];
2536 unsigned litlengthMaxValue = MaxLL, litlengthLog;
2537 size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr);
2538 if (FSE_isError(litlengthHeaderSize)) return ERROR(dictionary_corrupted);
2539 if (litlengthLog > LLFSELog) return ERROR(dictionary_corrupted);
2540 /* Every literal length code must have non-zero probability */
2541 CHECK_F (ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL));
2542 CHECK_E(FSE_buildCTable(cctx->litlengthCTable, litlengthNCount, litlengthMaxValue, litlengthLog), dictionary_corrupted);
2543 dictPtr += litlengthHeaderSize;
2544 }
2545
2546 if (dictPtr+12 > dictEnd) return ERROR(dictionary_corrupted);
2547 cctx->rep[0] = MEM_readLE32(dictPtr+0); if (cctx->rep[0] >= dictSize) return ERROR(dictionary_corrupted);
2548 cctx->rep[1] = MEM_readLE32(dictPtr+4); if (cctx->rep[1] >= dictSize) return ERROR(dictionary_corrupted);
2549 cctx->rep[2] = MEM_readLE32(dictPtr+8); if (cctx->rep[2] >= dictSize) return ERROR(dictionary_corrupted);
2550 dictPtr += 12;
2551
2552 { U32 offcodeMax = MaxOff;
2553 if ((size_t)(dictEnd - dictPtr) <= ((U32)-1) - 128 KB) {
2554 U32 const maxOffset = (U32)(dictEnd - dictPtr) + 128 KB; /* The maximum offset that must be supported */
2555 /* Calculate minimum offset code required to represent maxOffset */
2556 offcodeMax = ZSTD_highbit32(maxOffset);
2557 }
2558 /* Every possible supported offset <= dictContentSize + 128 KB must be representable */
2559 CHECK_F (ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff)));
2560 }
2561
2562 cctx->flagStaticTables = 1;
2563 return dictPtr - (const BYTE*)dict;
2564 }
2565
2566 /** ZSTD_compress_insertDictionary() :
2567 * @return : 0, or an error code */
2568 static size_t ZSTD_compress_insertDictionary(ZSTD_CCtx* zc, const void* dict, size_t dictSize)
2569 {
2570 if ((dict==NULL) || (dictSize<=8)) return 0;
2571
2572 /* default : dict is pure content */
2573 if (MEM_readLE32(dict) != ZSTD_DICT_MAGIC) return ZSTD_loadDictionaryContent(zc, dict, dictSize);
2574 zc->dictID = zc->params.fParams.noDictIDFlag ? 0 : MEM_readLE32((const char*)dict+4);
2575
2576 /* known magic number : dict is parsed for entropy stats and content */
2577 { size_t const loadError = ZSTD_loadDictEntropyStats(zc, (const char*)dict+8 /* skip dictHeader */, dictSize-8);
2578 size_t const eSize = loadError + 8;
2579 if (ZSTD_isError(loadError)) return loadError;
2580 return ZSTD_loadDictionaryContent(zc, (const char*)dict+eSize, dictSize-eSize);
2581 }
2582 }
2583
2584
2585 /*! ZSTD_compressBegin_internal() :
2586 * @return : 0, or an error code */
2587 static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
2588 const void* dict, size_t dictSize,
2589 ZSTD_parameters params, U64 pledgedSrcSize)
2590 {
2591 ZSTD_compResetPolicy_e const crp = dictSize ? ZSTDcrp_fullReset : ZSTDcrp_continue;
2592 CHECK_F(ZSTD_resetCCtx_advanced(cctx, params, pledgedSrcSize, crp));
2593 return ZSTD_compress_insertDictionary(cctx, dict, dictSize);
2594 }
2595
2596
2597 /*! ZSTD_compressBegin_advanced() :
2598 * @return : 0, or an error code */
2599 size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx,
2600 const void* dict, size_t dictSize,
2601 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2602 {
2603 /* compression parameters verification and optimization */
2604 CHECK_F(ZSTD_checkCParams(params.cParams));
2605 return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, pledgedSrcSize);
2606 }
2607
2608
2609 size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel)
2610 {
2611 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
2612 return ZSTD_compressBegin_internal(cctx, dict, dictSize, params, 0);
2613 }
2614
2615
2616 size_t ZSTD_compressBegin(ZSTD_CCtx* zc, int compressionLevel)
2617 {
2618 return ZSTD_compressBegin_usingDict(zc, NULL, 0, compressionLevel);
2619 }
2620
2621
2622 /*! ZSTD_writeEpilogue() :
2623 * Ends a frame.
2624 * @return : nb of bytes written into dst (or an error code) */
2625 static size_t ZSTD_writeEpilogue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity)
2626 {
2627 BYTE* const ostart = (BYTE*)dst;
2628 BYTE* op = ostart;
2629 size_t fhSize = 0;
2630
2631 if (cctx->stage == ZSTDcs_created) return ERROR(stage_wrong); /* init missing */
2632
2633 /* special case : empty frame */
2634 if (cctx->stage == ZSTDcs_init) {
2635 fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->params, 0, 0);
2636 if (ZSTD_isError(fhSize)) return fhSize;
2637 dstCapacity -= fhSize;
2638 op += fhSize;
2639 cctx->stage = ZSTDcs_ongoing;
2640 }
2641
2642 if (cctx->stage != ZSTDcs_ending) {
2643 /* write one last empty block, make it the "last" block */
2644 U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0;
2645 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
2646 MEM_writeLE32(op, cBlockHeader24);
2647 op += ZSTD_blockHeaderSize;
2648 dstCapacity -= ZSTD_blockHeaderSize;
2649 }
2650
2651 if (cctx->params.fParams.checksumFlag) {
2652 U32 const checksum = (U32) XXH64_digest(&cctx->xxhState);
2653 if (dstCapacity<4) return ERROR(dstSize_tooSmall);
2654 MEM_writeLE32(op, checksum);
2655 op += 4;
2656 }
2657
2658 cctx->stage = ZSTDcs_created; /* return to "created but no init" status */
2659 return op-ostart;
2660 }
2661
2662
2663 size_t ZSTD_compressEnd (ZSTD_CCtx* cctx,
2664 void* dst, size_t dstCapacity,
2665 const void* src, size_t srcSize)
2666 {
2667 size_t endResult;
2668 size_t const cSize = ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 1, 1);
2669 if (ZSTD_isError(cSize)) return cSize;
2670 endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize);
2671 if (ZSTD_isError(endResult)) return endResult;
2672 return cSize + endResult;
2673 }
2674
2675
2676 static size_t ZSTD_compress_internal (ZSTD_CCtx* cctx,
2677 void* dst, size_t dstCapacity,
2678 const void* src, size_t srcSize,
2679 const void* dict,size_t dictSize,
2680 ZSTD_parameters params)
2681 {
2682 CHECK_F(ZSTD_compressBegin_internal(cctx, dict, dictSize, params, srcSize));
2683 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2684 }
2685
2686 size_t ZSTD_compress_advanced (ZSTD_CCtx* ctx,
2687 void* dst, size_t dstCapacity,
2688 const void* src, size_t srcSize,
2689 const void* dict,size_t dictSize,
2690 ZSTD_parameters params)
2691 {
2692 CHECK_F(ZSTD_checkCParams(params.cParams));
2693 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2694 }
2695
2696 size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, const void* dict, size_t dictSize, int compressionLevel)
2697 {
2698 ZSTD_parameters params = ZSTD_getParams(compressionLevel, srcSize, dictSize);
2699 params.fParams.contentSizeFlag = 1;
2700 return ZSTD_compress_internal(ctx, dst, dstCapacity, src, srcSize, dict, dictSize, params);
2701 }
2702
2703 size_t ZSTD_compressCCtx (ZSTD_CCtx* ctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2704 {
2705 return ZSTD_compress_usingDict(ctx, dst, dstCapacity, src, srcSize, NULL, 0, compressionLevel);
2706 }
2707
2708 size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int compressionLevel)
2709 {
2710 size_t result;
2711 ZSTD_CCtx ctxBody;
2712 memset(&ctxBody, 0, sizeof(ctxBody));
2713 memcpy(&ctxBody.customMem, &defaultCustomMem, sizeof(ZSTD_customMem));
2714 result = ZSTD_compressCCtx(&ctxBody, dst, dstCapacity, src, srcSize, compressionLevel);
2715 ZSTD_free(ctxBody.workSpace, defaultCustomMem); /* can't free ctxBody itself, as it's on stack; free only heap content */
2716 return result;
2717 }
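/* Minimal one-shot usage sketch (caller side, using only the public declarations
   from "zstd.h"; buffer names are illustrative) :

       size_t const bound = ZSTD_compressBound(srcSize);
       void* const cBuff = malloc(bound);
       size_t const cSize = ZSTD_compress(cBuff, bound, src, srcSize, 3);
       if (ZSTD_isError(cSize)) { / * handle error * / }
*/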
2718
2719
2720 /* ===== Dictionary API ===== */
2721
2722 struct ZSTD_CDict_s {
2723 void* dictContent;
2724 size_t dictContentSize;
2725 ZSTD_CCtx* refContext;
2726 };  /* typedef'd to ZSTD_CDict within "zstd.h" */
2727
2728 size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict)
2729 {
2730 if (cdict==NULL) return 0; /* support sizeof on NULL */
2731 return ZSTD_sizeof_CCtx(cdict->refContext) + cdict->dictContentSize;
2732 }
2733
2734 ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize, ZSTD_parameters params, ZSTD_customMem customMem)
2735 {
2736 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2737 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2738
2739 { ZSTD_CDict* const cdict = (ZSTD_CDict*) ZSTD_malloc(sizeof(ZSTD_CDict), customMem);
2740 void* const dictContent = ZSTD_malloc(dictSize, customMem);
2741 ZSTD_CCtx* const cctx = ZSTD_createCCtx_advanced(customMem);
2742
2743 if (!dictContent || !cdict || !cctx) {
2744 ZSTD_free(dictContent, customMem);
2745 ZSTD_free(cdict, customMem);
2746 ZSTD_free(cctx, customMem);
2747 return NULL;
2748 }
2749
2750 if (dictSize) {
2751 memcpy(dictContent, dict, dictSize);
2752 }
2753 { size_t const errorCode = ZSTD_compressBegin_advanced(cctx, dictContent, dictSize, params, 0);
2754 if (ZSTD_isError(errorCode)) {
2755 ZSTD_free(dictContent, customMem);
2756 ZSTD_free(cdict, customMem);
2757 ZSTD_free(cctx, customMem);
2758 return NULL;
2759 } }
2760
2761 cdict->dictContent = dictContent;
2762 cdict->dictContentSize = dictSize;
2763 cdict->refContext = cctx;
2764 return cdict;
2765 }
2766 }
2767
2768 ZSTD_CDict* ZSTD_createCDict(const void* dict, size_t dictSize, int compressionLevel)
2769 {
2770 ZSTD_customMem const allocator = { NULL, NULL, NULL };
2771 ZSTD_parameters params = ZSTD_getParams(compressionLevel, 0, dictSize);
2772 params.fParams.contentSizeFlag = 1;
2773 return ZSTD_createCDict_advanced(dict, dictSize, params, allocator);
2774 }
2775
2776 size_t ZSTD_freeCDict(ZSTD_CDict* cdict)
2777 {
2778 if (cdict==NULL) return 0; /* support free on NULL */
2779 { ZSTD_customMem const cMem = cdict->refContext->customMem;
2780 ZSTD_freeCCtx(cdict->refContext);
2781 ZSTD_free(cdict->dictContent, cMem);
2782 ZSTD_free(cdict, cMem);
2783 return 0;
2784 }
2785 }
2786
2787 static ZSTD_parameters ZSTD_getParamsFromCDict(const ZSTD_CDict* cdict) {
2788 return ZSTD_getParamsFromCCtx(cdict->refContext);
2789 }
2790
2791 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict, U64 pledgedSrcSize)
2792 {
2793 if (cdict->dictContentSize) CHECK_F(ZSTD_copyCCtx(cctx, cdict->refContext, pledgedSrcSize))
2794 else CHECK_F(ZSTD_compressBegin_advanced(cctx, NULL, 0, cdict->refContext->params, pledgedSrcSize));
2795 return 0;
2796 }
2797
2798 /*! ZSTD_compress_usingCDict() :
2799 * Compression using a digested Dictionary.
2800 *  Faster startup than ZSTD_compress_usingDict(), recommended when the same dictionary is used multiple times.
2801 * Note that compression level is decided during dictionary creation */
2802 size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
2803 void* dst, size_t dstCapacity,
2804 const void* src, size_t srcSize,
2805 const ZSTD_CDict* cdict)
2806 {
2807 CHECK_F(ZSTD_compressBegin_usingCDict(cctx, cdict, srcSize));
2808
2809 if (cdict->refContext->params.fParams.contentSizeFlag==1) {
2810 cctx->params.fParams.contentSizeFlag = 1;
2811 cctx->frameContentSize = srcSize;
2812 }
2813
2814 return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
2815 }
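
/* Illustrative sketch (example only, excluded from the build) : compressing with a
 * digested dictionary. The helper name and the fixed level 3 are assumptions made for
 * the example; in real usage the CDict would be created once and reused many times. */
#if 0
static size_t example_compress_with_cdict(ZSTD_CCtx* cctx,
                                          void* dst, size_t dstCapacity,
                                          const void* src, size_t srcSize,
                                          const void* dictBuffer, size_t dictSize)
{
    ZSTD_CDict* const cdict = ZSTD_createCDict(dictBuffer, dictSize, 3);  /* level fixed at creation */
    size_t cSize = ERROR(memory_allocation);
    if (cdict != NULL)
        cSize = ZSTD_compress_usingCDict(cctx, dst, dstCapacity, src, srcSize, cdict);
    ZSTD_freeCDict(cdict);
    return cSize;
}
#endif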
2816
2817
2818
2819 /* ******************************************************************
2820 * Streaming
2821 ********************************************************************/
2822
2823 typedef enum { zcss_init, zcss_load, zcss_flush, zcss_final } ZSTD_cStreamStage;
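/* stage meaning : zcss_load gathers input into inBuff and compresses a block once full
 * (or on flush/end), zcss_flush drains outBuff into the caller's buffer, and
 * zcss_final indicates the frame epilogue has been written and only remains to be flushed. */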
2824
2825 struct ZSTD_CStream_s {
2826 ZSTD_CCtx* cctx;
2827 ZSTD_CDict* cdictLocal;
2828 const ZSTD_CDict* cdict;
2829 char* inBuff;
2830 size_t inBuffSize;
2831 size_t inToCompress;
2832 size_t inBuffPos;
2833 size_t inBuffTarget;
2834 size_t blockSize;
2835 char* outBuff;
2836 size_t outBuffSize;
2837 size_t outBuffContentSize;
2838 size_t outBuffFlushedSize;
2839 ZSTD_cStreamStage stage;
2840 U32 checksum;
2841 U32 frameEnded;
2842 ZSTD_parameters params;
2843 ZSTD_customMem customMem;
2844 }; /* typedef'd to ZSTD_CStream within "zstd.h" */
2845
2846 ZSTD_CStream* ZSTD_createCStream(void)
2847 {
2848 return ZSTD_createCStream_advanced(defaultCustomMem);
2849 }
2850
2851 ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem)
2852 {
2853 ZSTD_CStream* zcs;
2854
2855 if (!customMem.customAlloc && !customMem.customFree) customMem = defaultCustomMem;
2856 if (!customMem.customAlloc || !customMem.customFree) return NULL;
2857
2858 zcs = (ZSTD_CStream*)ZSTD_malloc(sizeof(ZSTD_CStream), customMem);
2859 if (zcs==NULL) return NULL;
2860 memset(zcs, 0, sizeof(ZSTD_CStream));
2861 memcpy(&zcs->customMem, &customMem, sizeof(ZSTD_customMem));
2862 zcs->cctx = ZSTD_createCCtx_advanced(customMem);
2863 if (zcs->cctx == NULL) { ZSTD_freeCStream(zcs); return NULL; }
2864 return zcs;
2865 }
2866
2867 size_t ZSTD_freeCStream(ZSTD_CStream* zcs)
2868 {
2869 if (zcs==NULL) return 0; /* support free on NULL */
2870 { ZSTD_customMem const cMem = zcs->customMem;
2871 ZSTD_freeCCtx(zcs->cctx);
2872 ZSTD_freeCDict(zcs->cdictLocal);
2873 ZSTD_free(zcs->inBuff, cMem);
2874 ZSTD_free(zcs->outBuff, cMem);
2875 ZSTD_free(zcs, cMem);
2876 return 0;
2877 }
2878 }
2879
2880
2881 /*====== Initialization ======*/
2882
2883 size_t ZSTD_CStreamInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
2884 size_t ZSTD_CStreamOutSize(void) { return ZSTD_compressBound(ZSTD_BLOCKSIZE_ABSOLUTEMAX) + ZSTD_blockHeaderSize + 4 /* 32-bits hash */ ; }
2885
2886 size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize)
2887 {
2888 if (zcs->inBuffSize==0) return ERROR(stage_wrong);   /* zcs has not been initialized at least once */
2889
2890 if (zcs->cdict) CHECK_F(ZSTD_compressBegin_usingCDict(zcs->cctx, zcs->cdict, pledgedSrcSize))
2891 else CHECK_F(ZSTD_compressBegin_advanced(zcs->cctx, NULL, 0, zcs->params, pledgedSrcSize));
2892
2893 zcs->inToCompress = 0;
2894 zcs->inBuffPos = 0;
2895 zcs->inBuffTarget = zcs->blockSize;
2896 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
2897 zcs->stage = zcss_load;
2898 zcs->frameEnded = 0;
2899 return 0; /* ready to go */
2900 }
2901
2902 size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
2903 const void* dict, size_t dictSize,
2904 ZSTD_parameters params, unsigned long long pledgedSrcSize)
2905 {
2906 /* allocate buffers */
2907 { size_t const neededInBuffSize = (size_t)1 << params.cParams.windowLog;
2908 if (zcs->inBuffSize < neededInBuffSize) {
2909 zcs->inBuffSize = neededInBuffSize;
2910 ZSTD_free(zcs->inBuff, zcs->customMem);
2911 zcs->inBuff = (char*) ZSTD_malloc(neededInBuffSize, zcs->customMem);
2912 if (zcs->inBuff == NULL) return ERROR(memory_allocation);
2913 }
2914 zcs->blockSize = MIN(ZSTD_BLOCKSIZE_ABSOLUTEMAX, neededInBuffSize);
2915 }
2916 if (zcs->outBuffSize < ZSTD_compressBound(zcs->blockSize)+1) {
2917 zcs->outBuffSize = ZSTD_compressBound(zcs->blockSize)+1;
2918 ZSTD_free(zcs->outBuff, zcs->customMem);
2919 zcs->outBuff = (char*) ZSTD_malloc(zcs->outBuffSize, zcs->customMem);
2920 if (zcs->outBuff == NULL) return ERROR(memory_allocation);
2921 }
2922
2923 if (dict) {
2924 ZSTD_freeCDict(zcs->cdictLocal);
2925 zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, params, zcs->customMem);
2926 if (zcs->cdictLocal == NULL) return ERROR(memory_allocation);
2927 zcs->cdict = zcs->cdictLocal;
2928 } else zcs->cdict = NULL;
2929
2930 zcs->checksum = params.fParams.checksumFlag > 0;
2931 zcs->params = params;
2932
2933 return ZSTD_resetCStream(zcs, pledgedSrcSize);
2934 }
2935
2936 /* note : cdict must outlive compression session */
2937 size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict)
2938 {
2939 ZSTD_parameters const params = ZSTD_getParamsFromCDict(cdict);
2940 size_t const initError = ZSTD_initCStream_advanced(zcs, NULL, 0, params, 0);
2941 zcs->cdict = cdict;
2942 return initError;
2943 }
2944
2945 size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel)
2946 {
2947 ZSTD_parameters const params = ZSTD_getParams(compressionLevel, 0, dictSize);
2948 return ZSTD_initCStream_advanced(zcs, dict, dictSize, params, 0);
2949 }
2950
2951 size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel)
2952 {
2953 return ZSTD_initCStream_usingDict(zcs, NULL, 0, compressionLevel);
2954 }
2955
2956 size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
2957 {
2958 if (zcs==NULL) return 0; /* support sizeof on NULL */
2959 return sizeof(zcs) + ZSTD_sizeof_CCtx(zcs->cctx) + ZSTD_sizeof_CDict(zcs->cdictLocal) + zcs->outBuffSize + zcs->inBuffSize;
2960 }
2961
2962 /*====== Compression ======*/
2963
2964 typedef enum { zsf_gather, zsf_flush, zsf_end } ZSTD_flush_e;
2965
2966 MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
2967 {
2968 size_t const length = MIN(dstCapacity, srcSize);
2969 memcpy(dst, src, length);
2970 return length;
2971 }
2972
2973 static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs,
2974 void* dst, size_t* dstCapacityPtr,
2975 const void* src, size_t* srcSizePtr,
2976 ZSTD_flush_e const flush)
2977 {
2978 U32 someMoreWork = 1;
2979 const char* const istart = (const char*)src;
2980 const char* const iend = istart + *srcSizePtr;
2981 const char* ip = istart;
2982 char* const ostart = (char*)dst;
2983 char* const oend = ostart + *dstCapacityPtr;
2984 char* op = ostart;
2985
2986 while (someMoreWork) {
2987 switch(zcs->stage)
2988 {
2989 case zcss_init: return ERROR(init_missing);   /* call ZSTD_initCStream() first ! */
2990
2991 case zcss_load:
2992 /* complete inBuffer */
2993 { size_t const toLoad = zcs->inBuffTarget - zcs->inBuffPos;
2994 size_t const loaded = ZSTD_limitCopy(zcs->inBuff + zcs->inBuffPos, toLoad, ip, iend-ip);
2995 zcs->inBuffPos += loaded;
2996 ip += loaded;
2997 if ( (zcs->inBuffPos==zcs->inToCompress) || (!flush && (toLoad != loaded)) ) {
2998 someMoreWork = 0; break; /* not enough input to get a full block : stop there, wait for more */
2999 } }
3000 /* compress current block (note : this stage cannot be stopped in the middle) */
3001 { void* cDst;
3002 size_t cSize;
3003 size_t const iSize = zcs->inBuffPos - zcs->inToCompress;
3004 size_t oSize = oend-op;
3005 if (oSize >= ZSTD_compressBound(iSize))
3006 cDst = op; /* compress directly into output buffer (avoid flush stage) */
3007 else
3008 cDst = zcs->outBuff, oSize = zcs->outBuffSize;
3009 cSize = (flush == zsf_end) ?
3010 ZSTD_compressEnd(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize) :
3011 ZSTD_compressContinue(zcs->cctx, cDst, oSize, zcs->inBuff + zcs->inToCompress, iSize);
3012 if (ZSTD_isError(cSize)) return cSize;
3013 if (flush == zsf_end) zcs->frameEnded = 1;
3014 /* prepare next block */
3015 zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize;
3016 if (zcs->inBuffTarget > zcs->inBuffSize)
3017 zcs->inBuffPos = 0, zcs->inBuffTarget = zcs->blockSize; /* note : inBuffSize >= blockSize */
3018 zcs->inToCompress = zcs->inBuffPos;
3019 if (cDst == op) { op += cSize; break; } /* no need to flush */
3020 zcs->outBuffContentSize = cSize;
3021 zcs->outBuffFlushedSize = 0;
3022 zcs->stage = zcss_flush; /* pass-through to flush stage */
3023 }
3024
3025 case zcss_flush:
3026 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3027 size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3028 op += flushed;
3029 zcs->outBuffFlushedSize += flushed;
3030 if (toFlush!=flushed) { someMoreWork = 0; break; } /* dst too small to store flushed data : stop there */
3031 zcs->outBuffContentSize = zcs->outBuffFlushedSize = 0;
3032 zcs->stage = zcss_load;
3033 break;
3034 }
3035
3036 case zcss_final:
3037 someMoreWork = 0; /* do nothing */
3038 break;
3039
3040 default:
3041 return ERROR(GENERIC); /* impossible */
3042 }
3043 }
3044
3045 *srcSizePtr = ip - istart;
3046 *dstCapacityPtr = op - ostart;
3047 if (zcs->frameEnded) return 0;
3048 { size_t hintInSize = zcs->inBuffTarget - zcs->inBuffPos;
3049 if (hintInSize==0) hintInSize = zcs->blockSize;
3050 return hintInSize;
3051 }
3052 }
3053
3054 size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input)
3055 {
3056 size_t sizeRead = input->size - input->pos;
3057 size_t sizeWritten = output->size - output->pos;
3058 size_t const result = ZSTD_compressStream_generic(zcs,
3059 (char*)(output->dst) + output->pos, &sizeWritten,
3060 (const char*)(input->src) + input->pos, &sizeRead, zsf_gather);
3061 input->pos += sizeRead;
3062 output->pos += sizeWritten;
3063 return result;
3064 }
3065
3066
3067 /*====== Finalize ======*/
3068
3069 /*! ZSTD_flushStream() :
3070 * @return : amount of data remaining to flush */
3071 size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3072 {
3073 size_t srcSize = 0;
3074 size_t sizeWritten = output->size - output->pos;
3075 size_t const result = ZSTD_compressStream_generic(zcs,
3076 (char*)(output->dst) + output->pos, &sizeWritten,
3077 &srcSize, &srcSize, /* use a valid src address instead of NULL */
3078 zsf_flush);
3079 output->pos += sizeWritten;
3080 if (ZSTD_isError(result)) return result;
3081 return zcs->outBuffContentSize - zcs->outBuffFlushedSize; /* remaining to flush */
3082 }
3083
3084
3085 size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
3086 {
3087 BYTE* const ostart = (BYTE*)(output->dst) + output->pos;
3088 BYTE* const oend = (BYTE*)(output->dst) + output->size;
3089 BYTE* op = ostart;
3090
3091 if (zcs->stage != zcss_final) {
3092 /* flush whatever remains */
3093 size_t srcSize = 0;
3094 size_t sizeWritten = output->size - output->pos;
3095 size_t const notEnded = ZSTD_compressStream_generic(zcs, ostart, &sizeWritten, &srcSize, &srcSize, zsf_end); /* use a valid src address instead of NULL */
3096 size_t const remainingToFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3097 op += sizeWritten;
3098 if (remainingToFlush) {
3099 output->pos += sizeWritten;
3100 return remainingToFlush + ZSTD_BLOCKHEADERSIZE /* final empty block */ + (zcs->checksum * 4);
3101 }
3102 /* create epilogue */
3103 zcs->stage = zcss_final;
3104 zcs->outBuffContentSize = !notEnded ? 0 :
3105 ZSTD_compressEnd(zcs->cctx, zcs->outBuff, zcs->outBuffSize, NULL, 0); /* write epilogue, including final empty block, into outBuff */
3106 }
3107
3108 /* flush epilogue */
3109 { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize;
3110 size_t const flushed = ZSTD_limitCopy(op, oend-op, zcs->outBuff + zcs->outBuffFlushedSize, toFlush);
3111 op += flushed;
3112 zcs->outBuffFlushedSize += flushed;
3113 output->pos += op-ostart;
3114 if (toFlush==flushed) zcs->stage = zcss_init; /* end reached */
3115 return toFlush - flushed;
3116 }
3117 }
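
/* Illustrative sketch (example only, excluded from the build) : a typical streaming
 * loop over the API above. read_chunk() and write_chunk() are hypothetical
 * caller-supplied I/O helpers; they are not part of this library. */
#if 0
static size_t example_stream_compress(ZSTD_CStream* zcs, void* inBuf, void* outBuf)
{
    size_t const inSize  = ZSTD_CStreamInSize();    /* recommended buffer sizes */
    size_t const outSize = ZSTD_CStreamOutSize();
    CHECK_F(ZSTD_initCStream(zcs, 1 /* level */));
    {   size_t readSize;
        while ((readSize = read_chunk(inBuf, inSize)) != 0) {    /* hypothetical input source */
            ZSTD_inBuffer input = { inBuf, readSize, 0 };
            while (input.pos < input.size) {
                ZSTD_outBuffer output = { outBuf, outSize, 0 };
                CHECK_F(ZSTD_compressStream(zcs, &output, &input));
                write_chunk(outBuf, output.pos);                  /* hypothetical output sink */
    }   }   }
    {   size_t remaining;
        do {   /* finish the frame : flush the epilogue until nothing remains */
            ZSTD_outBuffer output = { outBuf, outSize, 0 };
            remaining = ZSTD_endStream(zcs, &output);
            if (ZSTD_isError(remaining)) return remaining;
            write_chunk(outBuf, output.pos);
        } while (remaining > 0);
    }
    return 0;
}
#endif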
3118
3119
3120
3121 /*-===== Pre-defined compression levels =====-*/
3122
3123 #define ZSTD_DEFAULT_CLEVEL 1
3124 #define ZSTD_MAX_CLEVEL 22
3125 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
3126
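/* Column legend (assumed to follow ZSTD_compressionParameters field order) :
 * W=windowLog, C=chainLog, H=hashLog, S=searchLog, L=searchLength,
 * TL/T=targetLength, strat=strategy */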
3127 static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
3128 { /* "default" */
3129 /* W, C, H, S, L, TL, strat */
3130 { 18, 12, 12, 1, 7, 16, ZSTD_fast }, /* level 0 - never used */
3131 { 19, 13, 14, 1, 7, 16, ZSTD_fast }, /* level 1 */
3132 { 19, 15, 16, 1, 6, 16, ZSTD_fast }, /* level 2 */
3133 { 20, 16, 17, 1, 5, 16, ZSTD_dfast }, /* level 3.*/
3134 { 20, 18, 18, 1, 5, 16, ZSTD_dfast }, /* level 4.*/
3135 { 20, 15, 18, 3, 5, 16, ZSTD_greedy }, /* level 5 */
3136 { 21, 16, 19, 2, 5, 16, ZSTD_lazy }, /* level 6 */
3137 { 21, 17, 20, 3, 5, 16, ZSTD_lazy }, /* level 7 */
3138 { 21, 18, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 8 */
3139 { 21, 20, 20, 3, 5, 16, ZSTD_lazy2 }, /* level 9 */
3140 { 21, 19, 21, 4, 5, 16, ZSTD_lazy2 }, /* level 10 */
3141 { 22, 20, 22, 4, 5, 16, ZSTD_lazy2 }, /* level 11 */
3142 { 22, 20, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 12 */
3143 { 22, 21, 22, 5, 5, 16, ZSTD_lazy2 }, /* level 13 */
3144 { 22, 21, 22, 6, 5, 16, ZSTD_lazy2 }, /* level 14 */
3145 { 22, 21, 21, 5, 5, 16, ZSTD_btlazy2 }, /* level 15 */
3146 { 23, 22, 22, 5, 5, 16, ZSTD_btlazy2 }, /* level 16 */
3147 { 23, 21, 22, 4, 5, 24, ZSTD_btopt }, /* level 17 */
3148 { 23, 23, 22, 6, 5, 32, ZSTD_btopt }, /* level 18 */
3149 { 23, 23, 22, 6, 3, 48, ZSTD_btopt }, /* level 19 */
3150 { 25, 25, 23, 7, 3, 64, ZSTD_btopt2 }, /* level 20 */
3151 { 26, 26, 23, 7, 3,256, ZSTD_btopt2 }, /* level 21 */
3152 { 27, 27, 25, 9, 3,512, ZSTD_btopt2 }, /* level 22 */
3153 },
3154 { /* for srcSize <= 256 KB */
3155 /* W, C, H, S, L, T, strat */
3156 { 0, 0, 0, 0, 0, 0, ZSTD_fast }, /* level 0 - not used */
3157 { 18, 13, 14, 1, 6, 8, ZSTD_fast }, /* level 1 */
3158 { 18, 14, 13, 1, 5, 8, ZSTD_dfast }, /* level 2 */
3159 { 18, 16, 15, 1, 5, 8, ZSTD_dfast }, /* level 3 */
3160 { 18, 15, 17, 1, 5, 8, ZSTD_greedy }, /* level 4.*/
3161 { 18, 16, 17, 4, 5, 8, ZSTD_greedy }, /* level 5.*/
3162 { 18, 16, 17, 3, 5, 8, ZSTD_lazy }, /* level 6.*/
3163 { 18, 17, 17, 4, 4, 8, ZSTD_lazy }, /* level 7 */
3164 { 18, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3165 { 18, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3166 { 18, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3167 { 18, 18, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 11.*/
3168 { 18, 18, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 12.*/
3169 { 18, 19, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13 */
3170 { 18, 18, 18, 4, 4, 16, ZSTD_btopt }, /* level 14.*/
3171 { 18, 18, 18, 4, 3, 16, ZSTD_btopt }, /* level 15.*/
3172 { 18, 19, 18, 6, 3, 32, ZSTD_btopt }, /* level 16.*/
3173 { 18, 19, 18, 8, 3, 64, ZSTD_btopt }, /* level 17.*/
3174 { 18, 19, 18, 9, 3,128, ZSTD_btopt }, /* level 18.*/
3175 { 18, 19, 18, 10, 3,256, ZSTD_btopt }, /* level 19.*/
3176 { 18, 19, 18, 11, 3,512, ZSTD_btopt2 }, /* level 20.*/
3177 { 18, 19, 18, 12, 3,512, ZSTD_btopt2 }, /* level 21.*/
3178 { 18, 19, 18, 13, 3,512, ZSTD_btopt2 }, /* level 22.*/
3179 },
3180 { /* for srcSize <= 128 KB */
3181 /* W, C, H, S, L, T, strat */
3182 { 17, 12, 12, 1, 7, 8, ZSTD_fast }, /* level 0 - not used */
3183 { 17, 12, 13, 1, 6, 8, ZSTD_fast }, /* level 1 */
3184 { 17, 13, 16, 1, 5, 8, ZSTD_fast }, /* level 2 */
3185 { 17, 16, 16, 2, 5, 8, ZSTD_dfast }, /* level 3 */
3186 { 17, 13, 15, 3, 4, 8, ZSTD_greedy }, /* level 4 */
3187 { 17, 15, 17, 4, 4, 8, ZSTD_greedy }, /* level 5 */
3188 { 17, 16, 17, 3, 4, 8, ZSTD_lazy }, /* level 6 */
3189 { 17, 15, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 7 */
3190 { 17, 17, 17, 4, 4, 8, ZSTD_lazy2 }, /* level 8 */
3191 { 17, 17, 17, 5, 4, 8, ZSTD_lazy2 }, /* level 9 */
3192 { 17, 17, 17, 6, 4, 8, ZSTD_lazy2 }, /* level 10 */
3193 { 17, 17, 17, 7, 4, 8, ZSTD_lazy2 }, /* level 11 */
3194 { 17, 17, 17, 8, 4, 8, ZSTD_lazy2 }, /* level 12 */
3195 { 17, 18, 17, 6, 4, 8, ZSTD_btlazy2 }, /* level 13.*/
3196 { 17, 17, 17, 7, 3, 8, ZSTD_btopt }, /* level 14.*/
3197 { 17, 17, 17, 7, 3, 16, ZSTD_btopt }, /* level 15.*/
3198 { 17, 18, 17, 7, 3, 32, ZSTD_btopt }, /* level 16.*/
3199 { 17, 18, 17, 7, 3, 64, ZSTD_btopt }, /* level 17.*/
3200 { 17, 18, 17, 7, 3,256, ZSTD_btopt }, /* level 18.*/
3201 { 17, 18, 17, 8, 3,256, ZSTD_btopt }, /* level 19.*/
3202 { 17, 18, 17, 9, 3,256, ZSTD_btopt2 }, /* level 20.*/
3203 { 17, 18, 17, 10, 3,256, ZSTD_btopt2 }, /* level 21.*/
3204 { 17, 18, 17, 11, 3,512, ZSTD_btopt2 }, /* level 22.*/
3205 },
3206 { /* for srcSize <= 16 KB */
3207 /* W, C, H, S, L, T, strat */
3208 { 14, 12, 12, 1, 7, 6, ZSTD_fast }, /* level 0 - not used */
3209 { 14, 14, 14, 1, 6, 6, ZSTD_fast }, /* level 1 */
3210 { 14, 14, 14, 1, 4, 6, ZSTD_fast }, /* level 2 */
3211 { 14, 14, 14, 1, 4, 6, ZSTD_dfast }, /* level 3.*/
3212 { 14, 14, 14, 4, 4, 6, ZSTD_greedy }, /* level 4.*/
3213 { 14, 14, 14, 3, 4, 6, ZSTD_lazy }, /* level 5.*/
3214 { 14, 14, 14, 4, 4, 6, ZSTD_lazy2 }, /* level 6 */
3215 { 14, 14, 14, 5, 4, 6, ZSTD_lazy2 }, /* level 7 */
3216 { 14, 14, 14, 6, 4, 6, ZSTD_lazy2 }, /* level 8.*/
3217 { 14, 15, 14, 6, 4, 6, ZSTD_btlazy2 }, /* level 9.*/
3218 { 14, 15, 14, 3, 3, 6, ZSTD_btopt }, /* level 10.*/
3219 { 14, 15, 14, 6, 3, 8, ZSTD_btopt }, /* level 11.*/
3220 { 14, 15, 14, 6, 3, 16, ZSTD_btopt }, /* level 12.*/
3221 { 14, 15, 14, 6, 3, 24, ZSTD_btopt }, /* level 13.*/
3222 { 14, 15, 15, 6, 3, 48, ZSTD_btopt }, /* level 14.*/
3223 { 14, 15, 15, 6, 3, 64, ZSTD_btopt }, /* level 15.*/
3224 { 14, 15, 15, 6, 3, 96, ZSTD_btopt }, /* level 16.*/
3225 { 14, 15, 15, 6, 3,128, ZSTD_btopt }, /* level 17.*/
3226 { 14, 15, 15, 6, 3,256, ZSTD_btopt }, /* level 18.*/
3227 { 14, 15, 15, 7, 3,256, ZSTD_btopt }, /* level 19.*/
3228 { 14, 15, 15, 8, 3,256, ZSTD_btopt2 }, /* level 20.*/
3229 { 14, 15, 15, 9, 3,256, ZSTD_btopt2 }, /* level 21.*/
3230 { 14, 15, 15, 10, 3,256, ZSTD_btopt2 }, /* level 22.*/
3231 },
3232 };
3233
3234 /*! ZSTD_getCParams() :
3235 * @return ZSTD_compressionParameters structure for a selected compression level, `srcSize` and `dictSize`.
3236 * Size values are optional, provide 0 if not known or unused */
3237 ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSize, size_t dictSize)
3238 {
3239 ZSTD_compressionParameters cp;
3240 size_t const addedSize = srcSize ? 0 : 500;
3241 U64 const rSize = srcSize+dictSize ? srcSize+dictSize+addedSize : (U64)-1;
3242 U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); /* intentional underflow for srcSizeHint == 0 */
3243 if (compressionLevel <= 0) compressionLevel = ZSTD_DEFAULT_CLEVEL; /* 0 == default; no negative compressionLevel yet */
3244 if (compressionLevel > ZSTD_MAX_CLEVEL) compressionLevel = ZSTD_MAX_CLEVEL;
3245 cp = ZSTD_defaultCParameters[tableID][compressionLevel];
3246 if (MEM_32bits()) { /* auto-correction, for 32-bits mode */
3247 if (cp.windowLog > ZSTD_WINDOWLOG_MAX) cp.windowLog = ZSTD_WINDOWLOG_MAX;
3248 if (cp.chainLog > ZSTD_CHAINLOG_MAX) cp.chainLog = ZSTD_CHAINLOG_MAX;
3249 if (cp.hashLog > ZSTD_HASHLOG_MAX) cp.hashLog = ZSTD_HASHLOG_MAX;
3250 }
3251 cp = ZSTD_adjustCParams(cp, srcSize, dictSize);
3252 return cp;
3253 }
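
/* Worked example, using the tables above : compressionLevel=3, srcSize=100 KB, dictSize=0
 * => addedSize=0, rSize=102400, tableID = 1+1+0 = 2 (the "srcSize <= 128 KB" table),
 * so the starting cParams are { 17, 16, 16, 2, 5, 8, ZSTD_dfast } before ZSTD_adjustCParams(). */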
3254
3255 /*! ZSTD_getParams() :
3256 * same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`).
3257 * All fields of `ZSTD_frameParameters` are set to default (0) */
3258 ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSize, size_t dictSize) {
3259 ZSTD_parameters params;
3260 ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSize, dictSize);
3261 memset(&params, 0, sizeof(params));
3262 params.cParams = cParams;
3263 return params;
3264 }
@@ -0,0 +1,900 b''
1 /**
2 * Copyright (c) 2016-present, Przemyslaw Skibinski, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11 /* Note : this file is intended to be included within zstd_compress.c */
12
13
14 #ifndef ZSTD_OPT_H_91842398743
15 #define ZSTD_OPT_H_91842398743
16
17
18 #define ZSTD_FREQ_DIV 5
19 #define ZSTD_MAX_PRICE (1<<30)
20
21 /*-*************************************
22 * Price functions for optimal parser
23 ***************************************/
24 FORCE_INLINE void ZSTD_setLog2Prices(seqStore_t* ssPtr)
25 {
26 ssPtr->log2matchLengthSum = ZSTD_highbit32(ssPtr->matchLengthSum+1);
27 ssPtr->log2litLengthSum = ZSTD_highbit32(ssPtr->litLengthSum+1);
28 ssPtr->log2litSum = ZSTD_highbit32(ssPtr->litSum+1);
29 ssPtr->log2offCodeSum = ZSTD_highbit32(ssPtr->offCodeSum+1);
30 ssPtr->factor = 1 + ((ssPtr->litSum>>5) / ssPtr->litLengthSum) + ((ssPtr->litSum<<1) / (ssPtr->litSum + ssPtr->matchSum));
31 }
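
/* Interpretation note : since ZSTD_highbit32() ~= floor(log2), a price of the form
 * log2Sum - ZSTD_highbit32(freq+1) approximates -log2(freq/total), i.e. an estimate of
 * the entropy cost, in bits, of emitting that symbol; `factor` adds a small fixed
 * per-sequence bias derived from the current literal statistics. */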
32
33
34 MEM_STATIC void ZSTD_rescaleFreqs(seqStore_t* ssPtr)
35 {
36 unsigned u;
37
38 ssPtr->cachedLiterals = NULL;
39 ssPtr->cachedPrice = ssPtr->cachedLitLength = 0;
40
41 if (ssPtr->litLengthSum == 0) {
42 ssPtr->litSum = (2<<Litbits);
43 ssPtr->litLengthSum = MaxLL+1;
44 ssPtr->matchLengthSum = MaxML+1;
45 ssPtr->offCodeSum = (MaxOff+1);
46 ssPtr->matchSum = (2<<Litbits);
47
48 for (u=0; u<=MaxLit; u++)
49 ssPtr->litFreq[u] = 2;
50 for (u=0; u<=MaxLL; u++)
51 ssPtr->litLengthFreq[u] = 1;
52 for (u=0; u<=MaxML; u++)
53 ssPtr->matchLengthFreq[u] = 1;
54 for (u=0; u<=MaxOff; u++)
55 ssPtr->offCodeFreq[u] = 1;
56 } else {
57 ssPtr->matchLengthSum = 0;
58 ssPtr->litLengthSum = 0;
59 ssPtr->offCodeSum = 0;
60 ssPtr->matchSum = 0;
61 ssPtr->litSum = 0;
62
63 for (u=0; u<=MaxLit; u++) {
64 ssPtr->litFreq[u] = 1 + (ssPtr->litFreq[u]>>ZSTD_FREQ_DIV);
65 ssPtr->litSum += ssPtr->litFreq[u];
66 }
67 for (u=0; u<=MaxLL; u++) {
68 ssPtr->litLengthFreq[u] = 1 + (ssPtr->litLengthFreq[u]>>ZSTD_FREQ_DIV);
69 ssPtr->litLengthSum += ssPtr->litLengthFreq[u];
70 }
71 for (u=0; u<=MaxML; u++) {
72 ssPtr->matchLengthFreq[u] = 1 + (ssPtr->matchLengthFreq[u]>>ZSTD_FREQ_DIV);
73 ssPtr->matchLengthSum += ssPtr->matchLengthFreq[u];
74 ssPtr->matchSum += ssPtr->matchLengthFreq[u] * (u + 3);
75 }
76 for (u=0; u<=MaxOff; u++) {
77 ssPtr->offCodeFreq[u] = 1 + (ssPtr->offCodeFreq[u]>>ZSTD_FREQ_DIV);
78 ssPtr->offCodeSum += ssPtr->offCodeFreq[u];
79 }
80 }
81
82 ZSTD_setLog2Prices(ssPtr);
83 }
84
85
86 FORCE_INLINE U32 ZSTD_getLiteralPrice(seqStore_t* ssPtr, U32 litLength, const BYTE* literals)
87 {
88 U32 price, u;
89
90 if (litLength == 0)
91 return ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[0]+1);
92
93 /* literals */
94 if (ssPtr->cachedLiterals == literals) {
95 U32 const additional = litLength - ssPtr->cachedLitLength;
96 const BYTE* literals2 = ssPtr->cachedLiterals + ssPtr->cachedLitLength;
97 price = ssPtr->cachedPrice + additional * ssPtr->log2litSum;
98 for (u=0; u < additional; u++)
99 price -= ZSTD_highbit32(ssPtr->litFreq[literals2[u]]+1);
100 ssPtr->cachedPrice = price;
101 ssPtr->cachedLitLength = litLength;
102 } else {
103 price = litLength * ssPtr->log2litSum;
104 for (u=0; u < litLength; u++)
105 price -= ZSTD_highbit32(ssPtr->litFreq[literals[u]]+1);
106
107 if (litLength >= 12) {
108 ssPtr->cachedLiterals = literals;
109 ssPtr->cachedPrice = price;
110 ssPtr->cachedLitLength = litLength;
111 }
112 }
113
114 /* literal Length */
115 { const BYTE LL_deltaCode = 19;
116 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
117 price += LL_bits[llCode] + ssPtr->log2litLengthSum - ZSTD_highbit32(ssPtr->litLengthFreq[llCode]+1);
118 }
119
120 return price;
121 }
122
123
124 FORCE_INLINE U32 ZSTD_getPrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength, const int ultra)
125 {
126 /* offset */
127 BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
128 U32 price = offCode + seqStorePtr->log2offCodeSum - ZSTD_highbit32(seqStorePtr->offCodeFreq[offCode]+1);
129
130 if (!ultra && offCode >= 20) price += (offCode-19)*2;
131
132 /* match Length */
133 { const BYTE ML_deltaCode = 36;
134 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
135 price += ML_bits[mlCode] + seqStorePtr->log2matchLengthSum - ZSTD_highbit32(seqStorePtr->matchLengthFreq[mlCode]+1);
136 }
137
138 return price + ZSTD_getLiteralPrice(seqStorePtr, litLength, literals) + seqStorePtr->factor;
139 }
140
141
142 MEM_STATIC void ZSTD_updatePrice(seqStore_t* seqStorePtr, U32 litLength, const BYTE* literals, U32 offset, U32 matchLength)
143 {
144 U32 u;
145
146 /* literals */
147 seqStorePtr->litSum += litLength;
148 for (u=0; u < litLength; u++)
149 seqStorePtr->litFreq[literals[u]]++;
150
151 /* literal Length */
152 { const BYTE LL_deltaCode = 19;
153 const BYTE llCode = (litLength>63) ? (BYTE)ZSTD_highbit32(litLength) + LL_deltaCode : LL_Code[litLength];
154 seqStorePtr->litLengthFreq[llCode]++;
155 seqStorePtr->litLengthSum++;
156 }
157
158 /* match offset */
159 { BYTE const offCode = (BYTE)ZSTD_highbit32(offset+1);
160 seqStorePtr->offCodeSum++;
161 seqStorePtr->offCodeFreq[offCode]++;
162 }
163
164 /* match Length */
165 { const BYTE ML_deltaCode = 36;
166 const BYTE mlCode = (matchLength>127) ? (BYTE)ZSTD_highbit32(matchLength) + ML_deltaCode : ML_Code[matchLength];
167 seqStorePtr->matchLengthFreq[mlCode]++;
168 seqStorePtr->matchLengthSum++;
169 }
170
171 ZSTD_setLog2Prices(seqStorePtr);
172 }
173
174
175 #define SET_PRICE(pos, mlen_, offset_, litlen_, price_) \
176 { \
177 while (last_pos < pos) { opt[last_pos+1].price = ZSTD_MAX_PRICE; last_pos++; } \
178 opt[pos].mlen = mlen_; \
179 opt[pos].off = offset_; \
180 opt[pos].litlen = litlen_; \
181 opt[pos].price = price_; \
182 }
183
184
185
186 /* Update hashTable3 up to ip (excluded)
187 Assumption : always within prefix (ie. not within extDict) */
188 FORCE_INLINE
189 U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_CCtx* zc, const BYTE* ip)
190 {
191 U32* const hashTable3 = zc->hashTable3;
192 U32 const hashLog3 = zc->hashLog3;
193 const BYTE* const base = zc->base;
194 U32 idx = zc->nextToUpdate3;
195 const U32 target = zc->nextToUpdate3 = (U32)(ip - base);
196 const size_t hash3 = ZSTD_hash3Ptr(ip, hashLog3);
197
198 while(idx < target) {
199 hashTable3[ZSTD_hash3Ptr(base+idx, hashLog3)] = idx;
200 idx++;
201 }
202
203 return hashTable3[hash3];
204 }
205
206
207 /*-*************************************
208 * Binary Tree search
209 ***************************************/
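/* ZSTD_insertBtAndGetAllMatches() inserts the current position into the binary-tree
 * match structure and, along the way, records every candidate (offset, length) pair it
 * encounters into `matches[]` for the optimal parser to price; it returns the number of
 * candidates found. */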
210 static U32 ZSTD_insertBtAndGetAllMatches (
211 ZSTD_CCtx* zc,
212 const BYTE* const ip, const BYTE* const iLimit,
213 U32 nbCompares, const U32 mls,
214 U32 extDict, ZSTD_match_t* matches, const U32 minMatchLen)
215 {
216 const BYTE* const base = zc->base;
217 const U32 current = (U32)(ip-base);
218 const U32 hashLog = zc->params.cParams.hashLog;
219 const size_t h = ZSTD_hashPtr(ip, hashLog, mls);
220 U32* const hashTable = zc->hashTable;
221 U32 matchIndex = hashTable[h];
222 U32* const bt = zc->chainTable;
223 const U32 btLog = zc->params.cParams.chainLog - 1;
224 const U32 btMask= (1U << btLog) - 1;
225 size_t commonLengthSmaller=0, commonLengthLarger=0;
226 const BYTE* const dictBase = zc->dictBase;
227 const U32 dictLimit = zc->dictLimit;
228 const BYTE* const dictEnd = dictBase + dictLimit;
229 const BYTE* const prefixStart = base + dictLimit;
230 const U32 btLow = btMask >= current ? 0 : current - btMask;
231 const U32 windowLow = zc->lowLimit;
232 U32* smallerPtr = bt + 2*(current&btMask);
233 U32* largerPtr = bt + 2*(current&btMask) + 1;
234 U32 matchEndIdx = current+8;
235 U32 dummy32; /* to be nullified at the end */
236 U32 mnum = 0;
237
238 const U32 minMatch = (mls == 3) ? 3 : 4;
239 size_t bestLength = minMatchLen-1;
240
241 if (minMatch == 3) { /* HC3 match finder */
242 U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3 (zc, ip);
243 if (matchIndex3>windowLow && (current - matchIndex3 < (1<<18))) {
244 const BYTE* match;
245 size_t currentMl=0;
246 if ((!extDict) || matchIndex3 >= dictLimit) {
247 match = base + matchIndex3;
248 if (match[bestLength] == ip[bestLength]) currentMl = ZSTD_count(ip, match, iLimit);
249 } else {
250 match = dictBase + matchIndex3;
251 if (MEM_readMINMATCH(match, MINMATCH) == MEM_readMINMATCH(ip, MINMATCH)) /* assumption : matchIndex3 <= dictLimit-4 (by table construction) */
252 currentMl = ZSTD_count_2segments(ip+MINMATCH, match+MINMATCH, iLimit, dictEnd, prefixStart) + MINMATCH;
253 }
254
255 /* save best solution */
256 if (currentMl > bestLength) {
257 bestLength = currentMl;
258 matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex3;
259 matches[mnum].len = (U32)currentMl;
260 mnum++;
261 if (currentMl > ZSTD_OPT_NUM) goto update;
262 if (ip+currentMl == iLimit) goto update; /* best possible, and avoid read overflow*/
263 }
264 }
265 }
266
267 hashTable[h] = current; /* Update Hash Table */
268
269 while (nbCompares-- && (matchIndex > windowLow)) {
270 U32* nextPtr = bt + 2*(matchIndex & btMask);
271 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */
272 const BYTE* match;
273
274 if ((!extDict) || (matchIndex+matchLength >= dictLimit)) {
275 match = base + matchIndex;
276 if (match[matchLength] == ip[matchLength]) {
277 matchLength += ZSTD_count(ip+matchLength+1, match+matchLength+1, iLimit) +1;
278 }
279 } else {
280 match = dictBase + matchIndex;
281 matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart);
282 if (matchIndex+matchLength >= dictLimit)
283 match = base + matchIndex; /* to prepare for next usage of match[matchLength] */
284 }
285
286 if (matchLength > bestLength) {
287 if (matchLength > matchEndIdx - matchIndex) matchEndIdx = matchIndex + (U32)matchLength;
288 bestLength = matchLength;
289 matches[mnum].off = ZSTD_REP_MOVE_OPT + current - matchIndex;
290 matches[mnum].len = (U32)matchLength;
291 mnum++;
292 if (matchLength > ZSTD_OPT_NUM) break;
293 if (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */
294 break; /* drop, to guarantee consistency (miss a little bit of compression) */
295 }
296
297 if (match[matchLength] < ip[matchLength]) {
298 /* match is smaller than current */
299 *smallerPtr = matchIndex; /* update smaller idx */
300 commonLengthSmaller = matchLength; /* all smaller will now have at least this guaranteed common length */
301 if (matchIndex <= btLow) { smallerPtr=&dummy32; break; } /* beyond tree size, stop the search */
302 smallerPtr = nextPtr+1; /* new "smaller" => larger of match */
303 matchIndex = nextPtr[1]; /* new matchIndex larger than previous (closer to current) */
304 } else {
305 /* match is larger than current */
306 *largerPtr = matchIndex;
307 commonLengthLarger = matchLength;
308 if (matchIndex <= btLow) { largerPtr=&dummy32; break; } /* beyond tree size, stop the search */
309 largerPtr = nextPtr;
310 matchIndex = nextPtr[0];
311 } }
312
313 *smallerPtr = *largerPtr = 0;
314
315 update:
316 zc->nextToUpdate = (matchEndIdx > current + 8) ? matchEndIdx - 8 : current+1;
317 return mnum;
318 }
319
320
321 /** Tree updater, providing best match */
322 static U32 ZSTD_BtGetAllMatches (
323 ZSTD_CCtx* zc,
324 const BYTE* const ip, const BYTE* const iLimit,
325 const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
326 {
327 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
328 ZSTD_updateTree(zc, ip, iLimit, maxNbAttempts, mls);
329 return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 0, matches, minMatchLen);
330 }
331
332
333 static U32 ZSTD_BtGetAllMatches_selectMLS (
334 ZSTD_CCtx* zc, /* Index table will be updated */
335 const BYTE* ip, const BYTE* const iHighLimit,
336 const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
337 {
338 switch(matchLengthSearch)
339 {
340 case 3 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
341 default :
342 case 4 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
343 case 5 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
344 case 6 : return ZSTD_BtGetAllMatches(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
345 }
346 }
347
348 /** Tree updater, providing best match */
349 static U32 ZSTD_BtGetAllMatches_extDict (
350 ZSTD_CCtx* zc,
351 const BYTE* const ip, const BYTE* const iLimit,
352 const U32 maxNbAttempts, const U32 mls, ZSTD_match_t* matches, const U32 minMatchLen)
353 {
354 if (ip < zc->base + zc->nextToUpdate) return 0; /* skipped area */
355 ZSTD_updateTree_extDict(zc, ip, iLimit, maxNbAttempts, mls);
356 return ZSTD_insertBtAndGetAllMatches(zc, ip, iLimit, maxNbAttempts, mls, 1, matches, minMatchLen);
357 }
358
359
360 static U32 ZSTD_BtGetAllMatches_selectMLS_extDict (
361 ZSTD_CCtx* zc, /* Index table will be updated */
362 const BYTE* ip, const BYTE* const iHighLimit,
363 const U32 maxNbAttempts, const U32 matchLengthSearch, ZSTD_match_t* matches, const U32 minMatchLen)
364 {
365 switch(matchLengthSearch)
366 {
367 case 3 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 3, matches, minMatchLen);
368 default :
369 case 4 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 4, matches, minMatchLen);
370 case 5 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 5, matches, minMatchLen);
371 case 6 : return ZSTD_BtGetAllMatches_extDict(zc, ip, iHighLimit, maxNbAttempts, 6, matches, minMatchLen);
372 }
373 }
374
375
376 /*-*******************************
377 * Optimal parser
378 *********************************/
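/* Overview (summary of the code below) : ZSTD_compressBlock_opt_generic() runs a
 * dynamic program over positions of the current chunk. opt[pos].price holds the
 * cheapest known cost to reach ip+pos, extended either by a single literal or by a
 * match/repcode found at that position and priced with ZSTD_getPrice(). Once last_pos
 * is settled, the chain of (mlen, off) fields is walked backwards and the selected
 * sequences are emitted through ZSTD_storeSeq(). */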
379 FORCE_INLINE
380 void ZSTD_compressBlock_opt_generic(ZSTD_CCtx* ctx,
381 const void* src, size_t srcSize, const int ultra)
382 {
383 seqStore_t* seqStorePtr = &(ctx->seqStore);
384 const BYTE* const istart = (const BYTE*)src;
385 const BYTE* ip = istart;
386 const BYTE* anchor = istart;
387 const BYTE* const iend = istart + srcSize;
388 const BYTE* const ilimit = iend - 8;
389 const BYTE* const base = ctx->base;
390 const BYTE* const prefixStart = base + ctx->dictLimit;
391
392 const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
393 const U32 sufficient_len = ctx->params.cParams.targetLength;
394 const U32 mls = ctx->params.cParams.searchLength;
395 const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
396
397 ZSTD_optimal_t* opt = seqStorePtr->priceTable;
398 ZSTD_match_t* matches = seqStorePtr->matchTable;
399 const BYTE* inr;
400 U32 offset, rep[ZSTD_REP_NUM];
401
402 /* init */
403 ctx->nextToUpdate3 = ctx->nextToUpdate;
404 ZSTD_rescaleFreqs(seqStorePtr);
405 ip += (ip==prefixStart);
406 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
407
408 /* Match Loop */
409 while (ip < ilimit) {
410 U32 cur, match_num, last_pos, litlen, price;
411 U32 u, mlen, best_mlen, best_off, litLength;
412 memset(opt, 0, sizeof(ZSTD_optimal_t));
413 last_pos = 0;
414 litlen = (U32)(ip - anchor);
415
416 /* check repCode */
417 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
418 for (i=(ip == anchor); i<last_i; i++) {
419 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
420 if ( (repCur > 0) && (repCur < (S32)(ip-prefixStart))
421 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(ip - repCur, minMatch))) {
422 mlen = (U32)ZSTD_count(ip+minMatch, ip+minMatch-repCur, iend) + minMatch;
423 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
424 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
425 goto _storeSequence;
426 }
427 best_off = i - (ip == anchor);
428 do {
429 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
430 if (mlen > last_pos || price < opt[mlen].price)
431 SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
432 mlen--;
433 } while (mlen >= minMatch);
434 } } }
435
436 match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, ip, iend, maxSearches, mls, matches, minMatch);
437
438 if (!last_pos && !match_num) { ip++; continue; }
439
440 if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
441 best_mlen = matches[match_num-1].len;
442 best_off = matches[match_num-1].off;
443 cur = 0;
444 last_pos = 1;
445 goto _storeSequence;
446 }
447
448 /* set prices using matches at position = 0 */
449 best_mlen = (last_pos) ? last_pos : minMatch;
450 for (u = 0; u < match_num; u++) {
451 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
452 best_mlen = matches[u].len;
453 while (mlen <= best_mlen) {
454 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
455 if (mlen > last_pos || price < opt[mlen].price)
456 SET_PRICE(mlen, mlen, matches[u].off, litlen, price); /* note : macro modifies last_pos */
457 mlen++;
458 } }
459
460 if (last_pos < minMatch) { ip++; continue; }
461
462 /* initialize opt[0] */
463 { U32 i ; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
464 opt[0].mlen = 1;
465 opt[0].litlen = litlen;
466
467 /* check further positions */
468 for (cur = 1; cur <= last_pos; cur++) {
469 inr = ip + cur;
470
471 if (opt[cur-1].mlen == 1) {
472 litlen = opt[cur-1].litlen + 1;
473 if (cur > litlen) {
474 price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
475 } else
476 price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
477 } else {
478 litlen = 1;
479 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
480 }
481
482 if (cur > last_pos || price <= opt[cur].price)
483 SET_PRICE(cur, 1, 0, litlen, price);
484
485 if (cur == last_pos) break;
486
487 if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
488 continue;
489
490 mlen = opt[cur].mlen;
491 if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
492 opt[cur].rep[2] = opt[cur-mlen].rep[1];
493 opt[cur].rep[1] = opt[cur-mlen].rep[0];
494 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
495 } else {
496 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
497 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
498 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
499 }
500
501 best_mlen = minMatch;
502 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
503 for (i=(opt[cur].mlen != 1); i<last_i; i++) { /* check rep */
504 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
505 if ( (repCur > 0) && (repCur < (S32)(inr-prefixStart))
506 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(inr - repCur, minMatch))) {
507 mlen = (U32)ZSTD_count(inr+minMatch, inr+minMatch - repCur, iend) + minMatch;
508
509 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
510 best_mlen = mlen; best_off = i; last_pos = cur + 1;
511 goto _storeSequence;
512 }
513
514 best_off = i - (opt[cur].mlen != 1);
515 if (mlen > best_mlen) best_mlen = mlen;
516
517 do {
518 if (opt[cur].mlen == 1) {
519 litlen = opt[cur].litlen;
520 if (cur > litlen) {
521 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
522 } else
523 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
524 } else {
525 litlen = 0;
526 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
527 }
528
529 if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
530 SET_PRICE(cur + mlen, mlen, i, litlen, price);
531 mlen--;
532 } while (mlen >= minMatch);
533 } } }
534
535 match_num = ZSTD_BtGetAllMatches_selectMLS(ctx, inr, iend, maxSearches, mls, matches, best_mlen);
536
537 if (match_num > 0 && (matches[match_num-1].len > sufficient_len || cur + matches[match_num-1].len >= ZSTD_OPT_NUM)) {
538 best_mlen = matches[match_num-1].len;
539 best_off = matches[match_num-1].off;
540 last_pos = cur + 1;
541 goto _storeSequence;
542 }
543
544 /* set prices using matches at position = cur */
545 for (u = 0; u < match_num; u++) {
546 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
547 best_mlen = matches[u].len;
548
549 while (mlen <= best_mlen) {
550 if (opt[cur].mlen == 1) {
551 litlen = opt[cur].litlen;
552 if (cur > litlen)
553 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
554 else
555 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
556 } else {
557 litlen = 0;
558 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
559 }
560
561 if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
562 SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
563
564 mlen++;
565 } } }
566
567 best_mlen = opt[last_pos].mlen;
568 best_off = opt[last_pos].off;
569 cur = last_pos - best_mlen;
570
571 /* store sequence */
572 _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
573 opt[0].mlen = 1;
574
575 while (1) {
576 mlen = opt[cur].mlen;
577 offset = opt[cur].off;
578 opt[cur].mlen = best_mlen;
579 opt[cur].off = best_off;
580 best_mlen = mlen;
581 best_off = offset;
582 if (mlen > cur) break;
583 cur -= mlen;
584 }
585
586 for (u = 0; u <= last_pos;) {
587 u += opt[u].mlen;
588 }
589
590 for (cur=0; cur < last_pos; ) {
591 mlen = opt[cur].mlen;
592 if (mlen == 1) { ip++; cur++; continue; }
593 offset = opt[cur].off;
594 cur += mlen;
595 litLength = (U32)(ip - anchor);
596
597 if (offset > ZSTD_REP_MOVE_OPT) {
598 rep[2] = rep[1];
599 rep[1] = rep[0];
600 rep[0] = offset - ZSTD_REP_MOVE_OPT;
601 offset--;
602 } else {
603 if (offset != 0) {
604 best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
605 if (offset != 1) rep[2] = rep[1];
606 rep[1] = rep[0];
607 rep[0] = best_off;
608 }
609 if (litLength==0) offset--;
610 }
611
612 ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
613 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
614 anchor = ip = ip + mlen;
615 } } /* for (cur=0; cur < last_pos; ) */
616
617 /* Save reps for next block */
618 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
619
620 /* Last Literals */
621 { size_t const lastLLSize = iend - anchor;
622 memcpy(seqStorePtr->lit, anchor, lastLLSize);
623 seqStorePtr->lit += lastLLSize;
624 }
625 }
626
627
628 FORCE_INLINE
629 void ZSTD_compressBlock_opt_extDict_generic(ZSTD_CCtx* ctx,
630 const void* src, size_t srcSize, const int ultra)
631 {
632 seqStore_t* seqStorePtr = &(ctx->seqStore);
633 const BYTE* const istart = (const BYTE*)src;
634 const BYTE* ip = istart;
635 const BYTE* anchor = istart;
636 const BYTE* const iend = istart + srcSize;
637 const BYTE* const ilimit = iend - 8;
638 const BYTE* const base = ctx->base;
639 const U32 lowestIndex = ctx->lowLimit;
640 const U32 dictLimit = ctx->dictLimit;
641 const BYTE* const prefixStart = base + dictLimit;
642 const BYTE* const dictBase = ctx->dictBase;
643 const BYTE* const dictEnd = dictBase + dictLimit;
644
645 const U32 maxSearches = 1U << ctx->params.cParams.searchLog;
646 const U32 sufficient_len = ctx->params.cParams.targetLength;
647 const U32 mls = ctx->params.cParams.searchLength;
648 const U32 minMatch = (ctx->params.cParams.searchLength == 3) ? 3 : 4;
649
650 ZSTD_optimal_t* opt = seqStorePtr->priceTable;
651 ZSTD_match_t* matches = seqStorePtr->matchTable;
652 const BYTE* inr;
653
654 /* init */
655 U32 offset, rep[ZSTD_REP_NUM];
656 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) rep[i]=ctx->rep[i]; }
657
658 ctx->nextToUpdate3 = ctx->nextToUpdate;
659 ZSTD_rescaleFreqs(seqStorePtr);
660 ip += (ip==prefixStart);
661
662 /* Match Loop */
663 while (ip < ilimit) {
664 U32 cur, match_num, last_pos, litlen, price;
665 U32 u, mlen, best_mlen, best_off, litLength;
666 U32 current = (U32)(ip-base);
667 memset(opt, 0, sizeof(ZSTD_optimal_t));
668 last_pos = 0;
669 opt[0].litlen = (U32)(ip - anchor);
670
671 /* check repCode */
672 { U32 i, last_i = ZSTD_REP_CHECK + (ip==anchor);
673 for (i = (ip==anchor); i<last_i; i++) {
674 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (ip==anchor)) ? (rep[0] - 1) : rep[i];
675 const U32 repIndex = (U32)(current - repCur);
676 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
677 const BYTE* const repMatch = repBase + repIndex;
678 if ( (repCur > 0 && repCur <= (S32)current)
679 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
680 && (MEM_readMINMATCH(ip, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
681 /* repcode detected, we should take it */
682 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
683 mlen = (U32)ZSTD_count_2segments(ip+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
684
685 if (mlen > sufficient_len || mlen >= ZSTD_OPT_NUM) {
686 best_mlen = mlen; best_off = i; cur = 0; last_pos = 1;
687 goto _storeSequence;
688 }
689
690 best_off = i - (ip==anchor);
691 litlen = opt[0].litlen;
692 do {
693 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
694 if (mlen > last_pos || price < opt[mlen].price)
695 SET_PRICE(mlen, mlen, i, litlen, price); /* note : macro modifies last_pos */
696 mlen--;
697 } while (mlen >= minMatch);
698 } } }
699
700 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, ip, iend, maxSearches, mls, matches, minMatch); /* first search (depth 0) */
701
702 if (!last_pos && !match_num) { ip++; continue; }
703
704 { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) opt[0].rep[i] = rep[i]; }
705 opt[0].mlen = 1;
706
707 if (match_num && (matches[match_num-1].len > sufficient_len || matches[match_num-1].len >= ZSTD_OPT_NUM)) {
708 best_mlen = matches[match_num-1].len;
709 best_off = matches[match_num-1].off;
710 cur = 0;
711 last_pos = 1;
712 goto _storeSequence;
713 }
714
715 best_mlen = (last_pos) ? last_pos : minMatch;
716
717 /* set prices using matches at position = 0 */
718 for (u = 0; u < match_num; u++) {
719 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
720 best_mlen = matches[u].len;
721 litlen = opt[0].litlen;
722 while (mlen <= best_mlen) {
723 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
724 if (mlen > last_pos || price < opt[mlen].price)
725 SET_PRICE(mlen, mlen, matches[u].off, litlen, price);
726 mlen++;
727 } }
728
729 if (last_pos < minMatch) {
730 ip++; continue;
731 }
732
733 /* check further positions */
734 for (cur = 1; cur <= last_pos; cur++) {
735 inr = ip + cur;
736
737 if (opt[cur-1].mlen == 1) {
738 litlen = opt[cur-1].litlen + 1;
739 if (cur > litlen) {
740 price = opt[cur - litlen].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-litlen);
741 } else
742 price = ZSTD_getLiteralPrice(seqStorePtr, litlen, anchor);
743 } else {
744 litlen = 1;
745 price = opt[cur - 1].price + ZSTD_getLiteralPrice(seqStorePtr, litlen, inr-1);
746 }
747
748 if (cur > last_pos || price <= opt[cur].price)
749 SET_PRICE(cur, 1, 0, litlen, price);
750
751 if (cur == last_pos) break;
752
753 if (inr > ilimit) /* last match must start at a minimum distance of 8 from oend */
754 continue;
755
756 mlen = opt[cur].mlen;
757 if (opt[cur].off > ZSTD_REP_MOVE_OPT) {
758 opt[cur].rep[2] = opt[cur-mlen].rep[1];
759 opt[cur].rep[1] = opt[cur-mlen].rep[0];
760 opt[cur].rep[0] = opt[cur].off - ZSTD_REP_MOVE_OPT;
761 } else {
762 opt[cur].rep[2] = (opt[cur].off > 1) ? opt[cur-mlen].rep[1] : opt[cur-mlen].rep[2];
763 opt[cur].rep[1] = (opt[cur].off > 0) ? opt[cur-mlen].rep[0] : opt[cur-mlen].rep[1];
764 opt[cur].rep[0] = ((opt[cur].off==ZSTD_REP_MOVE_OPT) && (mlen != 1)) ? (opt[cur-mlen].rep[0] - 1) : (opt[cur-mlen].rep[opt[cur].off]);
765 }
766
767 best_mlen = minMatch;
768 { U32 i, last_i = ZSTD_REP_CHECK + (mlen != 1);
769 for (i = (mlen != 1); i<last_i; i++) {
770 const S32 repCur = ((i==ZSTD_REP_MOVE_OPT) && (opt[cur].mlen != 1)) ? (opt[cur].rep[0] - 1) : opt[cur].rep[i];
771 const U32 repIndex = (U32)(current+cur - repCur);
772 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
773 const BYTE* const repMatch = repBase + repIndex;
774 if ( (repCur > 0 && repCur <= (S32)(current+cur))
775 && (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex>lowestIndex)) /* intentional overflow */
776 && (MEM_readMINMATCH(inr, minMatch) == MEM_readMINMATCH(repMatch, minMatch)) ) {
777 /* repcode detected */
778 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
779 mlen = (U32)ZSTD_count_2segments(inr+minMatch, repMatch+minMatch, iend, repEnd, prefixStart) + minMatch;
780
781 if (mlen > sufficient_len || cur + mlen >= ZSTD_OPT_NUM) {
782 best_mlen = mlen; best_off = i; last_pos = cur + 1;
783 goto _storeSequence;
784 }
785
786 best_off = i - (opt[cur].mlen != 1);
787 if (mlen > best_mlen) best_mlen = mlen;
788
789 do {
790 if (opt[cur].mlen == 1) {
791 litlen = opt[cur].litlen;
792 if (cur > litlen) {
793 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, inr-litlen, best_off, mlen - MINMATCH, ultra);
794 } else
795 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, best_off, mlen - MINMATCH, ultra);
796 } else {
797 litlen = 0;
798 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, best_off, mlen - MINMATCH, ultra);
799 }
800
801 if (cur + mlen > last_pos || price <= opt[cur + mlen].price)
802 SET_PRICE(cur + mlen, mlen, i, litlen, price);
803 mlen--;
804 } while (mlen >= minMatch);
805 } } }
806
807 match_num = ZSTD_BtGetAllMatches_selectMLS_extDict(ctx, inr, iend, maxSearches, mls, matches, minMatch);
808
809 if (match_num > 0 && matches[match_num-1].len > sufficient_len) {
810 best_mlen = matches[match_num-1].len;
811 best_off = matches[match_num-1].off;
812 last_pos = cur + 1;
813 goto _storeSequence;
814 }
815
816 /* set prices using matches at position = cur */
817 for (u = 0; u < match_num; u++) {
818 mlen = (u>0) ? matches[u-1].len+1 : best_mlen;
819 best_mlen = (cur + matches[u].len < ZSTD_OPT_NUM) ? matches[u].len : ZSTD_OPT_NUM - cur;
820
821 while (mlen <= best_mlen) {
822 if (opt[cur].mlen == 1) {
823 litlen = opt[cur].litlen;
824 if (cur > litlen)
825 price = opt[cur - litlen].price + ZSTD_getPrice(seqStorePtr, litlen, ip+cur-litlen, matches[u].off-1, mlen - MINMATCH, ultra);
826 else
827 price = ZSTD_getPrice(seqStorePtr, litlen, anchor, matches[u].off-1, mlen - MINMATCH, ultra);
828 } else {
829 litlen = 0;
830 price = opt[cur].price + ZSTD_getPrice(seqStorePtr, 0, NULL, matches[u].off-1, mlen - MINMATCH, ultra);
831 }
832
833 if (cur + mlen > last_pos || (price < opt[cur + mlen].price))
834 SET_PRICE(cur + mlen, mlen, matches[u].off, litlen, price);
835
836 mlen++;
837 } } } /* for (cur = 1; cur <= last_pos; cur++) */
838
839 best_mlen = opt[last_pos].mlen;
840 best_off = opt[last_pos].off;
841 cur = last_pos - best_mlen;
842
843 /* store sequence */
844 _storeSequence: /* cur, last_pos, best_mlen, best_off have to be set */
845 opt[0].mlen = 1;
846
847 while (1) {
848 mlen = opt[cur].mlen;
849 offset = opt[cur].off;
850 opt[cur].mlen = best_mlen;
851 opt[cur].off = best_off;
852 best_mlen = mlen;
853 best_off = offset;
854 if (mlen > cur) break;
855 cur -= mlen;
856 }
857
858 for (u = 0; u <= last_pos; ) {
859 u += opt[u].mlen;
860 }
861
862 for (cur=0; cur < last_pos; ) {
863 mlen = opt[cur].mlen;
864 if (mlen == 1) { ip++; cur++; continue; }
865 offset = opt[cur].off;
866 cur += mlen;
867 litLength = (U32)(ip - anchor);
868
869 if (offset > ZSTD_REP_MOVE_OPT) {
870 rep[2] = rep[1];
871 rep[1] = rep[0];
872 rep[0] = offset - ZSTD_REP_MOVE_OPT;
873 offset--;
874 } else {
875 if (offset != 0) {
876 best_off = ((offset==ZSTD_REP_MOVE_OPT) && (litLength==0)) ? (rep[0] - 1) : (rep[offset]);
877 if (offset != 1) rep[2] = rep[1];
878 rep[1] = rep[0];
879 rep[0] = best_off;
880 }
881
882 if (litLength==0) offset--;
883 }
884
885 ZSTD_updatePrice(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
886 ZSTD_storeSeq(seqStorePtr, litLength, anchor, offset, mlen-MINMATCH);
887 anchor = ip = ip + mlen;
888 } } /* for (cur=0; cur < last_pos; ) */
889
890 /* Save reps for next block */
891 { int i; for (i=0; i<ZSTD_REP_NUM; i++) ctx->savedRep[i] = rep[i]; }
892
893 /* Last Literals */
894 { size_t lastLLSize = iend - anchor;
895 memcpy(seqStorePtr->lit, anchor, lastLLSize);
896 seqStorePtr->lit += lastLLSize;
897 }
898 }
899
900 #endif /* ZSTD_OPT_H_91842398743 */
@@ -0,0 +1,883 b''
1 /* ******************************************************************
2 Huffman decoder, part of New Generation Entropy library
3 Copyright (C) 2013-2016, Yann Collet.
4
5 BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are
9 met:
10
11 * Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
13 * Redistributions in binary form must reproduce the above
14 copyright notice, this list of conditions and the following disclaimer
15 in the documentation and/or other materials provided with the
16 distribution.
17
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 You can contact the author at :
31 - FSE+HUF source repository : https://github.com/Cyan4973/FiniteStateEntropy
32 - Public forum : https://groups.google.com/forum/#!forum/lz4c
33 ****************************************************************** */
34
35 /* **************************************************************
36 * Compiler specifics
37 ****************************************************************/
38 #if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
39 /* inline is defined */
40 #elif defined(_MSC_VER) || defined(__GNUC__)
41 # define inline __inline
42 #else
43 # define inline /* disable inline */
44 #endif
45
46 #ifdef _MSC_VER /* Visual Studio */
47 # pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
48 #endif
49
50
51 /* **************************************************************
52 * Dependencies
53 ****************************************************************/
54 #include <string.h> /* memcpy, memset */
55 #include "bitstream.h" /* BIT_* */
56 #include "fse.h" /* header compression */
57 #define HUF_STATIC_LINKING_ONLY
58 #include "huf.h"
59
60
61 /* **************************************************************
62 * Error Management
63 ****************************************************************/
64 #define HUF_STATIC_ASSERT(c) { enum { HUF_static_assert = 1/(int)(!!(c)) }; } /* use only *after* variable declarations */
65
66
67 /*-***************************/
68 /* generic DTableDesc */
69 /*-***************************/
70
71 typedef struct { BYTE maxTableLog; BYTE tableType; BYTE tableLog; BYTE reserved; } DTableDesc;
72
73 static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
74 {
75 DTableDesc dtd;
76 memcpy(&dtd, table, sizeof(dtd));
77 return dtd;
78 }
79
80
81 /*-***************************/
82 /* single-symbol decoding */
83 /*-***************************/
84
85 typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX2; /* single-symbol decoding */
86
87 size_t HUF_readDTableX2 (HUF_DTable* DTable, const void* src, size_t srcSize)
88 {
89 BYTE huffWeight[HUF_SYMBOLVALUE_MAX + 1];
90 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1]; /* large enough for values from 0 to 16 */
91 U32 tableLog = 0;
92 U32 nbSymbols = 0;
93 size_t iSize;
94 void* const dtPtr = DTable + 1;
95 HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
96
97 HUF_STATIC_ASSERT(sizeof(DTableDesc) == sizeof(HUF_DTable));
98 /* memset(huffWeight, 0, sizeof(huffWeight)); */ /* is not necessary, even though some analyzers complain ... */
99
100 iSize = HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX + 1, rankVal, &nbSymbols, &tableLog, src, srcSize);
101 if (HUF_isError(iSize)) return iSize;
102
103 /* Table header */
104 { DTableDesc dtd = HUF_getDTableDesc(DTable);
105 if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge); /* DTable too small, huffman tree cannot fit in */
106 dtd.tableType = 0;
107 dtd.tableLog = (BYTE)tableLog;
108 memcpy(DTable, &dtd, sizeof(dtd));
109 }
110
111 /* Prepare ranks */
112 { U32 n, nextRankStart = 0;
113 for (n=1; n<tableLog+1; n++) {
114 U32 current = nextRankStart;
115 nextRankStart += (rankVal[n] << (n-1));
116 rankVal[n] = current;
117 } }
118
119 /* fill DTable */
120 { U32 n;
121 for (n=0; n<nbSymbols; n++) {
122 U32 const w = huffWeight[n];
123 U32 const length = (1 << w) >> 1;
124 U32 i;
125 HUF_DEltX2 D;
126 D.byte = (BYTE)n; D.nbBits = (BYTE)(tableLog + 1 - w);
127 for (i = rankVal[w]; i < rankVal[w] + length; i++)
128 dt[i] = D;
129 rankVal[w] += length;
130 } }
131
132 return iSize;
133 }
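The fill loop above follows a simple invariant: a symbol of weight w occupies (1 << w) >> 1 consecutive DTable cells and costs tableLog + 1 - w bits per lookup, so every possible tableLog-bit prefix resolves to exactly one symbol. A minimal editorial sketch (not part of the vendored source; huf_x2_weight_layout is an invented name) that prints this mapping:

#include <stdio.h>

/* Illustrative only: mirrors the weight arithmetic of HUF_readDTableX2. */
static void huf_x2_weight_layout(unsigned tableLog)
{
    unsigned w;
    for (w = 1; w <= tableLog; w++) {
        unsigned const length = (1u << w) >> 1;    /* DTable cells filled for this weight */
        unsigned const nbBits = tableLog + 1 - w;  /* bits consumed per decoded symbol */
        printf("weight %u : %u cells, %u bits\n", w, length, nbBits);
    }
}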
134
135
136 static BYTE HUF_decodeSymbolX2(BIT_DStream_t* Dstream, const HUF_DEltX2* dt, const U32 dtLog)
137 {
138 size_t const val = BIT_lookBitsFast(Dstream, dtLog); /* note : dtLog >= 1 */
139 BYTE const c = dt[val].byte;
140 BIT_skipBits(Dstream, dt[val].nbBits);
141 return c;
142 }
143
144 #define HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr) \
145 *ptr++ = HUF_decodeSymbolX2(DStreamPtr, dt, dtLog)
146
147 #define HUF_DECODE_SYMBOLX2_1(ptr, DStreamPtr) \
148 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
149 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
150
151 #define HUF_DECODE_SYMBOLX2_2(ptr, DStreamPtr) \
152 if (MEM_64bits()) \
153 HUF_DECODE_SYMBOLX2_0(ptr, DStreamPtr)
154
155 static inline size_t HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, const HUF_DEltX2* const dt, const U32 dtLog)
156 {
157 BYTE* const pStart = p;
158
159 /* up to 4 symbols at a time */
160 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p <= pEnd-4)) {
161 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
162 HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
163 HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
164 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
165 }
166
167 /* closer to the end */
168 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) && (p < pEnd))
169 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
170
171 /* no more data to retrieve from bitstream, hence no need to reload */
172 while (p < pEnd)
173 HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
174
175 return pEnd-pStart;
176 }
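/* Editorial note (not vendored code) on the unrolling above :
 * HUF_DECODE_SYMBOLX2_0 always decodes a symbol, _1 decodes only when the bit
 * container is comfortably large (64-bit hosts, or tables of at most 12 bits),
 * and _2 only on 64-bit hosts.  One pass of the main loop therefore consumes
 * up to 4 symbols between reloads on 64-bit targets and 2 on 32-bit targets,
 * staying within the bits guaranteed available after BIT_reloadDStream(). */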
177
178 static size_t HUF_decompress1X2_usingDTable_internal(
179 void* dst, size_t dstSize,
180 const void* cSrc, size_t cSrcSize,
181 const HUF_DTable* DTable)
182 {
183 BYTE* op = (BYTE*)dst;
184 BYTE* const oend = op + dstSize;
185 const void* dtPtr = DTable + 1;
186 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
187 BIT_DStream_t bitD;
188 DTableDesc const dtd = HUF_getDTableDesc(DTable);
189 U32 const dtLog = dtd.tableLog;
190
191 { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
192 if (HUF_isError(errorCode)) return errorCode; }
193
194 HUF_decodeStreamX2(op, &bitD, oend, dt, dtLog);
195
196 /* check */
197 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
198
199 return dstSize;
200 }
201
202 size_t HUF_decompress1X2_usingDTable(
203 void* dst, size_t dstSize,
204 const void* cSrc, size_t cSrcSize,
205 const HUF_DTable* DTable)
206 {
207 DTableDesc dtd = HUF_getDTableDesc(DTable);
208 if (dtd.tableType != 0) return ERROR(GENERIC);
209 return HUF_decompress1X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
210 }
211
212 size_t HUF_decompress1X2_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
213 {
214 const BYTE* ip = (const BYTE*) cSrc;
215
216 size_t const hSize = HUF_readDTableX2 (DCtx, cSrc, cSrcSize);
217 if (HUF_isError(hSize)) return hSize;
218 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
219 ip += hSize; cSrcSize -= hSize;
220
221 return HUF_decompress1X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
222 }
223
224 size_t HUF_decompress1X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
225 {
226 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
227 return HUF_decompress1X2_DCtx (DTable, dst, dstSize, cSrc, cSrcSize);
228 }
229
230
231 static size_t HUF_decompress4X2_usingDTable_internal(
232 void* dst, size_t dstSize,
233 const void* cSrc, size_t cSrcSize,
234 const HUF_DTable* DTable)
235 {
236 /* Check */
237 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
238
239 { const BYTE* const istart = (const BYTE*) cSrc;
240 BYTE* const ostart = (BYTE*) dst;
241 BYTE* const oend = ostart + dstSize;
242 const void* const dtPtr = DTable + 1;
243 const HUF_DEltX2* const dt = (const HUF_DEltX2*)dtPtr;
244
245 /* Init */
246 BIT_DStream_t bitD1;
247 BIT_DStream_t bitD2;
248 BIT_DStream_t bitD3;
249 BIT_DStream_t bitD4;
250 size_t const length1 = MEM_readLE16(istart);
251 size_t const length2 = MEM_readLE16(istart+2);
252 size_t const length3 = MEM_readLE16(istart+4);
253 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
254 const BYTE* const istart1 = istart + 6; /* jumpTable */
255 const BYTE* const istart2 = istart1 + length1;
256 const BYTE* const istart3 = istart2 + length2;
257 const BYTE* const istart4 = istart3 + length3;
258 const size_t segmentSize = (dstSize+3) / 4;
259 BYTE* const opStart2 = ostart + segmentSize;
260 BYTE* const opStart3 = opStart2 + segmentSize;
261 BYTE* const opStart4 = opStart3 + segmentSize;
262 BYTE* op1 = ostart;
263 BYTE* op2 = opStart2;
264 BYTE* op3 = opStart3;
265 BYTE* op4 = opStart4;
266 U32 endSignal;
267 DTableDesc const dtd = HUF_getDTableDesc(DTable);
268 U32 const dtLog = dtd.tableLog;
269
270 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
271 { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
272 if (HUF_isError(errorCode)) return errorCode; }
273 { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
274 if (HUF_isError(errorCode)) return errorCode; }
275 { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
276 if (HUF_isError(errorCode)) return errorCode; }
277 { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
278 if (HUF_isError(errorCode)) return errorCode; }
279
280 /* 16-32 symbols per loop (4-8 symbols per stream) */
281 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
282 for ( ; (endSignal==BIT_DStream_unfinished) && (op4<(oend-7)) ; ) {
283 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
284 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
285 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
286 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
287 HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
288 HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
289 HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
290 HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
291 HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
292 HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
293 HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
294 HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
295 HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
296 HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
297 HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
298 HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
299 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
300 }
301
302 /* check corruption */
303 if (op1 > opStart2) return ERROR(corruption_detected);
304 if (op2 > opStart3) return ERROR(corruption_detected);
305 if (op3 > opStart4) return ERROR(corruption_detected);
306 /* note : op4 already verified within main loop */
307
308 /* finish bitStreams one by one */
309 HUF_decodeStreamX2(op1, &bitD1, opStart2, dt, dtLog);
310 HUF_decodeStreamX2(op2, &bitD2, opStart3, dt, dtLog);
311 HUF_decodeStreamX2(op3, &bitD3, opStart4, dt, dtLog);
312 HUF_decodeStreamX2(op4, &bitD4, oend, dt, dtLog);
313
314 /* check */
315 endSignal = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
316 if (!endSignal) return ERROR(corruption_detected);
317
318 /* decoded size */
319 return dstSize;
320 }
321 }
322
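The 4-stream layout parsed above can be sketched in isolation. This is an editorial illustration only (four_streams_t and split_four_streams are invented names, not zstd API): a 6-byte jump table stores three little-endian 16-bit stream lengths, and the fourth stream's length is whatever remains of the compressed input.

#include <stddef.h>
#include <stdint.h>

typedef struct { const uint8_t* start[4]; size_t len[4]; } four_streams_t;

/* Split a 4-stream Huffman section into its sub-streams (illustrative). */
static int split_four_streams(const uint8_t* src, size_t srcSize, four_streams_t* out)
{
    size_t total;
    if (srcSize < 10) return -1;                            /* jump table + 1 byte per stream */
    out->len[0] = (size_t)src[0] | ((size_t)src[1] << 8);   /* little-endian 16-bit lengths */
    out->len[1] = (size_t)src[2] | ((size_t)src[3] << 8);
    out->len[2] = (size_t)src[4] | ((size_t)src[5] << 8);
    total = out->len[0] + out->len[1] + out->len[2] + 6;
    if (total > srcSize) return -1;                         /* corrupted lengths */
    out->len[3] = srcSize - total;                          /* fourth length is implicit */
    out->start[0] = src + 6;
    out->start[1] = out->start[0] + out->len[0];
    out->start[2] = out->start[1] + out->len[1];
    out->start[3] = out->start[2] + out->len[2];
    return 0;
}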
323
324 size_t HUF_decompress4X2_usingDTable(
325 void* dst, size_t dstSize,
326 const void* cSrc, size_t cSrcSize,
327 const HUF_DTable* DTable)
328 {
329 DTableDesc dtd = HUF_getDTableDesc(DTable);
330 if (dtd.tableType != 0) return ERROR(GENERIC);
331 return HUF_decompress4X2_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
332 }
333
334
335 size_t HUF_decompress4X2_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
336 {
337 const BYTE* ip = (const BYTE*) cSrc;
338
339 size_t const hSize = HUF_readDTableX2 (dctx, cSrc, cSrcSize);
340 if (HUF_isError(hSize)) return hSize;
341 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
342 ip += hSize; cSrcSize -= hSize;
343
344 return HUF_decompress4X2_usingDTable_internal (dst, dstSize, ip, cSrcSize, dctx);
345 }
346
347 size_t HUF_decompress4X2 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
348 {
349 HUF_CREATE_STATIC_DTABLEX2(DTable, HUF_TABLELOG_MAX);
350 return HUF_decompress4X2_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
351 }
352
353
354 /* *************************/
355 /* double-symbols decoding */
356 /* *************************/
357 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX4; /* double-symbols decoding */
358
359 typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
360
361 static void HUF_fillDTableX4Level2(HUF_DEltX4* DTable, U32 sizeLog, const U32 consumed,
362 const U32* rankValOrigin, const int minWeight,
363 const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
364 U32 nbBitsBaseline, U16 baseSeq)
365 {
366 HUF_DEltX4 DElt;
367 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
368
369 /* get pre-calculated rankVal */
370 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
371
372 /* fill skipped values */
373 if (minWeight>1) {
374 U32 i, skipSize = rankVal[minWeight];
375 MEM_writeLE16(&(DElt.sequence), baseSeq);
376 DElt.nbBits = (BYTE)(consumed);
377 DElt.length = 1;
378 for (i = 0; i < skipSize; i++)
379 DTable[i] = DElt;
380 }
381
382 /* fill DTable */
383 { U32 s; for (s=0; s<sortedListSize; s++) { /* note : sortedSymbols already skipped */
384 const U32 symbol = sortedSymbols[s].symbol;
385 const U32 weight = sortedSymbols[s].weight;
386 const U32 nbBits = nbBitsBaseline - weight;
387 const U32 length = 1 << (sizeLog-nbBits);
388 const U32 start = rankVal[weight];
389 U32 i = start;
390 const U32 end = start + length;
391
392 MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
393 DElt.nbBits = (BYTE)(nbBits + consumed);
394 DElt.length = 2;
395 do { DTable[i++] = DElt; } while (i<end); /* since length >= 1 */
396
397 rankVal[weight] += length;
398 } }
399 }
400
401 typedef U32 rankVal_t[HUF_TABLELOG_ABSOLUTEMAX][HUF_TABLELOG_ABSOLUTEMAX + 1];
402
403 static void HUF_fillDTableX4(HUF_DEltX4* DTable, const U32 targetLog,
404 const sortedSymbol_t* sortedList, const U32 sortedListSize,
405 const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
406 const U32 nbBitsBaseline)
407 {
408 U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
409 const int scaleLog = nbBitsBaseline - targetLog; /* note : targetLog >= srcLog, hence scaleLog <= 1 */
410 const U32 minBits = nbBitsBaseline - maxWeight;
411 U32 s;
412
413 memcpy(rankVal, rankValOrigin, sizeof(rankVal));
414
415 /* fill DTable */
416 for (s=0; s<sortedListSize; s++) {
417 const U16 symbol = sortedList[s].symbol;
418 const U32 weight = sortedList[s].weight;
419 const U32 nbBits = nbBitsBaseline - weight;
420 const U32 start = rankVal[weight];
421 const U32 length = 1 << (targetLog-nbBits);
422
423 if (targetLog-nbBits >= minBits) { /* enough room for a second symbol */
424 U32 sortedRank;
425 int minWeight = nbBits + scaleLog;
426 if (minWeight < 1) minWeight = 1;
427 sortedRank = rankStart[minWeight];
428 HUF_fillDTableX4Level2(DTable+start, targetLog-nbBits, nbBits,
429 rankValOrigin[nbBits], minWeight,
430 sortedList+sortedRank, sortedListSize-sortedRank,
431 nbBitsBaseline, symbol);
432 } else {
433 HUF_DEltX4 DElt;
434 MEM_writeLE16(&(DElt.sequence), symbol);
435 DElt.nbBits = (BYTE)(nbBits);
436 DElt.length = 1;
437 { U32 const end = start + length;
438 U32 u;
439 for (u = start; u < end; u++) DTable[u] = DElt;
440 } }
441 rankVal[weight] += length;
442 }
443 }
444
445 size_t HUF_readDTableX4 (HUF_DTable* DTable, const void* src, size_t srcSize)
446 {
447 BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
448 sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
449 U32 rankStats[HUF_TABLELOG_ABSOLUTEMAX + 1] = { 0 };
450 U32 rankStart0[HUF_TABLELOG_ABSOLUTEMAX + 2] = { 0 };
451 U32* const rankStart = rankStart0+1;
452 rankVal_t rankVal;
453 U32 tableLog, maxW, sizeOfSort, nbSymbols;
454 DTableDesc dtd = HUF_getDTableDesc(DTable);
455 U32 const maxTableLog = dtd.maxTableLog;
456 size_t iSize;
457 void* dtPtr = DTable+1; /* force compiler to avoid strict-aliasing */
458 HUF_DEltX4* const dt = (HUF_DEltX4*)dtPtr;
459
460 HUF_STATIC_ASSERT(sizeof(HUF_DEltX4) == sizeof(HUF_DTable)); /* if compilation fails here, assertion is false */
461 if (maxTableLog > HUF_TABLELOG_ABSOLUTEMAX) return ERROR(tableLog_tooLarge);
462 /* memset(weightList, 0, sizeof(weightList)); */ /* is not necessary, even though some analyzers complain ... */
463
464 iSize = HUF_readStats(weightList, HUF_SYMBOLVALUE_MAX + 1, rankStats, &nbSymbols, &tableLog, src, srcSize);
465 if (HUF_isError(iSize)) return iSize;
466
467 /* check result */
468 if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge); /* DTable can't fit code depth */
469
470 /* find maxWeight */
471 for (maxW = tableLog; rankStats[maxW]==0; maxW--) {} /* necessarily finds a solution before 0 */
472
473 /* Get start index of each weight */
474 { U32 w, nextRankStart = 0;
475 for (w=1; w<maxW+1; w++) {
476 U32 current = nextRankStart;
477 nextRankStart += rankStats[w];
478 rankStart[w] = current;
479 }
480 rankStart[0] = nextRankStart; /* put all 0w symbols at the end of the sorted list */
481 sizeOfSort = nextRankStart;
482 }
483
484 /* sort symbols by weight */
485 { U32 s;
486 for (s=0; s<nbSymbols; s++) {
487 U32 const w = weightList[s];
488 U32 const r = rankStart[w]++;
489 sortedSymbol[r].symbol = (BYTE)s;
490 sortedSymbol[r].weight = (BYTE)w;
491 }
492 rankStart[0] = 0; /* forget 0w symbols; this is the beginning of weight(1) */
493 }
494
495 /* Build rankVal */
496 { U32* const rankVal0 = rankVal[0];
497 { int const rescale = (maxTableLog-tableLog) - 1; /* tableLog <= maxTableLog */
498 U32 nextRankVal = 0;
499 U32 w;
500 for (w=1; w<maxW+1; w++) {
501 U32 current = nextRankVal;
502 nextRankVal += rankStats[w] << (w+rescale);
503 rankVal0[w] = current;
504 } }
505 { U32 const minBits = tableLog+1 - maxW;
506 U32 consumed;
507 for (consumed = minBits; consumed < maxTableLog - minBits + 1; consumed++) {
508 U32* const rankValPtr = rankVal[consumed];
509 U32 w;
510 for (w = 1; w < maxW+1; w++) {
511 rankValPtr[w] = rankVal0[w] >> consumed;
512 } } } }
513
514 HUF_fillDTableX4(dt, maxTableLog,
515 sortedSymbol, sizeOfSort,
516 rankStart0, rankVal, maxW,
517 tableLog+1);
518
519 dtd.tableLog = (BYTE)maxTableLog;
520 dtd.tableType = 1;
521 memcpy(DTable, &dtd, sizeof(dtd));
522 return iSize;
523 }
524
525
526 static U32 HUF_decodeSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
527 {
528 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
529 memcpy(op, dt+val, 2);
530 BIT_skipBits(DStream, dt[val].nbBits);
531 return dt[val].length;
532 }
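/* Editorial note (not vendored code) : each HUF_DEltX4 cell stores up to two
 * decoded bytes in 'sequence' (written little-endian by MEM_writeLE16 during
 * table construction), the total bits consumed by the pair in 'nbBits', and
 * the number of valid output bytes in 'length' (1 or 2).  The 2-byte memcpy
 * above therefore emits both symbols at once, and the caller advances its
 * output pointer by the returned length. */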
533
534 static U32 HUF_decodeLastSymbolX4(void* op, BIT_DStream_t* DStream, const HUF_DEltX4* dt, const U32 dtLog)
535 {
536 size_t const val = BIT_lookBitsFast(DStream, dtLog); /* note : dtLog >= 1 */
537 memcpy(op, dt+val, 1);
538 if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
539 else {
540 if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
541 BIT_skipBits(DStream, dt[val].nbBits);
542 if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
543 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8); /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
544 } }
545 return 1;
546 }
547
548
549 #define HUF_DECODE_SYMBOLX4_0(ptr, DStreamPtr) \
550 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
551
552 #define HUF_DECODE_SYMBOLX4_1(ptr, DStreamPtr) \
553 if (MEM_64bits() || (HUF_TABLELOG_MAX<=12)) \
554 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
555
556 #define HUF_DECODE_SYMBOLX4_2(ptr, DStreamPtr) \
557 if (MEM_64bits()) \
558 ptr += HUF_decodeSymbolX4(ptr, DStreamPtr, dt, dtLog)
559
560 static inline size_t HUF_decodeStreamX4(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd, const HUF_DEltX4* const dt, const U32 dtLog)
561 {
562 BYTE* const pStart = p;
563
564 /* up to 8 symbols at a time */
565 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
566 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
567 HUF_DECODE_SYMBOLX4_1(p, bitDPtr);
568 HUF_DECODE_SYMBOLX4_2(p, bitDPtr);
569 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
570 }
571
572 /* closer to the end : up to 2 symbols at a time */
573 while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
574 HUF_DECODE_SYMBOLX4_0(p, bitDPtr);
575
576 while (p <= pEnd-2)
577 HUF_DECODE_SYMBOLX4_0(p, bitDPtr); /* no need to reload : reached the end of DStream */
578
579 if (p < pEnd)
580 p += HUF_decodeLastSymbolX4(p, bitDPtr, dt, dtLog);
581
582 return p-pStart;
583 }
584
585
586 static size_t HUF_decompress1X4_usingDTable_internal(
587 void* dst, size_t dstSize,
588 const void* cSrc, size_t cSrcSize,
589 const HUF_DTable* DTable)
590 {
591 BIT_DStream_t bitD;
592
593 /* Init */
594 { size_t const errorCode = BIT_initDStream(&bitD, cSrc, cSrcSize);
595 if (HUF_isError(errorCode)) return errorCode;
596 }
597
598 /* decode */
599 { BYTE* const ostart = (BYTE*) dst;
600 BYTE* const oend = ostart + dstSize;
601 const void* const dtPtr = DTable+1; /* force compiler to not use strict-aliasing */
602 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
603 DTableDesc const dtd = HUF_getDTableDesc(DTable);
604 HUF_decodeStreamX4(ostart, &bitD, oend, dt, dtd.tableLog);
605 }
606
607 /* check */
608 if (!BIT_endOfDStream(&bitD)) return ERROR(corruption_detected);
609
610 /* decoded size */
611 return dstSize;
612 }
613
614 size_t HUF_decompress1X4_usingDTable(
615 void* dst, size_t dstSize,
616 const void* cSrc, size_t cSrcSize,
617 const HUF_DTable* DTable)
618 {
619 DTableDesc dtd = HUF_getDTableDesc(DTable);
620 if (dtd.tableType != 1) return ERROR(GENERIC);
621 return HUF_decompress1X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
622 }
623
624 size_t HUF_decompress1X4_DCtx (HUF_DTable* DCtx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
625 {
626 const BYTE* ip = (const BYTE*) cSrc;
627
628 size_t const hSize = HUF_readDTableX4 (DCtx, cSrc, cSrcSize);
629 if (HUF_isError(hSize)) return hSize;
630 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
631 ip += hSize; cSrcSize -= hSize;
632
633 return HUF_decompress1X4_usingDTable_internal (dst, dstSize, ip, cSrcSize, DCtx);
634 }
635
636 size_t HUF_decompress1X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
637 {
638 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
639 return HUF_decompress1X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
640 }
641
642 static size_t HUF_decompress4X4_usingDTable_internal(
643 void* dst, size_t dstSize,
644 const void* cSrc, size_t cSrcSize,
645 const HUF_DTable* DTable)
646 {
647 if (cSrcSize < 10) return ERROR(corruption_detected); /* strict minimum : jump table + 1 byte per stream */
648
649 { const BYTE* const istart = (const BYTE*) cSrc;
650 BYTE* const ostart = (BYTE*) dst;
651 BYTE* const oend = ostart + dstSize;
652 const void* const dtPtr = DTable+1;
653 const HUF_DEltX4* const dt = (const HUF_DEltX4*)dtPtr;
654
655 /* Init */
656 BIT_DStream_t bitD1;
657 BIT_DStream_t bitD2;
658 BIT_DStream_t bitD3;
659 BIT_DStream_t bitD4;
660 size_t const length1 = MEM_readLE16(istart);
661 size_t const length2 = MEM_readLE16(istart+2);
662 size_t const length3 = MEM_readLE16(istart+4);
663 size_t const length4 = cSrcSize - (length1 + length2 + length3 + 6);
664 const BYTE* const istart1 = istart + 6; /* jumpTable */
665 const BYTE* const istart2 = istart1 + length1;
666 const BYTE* const istart3 = istart2 + length2;
667 const BYTE* const istart4 = istart3 + length3;
668 size_t const segmentSize = (dstSize+3) / 4;
669 BYTE* const opStart2 = ostart + segmentSize;
670 BYTE* const opStart3 = opStart2 + segmentSize;
671 BYTE* const opStart4 = opStart3 + segmentSize;
672 BYTE* op1 = ostart;
673 BYTE* op2 = opStart2;
674 BYTE* op3 = opStart3;
675 BYTE* op4 = opStart4;
676 U32 endSignal;
677 DTableDesc const dtd = HUF_getDTableDesc(DTable);
678 U32 const dtLog = dtd.tableLog;
679
680 if (length4 > cSrcSize) return ERROR(corruption_detected); /* overflow */
681 { size_t const errorCode = BIT_initDStream(&bitD1, istart1, length1);
682 if (HUF_isError(errorCode)) return errorCode; }
683 { size_t const errorCode = BIT_initDStream(&bitD2, istart2, length2);
684 if (HUF_isError(errorCode)) return errorCode; }
685 { size_t const errorCode = BIT_initDStream(&bitD3, istart3, length3);
686 if (HUF_isError(errorCode)) return errorCode; }
687 { size_t const errorCode = BIT_initDStream(&bitD4, istart4, length4);
688 if (HUF_isError(errorCode)) return errorCode; }
689
690 /* 16-32 symbols per loop (4-8 symbols per stream) */
691 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
692 for ( ; (endSignal==BIT_DStream_unfinished) & (op4<(oend-(sizeof(bitD4.bitContainer)-1))) ; ) {
693 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
694 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
695 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
696 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
697 HUF_DECODE_SYMBOLX4_1(op1, &bitD1);
698 HUF_DECODE_SYMBOLX4_1(op2, &bitD2);
699 HUF_DECODE_SYMBOLX4_1(op3, &bitD3);
700 HUF_DECODE_SYMBOLX4_1(op4, &bitD4);
701 HUF_DECODE_SYMBOLX4_2(op1, &bitD1);
702 HUF_DECODE_SYMBOLX4_2(op2, &bitD2);
703 HUF_DECODE_SYMBOLX4_2(op3, &bitD3);
704 HUF_DECODE_SYMBOLX4_2(op4, &bitD4);
705 HUF_DECODE_SYMBOLX4_0(op1, &bitD1);
706 HUF_DECODE_SYMBOLX4_0(op2, &bitD2);
707 HUF_DECODE_SYMBOLX4_0(op3, &bitD3);
708 HUF_DECODE_SYMBOLX4_0(op4, &bitD4);
709
710 endSignal = BIT_reloadDStream(&bitD1) | BIT_reloadDStream(&bitD2) | BIT_reloadDStream(&bitD3) | BIT_reloadDStream(&bitD4);
711 }
712
713 /* check corruption */
714 if (op1 > opStart2) return ERROR(corruption_detected);
715 if (op2 > opStart3) return ERROR(corruption_detected);
716 if (op3 > opStart4) return ERROR(corruption_detected);
717 /* note : op4 already verified within main loop */
718
719 /* finish bitStreams one by one */
720 HUF_decodeStreamX4(op1, &bitD1, opStart2, dt, dtLog);
721 HUF_decodeStreamX4(op2, &bitD2, opStart3, dt, dtLog);
722 HUF_decodeStreamX4(op3, &bitD3, opStart4, dt, dtLog);
723 HUF_decodeStreamX4(op4, &bitD4, oend, dt, dtLog);
724
725 /* check */
726 { U32 const endCheck = BIT_endOfDStream(&bitD1) & BIT_endOfDStream(&bitD2) & BIT_endOfDStream(&bitD3) & BIT_endOfDStream(&bitD4);
727 if (!endCheck) return ERROR(corruption_detected); }
728
729 /* decoded size */
730 return dstSize;
731 }
732 }
733
734
735 size_t HUF_decompress4X4_usingDTable(
736 void* dst, size_t dstSize,
737 const void* cSrc, size_t cSrcSize,
738 const HUF_DTable* DTable)
739 {
740 DTableDesc dtd = HUF_getDTableDesc(DTable);
741 if (dtd.tableType != 1) return ERROR(GENERIC);
742 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, cSrc, cSrcSize, DTable);
743 }
744
745
746 size_t HUF_decompress4X4_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
747 {
748 const BYTE* ip = (const BYTE*) cSrc;
749
750 size_t hSize = HUF_readDTableX4 (dctx, cSrc, cSrcSize);
751 if (HUF_isError(hSize)) return hSize;
752 if (hSize >= cSrcSize) return ERROR(srcSize_wrong);
753 ip += hSize; cSrcSize -= hSize;
754
755 return HUF_decompress4X4_usingDTable_internal(dst, dstSize, ip, cSrcSize, dctx);
756 }
757
758 size_t HUF_decompress4X4 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
759 {
760 HUF_CREATE_STATIC_DTABLEX4(DTable, HUF_TABLELOG_MAX);
761 return HUF_decompress4X4_DCtx(DTable, dst, dstSize, cSrc, cSrcSize);
762 }
763
764
765 /* ********************************/
766 /* Generic decompression selector */
767 /* ********************************/
768
769 size_t HUF_decompress1X_usingDTable(void* dst, size_t maxDstSize,
770 const void* cSrc, size_t cSrcSize,
771 const HUF_DTable* DTable)
772 {
773 DTableDesc const dtd = HUF_getDTableDesc(DTable);
774 return dtd.tableType ? HUF_decompress1X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
775 HUF_decompress1X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
776 }
777
778 size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
779 const void* cSrc, size_t cSrcSize,
780 const HUF_DTable* DTable)
781 {
782 DTableDesc const dtd = HUF_getDTableDesc(DTable);
783 return dtd.tableType ? HUF_decompress4X4_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable) :
784 HUF_decompress4X2_usingDTable_internal(dst, maxDstSize, cSrc, cSrcSize, DTable);
785 }
786
787
788 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
789 static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
790 {
791 /* single, double, quad */
792 {{0,0}, {1,1}, {2,2}}, /* Q==0 : impossible */
793 {{0,0}, {1,1}, {2,2}}, /* Q==1 : impossible */
794 {{ 38,130}, {1313, 74}, {2151, 38}}, /* Q == 2 : 12-18% */
795 {{ 448,128}, {1353, 74}, {2238, 41}}, /* Q == 3 : 18-25% */
796 {{ 556,128}, {1353, 74}, {2238, 47}}, /* Q == 4 : 25-32% */
797 {{ 714,128}, {1418, 74}, {2436, 53}}, /* Q == 5 : 32-38% */
798 {{ 883,128}, {1437, 74}, {2464, 61}}, /* Q == 6 : 38-44% */
799 {{ 897,128}, {1515, 75}, {2622, 68}}, /* Q == 7 : 44-50% */
800 {{ 926,128}, {1613, 75}, {2730, 75}}, /* Q == 8 : 50-56% */
801 {{ 947,128}, {1729, 77}, {3359, 77}}, /* Q == 9 : 56-62% */
802 {{1107,128}, {2083, 81}, {4006, 84}}, /* Q ==10 : 62-69% */
803 {{1177,128}, {2379, 87}, {4785, 88}}, /* Q ==11 : 69-75% */
804 {{1242,128}, {2415, 93}, {5155, 84}}, /* Q ==12 : 75-81% */
805 {{1349,128}, {2644,106}, {5260,106}}, /* Q ==13 : 81-87% */
806 {{1455,128}, {2422,124}, {4174,124}}, /* Q ==14 : 87-93% */
807 {{ 722,128}, {1891,145}, {1936,146}}, /* Q ==15 : 93-99% */
808 };
809
810 /** HUF_selectDecoder() :
811 * Tells which decoder is likely to decode faster,
812 * based on a set of pre-determined metrics.
813 * @return : 0==HUF_decompress4X2, 1==HUF_decompress4X4 .
814 * Assumption : 0 < cSrcSize < dstSize <= 128 KB */
815 U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
816 {
817 /* decoder timing evaluation */
818 U32 const Q = (U32)(cSrcSize * 16 / dstSize); /* Q < 16 since dstSize > cSrcSize */
819 U32 const D256 = (U32)(dstSize >> 8);
820 U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
821 U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
822 DTime1 += DTime1 >> 3; /* advantage to algorithm using less memory, for cache eviction */
823
824 return DTime1 < DTime0;
825 }
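/* Editorial worked example (not vendored code), assuming dstSize = 64 KB and
 * cSrcSize = 32 KB :
 *   Q      = 32768 * 16 / 65536 = 8
 *   D256   = 65536 >> 8         = 256
 *   DTime0 =  926 + 128 * 256   = 33694   (single-symbol row for Q == 8)
 *   DTime1 = 1613 +  75 * 256   = 20813
 *   DTime1 += DTime1 >> 3       -> 23414  (cache-eviction penalty)
 * DTime1 < DTime0, so HUF_selectDecoder() returns 1 and the double-symbol (X4)
 * decoder is chosen for this compression ratio. */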
826
827
828 typedef size_t (*decompressionAlgo)(void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);
829
830 size_t HUF_decompress (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
831 {
832 static const decompressionAlgo decompress[2] = { HUF_decompress4X2, HUF_decompress4X4 };
833
834 /* validation checks */
835 if (dstSize == 0) return ERROR(dstSize_tooSmall);
836 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
837 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
838 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
839
840 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
841 return decompress[algoNb](dst, dstSize, cSrc, cSrcSize);
842 }
843 }
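A minimal usage sketch for the one-shot entry point above (editorial, not part of the vendored source; huf_decode_block is an invented wrapper). Note that the dstSize argument must be the exact regenerated size, which zstd obtains from the block header; HUF_isError() and HUF_getErrorName() come from huf.h.

#include <stdio.h>

static int huf_decode_block(void* dst, size_t regeneratedSize,
                            const void* cSrc, size_t cSrcSize)   /* illustrative */
{
    size_t const dSize = HUF_decompress(dst, regeneratedSize, cSrc, cSrcSize);
    if (HUF_isError(dSize)) {
        fprintf(stderr, "HUF_decompress failed: %s\n", HUF_getErrorName(dSize));
        return -1;
    }
    return 0;   /* on success, dSize == regeneratedSize */
}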
844
845 size_t HUF_decompress4X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
846 {
847 /* validation checks */
848 if (dstSize == 0) return ERROR(dstSize_tooSmall);
849 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
850 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
851 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
852
853 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
854 return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
855 HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
856 }
857 }
858
859 size_t HUF_decompress4X_hufOnly (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
860 {
861 /* validation checks */
862 if (dstSize == 0) return ERROR(dstSize_tooSmall);
863 if ((cSrcSize >= dstSize) || (cSrcSize <= 1)) return ERROR(corruption_detected); /* invalid */
864
865 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
866 return algoNb ? HUF_decompress4X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
867 HUF_decompress4X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
868 }
869 }
870
871 size_t HUF_decompress1X_DCtx (HUF_DTable* dctx, void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize)
872 {
873 /* validation checks */
874 if (dstSize == 0) return ERROR(dstSize_tooSmall);
875 if (cSrcSize > dstSize) return ERROR(corruption_detected); /* invalid */
876 if (cSrcSize == dstSize) { memcpy(dst, cSrc, dstSize); return dstSize; } /* not compressed */
877 if (cSrcSize == 1) { memset(dst, *(const BYTE*)cSrc, dstSize); return dstSize; } /* RLE */
878
879 { U32 const algoNb = HUF_selectDecoder(dstSize, cSrcSize);
880 return algoNb ? HUF_decompress1X4_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) :
881 HUF_decompress1X2_DCtx(dctx, dst, dstSize, cSrc, cSrcSize) ;
882 }
883 }
@@ -0,0 +1,252 b''
1 /**
2 * Copyright (c) 2016-present, Yann Collet, Facebook, Inc.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree. An additional grant
7 * of patent rights can be found in the PATENTS file in the same directory.
8 */
9
10
11
12 /* *************************************
13 * Dependencies
14 ***************************************/
15 #include <stdlib.h>
16 #include "error_private.h"
17 #include "zstd_internal.h" /* MIN, ZSTD_blockHeaderSize, ZSTD_BLOCKSIZE_MAX */
18 #define ZBUFF_STATIC_LINKING_ONLY
19 #include "zbuff.h"
20
21
22 typedef enum { ZBUFFds_init, ZBUFFds_loadHeader,
23 ZBUFFds_read, ZBUFFds_load, ZBUFFds_flush } ZBUFF_dStage;
24
25 /* *** Resource management *** */
26 struct ZBUFF_DCtx_s {
27 ZSTD_DCtx* zd;
28 ZSTD_frameParams fParams;
29 ZBUFF_dStage stage;
30 char* inBuff;
31 size_t inBuffSize;
32 size_t inPos;
33 char* outBuff;
34 size_t outBuffSize;
35 size_t outStart;
36 size_t outEnd;
37 size_t blockSize;
38 BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
39 size_t lhSize;
40 ZSTD_customMem customMem;
41 }; /* typedef'd to ZBUFF_DCtx within "zbuff.h" */
42
43
44 ZBUFF_DCtx* ZBUFF_createDCtx(void)
45 {
46 return ZBUFF_createDCtx_advanced(defaultCustomMem);
47 }
48
49 ZBUFF_DCtx* ZBUFF_createDCtx_advanced(ZSTD_customMem customMem)
50 {
51 ZBUFF_DCtx* zbd;
52
53 if (!customMem.customAlloc && !customMem.customFree)
54 customMem = defaultCustomMem;
55
56 if (!customMem.customAlloc || !customMem.customFree)
57 return NULL;
58
59 zbd = (ZBUFF_DCtx*)customMem.customAlloc(customMem.opaque, sizeof(ZBUFF_DCtx));
60 if (zbd==NULL) return NULL;
61 memset(zbd, 0, sizeof(ZBUFF_DCtx));
62 memcpy(&zbd->customMem, &customMem, sizeof(ZSTD_customMem));
63 zbd->zd = ZSTD_createDCtx_advanced(customMem);
64 if (zbd->zd == NULL) { ZBUFF_freeDCtx(zbd); return NULL; }
65 zbd->stage = ZBUFFds_init;
66 return zbd;
67 }
68
69 size_t ZBUFF_freeDCtx(ZBUFF_DCtx* zbd)
70 {
71 if (zbd==NULL) return 0; /* support free on null */
72 ZSTD_freeDCtx(zbd->zd);
73 if (zbd->inBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
74 if (zbd->outBuff) zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
75 zbd->customMem.customFree(zbd->customMem.opaque, zbd);
76 return 0;
77 }
78
79
80 /* *** Initialization *** */
81
82 size_t ZBUFF_decompressInitDictionary(ZBUFF_DCtx* zbd, const void* dict, size_t dictSize)
83 {
84 zbd->stage = ZBUFFds_loadHeader;
85 zbd->lhSize = zbd->inPos = zbd->outStart = zbd->outEnd = 0;
86 return ZSTD_decompressBegin_usingDict(zbd->zd, dict, dictSize);
87 }
88
89 size_t ZBUFF_decompressInit(ZBUFF_DCtx* zbd)
90 {
91 return ZBUFF_decompressInitDictionary(zbd, NULL, 0);
92 }
93
94
95 /* internal util function */
96 MEM_STATIC size_t ZBUFF_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
97 {
98 size_t const length = MIN(dstCapacity, srcSize);
99 memcpy(dst, src, length);
100 return length;
101 }
102
103
104 /* *** Decompression *** */
105
106 size_t ZBUFF_decompressContinue(ZBUFF_DCtx* zbd,
107 void* dst, size_t* dstCapacityPtr,
108 const void* src, size_t* srcSizePtr)
109 {
110 const char* const istart = (const char*)src;
111 const char* const iend = istart + *srcSizePtr;
112 const char* ip = istart;
113 char* const ostart = (char*)dst;
114 char* const oend = ostart + *dstCapacityPtr;
115 char* op = ostart;
116 U32 someMoreWork = 1;
117
118 while (someMoreWork) {
119 switch(zbd->stage)
120 {
121 case ZBUFFds_init :
122 return ERROR(init_missing);
123
124 case ZBUFFds_loadHeader :
125 { size_t const hSize = ZSTD_getFrameParams(&(zbd->fParams), zbd->headerBuffer, zbd->lhSize);
126 if (ZSTD_isError(hSize)) return hSize;
127 if (hSize != 0) { /* need more input */
128 size_t const toLoad = hSize - zbd->lhSize; /* if hSize!=0, hSize > zbd->lhSize */
129 if (toLoad > (size_t)(iend-ip)) { /* not enough input to load full header */
130 memcpy(zbd->headerBuffer + zbd->lhSize, ip, iend-ip);
131 zbd->lhSize += iend-ip;
132 *dstCapacityPtr = 0;
133 return (hSize - zbd->lhSize) + ZSTD_blockHeaderSize; /* remaining header bytes + next block header */
134 }
135 memcpy(zbd->headerBuffer + zbd->lhSize, ip, toLoad); zbd->lhSize = hSize; ip += toLoad;
136 break;
137 } }
138
139 /* Consume header */
140 { size_t const h1Size = ZSTD_nextSrcSizeToDecompress(zbd->zd); /* == ZSTD_frameHeaderSize_min */
141 size_t const h1Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer, h1Size);
142 if (ZSTD_isError(h1Result)) return h1Result; /* should not happen : already checked */
143 if (h1Size < zbd->lhSize) { /* long header */
144 size_t const h2Size = ZSTD_nextSrcSizeToDecompress(zbd->zd);
145 size_t const h2Result = ZSTD_decompressContinue(zbd->zd, NULL, 0, zbd->headerBuffer+h1Size, h2Size);
146 if (ZSTD_isError(h2Result)) return h2Result;
147 } }
148
149 zbd->fParams.windowSize = MAX(zbd->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
150
151 /* Frame header determines buffer sizes */
152 { size_t const blockSize = MIN(zbd->fParams.windowSize, ZSTD_BLOCKSIZE_ABSOLUTEMAX);
153 size_t const neededOutSize = zbd->fParams.windowSize + blockSize;
154 zbd->blockSize = blockSize;
155 if (zbd->inBuffSize < blockSize) {
156 zbd->customMem.customFree(zbd->customMem.opaque, zbd->inBuff);
157 zbd->inBuffSize = blockSize;
158 zbd->inBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, blockSize);
159 if (zbd->inBuff == NULL) return ERROR(memory_allocation);
160 }
161 if (zbd->outBuffSize < neededOutSize) {
162 zbd->customMem.customFree(zbd->customMem.opaque, zbd->outBuff);
163 zbd->outBuffSize = neededOutSize;
164 zbd->outBuff = (char*)zbd->customMem.customAlloc(zbd->customMem.opaque, neededOutSize);
165 if (zbd->outBuff == NULL) return ERROR(memory_allocation);
166 } }
167 zbd->stage = ZBUFFds_read;
168 /* pass-through */
169
170 case ZBUFFds_read:
171 { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
172 if (neededInSize==0) { /* end of frame */
173 zbd->stage = ZBUFFds_init;
174 someMoreWork = 0;
175 break;
176 }
177 if ((size_t)(iend-ip) >= neededInSize) { /* decode directly from src */
178 const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd);
179 size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
180 zbd->outBuff + zbd->outStart, (isSkipFrame ? 0 : zbd->outBuffSize - zbd->outStart),
181 ip, neededInSize);
182 if (ZSTD_isError(decodedSize)) return decodedSize;
183 ip += neededInSize;
184 if (!decodedSize && !isSkipFrame) break; /* this was just a header */
185 zbd->outEnd = zbd->outStart + decodedSize;
186 zbd->stage = ZBUFFds_flush;
187 break;
188 }
189 if (ip==iend) { someMoreWork = 0; break; } /* no more input */
190 zbd->stage = ZBUFFds_load;
191 /* pass-through */
192 }
193
194 case ZBUFFds_load:
195 { size_t const neededInSize = ZSTD_nextSrcSizeToDecompress(zbd->zd);
196 size_t const toLoad = neededInSize - zbd->inPos; /* should always be <= remaining space within inBuff */
197 size_t loadedSize;
198 if (toLoad > zbd->inBuffSize - zbd->inPos) return ERROR(corruption_detected); /* should never happen */
199 loadedSize = ZBUFF_limitCopy(zbd->inBuff + zbd->inPos, toLoad, ip, iend-ip);
200 ip += loadedSize;
201 zbd->inPos += loadedSize;
202 if (loadedSize < toLoad) { someMoreWork = 0; break; } /* not enough input, wait for more */
203
204 /* decode loaded input */
205 { const int isSkipFrame = ZSTD_isSkipFrame(zbd->zd);
206 size_t const decodedSize = ZSTD_decompressContinue(zbd->zd,
207 zbd->outBuff + zbd->outStart, zbd->outBuffSize - zbd->outStart,
208 zbd->inBuff, neededInSize);
209 if (ZSTD_isError(decodedSize)) return decodedSize;
210 zbd->inPos = 0; /* input is consumed */
211 if (!decodedSize && !isSkipFrame) { zbd->stage = ZBUFFds_read; break; } /* this was just a header */
212 zbd->outEnd = zbd->outStart + decodedSize;
213 zbd->stage = ZBUFFds_flush;
214 /* pass-through */
215 } }
216
217 case ZBUFFds_flush:
218 { size_t const toFlushSize = zbd->outEnd - zbd->outStart;
219 size_t const flushedSize = ZBUFF_limitCopy(op, oend-op, zbd->outBuff + zbd->outStart, toFlushSize);
220 op += flushedSize;
221 zbd->outStart += flushedSize;
222 if (flushedSize == toFlushSize) { /* flush completed */
223 zbd->stage = ZBUFFds_read;
224 if (zbd->outStart + zbd->blockSize > zbd->outBuffSize)
225 zbd->outStart = zbd->outEnd = 0;
226 break;
227 }
228 /* cannot flush everything */
229 someMoreWork = 0;
230 break;
231 }
232 default: return ERROR(GENERIC); /* impossible */
233 } }
234
235 /* result */
236 *srcSizePtr = ip-istart;
237 *dstCapacityPtr = op-ostart;
238 { size_t nextSrcSizeHint = ZSTD_nextSrcSizeToDecompress(zbd->zd);
239 if (!nextSrcSizeHint) return (zbd->outEnd != zbd->outStart); /* return 0 only if fully flushed too */
240 nextSrcSizeHint += ZSTD_blockHeaderSize * (ZSTD_nextInputType(zbd->zd) == ZSTDnit_block);
241 if (zbd->inPos > nextSrcSizeHint) return ERROR(GENERIC); /* should never happen */
242 nextSrcSizeHint -= zbd->inPos; /* already loaded*/
243 return nextSrcSizeHint;
244 }
245 }
246
247
248 /* *************************************
249 * Tool functions
250 ***************************************/
251 size_t ZBUFF_recommendedDInSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX + ZSTD_blockHeaderSize /* block header size*/ ; }
252 size_t ZBUFF_recommendedDOutSize(void) { return ZSTD_BLOCKSIZE_ABSOLUTEMAX; }
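To show how the pieces above fit together, here is an editorial usage sketch of the streaming decoder (not part of the vendored source; zbuff_decompress_stdio is an invented name). It uses only functions defined or declared in this file and in zbuff.h: the recommended buffer sizes, the in/out size parameters of ZBUFF_decompressContinue(), and its return value, which is 0 once a frame has been fully decoded and flushed.

#include <stdio.h>
#include <stdlib.h>

static int zbuff_decompress_stdio(void)   /* illustrative only */
{
    size_t const inSize   = ZBUFF_recommendedDInSize();
    size_t const outSize  = ZBUFF_recommendedDOutSize();
    char* const inBuff    = (char*)malloc(inSize);
    char* const outBuff   = (char*)malloc(outSize);
    ZBUFF_DCtx* const zbd = ZBUFF_createDCtx();
    int result = -1;   /* -1 = error or truncated frame */

    if (inBuff && outBuff && zbd && !ZBUFF_isError(ZBUFF_decompressInit(zbd))) {
        for (;;) {
            size_t const readSize = fread(inBuff, 1, inSize, stdin);
            size_t pos = 0;
            if (readSize == 0) break;                       /* input exhausted mid-frame */
            while (pos < readSize) {
                size_t dstCapacity = outSize;               /* in: capacity, out: bytes produced */
                size_t srcSize = readSize - pos;            /* in: available, out: bytes consumed */
                size_t const hint = ZBUFF_decompressContinue(zbd, outBuff, &dstCapacity,
                                                             inBuff + pos, &srcSize);
                if (ZBUFF_isError(hint)) goto done;
                fwrite(outBuff, 1, dstCapacity, stdout);
                pos += srcSize;
                if (hint == 0) { result = 0; goto done; }   /* frame fully decoded and flushed */
            }
        }
    }
done:
    ZBUFF_freeDCtx(zbd);
    free(inBuff);
    free(outBuff);
    return result;
}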