Show More
@@ -0,0 +1,149 b'' | |||
|
1 | /* | |
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This source code is licensed under both the BSD-style license (found in the | |
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
|
7 | * in the COPYING file in the root directory of this source tree). | |
|
8 | * You may select, at your option, one of the above-listed licenses. | |
|
9 | */ | |
|
10 | ||
|
11 | /*-************************************* | |
|
12 | * Dependencies | |
|
13 | ***************************************/ | |
|
14 | #include "zstd_compress_literals.h" | |
|
15 | ||
|
16 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |
|
17 | { | |
|
18 | BYTE* const ostart = (BYTE* const)dst; | |
|
19 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |
|
20 | ||
|
21 | RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall); | |
|
22 | ||
|
23 | switch(flSize) | |
|
24 | { | |
|
25 | case 1: /* 2 - 1 - 5 */ | |
|
26 | ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); | |
|
27 | break; | |
|
28 | case 2: /* 2 - 2 - 12 */ | |
|
29 | MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); | |
|
30 | break; | |
|
31 | case 3: /* 2 - 2 - 20 */ | |
|
32 | MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); | |
|
33 | break; | |
|
34 | default: /* not necessary : flSize is {1,2,3} */ | |
|
35 | assert(0); | |
|
36 | } | |
|
37 | ||
|
38 | memcpy(ostart + flSize, src, srcSize); | |
|
39 | return srcSize + flSize; | |
|
40 | } | |
|
41 | ||
|
42 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |
|
43 | { | |
|
44 | BYTE* const ostart = (BYTE* const)dst; | |
|
45 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |
|
46 | ||
|
47 | (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ | |
|
48 | ||
|
49 | switch(flSize) | |
|
50 | { | |
|
51 | case 1: /* 2 - 1 - 5 */ | |
|
52 | ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); | |
|
53 | break; | |
|
54 | case 2: /* 2 - 2 - 12 */ | |
|
55 | MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); | |
|
56 | break; | |
|
57 | case 3: /* 2 - 2 - 20 */ | |
|
58 | MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); | |
|
59 | break; | |
|
60 | default: /* not necessary : flSize is {1,2,3} */ | |
|
61 | assert(0); | |
|
62 | } | |
|
63 | ||
|
64 | ostart[flSize] = *(const BYTE*)src; | |
|
65 | return flSize+1; | |
|
66 | } | |
|
67 | ||
|
68 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |
|
69 | ZSTD_hufCTables_t* nextHuf, | |
|
70 | ZSTD_strategy strategy, int disableLiteralCompression, | |
|
71 | void* dst, size_t dstCapacity, | |
|
72 | const void* src, size_t srcSize, | |
|
73 | void* workspace, size_t wkspSize, | |
|
74 | const int bmi2) | |
|
75 | { | |
|
76 | size_t const minGain = ZSTD_minGain(srcSize, strategy); | |
|
77 | size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); | |
|
78 | BYTE* const ostart = (BYTE*)dst; | |
|
79 | U32 singleStream = srcSize < 256; | |
|
80 | symbolEncodingType_e hType = set_compressed; | |
|
81 | size_t cLitSize; | |
|
82 | ||
|
83 | DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", | |
|
84 | disableLiteralCompression); | |
|
85 | ||
|
86 | /* Prepare nextEntropy assuming reusing the existing table */ | |
|
87 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |
|
88 | ||
|
89 | if (disableLiteralCompression) | |
|
90 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |
|
91 | ||
|
92 | /* small ? don't even attempt compression (speed opt) */ | |
|
93 | # define COMPRESS_LITERALS_SIZE_MIN 63 | |
|
94 | { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; | |
|
95 | if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |
|
96 | } | |
|
97 | ||
|
98 | RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression"); | |
|
99 | { HUF_repeat repeat = prevHuf->repeatMode; | |
|
100 | int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; | |
|
101 | if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; | |
|
102 | cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, | |
|
103 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) | |
|
104 | : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, | |
|
105 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); | |
|
106 | if (repeat != HUF_repeat_none) { | |
|
107 | /* reused the existing table */ | |
|
108 | hType = set_repeat; | |
|
109 | } | |
|
110 | } | |
|
111 | ||
|
112 | if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { | |
|
113 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |
|
114 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |
|
115 | } | |
|
116 | if (cLitSize==1) { | |
|
117 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |
|
118 | return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); | |
|
119 | } | |
|
120 | ||
|
121 | if (hType == set_compressed) { | |
|
122 | /* using a newly constructed table */ | |
|
123 | nextHuf->repeatMode = HUF_repeat_check; | |
|
124 | } | |
|
125 | ||
|
126 | /* Build header */ | |
|
127 | switch(lhSize) | |
|
128 | { | |
|
129 | case 3: /* 2 - 2 - 10 - 10 */ | |
|
130 | { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); | |
|
131 | MEM_writeLE24(ostart, lhc); | |
|
132 | break; | |
|
133 | } | |
|
134 | case 4: /* 2 - 2 - 14 - 14 */ | |
|
135 | { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); | |
|
136 | MEM_writeLE32(ostart, lhc); | |
|
137 | break; | |
|
138 | } | |
|
139 | case 5: /* 2 - 2 - 18 - 18 */ | |
|
140 | { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); | |
|
141 | MEM_writeLE32(ostart, lhc); | |
|
142 | ostart[4] = (BYTE)(cLitSize >> 10); | |
|
143 | break; | |
|
144 | } | |
|
145 | default: /* not possible : lhSize is {3,4,5} */ | |
|
146 | assert(0); | |
|
147 | } | |
|
148 | return lhSize+cLitSize; | |
|
149 | } |
@@ -0,0 +1,29 b'' | |||
|
1 | /* | |
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This source code is licensed under both the BSD-style license (found in the | |
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
|
7 | * in the COPYING file in the root directory of this source tree). | |
|
8 | * You may select, at your option, one of the above-listed licenses. | |
|
9 | */ | |
|
10 | ||
|
11 | #ifndef ZSTD_COMPRESS_LITERALS_H | |
|
12 | #define ZSTD_COMPRESS_LITERALS_H | |
|
13 | ||
|
14 | #include "zstd_compress_internal.h" /* ZSTD_hufCTables_t, ZSTD_minGain() */ | |
|
15 | ||
|
16 | ||
|
17 | size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |
|
18 | ||
|
19 | size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize); | |
|
20 | ||
|
21 | size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |
|
22 | ZSTD_hufCTables_t* nextHuf, | |
|
23 | ZSTD_strategy strategy, int disableLiteralCompression, | |
|
24 | void* dst, size_t dstCapacity, | |
|
25 | const void* src, size_t srcSize, | |
|
26 | void* workspace, size_t wkspSize, | |
|
27 | const int bmi2); | |
|
28 | ||
|
29 | #endif /* ZSTD_COMPRESS_LITERALS_H */ |
@@ -0,0 +1,415 b'' | |||
|
1 | /* | |
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This source code is licensed under both the BSD-style license (found in the | |
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
|
7 | * in the COPYING file in the root directory of this source tree). | |
|
8 | * You may select, at your option, one of the above-listed licenses. | |
|
9 | */ | |
|
10 | ||
|
11 | /*-************************************* | |
|
12 | * Dependencies | |
|
13 | ***************************************/ | |
|
14 | #include "zstd_compress_sequences.h" | |
|
15 | ||
|
16 | /** | |
|
17 | * -log2(x / 256) lookup table for x in [0, 256), stored in fixed point with 8 fractional bits. | |
|
18 | * If x == 0: Return 0 (placeholder entry; the cost functions below bump a zero probability to 1 or assert it is non-zero) | |
|
19 | * Else: Return floor(-log2(x / 256) * 256), e.g. x == 1 gives 8 * 256 == 2048 and x == 128 gives 1 * 256 == 256 | |
 * Users in this file accumulate count[s] * table[probability] and shift the sum right by 8 to obtain whole bits.
|
20 | */ | |
|
21 | static unsigned const kInverseProbabilityLog256[256] = { | |
|
22 | 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, | |
|
23 | 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, | |
|
24 | 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, | |
|
25 | 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, | |
|
26 | 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, | |
|
27 | 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, | |
|
28 | 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, | |
|
29 | 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, | |
|
30 | 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, | |
|
31 | 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, | |
|
32 | 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, | |
|
33 | 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, | |
|
34 | 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, | |
|
35 | 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, | |
|
36 | 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, | |
|
37 | 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, | |
|
38 | 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, | |
|
39 | 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, | |
|
40 | 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, | |
|
41 | 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, | |
|
42 | 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, | |
|
43 | 5, 4, 2, 1, | |
|
44 | }; | |
|
45 | ||
|
46 | static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { | |
|
47 | void const* ptr = ctable; | |
|
48 | U16 const* u16ptr = (U16 const*)ptr; | |
|
49 | U32 const maxSymbolValue = MEM_read16(u16ptr + 1); | |
|
50 | return maxSymbolValue; | |
|
51 | } | |
|
52 | ||
|
53 | /** | |
|
54 | * Returns the cost in bytes of encoding the normalized count header. | |
|
55 | * Returns an error if any of the helper functions return an error. | |
|
56 | */ | |
|
57 | static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, | |
|
58 | size_t const nbSeq, unsigned const FSELog) | |
|
59 | { | |
|
60 | BYTE wksp[FSE_NCOUNTBOUND]; | |
|
61 | S16 norm[MaxSeq + 1]; | |
|
62 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |
|
63 | FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); | |
|
64 | return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); | |
|
65 | } | |
|
66 | ||
|
67 | /** | |
|
68 | * Returns the cost in bits of encoding the distribution described by count | |
|
69 | * using the entropy bound. | |
|
70 | */ | |
|
71 | static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) | |
|
72 | { | |
|
73 | unsigned cost = 0; | |
|
74 | unsigned s; | |
|
75 | for (s = 0; s <= max; ++s) { | |
|
76 | unsigned norm = (unsigned)((256 * count[s]) / total); | |
|
77 | if (count[s] != 0 && norm == 0) | |
|
78 | norm = 1; | |
|
79 | assert(count[s] < total); | |
|
80 | cost += count[s] * kInverseProbabilityLog256[norm]; | |
|
81 | } | |
|
82 | return cost >> 8; | |
|
83 | } | |
|
84 | ||
|
85 | /** | |
|
86 | * Returns the cost in bits of encoding the distribution in count using ctable. | |
|
87 | * Returns an error if ctable cannot represent all the symbols in count. | |
|
88 | */ | |
|
89 | static size_t ZSTD_fseBitCost( | |
|
90 | FSE_CTable const* ctable, | |
|
91 | unsigned const* count, | |
|
92 | unsigned const max) | |
|
93 | { | |
|
94 | unsigned const kAccuracyLog = 8; | |
|
95 | size_t cost = 0; | |
|
96 | unsigned s; | |
|
97 | FSE_CState_t cstate; | |
|
98 | FSE_initCState(&cstate, ctable); | |
|
99 | RETURN_ERROR_IF(ZSTD_getFSEMaxSymbolValue(ctable) < max, GENERIC, | |
|
100 | "Repeat FSE_CTable has maxSymbolValue %u < %u", | |
|
101 | ZSTD_getFSEMaxSymbolValue(ctable), max); | |
|
102 | for (s = 0; s <= max; ++s) { | |
|
103 | unsigned const tableLog = cstate.stateLog; | |
|
104 | unsigned const badCost = (tableLog + 1) << kAccuracyLog; | |
|
105 | unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); | |
|
106 | if (count[s] == 0) | |
|
107 | continue; | |
|
108 | RETURN_ERROR_IF(bitCost >= badCost, GENERIC, | |
|
109 | "Repeat FSE_CTable has Prob[%u] == 0", s); | |
|
110 | cost += count[s] * bitCost; | |
|
111 | } | |
|
112 | return cost >> kAccuracyLog; | |
|
113 | } | |
|
114 | ||
|
115 | /** | |
|
116 | * Returns the cost in bits of encoding the distribution in count using the | |
|
117 | * table described by norm. The max symbol support by norm is assumed >= max. | |
|
118 | * norm must be valid for every symbol with non-zero probability in count. | |
|
119 | */ | |
|
120 | static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, | |
|
121 | unsigned const* count, unsigned const max) | |
|
122 | { | |
|
123 | unsigned const shift = 8 - accuracyLog; | |
|
124 | size_t cost = 0; | |
|
125 | unsigned s; | |
|
126 | assert(accuracyLog <= 8); | |
|
127 | for (s = 0; s <= max; ++s) { | |
|
128 | unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; | |
|
129 | unsigned const norm256 = normAcc << shift; | |
|
130 | assert(norm256 > 0); | |
|
131 | assert(norm256 < 256); | |
|
132 | cost += count[s] * kInverseProbabilityLog256[norm256]; | |
|
133 | } | |
|
134 | return cost >> 8; | |
|
135 | } | |
|
136 | ||
|
137 | symbolEncodingType_e | |
|
138 | ZSTD_selectEncodingType( | |
|
139 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, | |
|
140 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, | |
|
141 | FSE_CTable const* prevCTable, | |
|
142 | short const* defaultNorm, U32 defaultNormLog, | |
|
143 | ZSTD_defaultPolicy_e const isDefaultAllowed, | |
|
144 | ZSTD_strategy const strategy) | |
|
145 | { | |
|
146 | ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); | |
|
147 | if (mostFrequent == nbSeq) { | |
|
148 | *repeatMode = FSE_repeat_none; | |
|
149 | if (isDefaultAllowed && nbSeq <= 2) { | |
|
150 | /* Prefer set_basic over set_rle when there are 2 or less symbols, | |
|
151 | * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. | |
|
152 | * If basic encoding isn't possible, always choose RLE. | |
|
153 | */ | |
|
154 | DEBUGLOG(5, "Selected set_basic"); | |
|
155 | return set_basic; | |
|
156 | } | |
|
157 | DEBUGLOG(5, "Selected set_rle"); | |
|
158 | return set_rle; | |
|
159 | } | |
|
160 | if (strategy < ZSTD_lazy) { | |
|
161 | if (isDefaultAllowed) { | |
|
162 | size_t const staticFse_nbSeq_max = 1000; | |
|
163 | size_t const mult = 10 - strategy; | |
|
164 | size_t const baseLog = 3; | |
|
165 | size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ | |
|
166 | assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ | |
|
167 | assert(mult <= 9 && mult >= 7); | |
|
168 | if ( (*repeatMode == FSE_repeat_valid) | |
|
169 | && (nbSeq < staticFse_nbSeq_max) ) { | |
|
170 | DEBUGLOG(5, "Selected set_repeat"); | |
|
171 | return set_repeat; | |
|
172 | } | |
|
173 | if ( (nbSeq < dynamicFse_nbSeq_min) | |
|
174 | || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { | |
|
175 | DEBUGLOG(5, "Selected set_basic"); | |
|
176 | /* The format allows default tables to be repeated, but it isn't useful. | |
|
177 | * When using simple heuristics to select encoding type, we don't want | |
|
178 | * to confuse these tables with dictionaries. When running more careful | |
|
179 | * analysis, we don't need to waste time checking both repeating tables | |
|
180 | * and default tables. | |
|
181 | */ | |
|
182 | *repeatMode = FSE_repeat_none; | |
|
183 | return set_basic; | |
|
184 | } | |
|
185 | } | |
|
186 | } else { | |
|
187 | size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); | |
|
188 | size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); | |
|
189 | size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); | |
|
190 | size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); | |
|
191 | ||
|
192 | if (isDefaultAllowed) { | |
|
193 | assert(!ZSTD_isError(basicCost)); | |
|
194 | assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); | |
|
195 | } | |
|
196 | assert(!ZSTD_isError(NCountCost)); | |
|
197 | assert(compressedCost < ERROR(maxCode)); | |
|
198 | DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", | |
|
199 | (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); | |
|
200 | if (basicCost <= repeatCost && basicCost <= compressedCost) { | |
|
201 | DEBUGLOG(5, "Selected set_basic"); | |
|
202 | assert(isDefaultAllowed); | |
|
203 | *repeatMode = FSE_repeat_none; | |
|
204 | return set_basic; | |
|
205 | } | |
|
206 | if (repeatCost <= compressedCost) { | |
|
207 | DEBUGLOG(5, "Selected set_repeat"); | |
|
208 | assert(!ZSTD_isError(repeatCost)); | |
|
209 | return set_repeat; | |
|
210 | } | |
|
211 | assert(compressedCost < basicCost && compressedCost < repeatCost); | |
|
212 | } | |
|
213 | DEBUGLOG(5, "Selected set_compressed"); | |
|
214 | *repeatMode = FSE_repeat_check; | |
|
215 | return set_compressed; | |
|
216 | } | |
|
217 | ||
|
218 | size_t | |
|
219 | ZSTD_buildCTable(void* dst, size_t dstCapacity, | |
|
220 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, | |
|
221 | unsigned* count, U32 max, | |
|
222 | const BYTE* codeTable, size_t nbSeq, | |
|
223 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, | |
|
224 | const FSE_CTable* prevCTable, size_t prevCTableSize, | |
|
225 | void* workspace, size_t workspaceSize) | |
|
226 | { | |
|
227 | BYTE* op = (BYTE*)dst; | |
|
228 | const BYTE* const oend = op + dstCapacity; | |
|
229 | DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); | |
|
230 | ||
|
231 | switch (type) { | |
|
232 | case set_rle: | |
|
233 | FORWARD_IF_ERROR(FSE_buildCTable_rle(nextCTable, (BYTE)max)); | |
|
234 | RETURN_ERROR_IF(dstCapacity==0, dstSize_tooSmall); | |
|
235 | *op = codeTable[0]; | |
|
236 | return 1; | |
|
237 | case set_repeat: | |
|
238 | memcpy(nextCTable, prevCTable, prevCTableSize); | |
|
239 | return 0; | |
|
240 | case set_basic: | |
|
241 | FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ | |
|
242 | return 0; | |
|
243 | case set_compressed: { | |
|
244 | S16 norm[MaxSeq + 1]; | |
|
245 | size_t nbSeq_1 = nbSeq; | |
|
246 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |
|
247 | if (count[codeTable[nbSeq-1]] > 1) { | |
|
248 | count[codeTable[nbSeq-1]]--; | |
|
249 | nbSeq_1--; | |
|
250 | } | |
|
251 | assert(nbSeq_1 > 1); | |
|
252 | FORWARD_IF_ERROR(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); | |
|
253 | { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ | |
|
254 | FORWARD_IF_ERROR(NCountSize); | |
|
255 | FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); | |
|
256 | return NCountSize; | |
|
257 | } | |
|
258 | } | |
|
259 | default: assert(0); RETURN_ERROR(GENERIC); | |
|
260 | } | |
|
261 | } | |
|
262 | ||
|
/* ZSTD_encodeSequences_body() :
 * Serializes nbSeq sequences into one bitstream written to dst.
 * The last sequence (index nbSeq-1) initializes the three FSE states and gets its
 * litLength / matchLength / offset bits written first; the remaining sequences are
 * then processed from index nbSeq-2 down to 0, and the three FSE states are flushed last.
 * mlCodeTable / ofCodeTable / llCodeTable : one symbol code per sequence.
 *     The offset code is used directly as the number of offset bits to write.
 * longOffsets : when non-zero, the offset is written in two steps (low extraBits,
 *     flush, then the remaining bits) so the second write never exceeds
 *     STREAM_ACCUMULATOR_MIN-1 bits.
 * note : index nbSeq-1 is accessed unconditionally, so callers presumably guarantee
 *     nbSeq >= 1 (not checked here - TODO confirm against callers).
 * @return : size of the bitstream as reported by BIT_closeCStream(), or dstSize_tooSmall
 *     when BIT_initCStream() reports an error or BIT_closeCStream() returns 0.
 */
263 | FORCE_INLINE_TEMPLATE size_t | |
|
264 | ZSTD_encodeSequences_body( | |
|
265 | void* dst, size_t dstCapacity, | |
|
266 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
267 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
268 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
269 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |
|
270 | { | |
|
271 | BIT_CStream_t blockStream; | |
|
272 | FSE_CState_t stateMatchLength; | |
|
273 | FSE_CState_t stateOffsetBits; | |
|
274 | FSE_CState_t stateLitLength; | |
|
275 | ||
|
276 | RETURN_ERROR_IF( | |
|
277 | ERR_isError(BIT_initCStream(&blockStream, dst, dstCapacity)), | |
|
278 | dstSize_tooSmall, "not enough space remaining"); | |
|
279 | DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", | |
|
280 | (int)(blockStream.endPtr - blockStream.startPtr), | |
|
281 | (unsigned)dstCapacity); | |
|
282 | ||
|
283 | /* first symbols */ | |
|
284 | FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); | |
|
285 | FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); | |
|
286 | FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); | |
|
287 | BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); | |
|
288 | if (MEM_32bits()) BIT_flushBits(&blockStream); | |
|
289 | BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); | |
|
290 | if (MEM_32bits()) BIT_flushBits(&blockStream); | |
|
/* long offsets : write the low extraBits and flush, then write the remaining (at most STREAM_ACCUMULATOR_MIN-1) bits */
291 | if (longOffsets) { | |
|
292 | U32 const ofBits = ofCodeTable[nbSeq-1]; | |
|
293 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |
|
294 | if (extraBits) { | |
|
295 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); | |
|
296 | BIT_flushBits(&blockStream); | |
|
297 | } | |
|
298 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, | |
|
299 | ofBits - extraBits); | |
|
300 | } else { | |
|
301 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); | |
|
302 | } | |
|
303 | BIT_flushBits(&blockStream); | |
|
304 | ||
|
/* remaining sequences, from index nbSeq-2 down to 0 : decrementing n past 0 wraps it to a value >= nbSeq, which ends the loop */
305 | { size_t n; | |
|
306 | for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ | |
|
307 | BYTE const llCode = llCodeTable[n]; | |
|
308 | BYTE const ofCode = ofCodeTable[n]; | |
|
309 | BYTE const mlCode = mlCodeTable[n]; | |
|
310 | U32 const llBits = LL_bits[llCode]; | |
|
311 | U32 const ofBits = ofCode; | |
|
312 | U32 const mlBits = ML_bits[mlCode]; | |
|
313 | DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", | |
|
314 | (unsigned)sequences[n].litLength, | |
|
315 | (unsigned)sequences[n].matchLength + MINMATCH, | |
|
316 | (unsigned)sequences[n].offset); | |
|
/* NOTE(review): the numeric trailing comments below appear to track the worst-case number of bits pending in the accumulator on 32-bit / 64-bit targets - confirm against bitstream.h */
317 | /* 32b*/ /* 64b*/ | |
|
318 | /* (7)*/ /* (7)*/ | |
|
319 | FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ | |
|
320 | FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ | |
|
321 | if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ | |
|
322 | FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ | |
|
323 | if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) | |
|
324 | BIT_flushBits(&blockStream); /* (7)*/ | |
|
325 | BIT_addBits(&blockStream, sequences[n].litLength, llBits); | |
|
326 | if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); | |
|
327 | BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); | |
|
328 | if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); | |
|
329 | if (longOffsets) { | |
|
330 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |
|
331 | if (extraBits) { | |
|
332 | BIT_addBits(&blockStream, sequences[n].offset, extraBits); | |
|
333 | BIT_flushBits(&blockStream); /* (7)*/ | |
|
334 | } | |
|
335 | BIT_addBits(&blockStream, sequences[n].offset >> extraBits, | |
|
336 | ofBits - extraBits); /* 31 */ | |
|
337 | } else { | |
|
338 | BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ | |
|
339 | } | |
|
340 | BIT_flushBits(&blockStream); /* (7)*/ | |
|
341 | DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); | |
|
342 | } } | |
|
343 | ||
|
/* final FSE states are flushed in the order match length, offset, literal length */
344 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); | |
|
345 | FSE_flushCState(&blockStream, &stateMatchLength); | |
|
346 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); | |
|
347 | FSE_flushCState(&blockStream, &stateOffsetBits); | |
|
348 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); | |
|
349 | FSE_flushCState(&blockStream, &stateLitLength); | |
|
350 | ||
|
/* a stream size of 0 from BIT_closeCStream() is reported as dstSize_tooSmall */
351 | { size_t const streamSize = BIT_closeCStream(&blockStream); | |
|
352 | RETURN_ERROR_IF(streamSize==0, dstSize_tooSmall, "not enough space"); | |
|
353 | return streamSize; | |
|
354 | } | |
|
355 | } | |
|
356 | ||
|
357 | static size_t | |
|
358 | ZSTD_encodeSequences_default( | |
|
359 | void* dst, size_t dstCapacity, | |
|
360 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
361 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
362 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
363 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |
|
364 | { | |
|
365 | return ZSTD_encodeSequences_body(dst, dstCapacity, | |
|
366 | CTable_MatchLength, mlCodeTable, | |
|
367 | CTable_OffsetBits, ofCodeTable, | |
|
368 | CTable_LitLength, llCodeTable, | |
|
369 | sequences, nbSeq, longOffsets); | |
|
370 | } | |
|
371 | ||
|
372 | ||
|
373 | #if DYNAMIC_BMI2 | |
|
374 | ||
|
375 | static TARGET_ATTRIBUTE("bmi2") size_t | |
|
376 | ZSTD_encodeSequences_bmi2( | |
|
377 | void* dst, size_t dstCapacity, | |
|
378 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
379 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
380 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
381 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |
|
382 | { | |
|
383 | return ZSTD_encodeSequences_body(dst, dstCapacity, | |
|
384 | CTable_MatchLength, mlCodeTable, | |
|
385 | CTable_OffsetBits, ofCodeTable, | |
|
386 | CTable_LitLength, llCodeTable, | |
|
387 | sequences, nbSeq, longOffsets); | |
|
388 | } | |
|
389 | ||
|
390 | #endif | |
|
391 | ||
|
392 | size_t ZSTD_encodeSequences( | |
|
393 | void* dst, size_t dstCapacity, | |
|
394 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
395 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
396 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
397 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) | |
|
398 | { | |
|
399 | DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); | |
|
400 | #if DYNAMIC_BMI2 | |
|
401 | if (bmi2) { | |
|
402 | return ZSTD_encodeSequences_bmi2(dst, dstCapacity, | |
|
403 | CTable_MatchLength, mlCodeTable, | |
|
404 | CTable_OffsetBits, ofCodeTable, | |
|
405 | CTable_LitLength, llCodeTable, | |
|
406 | sequences, nbSeq, longOffsets); | |
|
407 | } | |
|
408 | #endif | |
|
409 | (void)bmi2; | |
|
410 | return ZSTD_encodeSequences_default(dst, dstCapacity, | |
|
411 | CTable_MatchLength, mlCodeTable, | |
|
412 | CTable_OffsetBits, ofCodeTable, | |
|
413 | CTable_LitLength, llCodeTable, | |
|
414 | sequences, nbSeq, longOffsets); | |
|
415 | } |
@@ -0,0 +1,47 b'' | |||
|
1 | /* | |
|
2 | * Copyright (c) 2016-present, Yann Collet, Facebook, Inc. | |
|
3 | * All rights reserved. | |
|
4 | * | |
|
5 | * This source code is licensed under both the BSD-style license (found in the | |
|
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found | |
|
7 | * in the COPYING file in the root directory of this source tree). | |
|
8 | * You may select, at your option, one of the above-listed licenses. | |
|
9 | */ | |
|
10 | ||
|
11 | #ifndef ZSTD_COMPRESS_SEQUENCES_H | |
|
12 | #define ZSTD_COMPRESS_SEQUENCES_H | |
|
13 | ||
|
14 | #include "fse.h" /* FSE_repeat, FSE_CTable */ | |
|
15 | #include "zstd_internal.h" /* symbolEncodingType_e, ZSTD_strategy */ | |
|
16 | ||
|
/* Tells whether the predefined (default) table may be selected for a symbol stream.
 * The numeric values matter : the policy is tested directly as a boolean. */
typedef enum { ZSTD_defaultDisallowed = 0, ZSTD_defaultAllowed = 1 } ZSTD_defaultPolicy_e;
|
21 | ||
|
22 | symbolEncodingType_e | |
|
23 | ZSTD_selectEncodingType( | |
|
24 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, | |
|
25 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, | |
|
26 | FSE_CTable const* prevCTable, | |
|
27 | short const* defaultNorm, U32 defaultNormLog, | |
|
28 | ZSTD_defaultPolicy_e const isDefaultAllowed, | |
|
29 | ZSTD_strategy const strategy); | |
|
30 | ||
|
31 | size_t | |
|
32 | ZSTD_buildCTable(void* dst, size_t dstCapacity, | |
|
33 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, | |
|
34 | unsigned* count, U32 max, | |
|
35 | const BYTE* codeTable, size_t nbSeq, | |
|
36 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, | |
|
37 | const FSE_CTable* prevCTable, size_t prevCTableSize, | |
|
38 | void* workspace, size_t workspaceSize); | |
|
39 | ||
|
40 | size_t ZSTD_encodeSequences( | |
|
41 | void* dst, size_t dstCapacity, | |
|
42 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
43 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
44 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
45 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2); | |
|
46 | ||
|
47 | #endif /* ZSTD_COMPRESS_SEQUENCES_H */ |
@@ -49,6 +49,10 b' contrib/python-zstandard/zstd/compress/h' | |||
|
49 | 49 | contrib/python-zstandard/zstd/compress/huf_compress.c |
|
50 | 50 | contrib/python-zstandard/zstd/compress/zstd_compress.c |
|
51 | 51 | contrib/python-zstandard/zstd/compress/zstd_compress_internal.h |
|
52 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.c | |
|
53 | contrib/python-zstandard/zstd/compress/zstd_compress_literals.h | |
|
54 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.c | |
|
55 | contrib/python-zstandard/zstd/compress/zstd_compress_sequences.h | |
|
52 | 56 | contrib/python-zstandard/zstd/compress/zstd_double_fast.c |
|
53 | 57 | contrib/python-zstandard/zstd/compress/zstd_double_fast.h |
|
54 | 58 | contrib/python-zstandard/zstd/compress/zstd_fast.c |
@@ -44,6 +44,7 b' Actions Blocking Release' | |||
|
44 | 44 | zstd API. |
|
45 | 45 | * Expose ``ZSTD_CLEVEL_DEFAULT`` constant. |
|
46 | 46 | * Support ``ZSTD_p_forceAttachDict`` compression parameter. |
|
47 | * Support ``ZSTD_c_literalCompressionMode`` compression parameter. | |
|
47 | 48 | * Use ``ZSTD_CCtx_getParameter()``/``ZSTD_CCtxParam_getParameter()`` for retrieving |
|
48 | 49 | compression parameters. |
|
49 | 50 | * Consider exposing ``ZSTDMT_toFlushNow()``. |
@@ -66,10 +67,39 b' Other Actions Not Blocking Release' | |||
|
66 | 67 | * API for ensuring max memory ceiling isn't exceeded. |
|
67 | 68 | * Move off nose for testing. |
|
68 | 69 | |
|
70 | 0.12.0 (released 2019-09-15) | |
|
71 | ============================ | |
|
72 | ||
|
73 | Backwards Compatibility Notes | |
|
74 | ----------------------------- | |
|
75 | ||
|
76 | * Support for Python 3.4 has been dropped since Python 3.4 is no longer | |
|
77 | a supported Python version upstream. (But it will likely continue to | |
|
78 | work until Python 2.7 support is dropped and we port to Python 3.5+ | |
|
79 | APIs.) | |
|
80 | ||
|
81 | Bug Fixes | |
|
82 | --------- | |
|
83 | ||
|
84 | * Fix ``ZstdDecompressor.__init__`` on 64-bit big-endian systems (#91). | |
|
85 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). | |
|
86 | ||
|
87 | Changes | |
|
88 | ------- | |
|
89 | ||
|
90 | * CI transitioned to Azure Pipelines (from AppVeyor and Travis CI). | |
|
91 | * Switched to ``pytest`` for running tests (from ``nose``). | |
|
92 | * Bundled zstandard library upgraded from 1.3.8 to 1.4.3. | |
|
93 | ||
|
94 | 0.11.1 (released 2019-05-14) | |
|
95 | ============================ | |
|
96 | ||
|
97 | * Fix memory leak in ``ZstdDecompressionReader.seek()`` (#82). | |
|
98 | ||
|
69 | 99 | 0.11.0 (released 2019-02-24) |
|
70 | 100 | ============================ |
|
71 | 101 | |
|
72 |
Backwards Compatibility No |
|
|
102 | Backwards Compatibility Notes | |
|
73 | 103 | ----------------------------- |
|
74 | 104 | |
|
75 | 105 | * ``ZstdDecompressor.read()`` now allows reading sizes of ``-1`` or ``0`` |
@@ -15,7 +15,7 b' The canonical home for this project live' | |||
|
15 | 15 | the author. For convenience, that repository is frequently synchronized to |
|
16 | 16 | https://github.com/indygreg/python-zstandard. |
|
17 | 17 | |
|
18 |
| |ci-status| |
|
|
18 | | |ci-status| | |
|
19 | 19 | |
|
20 | 20 | Requirements |
|
21 | 21 | ============ |
@@ -1598,9 +1598,5 b' their work, consider donating some money' | |||
|
1598 | 1598 | :target: https://www.paypal.com/cgi-bin/webscr?cmd=_donations&business=gregory%2eszorc%40gmail%2ecom&lc=US&item_name=python%2dzstandard&currency_code=USD&bn=PP%2dDonationsBF%3abtn_donate_LG%2egif%3aNonHosted |
|
1599 | 1599 | :alt: Donate via PayPal |
|
1600 | 1600 | |
|
1601 |
.. |ci-status| image:: https:// |
|
|
1602 | :target: https://travis-ci.org/indygreg/python-zstandard | |
|
1603 | ||
|
1604 | .. |win-ci-status| image:: https://ci.appveyor.com/api/projects/status/github/indygreg/python-zstandard?svg=true | |
|
1605 | :target: https://ci.appveyor.com/project/indygreg/python-zstandard | |
|
1606 | :alt: Windows build status | |
|
1601 | .. |ci-status| image:: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master | |
|
1602 | :target: https://dev.azure.com/gregoryszorc/python-zstandard/_apis/build/status/indygreg.python-zstandard?branchName=master |
@@ -11,7 +11,7 b'' | |||
|
11 | 11 | extern PyObject* ZstdError; |
|
12 | 12 | |
|
13 | 13 | int set_parameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value) { |
|
14 | size_t zresult = ZSTD_CCtxParam_setParameter(params, param, value); | |
|
14 | size_t zresult = ZSTD_CCtxParams_setParameter(params, param, value); | |
|
15 | 15 | if (ZSTD_isError(zresult)) { |
|
16 | 16 | PyErr_Format(ZstdError, "unable to set compression context parameter: %s", |
|
17 | 17 | ZSTD_getErrorName(zresult)); |
@@ -25,11 +25,11 b' int set_parameter(ZSTD_CCtx_params* para' | |||
|
25 | 25 | |
|
26 | 26 | #define TRY_COPY_PARAMETER(source, dest, param) { \ |
|
27 | 27 | int result; \ |
|
28 | size_t zresult = ZSTD_CCtxParam_getParameter(source, param, &result); \ | |
|
28 | size_t zresult = ZSTD_CCtxParams_getParameter(source, param, &result); \ | |
|
29 | 29 | if (ZSTD_isError(zresult)) { \ |
|
30 | 30 | return 1; \ |
|
31 | 31 | } \ |
|
32 | zresult = ZSTD_CCtxParam_setParameter(dest, param, result); \ | |
|
32 | zresult = ZSTD_CCtxParams_setParameter(dest, param, result); \ | |
|
33 | 33 | if (ZSTD_isError(zresult)) { \ |
|
34 | 34 | return 1; \ |
|
35 | 35 | } \ |
@@ -78,7 +78,7 b' int reset_params(ZstdCompressionParamete' | |||
|
78 | 78 | } |
|
79 | 79 | |
|
80 | 80 | #define TRY_GET_PARAMETER(params, param, value) { \ |
|
81 | size_t zresult = ZSTD_CCtxParam_getParameter(params, param, value); \ | |
|
81 | size_t zresult = ZSTD_CCtxParams_getParameter(params, param, value); \ | |
|
82 | 82 | if (ZSTD_isError(zresult)) { \ |
|
83 | 83 | PyErr_Format(ZstdError, "unable to retrieve parameter: %s", ZSTD_getErrorName(zresult)); \ |
|
84 | 84 | return 1; \ |
@@ -436,7 +436,7 b' static void ZstdCompressionParameters_de' | |||
|
436 | 436 | int result; \ |
|
437 | 437 | size_t zresult; \ |
|
438 | 438 | ZstdCompressionParametersObject* p = (ZstdCompressionParametersObject*)(self); \ |
|
439 | zresult = ZSTD_CCtxParam_getParameter(p->params, param, &result); \ | |
|
439 | zresult = ZSTD_CCtxParams_getParameter(p->params, param, &result); \ | |
|
440 | 440 | if (ZSTD_isError(zresult)) { \ |
|
441 | 441 | PyErr_Format(ZstdError, "unable to get compression parameter: %s", \ |
|
442 | 442 | ZSTD_getErrorName(zresult)); \ |
@@ -653,6 +653,8 b' static PyObject* reader_seek(ZstdDecompr' | |||
|
653 | 653 | |
|
654 | 654 | readSize = PyBytes_GET_SIZE(readResult); |
|
655 | 655 | |
|
656 | Py_CLEAR(readResult); | |
|
657 | ||
|
656 | 658 | /* Empty read means EOF. */ |
|
657 | 659 | if (!readSize) { |
|
658 | 660 | break; |
@@ -16,7 +16,7 b'' | |||
|
16 | 16 | #include <zdict.h> |
|
17 | 17 | |
|
18 | 18 | /* Remember to change the string in zstandard/__init__ as well */ |
|
19 |
#define PYTHON_ZSTANDARD_VERSION "0.1 |
|
|
19 | #define PYTHON_ZSTANDARD_VERSION "0.12.0" | |
|
20 | 20 | |
|
21 | 21 | typedef enum { |
|
22 | 22 | compressorobj_flush_finish, |
@@ -29,6 +29,8 b" SOURCES = ['zstd/%s' % p for p in (" | |||
|
29 | 29 | 'compress/hist.c', |
|
30 | 30 | 'compress/huf_compress.c', |
|
31 | 31 | 'compress/zstd_compress.c', |
|
32 | 'compress/zstd_compress_literals.c', | |
|
33 | 'compress/zstd_compress_sequences.c', | |
|
32 | 34 | 'compress/zstd_double_fast.c', |
|
33 | 35 | 'compress/zstd_fast.c', |
|
34 | 36 | 'compress/zstd_lazy.c', |
@@ -119,7 +121,11 b' def preprocess(path):' | |||
|
119 | 121 | os.close(fd) |
|
120 | 122 | |
|
121 | 123 | try: |
|
122 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE) | |
|
124 | env = dict(os.environ) | |
|
125 | if getattr(compiler, '_paths', None): | |
|
126 | env['PATH'] = compiler._paths | |
|
127 | process = subprocess.Popen(args + [input_file], stdout=subprocess.PIPE, | |
|
128 | env=env) | |
|
123 | 129 | output = process.communicate()[0] |
|
124 | 130 | ret = process.poll() |
|
125 | 131 | if ret: |
@@ -100,7 +100,6 b' setup(' | |||
|
100 | 100 | 'License :: OSI Approved :: BSD License', |
|
101 | 101 | 'Programming Language :: C', |
|
102 | 102 | 'Programming Language :: Python :: 2.7', |
|
103 | 'Programming Language :: Python :: 3.4', | |
|
104 | 103 | 'Programming Language :: Python :: 3.5', |
|
105 | 104 | 'Programming Language :: Python :: 3.6', |
|
106 | 105 | 'Programming Language :: Python :: 3.7', |
@@ -22,6 +22,8 b" zstd_sources = ['zstd/%s' % p for p in (" | |||
|
22 | 22 | 'compress/fse_compress.c', |
|
23 | 23 | 'compress/hist.c', |
|
24 | 24 | 'compress/huf_compress.c', |
|
25 | 'compress/zstd_compress_literals.c', | |
|
26 | 'compress/zstd_compress_sequences.c', | |
|
25 | 27 | 'compress/zstd_compress.c', |
|
26 | 28 | 'compress/zstd_double_fast.c', |
|
27 | 29 | 'compress/zstd_fast.c', |
@@ -1038,7 +1038,7 b' class TestCompressor_stream_writer(unitt' | |||
|
1038 | 1038 | d = zstd.train_dictionary(8192, samples) |
|
1039 | 1039 | |
|
1040 | 1040 | h = hashlib.sha1(d.as_bytes()).hexdigest() |
|
1041 | self.assertEqual(h, '88ca0d38332aff379d4ced166a51c280a7679aad') | |
|
1041 | self.assertEqual(h, '7a2e59a876db958f74257141045af8f912e00d4e') | |
|
1042 | 1042 | |
|
1043 | 1043 | buffer = NonClosingBytesIO() |
|
1044 | 1044 | cctx = zstd.ZstdCompressor(level=9, dict_data=d) |
@@ -1056,7 +1056,7 b' class TestCompressor_stream_writer(unitt' | |||
|
1056 | 1056 | self.assertFalse(params.has_checksum) |
|
1057 | 1057 | |
|
1058 | 1058 | h = hashlib.sha1(compressed).hexdigest() |
|
1059 | self.assertEqual(h, '8703b4316f274d26697ea5dd480f29c08e85d940') | |
|
1059 | self.assertEqual(h, '0a7c05635061f58039727cdbe76388c6f4cfef06') | |
|
1060 | 1060 | |
|
1061 | 1061 | source = b'foo' + b'bar' + (b'foo' * 16384) |
|
1062 | 1062 | |
@@ -1091,7 +1091,7 b' class TestCompressor_stream_writer(unitt' | |||
|
1091 | 1091 | self.assertFalse(params.has_checksum) |
|
1092 | 1092 | |
|
1093 | 1093 | h = hashlib.sha1(compressed).hexdigest() |
|
1094 | self.assertEqual(h, '2a8111d72eb5004cdcecbdac37da9f26720d30ef') | |
|
1094 | self.assertEqual(h, 'dd4bb7d37c1a0235b38a2f6b462814376843ef0b') | |
|
1095 | 1095 | |
|
1096 | 1096 | def test_write_checksum(self): |
|
1097 | 1097 | no_checksum = NonClosingBytesIO() |
@@ -100,7 +100,7 b' class TestCompressionParameters(unittest' | |||
|
100 | 100 | strategy=zstd.STRATEGY_DFAST) |
|
101 | 101 | |
|
102 | 102 | # 32-bit has slightly different values from 64-bit. |
|
103 |
self.assertAlmostEqual(p.estimated_compression_context_size(), 1294 |
|
|
103 | self.assertAlmostEqual(p.estimated_compression_context_size(), 1294144, | |
|
104 | 104 | delta=250) |
|
105 | 105 | |
|
106 | 106 | def test_strategy(self): |
@@ -12,9 +12,9 b' from . common import (' | |||
|
12 | 12 | @make_cffi |
|
13 | 13 | class TestModuleAttributes(unittest.TestCase): |
|
14 | 14 | def test_version(self): |
|
15 |
self.assertEqual(zstd.ZSTD_VERSION, (1, |
|
|
15 | self.assertEqual(zstd.ZSTD_VERSION, (1, 4, 3)) | |
|
16 | 16 | |
|
17 |
self.assertEqual(zstd.__version__, '0.1 |
|
|
17 | self.assertEqual(zstd.__version__, '0.12.0') | |
|
18 | 18 | |
|
19 | 19 | def test_constants(self): |
|
20 | 20 | self.assertEqual(zstd.MAX_COMPRESSION_LEVEL, 22) |
@@ -7,6 +7,7 b' import zstandard as zstd' | |||
|
7 | 7 | from . common import ( |
|
8 | 8 | generate_samples, |
|
9 | 9 | make_cffi, |
|
10 | random_input_data, | |
|
10 | 11 | ) |
|
11 | 12 | |
|
12 | 13 | if sys.version_info[0] >= 3: |
@@ -29,7 +30,7 b' class TestTrainDictionary(unittest.TestC' | |||
|
29 | 30 | zstd.train_dictionary(8192, [u'foo']) |
|
30 | 31 | |
|
31 | 32 | def test_no_params(self): |
|
32 |
d = zstd.train_dictionary(8192, |
|
|
33 | d = zstd.train_dictionary(8192, random_input_data()) | |
|
33 | 34 | self.assertIsInstance(d.dict_id(), int_type) |
|
34 | 35 | |
|
35 | 36 | # The dictionary ID may be different across platforms. |
@@ -62,4 +62,4 b' else:' | |||
|
62 | 62 | 'cext, or cffi' % _module_policy) |
|
63 | 63 | |
|
64 | 64 | # Keep this in sync with python-zstandard.h. |
|
65 |
__version__ = '0.1 |
|
|
65 | __version__ = '0.12.0' |
@@ -416,7 +416,7 b' def estimate_decompression_context_size(' | |||
|
416 | 416 | |
|
417 | 417 | |
|
418 | 418 | def _set_compression_parameter(params, param, value): |
|
419 | zresult = lib.ZSTD_CCtxParam_setParameter(params, param, value) | |
|
419 | zresult = lib.ZSTD_CCtxParams_setParameter(params, param, value) | |
|
420 | 420 | if lib.ZSTD_isError(zresult): |
|
421 | 421 | raise ZstdError('unable to set compression context parameter: %s' % |
|
422 | 422 | _zstd_error(zresult)) |
@@ -425,7 +425,7 b' def _set_compression_parameter(params, p' | |||
|
425 | 425 | def _get_compression_parameter(params, param): |
|
426 | 426 | result = ffi.new('int *') |
|
427 | 427 | |
|
428 | zresult = lib.ZSTD_CCtxParam_getParameter(params, param, result) | |
|
428 | zresult = lib.ZSTD_CCtxParams_getParameter(params, param, result) | |
|
429 | 429 | if lib.ZSTD_isError(zresult): |
|
430 | 430 | raise ZstdError('unable to get compression context parameter: %s' % |
|
431 | 431 | _zstd_error(zresult)) |
@@ -210,7 +210,7 b' void zstd_module_init(PyObject* m) {' | |||
|
210 | 210 | We detect this mismatch here and refuse to load the module if this |
|
211 | 211 | scenario is detected. |
|
212 | 212 | */ |
|
213 |
if (ZSTD_VERSION_NUMBER != 10 |
|
|
213 | if (ZSTD_VERSION_NUMBER != 10403 || ZSTD_versionNumber() != 10403) { | |
|
214 | 214 | PyErr_SetString(PyExc_ImportError, "zstd C API mismatch; Python bindings not compiled against expected zstd version"); |
|
215 | 215 | return; |
|
216 | 216 | } |
@@ -57,6 +57,8 b' extern "C" {' | |||
|
57 | 57 | =========================================*/ |
|
58 | 58 | #if defined(__BMI__) && defined(__GNUC__) |
|
59 | 59 | # include <immintrin.h> /* support for bextr (experimental) */ |
|
60 | #elif defined(__ICCARM__) | |
|
61 | # include <intrinsics.h> | |
|
60 | 62 | #endif |
|
61 | 63 | |
|
62 | 64 | #define STREAM_ACCUMULATOR_MIN_32 25 |
@@ -163,6 +165,8 b' MEM_STATIC unsigned BIT_highbit32 (U32 v' | |||
|
163 | 165 | return (unsigned) r; |
|
164 | 166 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* Use GCC Intrinsic */ |
|
165 | 167 | return 31 - __builtin_clz (val); |
|
168 | # elif defined(__ICCARM__) /* IAR Intrinsic */ | |
|
169 | return 31 - __CLZ(val); | |
|
166 | 170 | # else /* Software version */ |
|
167 | 171 | static const unsigned DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, |
|
168 | 172 | 11, 14, 16, 18, 22, 25, 3, 30, |
@@ -23,7 +23,7 b'' | |||
|
23 | 23 | # define INLINE_KEYWORD |
|
24 | 24 | #endif |
|
25 | 25 | |
|
26 | #if defined(__GNUC__) | |
|
26 | #if defined(__GNUC__) || defined(__ICCARM__) | |
|
27 | 27 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) |
|
28 | 28 | #elif defined(_MSC_VER) |
|
29 | 29 | # define FORCE_INLINE_ATTR __forceinline |
@@ -40,7 +40,7 b'' | |||
|
40 | 40 | |
|
41 | 41 | /** |
|
42 | 42 | * FORCE_INLINE_TEMPLATE is used to define C "templates", which take constant |
|
43 |
* parameters. They must be inlined for the compiler to elimin |
|
|
43 | * parameters. They must be inlined for the compiler to eliminate the constant | |
|
44 | 44 | * branches. |
|
45 | 45 | */ |
|
46 | 46 | #define FORCE_INLINE_TEMPLATE static INLINE_KEYWORD FORCE_INLINE_ATTR |
@@ -65,7 +65,7 b'' | |||
|
65 | 65 | #ifdef _MSC_VER |
|
66 | 66 | # define FORCE_NOINLINE static __declspec(noinline) |
|
67 | 67 | #else |
|
68 | # ifdef __GNUC__ | |
|
68 | # if defined(__GNUC__) || defined(__ICCARM__) | |
|
69 | 69 | # define FORCE_NOINLINE static __attribute__((__noinline__)) |
|
70 | 70 | # else |
|
71 | 71 | # define FORCE_NOINLINE static |
@@ -76,7 +76,7 b'' | |||
|
76 | 76 | #ifndef __has_attribute |
|
77 | 77 | #define __has_attribute(x) 0 /* Compatibility with non-clang compilers. */ |
|
78 | 78 | #endif |
|
79 | #if defined(__GNUC__) | |
|
79 | #if defined(__GNUC__) || defined(__ICCARM__) | |
|
80 | 80 | # define TARGET_ATTRIBUTE(target) __attribute__((__target__(target))) |
|
81 | 81 | #else |
|
82 | 82 | # define TARGET_ATTRIBUTE(target) |
@@ -127,6 +127,13 b'' | |||
|
127 | 127 | } \ |
|
128 | 128 | } |
|
129 | 129 | |
|
130 | /* vectorization */ | |
|
131 | #if !defined(__clang__) && defined(__GNUC__) | |
|
132 | # define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize"))) | |
|
133 | #else | |
|
134 | # define DONT_VECTORIZE | |
|
135 | #endif | |
|
136 | ||
|
130 | 137 | /* disable warnings */ |
|
131 | 138 | #ifdef _MSC_VER /* Visual Studio */ |
|
132 | 139 | # include <intrin.h> /* For Visual 2005 */ |
@@ -358,7 +358,7 b' size_t FSE_decompress_wksp(void* dst, si' | |||
|
358 | 358 | typedef enum { |
|
359 | 359 | FSE_repeat_none, /**< Cannot use the previous table */ |
|
360 | 360 | FSE_repeat_check, /**< Can use the previous table but it must be checked */ |
|
361 | FSE_repeat_valid /**< Can use the previous table and it is asumed to be valid */ | |
|
361 | FSE_repeat_valid /**< Can use the previous table and it is assumed to be valid */ | |
|
362 | 362 | } FSE_repeat; |
|
363 | 363 | |
|
364 | 364 | /* ***************************************** |
@@ -102,7 +102,7 b' MEM_STATIC void MEM_check(void) { MEM_ST' | |||
|
102 | 102 | #ifndef MEM_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */ |
|
103 | 103 | # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) |
|
104 | 104 | # define MEM_FORCE_MEMORY_ACCESS 2 |
|
105 | # elif defined(__INTEL_COMPILER) || defined(__GNUC__) | |
|
105 | # elif defined(__INTEL_COMPILER) || defined(__GNUC__) || defined(__ICCARM__) | |
|
106 | 106 | # define MEM_FORCE_MEMORY_ACCESS 1 |
|
107 | 107 | # endif |
|
108 | 108 | #endif |
@@ -14,8 +14,8 b'' | |||
|
14 | 14 | * This file will hold wrapper for systems, which do not support pthreads |
|
15 | 15 | */ |
|
16 | 16 | |
|
17 |
/* create fake symbol to avoid empty tr |
|
|
18 | int g_ZSTD_threading_useles_symbol; | |
|
17 | /* create fake symbol to avoid empty translation unit warning */ | |
|
18 | int g_ZSTD_threading_useless_symbol; | |
|
19 | 19 | |
|
20 | 20 | #if defined(ZSTD_MULTITHREAD) && defined(_WIN32) |
|
21 | 21 |
@@ -53,7 +53,8 b'' | |||
|
53 | 53 | # if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) ) |
|
54 | 54 | # define XXH_FORCE_MEMORY_ACCESS 2 |
|
55 | 55 | # elif (defined(__INTEL_COMPILER) && !defined(WIN32)) || \ |
|
56 | (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) | |
|
56 | (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) )) || \ | |
|
57 | defined(__ICCARM__) | |
|
57 | 58 | # define XXH_FORCE_MEMORY_ACCESS 1 |
|
58 | 59 | # endif |
|
59 | 60 | #endif |
@@ -66,10 +67,10 b'' | |||
|
66 | 67 | /* #define XXH_ACCEPT_NULL_INPUT_POINTER 1 */ |
|
67 | 68 | |
|
68 | 69 | /*!XXH_FORCE_NATIVE_FORMAT : |
|
69 |
* By default, xxHash library provides endian-independ |
|
|
70 | * By default, xxHash library provides endian-independent Hash values, based on little-endian convention. | |
|
70 | 71 | * Results are therefore identical for little-endian and big-endian CPU. |
|
71 | 72 | * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. |
|
72 |
* Should endian-independ |
|
|
73 | * Should endian-independence be of no importance for your application, you may set the #define below to 1, | |
|
73 | 74 | * to improve speed for Big-endian CPU. |
|
74 | 75 | * This option has no impact on Little_Endian CPU. |
|
75 | 76 | */ |
@@ -120,7 +121,7 b' static void* XXH_memcpy(void* dest, cons' | |||
|
120 | 121 | # define INLINE_KEYWORD |
|
121 | 122 | #endif |
|
122 | 123 | |
|
123 | #if defined(__GNUC__) | |
|
124 | #if defined(__GNUC__) || defined(__ICCARM__) | |
|
124 | 125 | # define FORCE_INLINE_ATTR __attribute__((always_inline)) |
|
125 | 126 | #elif defined(_MSC_VER) |
|
126 | 127 | # define FORCE_INLINE_ATTR __forceinline |
@@ -206,7 +207,12 b' static U64 XXH_read64(const void* memPtr' | |||
|
206 | 207 | # define XXH_rotl32(x,r) _rotl(x,r) |
|
207 | 208 | # define XXH_rotl64(x,r) _rotl64(x,r) |
|
208 | 209 | #else |
|
210 | #if defined(__ICCARM__) | |
|
211 | # include <intrinsics.h> | |
|
212 | # define XXH_rotl32(x,r) __ROR(x,(32 - r)) | |
|
213 | #else | |
|
209 | 214 | # define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r))) |
|
215 | #endif | |
|
210 | 216 | # define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r))) |
|
211 | 217 | #endif |
|
212 | 218 |
@@ -34,7 +34,6 b'' | |||
|
34 | 34 | #endif |
|
35 | 35 | #include "xxhash.h" /* XXH_reset, update, digest */ |
|
36 | 36 | |
|
37 | ||
|
38 | 37 | #if defined (__cplusplus) |
|
39 | 38 | extern "C" { |
|
40 | 39 | #endif |
@@ -53,8 +52,50 b' extern "C" {' | |||
|
53 | 52 | #undef MAX |
|
54 | 53 | #define MIN(a,b) ((a)<(b) ? (a) : (b)) |
|
55 | 54 | #define MAX(a,b) ((a)>(b) ? (a) : (b)) |
|
56 | #define CHECK_F(f) { size_t const errcod = f; if (ERR_isError(errcod)) return errcod; } /* check and Forward error code */ | |
|
57 | #define CHECK_E(f, e) { size_t const errcod = f; if (ERR_isError(errcod)) return ERROR(e); } /* check and send Error code */ | |
|
55 | ||
|
56 | /** | |
|
57 | * Return the specified error if the condition evaluates to true. | |
|
58 | * | |
|
59 | * In debug modes, prints additional information. | |
|
60 | * In order to do that (particularly, printing the conditional that failed), | |
|
61 | * this can't just wrap RETURN_ERROR(). | |
|
62 | */ | |
|
63 | #define RETURN_ERROR_IF(cond, err, ...) \ | |
|
64 | if (cond) { \ | |
|
65 | RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \ | |
|
66 | RAWLOG(3, ": " __VA_ARGS__); \ | |
|
67 | RAWLOG(3, "\n"); \ | |
|
68 | return ERROR(err); \ | |
|
69 | } | |
|
70 | ||
|
71 | /** | |
|
72 | * Unconditionally return the specified error. | |
|
73 | * | |
|
74 | * In debug modes, prints additional information. | |
|
75 | */ | |
|
76 | #define RETURN_ERROR(err, ...) \ | |
|
77 | do { \ | |
|
78 | RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \ | |
|
79 | RAWLOG(3, ": " __VA_ARGS__); \ | |
|
80 | RAWLOG(3, "\n"); \ | |
|
81 | return ERROR(err); \ | |
|
82 | } while(0); | |
|
83 | ||
|
84 | /** | |
|
85 | * If the provided expression evaluates to an error code, returns that error code. | |
|
86 | * | |
|
87 | * In debug modes, prints additional information. | |
|
88 | */ | |
|
89 | #define FORWARD_IF_ERROR(err, ...) \ | |
|
90 | do { \ | |
|
91 | size_t const err_code = (err); \ | |
|
92 | if (ERR_isError(err_code)) { \ | |
|
93 | RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \ | |
|
94 | RAWLOG(3, ": " __VA_ARGS__); \ | |
|
95 | RAWLOG(3, "\n"); \ | |
|
96 | return err_code; \ | |
|
97 | } \ | |
|
98 | } while(0); | |
|
58 | 99 | |
|
59 | 100 | |
|
60 | 101 | /*-************************************* |
@@ -151,20 +192,73 b' static const U32 OF_defaultNormLog = OF_' | |||
|
151 | 192 | * Shared functions to include for inlining |
|
152 | 193 | *********************************************/ |
|
153 | 194 | static void ZSTD_copy8(void* dst, const void* src) { memcpy(dst, src, 8); } |
|
195 | ||
|
154 | 196 | #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; } |
|
197 | static void ZSTD_copy16(void* dst, const void* src) { memcpy(dst, src, 16); } | |
|
198 | #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; } | |
|
199 | ||
|
200 | #define WILDCOPY_OVERLENGTH 8 | |
|
201 | #define VECLEN 16 | |
|
202 | ||
|
203 | typedef enum { | |
|
204 | ZSTD_no_overlap, | |
|
205 | ZSTD_overlap_src_before_dst, | |
|
206 | /* ZSTD_overlap_dst_before_src, */ | |
|
207 | } ZSTD_overlap_e; | |
|
155 | 208 | |
|
156 | 209 | /*! ZSTD_wildcopy() : |
|
157 | 210 | * custom version of memcpy(), can overwrite up to WILDCOPY_OVERLENGTH bytes (if length==0) */ |
|
158 | #define WILDCOPY_OVERLENGTH 8 | |
|
159 |
|
|
|
211 | MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE | |
|
212 | void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) | |
|
160 | 213 | { |
|
214 | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; | |
|
161 | 215 | const BYTE* ip = (const BYTE*)src; |
|
162 | 216 | BYTE* op = (BYTE*)dst; |
|
163 | 217 | BYTE* const oend = op + length; |
|
218 | ||
|
219 | assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); | |
|
220 | if (length < VECLEN || (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN)) { | |
|
164 | 221 | do |
|
165 | 222 | COPY8(op, ip) |
|
166 | 223 | while (op < oend); |
|
167 | 224 | } |
|
225 | else { | |
|
226 | if ((length & 8) == 0) | |
|
227 | COPY8(op, ip); | |
|
228 | do { | |
|
229 | COPY16(op, ip); | |
|
230 | } | |
|
231 | while (op < oend); | |
|
232 | } | |
|
233 | } | |
|
234 | ||
|
235 | /*! ZSTD_wildcopy_16min() : | |
|
236 | * same semantics as ZSTD_wildcopy() except guaranteed to be able to copy 16 bytes at the start */ | |
|
237 | MEM_STATIC FORCE_INLINE_ATTR DONT_VECTORIZE | |
|
238 | void ZSTD_wildcopy_16min(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e ovtype) | |
|
239 | { | |
|
240 | ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src; | |
|
241 | const BYTE* ip = (const BYTE*)src; | |
|
242 | BYTE* op = (BYTE*)dst; | |
|
243 | BYTE* const oend = op + length; | |
|
244 | ||
|
245 | assert(length >= 8); | |
|
246 | assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff < -8)); | |
|
247 | ||
|
248 | if (ovtype == ZSTD_overlap_src_before_dst && diff < VECLEN) { | |
|
249 | do | |
|
250 | COPY8(op, ip) | |
|
251 | while (op < oend); | |
|
252 | } | |
|
253 | else { | |
|
254 | if ((length & 8) == 0) | |
|
255 | COPY8(op, ip); | |
|
256 | do { | |
|
257 | COPY16(op, ip); | |
|
258 | } | |
|
259 | while (op < oend); | |
|
260 | } | |
|
261 | } | |
|
168 | 262 | |
|
169 | 263 | MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* should be faster for decoding, but strangely, not verified on all platform */ |
|
170 | 264 | { |
@@ -200,6 +294,17 b' typedef struct {' | |||
|
200 | 294 | U32 longLengthPos; |
|
201 | 295 | } seqStore_t; |
|
202 | 296 | |
|
297 | /** | |
|
298 | * Contains the compressed frame size and an upper-bound for the decompressed frame size. | |
|
299 | * Note: before using `compressedSize`, check for errors using ZSTD_isError(). | |
|
300 | * similarly, before using `decompressedBound`, check for errors using: | |
|
301 | * `decompressedBound != ZSTD_CONTENTSIZE_ERROR` | |
|
302 | */ | |
|
303 | typedef struct { | |
|
304 | size_t compressedSize; | |
|
305 | unsigned long long decompressedBound; | |
|
306 | } ZSTD_frameSizeInfo; /* decompress & legacy */ | |
|
307 | ||
|
203 | 308 | const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx); /* compress & dictBuilder */ |
|
204 | 309 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr); /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */ |
|
205 | 310 | |
@@ -219,6 +324,8 b' MEM_STATIC U32 ZSTD_highbit32(U32 val) ' | |||
|
219 | 324 | return (unsigned)r; |
|
220 | 325 | # elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC Intrinsic */ |
|
221 | 326 | return 31 - __builtin_clz(val); |
|
327 | # elif defined(__ICCARM__) /* IAR Intrinsic */ | |
|
328 | return 31 - __CLZ(val); | |
|
222 | 329 | # else /* Software version */ |
|
223 | 330 | static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 }; |
|
224 | 331 | U32 v = val; |
@@ -129,9 +129,9 b' size_t FSE_buildCTable_wksp(FSE_CTable* ' | |||
|
129 | 129 | { U32 position = 0; |
|
130 | 130 | U32 symbol; |
|
131 | 131 | for (symbol=0; symbol<=maxSymbolValue; symbol++) { |
|
132 | int nbOccurences; | |
|
132 | int nbOccurrences; | |
|
133 | 133 | int const freq = normalizedCounter[symbol]; |
|
134 | for (nbOccurences=0; nbOccurences<freq; nbOccurences++) { | |
|
134 | for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) { | |
|
135 | 135 | tableSymbol[position] = (FSE_FUNCTION_TYPE)symbol; |
|
136 | 136 | position = (position + step) & tableMask; |
|
137 | 137 | while (position > highThreshold) |
This diff has been collapsed as it changes many lines, (1486 lines changed) Show them Hide them | |||
@@ -21,6 +21,8 b'' | |||
|
21 | 21 | #define HUF_STATIC_LINKING_ONLY |
|
22 | 22 | #include "huf.h" |
|
23 | 23 | #include "zstd_compress_internal.h" |
|
24 | #include "zstd_compress_sequences.h" | |
|
25 | #include "zstd_compress_literals.h" | |
|
24 | 26 | #include "zstd_fast.h" |
|
25 | 27 | #include "zstd_double_fast.h" |
|
26 | 28 | #include "zstd_lazy.h" |
@@ -103,12 +105,31 b' ZSTD_CCtx* ZSTD_initStaticCCtx(void *wor' | |||
|
103 | 105 | return cctx; |
|
104 | 106 | } |
|
105 | 107 | |
|
108 | /** | |
|
109 | * Clears and frees all of the dictionaries in the CCtx. | |
|
110 | */ | |
|
111 | static void ZSTD_clearAllDicts(ZSTD_CCtx* cctx) | |
|
112 | { | |
|
113 | ZSTD_free(cctx->localDict.dictBuffer, cctx->customMem); | |
|
114 | ZSTD_freeCDict(cctx->localDict.cdict); | |
|
115 | memset(&cctx->localDict, 0, sizeof(cctx->localDict)); | |
|
116 | memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); | |
|
117 | cctx->cdict = NULL; | |
|
118 | } | |
|
119 | ||
|
120 | static size_t ZSTD_sizeof_localDict(ZSTD_localDict dict) | |
|
121 | { | |
|
122 | size_t const bufferSize = dict.dictBuffer != NULL ? dict.dictSize : 0; | |
|
123 | size_t const cdictSize = ZSTD_sizeof_CDict(dict.cdict); | |
|
124 | return bufferSize + cdictSize; | |
|
125 | } | |
|
126 | ||
|
106 | 127 | static void ZSTD_freeCCtxContent(ZSTD_CCtx* cctx) |
|
107 | 128 | { |
|
108 | 129 | assert(cctx != NULL); |
|
109 | 130 | assert(cctx->staticSize == 0); |
|
110 | 131 | ZSTD_free(cctx->workSpace, cctx->customMem); cctx->workSpace = NULL; |
|
111 | ZSTD_freeCDict(cctx->cdictLocal); cctx->cdictLocal = NULL; | |
|
132 | ZSTD_clearAllDicts(cctx); | |
|
112 | 133 | #ifdef ZSTD_MULTITHREAD |
|
113 | 134 | ZSTDMT_freeCCtx(cctx->mtctx); cctx->mtctx = NULL; |
|
114 | 135 | #endif |
@@ -117,7 +138,8 b' static void ZSTD_freeCCtxContent(ZSTD_CC' | |||
|
117 | 138 | size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx) |
|
118 | 139 | { |
|
119 | 140 | if (cctx==NULL) return 0; /* support free on NULL */ |
|
120 | if (cctx->staticSize) return ERROR(memory_allocation); /* not compatible with static CCtx */ | |
|
141 | RETURN_ERROR_IF(cctx->staticSize, memory_allocation, | |
|
142 | "not compatible with static CCtx"); | |
|
121 | 143 | ZSTD_freeCCtxContent(cctx); |
|
122 | 144 | ZSTD_free(cctx, cctx->customMem); |
|
123 | 145 | return 0; |
@@ -139,7 +161,7 b' size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx*' | |||
|
139 | 161 | { |
|
140 | 162 | if (cctx==NULL) return 0; /* support sizeof on NULL */ |
|
141 | 163 | return sizeof(*cctx) + cctx->workSpaceSize |
|
142 |
+ ZSTD_sizeof_ |
|
|
164 | + ZSTD_sizeof_localDict(cctx->localDict) | |
|
143 | 165 | + ZSTD_sizeof_mtctx(cctx); |
|
144 | 166 | } |
|
145 | 167 | |
@@ -195,7 +217,7 b' size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_p' | |||
|
195 | 217 | } |
|
196 | 218 | |
|
197 | 219 | size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel) { |
|
198 | if (!cctxParams) { return ERROR(GENERIC); } | |
|
220 | RETURN_ERROR_IF(!cctxParams, GENERIC); | |
|
199 | 221 | memset(cctxParams, 0, sizeof(*cctxParams)); |
|
200 | 222 | cctxParams->compressionLevel = compressionLevel; |
|
201 | 223 | cctxParams->fParams.contentSizeFlag = 1; |
@@ -204,8 +226,8 b' size_t ZSTD_CCtxParams_init(ZSTD_CCtx_pa' | |||
|
204 | 226 | |
|
205 | 227 | size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params) |
|
206 | 228 | { |
|
207 | if (!cctxParams) { return ERROR(GENERIC); } | |
|
208 |
|
|
|
229 | RETURN_ERROR_IF(!cctxParams, GENERIC); | |
|
230 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); | |
|
209 | 231 | memset(cctxParams, 0, sizeof(*cctxParams)); |
|
210 | 232 | cctxParams->cParams = params.cParams; |
|
211 | 233 | cctxParams->fParams = params.fParams; |
@@ -359,6 +381,17 b' ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c' | |||
|
359 | 381 | bounds.upperBound = ZSTD_dictForceCopy; /* note : how to ensure at compile time that this is the highest value enum ? */ |
|
360 | 382 | return bounds; |
|
361 | 383 | |
|
384 | case ZSTD_c_literalCompressionMode: | |
|
385 | ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed); | |
|
386 | bounds.lowerBound = ZSTD_lcm_auto; | |
|
387 | bounds.upperBound = ZSTD_lcm_uncompressed; | |
|
388 | return bounds; | |
|
389 | ||
|
390 | case ZSTD_c_targetCBlockSize: | |
|
391 | bounds.lowerBound = ZSTD_TARGETCBLOCKSIZE_MIN; | |
|
392 | bounds.upperBound = ZSTD_TARGETCBLOCKSIZE_MAX; | |
|
393 | return bounds; | |
|
394 | ||
|
362 | 395 | default: |
|
363 | 396 | { ZSTD_bounds const boundError = { ERROR(parameter_unsupported), 0, 0 }; |
|
364 | 397 | return boundError; |
@@ -366,22 +399,22 b' ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_c' | |||
|
366 | 399 | } |
|
367 | 400 | } |
|
368 | 401 | |
|
369 |
/* ZSTD_cParam_ |
|
|
370 | * @return 1 if value is within cParam bounds, | |
|
371 | * 0 otherwise */ | |
|
372 |
static |
|
|
402 | /* ZSTD_cParam_clampBounds: | |
|
403 | * Clamps the value into the bounded range. | |
|
404 | */ | |
|
405 | static size_t ZSTD_cParam_clampBounds(ZSTD_cParameter cParam, int* value) | |
|
373 | 406 | { |
|
374 | 407 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); |
|
375 |
if (ZSTD_isError(bounds.error)) return |
|
|
376 |
if (value < bounds.lowerBound) |
|
|
377 |
if (value > bounds.upperBound) |
|
|
378 |
return |
|
|
408 | if (ZSTD_isError(bounds.error)) return bounds.error; | |
|
409 | if (*value < bounds.lowerBound) *value = bounds.lowerBound; | |
|
410 | if (*value > bounds.upperBound) *value = bounds.upperBound; | |
|
411 | return 0; | |
|
379 | 412 | } |
|
380 | 413 | |
|
381 | 414 | #define BOUNDCHECK(cParam, val) { \ |
|
382 |
|
|
|
383 |
|
|
|
384 | } } | |
|
415 | RETURN_ERROR_IF(!ZSTD_cParam_withinBounds(cParam,val), \ | |
|
416 | parameter_outOfBound); \ | |
|
417 | } | |
|
385 | 418 | |
|
386 | 419 | |
|
387 | 420 | static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param) |
@@ -413,6 +446,8 b' static int ZSTD_isUpdateAuthorized(ZSTD_' | |||
|
413 | 446 | case ZSTD_c_ldmBucketSizeLog: |
|
414 | 447 | case ZSTD_c_ldmHashRateLog: |
|
415 | 448 | case ZSTD_c_forceAttachDict: |
|
449 | case ZSTD_c_literalCompressionMode: | |
|
450 | case ZSTD_c_targetCBlockSize: | |
|
416 | 451 | default: |
|
417 | 452 | return 0; |
|
418 | 453 | } |
@@ -425,18 +460,17 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*' | |||
|
425 | 460 | if (ZSTD_isUpdateAuthorized(param)) { |
|
426 | 461 | cctx->cParamsChanged = 1; |
|
427 | 462 | } else { |
|
428 |
|
|
|
463 | RETURN_ERROR(stage_wrong); | |
|
429 | 464 | } } |
|
430 | 465 | |
|
431 | 466 | switch(param) |
|
432 | 467 | { |
|
433 |
case ZSTD_c_ |
|
|
434 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
468 | case ZSTD_c_nbWorkers: | |
|
469 | RETURN_ERROR_IF((value!=0) && cctx->staticSize, parameter_unsupported, | |
|
470 | "MT not compatible with static alloc"); | |
|
471 | break; | |
|
435 | 472 | |
|
436 | 473 | case ZSTD_c_compressionLevel: |
|
437 | if (cctx->cdict) return ERROR(stage_wrong); | |
|
438 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
439 | ||
|
440 | 474 | case ZSTD_c_windowLog: |
|
441 | 475 | case ZSTD_c_hashLog: |
|
442 | 476 | case ZSTD_c_chainLog: |
@@ -444,49 +478,33 b' size_t ZSTD_CCtx_setParameter(ZSTD_CCtx*' | |||
|
444 | 478 | case ZSTD_c_minMatch: |
|
445 | 479 | case ZSTD_c_targetLength: |
|
446 | 480 | case ZSTD_c_strategy: |
|
447 | if (cctx->cdict) return ERROR(stage_wrong); | |
|
448 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
449 | ||
|
481 | case ZSTD_c_ldmHashRateLog: | |
|
482 | case ZSTD_c_format: | |
|
450 | 483 | case ZSTD_c_contentSizeFlag: |
|
451 | 484 | case ZSTD_c_checksumFlag: |
|
452 | 485 | case ZSTD_c_dictIDFlag: |
|
453 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
454 | ||
|
455 | case ZSTD_c_forceMaxWindow : /* Force back-references to remain < windowSize, | |
|
456 | * even when referencing into Dictionary content. | |
|
457 | * default : 0 when using a CDict, 1 when using a Prefix */ | |
|
458 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
459 | ||
|
486 | case ZSTD_c_forceMaxWindow: | |
|
460 | 487 | case ZSTD_c_forceAttachDict: |
|
461 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
462 | ||
|
463 | case ZSTD_c_nbWorkers: | |
|
464 | if ((value!=0) && cctx->staticSize) { | |
|
465 | return ERROR(parameter_unsupported); /* MT not compatible with static alloc */ | |
|
466 | } | |
|
467 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
468 | ||
|
488 | case ZSTD_c_literalCompressionMode: | |
|
469 | 489 | case ZSTD_c_jobSize: |
|
470 | 490 | case ZSTD_c_overlapLog: |
|
471 | 491 | case ZSTD_c_rsyncable: |
|
472 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
473 | ||
|
474 | 492 | case ZSTD_c_enableLongDistanceMatching: |
|
475 | 493 | case ZSTD_c_ldmHashLog: |
|
476 | 494 | case ZSTD_c_ldmMinMatch: |
|
477 | 495 | case ZSTD_c_ldmBucketSizeLog: |
|
478 |
case ZSTD_c_ |
|
|
479 | if (cctx->cdict) return ERROR(stage_wrong); | |
|
480 | return ZSTD_CCtxParam_setParameter(&cctx->requestedParams, param, value); | |
|
481 | ||
|
482 | default: return ERROR(parameter_unsupported); | |
|
483 | } | |
|
484 | } | |
|
485 | ||
|
486 | size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* CCtxParams, | |
|
496 | case ZSTD_c_targetCBlockSize: | |
|
497 | break; | |
|
498 | ||
|
499 | default: RETURN_ERROR(parameter_unsupported); | |
|
500 | } | |
|
501 | return ZSTD_CCtxParams_setParameter(&cctx->requestedParams, param, value); | |
|
502 | } | |
|
503 | ||
|
504 | size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams, | |
|
487 | 505 | ZSTD_cParameter param, int value) |
|
488 | 506 | { |
|
489 | DEBUGLOG(4, "ZSTD_CCtxParam_setParameter (%i, %i)", (int)param, value); | |
|
507 | DEBUGLOG(4, "ZSTD_CCtxParams_setParameter (%i, %i)", (int)param, value); | |
|
490 | 508 | switch(param) |
|
491 | 509 | { |
|
492 | 510 | case ZSTD_c_format : |
@@ -495,11 +513,9 b' size_t ZSTD_CCtxParam_setParameter(ZSTD_' | |||
|
495 | 513 | return (size_t)CCtxParams->format; |
|
496 | 514 | |
|
497 | 515 | case ZSTD_c_compressionLevel : { |
|
498 | int cLevel = value; | |
|
499 | if (cLevel > ZSTD_maxCLevel()) cLevel = ZSTD_maxCLevel(); | |
|
500 | if (cLevel < ZSTD_minCLevel()) cLevel = ZSTD_minCLevel(); | |
|
501 | if (cLevel) { /* 0 : does not change current level */ | |
|
502 | CCtxParams->compressionLevel = cLevel; | |
|
516 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); | |
|
517 | if (value) { /* 0 : does not change current level */ | |
|
518 | CCtxParams->compressionLevel = value; | |
|
503 | 519 | } |
|
504 | 520 | if (CCtxParams->compressionLevel >= 0) return CCtxParams->compressionLevel; |
|
505 | 521 | return 0; /* return type (size_t) cannot represent negative values */ |
@@ -573,33 +589,55 b' size_t ZSTD_CCtxParam_setParameter(ZSTD_' | |||
|
573 | 589 | return CCtxParams->attachDictPref; |
|
574 | 590 | } |
|
575 | 591 | |
|
592 | case ZSTD_c_literalCompressionMode : { | |
|
593 | const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value; | |
|
594 | BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm); | |
|
595 | CCtxParams->literalCompressionMode = lcm; | |
|
596 | return CCtxParams->literalCompressionMode; | |
|
597 | } | |
|
598 | ||
|
576 | 599 | case ZSTD_c_nbWorkers : |
|
577 | 600 | #ifndef ZSTD_MULTITHREAD |
|
578 | if (value!=0) return ERROR(parameter_unsupported); | |
|
601 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
|
579 | 602 | return 0; |
|
580 | 603 | #else |
|
581 | return ZSTDMT_CCtxParam_setNbWorkers(CCtxParams, value); | |
|
604 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); | |
|
605 | CCtxParams->nbWorkers = value; | |
|
606 | return CCtxParams->nbWorkers; | |
|
582 | 607 | #endif |
|
583 | 608 | |
|
584 | 609 | case ZSTD_c_jobSize : |
|
585 | 610 | #ifndef ZSTD_MULTITHREAD |
|
586 | return ERROR(parameter_unsupported); | |
|
611 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
|
612 | return 0; | |
|
587 | 613 | #else |
|
588 | return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_jobSize, value); | |
|
614 | /* Adjust to the minimum non-default value. */ | |
|
615 | if (value != 0 && value < ZSTDMT_JOBSIZE_MIN) | |
|
616 | value = ZSTDMT_JOBSIZE_MIN; | |
|
617 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(param, &value)); | |
|
618 | assert(value >= 0); | |
|
619 | CCtxParams->jobSize = value; | |
|
620 | return CCtxParams->jobSize; | |
|
589 | 621 | #endif |
|
590 | 622 | |
|
591 | 623 | case ZSTD_c_overlapLog : |
|
592 | 624 | #ifndef ZSTD_MULTITHREAD |
|
593 | return ERROR(parameter_unsupported); | |
|
625 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
|
626 | return 0; | |
|
594 | 627 | #else |
|
595 | return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_overlapLog, value); | |
|
628 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); | |
|
629 | CCtxParams->overlapLog = value; | |
|
630 | return CCtxParams->overlapLog; | |
|
596 | 631 | #endif |
|
597 | 632 | |
|
598 | 633 | case ZSTD_c_rsyncable : |
|
599 | 634 | #ifndef ZSTD_MULTITHREAD |
|
600 | return ERROR(parameter_unsupported); | |
|
635 | RETURN_ERROR_IF(value!=0, parameter_unsupported, "not compiled with multithreading"); | |
|
636 | return 0; | |
|
601 | 637 | #else |
|
602 | return ZSTDMT_CCtxParam_setMTCtxParameter(CCtxParams, ZSTDMT_p_rsyncable, value); | |
|
638 | FORWARD_IF_ERROR(ZSTD_cParam_clampBounds(ZSTD_c_overlapLog, &value)); | |
|
639 | CCtxParams->rsyncable = value; | |
|
640 | return CCtxParams->rsyncable; | |
|
603 | 641 | #endif |
|
604 | 642 | |
|
605 | 643 | case ZSTD_c_enableLongDistanceMatching : |
@@ -625,21 +663,27 b' size_t ZSTD_CCtxParam_setParameter(ZSTD_' | |||
|
625 | 663 | return CCtxParams->ldmParams.bucketSizeLog; |
|
626 | 664 | |
|
627 | 665 | case ZSTD_c_ldmHashRateLog : |
|
628 |
|
|
|
629 |
|
|
|
666 | RETURN_ERROR_IF(value > ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN, | |
|
667 | parameter_outOfBound); | |
|
630 | 668 | CCtxParams->ldmParams.hashRateLog = value; |
|
631 | 669 | return CCtxParams->ldmParams.hashRateLog; |
|
632 | 670 | |
|
633 | default: return ERROR(parameter_unsupported); | |
|
671 | case ZSTD_c_targetCBlockSize : | |
|
672 | if (value!=0) /* 0 ==> default */ | |
|
673 | BOUNDCHECK(ZSTD_c_targetCBlockSize, value); | |
|
674 | CCtxParams->targetCBlockSize = value; | |
|
675 | return CCtxParams->targetCBlockSize; | |
|
676 | ||
|
677 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); | |
|
634 | 678 | } |
|
635 | 679 | } |
|
636 | 680 | |
|
637 | 681 | size_t ZSTD_CCtx_getParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value) |
|
638 | 682 | { |
|
639 | return ZSTD_CCtxParam_getParameter(&cctx->requestedParams, param, value); | |
|
640 | } | |
|
641 | ||
|
642 | size_t ZSTD_CCtxParam_getParameter( | |
|
683 | return ZSTD_CCtxParams_getParameter(&cctx->requestedParams, param, value); | |
|
684 | } | |
|
685 | ||
|
686 | size_t ZSTD_CCtxParams_getParameter( | |
|
643 | 687 | ZSTD_CCtx_params* CCtxParams, ZSTD_cParameter param, int* value) |
|
644 | 688 | { |
|
645 | 689 | switch(param) |
@@ -651,13 +695,13 b' size_t ZSTD_CCtxParam_getParameter(' | |||
|
651 | 695 | *value = CCtxParams->compressionLevel; |
|
652 | 696 | break; |
|
653 | 697 | case ZSTD_c_windowLog : |
|
654 | *value = CCtxParams->cParams.windowLog; | |
|
698 | *value = (int)CCtxParams->cParams.windowLog; | |
|
655 | 699 | break; |
|
656 | 700 | case ZSTD_c_hashLog : |
|
657 | *value = CCtxParams->cParams.hashLog; | |
|
701 | *value = (int)CCtxParams->cParams.hashLog; | |
|
658 | 702 | break; |
|
659 | 703 | case ZSTD_c_chainLog : |
|
660 | *value = CCtxParams->cParams.chainLog; | |
|
704 | *value = (int)CCtxParams->cParams.chainLog; | |
|
661 | 705 | break; |
|
662 | 706 | case ZSTD_c_searchLog : |
|
663 | 707 | *value = CCtxParams->cParams.searchLog; |
@@ -686,6 +730,9 b' size_t ZSTD_CCtxParam_getParameter(' | |||
|
686 | 730 | case ZSTD_c_forceAttachDict : |
|
687 | 731 | *value = CCtxParams->attachDictPref; |
|
688 | 732 | break; |
|
733 | case ZSTD_c_literalCompressionMode : | |
|
734 | *value = CCtxParams->literalCompressionMode; | |
|
735 | break; | |
|
689 | 736 | case ZSTD_c_nbWorkers : |
|
690 | 737 | #ifndef ZSTD_MULTITHREAD |
|
691 | 738 | assert(CCtxParams->nbWorkers == 0); |
@@ -694,7 +741,7 b' size_t ZSTD_CCtxParam_getParameter(' | |||
|
694 | 741 | break; |
|
695 | 742 | case ZSTD_c_jobSize : |
|
696 | 743 | #ifndef ZSTD_MULTITHREAD |
|
697 |
|
|
|
744 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); | |
|
698 | 745 | #else |
|
699 | 746 | assert(CCtxParams->jobSize <= INT_MAX); |
|
700 | 747 | *value = (int)CCtxParams->jobSize; |
@@ -702,14 +749,14 b' size_t ZSTD_CCtxParam_getParameter(' | |||
|
702 | 749 | #endif |
|
703 | 750 | case ZSTD_c_overlapLog : |
|
704 | 751 | #ifndef ZSTD_MULTITHREAD |
|
705 |
|
|
|
752 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); | |
|
706 | 753 | #else |
|
707 | 754 | *value = CCtxParams->overlapLog; |
|
708 | 755 | break; |
|
709 | 756 | #endif |
|
710 | 757 | case ZSTD_c_rsyncable : |
|
711 | 758 | #ifndef ZSTD_MULTITHREAD |
|
712 |
|
|
|
759 | RETURN_ERROR(parameter_unsupported, "not compiled with multithreading"); | |
|
713 | 760 | #else |
|
714 | 761 | *value = CCtxParams->rsyncable; |
|
715 | 762 | break; |
@@ -729,7 +776,10 b' size_t ZSTD_CCtxParam_getParameter(' | |||
|
729 | 776 | case ZSTD_c_ldmHashRateLog : |
|
730 | 777 | *value = CCtxParams->ldmParams.hashRateLog; |
|
731 | 778 | break; |
|
732 | default: return ERROR(parameter_unsupported); | |
|
779 | case ZSTD_c_targetCBlockSize : | |
|
780 | *value = (int)CCtxParams->targetCBlockSize; | |
|
781 | break; | |
|
782 | default: RETURN_ERROR(parameter_unsupported, "unknown parameter"); | |
|
733 | 783 | } |
|
734 | 784 | return 0; |
|
735 | 785 | } |
@@ -745,8 +795,8 b' size_t ZSTD_CCtx_setParametersUsingCCtxP' | |||
|
745 | 795 | ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params) |
|
746 | 796 | { |
|
747 | 797 | DEBUGLOG(4, "ZSTD_CCtx_setParametersUsingCCtxParams"); |
|
748 |
|
|
|
749 |
|
|
|
798 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
|
799 | RETURN_ERROR_IF(cctx->cdict, stage_wrong); | |
|
750 | 800 | |
|
751 | 801 | cctx->requestedParams = *params; |
|
752 | 802 | return 0; |
@@ -755,33 +805,71 b' size_t ZSTD_CCtx_setParametersUsingCCtxP' | |||
|
755 | 805 | ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize) |
|
756 | 806 | { |
|
757 | 807 | DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize); |
|
758 |
|
|
|
808 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
|
759 | 809 | cctx->pledgedSrcSizePlusOne = pledgedSrcSize+1; |
|
760 | 810 | return 0; |
|
761 | 811 | } |
|
762 | 812 | |
|
813 | /** | |
|
814 | * Initializes the local dict using the requested parameters. | |
|
815 | * NOTE: This does not use the pledged src size, because it may be used for more | |
|
816 | * than one compression. | |
|
817 | */ | |
|
818 | static size_t ZSTD_initLocalDict(ZSTD_CCtx* cctx) | |
|
819 | { | |
|
820 | ZSTD_localDict* const dl = &cctx->localDict; | |
|
821 | ZSTD_compressionParameters const cParams = ZSTD_getCParamsFromCCtxParams( | |
|
822 | &cctx->requestedParams, 0, dl->dictSize); | |
|
823 | if (dl->dict == NULL) { | |
|
824 | /* No local dictionary. */ | |
|
825 | assert(dl->dictBuffer == NULL); | |
|
826 | assert(dl->cdict == NULL); | |
|
827 | assert(dl->dictSize == 0); | |
|
828 | return 0; | |
|
829 | } | |
|
830 | if (dl->cdict != NULL) { | |
|
831 | assert(cctx->cdict == dl->cdict); | |
|
832 | /* Local dictionary already initialized. */ | |
|
833 | return 0; | |
|
834 | } | |
|
835 | assert(dl->dictSize > 0); | |
|
836 | assert(cctx->cdict == NULL); | |
|
837 | assert(cctx->prefixDict.dict == NULL); | |
|
838 | ||
|
839 | dl->cdict = ZSTD_createCDict_advanced( | |
|
840 | dl->dict, | |
|
841 | dl->dictSize, | |
|
842 | ZSTD_dlm_byRef, | |
|
843 | dl->dictContentType, | |
|
844 | cParams, | |
|
845 | cctx->customMem); | |
|
846 | RETURN_ERROR_IF(!dl->cdict, memory_allocation); | |
|
847 | cctx->cdict = dl->cdict; | |
|
848 | return 0; | |
|
849 | } | |
|
850 | ||
|
763 | 851 | size_t ZSTD_CCtx_loadDictionary_advanced( |
|
764 | 852 | ZSTD_CCtx* cctx, const void* dict, size_t dictSize, |
|
765 | 853 | ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType) |
|
766 | 854 | { |
|
767 |
|
|
|
768 | if (cctx->staticSize) return ERROR(memory_allocation); /* no malloc for static CCtx */ | |
|
855 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
|
856 | RETURN_ERROR_IF(cctx->staticSize, memory_allocation, | |
|
857 | "no malloc for static CCtx"); | |
|
769 | 858 | DEBUGLOG(4, "ZSTD_CCtx_loadDictionary_advanced (size: %u)", (U32)dictSize); |
|
770 |
ZSTD_ |
|
|
771 |
if (dict==NULL || dictSize==0) |
|
|
772 | cctx->cdictLocal = NULL; | |
|
773 | cctx->cdict = NULL; | |
|
859 | ZSTD_clearAllDicts(cctx); /* in case one already exists */ | |
|
860 | if (dict == NULL || dictSize == 0) /* no dictionary mode */ | |
|
861 | return 0; | |
|
862 | if (dictLoadMethod == ZSTD_dlm_byRef) { | |
|
863 | cctx->localDict.dict = dict; | |
|
774 | 864 | } else { |
|
775 | ZSTD_compressionParameters const cParams = | |
|
776 | ZSTD_getCParamsFromCCtxParams(&cctx->requestedParams, cctx->pledgedSrcSizePlusOne-1, dictSize); | |
|
777 | cctx->cdictLocal = ZSTD_createCDict_advanced( | |
|
778 | dict, dictSize, | |
|
779 | dictLoadMethod, dictContentType, | |
|
780 | cParams, cctx->customMem); | |
|
781 | cctx->cdict = cctx->cdictLocal; | |
|
782 | if (cctx->cdictLocal == NULL) | |
|
783 | return ERROR(memory_allocation); | |
|
784 | } | |
|
865 | void* dictBuffer = ZSTD_malloc(dictSize, cctx->customMem); | |
|
866 | RETURN_ERROR_IF(!dictBuffer, memory_allocation); | |
|
867 | memcpy(dictBuffer, dict, dictSize); | |
|
868 | cctx->localDict.dictBuffer = dictBuffer; | |
|
869 | cctx->localDict.dict = dictBuffer; | |
|
870 | } | |
|
871 | cctx->localDict.dictSize = dictSize; | |
|
872 | cctx->localDict.dictContentType = dictContentType; | |
|
785 | 873 | return 0; |
|
786 | 874 | } |
|
787 | 875 | |
@@ -801,9 +889,10 b' ZSTDLIB_API size_t ZSTD_CCtx_loadDiction' | |||
|
801 | 889 | |
|
802 | 890 | size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict) |
|
803 | 891 | { |
|
804 |
|
|
|
892 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
|
893 | /* Free the existing local cdict (if any) to save memory. */ | |
|
894 | ZSTD_clearAllDicts(cctx); | |
|
805 | 895 | cctx->cdict = cdict; |
|
806 | memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* exclusive */ | |
|
807 | 896 | return 0; |
|
808 | 897 | } |
|
809 | 898 | |
@@ -815,8 +904,8 b' size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cc' | |||
|
815 | 904 | size_t ZSTD_CCtx_refPrefix_advanced( |
|
816 | 905 | ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) |
|
817 | 906 | { |
|
818 |
|
|
|
819 | cctx->cdict = NULL; /* prefix discards any prior cdict */ | |
|
907 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
|
908 | ZSTD_clearAllDicts(cctx); | |
|
820 | 909 | cctx->prefixDict.dict = prefix; |
|
821 | 910 | cctx->prefixDict.dictSize = prefixSize; |
|
822 | 911 | cctx->prefixDict.dictContentType = dictContentType; |
@@ -834,8 +923,8 b' size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ' | |||
|
834 | 923 | } |
|
835 | 924 | if ( (reset == ZSTD_reset_parameters) |
|
836 | 925 | || (reset == ZSTD_reset_session_and_parameters) ) { |
|
837 |
|
|
|
838 | cctx->cdict = NULL; | |
|
926 | RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong); | |
|
927 | ZSTD_clearAllDicts(cctx); | |
|
839 | 928 | return ZSTD_CCtxParams_reset(&cctx->requestedParams); |
|
840 | 929 | } |
|
841 | 930 | return 0; |
@@ -847,12 +936,12 b' size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ' | |||
|
847 | 936 | @return : 0, or an error code if one value is beyond authorized range */ |
|
848 | 937 | size_t ZSTD_checkCParams(ZSTD_compressionParameters cParams) |
|
849 | 938 | { |
|
850 | BOUNDCHECK(ZSTD_c_windowLog, cParams.windowLog); | |
|
851 | BOUNDCHECK(ZSTD_c_chainLog, cParams.chainLog); | |
|
852 | BOUNDCHECK(ZSTD_c_hashLog, cParams.hashLog); | |
|
853 | BOUNDCHECK(ZSTD_c_searchLog, cParams.searchLog); | |
|
854 | BOUNDCHECK(ZSTD_c_minMatch, cParams.minMatch); | |
|
855 | BOUNDCHECK(ZSTD_c_targetLength,cParams.targetLength); | |
|
939 | BOUNDCHECK(ZSTD_c_windowLog, (int)cParams.windowLog); | |
|
940 | BOUNDCHECK(ZSTD_c_chainLog, (int)cParams.chainLog); | |
|
941 | BOUNDCHECK(ZSTD_c_hashLog, (int)cParams.hashLog); | |
|
942 | BOUNDCHECK(ZSTD_c_searchLog, (int)cParams.searchLog); | |
|
943 | BOUNDCHECK(ZSTD_c_minMatch, (int)cParams.minMatch); | |
|
944 | BOUNDCHECK(ZSTD_c_targetLength,(int)cParams.targetLength); | |
|
856 | 945 | BOUNDCHECK(ZSTD_c_strategy, cParams.strategy); |
|
857 | 946 | return 0; |
|
858 | 947 | } |
@@ -868,7 +957,7 b' ZSTD_clampCParams(ZSTD_compressionParame' | |||
|
868 | 957 | if ((int)val<bounds.lowerBound) val=(type)bounds.lowerBound; \ |
|
869 | 958 | else if ((int)val>bounds.upperBound) val=(type)bounds.upperBound; \ |
|
870 | 959 | } |
|
871 |
# define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, |
|
|
960 | # define CLAMP(cParam, val) CLAMP_TYPE(cParam, val, unsigned) | |
|
872 | 961 | CLAMP(ZSTD_c_windowLog, cParams.windowLog); |
|
873 | 962 | CLAMP(ZSTD_c_chainLog, cParams.chainLog); |
|
874 | 963 | CLAMP(ZSTD_c_hashLog, cParams.hashLog); |
@@ -888,10 +977,11 b' static U32 ZSTD_cycleLog(U32 hashLog, ZS' | |||
|
888 | 977 | } |
|
889 | 978 | |
|
890 | 979 | /** ZSTD_adjustCParams_internal() : |
|
891 |
|
|
|
892 |
|
|
|
893 | Both `srcSize` and `dictSize` are optional (use 0 if unknown). | |
|
894 | Note : cPar is assumed validated. Use ZSTD_checkCParams() to ensure this condition. */ | |
|
980 | * optimize `cPar` for a specified input (`srcSize` and `dictSize`). | |
|
981 | * mostly downsize to reduce memory consumption and initialization latency. | |
|
982 | * `srcSize` can be ZSTD_CONTENTSIZE_UNKNOWN when not known. | |
|
983 | * note : for the time being, `srcSize==0` means "unknown" too, for compatibility with older convention. | |
|
984 | * condition : cPar is presumed validated (can be checked using ZSTD_checkCParams()). */ | |
|
895 | 985 | static ZSTD_compressionParameters |
|
896 | 986 | ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar, |
|
897 | 987 | unsigned long long srcSize, |
@@ -901,7 +991,7 b' ZSTD_adjustCParams_internal(ZSTD_compres' | |||
|
901 | 991 | static const U64 maxWindowResize = 1ULL << (ZSTD_WINDOWLOG_MAX-1); |
|
902 | 992 | assert(ZSTD_checkCParams(cPar)==0); |
|
903 | 993 | |
|
904 |
if (dictSize && (srcSize+1<2) /* |
|
|
994 | if (dictSize && (srcSize+1<2) /* ZSTD_CONTENTSIZE_UNKNOWN and 0 mean "unknown" */ ) | |
|
905 | 995 | srcSize = minSrcSize; /* presumed small when there is a dictionary */ |
|
906 | 996 | else if (srcSize == 0) |
|
907 | 997 | srcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* 0 == unknown : presumed large */ |
@@ -922,7 +1012,7 b' ZSTD_adjustCParams_internal(ZSTD_compres' | |||
|
922 | 1012 | } |
|
923 | 1013 | |
|
924 | 1014 | if (cPar.windowLog < ZSTD_WINDOWLOG_ABSOLUTEMIN) |
|
925 | cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* required for frame header */ | |
|
1015 | cPar.windowLog = ZSTD_WINDOWLOG_ABSOLUTEMIN; /* minimum wlog required for valid frame header */ | |
|
926 | 1016 | |
|
927 | 1017 | return cPar; |
|
928 | 1018 | } |
@@ -932,7 +1022,7 b' ZSTD_adjustCParams(ZSTD_compressionParam' | |||
|
932 | 1022 | unsigned long long srcSize, |
|
933 | 1023 | size_t dictSize) |
|
934 | 1024 | { |
|
935 | cPar = ZSTD_clampCParams(cPar); | |
|
1025 | cPar = ZSTD_clampCParams(cPar); /* resulting cPar is necessarily valid (all parameters within range) */ | |
|
936 | 1026 | return ZSTD_adjustCParams_internal(cPar, srcSize, dictSize); |
|
937 | 1027 | } |
|
938 | 1028 | |
@@ -973,8 +1063,7 b' ZSTD_sizeof_matchState(const ZSTD_compre' | |||
|
973 | 1063 | |
|
974 | 1064 | size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params) |
|
975 | 1065 | { |
|
976 |
|
|
|
977 | if (params->nbWorkers > 0) { return ERROR(GENERIC); } | |
|
1066 | RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); | |
|
978 | 1067 | { ZSTD_compressionParameters const cParams = |
|
979 | 1068 | ZSTD_getCParamsFromCCtxParams(params, 0, 0); |
|
980 | 1069 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); |
@@ -1022,10 +1111,12 b' size_t ZSTD_estimateCCtxSize(int compres' | |||
|
1022 | 1111 | |
|
1023 | 1112 | size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params) |
|
1024 | 1113 | { |
|
1025 | if (params->nbWorkers > 0) { return ERROR(GENERIC); } | |
|
1026 | { size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); | |
|
1027 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << params->cParams.windowLog); | |
|
1028 | size_t const inBuffSize = ((size_t)1 << params->cParams.windowLog) + blockSize; | |
|
1114 | RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only."); | |
|
1115 | { ZSTD_compressionParameters const cParams = | |
|
1116 | ZSTD_getCParamsFromCCtxParams(params, 0, 0); | |
|
1117 | size_t const CCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(params); | |
|
1118 | size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, (size_t)1 << cParams.windowLog); | |
|
1119 | size_t const inBuffSize = ((size_t)1 << cParams.windowLog) + blockSize; | |
|
1029 | 1120 | size_t const outBuffSize = ZSTD_compressBound(blockSize) + 1; |
|
1030 | 1121 | size_t const streamingSize = inBuffSize + outBuffSize; |
|
1031 | 1122 | |
@@ -1205,7 +1296,6 b' static void ZSTD_invalidateMatchState(ZS' | |||
|
1205 | 1296 | ZSTD_window_clear(&ms->window); |
|
1206 | 1297 | |
|
1207 | 1298 | ms->nextToUpdate = ms->window.dictLimit; |
|
1208 | ms->nextToUpdate3 = ms->window.dictLimit; | |
|
1209 | 1299 | ms->loadedDictEnd = 0; |
|
1210 | 1300 | ms->opt.litLengthSum = 0; /* force reset of btopt stats */ |
|
1211 | 1301 | ms->dictMatchState = NULL; |
@@ -1242,15 +1332,17 b' static size_t ZSTD_continueCCtx(ZSTD_CCt' | |||
|
1242 | 1332 | |
|
1243 | 1333 | typedef enum { ZSTDcrp_continue, ZSTDcrp_noMemset } ZSTD_compResetPolicy_e; |
|
1244 | 1334 | |
|
1335 | typedef enum { ZSTD_resetTarget_CDict, ZSTD_resetTarget_CCtx } ZSTD_resetTarget_e; | |
|
1336 | ||
|
1245 | 1337 | static void* |
|
1246 | 1338 | ZSTD_reset_matchState(ZSTD_matchState_t* ms, |
|
1247 | 1339 | void* ptr, |
|
1248 | 1340 | const ZSTD_compressionParameters* cParams, |
|
1249 |
ZSTD_compResetPolicy_e const crp, |
|
|
1341 | ZSTD_compResetPolicy_e const crp, ZSTD_resetTarget_e const forWho) | |
|
1250 | 1342 | { |
|
1251 | 1343 | size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog); |
|
1252 | 1344 | size_t const hSize = ((size_t)1) << cParams->hashLog; |
|
1253 | U32 const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; | |
|
1345 | U32 const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0; | |
|
1254 | 1346 | size_t const h3Size = ((size_t)1) << hashLog3; |
|
1255 | 1347 | size_t const tableSpace = (chainSize + hSize + h3Size) * sizeof(U32); |
|
1256 | 1348 | |
@@ -1264,7 +1356,7 b' ZSTD_reset_matchState(ZSTD_matchState_t*' | |||
|
1264 | 1356 | ZSTD_invalidateMatchState(ms); |
|
1265 | 1357 | |
|
1266 | 1358 | /* opt parser space */ |
|
1267 | if (forCCtx && (cParams->strategy >= ZSTD_btopt)) { | |
|
1359 | if ((forWho == ZSTD_resetTarget_CCtx) && (cParams->strategy >= ZSTD_btopt)) { | |
|
1268 | 1360 | DEBUGLOG(4, "reserving optimal parser space"); |
|
1269 | 1361 | ms->opt.litFreq = (unsigned*)ptr; |
|
1270 | 1362 | ms->opt.litLengthFreq = ms->opt.litFreq + (1<<Litbits); |
@@ -1292,6 +1384,19 b' ZSTD_reset_matchState(ZSTD_matchState_t*' | |||
|
1292 | 1384 | return ptr; |
|
1293 | 1385 | } |
|
1294 | 1386 | |
|
1387 | /* ZSTD_indexTooCloseToMax() : | |
|
1388 | * minor optimization : prefer memset() rather than reduceIndex() | |
|
1389 | * which is measurably slow in some circumstances (reported for Visual Studio). | |
|
1390 | * Works when re-using a context for a lot of smallish inputs : | |
|
1391 | * if all inputs are smaller than ZSTD_INDEXOVERFLOW_MARGIN, | |
|
1392 | * memset() will be triggered before reduceIndex(). | |
|
1393 | */ | |
|
1394 | #define ZSTD_INDEXOVERFLOW_MARGIN (16 MB) | |
|
1395 | static int ZSTD_indexTooCloseToMax(ZSTD_window_t w) | |
|
1396 | { | |
|
1397 | return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN); | |
|
1398 | } | |
|
1399 | ||
|
1295 | 1400 | #define ZSTD_WORKSPACETOOLARGE_FACTOR 3 /* define "workspace is too large" as this number of times larger than needed */ |
|
1296 | 1401 | #define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128 /* when workspace is continuously too large |
|
1297 | 1402 | * during at least this number of times, |
@@ -1303,7 +1408,7 b' ZSTD_reset_matchState(ZSTD_matchState_t*' | |||
|
1303 | 1408 | note : `params` are assumed fully validated at this stage */ |
|
1304 | 1409 | static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, |
|
1305 | 1410 | ZSTD_CCtx_params params, |
|
1306 | U64 pledgedSrcSize, | |
|
1411 | U64 const pledgedSrcSize, | |
|
1307 | 1412 | ZSTD_compResetPolicy_e const crp, |
|
1308 | 1413 | ZSTD_buffered_policy_e const zbuff) |
|
1309 | 1414 | { |
@@ -1316,12 +1421,20 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||
|
1316 | 1421 | zc->inBuffSize, |
|
1317 | 1422 | zc->seqStore.maxNbSeq, zc->seqStore.maxNbLit, |
|
1318 | 1423 | zbuff, pledgedSrcSize)) { |
|
1319 |
DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> continue mode |
|
|
1320 | zc->appliedParams.cParams.windowLog, zc->blockSize); | |
|
1424 | DEBUGLOG(4, "ZSTD_equivalentParams()==1 -> consider continue mode"); | |
|
1321 | 1425 | zc->workSpaceOversizedDuration += (zc->workSpaceOversizedDuration > 0); /* if it was too large, it still is */ |
|
1322 | if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) | |
|
1426 | if (zc->workSpaceOversizedDuration <= ZSTD_WORKSPACETOOLARGE_MAXDURATION) { | |
|
1427 | DEBUGLOG(4, "continue mode confirmed (wLog1=%u, blockSize1=%zu)", | |
|
1428 | zc->appliedParams.cParams.windowLog, zc->blockSize); | |
|
1429 | if (ZSTD_indexTooCloseToMax(zc->blockState.matchState.window)) { | |
|
1430 | /* prefer a reset, faster than a rescale */ | |
|
1431 | ZSTD_reset_matchState(&zc->blockState.matchState, | |
|
1432 | zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, | |
|
1433 | &params.cParams, | |
|
1434 | crp, ZSTD_resetTarget_CCtx); | |
|
1435 | } | |
|
1323 | 1436 | return ZSTD_continueCCtx(zc, params, pledgedSrcSize); |
|
1324 | } } | |
|
1437 | } } } | |
|
1325 | 1438 | DEBUGLOG(4, "ZSTD_equivalentParams()==0 -> reset CCtx"); |
|
1326 | 1439 | |
|
1327 | 1440 | if (params.ldmParams.enableLdm) { |
@@ -1364,16 +1477,16 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||
|
1364 | 1477 | DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize); |
|
1365 | 1478 | |
|
1366 | 1479 | if (workSpaceTooSmall || workSpaceWasteful) { |
|
1367 |
DEBUGLOG(4, " |
|
|
1480 | DEBUGLOG(4, "Resize workSpaceSize from %zuKB to %zuKB", | |
|
1368 | 1481 | zc->workSpaceSize >> 10, |
|
1369 | 1482 | neededSpace >> 10); |
|
1370 | /* static cctx : no resize, error out */ | |
|
1371 |
|
|
|
1483 | ||
|
1484 | RETURN_ERROR_IF(zc->staticSize, memory_allocation, "static cctx : no resize"); | |
|
1372 | 1485 | |
|
1373 | 1486 | zc->workSpaceSize = 0; |
|
1374 | 1487 | ZSTD_free(zc->workSpace, zc->customMem); |
|
1375 | 1488 | zc->workSpace = ZSTD_malloc(neededSpace, zc->customMem); |
|
1376 |
|
|
|
1489 | RETURN_ERROR_IF(zc->workSpace == NULL, memory_allocation); | |
|
1377 | 1490 | zc->workSpaceSize = neededSpace; |
|
1378 | 1491 | zc->workSpaceOversizedDuration = 0; |
|
1379 | 1492 | |
@@ -1406,7 +1519,10 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||
|
1406 | 1519 | |
|
1407 | 1520 | ZSTD_reset_compressedBlockState(zc->blockState.prevCBlock); |
|
1408 | 1521 | |
|
1409 | ptr = zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32; | |
|
1522 | ptr = ZSTD_reset_matchState(&zc->blockState.matchState, | |
|
1523 | zc->entropyWorkspace + HUF_WORKSPACE_SIZE_U32, | |
|
1524 | &params.cParams, | |
|
1525 | crp, ZSTD_resetTarget_CCtx); | |
|
1410 | 1526 | |
|
1411 | 1527 | /* ldm hash table */ |
|
1412 | 1528 | /* initialize bucketOffsets table later for pointer alignment */ |
@@ -1424,8 +1540,6 b' static size_t ZSTD_resetCCtx_internal(ZS' | |||
|
1424 | 1540 | } |
|
1425 | 1541 | assert(((size_t)ptr & 3) == 0); /* ensure ptr is properly aligned */ |
|
1426 | 1542 | |
|
1427 | ptr = ZSTD_reset_matchState(&zc->blockState.matchState, ptr, &params.cParams, crp, /* forCCtx */ 1); | |
|
1428 | ||
|
1429 | 1543 | /* sequences storage */ |
|
1430 | 1544 | zc->seqStore.maxNbSeq = maxNbSeq; |
|
1431 | 1545 | zc->seqStore.sequencesStart = (seqDef*)ptr; |
@@ -1502,15 +1616,14 b' static int ZSTD_shouldAttachDict(const Z' | |||
|
1502 | 1616 | * handled in _enforceMaxDist */ |
|
1503 | 1617 | } |
|
1504 | 1618 | |
|
1505 | static size_t ZSTD_resetCCtx_byAttachingCDict( | |
|
1506 | ZSTD_CCtx* cctx, | |
|
1619 | static size_t | |
|
1620 | ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx, | |
|
1507 | 1621 | const ZSTD_CDict* cdict, |
|
1508 | 1622 | ZSTD_CCtx_params params, |
|
1509 | 1623 | U64 pledgedSrcSize, |
|
1510 | 1624 | ZSTD_buffered_policy_e zbuff) |
|
1511 | 1625 | { |
|
1512 | { | |
|
1513 | const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams; | |
|
1626 | { const ZSTD_compressionParameters* const cdict_cParams = &cdict->matchState.cParams; | |
|
1514 | 1627 | unsigned const windowLog = params.cParams.windowLog; |
|
1515 | 1628 | assert(windowLog != 0); |
|
1516 | 1629 | /* Resize working context table params for input only, since the dict |
@@ -1522,8 +1635,7 b' static size_t ZSTD_resetCCtx_byAttaching' | |||
|
1522 | 1635 | assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy); |
|
1523 | 1636 | } |
|
1524 | 1637 | |
|
1525 | { | |
|
1526 | const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc | |
|
1638 | { const U32 cdictEnd = (U32)( cdict->matchState.window.nextSrc | |
|
1527 | 1639 | - cdict->matchState.window.base); |
|
1528 | 1640 | const U32 cdictLen = cdictEnd - cdict->matchState.window.dictLimit; |
|
1529 | 1641 | if (cdictLen == 0) { |
@@ -1540,9 +1652,9 b' static size_t ZSTD_resetCCtx_byAttaching' | |||
|
1540 | 1652 | cctx->blockState.matchState.window.base + cdictEnd; |
|
1541 | 1653 | ZSTD_window_clear(&cctx->blockState.matchState.window); |
|
1542 | 1654 | } |
|
1655 | /* loadedDictEnd is expressed within the referential of the active context */ | |
|
1543 | 1656 | cctx->blockState.matchState.loadedDictEnd = cctx->blockState.matchState.window.dictLimit; |
|
1544 |
|
|
|
1545 | } | |
|
1657 | } } | |
|
1546 | 1658 | |
|
1547 | 1659 | cctx->dictID = cdict->dictID; |
|
1548 | 1660 | |
@@ -1596,7 +1708,6 b' static size_t ZSTD_resetCCtx_byCopyingCD' | |||
|
1596 | 1708 | ZSTD_matchState_t* dstMatchState = &cctx->blockState.matchState; |
|
1597 | 1709 | dstMatchState->window = srcMatchState->window; |
|
1598 | 1710 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; |
|
1599 | dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; | |
|
1600 | 1711 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; |
|
1601 | 1712 | } |
|
1602 | 1713 | |
@@ -1644,7 +1755,7 b' static size_t ZSTD_copyCCtx_internal(ZST' | |||
|
1644 | 1755 | ZSTD_buffered_policy_e zbuff) |
|
1645 | 1756 | { |
|
1646 | 1757 | DEBUGLOG(5, "ZSTD_copyCCtx_internal"); |
|
1647 |
|
|
|
1758 | RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong); | |
|
1648 | 1759 | |
|
1649 | 1760 | memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem)); |
|
1650 | 1761 | { ZSTD_CCtx_params params = dstCCtx->requestedParams; |
@@ -1676,7 +1787,6 b' static size_t ZSTD_copyCCtx_internal(ZST' | |||
|
1676 | 1787 | ZSTD_matchState_t* dstMatchState = &dstCCtx->blockState.matchState; |
|
1677 | 1788 | dstMatchState->window = srcMatchState->window; |
|
1678 | 1789 | dstMatchState->nextToUpdate = srcMatchState->nextToUpdate; |
|
1679 | dstMatchState->nextToUpdate3= srcMatchState->nextToUpdate3; | |
|
1680 | 1790 | dstMatchState->loadedDictEnd= srcMatchState->loadedDictEnd; |
|
1681 | 1791 | } |
|
1682 | 1792 | dstCCtx->dictID = srcCCtx->dictID; |
@@ -1746,16 +1856,15 b' static void ZSTD_reduceTable_btlazy2(U32' | |||
|
1746 | 1856 | |
|
1747 | 1857 | /*! ZSTD_reduceIndex() : |
|
1748 | 1858 | * rescale all indexes to avoid future overflow (indexes are U32) */ |
|
1749 |
static void ZSTD_reduceIndex (ZSTD_ |
|
|
1859 | static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, const U32 reducerValue) | |
|
1750 | 1860 | { |
|
1751 | ZSTD_matchState_t* const ms = &zc->blockState.matchState; | |
|
1752 | { U32 const hSize = (U32)1 << zc->appliedParams.cParams.hashLog; | |
|
1861 | { U32 const hSize = (U32)1 << params->cParams.hashLog; | |
|
1753 | 1862 | ZSTD_reduceTable(ms->hashTable, hSize, reducerValue); |
|
1754 | 1863 | } |
|
1755 | 1864 | |
|
1756 |
if ( |
|
|
1757 |
U32 const chainSize = (U32)1 << |
|
|
1758 |
if ( |
|
|
1865 | if (params->cParams.strategy != ZSTD_fast) { | |
|
1866 | U32 const chainSize = (U32)1 << params->cParams.chainLog; | |
|
1867 | if (params->cParams.strategy == ZSTD_btlazy2) | |
|
1759 | 1868 | ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue); |
|
1760 | 1869 | else |
|
1761 | 1870 | ZSTD_reduceTable(ms->chainTable, chainSize, reducerValue); |
@@ -1777,161 +1886,13 b' static void ZSTD_reduceIndex (ZSTD_CCtx*' | |||
|
1777 | 1886 | static size_t ZSTD_noCompressBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize, U32 lastBlock) |
|
1778 | 1887 | { |
|
1779 | 1888 | U32 const cBlockHeader24 = lastBlock + (((U32)bt_raw)<<1) + (U32)(srcSize << 3); |
|
1780 |
|
|
|
1889 | RETURN_ERROR_IF(srcSize + ZSTD_blockHeaderSize > dstCapacity, | |
|
1890 | dstSize_tooSmall); | |
|
1781 | 1891 | MEM_writeLE24(dst, cBlockHeader24); |
|
1782 | 1892 | memcpy((BYTE*)dst + ZSTD_blockHeaderSize, src, srcSize); |
|
1783 | 1893 | return ZSTD_blockHeaderSize + srcSize; |
|
1784 | 1894 | } |
|
1785 | 1895 | |
|
1786 | static size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |
|
1787 | { | |
|
1788 | BYTE* const ostart = (BYTE* const)dst; | |
|
1789 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |
|
1790 | ||
|
1791 | if (srcSize + flSize > dstCapacity) return ERROR(dstSize_tooSmall); | |
|
1792 | ||
|
1793 | switch(flSize) | |
|
1794 | { | |
|
1795 | case 1: /* 2 - 1 - 5 */ | |
|
1796 | ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3)); | |
|
1797 | break; | |
|
1798 | case 2: /* 2 - 2 - 12 */ | |
|
1799 | MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4))); | |
|
1800 | break; | |
|
1801 | case 3: /* 2 - 2 - 20 */ | |
|
1802 | MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4))); | |
|
1803 | break; | |
|
1804 | default: /* not necessary : flSize is {1,2,3} */ | |
|
1805 | assert(0); | |
|
1806 | } | |
|
1807 | ||
|
1808 | memcpy(ostart + flSize, src, srcSize); | |
|
1809 | return srcSize + flSize; | |
|
1810 | } | |
|
1811 | ||
|
1812 | static size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize) | |
|
1813 | { | |
|
1814 | BYTE* const ostart = (BYTE* const)dst; | |
|
1815 | U32 const flSize = 1 + (srcSize>31) + (srcSize>4095); | |
|
1816 | ||
|
1817 | (void)dstCapacity; /* dstCapacity already guaranteed to be >=4, hence large enough */ | |
|
1818 | ||
|
1819 | switch(flSize) | |
|
1820 | { | |
|
1821 | case 1: /* 2 - 1 - 5 */ | |
|
1822 | ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3)); | |
|
1823 | break; | |
|
1824 | case 2: /* 2 - 2 - 12 */ | |
|
1825 | MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4))); | |
|
1826 | break; | |
|
1827 | case 3: /* 2 - 2 - 20 */ | |
|
1828 | MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4))); | |
|
1829 | break; | |
|
1830 | default: /* not necessary : flSize is {1,2,3} */ | |
|
1831 | assert(0); | |
|
1832 | } | |
|
1833 | ||
|
1834 | ostart[flSize] = *(const BYTE*)src; | |
|
1835 | return flSize+1; | |
|
1836 | } | |
|
1837 | ||
|
1838 | ||
|
1839 | /* ZSTD_minGain() : | |
|
1840 | * minimum compression required | |
|
1841 | * to generate a compress block or a compressed literals section. | |
|
1842 | * note : use same formula for both situations */ | |
|
1843 | static size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) | |
|
1844 | { | |
|
1845 | U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; | |
|
1846 | ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); | |
|
1847 | assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); | |
|
1848 | return (srcSize >> minlog) + 2; | |
|
1849 | } | |
|
1850 | ||
|
1851 | static size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf, | |
|
1852 | ZSTD_hufCTables_t* nextHuf, | |
|
1853 | ZSTD_strategy strategy, int disableLiteralCompression, | |
|
1854 | void* dst, size_t dstCapacity, | |
|
1855 | const void* src, size_t srcSize, | |
|
1856 | void* workspace, size_t wkspSize, | |
|
1857 | const int bmi2) | |
|
1858 | { | |
|
1859 | size_t const minGain = ZSTD_minGain(srcSize, strategy); | |
|
1860 | size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB); | |
|
1861 | BYTE* const ostart = (BYTE*)dst; | |
|
1862 | U32 singleStream = srcSize < 256; | |
|
1863 | symbolEncodingType_e hType = set_compressed; | |
|
1864 | size_t cLitSize; | |
|
1865 | ||
|
1866 | DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i)", | |
|
1867 | disableLiteralCompression); | |
|
1868 | ||
|
1869 | /* Prepare nextEntropy assuming reusing the existing table */ | |
|
1870 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |
|
1871 | ||
|
1872 | if (disableLiteralCompression) | |
|
1873 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |
|
1874 | ||
|
1875 | /* small ? don't even attempt compression (speed opt) */ | |
|
1876 | # define COMPRESS_LITERALS_SIZE_MIN 63 | |
|
1877 | { size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN; | |
|
1878 | if (srcSize <= minLitSize) return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |
|
1879 | } | |
|
1880 | ||
|
1881 | if (dstCapacity < lhSize+1) return ERROR(dstSize_tooSmall); /* not enough space for compression */ | |
|
1882 | { HUF_repeat repeat = prevHuf->repeatMode; | |
|
1883 | int const preferRepeat = strategy < ZSTD_lazy ? srcSize <= 1024 : 0; | |
|
1884 | if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1; | |
|
1885 | cLitSize = singleStream ? HUF_compress1X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, | |
|
1886 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) | |
|
1887 | : HUF_compress4X_repeat(ostart+lhSize, dstCapacity-lhSize, src, srcSize, 255, 11, | |
|
1888 | workspace, wkspSize, (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2); | |
|
1889 | if (repeat != HUF_repeat_none) { | |
|
1890 | /* reused the existing table */ | |
|
1891 | hType = set_repeat; | |
|
1892 | } | |
|
1893 | } | |
|
1894 | ||
|
1895 | if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) { | |
|
1896 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |
|
1897 | return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize); | |
|
1898 | } | |
|
1899 | if (cLitSize==1) { | |
|
1900 | memcpy(nextHuf, prevHuf, sizeof(*prevHuf)); | |
|
1901 | return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize); | |
|
1902 | } | |
|
1903 | ||
|
1904 | if (hType == set_compressed) { | |
|
1905 | /* using a newly constructed table */ | |
|
1906 | nextHuf->repeatMode = HUF_repeat_check; | |
|
1907 | } | |
|
1908 | ||
|
1909 | /* Build header */ | |
|
1910 | switch(lhSize) | |
|
1911 | { | |
|
1912 | case 3: /* 2 - 2 - 10 - 10 */ | |
|
1913 | { U32 const lhc = hType + ((!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14); | |
|
1914 | MEM_writeLE24(ostart, lhc); | |
|
1915 | break; | |
|
1916 | } | |
|
1917 | case 4: /* 2 - 2 - 14 - 14 */ | |
|
1918 | { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18); | |
|
1919 | MEM_writeLE32(ostart, lhc); | |
|
1920 | break; | |
|
1921 | } | |
|
1922 | case 5: /* 2 - 2 - 18 - 18 */ | |
|
1923 | { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22); | |
|
1924 | MEM_writeLE32(ostart, lhc); | |
|
1925 | ostart[4] = (BYTE)(cLitSize >> 10); | |
|
1926 | break; | |
|
1927 | } | |
|
1928 | default: /* not possible : lhSize is {3,4,5} */ | |
|
1929 | assert(0); | |
|
1930 | } | |
|
1931 | return lhSize+cLitSize; | |
|
1932 | } | |
|
1933 | ||
|
1934 | ||
|
1935 | 1896 | void ZSTD_seqToCodes(const seqStore_t* seqStorePtr) |
|
1936 | 1897 | { |
|
1937 | 1898 | const seqDef* const sequences = seqStorePtr->sequencesStart; |
@@ -1954,418 +1915,19 b' void ZSTD_seqToCodes(const seqStore_t* s' | |||
|
1954 | 1915 | mlCodeTable[seqStorePtr->longLengthPos] = MaxML; |
|
1955 | 1916 | } |
|
1956 | 1917 | |
|
1957 | ||
|
1958 | /** | |
|
1959 | * -log2(x / 256) lookup table for x in [0, 256). | |
|
1960 | * If x == 0: Return 0 | |
|
1961 | * Else: Return floor(-log2(x / 256) * 256) | |
|
1962 | */ | |
|
1963 | static unsigned const kInverseProbabiltyLog256[256] = { | |
|
1964 | 0, 2048, 1792, 1642, 1536, 1453, 1386, 1329, 1280, 1236, 1197, 1162, | |
|
1965 | 1130, 1100, 1073, 1047, 1024, 1001, 980, 960, 941, 923, 906, 889, | |
|
1966 | 874, 859, 844, 830, 817, 804, 791, 779, 768, 756, 745, 734, | |
|
1967 | 724, 714, 704, 694, 685, 676, 667, 658, 650, 642, 633, 626, | |
|
1968 | 618, 610, 603, 595, 588, 581, 574, 567, 561, 554, 548, 542, | |
|
1969 | 535, 529, 523, 517, 512, 506, 500, 495, 489, 484, 478, 473, | |
|
1970 | 468, 463, 458, 453, 448, 443, 438, 434, 429, 424, 420, 415, | |
|
1971 | 411, 407, 402, 398, 394, 390, 386, 382, 377, 373, 370, 366, | |
|
1972 | 362, 358, 354, 350, 347, 343, 339, 336, 332, 329, 325, 322, | |
|
1973 | 318, 315, 311, 308, 305, 302, 298, 295, 292, 289, 286, 282, | |
|
1974 | 279, 276, 273, 270, 267, 264, 261, 258, 256, 253, 250, 247, | |
|
1975 | 244, 241, 239, 236, 233, 230, 228, 225, 222, 220, 217, 215, | |
|
1976 | 212, 209, 207, 204, 202, 199, 197, 194, 192, 190, 187, 185, | |
|
1977 | 182, 180, 178, 175, 173, 171, 168, 166, 164, 162, 159, 157, | |
|
1978 | 155, 153, 151, 149, 146, 144, 142, 140, 138, 136, 134, 132, | |
|
1979 | 130, 128, 126, 123, 121, 119, 117, 115, 114, 112, 110, 108, | |
|
1980 | 106, 104, 102, 100, 98, 96, 94, 93, 91, 89, 87, 85, | |
|
1981 | 83, 82, 80, 78, 76, 74, 73, 71, 69, 67, 66, 64, | |
|
1982 | 62, 61, 59, 57, 55, 54, 52, 50, 49, 47, 46, 44, | |
|
1983 | 42, 41, 39, 37, 36, 34, 33, 31, 30, 28, 26, 25, | |
|
1984 | 23, 22, 20, 19, 17, 16, 14, 13, 11, 10, 8, 7, | |
|
1985 | 5, 4, 2, 1, | |
|
1986 | }; | |
|
1987 | ||
|
1988 | ||
|
1989 | /** | |
|
1990 | * Returns the cost in bits of encoding the distribution described by count | |
|
1991 | * using the entropy bound. | |
|
1992 | */ | |
|
1993 | static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t const total) | |
|
1994 | { | |
|
1995 | unsigned cost = 0; | |
|
1996 | unsigned s; | |
|
1997 | for (s = 0; s <= max; ++s) { | |
|
1998 | unsigned norm = (unsigned)((256 * count[s]) / total); | |
|
1999 | if (count[s] != 0 && norm == 0) | |
|
2000 | norm = 1; | |
|
2001 | assert(count[s] < total); | |
|
2002 | cost += count[s] * kInverseProbabiltyLog256[norm]; | |
|
2003 | } | |
|
2004 | return cost >> 8; | |
|
2005 | } | |
|
2006 | ||
|
2007 | ||
|
2008 | /** | |
|
2009 | * Returns the cost in bits of encoding the distribution in count using the | |
|
2010 | * table described by norm. The max symbol support by norm is assumed >= max. | |
|
2011 | * norm must be valid for every symbol with non-zero probability in count. | |
|
2012 | */ | |
|
2013 | static size_t ZSTD_crossEntropyCost(short const* norm, unsigned accuracyLog, | |
|
2014 | unsigned const* count, unsigned const max) | |
|
2015 | { | |
|
2016 | unsigned const shift = 8 - accuracyLog; | |
|
2017 | size_t cost = 0; | |
|
2018 | unsigned s; | |
|
2019 | assert(accuracyLog <= 8); | |
|
2020 | for (s = 0; s <= max; ++s) { | |
|
2021 | unsigned const normAcc = norm[s] != -1 ? norm[s] : 1; | |
|
2022 | unsigned const norm256 = normAcc << shift; | |
|
2023 | assert(norm256 > 0); | |
|
2024 | assert(norm256 < 256); | |
|
2025 | cost += count[s] * kInverseProbabiltyLog256[norm256]; | |
|
2026 | } | |
|
2027 | return cost >> 8; | |
|
2028 | } | |
|
2029 | ||
|
2030 | ||
|
2031 | static unsigned ZSTD_getFSEMaxSymbolValue(FSE_CTable const* ctable) { | |
|
2032 | void const* ptr = ctable; | |
|
2033 | U16 const* u16ptr = (U16 const*)ptr; | |
|
2034 | U32 const maxSymbolValue = MEM_read16(u16ptr + 1); | |
|
2035 | return maxSymbolValue; | |
|
2036 | } | |
|
2037 | ||
|
2038 | ||
|
2039 | /** | |
|
2040 | * Returns the cost in bits of encoding the distribution in count using ctable. | |
|
2041 | * Returns an error if ctable cannot represent all the symbols in count. | |
|
2042 | */ | |
|
2043 | static size_t ZSTD_fseBitCost( | |
|
2044 | FSE_CTable const* ctable, | |
|
2045 | unsigned const* count, | |
|
2046 | unsigned const max) | |
|
2047 | { | |
|
2048 | unsigned const kAccuracyLog = 8; | |
|
2049 | size_t cost = 0; | |
|
2050 | unsigned s; | |
|
2051 | FSE_CState_t cstate; | |
|
2052 | FSE_initCState(&cstate, ctable); | |
|
2053 | if (ZSTD_getFSEMaxSymbolValue(ctable) < max) { | |
|
2054 | DEBUGLOG(5, "Repeat FSE_CTable has maxSymbolValue %u < %u", | |
|
2055 | ZSTD_getFSEMaxSymbolValue(ctable), max); | |
|
2056 | return ERROR(GENERIC); | |
|
2057 | } | |
|
2058 | for (s = 0; s <= max; ++s) { | |
|
2059 | unsigned const tableLog = cstate.stateLog; | |
|
2060 | unsigned const badCost = (tableLog + 1) << kAccuracyLog; | |
|
2061 | unsigned const bitCost = FSE_bitCost(cstate.symbolTT, tableLog, s, kAccuracyLog); | |
|
2062 | if (count[s] == 0) | |
|
2063 | continue; | |
|
2064 | if (bitCost >= badCost) { | |
|
2065 | DEBUGLOG(5, "Repeat FSE_CTable has Prob[%u] == 0", s); | |
|
2066 | return ERROR(GENERIC); | |
|
2067 | } | |
|
2068 | cost += count[s] * bitCost; | |
|
2069 | } | |
|
2070 | return cost >> kAccuracyLog; | |
|
2071 | } | |
|
2072 | ||
|
2073 | /** | |
|
2074 | * Returns the cost in bytes of encoding the normalized count header. | |
|
2075 | * Returns an error if any of the helper functions return an error. | |
|
2076 | */ | |
|
2077 | static size_t ZSTD_NCountCost(unsigned const* count, unsigned const max, | |
|
2078 | size_t const nbSeq, unsigned const FSELog) | |
|
2079 | { | |
|
2080 | BYTE wksp[FSE_NCOUNTBOUND]; | |
|
2081 | S16 norm[MaxSeq + 1]; | |
|
2082 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |
|
2083 | CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq, max)); | |
|
2084 | return FSE_writeNCount(wksp, sizeof(wksp), norm, max, tableLog); | |
|
2085 | } | |
|
2086 | ||
|
2087 | ||
|
2088 | typedef enum { | |
|
2089 | ZSTD_defaultDisallowed = 0, | |
|
2090 | ZSTD_defaultAllowed = 1 | |
|
2091 | } ZSTD_defaultPolicy_e; | |
|
2092 | ||
|
2093 | MEM_STATIC symbolEncodingType_e | |
|
2094 | ZSTD_selectEncodingType( | |
|
2095 | FSE_repeat* repeatMode, unsigned const* count, unsigned const max, | |
|
2096 | size_t const mostFrequent, size_t nbSeq, unsigned const FSELog, | |
|
2097 | FSE_CTable const* prevCTable, | |
|
2098 | short const* defaultNorm, U32 defaultNormLog, | |
|
2099 | ZSTD_defaultPolicy_e const isDefaultAllowed, | |
|
2100 | ZSTD_strategy const strategy) | |
|
1918 | static int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams) | |
|
2101 | 1919 | { |
|
2102 | ZSTD_STATIC_ASSERT(ZSTD_defaultDisallowed == 0 && ZSTD_defaultAllowed != 0); | |
|
2103 | if (mostFrequent == nbSeq) { | |
|
2104 | *repeatMode = FSE_repeat_none; | |
|
2105 | if (isDefaultAllowed && nbSeq <= 2) { | |
|
2106 | /* Prefer set_basic over set_rle when there are 2 or less symbols, | |
|
2107 | * since RLE uses 1 byte, but set_basic uses 5-6 bits per symbol. | |
|
2108 | * If basic encoding isn't possible, always choose RLE. | |
|
2109 | */ | |
|
2110 | DEBUGLOG(5, "Selected set_basic"); | |
|
2111 | return set_basic; | |
|
2112 | } | |
|
2113 | DEBUGLOG(5, "Selected set_rle"); | |
|
2114 | return set_rle; | |
|
2115 | } | |
|
2116 | if (strategy < ZSTD_lazy) { | |
|
2117 | if (isDefaultAllowed) { | |
|
2118 | size_t const staticFse_nbSeq_max = 1000; | |
|
2119 | size_t const mult = 10 - strategy; | |
|
2120 | size_t const baseLog = 3; | |
|
2121 | size_t const dynamicFse_nbSeq_min = (((size_t)1 << defaultNormLog) * mult) >> baseLog; /* 28-36 for offset, 56-72 for lengths */ | |
|
2122 | assert(defaultNormLog >= 5 && defaultNormLog <= 6); /* xx_DEFAULTNORMLOG */ | |
|
2123 | assert(mult <= 9 && mult >= 7); | |
|
2124 | if ( (*repeatMode == FSE_repeat_valid) | |
|
2125 | && (nbSeq < staticFse_nbSeq_max) ) { | |
|
2126 | DEBUGLOG(5, "Selected set_repeat"); | |
|
2127 | return set_repeat; | |
|
2128 | } | |
|
2129 | if ( (nbSeq < dynamicFse_nbSeq_min) | |
|
2130 | || (mostFrequent < (nbSeq >> (defaultNormLog-1))) ) { | |
|
2131 | DEBUGLOG(5, "Selected set_basic"); | |
|
2132 | /* The format allows default tables to be repeated, but it isn't useful. | |
|
2133 | * When using simple heuristics to select encoding type, we don't want | |
|
2134 | * to confuse these tables with dictionaries. When running more careful | |
|
2135 | * analysis, we don't need to waste time checking both repeating tables | |
|
2136 | * and default tables. | |
|
2137 | */ | |
|
2138 | *repeatMode = FSE_repeat_none; | |
|
2139 | return set_basic; | |
|
2140 | } | |
|
2141 | } | |
|
2142 | } else { | |
|
2143 | size_t const basicCost = isDefaultAllowed ? ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, count, max) : ERROR(GENERIC); | |
|
2144 | size_t const repeatCost = *repeatMode != FSE_repeat_none ? ZSTD_fseBitCost(prevCTable, count, max) : ERROR(GENERIC); | |
|
2145 | size_t const NCountCost = ZSTD_NCountCost(count, max, nbSeq, FSELog); | |
|
2146 | size_t const compressedCost = (NCountCost << 3) + ZSTD_entropyCost(count, max, nbSeq); | |
|
2147 | ||
|
2148 | if (isDefaultAllowed) { | |
|
2149 | assert(!ZSTD_isError(basicCost)); | |
|
2150 | assert(!(*repeatMode == FSE_repeat_valid && ZSTD_isError(repeatCost))); | |
|
2151 | } | |
|
2152 | assert(!ZSTD_isError(NCountCost)); | |
|
2153 | assert(compressedCost < ERROR(maxCode)); | |
|
2154 | DEBUGLOG(5, "Estimated bit costs: basic=%u\trepeat=%u\tcompressed=%u", | |
|
2155 | (unsigned)basicCost, (unsigned)repeatCost, (unsigned)compressedCost); | |
|
2156 | if (basicCost <= repeatCost && basicCost <= compressedCost) { | |
|
2157 | DEBUGLOG(5, "Selected set_basic"); | |
|
2158 | assert(isDefaultAllowed); | |
|
2159 | *repeatMode = FSE_repeat_none; | |
|
2160 | return set_basic; | |
|
2161 | } | |
|
2162 | if (repeatCost <= compressedCost) { | |
|
2163 | DEBUGLOG(5, "Selected set_repeat"); | |
|
2164 | assert(!ZSTD_isError(repeatCost)); | |
|
2165 | return set_repeat; | |
|
2166 | } | |
|
2167 | assert(compressedCost < basicCost && compressedCost < repeatCost); | |
|
2168 | } | |
|
2169 | DEBUGLOG(5, "Selected set_compressed"); | |
|
2170 | *repeatMode = FSE_repeat_check; | |
|
2171 | return set_compressed; | |
|
2172 | } | |
|
2173 | ||
|
2174 | MEM_STATIC size_t | |
|
2175 | ZSTD_buildCTable(void* dst, size_t dstCapacity, | |
|
2176 | FSE_CTable* nextCTable, U32 FSELog, symbolEncodingType_e type, | |
|
2177 | unsigned* count, U32 max, | |
|
2178 | const BYTE* codeTable, size_t nbSeq, | |
|
2179 | const S16* defaultNorm, U32 defaultNormLog, U32 defaultMax, | |
|
2180 | const FSE_CTable* prevCTable, size_t prevCTableSize, | |
|
2181 | void* workspace, size_t workspaceSize) | |
|
2182 | { | |
|
2183 | BYTE* op = (BYTE*)dst; | |
|
2184 | const BYTE* const oend = op + dstCapacity; | |
|
2185 | DEBUGLOG(6, "ZSTD_buildCTable (dstCapacity=%u)", (unsigned)dstCapacity); | |
|
2186 | ||
|
2187 | switch (type) { | |
|
2188 | case set_rle: | |
|
2189 | CHECK_F(FSE_buildCTable_rle(nextCTable, (BYTE)max)); | |
|
2190 | if (dstCapacity==0) return ERROR(dstSize_tooSmall); | |
|
2191 | *op = codeTable[0]; | |
|
2192 | return 1; | |
|
2193 | case set_repeat: | |
|
2194 | memcpy(nextCTable, prevCTable, prevCTableSize); | |
|
2195 | return 0; | |
|
2196 | case set_basic: | |
|
2197 | CHECK_F(FSE_buildCTable_wksp(nextCTable, defaultNorm, defaultMax, defaultNormLog, workspace, workspaceSize)); /* note : could be pre-calculated */ | |
|
1920 | switch (cctxParams->literalCompressionMode) { | |
|
1921 | case ZSTD_lcm_huffman: | |
|
2198 | 1922 | return 0; |
|
2199 |
case |
|
|
2200 | S16 norm[MaxSeq + 1]; | |
|
2201 | size_t nbSeq_1 = nbSeq; | |
|
2202 | const U32 tableLog = FSE_optimalTableLog(FSELog, nbSeq, max); | |
|
2203 | if (count[codeTable[nbSeq-1]] > 1) { | |
|
2204 | count[codeTable[nbSeq-1]]--; | |
|
2205 | nbSeq_1--; | |
|
2206 |
|
|
|
2207 | assert(nbSeq_1 > 1); | |
|
2208 | CHECK_F(FSE_normalizeCount(norm, tableLog, count, nbSeq_1, max)); | |
|
2209 | { size_t const NCountSize = FSE_writeNCount(op, oend - op, norm, max, tableLog); /* overflow protected */ | |
|
2210 | if (FSE_isError(NCountSize)) return NCountSize; | |
|
2211 | CHECK_F(FSE_buildCTable_wksp(nextCTable, norm, max, tableLog, workspace, workspaceSize)); | |
|
2212 | return NCountSize; | |
|
2213 | } | |
|
2214 | } | |
|
2215 | default: return assert(0), ERROR(GENERIC); | |
|
2216 | } | |
|
2217 | } | |
|
2218 | ||
|
2219 | FORCE_INLINE_TEMPLATE size_t | |
|
2220 | ZSTD_encodeSequences_body( | |
|
2221 | void* dst, size_t dstCapacity, | |
|
2222 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
2223 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
2224 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
2225 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |
|
2226 | { | |
|
2227 | BIT_CStream_t blockStream; | |
|
2228 | FSE_CState_t stateMatchLength; | |
|
2229 | FSE_CState_t stateOffsetBits; | |
|
2230 | FSE_CState_t stateLitLength; | |
|
2231 | ||
|
2232 | CHECK_E(BIT_initCStream(&blockStream, dst, dstCapacity), dstSize_tooSmall); /* not enough space remaining */ | |
|
2233 | DEBUGLOG(6, "available space for bitstream : %i (dstCapacity=%u)", | |
|
2234 | (int)(blockStream.endPtr - blockStream.startPtr), | |
|
2235 | (unsigned)dstCapacity); | |
|
2236 | ||
|
2237 | /* first symbols */ | |
|
2238 | FSE_initCState2(&stateMatchLength, CTable_MatchLength, mlCodeTable[nbSeq-1]); | |
|
2239 | FSE_initCState2(&stateOffsetBits, CTable_OffsetBits, ofCodeTable[nbSeq-1]); | |
|
2240 | FSE_initCState2(&stateLitLength, CTable_LitLength, llCodeTable[nbSeq-1]); | |
|
2241 | BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]); | |
|
2242 | if (MEM_32bits()) BIT_flushBits(&blockStream); | |
|
2243 | BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]); | |
|
2244 | if (MEM_32bits()) BIT_flushBits(&blockStream); | |
|
2245 | if (longOffsets) { | |
|
2246 | U32 const ofBits = ofCodeTable[nbSeq-1]; | |
|
2247 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |
|
2248 | if (extraBits) { | |
|
2249 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits); | |
|
2250 | BIT_flushBits(&blockStream); | |
|
2251 | } | |
|
2252 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits, | |
|
2253 | ofBits - extraBits); | |
|
2254 | } else { | |
|
2255 | BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]); | |
|
2256 | } | |
|
2257 | BIT_flushBits(&blockStream); | |
|
2258 | ||
|
2259 | { size_t n; | |
|
2260 | for (n=nbSeq-2 ; n<nbSeq ; n--) { /* intentional underflow */ | |
|
2261 | BYTE const llCode = llCodeTable[n]; | |
|
2262 | BYTE const ofCode = ofCodeTable[n]; | |
|
2263 | BYTE const mlCode = mlCodeTable[n]; | |
|
2264 | U32 const llBits = LL_bits[llCode]; | |
|
2265 | U32 const ofBits = ofCode; | |
|
2266 | U32 const mlBits = ML_bits[mlCode]; | |
|
2267 | DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u", | |
|
2268 | (unsigned)sequences[n].litLength, | |
|
2269 | (unsigned)sequences[n].matchLength + MINMATCH, | |
|
2270 | (unsigned)sequences[n].offset); | |
|
2271 | /* 32b*/ /* 64b*/ | |
|
2272 | /* (7)*/ /* (7)*/ | |
|
2273 | FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode); /* 15 */ /* 15 */ | |
|
2274 | FSE_encodeSymbol(&blockStream, &stateMatchLength, mlCode); /* 24 */ /* 24 */ | |
|
2275 | if (MEM_32bits()) BIT_flushBits(&blockStream); /* (7)*/ | |
|
2276 | FSE_encodeSymbol(&blockStream, &stateLitLength, llCode); /* 16 */ /* 33 */ | |
|
2277 | if (MEM_32bits() || (ofBits+mlBits+llBits >= 64-7-(LLFSELog+MLFSELog+OffFSELog))) | |
|
2278 | BIT_flushBits(&blockStream); /* (7)*/ | |
|
2279 | BIT_addBits(&blockStream, sequences[n].litLength, llBits); | |
|
2280 | if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream); | |
|
2281 | BIT_addBits(&blockStream, sequences[n].matchLength, mlBits); | |
|
2282 | if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream); | |
|
2283 | if (longOffsets) { | |
|
2284 | int const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1); | |
|
2285 | if (extraBits) { | |
|
2286 | BIT_addBits(&blockStream, sequences[n].offset, extraBits); | |
|
2287 | BIT_flushBits(&blockStream); /* (7)*/ | |
|
2288 | } | |
|
2289 | BIT_addBits(&blockStream, sequences[n].offset >> extraBits, | |
|
2290 | ofBits - extraBits); /* 31 */ | |
|
2291 | } else { | |
|
2292 | BIT_addBits(&blockStream, sequences[n].offset, ofBits); /* 31 */ | |
|
2293 | } | |
|
2294 | BIT_flushBits(&blockStream); /* (7)*/ | |
|
2295 | DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr)); | |
|
2296 | } } | |
|
2297 | ||
|
2298 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing ML state with %u bits", stateMatchLength.stateLog); | |
|
2299 | FSE_flushCState(&blockStream, &stateMatchLength); | |
|
2300 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing Off state with %u bits", stateOffsetBits.stateLog); | |
|
2301 | FSE_flushCState(&blockStream, &stateOffsetBits); | |
|
2302 | DEBUGLOG(6, "ZSTD_encodeSequences: flushing LL state with %u bits", stateLitLength.stateLog); | |
|
2303 | FSE_flushCState(&blockStream, &stateLitLength); | |
|
2304 | ||
|
2305 | { size_t const streamSize = BIT_closeCStream(&blockStream); | |
|
2306 | if (streamSize==0) return ERROR(dstSize_tooSmall); /* not enough space */ | |
|
2307 | return streamSize; | |
|
2308 | } | |
|
2309 | } | |
|
2310 | ||
|
2311 | static size_t | |
|
2312 | ZSTD_encodeSequences_default( | |
|
2313 | void* dst, size_t dstCapacity, | |
|
2314 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
2315 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
2316 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
2317 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |
|
2318 | { | |
|
2319 | return ZSTD_encodeSequences_body(dst, dstCapacity, | |
|
2320 | CTable_MatchLength, mlCodeTable, | |
|
2321 | CTable_OffsetBits, ofCodeTable, | |
|
2322 | CTable_LitLength, llCodeTable, | |
|
2323 | sequences, nbSeq, longOffsets); | |
|
2324 | } | |
|
2325 | ||
|
2326 | ||
|
2327 | #if DYNAMIC_BMI2 | |
|
2328 | ||
|
2329 | static TARGET_ATTRIBUTE("bmi2") size_t | |
|
2330 | ZSTD_encodeSequences_bmi2( | |
|
2331 | void* dst, size_t dstCapacity, | |
|
2332 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
2333 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
2334 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
2335 | seqDef const* sequences, size_t nbSeq, int longOffsets) | |
|
2336 | { | |
|
2337 | return ZSTD_encodeSequences_body(dst, dstCapacity, | |
|
2338 | CTable_MatchLength, mlCodeTable, | |
|
2339 | CTable_OffsetBits, ofCodeTable, | |
|
2340 | CTable_LitLength, llCodeTable, | |
|
2341 | sequences, nbSeq, longOffsets); | |
|
2342 | } | |
|
2343 | ||
|
2344 | #endif | |
|
2345 | ||
|
2346 | static size_t ZSTD_encodeSequences( | |
|
2347 | void* dst, size_t dstCapacity, | |
|
2348 | FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable, | |
|
2349 | FSE_CTable const* CTable_OffsetBits, BYTE const* ofCodeTable, | |
|
2350 | FSE_CTable const* CTable_LitLength, BYTE const* llCodeTable, | |
|
2351 | seqDef const* sequences, size_t nbSeq, int longOffsets, int bmi2) | |
|
2352 | { | |
|
2353 | DEBUGLOG(5, "ZSTD_encodeSequences: dstCapacity = %u", (unsigned)dstCapacity); | |
|
2354 | #if DYNAMIC_BMI2 | |
|
2355 | if (bmi2) { | |
|
2356 | return ZSTD_encodeSequences_bmi2(dst, dstCapacity, | |
|
2357 | CTable_MatchLength, mlCodeTable, | |
|
2358 | CTable_OffsetBits, ofCodeTable, | |
|
2359 | CTable_LitLength, llCodeTable, | |
|
2360 | sequences, nbSeq, longOffsets); | |
|
2361 | } | |
|
2362 | #endif | |
|
2363 | (void)bmi2; | |
|
2364 | return ZSTD_encodeSequences_default(dst, dstCapacity, | |
|
2365 | CTable_MatchLength, mlCodeTable, | |
|
2366 | CTable_OffsetBits, ofCodeTable, | |
|
2367 | CTable_LitLength, llCodeTable, | |
|
2368 | sequences, nbSeq, longOffsets); | |
|
1923 | case ZSTD_lcm_uncompressed: | |
|
1924 | return 1; | |
|
1925 | default: | |
|
1926 | assert(0 /* impossible: pre-validated */); | |
|
1927 | /* fall-through */ | |
|
1928 | case ZSTD_lcm_auto: | |
|
1929 | return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); | |
|
1930 | } | |
|
2369 | 1931 | } |
|
2370 | 1932 | |
|
2371 | 1933 | /* ZSTD_compressSequences_internal(): |
@@ -2393,46 +1955,48 b' ZSTD_compressSequences_internal(seqStore' | |||
|
2393 | 1955 | BYTE* const ostart = (BYTE*)dst; |
|
2394 | 1956 | BYTE* const oend = ostart + dstCapacity; |
|
2395 | 1957 | BYTE* op = ostart; |
|
2396 | size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart; | |
|
1958 | size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart); | |
|
2397 | 1959 | BYTE* seqHead; |
|
2398 | 1960 | BYTE* lastNCount = NULL; |
|
2399 | 1961 | |
|
1962 | DEBUGLOG(5, "ZSTD_compressSequences_internal (nbSeq=%zu)", nbSeq); | |
|
2400 | 1963 | ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog))); |
|
2401 | DEBUGLOG(5, "ZSTD_compressSequences_internal"); | |
|
2402 | 1964 | |
|
2403 | 1965 | /* Compress literals */ |
|
2404 | 1966 | { const BYTE* const literals = seqStorePtr->litStart; |
|
2405 | size_t const litSize = seqStorePtr->lit - literals; | |
|
2406 | int const disableLiteralCompression = (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0); | |
|
1967 | size_t const litSize = (size_t)(seqStorePtr->lit - literals); | |
|
2407 | 1968 | size_t const cSize = ZSTD_compressLiterals( |
|
2408 | 1969 | &prevEntropy->huf, &nextEntropy->huf, |
|
2409 |
cctxParams->cParams.strategy, |
|
|
1970 | cctxParams->cParams.strategy, | |
|
1971 | ZSTD_disableLiteralsCompression(cctxParams), | |
|
2410 | 1972 | op, dstCapacity, |
|
2411 | 1973 | literals, litSize, |
|
2412 | 1974 | workspace, wkspSize, |
|
2413 | 1975 | bmi2); |
|
2414 | if (ZSTD_isError(cSize)) | |
|
2415 | return cSize; | |
|
1976 | FORWARD_IF_ERROR(cSize); | |
|
2416 | 1977 | assert(cSize <= dstCapacity); |
|
2417 | 1978 | op += cSize; |
|
2418 | 1979 | } |
|
2419 | 1980 | |
|
2420 | 1981 | /* Sequences Header */ |
|
2421 |
|
|
|
1982 | RETURN_ERROR_IF((oend-op) < 3 /*max nbSeq Size*/ + 1 /*seqHead*/, | |
|
1983 | dstSize_tooSmall); | |
|
2422 | 1984 | if (nbSeq < 0x7F) |
|
2423 | 1985 | *op++ = (BYTE)nbSeq; |
|
2424 | 1986 | else if (nbSeq < LONGNBSEQ) |
|
2425 | 1987 | op[0] = (BYTE)((nbSeq>>8) + 0x80), op[1] = (BYTE)nbSeq, op+=2; |
|
2426 | 1988 | else |
|
2427 | 1989 | op[0]=0xFF, MEM_writeLE16(op+1, (U16)(nbSeq - LONGNBSEQ)), op+=3; |
|
1990 | assert(op <= oend); | |
|
2428 | 1991 | if (nbSeq==0) { |
|
2429 | 1992 | /* Copy the old tables over as if we repeated them */ |
|
2430 | 1993 | memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse)); |
|
2431 | return op - ostart; | |
|
1994 | return (size_t)(op - ostart); | |
|
2432 | 1995 | } |
|
2433 | 1996 | |
|
2434 | 1997 | /* seqHead : flags for FSE encoding type */ |
|
2435 | 1998 | seqHead = op++; |
|
1999 | assert(op <= oend); | |
|
2436 | 2000 | |
|
2437 | 2001 | /* convert length/distances into codes */ |
|
2438 | 2002 | ZSTD_seqToCodes(seqStorePtr); |
@@ -2448,14 +2012,15 b' ZSTD_compressSequences_internal(seqStore' | |||
|
2448 | 2012 | ZSTD_defaultAllowed, strategy); |
|
2449 | 2013 | assert(set_basic < set_compressed && set_rle < set_compressed); |
|
2450 | 2014 | assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2451 | { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, | |
|
2015 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype, | |
|
2452 | 2016 | count, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL, |
|
2453 | 2017 | prevEntropy->fse.litlengthCTable, sizeof(prevEntropy->fse.litlengthCTable), |
|
2454 | 2018 | workspace, wkspSize); |
|
2455 | if (ZSTD_isError(countSize)) return countSize; | |
|
2019 | FORWARD_IF_ERROR(countSize); | |
|
2456 | 2020 | if (LLtype == set_compressed) |
|
2457 | 2021 | lastNCount = op; |
|
2458 | 2022 | op += countSize; |
|
2023 | assert(op <= oend); | |
|
2459 | 2024 | } } |
|
2460 | 2025 | /* build CTable for Offsets */ |
|
2461 | 2026 | { unsigned max = MaxOff; |
@@ -2470,14 +2035,15 b' ZSTD_compressSequences_internal(seqStore' | |||
|
2470 | 2035 | OF_defaultNorm, OF_defaultNormLog, |
|
2471 | 2036 | defaultPolicy, strategy); |
|
2472 | 2037 | assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2473 | { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, | |
|
2038 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype, | |
|
2474 | 2039 | count, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff, |
|
2475 | 2040 | prevEntropy->fse.offcodeCTable, sizeof(prevEntropy->fse.offcodeCTable), |
|
2476 | 2041 | workspace, wkspSize); |
|
2477 | if (ZSTD_isError(countSize)) return countSize; | |
|
2042 | FORWARD_IF_ERROR(countSize); | |
|
2478 | 2043 | if (Offtype == set_compressed) |
|
2479 | 2044 | lastNCount = op; |
|
2480 | 2045 | op += countSize; |
|
2046 | assert(op <= oend); | |
|
2481 | 2047 | } } |
|
2482 | 2048 | /* build CTable for MatchLengths */ |
|
2483 | 2049 | { unsigned max = MaxML; |
@@ -2490,29 +2056,31 b' ZSTD_compressSequences_internal(seqStore' | |||
|
2490 | 2056 | ML_defaultNorm, ML_defaultNormLog, |
|
2491 | 2057 | ZSTD_defaultAllowed, strategy); |
|
2492 | 2058 | assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */ |
|
2493 | { size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, | |
|
2059 | { size_t const countSize = ZSTD_buildCTable(op, (size_t)(oend - op), CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype, | |
|
2494 | 2060 | count, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML, |
|
2495 | 2061 | prevEntropy->fse.matchlengthCTable, sizeof(prevEntropy->fse.matchlengthCTable), |
|
2496 | 2062 | workspace, wkspSize); |
|
2497 | if (ZSTD_isError(countSize)) return countSize; | |
|
2063 | FORWARD_IF_ERROR(countSize); | |
|
2498 | 2064 | if (MLtype == set_compressed) |
|
2499 | 2065 | lastNCount = op; |
|
2500 | 2066 | op += countSize; |
|
2067 | assert(op <= oend); | |
|
2501 | 2068 | } } |
|
2502 | 2069 | |
|
2503 | 2070 | *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2)); |
|
2504 | 2071 | |
|
2505 | 2072 | { size_t const bitstreamSize = ZSTD_encodeSequences( |
|
2506 | op, oend - op, | |
|
2073 | op, (size_t)(oend - op), | |
|
2507 | 2074 | CTable_MatchLength, mlCodeTable, |
|
2508 | 2075 | CTable_OffsetBits, ofCodeTable, |
|
2509 | 2076 | CTable_LitLength, llCodeTable, |
|
2510 | 2077 | sequences, nbSeq, |
|
2511 | 2078 | longOffsets, bmi2); |
|
2512 | if (ZSTD_isError(bitstreamSize)) return bitstreamSize; | |
|
2079 | FORWARD_IF_ERROR(bitstreamSize); | |
|
2513 | 2080 | op += bitstreamSize; |
|
2081 | assert(op <= oend); | |
|
2514 | 2082 | /* zstd versions <= 1.3.4 mistakenly report corruption when |
|
2515 |
* FSE_readNCount() rec |
|
|
2083 | * FSE_readNCount() receives a buffer < 4 bytes. | |
|
2516 | 2084 | * Fixed by https://github.com/facebook/zstd/pull/1146. |
|
2517 | 2085 | * This can happen when the last set_compressed table present is 2 |
|
2518 | 2086 | * bytes and the bitstream is only one byte. |
@@ -2529,7 +2097,7 b' ZSTD_compressSequences_internal(seqStore' | |||
|
2529 | 2097 | } |
|
2530 | 2098 | |
|
2531 | 2099 | DEBUGLOG(5, "compressed block size : %u", (unsigned)(op - ostart)); |
|
2532 | return op - ostart; | |
|
2100 | return (size_t)(op - ostart); | |
|
2533 | 2101 | } |
|
2534 | 2102 | |
|
2535 | 2103 | MEM_STATIC size_t |
@@ -2552,7 +2120,7 b' ZSTD_compressSequences(seqStore_t* seqSt' | |||
|
2552 | 2120 | */ |
|
2553 | 2121 | if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity)) |
|
2554 | 2122 | return 0; /* block not compressed */ |
|
2555 | if (ZSTD_isError(cSize)) return cSize; | |
|
2123 | FORWARD_IF_ERROR(cSize); | |
|
2556 | 2124 | |
|
2557 | 2125 | /* Check compressibility */ |
|
2558 | 2126 | { size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy); |
@@ -2622,27 +2190,24 b' void ZSTD_resetSeqStore(seqStore_t* ssPt' | |||
|
2622 | 2190 | ssPtr->longLengthID = 0; |
|
2623 | 2191 | } |
|
2624 | 2192 | |
|
2625 | static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, | |
|
2626 | void* dst, size_t dstCapacity, | |
|
2627 | const void* src, size_t srcSize) | |
|
2193 | typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e; | |
|
2194 | ||
|
2195 | static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize) | |
|
2628 | 2196 | { |
|
2629 | 2197 | ZSTD_matchState_t* const ms = &zc->blockState.matchState; |
|
2630 | size_t cSize; | |
|
2631 | DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", | |
|
2632 | (unsigned)dstCapacity, (unsigned)ms->window.dictLimit, (unsigned)ms->nextToUpdate); | |
|
2198 | DEBUGLOG(5, "ZSTD_buildSeqStore (srcSize=%zu)", srcSize); | |
|
2633 | 2199 | assert(srcSize <= ZSTD_BLOCKSIZE_MAX); |
|
2634 | ||
|
2635 | 2200 | /* Assert that we have correctly flushed the ctx params into the ms's copy */ |
|
2636 | 2201 | ZSTD_assertEqualCParams(zc->appliedParams.cParams, ms->cParams); |
|
2637 | ||
|
2638 | 2202 | if (srcSize < MIN_CBLOCK_SIZE+ZSTD_blockHeaderSize+1) { |
|
2639 | 2203 | ZSTD_ldm_skipSequences(&zc->externSeqStore, srcSize, zc->appliedParams.cParams.minMatch); |
|
2640 | cSize = 0; | |
|
2641 | goto out; /* don't even attempt compression below a certain srcSize */ | |
|
2204 | return ZSTDbss_noCompress; /* don't even attempt compression below a certain srcSize */ | |
|
2642 | 2205 | } |
|
2643 | 2206 | ZSTD_resetSeqStore(&(zc->seqStore)); |
|
2644 |
|
|
|
2645 | ||
|
2207 | /* required for optimal parser to read stats from dictionary */ | |
|
2208 | ms->opt.symbolCosts = &zc->blockState.prevCBlock->entropy; | |
|
2209 | /* tell the optimal parser how we expect to compress literals */ | |
|
2210 | ms->opt.literalCompressionMode = zc->appliedParams.literalCompressionMode; | |
|
2646 | 2211 | /* a gap between an attached dict and the current window is not safe, |
|
2647 | 2212 | * they must remain adjacent, |
|
2648 | 2213 | * and when that stops being the case, the dict must be unset */ |
@@ -2679,7 +2244,7 b' static size_t ZSTD_compressBlock_interna' | |||
|
2679 | 2244 | ldmSeqStore.seq = zc->ldmSequences; |
|
2680 | 2245 | ldmSeqStore.capacity = zc->maxNbLdmSequences; |
|
2681 | 2246 | /* Updates ldmSeqStore.size */ |
|
2682 |
|
|
|
2247 | FORWARD_IF_ERROR(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, | |
|
2683 | 2248 | &zc->appliedParams.ldmParams, |
|
2684 | 2249 | src, srcSize)); |
|
2685 | 2250 | /* Updates ldmSeqStore.pos */ |
@@ -2696,6 +2261,22 b' static size_t ZSTD_compressBlock_interna' | |||
|
2696 | 2261 | { const BYTE* const lastLiterals = (const BYTE*)src + srcSize - lastLLSize; |
|
2697 | 2262 | ZSTD_storeLastLiterals(&zc->seqStore, lastLiterals, lastLLSize); |
|
2698 | 2263 | } } |
|
2264 | return ZSTDbss_compress; | |
|
2265 | } | |
|
2266 | ||
|
2267 | static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, | |
|
2268 | void* dst, size_t dstCapacity, | |
|
2269 | const void* src, size_t srcSize) | |
|
2270 | { | |
|
2271 | size_t cSize; | |
|
2272 | DEBUGLOG(5, "ZSTD_compressBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)", | |
|
2273 | (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit, | |
|
2274 | (unsigned)zc->blockState.matchState.nextToUpdate); | |
|
2275 | ||
|
2276 | { const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize); | |
|
2277 | FORWARD_IF_ERROR(bss); | |
|
2278 | if (bss == ZSTDbss_noCompress) { cSize = 0; goto out; } | |
|
2279 | } | |
|
2699 | 2280 | |
|
2700 | 2281 | /* encode sequences and literals */ |
|
2701 | 2282 | cSize = ZSTD_compressSequences(&zc->seqStore, |
@@ -2724,6 +2305,25 b' out:' | |||
|
2724 | 2305 | } |
|
2725 | 2306 | |
|
2726 | 2307 | |
|
2308 | static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms, ZSTD_CCtx_params const* params, void const* ip, void const* iend) | |
|
2309 | { | |
|
2310 | if (ZSTD_window_needOverflowCorrection(ms->window, iend)) { | |
|
2311 | U32 const maxDist = (U32)1 << params->cParams.windowLog; | |
|
2312 | U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy); | |
|
2313 | U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); | |
|
2314 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); | |
|
2315 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); | |
|
2316 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); | |
|
2317 | ZSTD_reduceIndex(ms, params, correction); | |
|
2318 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; | |
|
2319 | else ms->nextToUpdate -= correction; | |
|
2320 | /* invalidate dictionaries on overflow correction */ | |
|
2321 | ms->loadedDictEnd = 0; | |
|
2322 | ms->dictMatchState = NULL; | |
|
2323 | } | |
|
2324 | } | |
|
2325 | ||
|
2326 | ||
|
2727 | 2327 | /*! ZSTD_compress_frameChunk() : |
|
2728 | 2328 | * Compress a chunk of data into one or multiple blocks. |
|
2729 | 2329 | * All blocks will be terminated, all input will be consumed. |
@@ -2742,7 +2342,7 b' static size_t ZSTD_compress_frameChunk (' | |||
|
2742 | 2342 | BYTE* const ostart = (BYTE*)dst; |
|
2743 | 2343 | BYTE* op = ostart; |
|
2744 | 2344 | U32 const maxDist = (U32)1 << cctx->appliedParams.cParams.windowLog; |
|
2745 |
assert(cctx->appliedParams.cParams.windowLog <= |
|
|
2345 | assert(cctx->appliedParams.cParams.windowLog <= ZSTD_WINDOWLOG_MAX); | |
|
2746 | 2346 | |
|
2747 | 2347 | DEBUGLOG(5, "ZSTD_compress_frameChunk (blockSize=%u)", (unsigned)blockSize); |
|
2748 | 2348 | if (cctx->appliedParams.fParams.checksumFlag && srcSize) |
@@ -2752,33 +2352,25 b' static size_t ZSTD_compress_frameChunk (' | |||
|
2752 | 2352 | ZSTD_matchState_t* const ms = &cctx->blockState.matchState; |
|
2753 | 2353 | U32 const lastBlock = lastFrameChunk & (blockSize >= remaining); |
|
2754 | 2354 | |
|
2755 |
|
|
|
2756 | return ERROR(dstSize_tooSmall); /* not enough space to store compressed block */ | |
|
2355 | RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize + MIN_CBLOCK_SIZE, | |
|
2356 | dstSize_tooSmall, | |
|
2357 | "not enough space to store compressed block"); | |
|
2757 | 2358 | if (remaining < blockSize) blockSize = remaining; |
|
2758 | 2359 | |
|
2759 |
|
|
|
2760 | U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); | |
|
2761 | U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip); | |
|
2762 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); | |
|
2763 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); | |
|
2764 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); | |
|
2765 | ZSTD_reduceIndex(cctx, correction); | |
|
2766 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; | |
|
2767 | else ms->nextToUpdate -= correction; | |
|
2768 | ms->loadedDictEnd = 0; | |
|
2769 | ms->dictMatchState = NULL; | |
|
2770 | } | |
|
2771 | ZSTD_window_enforceMaxDist(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); | |
|
2360 | ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, ip, ip + blockSize); | |
|
2361 | ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState); | |
|
2362 | ||
|
2363 | /* Ensure hash/chain table insertion resumes no sooner than lowlimit */ | |
|
2772 | 2364 | if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit; |
|
2773 | 2365 | |
|
2774 | 2366 | { size_t cSize = ZSTD_compressBlock_internal(cctx, |
|
2775 | 2367 | op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize, |
|
2776 | 2368 | ip, blockSize); |
|
2777 | if (ZSTD_isError(cSize)) return cSize; | |
|
2369 | FORWARD_IF_ERROR(cSize); | |
|
2778 | 2370 | |
|
2779 | 2371 | if (cSize == 0) { /* block is not compressible */ |
|
2780 | 2372 | cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, blockSize, lastBlock); |
|
2781 | if (ZSTD_isError(cSize)) return cSize; | |
|
2373 | FORWARD_IF_ERROR(cSize); | |
|
2782 | 2374 | } else { |
|
2783 | 2375 | U32 const cBlockHeader24 = lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3); |
|
2784 | 2376 | MEM_writeLE24(op, cBlockHeader24); |
@@ -2796,7 +2388,7 b' static size_t ZSTD_compress_frameChunk (' | |||
|
2796 | 2388 | } } |
|
2797 | 2389 | |
|
2798 | 2390 | if (lastFrameChunk && (op>ostart)) cctx->stage = ZSTDcs_ending; |
|
2799 | return op-ostart; | |
|
2391 | return (size_t)(op-ostart); | |
|
2800 | 2392 | } |
|
2801 | 2393 | |
|
2802 | 2394 | |
@@ -2811,11 +2403,11 b' static size_t ZSTD_writeFrameHeader(void' | |||
|
2811 | 2403 | BYTE const windowLogByte = (BYTE)((params.cParams.windowLog - ZSTD_WINDOWLOG_ABSOLUTEMIN) << 3); |
|
2812 | 2404 | U32 const fcsCode = params.fParams.contentSizeFlag ? |
|
2813 | 2405 | (pledgedSrcSize>=256) + (pledgedSrcSize>=65536+256) + (pledgedSrcSize>=0xFFFFFFFFU) : 0; /* 0-3 */ |
|
2814 | BYTE const frameHeaderDecriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); | |
|
2406 | BYTE const frameHeaderDescriptionByte = (BYTE)(dictIDSizeCode + (checksumFlag<<2) + (singleSegment<<5) + (fcsCode<<6) ); | |
|
2815 | 2407 | size_t pos=0; |
|
2816 | 2408 | |
|
2817 | 2409 | assert(!(params.fParams.contentSizeFlag && pledgedSrcSize == ZSTD_CONTENTSIZE_UNKNOWN)); |
|
2818 |
|
|
|
2410 | RETURN_ERROR_IF(dstCapacity < ZSTD_FRAMEHEADERSIZE_MAX, dstSize_tooSmall); | |
|
2819 | 2411 | DEBUGLOG(4, "ZSTD_writeFrameHeader : dictIDFlag : %u ; dictID : %u ; dictIDSizeCode : %u", |
|
2820 | 2412 | !params.fParams.noDictIDFlag, (unsigned)dictID, (unsigned)dictIDSizeCode); |
|
2821 | 2413 | |
@@ -2823,7 +2415,7 b' static size_t ZSTD_writeFrameHeader(void' | |||
|
2823 | 2415 | MEM_writeLE32(dst, ZSTD_MAGICNUMBER); |
|
2824 | 2416 | pos = 4; |
|
2825 | 2417 | } |
|
2826 | op[pos++] = frameHeaderDecriptionByte; | |
|
2418 | op[pos++] = frameHeaderDescriptionByte; | |
|
2827 | 2419 | if (!singleSegment) op[pos++] = windowLogByte; |
|
2828 | 2420 | switch(dictIDSizeCode) |
|
2829 | 2421 | { |
@@ -2847,11 +2439,11 b' static size_t ZSTD_writeFrameHeader(void' | |||
|
2847 | 2439 | /* ZSTD_writeLastEmptyBlock() : |
|
2848 | 2440 | * output an empty Block with end-of-frame mark to complete a frame |
|
2849 | 2441 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) |
|
2850 | * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize) | |
|
2442 | * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) | |
|
2851 | 2443 | */ |
|
2852 | 2444 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) |
|
2853 | 2445 | { |
|
2854 |
|
|
|
2446 | RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall); | |
|
2855 | 2447 | { U32 const cBlockHeader24 = 1 /*lastBlock*/ + (((U32)bt_raw)<<1); /* 0 size */ |
|
2856 | 2448 | MEM_writeLE24(dst, cBlockHeader24); |
|
2857 | 2449 | return ZSTD_blockHeaderSize; |
@@ -2860,10 +2452,9 b' size_t ZSTD_writeLastEmptyBlock(void* ds' | |||
|
2860 | 2452 | |
|
2861 | 2453 | size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) |
|
2862 | 2454 | { |
|
2863 |
|
|
|
2864 | return ERROR(stage_wrong); | |
|
2865 | if (cctx->appliedParams.ldmParams.enableLdm) | |
|
2866 | return ERROR(parameter_unsupported); | |
|
2455 | RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong); | |
|
2456 | RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm, | |
|
2457 | parameter_unsupported); | |
|
2867 | 2458 | cctx->externSeqStore.seq = seq; |
|
2868 | 2459 | cctx->externSeqStore.size = nbSeq; |
|
2869 | 2460 | cctx->externSeqStore.capacity = nbSeq; |
@@ -2882,12 +2473,14 b' static size_t ZSTD_compressContinue_inte' | |||
|
2882 | 2473 | |
|
2883 | 2474 | DEBUGLOG(5, "ZSTD_compressContinue_internal, stage: %u, srcSize: %u", |
|
2884 | 2475 | cctx->stage, (unsigned)srcSize); |
|
2885 | if (cctx->stage==ZSTDcs_created) return ERROR(stage_wrong); /* missing init (ZSTD_compressBegin) */ | |
|
2476 | RETURN_ERROR_IF(cctx->stage==ZSTDcs_created, stage_wrong, | |
|
2477 | "missing init (ZSTD_compressBegin)"); | |
|
2886 | 2478 | |
|
2887 | 2479 | if (frame && (cctx->stage==ZSTDcs_init)) { |
|
2888 | 2480 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, |
|
2889 | 2481 | cctx->pledgedSrcSizePlusOne-1, cctx->dictID); |
|
2890 | if (ZSTD_isError(fhSize)) return fhSize; | |
|
2482 | FORWARD_IF_ERROR(fhSize); | |
|
2483 | assert(fhSize <= dstCapacity); | |
|
2891 | 2484 | dstCapacity -= fhSize; |
|
2892 | 2485 | dst = (char*)dst + fhSize; |
|
2893 | 2486 | cctx->stage = ZSTDcs_ongoing; |
@@ -2904,35 +2497,25 b' static size_t ZSTD_compressContinue_inte' | |||
|
2904 | 2497 | |
|
2905 | 2498 | if (!frame) { |
|
2906 | 2499 | /* overflow check and correction for block mode */ |
|
2907 |
|
|
|
2908 | U32 const cycleLog = ZSTD_cycleLog(cctx->appliedParams.cParams.chainLog, cctx->appliedParams.cParams.strategy); | |
|
2909 | U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, 1 << cctx->appliedParams.cParams.windowLog, src); | |
|
2910 | ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30); | |
|
2911 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30); | |
|
2912 | ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX <= 31); | |
|
2913 | ZSTD_reduceIndex(cctx, correction); | |
|
2914 | if (ms->nextToUpdate < correction) ms->nextToUpdate = 0; | |
|
2915 | else ms->nextToUpdate -= correction; | |
|
2916 | ms->loadedDictEnd = 0; | |
|
2917 | ms->dictMatchState = NULL; | |
|
2918 | } | |
|
2500 | ZSTD_overflowCorrectIfNeeded(ms, &cctx->appliedParams, src, (BYTE const*)src + srcSize); | |
|
2919 | 2501 | } |
|
2920 | 2502 | |
|
2921 | 2503 | DEBUGLOG(5, "ZSTD_compressContinue_internal (blockSize=%u)", (unsigned)cctx->blockSize); |
|
2922 | 2504 | { size_t const cSize = frame ? |
|
2923 | 2505 | ZSTD_compress_frameChunk (cctx, dst, dstCapacity, src, srcSize, lastFrameChunk) : |
|
2924 | 2506 | ZSTD_compressBlock_internal (cctx, dst, dstCapacity, src, srcSize); |
|
2925 | if (ZSTD_isError(cSize)) return cSize; | |
|
2507 | FORWARD_IF_ERROR(cSize); | |
|
2926 | 2508 | cctx->consumedSrcSize += srcSize; |
|
2927 | 2509 | cctx->producedCSize += (cSize + fhSize); |
|
2928 | 2510 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); |
|
2929 | 2511 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ |
|
2930 | 2512 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); |
|
2931 | if (cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne) { | |
|
2932 | DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize >= %u", | |
|
2933 | (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); | |
|
2934 | return ERROR(srcSize_wrong); | |
|
2935 | } | |
|
2513 | RETURN_ERROR_IF( | |
|
2514 | cctx->consumedSrcSize+1 > cctx->pledgedSrcSizePlusOne, | |
|
2515 | srcSize_wrong, | |
|
2516 | "error : pledgedSrcSize = %u, while realSrcSize >= %u", | |
|
2517 | (unsigned)cctx->pledgedSrcSizePlusOne-1, | |
|
2518 | (unsigned)cctx->consumedSrcSize); | |
|
2936 | 2519 | } |
|
2937 | 2520 | return cSize + fhSize; |
|
2938 | 2521 | } |
@@ -2956,8 +2539,9 b' size_t ZSTD_getBlockSize(const ZSTD_CCtx' | |||
|
2956 | 2539 | |
|
2957 | 2540 | size_t ZSTD_compressBlock(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
|
2958 | 2541 | { |
|
2959 | size_t const blockSizeMax = ZSTD_getBlockSize(cctx); | |
|
2960 | if (srcSize > blockSizeMax) return ERROR(srcSize_wrong); | |
|
2542 | DEBUGLOG(5, "ZSTD_compressBlock: srcSize = %u", (unsigned)srcSize); | |
|
2543 | { size_t const blockSizeMax = ZSTD_getBlockSize(cctx); | |
|
2544 | RETURN_ERROR_IF(srcSize > blockSizeMax, srcSize_wrong); } | |
|
2961 | 2545 | |
|
2962 | 2546 | return ZSTD_compressContinue_internal(cctx, dst, dstCapacity, src, srcSize, 0 /* frame mode */, 0 /* last chunk */); |
|
2963 | 2547 | } |
@@ -2970,7 +2554,7 b' static size_t ZSTD_loadDictionaryContent' | |||
|
2970 | 2554 | const void* src, size_t srcSize, |
|
2971 | 2555 | ZSTD_dictTableLoadMethod_e dtlm) |
|
2972 | 2556 | { |
|
2973 |
const BYTE* |
|
|
2557 | const BYTE* ip = (const BYTE*) src; | |
|
2974 | 2558 | const BYTE* const iend = ip + srcSize; |
|
2975 | 2559 | |
|
2976 | 2560 | ZSTD_window_update(&ms->window, src, srcSize); |
@@ -2981,34 +2565,44 b' static size_t ZSTD_loadDictionaryContent' | |||
|
2981 | 2565 | |
|
2982 | 2566 | if (srcSize <= HASH_READ_SIZE) return 0; |
|
2983 | 2567 | |
|
2568 | while (iend - ip > HASH_READ_SIZE) { | |
|
2569 | size_t const remaining = (size_t)(iend - ip); | |
|
2570 | size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX); | |
|
2571 | const BYTE* const ichunk = ip + chunk; | |
|
2572 | ||
|
2573 | ZSTD_overflowCorrectIfNeeded(ms, params, ip, ichunk); | |
|
2574 | ||
|
2984 | 2575 | switch(params->cParams.strategy) |
|
2985 | 2576 | { |
|
2986 | 2577 | case ZSTD_fast: |
|
2987 |
ZSTD_fillHashTable(ms, i |
|
|
2578 | ZSTD_fillHashTable(ms, ichunk, dtlm); | |
|
2988 | 2579 | break; |
|
2989 | 2580 | case ZSTD_dfast: |
|
2990 |
ZSTD_fillDoubleHashTable(ms, i |
|
|
2581 | ZSTD_fillDoubleHashTable(ms, ichunk, dtlm); | |
|
2991 | 2582 | break; |
|
2992 | 2583 | |
|
2993 | 2584 | case ZSTD_greedy: |
|
2994 | 2585 | case ZSTD_lazy: |
|
2995 | 2586 | case ZSTD_lazy2: |
|
2996 |
if ( |
|
|
2997 |
ZSTD_insertAndFindFirstIndex(ms, i |
|
|
2587 | if (chunk >= HASH_READ_SIZE) | |
|
2588 | ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE); | |
|
2998 | 2589 | break; |
|
2999 | 2590 | |
|
3000 | 2591 | case ZSTD_btlazy2: /* we want the dictionary table fully sorted */ |
|
3001 | 2592 | case ZSTD_btopt: |
|
3002 | 2593 | case ZSTD_btultra: |
|
3003 | 2594 | case ZSTD_btultra2: |
|
3004 |
if ( |
|
|
3005 |
ZSTD_updateTree(ms, i |
|
|
2595 | if (chunk >= HASH_READ_SIZE) | |
|
2596 | ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk); | |
|
3006 | 2597 | break; |
|
3007 | 2598 | |
|
3008 | 2599 | default: |
|
3009 | 2600 | assert(0); /* not possible : not a valid strategy id */ |
|
3010 | 2601 | } |
|
3011 | 2602 | |
|
2603 | ip = ichunk; | |
|
2604 | } | |
|
2605 | ||
|
3012 | 2606 | ms->nextToUpdate = (U32)(iend - ms->window.base); |
|
3013 | 2607 | return 0; |
|
3014 | 2608 | } |
@@ -3020,9 +2614,9 b' static size_t ZSTD_loadDictionaryContent' | |||
|
3020 | 2614 | NOTE: This behavior is not standard and could be improved in the future. */ |
|
3021 | 2615 | static size_t ZSTD_checkDictNCount(short* normalizedCounter, unsigned dictMaxSymbolValue, unsigned maxSymbolValue) { |
|
3022 | 2616 | U32 s; |
|
3023 |
|
|
|
2617 | RETURN_ERROR_IF(dictMaxSymbolValue < maxSymbolValue, dictionary_corrupted); | |
|
3024 | 2618 | for (s = 0; s <= maxSymbolValue; ++s) { |
|
3025 |
|
|
|
2619 | RETURN_ERROR_IF(normalizedCounter[s] == 0, dictionary_corrupted); | |
|
3026 | 2620 | } |
|
3027 | 2621 | return 0; |
|
3028 | 2622 | } |
@@ -3060,20 +2654,21 b' static size_t ZSTD_loadZstdDictionary(ZS' | |||
|
3060 | 2654 | |
|
3061 | 2655 | { unsigned maxSymbolValue = 255; |
|
3062 | 2656 | size_t const hufHeaderSize = HUF_readCTable((HUF_CElt*)bs->entropy.huf.CTable, &maxSymbolValue, dictPtr, dictEnd-dictPtr); |
|
3063 |
|
|
|
3064 |
|
|
|
2657 | RETURN_ERROR_IF(HUF_isError(hufHeaderSize), dictionary_corrupted); | |
|
2658 | RETURN_ERROR_IF(maxSymbolValue < 255, dictionary_corrupted); | |
|
3065 | 2659 | dictPtr += hufHeaderSize; |
|
3066 | 2660 | } |
|
3067 | 2661 | |
|
3068 | 2662 | { unsigned offcodeLog; |
|
3069 | 2663 | size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); |
|
3070 |
|
|
|
3071 |
|
|
|
2664 | RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); | |
|
2665 | RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); | |
|
3072 | 2666 | /* Defer checking offcodeMaxValue because we need to know the size of the dictionary content */ |
|
3073 | 2667 | /* fill all offset symbols to avoid garbage at end of table */ |
|
3074 | CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.offcodeCTable, | |
|
2668 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( | |
|
2669 | bs->entropy.fse.offcodeCTable, | |
|
3075 | 2670 | offcodeNCount, MaxOff, offcodeLog, |
|
3076 |
|
|
|
2671 | workspace, HUF_WORKSPACE_SIZE)), | |
|
3077 | 2672 | dictionary_corrupted); |
|
3078 | 2673 | dictPtr += offcodeHeaderSize; |
|
3079 | 2674 | } |
@@ -3081,13 +2676,14 b' static size_t ZSTD_loadZstdDictionary(ZS' | |||
|
3081 | 2676 | { short matchlengthNCount[MaxML+1]; |
|
3082 | 2677 | unsigned matchlengthMaxValue = MaxML, matchlengthLog; |
|
3083 | 2678 | size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); |
|
3084 |
|
|
|
3085 |
|
|
|
2679 | RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); | |
|
2680 | RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); | |
|
3086 | 2681 | /* Every match length code must have non-zero probability */ |
|
3087 |
|
|
|
3088 | CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.matchlengthCTable, | |
|
2682 | FORWARD_IF_ERROR( ZSTD_checkDictNCount(matchlengthNCount, matchlengthMaxValue, MaxML)); | |
|
2683 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( | |
|
2684 | bs->entropy.fse.matchlengthCTable, | |
|
3089 | 2685 | matchlengthNCount, matchlengthMaxValue, matchlengthLog, |
|
3090 |
|
|
|
2686 | workspace, HUF_WORKSPACE_SIZE)), | |
|
3091 | 2687 | dictionary_corrupted); |
|
3092 | 2688 | dictPtr += matchlengthHeaderSize; |
|
3093 | 2689 | } |
@@ -3095,18 +2691,19 b' static size_t ZSTD_loadZstdDictionary(ZS' | |||
|
3095 | 2691 | { short litlengthNCount[MaxLL+1]; |
|
3096 | 2692 | unsigned litlengthMaxValue = MaxLL, litlengthLog; |
|
3097 | 2693 | size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); |
|
3098 |
|
|
|
3099 |
|
|
|
2694 | RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); | |
|
2695 | RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); | |
|
3100 | 2696 | /* Every literal length code must have non-zero probability */ |
|
3101 |
|
|
|
3102 | CHECK_E( FSE_buildCTable_wksp(bs->entropy.fse.litlengthCTable, | |
|
2697 | FORWARD_IF_ERROR( ZSTD_checkDictNCount(litlengthNCount, litlengthMaxValue, MaxLL)); | |
|
2698 | RETURN_ERROR_IF(FSE_isError(FSE_buildCTable_wksp( | |
|
2699 | bs->entropy.fse.litlengthCTable, | |
|
3103 | 2700 | litlengthNCount, litlengthMaxValue, litlengthLog, |
|
3104 |
|
|
|
2701 | workspace, HUF_WORKSPACE_SIZE)), | |
|
3105 | 2702 | dictionary_corrupted); |
|
3106 | 2703 | dictPtr += litlengthHeaderSize; |
|
3107 | 2704 | } |
|
3108 | 2705 | |
|
3109 |
|
|
|
2706 | RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); | |
|
3110 | 2707 | bs->rep[0] = MEM_readLE32(dictPtr+0); |
|
3111 | 2708 | bs->rep[1] = MEM_readLE32(dictPtr+4); |
|
3112 | 2709 | bs->rep[2] = MEM_readLE32(dictPtr+8); |
@@ -3119,19 +2716,19 b' static size_t ZSTD_loadZstdDictionary(ZS' | |||
|
3119 | 2716 | offcodeMax = ZSTD_highbit32(maxOffset); /* Calculate minimum offset code required to represent maxOffset */ |
|
3120 | 2717 | } |
|
3121 | 2718 | /* All offset values <= dictContentSize + 128 KB must be representable */ |
|
3122 |
|
|
|
2719 | FORWARD_IF_ERROR(ZSTD_checkDictNCount(offcodeNCount, offcodeMaxValue, MIN(offcodeMax, MaxOff))); | |
|
3123 | 2720 | /* All repCodes must be <= dictContentSize and != 0*/ |
|
3124 | 2721 | { U32 u; |
|
3125 | 2722 | for (u=0; u<3; u++) { |
|
3126 |
|
|
|
3127 |
|
|
|
2723 | RETURN_ERROR_IF(bs->rep[u] == 0, dictionary_corrupted); | |
|
2724 | RETURN_ERROR_IF(bs->rep[u] > dictContentSize, dictionary_corrupted); | |
|
3128 | 2725 | } } |
|
3129 | 2726 | |
|
3130 | 2727 | bs->entropy.huf.repeatMode = HUF_repeat_valid; |
|
3131 | 2728 | bs->entropy.fse.offcode_repeatMode = FSE_repeat_valid; |
|
3132 | 2729 | bs->entropy.fse.matchlength_repeatMode = FSE_repeat_valid; |
|
3133 | 2730 | bs->entropy.fse.litlength_repeatMode = FSE_repeat_valid; |
|
3134 |
|
|
|
2731 | FORWARD_IF_ERROR(ZSTD_loadDictionaryContent(ms, params, dictPtr, dictContentSize, dtlm)); | |
|
3135 | 2732 | return dictID; |
|
3136 | 2733 | } |
|
3137 | 2734 | } |
@@ -3161,8 +2758,7 b' ZSTD_compress_insertDictionary(ZSTD_comp' | |||
|
3161 | 2758 | DEBUGLOG(4, "raw content dictionary detected"); |
|
3162 | 2759 | return ZSTD_loadDictionaryContent(ms, params, dict, dictSize, dtlm); |
|
3163 | 2760 | } |
|
3164 |
|
|
|
3165 | return ERROR(dictionary_wrong); | |
|
2761 | RETURN_ERROR_IF(dictContentType == ZSTD_dct_fullDict, dictionary_wrong); | |
|
3166 | 2762 | assert(0); /* impossible */ |
|
3167 | 2763 | } |
|
3168 | 2764 | |
@@ -3189,14 +2785,13 b' static size_t ZSTD_compressBegin_interna' | |||
|
3189 | 2785 | return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff); |
|
3190 | 2786 | } |
|
3191 | 2787 | |
|
3192 |
|
|
|
2788 | FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize, | |
|
3193 | 2789 | ZSTDcrp_continue, zbuff) ); |
|
3194 | { | |
|
3195 | size_t const dictID = ZSTD_compress_insertDictionary( | |
|
2790 | { size_t const dictID = ZSTD_compress_insertDictionary( | |
|
3196 | 2791 | cctx->blockState.prevCBlock, &cctx->blockState.matchState, |
|
3197 | 2792 | &params, dict, dictSize, dictContentType, dtlm, cctx->entropyWorkspace);
|
3198 | if (ZSTD_isError(dictID)) return dictID; | |
|
3199 |
assert(dictID <= |
|
|
2793 | FORWARD_IF_ERROR(dictID); | |
|
2794 | assert(dictID <= UINT_MAX); | |
|
3200 | 2795 | cctx->dictID = (U32)dictID; |
|
3201 | 2796 | } |
|
3202 | 2797 | return 0; |
@@ -3212,7 +2807,7 b' size_t ZSTD_compressBegin_advanced_inter' | |||
|
3212 | 2807 | { |
|
3213 | 2808 | DEBUGLOG(4, "ZSTD_compressBegin_advanced_internal: wlog=%u", params.cParams.windowLog); |
|
3214 | 2809 | /* compression parameters verification and optimization */ |
|
3215 |
|
|
|
2810 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); | |
|
3216 | 2811 | return ZSTD_compressBegin_internal(cctx, |
|
3217 | 2812 | dict, dictSize, dictContentType, dtlm, |
|
3218 | 2813 | cdict, |
@@ -3260,12 +2855,12 b' static size_t ZSTD_writeEpilogue(ZSTD_CC' | |||
|
3260 | 2855 | size_t fhSize = 0; |
|
3261 | 2856 | |
|
3262 | 2857 | DEBUGLOG(4, "ZSTD_writeEpilogue"); |
|
3263 |
|
|
|
2858 | RETURN_ERROR_IF(cctx->stage == ZSTDcs_created, stage_wrong, "init missing"); | |
|
3264 | 2859 | |
|
3265 | 2860 | /* special case : empty frame */ |
|
3266 | 2861 | if (cctx->stage == ZSTDcs_init) { |
|
3267 | 2862 | fhSize = ZSTD_writeFrameHeader(dst, dstCapacity, cctx->appliedParams, 0, 0); |
|
3268 | if (ZSTD_isError(fhSize)) return fhSize; | |
|
2863 | FORWARD_IF_ERROR(fhSize); | |
|
3269 | 2864 | dstCapacity -= fhSize; |
|
3270 | 2865 | op += fhSize; |
|
3271 | 2866 | cctx->stage = ZSTDcs_ongoing; |
@@ -3274,7 +2869,7 b' static size_t ZSTD_writeEpilogue(ZSTD_CC' | |||
|
3274 | 2869 | if (cctx->stage != ZSTDcs_ending) { |
|
3275 | 2870 | /* write one last empty block, make it the "last" block */ |
|
3276 | 2871 | U32 const cBlockHeader24 = 1 /* last block */ + (((U32)bt_raw)<<1) + 0; |
|
3277 |
|
|
|
2872 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); | |
|
3278 | 2873 | MEM_writeLE32(op, cBlockHeader24); |
|
3279 | 2874 | op += ZSTD_blockHeaderSize; |
|
3280 | 2875 | dstCapacity -= ZSTD_blockHeaderSize; |
@@ -3282,7 +2877,7 b' static size_t ZSTD_writeEpilogue(ZSTD_CC' | |||
|
3282 | 2877 | |
|
3283 | 2878 | if (cctx->appliedParams.fParams.checksumFlag) { |
|
3284 | 2879 | U32 const checksum = (U32) XXH64_digest(&cctx->xxhState); |
|
3285 |
|
|
|
2880 | RETURN_ERROR_IF(dstCapacity<4, dstSize_tooSmall); | |
|
3286 | 2881 | DEBUGLOG(4, "ZSTD_writeEpilogue: write checksum : %08X", (unsigned)checksum); |
|
3287 | 2882 | MEM_writeLE32(op, checksum); |
|
3288 | 2883 | op += 4; |
@@ -3300,18 +2895,20 b' size_t ZSTD_compressEnd (ZSTD_CCtx* cctx' | |||
|
3300 | 2895 | size_t const cSize = ZSTD_compressContinue_internal(cctx, |
|
3301 | 2896 | dst, dstCapacity, src, srcSize, |
|
3302 | 2897 | 1 /* frame mode */, 1 /* last chunk */); |
|
3303 | if (ZSTD_isError(cSize)) return cSize; | |
|
2898 | FORWARD_IF_ERROR(cSize); | |
|
3304 | 2899 | endResult = ZSTD_writeEpilogue(cctx, (char*)dst + cSize, dstCapacity-cSize); |
|
3305 | if (ZSTD_isError(endResult)) return endResult; | |
|
2900 | FORWARD_IF_ERROR(endResult); | |
|
3306 | 2901 | assert(!(cctx->appliedParams.fParams.contentSizeFlag && cctx->pledgedSrcSizePlusOne == 0)); |
|
3307 | 2902 | if (cctx->pledgedSrcSizePlusOne != 0) { /* control src size */ |
|
3308 | 2903 | ZSTD_STATIC_ASSERT(ZSTD_CONTENTSIZE_UNKNOWN == (unsigned long long)-1); |
|
3309 | 2904 | DEBUGLOG(4, "end of frame : controlling src size"); |
|
3310 | if (cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1) { | |
|
3311 | DEBUGLOG(4, "error : pledgedSrcSize = %u, while realSrcSize = %u", | |
|
3312 | (unsigned)cctx->pledgedSrcSizePlusOne-1, (unsigned)cctx->consumedSrcSize); | |
|
3313 | return ERROR(srcSize_wrong); | |
|
3314 | } } | |
|
2905 | RETURN_ERROR_IF( | |
|
2906 | cctx->pledgedSrcSizePlusOne != cctx->consumedSrcSize+1, | |
|
2907 | srcSize_wrong, | |
|
2908 | "error : pledgedSrcSize = %u, while realSrcSize = %u", | |
|
2909 | (unsigned)cctx->pledgedSrcSizePlusOne-1, | |
|
2910 | (unsigned)cctx->consumedSrcSize); | |
|
2911 | } | |
|
3315 | 2912 | return cSize + endResult; |
|
3316 | 2913 | } |
|
3317 | 2914 | |
@@ -3339,7 +2936,7 b' size_t ZSTD_compress_advanced (ZSTD_CCtx' | |||
|
3339 | 2936 | ZSTD_parameters params) |
|
3340 | 2937 | { |
|
3341 | 2938 | DEBUGLOG(4, "ZSTD_compress_advanced"); |
|
3342 |
|
|
|
2939 | FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams)); | |
|
3343 | 2940 | return ZSTD_compress_internal(cctx, |
|
3344 | 2941 | dst, dstCapacity, |
|
3345 | 2942 | src, srcSize, |
@@ -3356,7 +2953,7 b' size_t ZSTD_compress_advanced_internal(' | |||
|
3356 | 2953 | ZSTD_CCtx_params params) |
|
3357 | 2954 | { |
|
3358 | 2955 | DEBUGLOG(4, "ZSTD_compress_advanced_internal (srcSize:%u)", (unsigned)srcSize); |
|
3359 |
|
|
|
2956 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, | |
|
3360 | 2957 | dict, dictSize, ZSTD_dct_auto, ZSTD_dtlm_fast, NULL, |
|
3361 | 2958 | params, srcSize, ZSTDb_not_buffered) ); |
|
3362 | 2959 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); |
@@ -3440,17 +3037,17 b' static size_t ZSTD_initCDict_internal(' | |||
|
3440 | 3037 | void* const internalBuffer = ZSTD_malloc(dictSize, cdict->customMem); |
|
3441 | 3038 | cdict->dictBuffer = internalBuffer; |
|
3442 | 3039 | cdict->dictContent = internalBuffer; |
|
3443 |
|
|
|
3040 | RETURN_ERROR_IF(!internalBuffer, memory_allocation); | |
|
3444 | 3041 | memcpy(internalBuffer, dictBuffer, dictSize); |
|
3445 | 3042 | } |
|
3446 | 3043 | cdict->dictContentSize = dictSize; |
|
3447 | 3044 | |
|
3448 | 3045 | /* Reset the state to no dictionary */ |
|
3449 | 3046 | ZSTD_reset_compressedBlockState(&cdict->cBlockState); |
|
3450 | { void* const end = ZSTD_reset_matchState( | |
|
3451 | &cdict->matchState, | |
|
3047 | { void* const end = ZSTD_reset_matchState(&cdict->matchState, | |
|
3452 | 3048 | (U32*)cdict->workspace + HUF_WORKSPACE_SIZE_U32, |
|
3453 | &cParams, ZSTDcrp_continue, /* forCCtx */ 0); | |
|
3049 | &cParams, | |
|
3050 | ZSTDcrp_continue, ZSTD_resetTarget_CDict); | |
|
3454 | 3051 | assert(end == (char*)cdict->workspace + cdict->workspaceSize); |
|
3455 | 3052 | (void)end; |
|
3456 | 3053 | } |
@@ -3466,7 +3063,7 b' static size_t ZSTD_initCDict_internal(' | |||
|
3466 | 3063 | &cdict->cBlockState, &cdict->matchState, &params,
|
3467 | 3064 | cdict->dictContent, cdict->dictContentSize, |
|
3468 | 3065 | dictContentType, ZSTD_dtlm_full, cdict->workspace); |
|
3469 | if (ZSTD_isError(dictID)) return dictID; | |
|
3066 | FORWARD_IF_ERROR(dictID); | |
|
3470 | 3067 | assert(dictID <= (size_t)(U32)-1); |
|
3471 | 3068 | cdict->dictID = (U32)dictID; |
|
3472 | 3069 | } |
@@ -3596,7 +3193,7 b' size_t ZSTD_compressBegin_usingCDict_adv' | |||
|
3596 | 3193 | ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize) |
|
3597 | 3194 | { |
|
3598 | 3195 | DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced"); |
|
3599 |
|
|
|
3196 | RETURN_ERROR_IF(cdict==NULL, dictionary_wrong); | |
|
3600 | 3197 | { ZSTD_CCtx_params params = cctx->requestedParams; |
|
3601 | 3198 | params.cParams = ZSTD_getCParamsFromCDict(cdict); |
|
3602 | 3199 | /* Increase window log to fit the entire dictionary and source if the |
@@ -3632,7 +3229,7 b' size_t ZSTD_compress_usingCDict_advanced' | |||
|
3632 | 3229 | const void* src, size_t srcSize, |
|
3633 | 3230 | const ZSTD_CDict* cdict, ZSTD_frameParameters fParams) |
|
3634 | 3231 | { |
|
3635 |
|
|
|
3232 | FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize)); /* will check if cdict != NULL */ | |
|
3636 | 3233 | return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize); |
|
3637 | 3234 | } |
|
3638 | 3235 | |
@@ -3700,7 +3297,7 b' static size_t ZSTD_resetCStream_internal' | |||
|
3700 | 3297 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); |
|
3701 | 3298 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
|
3702 | 3299 | |
|
3703 |
|
|
|
3300 | FORWARD_IF_ERROR( ZSTD_compressBegin_internal(cctx, | |
|
3704 | 3301 | dict, dictSize, dictContentType, ZSTD_dtlm_fast, |
|
3705 | 3302 | cdict, |
|
3706 | 3303 | params, pledgedSrcSize, |
@@ -3718,13 +3315,17 b' static size_t ZSTD_resetCStream_internal' | |||
|
3718 | 3315 | |
|
3719 | 3316 | /* ZSTD_resetCStream(): |
|
3720 | 3317 | * pledgedSrcSize == 0 means "unknown" */ |
|
3721 |
size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long p |
|
|
3318 | size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pss) | |
|
3722 | 3319 | { |
|
3723 | ZSTD_CCtx_params params = zcs->requestedParams; | |
|
3320 | /* temporary : 0 interpreted as "unknown" during transition period. | |
|
3321 | * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. | |
|
3322 | * 0 will be interpreted as "empty" in the future. | |
|
3323 | */ | |
|
3324 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; | |
|
3724 | 3325 | DEBUGLOG(4, "ZSTD_resetCStream: pledgedSrcSize = %u", (unsigned)pledgedSrcSize); |
|
3725 | if (pledgedSrcSize==0) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; | |
|
3726 | params.fParams.contentSizeFlag = 1; | |
|
3727 | return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); | |
|
3326 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3327 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |
|
3328 | return 0; | |
|
3728 | 3329 | } |
|
3729 | 3330 | |
|
3730 | 3331 | /*! ZSTD_initCStream_internal() : |
@@ -3736,32 +3337,18 b' size_t ZSTD_initCStream_internal(ZSTD_CS' | |||
|
3736 | 3337 | ZSTD_CCtx_params params, unsigned long long pledgedSrcSize) |
|
3737 | 3338 | { |
|
3738 | 3339 | DEBUGLOG(4, "ZSTD_initCStream_internal"); |
|
3739 | params.cParams = ZSTD_getCParamsFromCCtxParams(&params, pledgedSrcSize, dictSize); | |
|
3340 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3341 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |
|
3740 | 3342 | assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams))); |
|
3343 | zcs->requestedParams = params; | |
|
3741 | 3344 | assert(!((dict) && (cdict))); /* either dict or cdict, not both */ |
|
3742 | ||
|
3743 | if (dict && dictSize >= 8) { | |
|
3744 | DEBUGLOG(4, "loading dictionary of size %u", (unsigned)dictSize); | |
|
3745 | if (zcs->staticSize) { /* static CCtx : never uses malloc */ | |
|
3746 | /* incompatible with internal cdict creation */ | |
|
3747 | return ERROR(memory_allocation); | |
|
3748 | } | |
|
3749 | ZSTD_freeCDict(zcs->cdictLocal); | |
|
3750 | zcs->cdictLocal = ZSTD_createCDict_advanced(dict, dictSize, | |
|
3751 | ZSTD_dlm_byCopy, ZSTD_dct_auto, | |
|
3752 | params.cParams, zcs->customMem); | |
|
3753 | zcs->cdict = zcs->cdictLocal; | |
|
3754 | if (zcs->cdictLocal == NULL) return ERROR(memory_allocation); | |
|
3345 | if (dict) { | |
|
3346 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); | |
|
3755 | 3347 | } else { |
|
3756 | if (cdict) { | |
|
3757 | params.cParams = ZSTD_getCParamsFromCDict(cdict); /* cParams are enforced from cdict; it includes windowLog */ | |
|
3758 |
|
|
|
3759 | ZSTD_freeCDict(zcs->cdictLocal); | |
|
3760 | zcs->cdictLocal = NULL; | |
|
3761 | zcs->cdict = cdict; | |
|
3762 | } | |
|
3763 | ||
|
3764 | return ZSTD_resetCStream_internal(zcs, NULL, 0, ZSTD_dct_auto, zcs->cdict, params, pledgedSrcSize); | |
|
3348 | /* Dictionary is cleared if !cdict */ | |
|
3349 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); | |
|
3350 | } | |
|
3351 | return 0; | |
|
3765 | 3352 | } |
|
3766 | 3353 | |
|
3767 | 3354 | /* ZSTD_initCStream_usingCDict_advanced() : |
@@ -3772,22 +3359,20 b' size_t ZSTD_initCStream_usingCDict_advan' | |||
|
3772 | 3359 | unsigned long long pledgedSrcSize) |
|
3773 | 3360 | { |
|
3774 | 3361 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict_advanced"); |
|
3775 | if (!cdict) return ERROR(dictionary_wrong); /* cannot handle NULL cdict (does not know what to do) */ | |
|
3776 | { ZSTD_CCtx_params params = zcs->requestedParams; | |
|
3777 | params.cParams = ZSTD_getCParamsFromCDict(cdict); | |
|
3778 | params.fParams = fParams; | |
|
3779 | return ZSTD_initCStream_internal(zcs, | |
|
3780 | NULL, 0, cdict, | |
|
3781 | params, pledgedSrcSize); | |
|
3782 | } | |
|
3362 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3363 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |
|
3364 | zcs->requestedParams.fParams = fParams; | |
|
3365 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); | |
|
3366 | return 0; | |
|
3783 | 3367 | } |
|
3784 | 3368 | |
|
3785 | 3369 | /* note : cdict must outlive compression session */ |
|
3786 | 3370 | size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict) |
|
3787 | 3371 | { |
|
3788 | ZSTD_frameParameters const fParams = { 0 /* contentSizeFlag */, 0 /* checksum */, 0 /* hideDictID */ }; | |
|
3789 | 3372 | DEBUGLOG(4, "ZSTD_initCStream_usingCDict"); |
|
3790 | return ZSTD_initCStream_usingCDict_advanced(zcs, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN); /* note : will check that cdict != NULL */ | |
|
3373 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3374 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, cdict) ); | |
|
3375 | return 0; | |
|
3791 | 3376 | } |
|
3792 | 3377 | |
|
3793 | 3378 | |
@@ -3797,33 +3382,53 b' size_t ZSTD_initCStream_usingCDict(ZSTD_' | |||
|
3797 | 3382 | * dict is loaded with default parameters ZSTD_dm_auto and ZSTD_dlm_byCopy. */ |
|
3798 | 3383 | size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, |
|
3799 | 3384 | const void* dict, size_t dictSize, |
|
3800 |
ZSTD_parameters params, unsigned long long p |
|
|
3385 | ZSTD_parameters params, unsigned long long pss) | |
|
3801 | 3386 | { |
|
3802 | DEBUGLOG(4, "ZSTD_initCStream_advanced: pledgedSrcSize=%u, flag=%u", | |
|
3803 | (unsigned)pledgedSrcSize, params.fParams.contentSizeFlag); | |
|
3804 | CHECK_F( ZSTD_checkCParams(params.cParams) ); | |
|
3805 | if ((pledgedSrcSize==0) && (params.fParams.contentSizeFlag==0)) pledgedSrcSize = ZSTD_CONTENTSIZE_UNKNOWN; /* for compatibility with older programs relying on this behavior. Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. This line will be removed in the future. */ | |
|
3387 | /* for compatibility with older programs relying on this behavior. | |
|
3388 | * Users should now specify ZSTD_CONTENTSIZE_UNKNOWN. | |
|
3389 | * This line will be removed in the future. | |
|
3390 | */ | |
|
3391 | U64 const pledgedSrcSize = (pss==0 && params.fParams.contentSizeFlag==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; | |
|
3392 | DEBUGLOG(4, "ZSTD_initCStream_advanced"); | |
|
3393 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3394 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |
|
3395 | FORWARD_IF_ERROR( ZSTD_checkCParams(params.cParams) ); | |
|
3806 | 3396 | zcs->requestedParams = ZSTD_assignParamsToCCtxParams(zcs->requestedParams, params); |
|
3807 | return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL /*cdict*/, zcs->requestedParams, pledgedSrcSize); | |
|
3397 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); | |
|
3398 | return 0; | |
|
3808 | 3399 | } |
|
3809 | 3400 | |
|
3810 | 3401 | size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel) |
|
3811 | 3402 | { |
|
3812 | ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel); | |
|
3813 | return ZSTD_initCStream_internal(zcs, dict, dictSize, NULL, zcs->requestedParams, ZSTD_CONTENTSIZE_UNKNOWN); | |
|
3403 | DEBUGLOG(4, "ZSTD_initCStream_usingDict"); | |
|
3404 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3405 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); | |
|
3406 | FORWARD_IF_ERROR( ZSTD_CCtx_loadDictionary(zcs, dict, dictSize) ); | |
|
3407 | return 0; | |
|
3814 | 3408 | } |
|
3815 | 3409 | |
|
3816 | 3410 | size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pss) |
|
3817 | 3411 | { |
|
3818 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; /* temporary : 0 interpreted as "unknown" during transition period. Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. `0` will be interpreted as "empty" in the future */ | |
|
3819 | ZSTD_CCtxParams_init(&zcs->requestedParams, compressionLevel); | |
|
3820 | return ZSTD_initCStream_internal(zcs, NULL, 0, NULL, zcs->requestedParams, pledgedSrcSize); | |
|
3412 | /* temporary : 0 interpreted as "unknown" during transition period. | |
|
3413 | * Users willing to specify "unknown" **must** use ZSTD_CONTENTSIZE_UNKNOWN. | |
|
3414 | * 0 will be interpreted as "empty" in the future. | |
|
3415 | */ | |
|
3416 | U64 const pledgedSrcSize = (pss==0) ? ZSTD_CONTENTSIZE_UNKNOWN : pss; | |
|
3417 | DEBUGLOG(4, "ZSTD_initCStream_srcSize"); | |
|
3418 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3419 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); | |
|
3420 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); | |
|
3421 | FORWARD_IF_ERROR( ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize) ); | |
|
3422 | return 0; | |
|
3821 | 3423 | } |
|
3822 | 3424 | |
|
3823 | 3425 | size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel) |
|
3824 | 3426 | { |
|
3825 | 3427 | DEBUGLOG(4, "ZSTD_initCStream"); |
|
3826 | return ZSTD_initCStream_srcSize(zcs, compressionLevel, ZSTD_CONTENTSIZE_UNKNOWN); | |
|
3428 | FORWARD_IF_ERROR( ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only) ); | |
|
3429 | FORWARD_IF_ERROR( ZSTD_CCtx_refCDict(zcs, NULL) ); | |
|
3430 | FORWARD_IF_ERROR( ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel) ); | |
|
3431 | return 0; | |
|
3827 | 3432 | } |
|
3828 | 3433 | |
|
3829 | 3434 | /*====== Compression ======*/ |
@@ -3847,7 +3452,7 b' static size_t ZSTD_limitCopy(void* dst, ' | |||
|
3847 | 3452 | * internal function for all *compressStream*() variants |
|
3848 | 3453 | * non-static, because can be called from zstdmt_compress.c |
|
3849 | 3454 | * @return : hint size for next input */ |
|
3850 | size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, | |
|
3455 | static size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, | |
|
3851 | 3456 | ZSTD_outBuffer* output, |
|
3852 | 3457 | ZSTD_inBuffer* input, |
|
3853 | 3458 | ZSTD_EndDirective const flushMode) |
@@ -3873,8 +3478,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||
|
3873 | 3478 | switch(zcs->streamStage) |
|
3874 | 3479 | { |
|
3875 | 3480 | case zcss_init: |
|
3876 |
|
|
|
3877 | return ERROR(init_missing); | |
|
3481 | RETURN_ERROR(init_missing, "call ZSTD_initCStream() first!"); | |
|
3878 | 3482 | |
|
3879 | 3483 | case zcss_load: |
|
3880 | 3484 | if ( (flushMode == ZSTD_e_end) |
@@ -3884,7 +3488,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||
|
3884 | 3488 | size_t const cSize = ZSTD_compressEnd(zcs, |
|
3885 | 3489 | op, oend-op, ip, iend-ip); |
|
3886 | 3490 | DEBUGLOG(4, "ZSTD_compressEnd : cSize=%u", (unsigned)cSize); |
|
3887 | if (ZSTD_isError(cSize)) return cSize; | |
|
3491 | FORWARD_IF_ERROR(cSize); | |
|
3888 | 3492 | ip = iend; |
|
3889 | 3493 | op += cSize; |
|
3890 | 3494 | zcs->frameEnded = 1; |
@@ -3925,7 +3529,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||
|
3925 | 3529 | zcs->inBuff + zcs->inToCompress, iSize) : |
|
3926 | 3530 | ZSTD_compressContinue(zcs, cDst, oSize, |
|
3927 | 3531 | zcs->inBuff + zcs->inToCompress, iSize); |
|
3928 | if (ZSTD_isError(cSize)) return cSize; | |
|
3532 | FORWARD_IF_ERROR(cSize); | |
|
3929 | 3533 | zcs->frameEnded = lastBlock; |
|
3930 | 3534 | /* prepare next block */ |
|
3931 | 3535 | zcs->inBuffTarget = zcs->inBuffPos + zcs->blockSize; |
@@ -3953,7 +3557,7 b' size_t ZSTD_compressStream_generic(ZSTD_' | |||
|
3953 | 3557 | case zcss_flush: |
|
3954 | 3558 | DEBUGLOG(5, "flush stage"); |
|
3955 | 3559 | { size_t const toFlush = zcs->outBuffContentSize - zcs->outBuffFlushedSize; |
|
3956 | size_t const flushed = ZSTD_limitCopy(op, oend-op, | |
|
3560 | size_t const flushed = ZSTD_limitCopy(op, (size_t)(oend-op), | |
|
3957 | 3561 | zcs->outBuff + zcs->outBuffFlushedSize, toFlush); |
|
3958 | 3562 | DEBUGLOG(5, "toFlush: %u into %u ==> flushed: %u", |
|
3959 | 3563 | (unsigned)toFlush, (unsigned)(oend-op), (unsigned)flushed); |
@@ -4001,7 +3605,7 b' static size_t ZSTD_nextInputSizeHint_MTo' | |||
|
4001 | 3605 | |
|
4002 | 3606 | size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input) |
|
4003 | 3607 | { |
|
4004 |
|
|
|
3608 | FORWARD_IF_ERROR( ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue) ); | |
|
4005 | 3609 | return ZSTD_nextInputSizeHint_MTorST(zcs); |
|
4006 | 3610 | } |
|
4007 | 3611 | |
@@ -4013,14 +3617,15 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||
|
4013 | 3617 | { |
|
4014 | 3618 | DEBUGLOG(5, "ZSTD_compressStream2, endOp=%u ", (unsigned)endOp); |
|
4015 | 3619 | /* check conditions */ |
|
4016 |
|
|
|
4017 |
|
|
|
3620 | RETURN_ERROR_IF(output->pos > output->size, GENERIC); | |
|
3621 | RETURN_ERROR_IF(input->pos > input->size, GENERIC); | |
|
4018 | 3622 | assert(cctx!=NULL); |
|
4019 | 3623 | |
|
4020 | 3624 | /* transparent initialization stage */ |
|
4021 | 3625 | if (cctx->streamStage == zcss_init) { |
|
4022 | 3626 | ZSTD_CCtx_params params = cctx->requestedParams; |
|
4023 | 3627 | ZSTD_prefixDict const prefixDict = cctx->prefixDict; |
|
3628 | FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) ); /* Init the local dict if present. */ | |
|
4024 | 3629 | memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict)); /* single usage */ |
|
4025 | 3630 | assert(prefixDict.dict==NULL || cctx->cdict==NULL); /* only one can be set */ |
|
4026 | 3631 | DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage"); |
@@ -4039,11 +3644,11 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||
|
4039 | 3644 | DEBUGLOG(4, "ZSTD_compressStream2: creating new mtctx for nbWorkers=%u", |
|
4040 | 3645 | params.nbWorkers); |
|
4041 | 3646 | cctx->mtctx = ZSTDMT_createCCtx_advanced(params.nbWorkers, cctx->customMem); |
|
4042 |
|
|
|
3647 | RETURN_ERROR_IF(cctx->mtctx == NULL, memory_allocation); | |
|
4043 | 3648 | } |
|
4044 | 3649 | /* mt compression */ |
|
4045 | 3650 | DEBUGLOG(4, "call ZSTDMT_initCStream_internal as nbWorkers=%u", params.nbWorkers); |
|
4046 |
|
|
|
3651 | FORWARD_IF_ERROR( ZSTDMT_initCStream_internal( | |
|
4047 | 3652 | cctx->mtctx, |
|
4048 | 3653 | prefixDict.dict, prefixDict.dictSize, ZSTD_dct_rawContent, |
|
4049 | 3654 | cctx->cdict, params, cctx->pledgedSrcSizePlusOne-1) ); |
@@ -4051,7 +3656,7 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||
|
4051 | 3656 | cctx->appliedParams.nbWorkers = params.nbWorkers; |
|
4052 | 3657 | } else |
|
4053 | 3658 | #endif |
|
4054 |
{ |
|
|
3659 | { FORWARD_IF_ERROR( ZSTD_resetCStream_internal(cctx, | |
|
4055 | 3660 | prefixDict.dict, prefixDict.dictSize, prefixDict.dictContentType, |
|
4056 | 3661 | cctx->cdict, |
|
4057 | 3662 | params, cctx->pledgedSrcSizePlusOne-1) ); |
@@ -4063,20 +3668,30 b' size_t ZSTD_compressStream2( ZSTD_CCtx* ' | |||
|
4063 | 3668 | /* compression stage */ |
|
4064 | 3669 | #ifdef ZSTD_MULTITHREAD |
|
4065 | 3670 | if (cctx->appliedParams.nbWorkers > 0) { |
|
3671 | int const forceMaxProgress = (endOp == ZSTD_e_flush || endOp == ZSTD_e_end); | |
|
3672 | size_t flushMin; | |
|
3673 | assert(forceMaxProgress || endOp == ZSTD_e_continue /* Protection for a new flush type */); | |
|
4066 | 3674 | if (cctx->cParamsChanged) { |
|
4067 | 3675 | ZSTDMT_updateCParams_whileCompressing(cctx->mtctx, &cctx->requestedParams); |
|
4068 | 3676 | cctx->cParamsChanged = 0; |
|
4069 | 3677 | } |
|
4070 | { size_t const flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); | |
|
3678 | do { | |
|
3679 | flushMin = ZSTDMT_compressStream_generic(cctx->mtctx, output, input, endOp); | |
|
4071 | 3680 | if ( ZSTD_isError(flushMin) |
|
4072 | 3681 | || (endOp == ZSTD_e_end && flushMin == 0) ) { /* compression completed */ |
|
4073 | 3682 | ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only); |
|
4074 | 3683 | } |
|
3684 | FORWARD_IF_ERROR(flushMin); | |
|
3685 | } while (forceMaxProgress && flushMin != 0 && output->pos < output->size); | |
|
4075 | 3686 | DEBUGLOG(5, "completed ZSTD_compressStream2 delegating to ZSTDMT_compressStream_generic"); |
|
3687 | /* Either we don't require maximum forward progress, we've finished the | |
|
3688 | * flush, or we are out of output space. | |
|
3689 | */ | |
|
3690 | assert(!forceMaxProgress || flushMin == 0 || output->pos == output->size); | |
|
4076 | 3691 | return flushMin; |
|
4077 |
} |
|
|
3692 | } | |
|
4078 | 3693 | #endif |
|
4079 |
|
|
|
3694 | FORWARD_IF_ERROR( ZSTD_compressStream_generic(cctx, output, input, endOp) ); | |
|
4080 | 3695 | DEBUGLOG(5, "completed ZSTD_compressStream2"); |
|
4081 | 3696 | return cctx->outBuffContentSize - cctx->outBuffFlushedSize; /* remaining to flush */ |
|
4082 | 3697 | } |
@@ -4107,10 +3722,10 b' size_t ZSTD_compress2(ZSTD_CCtx* cctx,' | |||
|
4107 | 3722 | dst, dstCapacity, &oPos, |
|
4108 | 3723 | src, srcSize, &iPos, |
|
4109 | 3724 | ZSTD_e_end); |
|
4110 | if (ZSTD_isError(result)) return result; | |
|
3725 | FORWARD_IF_ERROR(result); | |
|
4111 | 3726 | if (result != 0) { /* compression not completed, due to lack of output space */ |
|
4112 | 3727 | assert(oPos == dstCapacity); |
|
4113 |
|
|
|
3728 | RETURN_ERROR(dstSize_tooSmall); | |
|
4114 | 3729 | } |
|
4115 | 3730 | assert(iPos == srcSize); /* all input is expected consumed */ |
|
4116 | 3731 | return oPos; |
@@ -4132,11 +3747,11 b' size_t ZSTD_endStream(ZSTD_CStream* zcs,' | |||
|
4132 | 3747 | { |
|
4133 | 3748 | ZSTD_inBuffer input = { NULL, 0, 0 }; |
|
4134 | 3749 | size_t const remainingToFlush = ZSTD_compressStream2(zcs, output, &input, ZSTD_e_end); |
|
4135 |
|
|
|
3750 | FORWARD_IF_ERROR( remainingToFlush ); | |
|
4136 | 3751 | if (zcs->appliedParams.nbWorkers > 0) return remainingToFlush; /* minimal estimation */ |
|
4137 | 3752 | /* single thread mode : attempt to calculate remaining to flush more precisely */ |
|
4138 | 3753 | { size_t const lastBlockSize = zcs->frameEnded ? 0 : ZSTD_BLOCKHEADERSIZE; |
|
4139 | size_t const checksumSize = zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4; | |
|
3754 | size_t const checksumSize = (size_t)(zcs->frameEnded ? 0 : zcs->appliedParams.fParams.checksumFlag * 4); | |
|
4140 | 3755 | size_t const toFlush = remainingToFlush + lastBlockSize + checksumSize; |
|
4141 | 3756 | DEBUGLOG(4, "ZSTD_endStream : remaining to flush : %u", (unsigned)toFlush); |
|
4142 | 3757 | return toFlush; |
@@ -4151,7 +3766,7 b' int ZSTD_maxCLevel(void) { return ZSTD_M' | |||
|
4151 | 3766 | int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; } |
|
4152 | 3767 | |
|
4153 | 3768 | static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = { |
|
4154 | { /* "default" - guarantees a monotonically increasing memory budget */ | |
|
3769 | { /* "default" - for any srcSize > 256 KB */ | |
|
4155 | 3770 | /* W, C, H, S, L, TL, strat */ |
|
4156 | 3771 | { 19, 12, 13, 1, 6, 1, ZSTD_fast }, /* base for negative levels */ |
|
4157 | 3772 | { 19, 13, 14, 1, 7, 0, ZSTD_fast }, /* level 1 */ |
@@ -4263,8 +3878,8 b' static const ZSTD_compressionParameters ' | |||
|
4263 | 3878 | ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) |
|
4264 | 3879 | { |
|
4265 | 3880 | size_t const addedSize = srcSizeHint ? 0 : 500; |
|
4266 |
U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : |
|
|
4267 |
U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); |
|
|
3881 | U64 const rSize = srcSizeHint+dictSize ? srcSizeHint+dictSize+addedSize : ZSTD_CONTENTSIZE_UNKNOWN; /* intentional overflow for srcSizeHint == ZSTD_CONTENTSIZE_UNKNOWN */ | |
|
3882 | U32 const tableID = (rSize <= 256 KB) + (rSize <= 128 KB) + (rSize <= 16 KB); | |
|
4268 | 3883 | int row = compressionLevel; |
|
4269 | 3884 | DEBUGLOG(5, "ZSTD_getCParams (cLevel=%i)", compressionLevel); |
|
4270 | 3885 | if (compressionLevel == 0) row = ZSTD_CLEVEL_DEFAULT; /* 0 == default */ |
@@ -4272,13 +3887,14 b' ZSTD_compressionParameters ZSTD_getCPara' | |||
|
4272 | 3887 | if (compressionLevel > ZSTD_MAX_CLEVEL) row = ZSTD_MAX_CLEVEL; |
|
4273 | 3888 | { ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row]; |
|
4274 | 3889 | if (compressionLevel < 0) cp.targetLength = (unsigned)(-compressionLevel); /* acceleration factor */ |
|
4275 | return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); | |
|
3890 | return ZSTD_adjustCParams_internal(cp, srcSizeHint, dictSize); /* refine parameters based on srcSize & dictSize */ | |
|
4276 | 3891 | } |
|
4277 | 3892 | } |
|
4278 | 3893 | |
|
4279 | 3894 | /*! ZSTD_getParams() : |
|
4280 | * same as ZSTD_getCParams(), but @return a `ZSTD_parameters` object (instead of `ZSTD_compressionParameters`). | |
|
4281 | * All fields of `ZSTD_frameParameters` are set to default (0) */ | |
|
3895 | * same idea as ZSTD_getCParams() | |
|
3896 | * @return a `ZSTD_parameters` structure (instead of `ZSTD_compressionParameters`). | |
|
3897 | * Fields of `ZSTD_frameParameters` are set to default values */ | |
|
4282 | 3898 | ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long srcSizeHint, size_t dictSize) { |
|
4283 | 3899 | ZSTD_parameters params; |
|
4284 | 3900 | ZSTD_compressionParameters const cParams = ZSTD_getCParams(compressionLevel, srcSizeHint, dictSize); |
@@ -33,13 +33,13 b' extern "C" {' | |||
|
33 | 33 | ***************************************/ |
|
34 | 34 | #define kSearchStrength 8 |
|
35 | 35 | #define HASH_READ_SIZE 8 |
|
36 |
#define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index 1 |
|
|
36 | #define ZSTD_DUBT_UNSORTED_MARK 1 /* For btlazy2 strategy, index ZSTD_DUBT_UNSORTED_MARK==1 means "unsorted". | |
|
37 | 37 | It could be confused for a real successor at index "1", if sorted as larger than its predecessor. |
|
38 | 38 | It's not a big deal though : candidate will just be sorted again. |
|
39 |
Addition |
|
|
39 | Additionally, candidate position 1 will be lost. | |
|
40 | 40 | But candidate 1 cannot hide a large tree of candidates, so it's a minimal loss. |
|
41 |
The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mis |
|
|
42 |
|
|
|
41 | The benefit is that ZSTD_DUBT_UNSORTED_MARK cannot be mishandled after table re-use with a different strategy. | |
|
42 | This constant is required by ZSTD_compressBlock_btlazy2() and ZSTD_reduceTable_internal() */ | |
|
43 | 43 | |
|
44 | 44 | |
|
45 | 45 | /*-************************************* |
@@ -55,6 +55,14 b' typedef struct ZSTD_prefixDict_s {' | |||
|
55 | 55 | } ZSTD_prefixDict; |
|
56 | 56 | |
|
57 | 57 | typedef struct { |
|
58 | void* dictBuffer; | |
|
59 | void const* dict; | |
|
60 | size_t dictSize; | |
|
61 | ZSTD_dictContentType_e dictContentType; | |
|
62 | ZSTD_CDict* cdict; | |
|
63 | } ZSTD_localDict; | |
|
64 | ||
|
65 | typedef struct { | |
|
58 | 66 | U32 CTable[HUF_CTABLE_SIZE_U32(255)]; |
|
59 | 67 | HUF_repeat repeatMode; |
|
60 | 68 | } ZSTD_hufCTables_t; |
@@ -107,6 +115,7 b' typedef struct {' | |||
|
107 | 115 | U32 offCodeSumBasePrice; /* to compare to log2(offreq) */ |
|
108 | 116 | ZSTD_OptPrice_e priceType; /* prices can be determined dynamically, or follow a pre-defined cost structure */ |
|
109 | 117 | const ZSTD_entropyCTables_t* symbolCosts; /* pre-calculated dictionary statistics */ |
|
118 | ZSTD_literalCompressionMode_e literalCompressionMode; | |
|
110 | 119 | } optState_t; |
|
111 | 120 | |
|
112 | 121 | typedef struct { |
@@ -119,16 +128,21 b' typedef struct {' | |||
|
119 | 128 | BYTE const* base; /* All regular indexes relative to this position */ |
|
120 | 129 | BYTE const* dictBase; /* extDict indexes relative to this position */ |
|
121 | 130 | U32 dictLimit; /* below that point, need extDict */ |
|
122 | U32 lowLimit; /* below that point, no more data */ | |
|
131 | U32 lowLimit; /* below that point, no more valid data */ | |
|
123 | 132 | } ZSTD_window_t; |
|
124 | 133 | |
|
125 | 134 | typedef struct ZSTD_matchState_t ZSTD_matchState_t; |
|
126 | 135 | struct ZSTD_matchState_t { |
|
127 | 136 | ZSTD_window_t window; /* State for window round buffer management */ |
|
128 |
U32 loadedDictEnd; /* index of end of dictionary |
|
|
137 | U32 loadedDictEnd; /* index of end of dictionary, within context's referential. | |
|
138 | * When loadedDictEnd != 0, a dictionary is in use, and still valid. | |
|
139 | * This relies on a mechanism to set loadedDictEnd=0 when dictionary is no longer within distance. | |
|
140 | * Such mechanism is provided within ZSTD_window_enforceMaxDist() and ZSTD_checkDictValidity(). | |
|
141 | * When dict referential is copied into active context (i.e. not attached), | |
|
142 | * loadedDictEnd == dictSize, since referential starts from zero. | |
|
143 | */ | |
|
129 | 144 | U32 nextToUpdate; /* index from which to continue table update */ |
|
130 | U32 nextToUpdate3; /* index from which to continue table update */ | |
|
131 | U32 hashLog3; /* dispatch table : larger == faster, more memory */ | |
|
145 | U32 hashLog3; /* dispatch table for matches of len==3 : larger == faster, more memory */ | |
|
132 | 146 | U32* hashTable; |
|
133 | 147 | U32* hashTable3; |
|
134 | 148 | U32* chainTable; |
@@ -186,8 +200,12 b' struct ZSTD_CCtx_params_s {' | |||
|
186 | 200 | int compressionLevel; |
|
187 | 201 | int forceWindow; /* force back-references to respect limit of |
|
188 | 202 | * 1<<wLog, even for dictionary */ |
|
203 | size_t targetCBlockSize; /* Tries to fit compressed block size to be around targetCBlockSize. | |
|
204 | * No target when targetCBlockSize == 0. | |
|
205 | * There is no guarantee on compressed block size */ | |
|
189 | 206 | |
|
190 | 207 | ZSTD_dictAttachPref_e attachDictPref; |
|
208 | ZSTD_literalCompressionMode_e literalCompressionMode; | |
|
191 | 209 | |
|
192 | 210 | /* Multithreading: used to pass parameters to mtctx */ |
|
193 | 211 | int nbWorkers; |
@@ -243,7 +261,7 b' struct ZSTD_CCtx_s {' | |||
|
243 | 261 | U32 frameEnded; |
|
244 | 262 | |
|
245 | 263 | /* Dictionary */ |
|
246 |
ZSTD_ |
|
|
264 | ZSTD_localDict localDict; | |
|
247 | 265 | const ZSTD_CDict* cdict; |
|
248 | 266 | ZSTD_prefixDict prefixDict; /* single-usage dictionary */ |
|
249 | 267 | |
@@ -295,6 +313,30 b' MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)' | |||
|
295 | 313 | return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase]; |
|
296 | 314 | } |
|
297 | 315 | |
|
316 | /* ZSTD_cParam_withinBounds: | |
|
317 | * @return 1 if value is within cParam bounds, | |
|
318 | * 0 otherwise */ | |
|
319 | MEM_STATIC int ZSTD_cParam_withinBounds(ZSTD_cParameter cParam, int value) | |
|
320 | { | |
|
321 | ZSTD_bounds const bounds = ZSTD_cParam_getBounds(cParam); | |
|
322 | if (ZSTD_isError(bounds.error)) return 0; | |
|
323 | if (value < bounds.lowerBound) return 0; | |
|
324 | if (value > bounds.upperBound) return 0; | |
|
325 | return 1; | |
|
326 | } | |
|
327 | ||
|
328 | /* ZSTD_minGain() : | |
|
329 | * minimum compression required | |
|
330 | * to generate a compressed block or a compressed literals section. | |
|
331 | * note : use same formula for both situations */ | |
|
332 | MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat) | |
|
333 | { | |
|
334 | U32 const minlog = (strat>=ZSTD_btultra) ? (U32)(strat) - 1 : 6; | |
|
335 | ZSTD_STATIC_ASSERT(ZSTD_btultra == 8); | |
|
336 | assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat)); | |
|
337 | return (srcSize >> minlog) + 2; | |
|
338 | } | |
|
339 | ||
|
298 | 340 | /*! ZSTD_storeSeq() : |
|
299 | 341 | * Store a sequence (literal length, literals, offset code and match length code) into seqStore_t. |
|
300 | 342 | * `offsetCode` : distance to match + 3 (values 1-3 are repCodes). |
@@ -314,7 +356,7 b' MEM_STATIC void ZSTD_storeSeq(seqStore_t' | |||
|
314 | 356 | /* copy Literals */ |
|
315 | 357 | assert(seqStorePtr->maxNbLit <= 128 KB); |
|
316 | 358 | assert(seqStorePtr->lit + litLength <= seqStorePtr->litStart + seqStorePtr->maxNbLit); |
|
317 | ZSTD_wildcopy(seqStorePtr->lit, literals, litLength); | |
|
359 | ZSTD_wildcopy(seqStorePtr->lit, literals, (ptrdiff_t)litLength, ZSTD_no_overlap); | |
|
318 | 360 | seqStorePtr->lit += litLength; |
|
319 | 361 | |
|
320 | 362 | /* literal Length */ |
@@ -554,6 +596,9 b' MEM_STATIC U64 ZSTD_rollingHash_rotate(U' | |||
|
554 | 596 | /*-************************************* |
|
555 | 597 | * Round buffer management |
|
556 | 598 | ***************************************/ |
|
599 | #if (ZSTD_WINDOWLOG_MAX_64 > 31) | |
|
600 | # error "ZSTD_WINDOWLOG_MAX is too large : would overflow ZSTD_CURRENT_MAX" | |
|
601 | #endif | |
|
557 | 602 | /* Max current allowed */ |
|
558 | 603 | #define ZSTD_CURRENT_MAX ((3U << 29) + (1U << ZSTD_WINDOWLOG_MAX)) |
|
559 | 604 | /* Maximum chunk size before overflow correction needs to be called again */ |
@@ -665,31 +710,49 b' MEM_STATIC U32 ZSTD_window_correctOverfl' | |||
|
665 | 710 | * Updates lowLimit so that: |
|
666 | 711 | * (blockEnd - base) - lowLimit == maxDist + loadedDictEnd |
|
667 | 712 | * |
|
668 | * This allows a simple check that index >= lowLimit to see if index is valid. | |
|
669 |
* This must be called before a block compression call |
|
|
670 | * source end. | |
|
713 | * It ensures index is valid as long as index >= lowLimit. | |
|
714 | * This must be called before a block compression call. | |
|
715 | * | |
|
716 | * loadedDictEnd is only defined if a dictionary is in use for current compression. | |
|
717 | * As the name implies, loadedDictEnd represents the index at end of dictionary. | |
|
718 | * The value lies within context's referential, it can be directly compared to blockEndIdx. | |
|
671 | 719 | * |
|
672 | * If loadedDictEndPtr is not NULL, we set it to zero once we update lowLimit. | |
|
673 | * This is because dictionaries are allowed to be referenced as long as the last | |
|
674 | * byte of the dictionary is in the window, but once they are out of range, | |
|
675 | * they cannot be referenced. If loadedDictEndPtr is NULL, we use | |
|
676 | * loadedDictEnd == 0. | |
|
720 | * If loadedDictEndPtr is NULL, no dictionary is in use, and we use loadedDictEnd == 0. | |
|
721 | * If loadedDictEndPtr is not NULL, we set it to zero after updating lowLimit. | |
|
722 | * This is because dictionaries are allowed to be referenced fully | |
|
723 | * as long as the last byte of the dictionary is in the window. | |
|
724 | * Once input has progressed beyond window size, dictionary cannot be referenced anymore. | |
|
677 | 725 | * |
|
678 |
* In normal dict mode, the dict is between lowLimit and dictLimit. |
|
|
679 |
* dictMatchState mode, lowLimit and dictLimit are the same, |
|
|
680 | * is below them. forceWindow and dictMatchState are therefore incompatible. | |
|
726 | * In normal dict mode, the dictionary lies between lowLimit and dictLimit. | |
|
727 | * In dictMatchState mode, lowLimit and dictLimit are the same, | |
|
728 | * and the dictionary is below them. | |
|
729 | * forceWindow and dictMatchState are therefore incompatible. | |
|
681 | 730 | */ |
|
682 | 731 | MEM_STATIC void |
|
683 | 732 | ZSTD_window_enforceMaxDist(ZSTD_window_t* window, |
|
684 |
|
|
|
733 | const void* blockEnd, | |
|
685 | 734 | U32 maxDist, |
|
686 | 735 | U32* loadedDictEndPtr, |
|
687 | 736 | const ZSTD_matchState_t** dictMatchStatePtr) |
|
688 | 737 | { |
|
689 |
U32 const blockEndIdx = (U32)((BYTE const*) |
|
|
690 | U32 loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; | |
|
691 | DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u", | |
|
692 | (unsigned)blockEndIdx, (unsigned)maxDist); | |
|
738 | U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); | |
|
739 | U32 const loadedDictEnd = (loadedDictEndPtr != NULL) ? *loadedDictEndPtr : 0; | |
|
740 | DEBUGLOG(5, "ZSTD_window_enforceMaxDist: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", | |
|
741 | (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); | |
|
742 | ||
|
743 | /* - When there is no dictionary : loadedDictEnd == 0. | |
|
744 | In which case, the test (blockEndIdx > maxDist) is merely to avoid | |
|
745 | overflowing next operation `newLowLimit = blockEndIdx - maxDist`. | |
|
746 | - When there is a standard dictionary : | |
|
747 | Index referential is copied from the dictionary, | |
|
748 | which means it starts from 0. | |
|
749 | In which case, loadedDictEnd == dictSize, | |
|
750 | and it makes sense to compare `blockEndIdx > maxDist + dictSize` | |
|
751 | since `blockEndIdx` also starts from zero. | |
|
752 | - When there is an attached dictionary : | |
|
753 | loadedDictEnd is expressed within the referential of the context, | |
|
754 | so it can be directly compared against blockEndIdx. | |
|
755 | */ | |
|
693 | 756 | if (blockEndIdx > maxDist + loadedDictEnd) { |
|
694 | 757 | U32 const newLowLimit = blockEndIdx - maxDist; |
|
695 | 758 | if (window->lowLimit < newLowLimit) window->lowLimit = newLowLimit; |
@@ -698,11 +761,45 b' ZSTD_window_enforceMaxDist(ZSTD_window_t' | |||
|
698 | 761 | (unsigned)window->dictLimit, (unsigned)window->lowLimit); |
|
699 | 762 | window->dictLimit = window->lowLimit; |
|
700 | 763 | } |
|
701 | if (loadedDictEndPtr) | |
|
764 | /* On reaching window size, dictionaries are invalidated */ | |
|
765 | if (loadedDictEndPtr) *loadedDictEndPtr = 0; | |
|
766 | if (dictMatchStatePtr) *dictMatchStatePtr = NULL; | |
|
767 | } | |
|
768 | } | |
|
769 | ||
|
770 | /* Similar to ZSTD_window_enforceMaxDist(), | |
|
771 | * but only invalidates dictionary | |
|
772 | * when input progresses beyond window size. | |
|
773 | * assumption : loadedDictEndPtr and dictMatchStatePtr are valid (non NULL) | |
|
774 | * loadedDictEnd uses same referential as window->base | |
|
775 | * maxDist is the window size */ | |
|
776 | MEM_STATIC void | |
|
777 | ZSTD_checkDictValidity(const ZSTD_window_t* window, | |
|
778 | const void* blockEnd, | |
|
779 | U32 maxDist, | |
|
780 | U32* loadedDictEndPtr, | |
|
781 | const ZSTD_matchState_t** dictMatchStatePtr) | |
|
782 | { | |
|
783 | assert(loadedDictEndPtr != NULL); | |
|
784 | assert(dictMatchStatePtr != NULL); | |
|
785 | { U32 const blockEndIdx = (U32)((BYTE const*)blockEnd - window->base); | |
|
786 | U32 const loadedDictEnd = *loadedDictEndPtr; | |
|
787 | DEBUGLOG(5, "ZSTD_checkDictValidity: blockEndIdx=%u, maxDist=%u, loadedDictEnd=%u", | |
|
788 | (unsigned)blockEndIdx, (unsigned)maxDist, (unsigned)loadedDictEnd); | |
|
789 | assert(blockEndIdx >= loadedDictEnd); | |
|
790 | ||
|
791 | if (blockEndIdx > loadedDictEnd + maxDist) { | |
|
792 | /* On reaching window size, dictionaries are invalidated. | |
|
793 | * For simplification, if window size is reached anywhere within next block, | |
|
794 | * the dictionary is invalidated for the full block. | |
|
795 | */ | |
|
796 | DEBUGLOG(6, "invalidating dictionary for current block (distance > windowSize)"); | |
|
702 | 797 | *loadedDictEndPtr = 0; |
|
703 | if (dictMatchStatePtr) | |
|
704 | 798 | *dictMatchStatePtr = NULL; |
|
705 | } | |
|
799 | } else { | |
|
800 | if (*loadedDictEndPtr != 0) { | |
|
801 | DEBUGLOG(6, "dictionary considered valid for current block"); | |
|
802 | } } } | |
|
706 | 803 | } |
|
707 | 804 | |
|
708 | 805 | /** |
@@ -744,6 +841,17 b' MEM_STATIC U32 ZSTD_window_update(ZSTD_w' | |||
|
744 | 841 | return contiguous; |
|
745 | 842 | } |
|
746 | 843 | |
|
844 | MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 current, unsigned windowLog) | |
|
845 | { | |
|
846 | U32 const maxDistance = 1U << windowLog; | |
|
847 | U32 const lowestValid = ms->window.lowLimit; | |
|
848 | U32 const withinWindow = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; | |
|
849 | U32 const isDictionary = (ms->loadedDictEnd != 0); | |
|
850 | U32 const matchLowest = isDictionary ? lowestValid : withinWindow; | |
|
851 | return matchLowest; | |
|
852 | } | |
|
853 | ||
|
854 | ||
|
747 | 855 | |
|
748 | 856 | /* debug functions */ |
|
749 | 857 | #if (DEBUGLEVEL>=2) |
@@ -806,13 +914,6 b' size_t ZSTD_initCStream_internal(ZSTD_CS' | |||
|
806 | 914 | |
|
807 | 915 | void ZSTD_resetSeqStore(seqStore_t* ssPtr); |
|
808 | 916 | |
|
809 | /*! ZSTD_compressStream_generic() : | |
|
810 | * Private use only. To be called from zstdmt_compress.c in single-thread mode. */ | |
|
811 | size_t ZSTD_compressStream_generic(ZSTD_CStream* zcs, | |
|
812 | ZSTD_outBuffer* output, | |
|
813 | ZSTD_inBuffer* input, | |
|
814 | ZSTD_EndDirective const flushMode); | |
|
815 | ||
|
816 | 917 | /*! ZSTD_getCParamsFromCDict() : |
|
817 | 918 | * as the name implies */ |
|
818 | 919 | ZSTD_compressionParameters ZSTD_getCParamsFromCDict(const ZSTD_CDict* cdict); |
@@ -839,7 +940,7 b' size_t ZSTD_compress_advanced_internal(Z' | |||
|
839 | 940 | /* ZSTD_writeLastEmptyBlock() : |
|
840 | 941 | * output an empty Block with end-of-frame mark to complete a frame |
|
841 | 942 | * @return : size of data written into `dst` (== ZSTD_blockHeaderSize (defined in zstd_internal.h)) |
|
842 | * or an error code if `dstCapcity` is too small (<ZSTD_blockHeaderSize) | |
|
943 | * or an error code if `dstCapacity` is too small (<ZSTD_blockHeaderSize) | |
|
843 | 944 | */ |
|
844 | 945 | size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); |
|
845 | 946 |
@@ -43,8 +43,7 b' void ZSTD_fillDoubleHashTable(ZSTD_match' | |||
|
43 | 43 | /* Only load extra positions for ZSTD_dtlm_full */ |
|
44 | 44 | if (dtlm == ZSTD_dtlm_fast) |
|
45 | 45 | break; |
|
46 | } | |
|
47 | } | |
|
46 | } } | |
|
48 | 47 | } |
|
49 | 48 | |
|
50 | 49 | |
@@ -63,7 +62,11 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
63 | 62 | const BYTE* const istart = (const BYTE*)src; |
|
64 | 63 | const BYTE* ip = istart; |
|
65 | 64 | const BYTE* anchor = istart; |
|
66 | const U32 prefixLowestIndex = ms->window.dictLimit; | |
|
65 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |
|
66 | const U32 lowestValid = ms->window.dictLimit; | |
|
67 | const U32 maxDistance = 1U << cParams->windowLog; | |
|
68 | /* presumes that, if there is a dictionary, it must be using Attach mode */ | |
|
69 | const U32 prefixLowestIndex = (endIndex - lowestValid > maxDistance) ? endIndex - maxDistance : lowestValid; | |
|
67 | 70 | const BYTE* const prefixLowest = base + prefixLowestIndex; |
|
68 | 71 | const BYTE* const iend = istart + srcSize; |
|
69 | 72 | const BYTE* const ilimit = iend - HASH_READ_SIZE; |
@@ -95,8 +98,15 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
95 | 98 | dictCParams->chainLog : hBitsS; |
|
96 | 99 | const U32 dictAndPrefixLength = (U32)(ip - prefixLowest + dictEnd - dictStart); |
|
97 | 100 | |
|
101 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic"); | |
|
102 | ||
|
98 | 103 | assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); |
|
99 | 104 | |
|
105 | /* if a dictionary is attached, it must be within window range */ | |
|
106 | if (dictMode == ZSTD_dictMatchState) { | |
|
107 | assert(lowestValid + maxDistance >= endIndex); | |
|
108 | } | |
|
109 | ||
|
100 | 110 | /* init */ |
|
101 | 111 | ip += (dictAndPrefixLength == 0); |
|
102 | 112 | if (dictMode == ZSTD_noDict) { |
@@ -138,7 +148,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
138 | 148 | const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend; |
|
139 | 149 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4; |
|
140 | 150 | ip++; |
|
141 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); | |
|
151 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
142 | 152 | goto _match_stored; |
|
143 | 153 | } |
|
144 | 154 | |
@@ -147,7 +157,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
147 | 157 | && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { |
|
148 | 158 | mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; |
|
149 | 159 | ip++; |
|
150 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); | |
|
160 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
151 | 161 | goto _match_stored; |
|
152 | 162 | } |
|
153 | 163 | |
@@ -170,8 +180,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
170 | 180 | offset = (U32)(current - dictMatchIndexL - dictIndexDelta); |
|
171 | 181 | while (((ip>anchor) & (dictMatchL>dictStart)) && (ip[-1] == dictMatchL[-1])) { ip--; dictMatchL--; mLength++; } /* catch up */ |
|
172 | 182 | goto _match_found; |
|
173 |
|
|
|
174 | } | |
|
183 | } } | |
|
175 | 184 | |
|
176 | 185 | if (matchIndexS > prefixLowestIndex) { |
|
177 | 186 | /* check prefix short match */ |
@@ -186,16 +195,14 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
186 | 195 | |
|
187 | 196 | if (match > dictStart && MEM_read32(match) == MEM_read32(ip)) { |
|
188 | 197 | goto _search_next_long; |
|
189 |
|
|
|
190 | } | |
|
198 | } } | |
|
191 | 199 | |
|
192 | 200 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
|
193 | 201 | continue; |
|
194 | 202 | |
|
195 | 203 | _search_next_long: |
|
196 | 204 | |
|
197 | { | |
|
198 | size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |
|
205 | { size_t const hl3 = ZSTD_hashPtr(ip+1, hBitsL, 8); | |
|
199 | 206 | size_t const dictHLNext = ZSTD_hashPtr(ip+1, dictHBitsL, 8); |
|
200 | 207 | U32 const matchIndexL3 = hashLong[hl3]; |
|
201 | 208 | const BYTE* matchL3 = base + matchIndexL3; |
@@ -221,9 +228,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
221 | 228 | offset = (U32)(current + 1 - dictMatchIndexL3 - dictIndexDelta); |
|
222 | 229 | while (((ip>anchor) & (dictMatchL3>dictStart)) && (ip[-1] == dictMatchL3[-1])) { ip--; dictMatchL3--; mLength++; } /* catch up */ |
|
223 | 230 | goto _match_found; |
|
224 | } | |
|
225 | } | |
|
226 | } | |
|
231 | } } } | |
|
227 | 232 | |
|
228 | 233 | /* if no long +1 match, explore the short match we found */ |
|
229 | 234 | if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) { |
@@ -242,7 +247,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
242 | 247 | offset_2 = offset_1; |
|
243 | 248 | offset_1 = offset; |
|
244 | 249 | |
|
245 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
250 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
246 | 251 | |
|
247 | 252 | _match_stored: |
|
248 | 253 | /* match found */ |
@@ -250,11 +255,14 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
250 | 255 | anchor = ip; |
|
251 | 256 | |
|
252 | 257 | if (ip <= ilimit) { |
|
253 | /* Fill Table */ | |
|
254 | hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = | |
|
255 | hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; /* here because current+2 could be > iend-8 */ | |
|
256 |
hashLong[ZSTD_hashPtr( |
|
|
257 |
hash |
|
|
258 | /* Complementary insertion */ | |
|
259 | /* done after iLimit test, as candidates could be > iend-8 */ | |
|
260 | { U32 const indexToInsert = current+2; | |
|
261 | hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |
|
262 | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); | |
|
263 | hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |
|
264 | hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); | |
|
265 | } | |
|
258 | 266 | |
|
259 | 267 | /* check immediate repcode */ |
|
260 | 268 | if (dictMode == ZSTD_dictMatchState) { |
@@ -278,8 +286,7 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
278 | 286 | continue; |
|
279 | 287 | } |
|
280 | 288 | break; |
|
281 |
|
|
|
282 | } | |
|
289 | } } | |
|
283 | 290 | |
|
284 | 291 | if (dictMode == ZSTD_noDict) { |
|
285 | 292 | while ( (ip <= ilimit) |
@@ -294,14 +301,15 b' size_t ZSTD_compressBlock_doubleFast_gen' | |||
|
294 | 301 | ip += rLength; |
|
295 | 302 | anchor = ip; |
|
296 | 303 | continue; /* faster when present ... (?) */ |
|
297 |
|
|
|
304 | } } } | |
|
305 | } /* while (ip < ilimit) */ | |
|
298 | 306 | |
|
299 | 307 | /* save reps for next block */ |
|
300 | 308 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
|
301 | 309 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
|
302 | 310 | |
|
303 | 311 | /* Return the last literals size */ |
|
304 | return iend - anchor; | |
|
312 | return (size_t)(iend - anchor); | |
|
305 | 313 | } |
|
306 | 314 | |
|
307 | 315 | |
@@ -360,10 +368,13 b' static size_t ZSTD_compressBlock_doubleF' | |||
|
360 | 368 | const BYTE* anchor = istart; |
|
361 | 369 | const BYTE* const iend = istart + srcSize; |
|
362 | 370 | const BYTE* const ilimit = iend - 8; |
|
363 | const U32 prefixStartIndex = ms->window.dictLimit; | |
|
364 | 371 | const BYTE* const base = ms->window.base; |
|
372 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |
|
373 | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); | |
|
374 | const U32 dictStartIndex = lowLimit; | |
|
375 | const U32 dictLimit = ms->window.dictLimit; | |
|
376 | const U32 prefixStartIndex = (dictLimit > lowLimit) ? dictLimit : lowLimit; | |
|
365 | 377 | const BYTE* const prefixStart = base + prefixStartIndex; |
|
366 | const U32 dictStartIndex = ms->window.lowLimit; | |
|
367 | 378 | const BYTE* const dictBase = ms->window.dictBase; |
|
368 | 379 | const BYTE* const dictStart = dictBase + dictStartIndex; |
|
369 | 380 | const BYTE* const dictEnd = dictBase + prefixStartIndex; |
@@ -371,6 +382,10 b' static size_t ZSTD_compressBlock_doubleF' | |||
|
371 | 382 | |
|
372 | 383 | DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_extDict_generic (srcSize=%zu)", srcSize); |
|
373 | 384 | |
|
385 | /* if extDict is invalidated due to maxDistance, switch to "regular" variant */ | |
|
386 | if (prefixStartIndex == dictStartIndex) | |
|
387 | return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict); | |
|
388 | ||
|
374 | 389 | /* Search Loop */ |
|
375 | 390 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ |
|
376 | 391 | const size_t hSmall = ZSTD_hashPtr(ip, hBitsS, mls); |
@@ -396,7 +411,7 b' static size_t ZSTD_compressBlock_doubleF' | |||
|
396 | 411 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
397 | 412 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
398 | 413 | ip++; |
|
399 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); | |
|
414 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
400 | 415 | } else { |
|
401 | 416 | if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) { |
|
402 | 417 | const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend; |
@@ -407,7 +422,7 b' static size_t ZSTD_compressBlock_doubleF' | |||
|
407 | 422 | while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */ |
|
408 | 423 | offset_2 = offset_1; |
|
409 | 424 | offset_1 = offset; |
|
410 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
425 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
411 | 426 | |
|
412 | 427 | } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) { |
|
413 | 428 | size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8); |
@@ -432,23 +447,27 b' static size_t ZSTD_compressBlock_doubleF' | |||
|
432 | 447 | } |
|
433 | 448 | offset_2 = offset_1; |
|
434 | 449 | offset_1 = offset; |
|
435 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
450 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
436 | 451 | |
|
437 | 452 | } else { |
|
438 | 453 | ip += ((ip-anchor) >> kSearchStrength) + 1; |
|
439 | 454 | continue; |
|
440 | 455 | } } |
|
441 | 456 | |
|
442 | /* found a match : store it */ | |
|
457 | /* move to next sequence start */ | |
|
443 | 458 | ip += mLength; |
|
444 | 459 | anchor = ip; |
|
445 | 460 | |
|
446 | 461 | if (ip <= ilimit) { |
|
447 | /* Fill Table */ | |
|
448 | hashSmall[ZSTD_hashPtr(base+current+2, hBitsS, mls)] = current+2; | |
|
449 | hashLong[ZSTD_hashPtr(base+current+2, hBitsL, 8)] = current+2; | |
|
450 |
|
|
|
462 | /* Complementary insertion */ | |
|
463 | /* done after iLimit test, as candidates could be > iend-8 */ | |
|
464 | { U32 const indexToInsert = current+2; | |
|
465 | hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert; | |
|
451 | 466 | hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base); |
|
467 | hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert; | |
|
468 | hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base); | |
|
469 | } | |
|
470 | ||
|
452 | 471 | /* check immediate repcode */ |
|
453 | 472 | while (ip <= ilimit) { |
|
454 | 473 | U32 const current2 = (U32)(ip-base); |
@@ -475,7 +494,7 b' static size_t ZSTD_compressBlock_doubleF' | |||
|
475 | 494 | rep[1] = offset_2; |
|
476 | 495 | |
|
477 | 496 | /* Return the last literals size */ |
|
478 | return iend - anchor; | |
|
497 | return (size_t)(iend - anchor); | |
|
479 | 498 | } |
|
480 | 499 | |
|
481 | 500 |
@@ -13,7 +13,8 b'' | |||
|
13 | 13 | |
|
14 | 14 | |
|
15 | 15 | void ZSTD_fillHashTable(ZSTD_matchState_t* ms, |
|
16 |
void const |
|
|
16 | const void* const end, | |
|
17 | ZSTD_dictTableLoadMethod_e dtlm) | |
|
17 | 18 | { |
|
18 | 19 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
|
19 | 20 | U32* const hashTable = ms->hashTable; |
@@ -41,11 +42,164 b' void ZSTD_fillHashTable(ZSTD_matchState_' | |||
|
41 | 42 | } } } } |
|
42 | 43 | } |
|
43 | 44 | |
|
45 | ||
|
44 | 46 | FORCE_INLINE_TEMPLATE |
|
45 | 47 | size_t ZSTD_compressBlock_fast_generic( |
|
46 | 48 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
47 | 49 | void const* src, size_t srcSize, |
|
48 | U32 const mls, ZSTD_dictMode_e const dictMode) | |
|
50 | U32 const mls) | |
|
51 | { | |
|
52 | const ZSTD_compressionParameters* const cParams = &ms->cParams; | |
|
53 | U32* const hashTable = ms->hashTable; | |
|
54 | U32 const hlog = cParams->hashLog; | |
|
55 | /* support stepSize of 0 */ | |
|
56 | size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1; | |
|
57 | const BYTE* const base = ms->window.base; | |
|
58 | const BYTE* const istart = (const BYTE*)src; | |
|
59 | /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */ | |
|
60 | const BYTE* ip0 = istart; | |
|
61 | const BYTE* ip1; | |
|
62 | const BYTE* anchor = istart; | |
|
63 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |
|
64 | const U32 maxDistance = 1U << cParams->windowLog; | |
|
65 | const U32 validStartIndex = ms->window.dictLimit; | |
|
66 | const U32 prefixStartIndex = (endIndex - validStartIndex > maxDistance) ? endIndex - maxDistance : validStartIndex; | |
|
67 | const BYTE* const prefixStart = base + prefixStartIndex; | |
|
68 | const BYTE* const iend = istart + srcSize; | |
|
69 | const BYTE* const ilimit = iend - HASH_READ_SIZE; | |
|
70 | U32 offset_1=rep[0], offset_2=rep[1]; | |
|
71 | U32 offsetSaved = 0; | |
|
72 | ||
|
73 | /* init */ | |
|
74 | DEBUGLOG(5, "ZSTD_compressBlock_fast_generic"); | |
|
75 | ip0 += (ip0 == prefixStart); | |
|
76 | ip1 = ip0 + 1; | |
|
77 | { | |
|
78 | U32 const maxRep = (U32)(ip0 - prefixStart); | |
|
79 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; | |
|
80 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; | |
|
81 | } | |
|
82 | ||
|
83 | /* Main Search Loop */ | |
|
84 | while (ip1 < ilimit) { /* < instead of <=, because check at ip0+2 */ | |
|
85 | size_t mLength; | |
|
86 | BYTE const* ip2 = ip0 + 2; | |
|
87 | size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls); | |
|
88 | U32 const val0 = MEM_read32(ip0); | |
|
89 | size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls); | |
|
90 | U32 const val1 = MEM_read32(ip1); | |
|
91 | U32 const current0 = (U32)(ip0-base); | |
|
92 | U32 const current1 = (U32)(ip1-base); | |
|
93 | U32 const matchIndex0 = hashTable[h0]; | |
|
94 | U32 const matchIndex1 = hashTable[h1]; | |
|
95 | BYTE const* repMatch = ip2-offset_1; | |
|
96 | const BYTE* match0 = base + matchIndex0; | |
|
97 | const BYTE* match1 = base + matchIndex1; | |
|
98 | U32 offcode; | |
|
99 | hashTable[h0] = current0; /* update hash table */ | |
|
100 | hashTable[h1] = current1; /* update hash table */ | |
|
101 | ||
|
102 | assert(ip0 + 1 == ip1); | |
|
103 | ||
|
104 | if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) { | |
|
105 | mLength = ip2[-1] == repMatch[-1] ? 1 : 0; | |
|
106 | ip0 = ip2 - mLength; | |
|
107 | match0 = repMatch - mLength; | |
|
108 | offcode = 0; | |
|
109 | goto _match; | |
|
110 | } | |
|
111 | if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) { | |
|
112 | /* found a regular match */ | |
|
113 | goto _offset; | |
|
114 | } | |
|
115 | if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) { | |
|
116 | /* found a regular match after one literal */ | |
|
117 | ip0 = ip1; | |
|
118 | match0 = match1; | |
|
119 | goto _offset; | |
|
120 | } | |
|
121 | { | |
|
122 | size_t const step = ((ip0-anchor) >> (kSearchStrength - 1)) + stepSize; | |
|
123 | assert(step >= 2); | |
|
124 | ip0 += step; | |
|
125 | ip1 += step; | |
|
126 | continue; | |
|
127 | } | |
|
128 | _offset: /* Requires: ip0, match0 */ | |
|
129 | /* Compute the offset code */ | |
|
130 | offset_2 = offset_1; | |
|
131 | offset_1 = (U32)(ip0-match0); | |
|
132 | offcode = offset_1 + ZSTD_REP_MOVE; | |
|
133 | mLength = 0; | |
|
134 | /* Count the backwards match length */ | |
|
135 | while (((ip0>anchor) & (match0>prefixStart)) | |
|
136 | && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */ | |
|
137 | ||
|
138 | _match: /* Requires: ip0, match0, offcode */ | |
|
139 | /* Count the forward length */ | |
|
140 | mLength += ZSTD_count(ip0+mLength+4, match0+mLength+4, iend) + 4; | |
|
141 | ZSTD_storeSeq(seqStore, ip0-anchor, anchor, offcode, mLength-MINMATCH); | |
|
142 | /* match found */ | |
|
143 | ip0 += mLength; | |
|
144 | anchor = ip0; | |
|
145 | ip1 = ip0 + 1; | |
|
146 | ||
|
147 | if (ip0 <= ilimit) { | |
|
148 | /* Fill Table */ | |
|
149 | assert(base+current0+2 > istart); /* check base overflow */ | |
|
150 | hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2; /* here because current+2 could be > iend-8 */ | |
|
151 | hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base); | |
|
152 | ||
|
153 | while ( (ip0 <= ilimit) | |
|
154 | && ( (offset_2>0) | |
|
155 | & (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) )) { | |
|
156 | /* store sequence */ | |
|
157 | size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4; | |
|
158 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ | |
|
159 | hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base); | |
|
160 | ip0 += rLength; | |
|
161 | ip1 = ip0 + 1; | |
|
162 | ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); | |
|
163 | anchor = ip0; | |
|
164 | continue; /* faster when present (confirmed on gcc-8) ... (?) */ | |
|
165 | } | |
|
166 | } | |
|
167 | } | |
|
168 | ||
|
169 | /* save reps for next block */ | |
|
170 | rep[0] = offset_1 ? offset_1 : offsetSaved; | |
|
171 | rep[1] = offset_2 ? offset_2 : offsetSaved; | |
|
172 | ||
|
173 | /* Return the last literals size */ | |
|
174 | return (size_t)(iend - anchor); | |
|
175 | } | |
|
176 | ||
|
177 | ||
|
178 | size_t ZSTD_compressBlock_fast( | |
|
179 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
|
180 | void const* src, size_t srcSize) | |
|
181 | { | |
|
182 | ZSTD_compressionParameters const* cParams = &ms->cParams; | |
|
183 | U32 const mls = cParams->minMatch; | |
|
184 | assert(ms->dictMatchState == NULL); | |
|
185 | switch(mls) | |
|
186 | { | |
|
187 | default: /* includes case 3 */ | |
|
188 | case 4 : | |
|
189 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4); | |
|
190 | case 5 : | |
|
191 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5); | |
|
192 | case 6 : | |
|
193 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6); | |
|
194 | case 7 : | |
|
195 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7); | |
|
196 | } | |
|
197 | } | |
|
198 | ||
|
199 | FORCE_INLINE_TEMPLATE | |
|
200 | size_t ZSTD_compressBlock_fast_dictMatchState_generic( | |
|
201 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
|
202 | void const* src, size_t srcSize, U32 const mls) | |
|
49 | 203 | { |
|
50 | 204 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
|
51 | 205 | U32* const hashTable = ms->hashTable; |
@@ -64,46 +218,34 b' size_t ZSTD_compressBlock_fast_generic(' | |||
|
64 | 218 | U32 offsetSaved = 0; |
|
65 | 219 | |
|
66 | 220 | const ZSTD_matchState_t* const dms = ms->dictMatchState; |
|
67 | const ZSTD_compressionParameters* const dictCParams = | |
|
68 | dictMode == ZSTD_dictMatchState ? | |
|
69 | &dms->cParams : NULL; | |
|
70 | const U32* const dictHashTable = dictMode == ZSTD_dictMatchState ? | |
|
71 | dms->hashTable : NULL; | |
|
72 | const U32 dictStartIndex = dictMode == ZSTD_dictMatchState ? | |
|
73 | dms->window.dictLimit : 0; | |
|
74 | const BYTE* const dictBase = dictMode == ZSTD_dictMatchState ? | |
|
75 | dms->window.base : NULL; | |
|
76 | const BYTE* const dictStart = dictMode == ZSTD_dictMatchState ? | |
|
77 | dictBase + dictStartIndex : NULL; | |
|
78 | const BYTE* const dictEnd = dictMode == ZSTD_dictMatchState ? | |
|
79 | dms->window.nextSrc : NULL; | |
|
80 | const U32 dictIndexDelta = dictMode == ZSTD_dictMatchState ? | |
|
81 | prefixStartIndex - (U32)(dictEnd - dictBase) : | |
|
82 | 0; | |
|
221 | const ZSTD_compressionParameters* const dictCParams = &dms->cParams ; | |
|
222 | const U32* const dictHashTable = dms->hashTable; | |
|
223 | const U32 dictStartIndex = dms->window.dictLimit; | |
|
224 | const BYTE* const dictBase = dms->window.base; | |
|
225 | const BYTE* const dictStart = dictBase + dictStartIndex; | |
|
226 | const BYTE* const dictEnd = dms->window.nextSrc; | |
|
227 | const U32 dictIndexDelta = prefixStartIndex - (U32)(dictEnd - dictBase); | |
|
83 | 228 | const U32 dictAndPrefixLength = (U32)(ip - prefixStart + dictEnd - dictStart); |
|
84 |
const U32 dictHLog = dict |
|
|
85 | dictCParams->hashLog : hlog; | |
|
86 | ||
|
87 | assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState); | |
|
229 | const U32 dictHLog = dictCParams->hashLog; | |
|
88 | 230 | |
|
89 | /* otherwise, we would get index underflow when translating a dict index | |
|
90 | * into a local index */ | |
|
91 | assert(dictMode != ZSTD_dictMatchState | |
|
92 | || prefixStartIndex >= (U32)(dictEnd - dictBase)); | |
|
231 | /* if a dictionary is still attached, it necessarily means that | |
|
232 | * it is within window size. So we just check it. */ | |
|
233 | const U32 maxDistance = 1U << cParams->windowLog; | |
|
234 | const U32 endIndex = (U32)((size_t)(ip - base) + srcSize); | |
|
235 | assert(endIndex - prefixStartIndex <= maxDistance); | |
|
236 | (void)maxDistance; (void)endIndex; /* these variables are not used when assert() is disabled */ | |
|
237 | ||
|
238 | /* ensure there will be no underflow | |
|
239 | * when translating a dict index into a local index */ | |
|
240 | assert(prefixStartIndex >= (U32)(dictEnd - dictBase)); | |
|
93 | 241 | |
|
94 | 242 | /* init */ |
|
243 | DEBUGLOG(5, "ZSTD_compressBlock_fast_dictMatchState_generic"); | |
|
95 | 244 | ip += (dictAndPrefixLength == 0); |
|
96 | if (dictMode == ZSTD_noDict) { | |
|
97 | U32 const maxRep = (U32)(ip - prefixStart); | |
|
98 | if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0; | |
|
99 | if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0; | |
|
100 | } | |
|
101 | if (dictMode == ZSTD_dictMatchState) { | |
|
102 | 245 |
|
|
103 | 246 |
|
|
104 | 247 |
|
|
105 | 248 |
|
|
106 | } | |
|
107 | 249 | |
|
108 | 250 | /* Main Search Loop */ |
|
109 | 251 | while (ip < ilimit) { /* < instead of <=, because repcode check at (ip+1) */ |
@@ -113,26 +255,18 b' size_t ZSTD_compressBlock_fast_generic(' | |||
|
113 | 255 | U32 const matchIndex = hashTable[h]; |
|
114 | 256 | const BYTE* match = base + matchIndex; |
|
115 | 257 | const U32 repIndex = current + 1 - offset_1; |
|
116 |
const BYTE* repMatch = ( |
|
|
117 | && repIndex < prefixStartIndex) ? | |
|
258 | const BYTE* repMatch = (repIndex < prefixStartIndex) ? | |
|
118 | 259 | dictBase + (repIndex - dictIndexDelta) : |
|
119 | 260 | base + repIndex; |
|
120 | 261 | hashTable[h] = current; /* update hash table */ |
|
121 | 262 | |
|
122 | if ( (dictMode == ZSTD_dictMatchState) | |
|
123 | && ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ | |
|
263 | if ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex isn't overlapping dict + prefix */ | |
|
124 | 264 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
|
125 | 265 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; |
|
126 | 266 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
127 | 267 | ip++; |
|
128 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); | |
|
129 | } else if ( dictMode == ZSTD_noDict | |
|
130 | && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) { | |
|
131 | mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4; | |
|
132 | ip++; | |
|
133 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); | |
|
268 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
134 | 269 | } else if ( (matchIndex <= prefixStartIndex) ) { |
|
135 | if (dictMode == ZSTD_dictMatchState) { | |
|
136 | 270 |
|
|
137 | 271 |
|
|
138 | 272 |
|
@@ -151,12 +285,7 b' size_t ZSTD_compressBlock_fast_generic(' | |||
|
151 | 285 |
|
|
152 | 286 |
|
|
153 | 287 |
|
|
154 |
|
|
|
155 | } | |
|
156 | } else { | |
|
157 | assert(stepSize >= 1); | |
|
158 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; | |
|
159 | continue; | |
|
288 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
160 | 289 | } |
|
161 | 290 | } else if (MEM_read32(match) != MEM_read32(ip)) { |
|
162 | 291 | /* it's not a match, and we're not going to check the dictionary */ |
@@ -171,7 +300,7 b' size_t ZSTD_compressBlock_fast_generic(' | |||
|
171 | 300 | && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
|
172 | 301 | offset_2 = offset_1; |
|
173 | 302 | offset_1 = offset; |
|
174 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
303 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
175 | 304 | } |
|
176 | 305 | |
|
177 | 306 | /* match found */ |
@@ -185,7 +314,6 b' size_t ZSTD_compressBlock_fast_generic(' | |||
|
185 | 314 | hashTable[ZSTD_hashPtr(ip-2, hlog, mls)] = (U32)(ip-2-base); |
|
186 | 315 | |
|
187 | 316 | /* check immediate repcode */ |
|
188 | if (dictMode == ZSTD_dictMatchState) { | |
|
189 | 317 |
|
|
190 | 318 |
|
|
191 | 319 |
|
@@ -206,49 +334,14 b' size_t ZSTD_compressBlock_fast_generic(' | |||
|
206 | 334 |
|
|
207 | 335 |
|
|
208 | 336 |
|
|
209 | ||
|
210 | if (dictMode == ZSTD_noDict) { | |
|
211 | while ( (ip <= ilimit) | |
|
212 | && ( (offset_2>0) | |
|
213 | & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) { | |
|
214 | /* store sequence */ | |
|
215 | size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4; | |
|
216 | U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; /* swap offset_2 <=> offset_1 */ | |
|
217 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = (U32)(ip-base); | |
|
218 | ZSTD_storeSeq(seqStore, 0, anchor, 0, rLength-MINMATCH); | |
|
219 | ip += rLength; | |
|
220 | anchor = ip; | |
|
221 | continue; /* faster when present ... (?) */ | |
|
222 | } } } } | |
|
337 | } | |
|
223 | 338 | |
|
224 | 339 | /* save reps for next block */ |
|
225 | 340 | rep[0] = offset_1 ? offset_1 : offsetSaved; |
|
226 | 341 | rep[1] = offset_2 ? offset_2 : offsetSaved; |
|
227 | 342 | |
|
228 | 343 | /* Return the last literals size */ |
|
229 | return iend - anchor; | |
|
230 | } | |
|
231 | ||
|
232 | ||
|
233 | size_t ZSTD_compressBlock_fast( | |
|
234 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
|
235 | void const* src, size_t srcSize) | |
|
236 | { | |
|
237 | ZSTD_compressionParameters const* cParams = &ms->cParams; | |
|
238 | U32 const mls = cParams->minMatch; | |
|
239 | assert(ms->dictMatchState == NULL); | |
|
240 | switch(mls) | |
|
241 | { | |
|
242 | default: /* includes case 3 */ | |
|
243 | case 4 : | |
|
244 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict); | |
|
245 | case 5 : | |
|
246 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict); | |
|
247 | case 6 : | |
|
248 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict); | |
|
249 | case 7 : | |
|
250 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict); | |
|
251 | } | |
|
344 | return (size_t)(iend - anchor); | |
|
252 | 345 | } |
|
253 | 346 | |
|
254 | 347 | size_t ZSTD_compressBlock_fast_dictMatchState( |
@@ -262,13 +355,13 b' size_t ZSTD_compressBlock_fast_dictMatch' | |||
|
262 | 355 | { |
|
263 | 356 | default: /* includes case 3 */ |
|
264 | 357 | case 4 : |
|
265 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4 |
|
|
358 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4); | |
|
266 | 359 | case 5 : |
|
267 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5 |
|
|
360 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5); | |
|
268 | 361 | case 6 : |
|
269 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6 |
|
|
362 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6); | |
|
270 | 363 | case 7 : |
|
271 |
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7 |
|
|
364 | return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7); | |
|
272 | 365 | } |
|
273 | 366 | } |
|
274 | 367 | |
@@ -287,15 +380,24 b' static size_t ZSTD_compressBlock_fast_ex' | |||
|
287 | 380 | const BYTE* const istart = (const BYTE*)src; |
|
288 | 381 | const BYTE* ip = istart; |
|
289 | 382 | const BYTE* anchor = istart; |
|
290 | const U32 dictStartIndex = ms->window.lowLimit; | |
|
383 | const U32 endIndex = (U32)((size_t)(istart - base) + srcSize); | |
|
384 | const U32 lowLimit = ZSTD_getLowestMatchIndex(ms, endIndex, cParams->windowLog); | |
|
385 | const U32 dictStartIndex = lowLimit; | |
|
291 | 386 | const BYTE* const dictStart = dictBase + dictStartIndex; |
|
292 |
const U32 |
|
|
387 | const U32 dictLimit = ms->window.dictLimit; | |
|
388 | const U32 prefixStartIndex = dictLimit < lowLimit ? lowLimit : dictLimit; | |
|
293 | 389 | const BYTE* const prefixStart = base + prefixStartIndex; |
|
294 | 390 | const BYTE* const dictEnd = dictBase + prefixStartIndex; |
|
295 | 391 | const BYTE* const iend = istart + srcSize; |
|
296 | 392 | const BYTE* const ilimit = iend - 8; |
|
297 | 393 | U32 offset_1=rep[0], offset_2=rep[1]; |
|
298 | 394 | |
|
395 | DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic"); | |
|
396 | ||
|
397 | /* switch to "regular" variant if extDict is invalidated due to maxDistance */ | |
|
398 | if (prefixStartIndex == dictStartIndex) | |
|
399 | return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); | |
|
400 | ||
|
299 | 401 | /* Search Loop */ |
|
300 | 402 | while (ip < ilimit) { /* < instead of <=, because (ip+1) */ |
|
301 | 403 | const size_t h = ZSTD_hashPtr(ip, hlog, mls); |
@@ -312,10 +414,10 b' static size_t ZSTD_compressBlock_fast_ex' | |||
|
312 | 414 | |
|
313 | 415 | if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) |
|
314 | 416 | && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { |
|
315 | const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |
|
417 | const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; | |
|
316 | 418 | mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; |
|
317 | 419 | ip++; |
|
318 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, 0, mLength-MINMATCH); | |
|
420 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, 0, mLength-MINMATCH); | |
|
319 | 421 | } else { |
|
320 | 422 | if ( (matchIndex < dictStartIndex) || |
|
321 | 423 | (MEM_read32(match) != MEM_read32(ip)) ) { |
@@ -323,15 +425,15 b' static size_t ZSTD_compressBlock_fast_ex' | |||
|
323 | 425 | ip += ((ip-anchor) >> kSearchStrength) + stepSize; |
|
324 | 426 | continue; |
|
325 | 427 | } |
|
326 | { const BYTE* matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; | |
|
327 | const BYTE* lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; | |
|
428 | { const BYTE* const matchEnd = matchIndex < prefixStartIndex ? dictEnd : iend; | |
|
429 | const BYTE* const lowMatchPtr = matchIndex < prefixStartIndex ? dictStart : prefixStart; | |
|
328 | 430 | U32 offset; |
|
329 | 431 | mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4; |
|
330 | 432 | while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */ |
|
331 | 433 | offset = current - matchIndex; |
|
332 | 434 | offset_2 = offset_1; |
|
333 | 435 | offset_1 = offset; |
|
334 | ZSTD_storeSeq(seqStore, ip-anchor, anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
436 | ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, offset + ZSTD_REP_MOVE, mLength-MINMATCH); | |
|
335 | 437 | } } |
|
336 | 438 | |
|
337 | 439 | /* found a match : store it */ |
@@ -351,7 +453,7 b' static size_t ZSTD_compressBlock_fast_ex' | |||
|
351 | 453 | && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { |
|
352 | 454 | const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; |
|
353 | 455 | size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; |
|
354 | U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |
|
456 | U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; /* swap offset_2 <=> offset_1 */ | |
|
355 | 457 | ZSTD_storeSeq(seqStore, 0, anchor, 0, repLength2-MINMATCH); |
|
356 | 458 | hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2; |
|
357 | 459 | ip += repLength2; |
@@ -366,7 +468,7 b' static size_t ZSTD_compressBlock_fast_ex' | |||
|
366 | 468 | rep[1] = offset_2; |
|
367 | 469 | |
|
368 | 470 | /* Return the last literals size */ |
|
369 | return iend - anchor; | |
|
471 | return (size_t)(iend - anchor); | |
|
370 | 472 | } |
|
371 | 473 | |
|
372 | 474 |
@@ -83,7 +83,10 b' ZSTD_insertDUBT1(ZSTD_matchState_t* ms,' | |||
|
83 | 83 | U32* largerPtr = smallerPtr + 1; |
|
84 | 84 | U32 matchIndex = *smallerPtr; /* this candidate is unsorted : next sorted candidate is reached through *smallerPtr, while *largerPtr contains previous unsorted candidate (which is already saved and can be overwritten) */ |
|
85 | 85 | U32 dummy32; /* to be nullified at the end */ |
|
86 |
U32 const window |
|
|
86 | U32 const windowValid = ms->window.lowLimit; | |
|
87 | U32 const maxDistance = 1U << cParams->windowLog; | |
|
88 | U32 const windowLow = (current - windowValid > maxDistance) ? current - maxDistance : windowValid; | |
|
89 | ||
|
87 | 90 | |
|
88 | 91 | DEBUGLOG(8, "ZSTD_insertDUBT1(%u) (dictLimit=%u, lowLimit=%u)", |
|
89 | 92 | current, dictLimit, windowLow); |
@@ -239,7 +242,7 b' ZSTD_DUBT_findBestMatch(ZSTD_matchState_' | |||
|
239 | 242 | |
|
240 | 243 | const BYTE* const base = ms->window.base; |
|
241 | 244 | U32 const current = (U32)(ip-base); |
|
242 |
U32 const windowLow = ms->window |
|
|
245 | U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); | |
|
243 | 246 | |
|
244 | 247 | U32* const bt = ms->chainTable; |
|
245 | 248 | U32 const btLog = cParams->chainLog - 1; |
@@ -490,8 +493,12 b' size_t ZSTD_HcFindBestMatch_generic (' | |||
|
490 | 493 | const U32 dictLimit = ms->window.dictLimit; |
|
491 | 494 | const BYTE* const prefixStart = base + dictLimit; |
|
492 | 495 | const BYTE* const dictEnd = dictBase + dictLimit; |
|
493 | const U32 lowLimit = ms->window.lowLimit; | |
|
494 | 496 | const U32 current = (U32)(ip-base); |
|
497 | const U32 maxDistance = 1U << cParams->windowLog; | |
|
498 | const U32 lowestValid = ms->window.lowLimit; | |
|
499 | const U32 withinMaxDistance = (current - lowestValid > maxDistance) ? current - maxDistance : lowestValid; | |
|
500 | const U32 isDictionary = (ms->loadedDictEnd != 0); | |
|
501 | const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance; | |
|
495 | 502 | const U32 minChain = current > chainSize ? current - chainSize : 0; |
|
496 | 503 | U32 nbAttempts = 1U << cParams->searchLog; |
|
497 | 504 | size_t ml=4-1; |
@@ -612,12 +619,14 b' FORCE_INLINE_TEMPLATE size_t ZSTD_HcFind' | |||
|
612 | 619 | /* ******************************* |
|
613 | 620 | * Common parser - lazy strategy |
|
614 | 621 | *********************************/ |
|
615 | FORCE_INLINE_TEMPLATE | |
|
616 | size_t ZSTD_compressBlock_lazy_generic( | |
|
622 | typedef enum { search_hashChain, search_binaryTree } searchMethod_e; | |
|
623 | ||
|
624 | FORCE_INLINE_TEMPLATE size_t | |
|
625 | ZSTD_compressBlock_lazy_generic( | |
|
617 | 626 | ZSTD_matchState_t* ms, seqStore_t* seqStore, |
|
618 | 627 | U32 rep[ZSTD_REP_NUM], |
|
619 | 628 | const void* src, size_t srcSize, |
|
620 |
const |
|
|
629 | const searchMethod_e searchMethod, const U32 depth, | |
|
621 | 630 | ZSTD_dictMode_e const dictMode) |
|
622 | 631 | { |
|
623 | 632 | const BYTE* const istart = (const BYTE*)src; |
@@ -633,8 +642,10 b' size_t ZSTD_compressBlock_lazy_generic(' | |||
|
633 | 642 | ZSTD_matchState_t* ms, |
|
634 | 643 | const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); |
|
635 | 644 | searchMax_f const searchMax = dictMode == ZSTD_dictMatchState ? |
|
636 |
(searchMethod ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS |
|
|
637 | (searchMethod ? ZSTD_BtFindBestMatch_selectMLS : ZSTD_HcFindBestMatch_selectMLS); | |
|
645 | (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_dictMatchState_selectMLS | |
|
646 | : ZSTD_HcFindBestMatch_dictMatchState_selectMLS) : | |
|
647 | (searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_selectMLS | |
|
648 | : ZSTD_HcFindBestMatch_selectMLS); | |
|
638 | 649 | U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0; |
|
639 | 650 | |
|
640 | 651 | const ZSTD_matchState_t* const dms = ms->dictMatchState; |
@@ -653,7 +664,6 b' size_t ZSTD_compressBlock_lazy_generic(' | |||
|
653 | 664 | |
|
654 | 665 | /* init */ |
|
655 | 666 | ip += (dictAndPrefixLength == 0); |
|
656 | ms->nextToUpdate3 = ms->nextToUpdate; | |
|
657 | 667 | if (dictMode == ZSTD_noDict) { |
|
658 | 668 | U32 const maxRep = (U32)(ip - prefixLowest); |
|
659 | 669 | if (offset_2 > maxRep) savedOffset = offset_2, offset_2 = 0; |
@@ -844,7 +854,7 b' size_t ZSTD_compressBlock_lazy_generic(' | |||
|
844 | 854 | rep[1] = offset_2 ? offset_2 : savedOffset; |
|
845 | 855 | |
|
846 | 856 | /* Return the last literals size */ |
|
847 | return iend - anchor; | |
|
857 | return (size_t)(iend - anchor); | |
|
848 | 858 | } |
|
849 | 859 | |
|
850 | 860 | |
@@ -852,56 +862,56 b' size_t ZSTD_compressBlock_btlazy2(' | |||
|
852 | 862 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
853 | 863 | void const* src, size_t srcSize) |
|
854 | 864 | { |
|
855 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
865 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_noDict); | |
|
856 | 866 | } |
|
857 | 867 | |
|
858 | 868 | size_t ZSTD_compressBlock_lazy2( |
|
859 | 869 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
860 | 870 | void const* src, size_t srcSize) |
|
861 | 871 | { |
|
862 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
872 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_noDict); | |
|
863 | 873 | } |
|
864 | 874 | |
|
865 | 875 | size_t ZSTD_compressBlock_lazy( |
|
866 | 876 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
867 | 877 | void const* src, size_t srcSize) |
|
868 | 878 | { |
|
869 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
879 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_noDict); | |
|
870 | 880 | } |
|
871 | 881 | |
|
872 | 882 | size_t ZSTD_compressBlock_greedy( |
|
873 | 883 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
874 | 884 | void const* src, size_t srcSize) |
|
875 | 885 | { |
|
876 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
886 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_noDict); | |
|
877 | 887 | } |
|
878 | 888 | |
|
879 | 889 | size_t ZSTD_compressBlock_btlazy2_dictMatchState( |
|
880 | 890 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
881 | 891 | void const* src, size_t srcSize) |
|
882 | 892 | { |
|
883 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
893 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2, ZSTD_dictMatchState); | |
|
884 | 894 | } |
|
885 | 895 | |
|
886 | 896 | size_t ZSTD_compressBlock_lazy2_dictMatchState( |
|
887 | 897 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
888 | 898 | void const* src, size_t srcSize) |
|
889 | 899 | { |
|
890 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
900 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2, ZSTD_dictMatchState); | |
|
891 | 901 | } |
|
892 | 902 | |
|
893 | 903 | size_t ZSTD_compressBlock_lazy_dictMatchState( |
|
894 | 904 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
895 | 905 | void const* src, size_t srcSize) |
|
896 | 906 | { |
|
897 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
907 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1, ZSTD_dictMatchState); | |
|
898 | 908 | } |
|
899 | 909 | |
|
900 | 910 | size_t ZSTD_compressBlock_greedy_dictMatchState( |
|
901 | 911 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
902 | 912 | void const* src, size_t srcSize) |
|
903 | 913 | { |
|
904 |
return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, |
|
|
914 | return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dictMatchState); | |
|
905 | 915 | } |
|
906 | 916 | |
|
907 | 917 | |
@@ -910,7 +920,7 b' size_t ZSTD_compressBlock_lazy_extDict_g' | |||
|
910 | 920 | ZSTD_matchState_t* ms, seqStore_t* seqStore, |
|
911 | 921 | U32 rep[ZSTD_REP_NUM], |
|
912 | 922 | const void* src, size_t srcSize, |
|
913 |
const |
|
|
923 | const searchMethod_e searchMethod, const U32 depth) | |
|
914 | 924 | { |
|
915 | 925 | const BYTE* const istart = (const BYTE*)src; |
|
916 | 926 | const BYTE* ip = istart; |
@@ -928,12 +938,11 b' size_t ZSTD_compressBlock_lazy_extDict_g' | |||
|
928 | 938 | typedef size_t (*searchMax_f)( |
|
929 | 939 | ZSTD_matchState_t* ms, |
|
930 | 940 | const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr); |
|
931 | searchMax_f searchMax = searchMethod ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; | |
|
941 | searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS; | |
|
932 | 942 | |
|
933 | 943 | U32 offset_1 = rep[0], offset_2 = rep[1]; |
|
934 | 944 | |
|
935 | 945 | /* init */ |
|
936 | ms->nextToUpdate3 = ms->nextToUpdate; | |
|
937 | 946 | ip += (ip == prefixStart); |
|
938 | 947 | |
|
939 | 948 | /* Match Loop */ |
@@ -1070,7 +1079,7 b' size_t ZSTD_compressBlock_lazy_extDict_g' | |||
|
1070 | 1079 | rep[1] = offset_2; |
|
1071 | 1080 | |
|
1072 | 1081 | /* Return the last literals size */ |
|
1073 | return iend - anchor; | |
|
1082 | return (size_t)(iend - anchor); | |
|
1074 | 1083 | } |
|
1075 | 1084 | |
|
1076 | 1085 | |
@@ -1078,7 +1087,7 b' size_t ZSTD_compressBlock_greedy_extDict' | |||
|
1078 | 1087 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
|
1079 | 1088 | void const* src, size_t srcSize) |
|
1080 | 1089 | { |
|
1081 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
|
1090 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0); | |
|
1082 | 1091 | } |
|
1083 | 1092 | |
|
1084 | 1093 | size_t ZSTD_compressBlock_lazy_extDict( |
@@ -1086,7 +1095,7 b' size_t ZSTD_compressBlock_lazy_extDict(' | |||
|
1086 | 1095 | void const* src, size_t srcSize) |
|
1087 | 1096 | |
|
1088 | 1097 | { |
|
1089 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
|
1098 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 1); | |
|
1090 | 1099 | } |
|
1091 | 1100 | |
|
1092 | 1101 | size_t ZSTD_compressBlock_lazy2_extDict( |
@@ -1094,7 +1103,7 b' size_t ZSTD_compressBlock_lazy2_extDict(' | |||
|
1094 | 1103 | void const* src, size_t srcSize) |
|
1095 | 1104 | |
|
1096 | 1105 | { |
|
1097 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
|
1106 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 2); | |
|
1098 | 1107 | } |
|
1099 | 1108 | |
|
1100 | 1109 | size_t ZSTD_compressBlock_btlazy2_extDict( |
@@ -1102,5 +1111,5 b' size_t ZSTD_compressBlock_btlazy2_extDic' | |||
|
1102 | 1111 | void const* src, size_t srcSize) |
|
1103 | 1112 | |
|
1104 | 1113 | { |
|
1105 |
return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, |
|
|
1114 | return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2); | |
|
1106 | 1115 | } |
@@ -19,7 +19,7 b' extern "C" {' | |||
|
19 | 19 | |
|
20 | 20 | U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip); |
|
21 | 21 | |
|
22 |
void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). pre |
|
|
22 | void ZSTD_preserveUnsortedMark (U32* const table, U32 const size, U32 const reducerValue); /*! used in ZSTD_reduceIndex(). preemptively increase value of ZSTD_DUBT_UNSORTED_MARK */ | |
|
23 | 23 | |
|
24 | 24 | size_t ZSTD_compressBlock_btlazy2( |
|
25 | 25 | ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
@@ -429,7 +429,7 b' size_t ZSTD_ldm_generateSequences(' | |||
|
429 | 429 | */ |
|
430 | 430 | assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); |
|
431 | 431 | /* The input could be very large (in zstdmt), so it must be broken up into |
|
432 |
* chunks to enforce the maxim |
|
|
432 | * chunks to enforce the maximum distance and handle overflow correction. | |
|
433 | 433 | */ |
|
434 | 434 | assert(sequences->pos <= sequences->size); |
|
435 | 435 | assert(sequences->size <= sequences->capacity); |
@@ -447,7 +447,7 b' size_t ZSTD_ldm_generateSequences(' | |||
|
447 | 447 | if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) { |
|
448 | 448 | U32 const ldmHSize = 1U << params->hashLog; |
|
449 | 449 | U32 const correction = ZSTD_window_correctOverflow( |
|
450 |
&ldmState->window, /* cycleLog */ 0, maxDist, |
|
|
450 | &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart); | |
|
451 | 451 | ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); |
|
452 | 452 | } |
|
453 | 453 | /* 2. We enforce the maximum offset allowed. |
@@ -64,8 +64,14 b' MEM_STATIC double ZSTD_fCost(U32 price)' | |||
|
64 | 64 | } |
|
65 | 65 | #endif |
|
66 | 66 | |
|
67 | static int ZSTD_compressedLiterals(optState_t const* const optPtr) | |
|
68 | { | |
|
69 | return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed; | |
|
70 | } | |
|
71 | ||
|
67 | 72 | static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) |
|
68 | 73 | { |
|
74 | if (ZSTD_compressedLiterals(optPtr)) | |
|
69 | 75 | optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); |
|
70 | 76 | optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); |
|
71 | 77 | optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); |
@@ -99,6 +105,7 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||
|
99 | 105 | const BYTE* const src, size_t const srcSize, |
|
100 | 106 | int const optLevel) |
|
101 | 107 | { |
|
108 | int const compressedLiterals = ZSTD_compressedLiterals(optPtr); | |
|
102 | 109 | DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); |
|
103 | 110 | optPtr->priceType = zop_dynamic; |
|
104 | 111 | |
@@ -113,9 +120,10 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||
|
113 | 120 | /* huffman table presumed generated by dictionary */ |
|
114 | 121 | optPtr->priceType = zop_dynamic; |
|
115 | 122 | |
|
123 | if (compressedLiterals) { | |
|
124 | unsigned lit; | |
|
116 | 125 | assert(optPtr->litFreq != NULL); |
|
117 | 126 | optPtr->litSum = 0; |
|
118 | { unsigned lit; | |
|
119 | 127 | for (lit=0; lit<=MaxLit; lit++) { |
|
120 | 128 | U32 const scaleLog = 11; /* scale to 2K */ |
|
121 | 129 | U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit); |
@@ -163,10 +171,11 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||
|
163 | 171 | } else { /* not a dictionary */ |
|
164 | 172 | |
|
165 | 173 | assert(optPtr->litFreq != NULL); |
|
166 | { unsigned lit = MaxLit; | |
|
174 | if (compressedLiterals) { | |
|
175 | unsigned lit = MaxLit; | |
|
167 | 176 | HIST_count_simple(optPtr->litFreq, &lit, src, srcSize); /* use raw first block to init statistics */ |
|
177 | optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); | |
|
168 | 178 | } |
|
169 | optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); | |
|
170 | 179 | |
|
171 | 180 | { unsigned ll; |
|
172 | 181 | for (ll=0; ll<=MaxLL; ll++) |
@@ -190,6 +199,7 b' ZSTD_rescaleFreqs(optState_t* const optP' | |||
|
190 | 199 | |
|
191 | 200 | } else { /* new block : re-use previous statistics, scaled down */ |
|
192 | 201 | |
|
202 | if (compressedLiterals) | |
|
193 | 203 | optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1); |
|
194 | 204 | optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0); |
|
195 | 205 | optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0); |
@@ -207,6 +217,10 b' static U32 ZSTD_rawLiteralsCost(const BY' | |||
|
207 | 217 | int optLevel) |
|
208 | 218 | { |
|
209 | 219 | if (litLength == 0) return 0; |
|
220 | ||
|
221 | if (!ZSTD_compressedLiterals(optPtr)) | |
|
222 | return (litLength << 3) * BITCOST_MULTIPLIER; /* Uncompressed - 8 bits per literal. */ | |
|
223 | ||
|
210 | 224 | if (optPtr->priceType == zop_predef) |
|
211 | 225 | return (litLength*6) * BITCOST_MULTIPLIER; /* 6 bit per literal - no statistic used */ |
|
212 | 226 | |
@@ -241,13 +255,13 b' static U32 ZSTD_litLengthPrice(U32 const' | |||
|
241 | 255 | * to provide a cost which is directly comparable to a match ending at same position */ |
|
242 | 256 | static int ZSTD_litLengthContribution(U32 const litLength, const optState_t* const optPtr, int optLevel) |
|
243 | 257 | { |
|
244 | if (optPtr->priceType >= zop_predef) return WEIGHT(litLength, optLevel); | |
|
258 | if (optPtr->priceType >= zop_predef) return (int)WEIGHT(litLength, optLevel); | |
|
245 | 259 | |
|
246 | 260 | /* dynamic statistics */ |
|
247 | 261 | { U32 const llCode = ZSTD_LLcode(litLength); |
|
248 | int const contribution = (LL_bits[llCode] * BITCOST_MULTIPLIER) | |
|
249 | + WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ | |
|
250 | - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); | |
|
262 | int const contribution = (int)(LL_bits[llCode] * BITCOST_MULTIPLIER) | |
|
263 | + (int)WEIGHT(optPtr->litLengthFreq[0], optLevel) /* note: log2litLengthSum cancel out */ | |
|
264 | - (int)WEIGHT(optPtr->litLengthFreq[llCode], optLevel); | |
|
251 | 265 | #if 1 |
|
252 | 266 | return contribution; |
|
253 | 267 | #else |
@@ -264,7 +278,7 b' static int ZSTD_literalsContribution(con' | |||
|
264 | 278 | const optState_t* const optPtr, |
|
265 | 279 | int optLevel) |
|
266 | 280 | { |
|
267 | int const contribution = ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) | |
|
281 | int const contribution = (int)ZSTD_rawLiteralsCost(literals, litLength, optPtr, optLevel) | |
|
268 | 282 | + ZSTD_litLengthContribution(litLength, optPtr, optLevel); |
|
269 | 283 | return contribution; |
|
270 | 284 | } |
@@ -310,7 +324,8 b' static void ZSTD_updateStats(optState_t*' | |||
|
310 | 324 | U32 offsetCode, U32 matchLength) |
|
311 | 325 | { |
|
312 | 326 | /* literals */ |
|
313 | { U32 u; | |
|
327 | if (ZSTD_compressedLiterals(optPtr)) { | |
|
328 | U32 u; | |
|
314 | 329 | for (u=0; u < litLength; u++) |
|
315 | 330 | optPtr->litFreq[literals[u]] += ZSTD_LITFREQ_ADD; |
|
316 | 331 | optPtr->litSum += litLength*ZSTD_LITFREQ_ADD; |
@@ -357,13 +372,15 b' MEM_STATIC U32 ZSTD_readMINMATCH(const v' | |||
|
357 | 372 | |
|
358 | 373 | /* Update hashTable3 up to ip (excluded) |
|
359 | 374 | Assumption : always within prefix (i.e. not within extDict) */ |
|
360 |
static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, |
|
|
375 | static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms, | |
|
376 | U32* nextToUpdate3, | |
|
377 | const BYTE* const ip) | |
|
361 | 378 | { |
|
362 | 379 | U32* const hashTable3 = ms->hashTable3; |
|
363 | 380 | U32 const hashLog3 = ms->hashLog3; |
|
364 | 381 | const BYTE* const base = ms->window.base; |
|
365 |
U32 idx = |
|
|
366 |
U32 const target |
|
|
382 | U32 idx = *nextToUpdate3; | |
|
383 | U32 const target = (U32)(ip - base); | |
|
367 | 384 | size_t const hash3 = ZSTD_hash3Ptr(ip, hashLog3); |
|
368 | 385 | assert(hashLog3 > 0); |
|
369 | 386 | |
@@ -372,6 +389,7 b' static U32 ZSTD_insertAndFindFirstIndexH' | |||
|
372 | 389 | idx++; |
|
373 | 390 | } |
|
374 | 391 | |
|
392 | *nextToUpdate3 = target; | |
|
375 | 393 | return hashTable3[hash3]; |
|
376 | 394 | } |
|
377 | 395 | |
@@ -488,9 +506,11 b' static U32 ZSTD_insertBt1(' | |||
|
488 | 506 | } } |
|
489 | 507 | |
|
490 | 508 | *smallerPtr = *largerPtr = 0; |
|
491 | if (bestLength > 384) return MIN(192, (U32)(bestLength - 384)); /* speed optimization */ | |
|
509 | { U32 positions = 0; | |
|
510 | if (bestLength > 384) positions = MIN(192, (U32)(bestLength - 384)); /* speed optimization */ | |
|
492 | 511 | assert(matchEndIdx > current + 8); |
|
493 | return matchEndIdx - (current + 8); | |
|
512 | return MAX(positions, matchEndIdx - (current + 8)); | |
|
513 | } | |
|
494 | 514 | } |
|
495 | 515 | |
|
496 | 516 | FORCE_INLINE_TEMPLATE |
@@ -505,8 +525,13 b' void ZSTD_updateTree_internal(' | |||
|
505 | 525 | DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", |
|
506 | 526 | idx, target, dictMode); |
|
507 | 527 | |
|
508 | while(idx < target) | |
|
509 |
|
|
|
528 | while(idx < target) { | |
|
529 | U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); | |
|
530 | assert(idx < (U32)(idx + forward)); | |
|
531 | idx += forward; | |
|
532 | } | |
|
533 | assert((size_t)(ip - base) <= (size_t)(U32)(-1)); | |
|
534 | assert((size_t)(iend - base) <= (size_t)(U32)(-1)); | |
|
510 | 535 | ms->nextToUpdate = target; |
|
511 | 536 | } |
|
512 | 537 | |
@@ -516,11 +541,12 b' void ZSTD_updateTree(ZSTD_matchState_t* ' | |||
|
516 | 541 | |
|
517 | 542 | FORCE_INLINE_TEMPLATE |
|
518 | 543 | U32 ZSTD_insertBtAndGetAllMatches ( |
|
544 | ZSTD_match_t* matches, /* store result (found matches) in this table (presumed large enough) */ | |
|
519 | 545 | ZSTD_matchState_t* ms, |
|
546 | U32* nextToUpdate3, | |
|
520 | 547 | const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, |
|
521 | U32 rep[ZSTD_REP_NUM], | |
|
548 | const U32 rep[ZSTD_REP_NUM], | |
|
522 | 549 | U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ |
|
523 | ZSTD_match_t* matches, | |
|
524 | 550 | const U32 lengthToBeat, |
|
525 | 551 | U32 const mls /* template */) |
|
526 | 552 | { |
@@ -541,8 +567,8 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||
|
541 | 567 | U32 const dictLimit = ms->window.dictLimit; |
|
542 | 568 | const BYTE* const dictEnd = dictBase + dictLimit; |
|
543 | 569 | const BYTE* const prefixStart = base + dictLimit; |
|
544 | U32 const btLow = btMask >= current ? 0 : current - btMask; | |
|
545 | U32 const windowLow = ms->window.lowLimit; | |
|
570 | U32 const btLow = (btMask >= current) ? 0 : current - btMask; | |
|
571 | U32 const windowLow = ZSTD_getLowestMatchIndex(ms, current, cParams->windowLog); | |
|
546 | 572 | U32 const matchLow = windowLow ? windowLow : 1; |
|
547 | 573 | U32* smallerPtr = bt + 2*(current&btMask); |
|
548 | 574 | U32* largerPtr = bt + 2*(current&btMask) + 1; |
@@ -612,7 +638,7 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||
|
612 | 638 | |
|
613 | 639 | /* HC3 match finder */ |
|
614 | 640 | if ((mls == 3) /*static*/ && (bestLength < mls)) { |
|
615 | U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, ip); | |
|
641 | U32 const matchIndex3 = ZSTD_insertAndFindFirstIndexHash3(ms, nextToUpdate3, ip); | |
|
616 | 642 | if ((matchIndex3 >= matchLow) |
|
617 | 643 | & (current - matchIndex3 < (1<<18)) /*heuristic : longer distance likely too expensive*/ ) { |
|
618 | 644 | size_t mlen; |
@@ -638,9 +664,7 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||
|
638 | 664 | (ip+mlen == iLimit) ) { /* best possible length */ |
|
639 | 665 | ms->nextToUpdate = current+1; /* skip insertion */ |
|
640 | 666 | return 1; |
|
641 | } | |
|
642 | } | |
|
643 | } | |
|
667 | } } } | |
|
644 | 668 | /* no dictMatchState lookup: dicts don't have a populated HC3 table */ |
|
645 | 669 | } |
|
646 | 670 | |
@@ -648,19 +672,21 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||
|
648 | 672 | |
|
649 | 673 | while (nbCompares-- && (matchIndex >= matchLow)) { |
|
650 | 674 | U32* const nextPtr = bt + 2*(matchIndex & btMask); |
|
675 | const BYTE* match; | |
|
651 | 676 | size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ |
|
652 | const BYTE* match; | |
|
653 | 677 | assert(current > matchIndex); |
|
654 | 678 | |
|
655 | 679 | if ((dictMode == ZSTD_noDict) || (dictMode == ZSTD_dictMatchState) || (matchIndex+matchLength >= dictLimit)) { |
|
656 | 680 | assert(matchIndex+matchLength >= dictLimit); /* ensure the condition is correct when !extDict */ |
|
657 | 681 | match = base + matchIndex; |
|
682 | if (matchIndex >= dictLimit) assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ | |
|
658 | 683 | matchLength += ZSTD_count(ip+matchLength, match+matchLength, iLimit); |
|
659 | 684 | } else { |
|
660 | 685 | match = dictBase + matchIndex; |
|
686 | assert(memcmp(match, ip, matchLength) == 0); /* ensure early section of match is equal as expected */ | |
|
661 | 687 | matchLength += ZSTD_count_2segments(ip+matchLength, match+matchLength, iLimit, dictEnd, prefixStart); |
|
662 | 688 | if (matchIndex+matchLength >= dictLimit) |
|
663 | match = base + matchIndex; /* prepare for match[matchLength] */ | |
|
689 | match = base + matchIndex; /* prepare for match[matchLength] read */ | |
|
664 | 690 | } |
|
665 | 691 | |
|
666 | 692 | if (matchLength > bestLength) { |
@@ -745,10 +771,13 b' U32 ZSTD_insertBtAndGetAllMatches (' | |||
|
745 | 771 | |
|
746 | 772 | |
|
747 | 773 | FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches ( |
|
774 | ZSTD_match_t* matches, /* store result (match found, increasing size) in this table */ | |
|
748 | 775 | ZSTD_matchState_t* ms, |
|
776 | U32* nextToUpdate3, | |
|
749 | 777 | const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, |
|
750 |
U32 rep[ZSTD_REP_NUM], |
|
|
751 |
|
|
|
778 | const U32 rep[ZSTD_REP_NUM], | |
|
779 | U32 const ll0, | |
|
780 | U32 const lengthToBeat) | |
|
752 | 781 | { |
|
753 | 782 | const ZSTD_compressionParameters* const cParams = &ms->cParams; |
|
754 | 783 | U32 const matchLengthSearch = cParams->minMatch; |
@@ -757,12 +786,12 b' FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllM' | |||
|
757 | 786 | ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); |
|
758 | 787 | switch(matchLengthSearch) |
|
759 | 788 | { |
|
760 |
case 3 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
|
789 | case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3); | |
|
761 | 790 | default : |
|
762 |
case 4 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
|
763 |
case 5 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
|
791 | case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4); | |
|
792 | case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5); | |
|
764 | 793 | case 7 : |
|
765 |
case 6 : return ZSTD_insertBtAndGetAllMatches(ms, ip, iHighLimit, dictMode, rep, ll0, |
|
|
794 | case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6); | |
|
766 | 795 | } |
|
767 | 796 | } |
|
768 | 797 | |
@@ -838,6 +867,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||
|
838 | 867 | |
|
839 | 868 | U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); |
|
840 | 869 | U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; |
|
870 | U32 nextToUpdate3 = ms->nextToUpdate; | |
|
841 | 871 | |
|
842 | 872 | ZSTD_optimal_t* const opt = optStatePtr->priceTable; |
|
843 | 873 | ZSTD_match_t* const matches = optStatePtr->matchTable; |
@@ -847,7 +877,6 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||
|
847 | 877 | DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", |
|
848 | 878 | (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); |
|
849 | 879 | assert(optLevel <= 2); |
|
850 | ms->nextToUpdate3 = ms->nextToUpdate; | |
|
851 | 880 | ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); |
|
852 | 881 | ip += (ip==prefixStart); |
|
853 | 882 | |
@@ -858,7 +887,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||
|
858 | 887 | /* find first match */ |
|
859 | 888 | { U32 const litlen = (U32)(ip - anchor); |
|
860 | 889 | U32 const ll0 = !litlen; |
|
861 |
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, ip, iend, dictMode, rep, ll0, |
|
|
890 | U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch); | |
|
862 | 891 | if (!nbMatches) { ip++; continue; } |
|
863 | 892 | |
|
864 | 893 | /* initialize opt[0] */ |
@@ -870,7 +899,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||
|
870 | 899 | /* large match -> immediate encoding */ |
|
871 | 900 | { U32 const maxML = matches[nbMatches-1].len; |
|
872 | 901 | U32 const maxOffset = matches[nbMatches-1].off; |
|
873 | DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new serie", | |
|
902 | DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series", | |
|
874 | 903 | nbMatches, maxML, maxOffset, (U32)(ip-prefixStart)); |
|
875 | 904 | |
|
876 | 905 | if (maxML > sufficient_len) { |
@@ -955,7 +984,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||
|
955 | 984 | U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0; |
|
956 | 985 | U32 const previousPrice = opt[cur].price; |
|
957 | 986 | U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel); |
|
958 |
U32 const nbMatches = ZSTD_BtGetAllMatches(ms, inr, iend, dictMode, opt[cur].rep, ll0, |
|
|
987 | U32 const nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch); | |
|
959 | 988 | U32 matchNb; |
|
960 | 989 | if (!nbMatches) { |
|
961 | 990 | DEBUGLOG(7, "rPos:%u : no match found", cur); |
@@ -1079,7 +1108,7 b' ZSTD_compressBlock_opt_generic(ZSTD_matc' | |||
|
1079 | 1108 | } /* while (ip < ilimit) */ |
|
1080 | 1109 | |
|
1081 | 1110 | /* Return the last literals size */ |
|
1082 | return iend - anchor; | |
|
1111 | return (size_t)(iend - anchor); | |
|
1083 | 1112 | } |
|
1084 | 1113 | |
|
1085 | 1114 | |
@@ -1108,6 +1137,7 b' static U32 ZSTD_upscaleStat(unsigned* ta' | |||
|
1108 | 1137 | /* used in 2-pass strategy */ |
|
1109 | 1138 | MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) |
|
1110 | 1139 | { |
|
1140 | if (ZSTD_compressedLiterals(optPtr)) | |
|
1111 | 1141 | optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); |
|
1112 | 1142 | optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); |
|
1113 | 1143 | optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); |
@@ -1117,7 +1147,7 b' MEM_STATIC void ZSTD_upscaleStats(optSta' | |||
|
1117 | 1147 | /* ZSTD_initStats_ultra(): |
|
1118 | 1148 | * make a first compression pass, just to seed stats with more accurate starting values. |
|
1119 | 1149 | * only works on first block, with no dictionary and no ldm. |
|
1120 |
* this function cannot error, hence its con |
|
|
1150 | * this function cannot error, hence its contract must be respected. | |
|
1121 | 1151 | */ |
|
1122 | 1152 | static void |
|
1123 | 1153 | ZSTD_initStats_ultra(ZSTD_matchState_t* ms, |
@@ -1142,7 +1172,6 b' ZSTD_initStats_ultra(ZSTD_matchState_t* ' | |||
|
1142 | 1172 | ms->window.dictLimit += (U32)srcSize; |
|
1143 | 1173 | ms->window.lowLimit = ms->window.dictLimit; |
|
1144 | 1174 | ms->nextToUpdate = ms->window.dictLimit; |
|
1145 | ms->nextToUpdate3 = ms->window.dictLimit; | |
|
1146 | 1175 | |
|
1147 | 1176 | /* re-inforce weight of collected statistics */ |
|
1148 | 1177 | ZSTD_upscaleStats(&ms->opt); |
@@ -22,6 +22,7 b'' | |||
|
22 | 22 | /* ====== Dependencies ====== */ |
|
23 | 23 | #include <string.h> /* memcpy, memset */ |
|
24 | 24 | #include <limits.h> /* INT_MAX, UINT_MAX */ |
|
25 | #include "mem.h" /* MEM_STATIC */ | |
|
25 | 26 | #include "pool.h" /* threadpool */ |
|
26 | 27 | #include "threading.h" /* mutex */ |
|
27 | 28 | #include "zstd_compress_internal.h" /* MIN, ERROR, ZSTD_*, ZSTD_highbit32 */ |
@@ -456,7 +457,7 b' typedef struct {' | |||
|
456 | 457 | * Must be acquired after the main mutex when acquiring both. |
|
457 | 458 | */ |
|
458 | 459 | ZSTD_pthread_mutex_t ldmWindowMutex; |
|
459 |
ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is u |
|
|
460 | ZSTD_pthread_cond_t ldmWindowCond; /* Signaled when ldmWindow is updated */ | |
|
460 | 461 | ZSTD_window_t ldmWindow; /* A thread-safe copy of ldmState.window */ |
|
461 | 462 | } serialState_t; |
|
462 | 463 | |
@@ -647,7 +648,7 b' static void ZSTDMT_compressionJob(void* ' | |||
|
647 | 648 | buffer_t dstBuff = job->dstBuff; |
|
648 | 649 | size_t lastCBlockSize = 0; |
|
649 | 650 | |
|
650 |
/* res |
|
|
651 | /* resources */ | |
|
651 | 652 | if (cctx==NULL) JOB_ERROR(ERROR(memory_allocation)); |
|
652 | 653 | if (dstBuff.start == NULL) { /* streaming job : doesn't provide a dstBuffer */ |
|
653 | 654 | dstBuff = ZSTDMT_getBuffer(job->bufPool); |
@@ -672,7 +673,7 b' static void ZSTDMT_compressionJob(void* ' | |||
|
672 | 673 | if (ZSTD_isError(initError)) JOB_ERROR(initError); |
|
673 | 674 | } else { /* srcStart points at reloaded section */ |
|
674 | 675 | U64 const pledgedSrcSize = job->firstJob ? job->fullFrameSize : job->src.size; |
|
675 | { size_t const forceWindowError = ZSTD_CCtxParam_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); | |
|
676 | { size_t const forceWindowError = ZSTD_CCtxParams_setParameter(&jobParams, ZSTD_c_forceMaxWindow, !job->firstJob); | |
|
676 | 677 | if (ZSTD_isError(forceWindowError)) JOB_ERROR(forceWindowError); |
|
677 | 678 | } |
|
678 | 679 | { size_t const initError = ZSTD_compressBegin_advanced_internal(cctx, |
@@ -864,14 +865,10 b' static size_t ZSTDMT_expandJobsTable (ZS' | |||
|
864 | 865 | * Internal use only */ |
|
865 | 866 | size_t ZSTDMT_CCtxParam_setNbWorkers(ZSTD_CCtx_params* params, unsigned nbWorkers) |
|
866 | 867 | { |
|
867 | if (nbWorkers > ZSTDMT_NBWORKERS_MAX) nbWorkers = ZSTDMT_NBWORKERS_MAX; | |
|
868 | params->nbWorkers = nbWorkers; | |
|
869 | params->overlapLog = ZSTDMT_OVERLAPLOG_DEFAULT; | |
|
870 | params->jobSize = 0; | |
|
871 | return nbWorkers; | |
|
868 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_nbWorkers, (int)nbWorkers); | |
|
872 | 869 | } |
|
873 | 870 | |
|
874 | ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) | |
|
871 | MEM_STATIC ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced_internal(unsigned nbWorkers, ZSTD_customMem cMem) | |
|
875 | 872 | { |
|
876 | 873 | ZSTDMT_CCtx* mtctx; |
|
877 | 874 | U32 nbJobs = nbWorkers + 2; |
@@ -906,6 +903,17 b' ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(' | |||
|
906 | 903 | return mtctx; |
|
907 | 904 | } |
|
908 | 905 | |
|
906 | ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, ZSTD_customMem cMem) | |
|
907 | { | |
|
908 | #ifdef ZSTD_MULTITHREAD | |
|
909 | return ZSTDMT_createCCtx_advanced_internal(nbWorkers, cMem); | |
|
910 | #else | |
|
911 | (void)nbWorkers; | |
|
912 | (void)cMem; | |
|
913 | return NULL; | |
|
914 | #endif | |
|
915 | } | |
|
916 | ||
|
909 | 917 | ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers) |
|
910 | 918 | { |
|
911 | 919 | return ZSTDMT_createCCtx_advanced(nbWorkers, ZSTD_defaultCMem); |
@@ -986,26 +994,13 b' ZSTDMT_CCtxParam_setMTCtxParameter(ZSTD_' | |||
|
986 | 994 | { |
|
987 | 995 | case ZSTDMT_p_jobSize : |
|
988 | 996 | DEBUGLOG(4, "ZSTDMT_CCtxParam_setMTCtxParameter : set jobSize to %i", value); |
|
989 | if ( value != 0 /* default */ | |
|
990 | && value < ZSTDMT_JOBSIZE_MIN) | |
|
991 | value = ZSTDMT_JOBSIZE_MIN; | |
|
992 | assert(value >= 0); | |
|
993 | if (value > ZSTDMT_JOBSIZE_MAX) value = ZSTDMT_JOBSIZE_MAX; | |
|
994 | params->jobSize = value; | |
|
995 | return value; | |
|
996 | ||
|
997 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_jobSize, value); | |
|
997 | 998 | case ZSTDMT_p_overlapLog : |
|
998 | 999 | DEBUGLOG(4, "ZSTDMT_p_overlapLog : %i", value); |
|
999 | if (value < ZSTD_OVERLAPLOG_MIN) value = ZSTD_OVERLAPLOG_MIN; | |
|
1000 | if (value > ZSTD_OVERLAPLOG_MAX) value = ZSTD_OVERLAPLOG_MAX; | |
|
1001 | params->overlapLog = value; | |
|
1002 | return value; | |
|
1003 | ||
|
1000 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_overlapLog, value); | |
|
1004 | 1001 | case ZSTDMT_p_rsyncable : |
|
1005 | value = (value != 0); | |
|
1006 | params->rsyncable = value; | |
|
1007 | return value; | |
|
1008 | ||
|
1002 | DEBUGLOG(4, "ZSTD_p_rsyncable : %i", value); | |
|
1003 | return ZSTD_CCtxParams_setParameter(params, ZSTD_c_rsyncable, value); | |
|
1009 | 1004 | default : |
|
1010 | 1005 | return ERROR(parameter_unsupported); |
|
1011 | 1006 | } |
@@ -1021,32 +1016,29 b' size_t ZSTDMT_getMTCtxParameter(ZSTDMT_C' | |||
|
1021 | 1016 | { |
|
1022 | 1017 | switch (parameter) { |
|
1023 | 1018 | case ZSTDMT_p_jobSize: |
|
1024 | assert(mtctx->params.jobSize <= INT_MAX); | |
|
1025 | *value = (int)(mtctx->params.jobSize); | |
|
1026 | break; | |
|
1019 | return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_jobSize, value); | |
|
1027 | 1020 | case ZSTDMT_p_overlapLog: |
|
1028 | *value = mtctx->params.overlapLog; | |
|
1029 | break; | |
|
1021 | return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_overlapLog, value); | |
|
1030 | 1022 | case ZSTDMT_p_rsyncable: |
|
1031 | *value = mtctx->params.rsyncable; | |
|
1032 | break; | |
|
1023 | return ZSTD_CCtxParams_getParameter(&mtctx->params, ZSTD_c_rsyncable, value); | |
|
1033 | 1024 | default: |
|
1034 | 1025 | return ERROR(parameter_unsupported); |
|
1035 | 1026 | } |
|
1036 | return 0; | |
|
1037 | 1027 | } |
|
1038 | 1028 | |
|
1039 | 1029 | /* Sets parameters relevant to the compression job, |
|
1040 | 1030 | * initializing others to default values. */ |
|
1041 | 1031 | static ZSTD_CCtx_params ZSTDMT_initJobCCtxParams(ZSTD_CCtx_params const params) |
|
1042 | 1032 | { |
|
1043 | ZSTD_CCtx_params jobParams; | |
|
1044 | memset(&jobParams, 0, sizeof(jobParams)); | |
|
1045 | ||
|
1046 | jobParams.cParams = params.cParams; | |
|
1047 | jobParams.fParams = params.fParams; | |
|
1048 | jobParams.compressionLevel = params.compressionLevel; | |
|
1049 | ||
|
1033 | ZSTD_CCtx_params jobParams = params; | |
|
1034 | /* Clear parameters related to multithreading */ | |
|
1035 | jobParams.forceWindow = 0; | |
|
1036 | jobParams.nbWorkers = 0; | |
|
1037 | jobParams.jobSize = 0; | |
|
1038 | jobParams.overlapLog = 0; | |
|
1039 | jobParams.rsyncable = 0; | |
|
1040 | memset(&jobParams.ldmParams, 0, sizeof(ldmParams_t)); | |
|
1041 | memset(&jobParams.customMem, 0, sizeof(ZSTD_customMem)); | |
|
1050 | 1042 | return jobParams; |
|
1051 | 1043 | } |
|
1052 | 1044 | |
@@ -1056,7 +1048,7 b' static ZSTD_CCtx_params ZSTDMT_initJobCC' | |||
|
1056 | 1048 | static size_t ZSTDMT_resize(ZSTDMT_CCtx* mtctx, unsigned nbWorkers) |
|
1057 | 1049 | { |
|
1058 | 1050 | if (POOL_resize(mtctx->factory, nbWorkers)) return ERROR(memory_allocation); |
|
1059 |
|
|
|
1051 | FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbWorkers) ); | |
|
1060 | 1052 | mtctx->bufPool = ZSTDMT_expandBufferPool(mtctx->bufPool, nbWorkers); |
|
1061 | 1053 | if (mtctx->bufPool == NULL) return ERROR(memory_allocation); |
|
1062 | 1054 | mtctx->cctxPool = ZSTDMT_expandCCtxPool(mtctx->cctxPool, nbWorkers); |
@@ -1137,9 +1129,14 b' size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt' | |||
|
1137 | 1129 | size_t const produced = ZSTD_isError(cResult) ? 0 : cResult; |
|
1138 | 1130 | size_t const flushed = ZSTD_isError(cResult) ? 0 : jobPtr->dstFlushed; |
|
1139 | 1131 | assert(flushed <= produced); |
|
1132 | assert(jobPtr->consumed <= jobPtr->src.size); | |
|
1140 | 1133 | toFlush = produced - flushed; |
|
1141 | if (toFlush==0 && (jobPtr->consumed >= jobPtr->src.size)) { | |
|
1142 | /* doneJobID is not-fully-flushed, but toFlush==0 : doneJobID should be compressing some more data */ | |
|
1134 | /* if toFlush==0, nothing is available to flush. | |
|
1135 | * However, jobID is expected to still be active: | |
|
1136 | * if jobID was already completed and fully flushed, | |
|
1137 | * ZSTDMT_flushProduced() should have already moved onto next job. | |
|
1138 | * Therefore, some input has not yet been consumed. */ | |
|
1139 | if (toFlush==0) { | |
|
1143 | 1140 | assert(jobPtr->consumed < jobPtr->src.size); |
|
1144 | 1141 | } |
|
1145 | 1142 | } |
@@ -1156,12 +1153,16 b' size_t ZSTDMT_toFlushNow(ZSTDMT_CCtx* mt' | |||
|
1156 | 1153 | |
|
1157 | 1154 | static unsigned ZSTDMT_computeTargetJobLog(ZSTD_CCtx_params const params) |
|
1158 | 1155 | { |
|
1159 | if (params.ldmParams.enableLdm) | |
|
1156 | unsigned jobLog; | |
|
1157 | if (params.ldmParams.enableLdm) { | |
|
1160 | 1158 | /* In Long Range Mode, the windowLog is typically oversized. |
|
1161 | 1159 | * In which case, it's preferable to determine the jobSize |
|
1162 | 1160 | * based on chainLog instead. */ |
|
1163 |
|
|
|
1164 | return MAX(20, params.cParams.windowLog + 2); | |
|
1161 | jobLog = MAX(21, params.cParams.chainLog + 4); | |
|
1162 | } else { | |
|
1163 | jobLog = MAX(20, params.cParams.windowLog + 2); | |
|
1164 | } | |
|
1165 | return MIN(jobLog, (unsigned)ZSTDMT_JOBLOG_MAX); | |
|
1165 | 1166 | } |
|
1166 | 1167 | |
|
1167 | 1168 | static int ZSTDMT_overlapLog_default(ZSTD_strategy strat) |
@@ -1205,7 +1206,7 b' static size_t ZSTDMT_computeOverlapSize(' | |||
|
1205 | 1206 | ovLog = MIN(params.cParams.windowLog, ZSTDMT_computeTargetJobLog(params) - 2) |
|
1206 | 1207 | - overlapRLog; |
|
1207 | 1208 | } |
|
1208 |
assert(0 <= ovLog && ovLog <= |
|
|
1209 | assert(0 <= ovLog && ovLog <= ZSTD_WINDOWLOG_MAX); | |
|
1209 | 1210 | DEBUGLOG(4, "overlapLog : %i", params.overlapLog); |
|
1210 | 1211 | DEBUGLOG(4, "overlap size : %i", 1 << ovLog); |
|
1211 | 1212 | return (ovLog==0) ? 0 : (size_t)1 << ovLog; |
@@ -1263,7 +1264,7 b' static size_t ZSTDMT_compress_advanced_i' | |||
|
1263 | 1264 | if (ZSTDMT_serialState_reset(&mtctx->serial, mtctx->seqPool, params, avgJobSize)) |
|
1264 | 1265 | return ERROR(memory_allocation); |
|
1265 | 1266 | |
|
1266 |
|
|
|
1267 | FORWARD_IF_ERROR( ZSTDMT_expandJobsTable(mtctx, nbJobs) ); /* only expands if necessary */ | |
|
1267 | 1268 | |
|
1268 | 1269 | { unsigned u; |
|
1269 | 1270 | for (u=0; u<nbJobs; u++) { |
@@ -1396,10 +1397,10 b' size_t ZSTDMT_initCStream_internal(' | |||
|
1396 | 1397 | |
|
1397 | 1398 | /* init */ |
|
1398 | 1399 | if (params.nbWorkers != mtctx->params.nbWorkers) |
|
1399 |
|
|
|
1400 | FORWARD_IF_ERROR( ZSTDMT_resize(mtctx, params.nbWorkers) ); | |
|
1400 | 1401 | |
|
1401 | 1402 | if (params.jobSize != 0 && params.jobSize < ZSTDMT_JOBSIZE_MIN) params.jobSize = ZSTDMT_JOBSIZE_MIN; |
|
1402 | if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = ZSTDMT_JOBSIZE_MAX; | |
|
1403 | if (params.jobSize > (size_t)ZSTDMT_JOBSIZE_MAX) params.jobSize = (size_t)ZSTDMT_JOBSIZE_MAX; | |
|
1403 | 1404 | |
|
1404 | 1405 | mtctx->singleBlockingThread = (pledgedSrcSize <= ZSTDMT_JOBSIZE_MIN); /* do not trigger multi-threading when srcSize is too small */ |
|
1405 | 1406 | if (mtctx->singleBlockingThread) { |
@@ -1440,6 +1441,8 b' size_t ZSTDMT_initCStream_internal(' | |||
|
1440 | 1441 | if (mtctx->targetSectionSize == 0) { |
|
1441 | 1442 | mtctx->targetSectionSize = 1ULL << ZSTDMT_computeTargetJobLog(params); |
|
1442 | 1443 | } |
|
1444 | assert(mtctx->targetSectionSize <= (size_t)ZSTDMT_JOBSIZE_MAX); | |
|
1445 | ||
|
1443 | 1446 | if (params.rsyncable) { |
|
1444 | 1447 | /* Aim for the targetsectionSize as the average job size. */ |
|
1445 | 1448 | U32 const jobSizeMB = (U32)(mtctx->targetSectionSize >> 20); |
@@ -1547,7 +1550,7 b' size_t ZSTDMT_initCStream(ZSTDMT_CCtx* m' | |||
|
1547 | 1550 | /* ZSTDMT_writeLastEmptyBlock() |
|
1548 | 1551 | * Write a single empty block with an end-of-frame to finish a frame. |
|
1549 | 1552 | * Job must be created from streaming variant. |
|
1550 |
* This function is always successful |
|
|
1553 | * This function is always successful if expected conditions are fulfilled. | |
|
1551 | 1554 | */ |
|
1552 | 1555 | static void ZSTDMT_writeLastEmptyBlock(ZSTDMT_jobDescription* job) |
|
1553 | 1556 | { |
@@ -1987,7 +1990,7 b' size_t ZSTDMT_compressStream_generic(ZST' | |||
|
1987 | 1990 | assert(input->pos <= input->size); |
|
1988 | 1991 | |
|
1989 | 1992 | if (mtctx->singleBlockingThread) { /* delegate to single-thread (synchronous) */ |
|
1990 |
return ZSTD_compressStream |
|
|
1993 | return ZSTD_compressStream2(mtctx->cctxPool->cctx[0], output, input, endOp); | |
|
1991 | 1994 | } |
|
1992 | 1995 | |
|
1993 | 1996 | if ((mtctx->frameEnded) && (endOp==ZSTD_e_continue)) { |
@@ -2051,7 +2054,7 b' size_t ZSTDMT_compressStream_generic(ZST' | |||
|
2051 | 2054 | || ((endOp == ZSTD_e_end) && (!mtctx->frameEnded)) ) { /* must finish the frame with a zero-size block */ |
|
2052 | 2055 | size_t const jobSize = mtctx->inBuff.filled; |
|
2053 | 2056 | assert(mtctx->inBuff.filled <= mtctx->targetSectionSize); |
|
2054 |
|
|
|
2057 | FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, jobSize, endOp) ); | |
|
2055 | 2058 | } |
|
2056 | 2059 | |
|
2057 | 2060 | /* check for potential compressed data ready to be flushed */ |
@@ -2065,7 +2068,7 b' size_t ZSTDMT_compressStream_generic(ZST' | |||
|
2065 | 2068 | |
|
2066 | 2069 | size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input) |
|
2067 | 2070 | { |
|
2068 |
|
|
|
2071 | FORWARD_IF_ERROR( ZSTDMT_compressStream_generic(mtctx, output, input, ZSTD_e_continue) ); | |
|
2069 | 2072 | |
|
2070 | 2073 | /* recommended next input size : fill current input buffer */ |
|
2071 | 2074 | return mtctx->targetSectionSize - mtctx->inBuff.filled; /* note : could be zero when input buffer is fully filled and no more availability to create new job */ |
@@ -2082,7 +2085,7 b' static size_t ZSTDMT_flushStream_interna' | |||
|
2082 | 2085 | || ((endFrame==ZSTD_e_end) && !mtctx->frameEnded)) { /* need a last 0-size block to end frame */ |
|
2083 | 2086 | DEBUGLOG(5, "ZSTDMT_flushStream_internal : create a new job (%u bytes, end:%u)", |
|
2084 | 2087 | (U32)srcSize, (U32)endFrame); |
|
2085 |
|
|
|
2088 | FORWARD_IF_ERROR( ZSTDMT_createCompressionJob(mtctx, srcSize, endFrame) ); | |
|
2086 | 2089 | } |
|
2087 | 2090 | |
|
2088 | 2091 | /* check if there is any data available to flush */ |
@@ -17,10 +17,25 b'' | |||
|
17 | 17 | |
|
18 | 18 | |
|
19 | 19 | /* Note : This is an internal API. |
|
20 |
* |
|
|
20 | * These APIs used to be exposed with ZSTDLIB_API, | |
|
21 | 21 | * because it used to be the only way to invoke MT compression. |
|
22 |
* Now, it's recommended to use ZSTD_compress |
|
|
23 | * These methods will stop being exposed in a future version */ | |
|
22 | * Now, it's recommended to use ZSTD_compress2 and ZSTD_compressStream2() | |
|
23 | * instead. | |
|
24 | * | |
|
25 | * If you depend on these APIs and can't switch, then define | |
|
26 | * ZSTD_LEGACY_MULTITHREADED_API when making the dynamic library. | |
|
27 | * However, we may completely remove these functions in a future | |
|
28 | * release, so please switch soon. | |
|
29 | * | |
|
30 | * This API requires ZSTD_MULTITHREAD to be defined during compilation, | |
|
31 | * otherwise ZSTDMT_createCCtx*() will fail. | |
|
32 | */ | |
|
33 | ||
|
34 | #ifdef ZSTD_LEGACY_MULTITHREADED_API | |
|
35 | # define ZSTDMT_API ZSTDLIB_API | |
|
36 | #else | |
|
37 | # define ZSTDMT_API | |
|
38 | #endif | |
|
24 | 39 | |
|
25 | 40 | /* === Dependencies === */ |
|
26 | 41 | #include <stddef.h> /* size_t */ |
@@ -35,22 +50,25 b'' | |||
|
35 | 50 | #ifndef ZSTDMT_JOBSIZE_MIN |
|
36 | 51 | # define ZSTDMT_JOBSIZE_MIN (1 MB) |
|
37 | 52 | #endif |
|
53 | #define ZSTDMT_JOBLOG_MAX (MEM_32bits() ? 29 : 30) | |
|
38 | 54 | #define ZSTDMT_JOBSIZE_MAX (MEM_32bits() ? (512 MB) : (1024 MB)) |
|
39 | 55 | |
|
40 | 56 | |
|
41 | 57 | /* === Memory management === */ |
|
42 | 58 | typedef struct ZSTDMT_CCtx_s ZSTDMT_CCtx; |
|
43 | ZSTDLIB_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); | |
|
44 |
ZSTD |
|
|
59 | /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ | |
|
60 | ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx(unsigned nbWorkers); | |
|
61 | /* Requires ZSTD_MULTITHREAD to be defined during compilation, otherwise it will return NULL. */ | |
|
62 | ZSTDMT_API ZSTDMT_CCtx* ZSTDMT_createCCtx_advanced(unsigned nbWorkers, | |
|
45 | 63 | ZSTD_customMem cMem); |
|
46 |
ZSTD |
|
|
64 | ZSTDMT_API size_t ZSTDMT_freeCCtx(ZSTDMT_CCtx* mtctx); | |
|
47 | 65 | |
|
48 |
ZSTD |
|
|
66 | ZSTDMT_API size_t ZSTDMT_sizeof_CCtx(ZSTDMT_CCtx* mtctx); | |
|
49 | 67 | |
|
50 | 68 | |
|
51 | 69 | /* === Simple one-pass compression function === */ |
|
52 | 70 | |
|
53 |
ZSTD |
|
|
71 | ZSTDMT_API size_t ZSTDMT_compressCCtx(ZSTDMT_CCtx* mtctx, | |
|
54 | 72 | void* dst, size_t dstCapacity, |
|
55 | 73 | const void* src, size_t srcSize, |
|
56 | 74 | int compressionLevel); |
@@ -59,31 +77,31 b' ZSTDLIB_API size_t ZSTDMT_compressCCtx(Z' | |||
|
59 | 77 | |
|
60 | 78 | /* === Streaming functions === */ |
|
61 | 79 | |
|
62 |
ZSTD |
|
|
63 |
ZSTD |
|
|
80 | ZSTDMT_API size_t ZSTDMT_initCStream(ZSTDMT_CCtx* mtctx, int compressionLevel); | |
|
81 | ZSTDMT_API size_t ZSTDMT_resetCStream(ZSTDMT_CCtx* mtctx, unsigned long long pledgedSrcSize); /**< if srcSize is not known at reset time, use ZSTD_CONTENTSIZE_UNKNOWN. Note: for compatibility with older programs, 0 means the same as ZSTD_CONTENTSIZE_UNKNOWN, but it will change in the future to mean "empty" */ | |
|
64 | 82 | |
|
65 |
ZSTD |
|
|
66 |
ZSTD |
|
|
83 | ZSTDMT_API size_t ZSTDMT_nextInputSizeHint(const ZSTDMT_CCtx* mtctx); | |
|
84 | ZSTDMT_API size_t ZSTDMT_compressStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |
|
67 | 85 | |
|
68 |
ZSTD |
|
|
69 |
ZSTD |
|
|
86 | ZSTDMT_API size_t ZSTDMT_flushStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |
|
87 | ZSTDMT_API size_t ZSTDMT_endStream(ZSTDMT_CCtx* mtctx, ZSTD_outBuffer* output); /**< @return : 0 == all flushed; >0 : still some data to be flushed; or an error code (ZSTD_isError()) */ | |
|
70 | 88 | |
|
71 | 89 | |
|
72 | 90 | /* === Advanced functions and parameters === */ |
|
73 | 91 | |
|
74 |
ZSTD |
|
|
92 | ZSTDMT_API size_t ZSTDMT_compress_advanced(ZSTDMT_CCtx* mtctx, | |
|
75 | 93 |
|
|
76 | 94 |
|
|
77 | 95 |
|
|
78 | 96 |
|
|
79 | 97 |
|
|
80 | 98 | |
|
81 |
ZSTD |
|
|
99 | ZSTDMT_API size_t ZSTDMT_initCStream_advanced(ZSTDMT_CCtx* mtctx, | |
|
82 | 100 | const void* dict, size_t dictSize, /* dict can be released after init, a local copy is preserved within zcs */ |
|
83 | 101 | ZSTD_parameters params, |
|
84 | 102 | unsigned long long pledgedSrcSize); /* pledgedSrcSize is optional and can be zero == unknown */ |
|
85 | 103 | |
|
86 |
ZSTD |
|
|
104 | ZSTDMT_API size_t ZSTDMT_initCStream_usingCDict(ZSTDMT_CCtx* mtctx, | |
|
87 | 105 | const ZSTD_CDict* cdict, |
|
88 | 106 | ZSTD_frameParameters fparams, |
|
89 | 107 | unsigned long long pledgedSrcSize); /* note : zero means empty */ |
@@ -92,7 +110,7 b' ZSTDLIB_API size_t ZSTDMT_initCStream_us' | |||
|
92 | 110 | * List of parameters that can be set using ZSTDMT_setMTCtxParameter() */ |
|
93 | 111 | typedef enum { |
|
94 | 112 | ZSTDMT_p_jobSize, /* Each job is compressed in parallel. By default, this value is dynamically determined depending on compression parameters. Can be set explicitly here. */ |
|
95 |
ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression |
|
|
113 | ZSTDMT_p_overlapLog, /* Each job may reload a part of previous job to enhance compression ratio; 0 == no overlap, 6(default) == use 1/8th of window, >=9 == use full window. This is a "sticky" parameter : its value will be re-used on next compression job */ | |
|
96 | 114 | ZSTDMT_p_rsyncable /* Enables rsyncable mode. */ |
|
97 | 115 | } ZSTDMT_parameter; |
|
98 | 116 | |
@@ -101,12 +119,12 b' typedef enum {' | |||
|
101 | 119 | * The function must be called typically after ZSTD_createCCtx() but __before ZSTDMT_init*() !__ |
|
102 | 120 | * Parameters not explicitly reset by ZSTDMT_init*() remain the same in consecutive compression sessions. |
|
103 | 121 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ |
|
104 |
ZSTD |
|
|
122 | ZSTDMT_API size_t ZSTDMT_setMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int value); | |
|
105 | 123 | |
|
106 | 124 | /* ZSTDMT_getMTCtxParameter() : |
|
107 | 125 | * Query the ZSTDMT_CCtx for a parameter value. |
|
108 | 126 | * @return : 0, or an error code (which can be tested using ZSTD_isError()) */ |
|
109 |
ZSTD |
|
|
127 | ZSTDMT_API size_t ZSTDMT_getMTCtxParameter(ZSTDMT_CCtx* mtctx, ZSTDMT_parameter parameter, int* value); | |
|
110 | 128 | |
|
111 | 129 | |
|
112 | 130 | /*! ZSTDMT_compressStream_generic() : |
@@ -116,7 +134,7 b' ZSTDLIB_API size_t ZSTDMT_getMTCtxParame' | |||
|
116 | 134 | * 0 if fully flushed |
|
117 | 135 | * or an error code |
|
118 | 136 | * note : needs to be init using any ZSTD_initCStream*() variant */ |
|
119 |
ZSTD |
|
|
137 | ZSTDMT_API size_t ZSTDMT_compressStream_generic(ZSTDMT_CCtx* mtctx, | |
|
120 | 138 | ZSTD_outBuffer* output, |
|
121 | 139 | ZSTD_inBuffer* input, |
|
122 | 140 | ZSTD_EndDirective endOp); |
@@ -105,8 +105,8 b' ZSTD_loadEntropy_intoDDict(ZSTD_DDict* d' | |||
|
105 | 105 | ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); |
|
106 | 106 | |
|
107 | 107 | /* load entropy tables */ |
|
108 | CHECK_E( ZSTD_loadDEntropy(&ddict->entropy, | |
|
109 |
|
|
|
108 | RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( | |
|
109 | &ddict->entropy, ddict->dictContent, ddict->dictSize)), | |
|
110 | 110 |
|
|
111 | 111 | ddict->entropyPresent = 1; |
|
112 | 112 | return 0; |
@@ -133,7 +133,7 b' static size_t ZSTD_initDDict_internal(ZS' | |||
|
133 | 133 | ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001); /* cover both little and big endian */ |
|
134 | 134 | |
|
135 | 135 | /* parse dictionary content */ |
|
136 |
|
|
|
136 | FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) ); | |
|
137 | 137 | |
|
138 | 138 | return 0; |
|
139 | 139 | } |
@@ -106,6 +106,7 b' static void ZSTD_initDCtx_internal(ZSTD_' | |||
|
106 | 106 | dctx->ddictLocal = NULL; |
|
107 | 107 | dctx->dictEnd = NULL; |
|
108 | 108 | dctx->ddictIsCold = 0; |
|
109 | dctx->dictUses = ZSTD_dont_use; | |
|
109 | 110 | dctx->inBuff = NULL; |
|
110 | 111 | dctx->inBuffSize = 0; |
|
111 | 112 | dctx->outBuffSize = 0; |
@@ -147,13 +148,20 b' ZSTD_DCtx* ZSTD_createDCtx(void)' | |||
|
147 | 148 | return ZSTD_createDCtx_advanced(ZSTD_defaultCMem); |
|
148 | 149 | } |
|
149 | 150 | |
|
151 | static void ZSTD_clearDict(ZSTD_DCtx* dctx) | |
|
152 | { | |
|
153 | ZSTD_freeDDict(dctx->ddictLocal); | |
|
154 | dctx->ddictLocal = NULL; | |
|
155 | dctx->ddict = NULL; | |
|
156 | dctx->dictUses = ZSTD_dont_use; | |
|
157 | } | |
|
158 | ||
|
150 | 159 | size_t ZSTD_freeDCtx(ZSTD_DCtx* dctx) |
|
151 | 160 | { |
|
152 | 161 | if (dctx==NULL) return 0; /* support free on NULL */ |
|
153 |
|
|
|
162 | RETURN_ERROR_IF(dctx->staticSize, memory_allocation, "not compatible with static DCtx"); | |
|
154 | 163 | { ZSTD_customMem const cMem = dctx->customMem; |
|
155 |
ZSTD_ |
|
|
156 | dctx->ddictLocal = NULL; | |
|
164 | ZSTD_clearDict(dctx); | |
|
157 | 165 | ZSTD_free(dctx->inBuff, cMem); |
|
158 | 166 | dctx->inBuff = NULL; |
|
159 | 167 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) |
@@ -203,7 +211,7 b' unsigned ZSTD_isFrame(const void* buffer' | |||
|
203 | 211 | static size_t ZSTD_frameHeaderSize_internal(const void* src, size_t srcSize, ZSTD_format_e format) |
|
204 | 212 | { |
|
205 | 213 | size_t const minInputSize = ZSTD_startingInputLength(format); |
|
206 |
|
|
|
214 | RETURN_ERROR_IF(srcSize < minInputSize, srcSize_wrong); | |
|
207 | 215 | |
|
208 | 216 | { BYTE const fhd = ((const BYTE*)src)[minInputSize-1]; |
|
209 | 217 | U32 const dictID= fhd & 3; |
@@ -238,7 +246,7 b' size_t ZSTD_getFrameHeader_advanced(ZSTD' | |||
|
238 | 246 | |
|
239 | 247 | memset(zfhPtr, 0, sizeof(*zfhPtr)); /* not strictly necessary, but static analyzer do not understand that zfhPtr is only going to be read only if return value is zero, since they are 2 different signals */ |
|
240 | 248 | if (srcSize < minInputSize) return minInputSize; |
|
241 |
|
|
|
249 | RETURN_ERROR_IF(src==NULL, GENERIC, "invalid parameter"); | |
|
242 | 250 | |
|
243 | 251 | if ( (format != ZSTD_f_zstd1_magicless) |
|
244 | 252 | && (MEM_readLE32(src) != ZSTD_MAGICNUMBER) ) { |
@@ -251,7 +259,7 b' size_t ZSTD_getFrameHeader_advanced(ZSTD' | |||
|
251 | 259 | zfhPtr->frameType = ZSTD_skippableFrame; |
|
252 | 260 | return 0; |
|
253 | 261 | } |
|
254 |
|
|
|
262 | RETURN_ERROR(prefix_unknown); | |
|
255 | 263 | } |
|
256 | 264 | |
|
257 | 265 | /* ensure there is enough `srcSize` to fully read/decode frame header */ |
@@ -269,14 +277,13 b' size_t ZSTD_getFrameHeader_advanced(ZSTD' | |||
|
269 | 277 | U64 windowSize = 0; |
|
270 | 278 | U32 dictID = 0; |
|
271 | 279 | U64 frameContentSize = ZSTD_CONTENTSIZE_UNKNOWN; |
|
272 | if ((fhdByte & 0x08) != 0) | |
|
273 | return ERROR(frameParameter_unsupported); /* reserved bits, must be zero */ | |
|
280 | RETURN_ERROR_IF((fhdByte & 0x08) != 0, frameParameter_unsupported, | |
|
281 | "reserved bits, must be zero"); | |
|
274 | 282 | |
|
275 | 283 | if (!singleSegment) { |
|
276 | 284 | BYTE const wlByte = ip[pos++]; |
|
277 | 285 | U32 const windowLog = (wlByte >> 3) + ZSTD_WINDOWLOG_ABSOLUTEMIN; |
|
278 |
|
|
|
279 | return ERROR(frameParameter_windowTooLarge); | |
|
286 | RETURN_ERROR_IF(windowLog > ZSTD_WINDOWLOG_MAX, frameParameter_windowTooLarge); | |
|
280 | 287 | windowSize = (1ULL << windowLog); |
|
281 | 288 | windowSize += (windowSize >> 3) * (wlByte&7); |
|
282 | 289 | } |
@@ -348,14 +355,16 b' static size_t readSkippableFrameSize(voi' | |||
|
348 | 355 | size_t const skippableHeaderSize = ZSTD_SKIPPABLEHEADERSIZE; |
|
349 | 356 | U32 sizeU32; |
|
350 | 357 | |
|
351 |
|
|
|
352 | return ERROR(srcSize_wrong); | |
|
358 | RETURN_ERROR_IF(srcSize < ZSTD_SKIPPABLEHEADERSIZE, srcSize_wrong); | |
|
353 | 359 | |
|
354 | 360 | sizeU32 = MEM_readLE32((BYTE const*)src + ZSTD_FRAMEIDSIZE); |
|
355 |
|
|
|
356 |
|
|
|
357 | ||
|
358 |
|
|
|
361 | RETURN_ERROR_IF((U32)(sizeU32 + ZSTD_SKIPPABLEHEADERSIZE) < sizeU32, | |
|
362 | frameParameter_unsupported); | |
|
363 | { | |
|
364 | size_t const skippableSize = skippableHeaderSize + sizeU32; | |
|
365 | RETURN_ERROR_IF(skippableSize > srcSize, srcSize_wrong); | |
|
366 | return skippableSize; | |
|
367 | } | |
|
359 | 368 | } |
|
360 | 369 | |
|
361 | 370 | /** ZSTD_findDecompressedSize() : |
@@ -372,11 +381,10 b' unsigned long long ZSTD_findDecompressed' | |||
|
372 | 381 | |
|
373 | 382 | if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { |
|
374 | 383 | size_t const skippableSize = readSkippableFrameSize(src, srcSize); |
|
375 | if (ZSTD_isError(skippableSize)) | |
|
376 | return skippableSize; | |
|
377 | if (srcSize < skippableSize) { | |
|
384 | if (ZSTD_isError(skippableSize)) { | |
|
378 | 385 | return ZSTD_CONTENTSIZE_ERROR; |
|
379 | 386 | } |
|
387 | assert(skippableSize <= srcSize); | |
|
380 | 388 | |
|
381 | 389 | src = (const BYTE *)src + skippableSize; |
|
382 | 390 | srcSize -= skippableSize; |
@@ -428,13 +436,91 b' static size_t ZSTD_decodeFrameHeader(ZST' | |||
|
428 | 436 | { |
|
429 | 437 | size_t const result = ZSTD_getFrameHeader_advanced(&(dctx->fParams), src, headerSize, dctx->format); |
|
430 | 438 | if (ZSTD_isError(result)) return result; /* invalid header */ |
|
431 |
|
|
|
432 | if (dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID)) | |
|
433 | return ERROR(dictionary_wrong); | |
|
439 | RETURN_ERROR_IF(result>0, srcSize_wrong, "headerSize too small"); | |
|
440 | #ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION | |
|
441 | /* Skip the dictID check in fuzzing mode, because it makes the search | |
|
442 | * harder. | |
|
443 | */ | |
|
444 | RETURN_ERROR_IF(dctx->fParams.dictID && (dctx->dictID != dctx->fParams.dictID), | |
|
445 | dictionary_wrong); | |
|
446 | #endif | |
|
434 | 447 | if (dctx->fParams.checksumFlag) XXH64_reset(&dctx->xxhState, 0); |
|
435 | 448 | return 0; |
|
436 | 449 | } |
|
437 | 450 | |
|
451 | static ZSTD_frameSizeInfo ZSTD_errorFrameSizeInfo(size_t ret) | |
|
452 | { | |
|
453 | ZSTD_frameSizeInfo frameSizeInfo; | |
|
454 | frameSizeInfo.compressedSize = ret; | |
|
455 | frameSizeInfo.decompressedBound = ZSTD_CONTENTSIZE_ERROR; | |
|
456 | return frameSizeInfo; | |
|
457 | } | |
|
458 | ||
|
459 | static ZSTD_frameSizeInfo ZSTD_findFrameSizeInfo(const void* src, size_t srcSize) | |
|
460 | { | |
|
461 | ZSTD_frameSizeInfo frameSizeInfo; | |
|
462 | memset(&frameSizeInfo, 0, sizeof(ZSTD_frameSizeInfo)); | |
|
463 | ||
|
464 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) | |
|
465 | if (ZSTD_isLegacy(src, srcSize)) | |
|
466 | return ZSTD_findFrameSizeInfoLegacy(src, srcSize); | |
|
467 | #endif | |
|
468 | ||
|
469 | if ((srcSize >= ZSTD_SKIPPABLEHEADERSIZE) | |
|
470 | && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { | |
|
471 | frameSizeInfo.compressedSize = readSkippableFrameSize(src, srcSize); | |
|
472 | assert(ZSTD_isError(frameSizeInfo.compressedSize) || | |
|
473 | frameSizeInfo.compressedSize <= srcSize); | |
|
474 | return frameSizeInfo; | |
|
475 | } else { | |
|
476 | const BYTE* ip = (const BYTE*)src; | |
|
477 | const BYTE* const ipstart = ip; | |
|
478 | size_t remainingSize = srcSize; | |
|
479 | size_t nbBlocks = 0; | |
|
480 | ZSTD_frameHeader zfh; | |
|
481 | ||
|
482 | /* Extract Frame Header */ | |
|
483 | { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); | |
|
484 | if (ZSTD_isError(ret)) | |
|
485 | return ZSTD_errorFrameSizeInfo(ret); | |
|
486 | if (ret > 0) | |
|
487 | return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); | |
|
488 | } | |
|
489 | ||
|
490 | ip += zfh.headerSize; | |
|
491 | remainingSize -= zfh.headerSize; | |
|
492 | ||
|
493 | /* Iterate over each block */ | |
|
494 | while (1) { | |
|
495 | blockProperties_t blockProperties; | |
|
496 | size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); | |
|
497 | if (ZSTD_isError(cBlockSize)) | |
|
498 | return ZSTD_errorFrameSizeInfo(cBlockSize); | |
|
499 | ||
|
500 | if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) | |
|
501 | return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); | |
|
502 | ||
|
503 | ip += ZSTD_blockHeaderSize + cBlockSize; | |
|
504 | remainingSize -= ZSTD_blockHeaderSize + cBlockSize; | |
|
505 | nbBlocks++; | |
|
506 | ||
|
507 | if (blockProperties.lastBlock) break; | |
|
508 | } | |
|
509 | ||
|
510 | /* Final frame content checksum */ | |
|
511 | if (zfh.checksumFlag) { | |
|
512 | if (remainingSize < 4) | |
|
513 | return ZSTD_errorFrameSizeInfo(ERROR(srcSize_wrong)); | |
|
514 | ip += 4; | |
|
515 | } | |
|
516 | ||
|
517 | frameSizeInfo.compressedSize = ip - ipstart; | |
|
518 | frameSizeInfo.decompressedBound = (zfh.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) | |
|
519 | ? zfh.frameContentSize | |
|
520 | : nbBlocks * zfh.blockSizeMax; | |
|
521 | return frameSizeInfo; | |
|
522 | } | |
|
523 | } | |
|
438 | 524 | |
|
439 | 525 | /** ZSTD_findFrameCompressedSize() : |
|
440 | 526 | * compatible with legacy mode |
@@ -443,53 +529,34 b' static size_t ZSTD_decodeFrameHeader(ZST' | |||
|
443 | 529 | * @return : the compressed size of the frame starting at `src` */ |
|
444 | 530 | size_t ZSTD_findFrameCompressedSize(const void *src, size_t srcSize) |
|
445 | 531 | { |
|
446 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1) | |
|
447 | if (ZSTD_isLegacy(src, srcSize)) | |
|
448 | return ZSTD_findFrameCompressedSizeLegacy(src, srcSize); | |
|
449 | #endif | |
|
450 | if ( (srcSize >= ZSTD_SKIPPABLEHEADERSIZE) | |
|
451 | && (MEM_readLE32(src) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START ) { | |
|
452 | return readSkippableFrameSize(src, srcSize); | |
|
453 | } else { | |
|
454 | const BYTE* ip = (const BYTE*)src; | |
|
455 | const BYTE* const ipstart = ip; | |
|
456 | size_t remainingSize = srcSize; | |
|
457 | ZSTD_frameHeader zfh; | |
|
458 | ||
|
459 | /* Extract Frame Header */ | |
|
460 | { size_t const ret = ZSTD_getFrameHeader(&zfh, src, srcSize); | |
|
461 | if (ZSTD_isError(ret)) return ret; | |
|
462 | if (ret > 0) return ERROR(srcSize_wrong); | |
|
532 | ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); | |
|
533 | return frameSizeInfo.compressedSize; | |
|
463 | 534 | } |
|
464 | 535 | |
|
465 | ip += zfh.headerSize; | |
|
466 | remainingSize -= zfh.headerSize; | |
|
467 | ||
|
468 | /* Loop on each block */ | |
|
469 | while (1) { | |
|
470 | blockProperties_t blockProperties; | |
|
471 | size_t const cBlockSize = ZSTD_getcBlockSize(ip, remainingSize, &blockProperties); | |
|
472 | if (ZSTD_isError(cBlockSize)) return cBlockSize; | |
|
473 | ||
|
474 | if (ZSTD_blockHeaderSize + cBlockSize > remainingSize) | |
|
475 | return ERROR(srcSize_wrong); | |
|
476 | ||
|
477 | ip += ZSTD_blockHeaderSize + cBlockSize; | |
|
478 | remainingSize -= ZSTD_blockHeaderSize + cBlockSize; | |
|
479 | ||
|
480 | if (blockProperties.lastBlock) break; | |
|
536 | /** ZSTD_decompressBound() : | |
|
537 | * compatible with legacy mode | |
|
538 | * `src` must point to the start of a ZSTD frame or a skippeable frame | |
|
539 | * `srcSize` must be at least as large as the frame contained | |
|
540 | * @return : the maximum decompressed size of the compressed source | |
|
541 | */ | |
|
542 | unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize) | |
|
543 | { | |
|
544 | unsigned long long bound = 0; | |
|
545 | /* Iterate over each frame */ | |
|
546 | while (srcSize > 0) { | |
|
547 | ZSTD_frameSizeInfo const frameSizeInfo = ZSTD_findFrameSizeInfo(src, srcSize); | |
|
548 | size_t const compressedSize = frameSizeInfo.compressedSize; | |
|
549 | unsigned long long const decompressedBound = frameSizeInfo.decompressedBound; | |
|
550 | if (ZSTD_isError(compressedSize) || decompressedBound == ZSTD_CONTENTSIZE_ERROR) | |
|
551 | return ZSTD_CONTENTSIZE_ERROR; | |
|
552 | assert(srcSize >= compressedSize); | |
|
553 | src = (const BYTE*)src + compressedSize; | |
|
554 | srcSize -= compressedSize; | |
|
555 | bound += decompressedBound; | |
|
481 | 556 |
|
|
482 | ||
|
483 | if (zfh.checksumFlag) { /* Final frame content checksum */ | |
|
484 | if (remainingSize < 4) return ERROR(srcSize_wrong); | |
|
485 | ip += 4; | |
|
557 | return bound; | |
|
486 | 558 | } |
|
487 | 559 | |
|
488 | return ip - ipstart; | |
|
489 | } | |
|
490 | } | |
|
491 | ||
|
492 | ||
|
493 | 560 | |
|
494 | 561 | /*-************************************************************* |
|
495 | 562 | * Frame decoding |
@@ -507,9 +574,10 b' void ZSTD_checkContinuity(ZSTD_DCtx* dct' | |||
|
507 | 574 | } |
|
508 | 575 | |
|
509 | 576 | /** ZSTD_insertBlock() : |
|
510 |
|
|
|
577 | * insert `src` block into `dctx` history. Useful to track uncompressed blocks. */ | |
|
511 | 578 | size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize) |
|
512 | 579 | { |
|
580 | DEBUGLOG(5, "ZSTD_insertBlock: %u bytes", (unsigned)blockSize); | |
|
513 | 581 | ZSTD_checkContinuity(dctx, blockStart); |
|
514 | 582 | dctx->previousDstEnd = (const char*)blockStart + blockSize; |
|
515 | 583 | return blockSize; |
@@ -522,9 +590,9 b' static size_t ZSTD_copyRawBlock(void* ds' | |||
|
522 | 590 | DEBUGLOG(5, "ZSTD_copyRawBlock"); |
|
523 | 591 | if (dst == NULL) { |
|
524 | 592 | if (srcSize == 0) return 0; |
|
525 |
|
|
|
593 | RETURN_ERROR(dstBuffer_null); | |
|
526 | 594 | } |
|
527 |
|
|
|
595 | RETURN_ERROR_IF(srcSize > dstCapacity, dstSize_tooSmall); | |
|
528 | 596 | memcpy(dst, src, srcSize); |
|
529 | 597 | return srcSize; |
|
530 | 598 | } |
@@ -535,9 +603,9 b' static size_t ZSTD_setRleBlock(void* dst' | |||
|
535 | 603 | { |
|
536 | 604 | if (dst == NULL) { |
|
537 | 605 | if (regenSize == 0) return 0; |
|
538 |
|
|
|
606 | RETURN_ERROR(dstBuffer_null); | |
|
539 | 607 | } |
|
540 |
|
|
|
608 | RETURN_ERROR_IF(regenSize > dstCapacity, dstSize_tooSmall); | |
|
541 | 609 | memset(dst, b, regenSize); |
|
542 | 610 | return regenSize; |
|
543 | 611 | } |
@@ -560,15 +628,16 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||
|
560 | 628 | DEBUGLOG(4, "ZSTD_decompressFrame (srcSize:%i)", (int)*srcSizePtr); |
|
561 | 629 | |
|
562 | 630 | /* check */ |
|
563 | if (remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize) | |
|
564 | return ERROR(srcSize_wrong); | |
|
631 | RETURN_ERROR_IF( | |
|
632 | remainingSrcSize < ZSTD_FRAMEHEADERSIZE_MIN+ZSTD_blockHeaderSize, | |
|
633 | srcSize_wrong); | |
|
565 | 634 | |
|
566 | 635 | /* Frame Header */ |
|
567 | 636 | { size_t const frameHeaderSize = ZSTD_frameHeaderSize(ip, ZSTD_FRAMEHEADERSIZE_PREFIX); |
|
568 | 637 | if (ZSTD_isError(frameHeaderSize)) return frameHeaderSize; |
|
569 |
|
|
|
570 |
|
|
|
571 |
|
|
|
638 | RETURN_ERROR_IF(remainingSrcSize < frameHeaderSize+ZSTD_blockHeaderSize, | |
|
639 | srcSize_wrong); | |
|
640 | FORWARD_IF_ERROR( ZSTD_decodeFrameHeader(dctx, ip, frameHeaderSize) ); | |
|
572 | 641 | ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize; |
|
573 | 642 | } |
|
574 | 643 | |
@@ -581,7 +650,7 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||
|
581 | 650 | |
|
582 | 651 | ip += ZSTD_blockHeaderSize; |
|
583 | 652 | remainingSrcSize -= ZSTD_blockHeaderSize; |
|
584 |
|
|
|
653 | RETURN_ERROR_IF(cBlockSize > remainingSrcSize, srcSize_wrong); | |
|
585 | 654 | |
|
586 | 655 | switch(blockProperties.blockType) |
|
587 | 656 | { |
@@ -596,7 +665,7 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||
|
596 | 665 | break; |
|
597 | 666 | case bt_reserved : |
|
598 | 667 | default: |
|
599 |
|
|
|
668 | RETURN_ERROR(corruption_detected); | |
|
600 | 669 | } |
|
601 | 670 | |
|
602 | 671 | if (ZSTD_isError(decodedSize)) return decodedSize; |
@@ -609,15 +678,15 b' static size_t ZSTD_decompressFrame(ZSTD_' | |||
|
609 | 678 | } |
|
610 | 679 | |
|
611 | 680 | if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { |
|
612 |
|
|
|
613 |
|
|
|
614 |
} |
|
|
681 | RETURN_ERROR_IF((U64)(op-ostart) != dctx->fParams.frameContentSize, | |
|
682 | corruption_detected); | |
|
683 | } | |
|
615 | 684 | if (dctx->fParams.checksumFlag) { /* Frame content checksum verification */ |
|
616 | 685 | U32 const checkCalc = (U32)XXH64_digest(&dctx->xxhState); |
|
617 | 686 | U32 checkRead; |
|
618 |
|
|
|
687 | RETURN_ERROR_IF(remainingSrcSize<4, checksum_wrong); | |
|
619 | 688 | checkRead = MEM_readLE32(ip); |
|
620 |
|
|
|
689 | RETURN_ERROR_IF(checkRead != checkCalc, checksum_wrong); | |
|
621 | 690 | ip += 4; |
|
622 | 691 | remainingSrcSize -= 4; |
|
623 | 692 | } |
@@ -652,8 +721,8 b' static size_t ZSTD_decompressMultiFrame(' | |||
|
652 | 721 | size_t decodedSize; |
|
653 | 722 | size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize); |
|
654 | 723 | if (ZSTD_isError(frameSize)) return frameSize; |
|
655 | /* legacy support is not compatible with static dctx */ | |
|
656 | if (dctx->staticSize) return ERROR(memory_allocation); | |
|
724 | RETURN_ERROR_IF(dctx->staticSize, memory_allocation, | |
|
725 | "legacy support is not compatible with static dctx"); | |
|
657 | 726 | |
|
658 | 727 | decodedSize = ZSTD_decompressLegacy(dst, dstCapacity, src, frameSize, dict, dictSize); |
|
659 | 728 | if (ZSTD_isError(decodedSize)) return decodedSize; |
@@ -674,9 +743,8 b' static size_t ZSTD_decompressMultiFrame(' | |||
|
674 | 743 | (unsigned)magicNumber, ZSTD_MAGICNUMBER); |
|
675 | 744 | if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { |
|
676 | 745 | size_t const skippableSize = readSkippableFrameSize(src, srcSize); |
|
677 |
|
|
|
678 |
|
|
|
679 | if (srcSize < skippableSize) return ERROR(srcSize_wrong); | |
|
746 | FORWARD_IF_ERROR(skippableSize); | |
|
747 | assert(skippableSize <= srcSize); | |
|
680 | 748 | |
|
681 | 749 | src = (const BYTE *)src + skippableSize; |
|
682 | 750 | srcSize -= skippableSize; |
@@ -685,29 +753,29 b' static size_t ZSTD_decompressMultiFrame(' | |||
|
685 | 753 | |
|
686 | 754 | if (ddict) { |
|
687 | 755 | /* we were called from ZSTD_decompress_usingDDict */ |
|
688 |
|
|
|
756 | FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(dctx, ddict)); | |
|
689 | 757 | } else { |
|
690 | 758 | /* this will initialize correctly with no dict if dict == NULL, so |
|
691 | 759 | * use this in all cases but ddict */ |
|
692 |
|
|
|
760 | FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDict(dctx, dict, dictSize)); | |
|
693 | 761 | } |
|
694 | 762 | ZSTD_checkContinuity(dctx, dst); |
|
695 | 763 | |
|
696 | 764 | { const size_t res = ZSTD_decompressFrame(dctx, dst, dstCapacity, |
|
697 | 765 | &src, &srcSize); |
|
698 | if ( (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) | |
|
699 | && (moreThan1Frame==1) ) { | |
|
700 | /* at least one frame successfully completed, | |
|
701 | * but following bytes are garbage : | |
|
702 | * it's more likely to be a srcSize error, | |
|
703 | * specifying more bytes than compressed size of frame(s). | |
|
704 | * This error message replaces ERROR(prefix_unknown), | |
|
705 | * which would be confusing, as the first header is actually correct. | |
|
706 | * Note that one could be unlucky, it might be a corruption error instead, | |
|
707 | * happening right at the place where we expect zstd magic bytes. | |
|
708 | * But this is _much_ less likely than a srcSize field error. */ | |
|
709 | return ERROR(srcSize_wrong); | |
|
710 |
|
|
|
766 | RETURN_ERROR_IF( | |
|
767 | (ZSTD_getErrorCode(res) == ZSTD_error_prefix_unknown) | |
|
768 | && (moreThan1Frame==1), | |
|
769 | srcSize_wrong, | |
|
770 | "at least one frame successfully completed, but following " | |
|
771 | "bytes are garbage: it's more likely to be a srcSize error, " | |
|
772 | "specifying more bytes than compressed size of frame(s). This " | |
|
773 | "error message replaces ERROR(prefix_unknown), which would be " | |
|
774 | "confusing, as the first header is actually correct. Note that " | |
|
775 | "one could be unlucky, it might be a corruption error instead, " | |
|
776 | "happening right at the place where we expect zstd magic " | |
|
777 | "bytes. But this is _much_ less likely than a srcSize field " | |
|
778 | "error."); | |
|
711 | 779 | if (ZSTD_isError(res)) return res; |
|
712 | 780 | assert(res <= dstCapacity); |
|
713 | 781 | dst = (BYTE*)dst + res; |
@@ -716,7 +784,7 b' static size_t ZSTD_decompressMultiFrame(' | |||
|
716 | 784 | moreThan1Frame = 1; |
|
717 | 785 | } /* while (srcSize >= ZSTD_frameHeaderSize_prefix) */ |
|
718 | 786 | |
|
719 |
|
|
|
787 | RETURN_ERROR_IF(srcSize, srcSize_wrong, "input not entirely consumed"); | |
|
720 | 788 | |
|
721 | 789 | return (BYTE*)dst - (BYTE*)dststart; |
|
722 | 790 | } |
@@ -730,9 +798,26 b' size_t ZSTD_decompress_usingDict(ZSTD_DC' | |||
|
730 | 798 | } |
|
731 | 799 | |
|
732 | 800 | |
|
801 | static ZSTD_DDict const* ZSTD_getDDict(ZSTD_DCtx* dctx) | |
|
802 | { | |
|
803 | switch (dctx->dictUses) { | |
|
804 | default: | |
|
805 | assert(0 /* Impossible */); | |
|
806 | /* fall-through */ | |
|
807 | case ZSTD_dont_use: | |
|
808 | ZSTD_clearDict(dctx); | |
|
809 | return NULL; | |
|
810 | case ZSTD_use_indefinitely: | |
|
811 | return dctx->ddict; | |
|
812 | case ZSTD_use_once: | |
|
813 | dctx->dictUses = ZSTD_dont_use; | |
|
814 | return dctx->ddict; | |
|
815 | } | |
|
816 | } | |
|
817 | ||
|
733 | 818 | size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize) |
|
734 | 819 | { |
|
735 |
return ZSTD_decompress_usingDict(dctx, dst, dstCapacity, src, srcSize, |
|
|
820 | return ZSTD_decompress_usingDDict(dctx, dst, dstCapacity, src, srcSize, ZSTD_getDDict(dctx)); | |
|
736 | 821 | } |
|
737 | 822 | |
|
738 | 823 | |
@@ -741,7 +826,7 b' size_t ZSTD_decompress(void* dst, size_t' | |||
|
741 | 826 | #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1) |
|
742 | 827 | size_t regenSize; |
|
743 | 828 | ZSTD_DCtx* const dctx = ZSTD_createDCtx(); |
|
744 |
|
|
|
829 | RETURN_ERROR_IF(dctx==NULL, memory_allocation); | |
|
745 | 830 | regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize); |
|
746 | 831 | ZSTD_freeDCtx(dctx); |
|
747 | 832 | return regenSize; |
@@ -791,8 +876,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||
|
791 | 876 | { |
|
792 | 877 | DEBUGLOG(5, "ZSTD_decompressContinue (srcSize:%u)", (unsigned)srcSize); |
|
793 | 878 | /* Sanity check */ |
|
794 | if (srcSize != dctx->expected) | |
|
795 | return ERROR(srcSize_wrong); /* not allowed */ | |
|
879 | RETURN_ERROR_IF(srcSize != dctx->expected, srcSize_wrong, "not allowed"); | |
|
796 | 880 | if (dstCapacity) ZSTD_checkContinuity(dctx, dst); |
|
797 | 881 | |
|
798 | 882 | switch (dctx->stage) |
@@ -817,7 +901,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||
|
817 | 901 | case ZSTDds_decodeFrameHeader: |
|
818 | 902 | assert(src != NULL); |
|
819 | 903 | memcpy(dctx->headerBuffer + (dctx->headerSize - srcSize), src, srcSize); |
|
820 |
|
|
|
904 | FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(dctx, dctx->headerBuffer, dctx->headerSize)); | |
|
821 | 905 | dctx->expected = ZSTD_blockHeaderSize; |
|
822 | 906 | dctx->stage = ZSTDds_decodeBlockHeader; |
|
823 | 907 | return 0; |
@@ -826,6 +910,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||
|
826 | 910 | { blockProperties_t bp; |
|
827 | 911 | size_t const cBlockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp); |
|
828 | 912 | if (ZSTD_isError(cBlockSize)) return cBlockSize; |
|
913 | RETURN_ERROR_IF(cBlockSize > dctx->fParams.blockSizeMax, corruption_detected, "Block Size Exceeds Maximum"); | |
|
829 | 914 | dctx->expected = cBlockSize; |
|
830 | 915 | dctx->bType = bp.blockType; |
|
831 | 916 | dctx->rleSize = bp.origSize; |
@@ -867,19 +952,20 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||
|
867 | 952 | break; |
|
868 | 953 | case bt_reserved : /* should never happen */ |
|
869 | 954 | default: |
|
870 |
|
|
|
955 | RETURN_ERROR(corruption_detected); | |
|
871 | 956 | } |
|
872 | 957 | if (ZSTD_isError(rSize)) return rSize; |
|
958 | RETURN_ERROR_IF(rSize > dctx->fParams.blockSizeMax, corruption_detected, "Decompressed Block Size Exceeds Maximum"); | |
|
873 | 959 | DEBUGLOG(5, "ZSTD_decompressContinue: decoded size from block : %u", (unsigned)rSize); |
|
874 | 960 | dctx->decodedSize += rSize; |
|
875 | 961 | if (dctx->fParams.checksumFlag) XXH64_update(&dctx->xxhState, dst, rSize); |
|
876 | 962 | |
|
877 | 963 | if (dctx->stage == ZSTDds_decompressLastBlock) { /* end of frame */ |
|
878 | 964 | DEBUGLOG(4, "ZSTD_decompressContinue: decoded size from frame : %u", (unsigned)dctx->decodedSize); |
|
879 | if (dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN) { | |
|
880 |
|
|
|
881 | return ERROR(corruption_detected); | |
|
882 |
|
|
|
965 | RETURN_ERROR_IF( | |
|
966 | dctx->fParams.frameContentSize != ZSTD_CONTENTSIZE_UNKNOWN | |
|
967 | && dctx->decodedSize != dctx->fParams.frameContentSize, | |
|
968 | corruption_detected); | |
|
883 | 969 | if (dctx->fParams.checksumFlag) { /* another round for frame checksum */ |
|
884 | 970 | dctx->expected = 4; |
|
885 | 971 | dctx->stage = ZSTDds_checkChecksum; |
@@ -900,7 +986,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||
|
900 | 986 | { U32 const h32 = (U32)XXH64_digest(&dctx->xxhState); |
|
901 | 987 | U32 const check32 = MEM_readLE32(src); |
|
902 | 988 | DEBUGLOG(4, "ZSTD_decompressContinue: checksum : calculated %08X :: %08X read", (unsigned)h32, (unsigned)check32); |
|
903 |
|
|
|
989 | RETURN_ERROR_IF(check32 != h32, checksum_wrong); | |
|
904 | 990 | dctx->expected = 0; |
|
905 | 991 | dctx->stage = ZSTDds_getFrameHeaderSize; |
|
906 | 992 | return 0; |
@@ -921,7 +1007,7 b' size_t ZSTD_decompressContinue(ZSTD_DCtx' | |||
|
921 | 1007 | |
|
922 | 1008 | default: |
|
923 | 1009 | assert(0); /* impossible */ |
|
924 |
|
|
|
1010 | RETURN_ERROR(GENERIC); /* some compiler require default to do something */ | |
|
925 | 1011 | } |
|
926 | 1012 | } |
|
927 | 1013 | |
@@ -945,7 +1031,7 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||
|
945 | 1031 | const BYTE* dictPtr = (const BYTE*)dict; |
|
946 | 1032 | const BYTE* const dictEnd = dictPtr + dictSize; |
|
947 | 1033 | |
|
948 |
|
|
|
1034 | RETURN_ERROR_IF(dictSize <= 8, dictionary_corrupted); | |
|
949 | 1035 | assert(MEM_readLE32(dict) == ZSTD_MAGIC_DICTIONARY); /* dict must be valid */ |
|
950 | 1036 | dictPtr += 8; /* skip header = magic + dictID */ |
|
951 | 1037 | |
@@ -964,16 +1050,16 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||
|
964 | 1050 | dictPtr, dictEnd - dictPtr, |
|
965 | 1051 | workspace, workspaceSize); |
|
966 | 1052 | #endif |
|
967 |
|
|
|
1053 | RETURN_ERROR_IF(HUF_isError(hSize), dictionary_corrupted); | |
|
968 | 1054 | dictPtr += hSize; |
|
969 | 1055 | } |
|
970 | 1056 | |
|
971 | 1057 | { short offcodeNCount[MaxOff+1]; |
|
972 | 1058 | unsigned offcodeMaxValue = MaxOff, offcodeLog; |
|
973 | 1059 | size_t const offcodeHeaderSize = FSE_readNCount(offcodeNCount, &offcodeMaxValue, &offcodeLog, dictPtr, dictEnd-dictPtr); |
|
974 |
|
|
|
975 |
|
|
|
976 |
|
|
|
1060 | RETURN_ERROR_IF(FSE_isError(offcodeHeaderSize), dictionary_corrupted); | |
|
1061 | RETURN_ERROR_IF(offcodeMaxValue > MaxOff, dictionary_corrupted); | |
|
1062 | RETURN_ERROR_IF(offcodeLog > OffFSELog, dictionary_corrupted); | |
|
977 | 1063 | ZSTD_buildFSETable( entropy->OFTable, |
|
978 | 1064 | offcodeNCount, offcodeMaxValue, |
|
979 | 1065 | OF_base, OF_bits, |
@@ -984,9 +1070,9 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||
|
984 | 1070 | { short matchlengthNCount[MaxML+1]; |
|
985 | 1071 | unsigned matchlengthMaxValue = MaxML, matchlengthLog; |
|
986 | 1072 | size_t const matchlengthHeaderSize = FSE_readNCount(matchlengthNCount, &matchlengthMaxValue, &matchlengthLog, dictPtr, dictEnd-dictPtr); |
|
987 |
|
|
|
988 |
|
|
|
989 |
|
|
|
1073 | RETURN_ERROR_IF(FSE_isError(matchlengthHeaderSize), dictionary_corrupted); | |
|
1074 | RETURN_ERROR_IF(matchlengthMaxValue > MaxML, dictionary_corrupted); | |
|
1075 | RETURN_ERROR_IF(matchlengthLog > MLFSELog, dictionary_corrupted); | |
|
990 | 1076 | ZSTD_buildFSETable( entropy->MLTable, |
|
991 | 1077 | matchlengthNCount, matchlengthMaxValue, |
|
992 | 1078 | ML_base, ML_bits, |
@@ -997,9 +1083,9 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||
|
997 | 1083 | { short litlengthNCount[MaxLL+1]; |
|
998 | 1084 | unsigned litlengthMaxValue = MaxLL, litlengthLog; |
|
999 | 1085 | size_t const litlengthHeaderSize = FSE_readNCount(litlengthNCount, &litlengthMaxValue, &litlengthLog, dictPtr, dictEnd-dictPtr); |
|
1000 |
|
|
|
1001 |
|
|
|
1002 |
|
|
|
1086 | RETURN_ERROR_IF(FSE_isError(litlengthHeaderSize), dictionary_corrupted); | |
|
1087 | RETURN_ERROR_IF(litlengthMaxValue > MaxLL, dictionary_corrupted); | |
|
1088 | RETURN_ERROR_IF(litlengthLog > LLFSELog, dictionary_corrupted); | |
|
1003 | 1089 | ZSTD_buildFSETable( entropy->LLTable, |
|
1004 | 1090 | litlengthNCount, litlengthMaxValue, |
|
1005 | 1091 | LL_base, LL_bits, |
@@ -1007,12 +1093,13 b' ZSTD_loadDEntropy(ZSTD_entropyDTables_t*' | |||
|
1007 | 1093 | dictPtr += litlengthHeaderSize; |
|
1008 | 1094 | } |
|
1009 | 1095 | |
|
1010 |
|
|
|
1096 | RETURN_ERROR_IF(dictPtr+12 > dictEnd, dictionary_corrupted); | |
|
1011 | 1097 | { int i; |
|
1012 | 1098 | size_t const dictContentSize = (size_t)(dictEnd - (dictPtr+12)); |
|
1013 | 1099 | for (i=0; i<3; i++) { |
|
1014 | 1100 | U32 const rep = MEM_readLE32(dictPtr); dictPtr += 4; |
|
1015 |
|
|
|
1101 | RETURN_ERROR_IF(rep==0 || rep >= dictContentSize, | |
|
1102 | dictionary_corrupted); | |
|
1016 | 1103 | entropy->rep[i] = rep; |
|
1017 | 1104 | } } |
|
1018 | 1105 | |
@@ -1030,7 +1117,7 b' static size_t ZSTD_decompress_insertDict' | |||
|
1030 | 1117 | |
|
1031 | 1118 | /* load entropy tables */ |
|
1032 | 1119 | { size_t const eSize = ZSTD_loadDEntropy(&dctx->entropy, dict, dictSize); |
|
1033 |
|
|
|
1120 | RETURN_ERROR_IF(ZSTD_isError(eSize), dictionary_corrupted); | |
|
1034 | 1121 | dict = (const char*)dict + eSize; |
|
1035 | 1122 | dictSize -= eSize; |
|
1036 | 1123 | } |
@@ -1064,9 +1151,11 b' size_t ZSTD_decompressBegin(ZSTD_DCtx* d' | |||
|
1064 | 1151 | |
|
1065 | 1152 | size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize) |
|
1066 | 1153 | { |
|
1067 |
|
|
|
1154 | FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); | |
|
1068 | 1155 | if (dict && dictSize) |
|
1069 | CHECK_E(ZSTD_decompress_insertDictionary(dctx, dict, dictSize), dictionary_corrupted); | |
|
1156 | RETURN_ERROR_IF( | |
|
1157 | ZSTD_isError(ZSTD_decompress_insertDictionary(dctx, dict, dictSize)), | |
|
1158 | dictionary_corrupted); | |
|
1070 | 1159 | return 0; |
|
1071 | 1160 | } |
|
1072 | 1161 | |
@@ -1085,7 +1174,7 b' size_t ZSTD_decompressBegin_usingDDict(Z' | |||
|
1085 | 1174 | DEBUGLOG(4, "DDict is %s", |
|
1086 | 1175 | dctx->ddictIsCold ? "~cold~" : "hot!"); |
|
1087 | 1176 | } |
|
1088 |
|
|
|
1177 | FORWARD_IF_ERROR( ZSTD_decompressBegin(dctx) ); | |
|
1089 | 1178 | if (ddict) { /* NULL ddict is equivalent to no dictionary */ |
|
1090 | 1179 | ZSTD_copyDDictParameters(dctx, ddict); |
|
1091 | 1180 | } |
@@ -1104,7 +1193,7 b' unsigned ZSTD_getDictID_fromDict(const v' | |||
|
1104 | 1193 | } |
|
1105 | 1194 | |
|
1106 | 1195 | /*! ZSTD_getDictID_fromFrame() : |
|
1107 |
* Provides the dictID required to decompress |
|
|
1196 | * Provides the dictID required to decompress frame stored within `src`. | |
|
1108 | 1197 | * If @return == 0, the dictID could not be decoded. |
|
1109 | 1198 | * This could for one of the following reasons : |
|
1110 | 1199 | * - The frame does not require a dictionary (most common case). |
@@ -1176,15 +1265,14 b' size_t ZSTD_DCtx_loadDictionary_advanced' | |||
|
1176 | 1265 | ZSTD_dictLoadMethod_e dictLoadMethod, |
|
1177 | 1266 | ZSTD_dictContentType_e dictContentType) |
|
1178 | 1267 | { |
|
1179 |
|
|
|
1180 |
ZSTD_ |
|
|
1268 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
|
1269 | ZSTD_clearDict(dctx); | |
|
1181 | 1270 | if (dict && dictSize >= 8) { |
|
1182 | 1271 | dctx->ddictLocal = ZSTD_createDDict_advanced(dict, dictSize, dictLoadMethod, dictContentType, dctx->customMem); |
|
1183 |
|
|
|
1184 | } else { | |
|
1185 | dctx->ddictLocal = NULL; | |
|
1272 | RETURN_ERROR_IF(dctx->ddictLocal == NULL, memory_allocation); | |
|
1273 | dctx->ddict = dctx->ddictLocal; | |
|
1274 | dctx->dictUses = ZSTD_use_indefinitely; | |
|
1186 | 1275 | } |
|
1187 | dctx->ddict = dctx->ddictLocal; | |
|
1188 | 1276 | return 0; |
|
1189 | 1277 | } |
|
1190 | 1278 | |
@@ -1200,7 +1288,9 b' size_t ZSTD_DCtx_loadDictionary(ZSTD_DCt' | |||
|
1200 | 1288 | |
|
1201 | 1289 | size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType) |
|
1202 | 1290 | { |
|
1203 |
|
|
|
1291 | FORWARD_IF_ERROR(ZSTD_DCtx_loadDictionary_advanced(dctx, prefix, prefixSize, ZSTD_dlm_byRef, dictContentType)); | |
|
1292 | dctx->dictUses = ZSTD_use_once; | |
|
1293 | return 0; | |
|
1204 | 1294 | } |
|
1205 | 1295 | |
|
1206 | 1296 | size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize) |
@@ -1215,9 +1305,8 b' size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dc' | |||
|
1215 | 1305 | size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize) |
|
1216 | 1306 | { |
|
1217 | 1307 | DEBUGLOG(4, "ZSTD_initDStream_usingDict"); |
|
1218 | zds->streamStage = zdss_init; | |
|
1219 | zds->noForwardProgress = 0; | |
|
1220 | CHECK_F( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); | |
|
1308 | FORWARD_IF_ERROR( ZSTD_DCtx_reset(zds, ZSTD_reset_session_only) ); | |
|
1309 | FORWARD_IF_ERROR( ZSTD_DCtx_loadDictionary(zds, dict, dictSize) ); | |
|
1221 | 1310 | return ZSTD_FRAMEHEADERSIZE_PREFIX; |
|
1222 | 1311 | } |
|
1223 | 1312 | |
@@ -1225,7 +1314,7 b' size_t ZSTD_initDStream_usingDict(ZSTD_D' | |||
|
1225 | 1314 | size_t ZSTD_initDStream(ZSTD_DStream* zds) |
|
1226 | 1315 | { |
|
1227 | 1316 | DEBUGLOG(4, "ZSTD_initDStream"); |
|
1228 |
return ZSTD_initDStream_usingDict(zds, NULL |
|
|
1317 | return ZSTD_initDStream_usingDDict(zds, NULL); | |
|
1229 | 1318 | } |
|
1230 | 1319 | |
|
1231 | 1320 | /* ZSTD_initDStream_usingDDict() : |
@@ -1233,9 +1322,9 b' size_t ZSTD_initDStream(ZSTD_DStream* zd' | |||
|
1233 | 1322 | * this function cannot fail */ |
|
1234 | 1323 | size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* dctx, const ZSTD_DDict* ddict) |
|
1235 | 1324 | { |
|
1236 | size_t const initResult = ZSTD_initDStream(dctx); | |
|
1237 | dctx->ddict = ddict; | |
|
1238 | return initResult; | |
|
1325 | FORWARD_IF_ERROR( ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only) ); | |
|
1326 | FORWARD_IF_ERROR( ZSTD_DCtx_refDDict(dctx, ddict) ); | |
|
1327 | return ZSTD_FRAMEHEADERSIZE_PREFIX; | |
|
1239 | 1328 | } |
|
1240 | 1329 | |
|
1241 | 1330 | /* ZSTD_resetDStream() : |
@@ -1243,19 +1332,19 b' size_t ZSTD_initDStream_usingDDict(ZSTD_' | |||
|
1243 | 1332 | * this function cannot fail */ |
|
1244 | 1333 | size_t ZSTD_resetDStream(ZSTD_DStream* dctx) |
|
1245 | 1334 | { |
|
1246 | DEBUGLOG(4, "ZSTD_resetDStream"); | |
|
1247 | dctx->streamStage = zdss_loadHeader; | |
|
1248 | dctx->lhSize = dctx->inPos = dctx->outStart = dctx->outEnd = 0; | |
|
1249 | dctx->legacyVersion = 0; | |
|
1250 | dctx->hostageByte = 0; | |
|
1335 | FORWARD_IF_ERROR(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_only)); | |
|
1251 | 1336 | return ZSTD_FRAMEHEADERSIZE_PREFIX; |
|
1252 | 1337 | } |
|
1253 | 1338 | |
|
1254 | 1339 | |
|
1255 | 1340 | size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) |
|
1256 | 1341 | { |
|
1257 |
|
|
|
1342 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
|
1343 | ZSTD_clearDict(dctx); | |
|
1344 | if (ddict) { | |
|
1258 | 1345 | dctx->ddict = ddict; |
|
1346 | dctx->dictUses = ZSTD_use_indefinitely; | |
|
1347 | } | |
|
1259 | 1348 | return 0; |
|
1260 | 1349 | } |
|
1261 | 1350 | |
@@ -1267,9 +1356,9 b' size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_D' | |||
|
1267 | 1356 | ZSTD_bounds const bounds = ZSTD_dParam_getBounds(ZSTD_d_windowLogMax); |
|
1268 | 1357 | size_t const min = (size_t)1 << bounds.lowerBound; |
|
1269 | 1358 | size_t const max = (size_t)1 << bounds.upperBound; |
|
1270 |
|
|
|
1271 |
|
|
|
1272 |
|
|
|
1359 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
|
1360 | RETURN_ERROR_IF(maxWindowSize < min, parameter_outOfBound); | |
|
1361 | RETURN_ERROR_IF(maxWindowSize > max, parameter_outOfBound); | |
|
1273 | 1362 | dctx->maxWindowSize = maxWindowSize; |
|
1274 | 1363 | return 0; |
|
1275 | 1364 | } |
@@ -1311,15 +1400,15 b' static int ZSTD_dParam_withinBounds(ZSTD' | |||
|
1311 | 1400 | } |
|
1312 | 1401 | |
|
1313 | 1402 | #define CHECK_DBOUNDS(p,v) { \ |
|
1314 |
|
|
|
1315 | return ERROR(parameter_outOfBound); \ | |
|
1403 | RETURN_ERROR_IF(!ZSTD_dParam_withinBounds(p, v), parameter_outOfBound); \ | |
|
1316 | 1404 | } |
|
1317 | 1405 | |
|
1318 | 1406 | size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value) |
|
1319 | 1407 | { |
|
1320 |
|
|
|
1408 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
|
1321 | 1409 | switch(dParam) { |
|
1322 | 1410 | case ZSTD_d_windowLogMax: |
|
1411 | if (value == 0) value = ZSTD_WINDOWLOG_LIMIT_DEFAULT; | |
|
1323 | 1412 | CHECK_DBOUNDS(ZSTD_d_windowLogMax, value); |
|
1324 | 1413 | dctx->maxWindowSize = ((size_t)1) << value; |
|
1325 | 1414 | return 0; |
@@ -1329,19 +1418,20 b' size_t ZSTD_DCtx_setParameter(ZSTD_DCtx*' | |||
|
1329 | 1418 | return 0; |
|
1330 | 1419 | default:; |
|
1331 | 1420 | } |
|
1332 |
|
|
|
1421 | RETURN_ERROR(parameter_unsupported); | |
|
1333 | 1422 | } |
|
1334 | 1423 | |
|
1335 | 1424 | size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset) |
|
1336 | 1425 | { |
|
1337 | 1426 | if ( (reset == ZSTD_reset_session_only) |
|
1338 | 1427 | || (reset == ZSTD_reset_session_and_parameters) ) { |
|
1339 | (void)ZSTD_initDStream(dctx); | |
|
1428 | dctx->streamStage = zdss_init; | |
|
1429 | dctx->noForwardProgress = 0; | |
|
1340 | 1430 | } |
|
1341 | 1431 | if ( (reset == ZSTD_reset_parameters) |
|
1342 | 1432 | || (reset == ZSTD_reset_session_and_parameters) ) { |
|
1343 |
|
|
|
1344 | return ERROR(stage_wrong); | |
|
1433 | RETURN_ERROR_IF(dctx->streamStage != zdss_init, stage_wrong); | |
|
1434 | ZSTD_clearDict(dctx); | |
|
1345 | 1435 | dctx->format = ZSTD_f_zstd1; |
|
1346 | 1436 | dctx->maxWindowSize = ZSTD_MAXWINDOWSIZE_DEFAULT; |
|
1347 | 1437 | } |
@@ -1360,7 +1450,8 b' size_t ZSTD_decodingBufferSize_min(unsig' | |||
|
1360 | 1450 | unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2); |
|
1361 | 1451 | unsigned long long const neededSize = MIN(frameContentSize, neededRBSize); |
|
1362 | 1452 | size_t const minRBSize = (size_t) neededSize; |
|
1363 | if ((unsigned long long)minRBSize != neededSize) return ERROR(frameParameter_windowTooLarge); | |
|
1453 | RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize, | |
|
1454 | frameParameter_windowTooLarge); | |
|
1364 | 1455 | return minRBSize; |
|
1365 | 1456 | } |
|
1366 | 1457 | |
@@ -1378,9 +1469,9 b' size_t ZSTD_estimateDStreamSize_fromFram' | |||
|
1378 | 1469 | ZSTD_frameHeader zfh; |
|
1379 | 1470 | size_t const err = ZSTD_getFrameHeader(&zfh, src, srcSize); |
|
1380 | 1471 | if (ZSTD_isError(err)) return err; |
|
1381 |
|
|
|
1382 |
|
|
|
1383 |
|
|
|
1472 | RETURN_ERROR_IF(err>0, srcSize_wrong); | |
|
1473 | RETURN_ERROR_IF(zfh.windowSize > windowSizeMax, | |
|
1474 | frameParameter_windowTooLarge); | |
|
1384 | 1475 | return ZSTD_estimateDStreamSize((size_t)zfh.windowSize); |
|
1385 | 1476 | } |
|
1386 | 1477 | |
@@ -1406,16 +1497,16 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1406 | 1497 | U32 someMoreWork = 1; |
|
1407 | 1498 | |
|
1408 | 1499 | DEBUGLOG(5, "ZSTD_decompressStream"); |
|
1409 | if (input->pos > input->size) { /* forbidden */ | |
|
1410 | DEBUGLOG(5, "in: pos: %u vs size: %u", | |
|
1500 | RETURN_ERROR_IF( | |
|
1501 | input->pos > input->size, | |
|
1502 | srcSize_wrong, | |
|
1503 | "forbidden. in: pos: %u vs size: %u", | |
|
1411 | 1504 |
|
|
1412 | return ERROR(srcSize_wrong); | |
|
1413 | } | |
|
1414 | if (output->pos > output->size) { /* forbidden */ | |
|
1415 |
|
|
|
1505 | RETURN_ERROR_IF( | |
|
1506 | output->pos > output->size, | |
|
1507 | dstSize_tooSmall, | |
|
1508 | "forbidden. out: pos: %u vs size: %u", | |
|
1416 | 1509 |
|
|
1417 | return ERROR(dstSize_tooSmall); | |
|
1418 | } | |
|
1419 | 1510 | DEBUGLOG(5, "input size : %u", (U32)(input->size - input->pos)); |
|
1420 | 1511 | |
|
1421 | 1512 | while (someMoreWork) { |
@@ -1423,15 +1514,18 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1423 | 1514 | { |
|
1424 | 1515 | case zdss_init : |
|
1425 | 1516 | DEBUGLOG(5, "stage zdss_init => transparent reset "); |
|
1426 | ZSTD_resetDStream(zds); /* transparent reset on starting decoding a new frame */ | |
|
1517 | zds->streamStage = zdss_loadHeader; | |
|
1518 | zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0; | |
|
1519 | zds->legacyVersion = 0; | |
|
1520 | zds->hostageByte = 0; | |
|
1427 | 1521 | /* fall-through */ |
|
1428 | 1522 | |
|
1429 | 1523 | case zdss_loadHeader : |
|
1430 | 1524 | DEBUGLOG(5, "stage zdss_loadHeader (srcSize : %u)", (U32)(iend - ip)); |
|
1431 | 1525 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) |
|
1432 | 1526 | if (zds->legacyVersion) { |
|
1433 | /* legacy support is incompatible with static dctx */ | |
|
1434 | if (zds->staticSize) return ERROR(memory_allocation); | |
|
1527 | RETURN_ERROR_IF(zds->staticSize, memory_allocation, | |
|
1528 | "legacy support is incompatible with static dctx"); | |
|
1435 | 1529 | { size_t const hint = ZSTD_decompressLegacyStream(zds->legacyContext, zds->legacyVersion, output, input); |
|
1436 | 1530 | if (hint==0) zds->streamStage = zdss_init; |
|
1437 | 1531 | return hint; |
@@ -1443,12 +1537,13 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1443 | 1537 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) |
|
1444 | 1538 | U32 const legacyVersion = ZSTD_isLegacy(istart, iend-istart); |
|
1445 | 1539 | if (legacyVersion) { |
|
1446 | const void* const dict = zds->ddict ? ZSTD_DDict_dictContent(zds->ddict) : NULL; | |
|
1447 |
|
|
|
1540 | ZSTD_DDict const* const ddict = ZSTD_getDDict(zds); | |
|
1541 | const void* const dict = ddict ? ZSTD_DDict_dictContent(ddict) : NULL; | |
|
1542 | size_t const dictSize = ddict ? ZSTD_DDict_dictSize(ddict) : 0; | |
|
1448 | 1543 | DEBUGLOG(5, "ZSTD_decompressStream: detected legacy version v0.%u", legacyVersion); |
|
1449 | /* legacy support is incompatible with static dctx */ | |
|
1450 | if (zds->staticSize) return ERROR(memory_allocation); | |
|
1451 |
|
|
|
1544 | RETURN_ERROR_IF(zds->staticSize, memory_allocation, | |
|
1545 | "legacy support is incompatible with static dctx"); | |
|
1546 | FORWARD_IF_ERROR(ZSTD_initLegacyStream(&zds->legacyContext, | |
|
1452 | 1547 | zds->previousLegacyVersion, legacyVersion, |
|
1453 | 1548 | dict, dictSize)); |
|
1454 | 1549 | zds->legacyVersion = zds->previousLegacyVersion = legacyVersion; |
@@ -1482,7 +1577,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1482 | 1577 | size_t const cSize = ZSTD_findFrameCompressedSize(istart, iend-istart); |
|
1483 | 1578 | if (cSize <= (size_t)(iend-istart)) { |
|
1484 | 1579 | /* shortcut : using single-pass mode */ |
|
1485 |
size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, zds |
|
|
1580 | size_t const decompressedSize = ZSTD_decompress_usingDDict(zds, op, oend-op, istart, cSize, ZSTD_getDDict(zds)); | |
|
1486 | 1581 | if (ZSTD_isError(decompressedSize)) return decompressedSize; |
|
1487 | 1582 | DEBUGLOG(4, "shortcut to single-pass ZSTD_decompress_usingDDict()") |
|
1488 | 1583 | ip = istart + cSize; |
@@ -1495,13 +1590,13 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1495 | 1590 | |
|
1496 | 1591 | /* Consume header (see ZSTDds_decodeFrameHeader) */ |
|
1497 | 1592 | DEBUGLOG(4, "Consume header"); |
|
1498 |
|
|
|
1593 | FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds))); | |
|
1499 | 1594 | |
|
1500 | 1595 | if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */ |
|
1501 | 1596 | zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE); |
|
1502 | 1597 | zds->stage = ZSTDds_skipFrame; |
|
1503 | 1598 | } else { |
|
1504 |
|
|
|
1599 | FORWARD_IF_ERROR(ZSTD_decodeFrameHeader(zds, zds->headerBuffer, zds->lhSize)); | |
|
1505 | 1600 | zds->expected = ZSTD_blockHeaderSize; |
|
1506 | 1601 | zds->stage = ZSTDds_decodeBlockHeader; |
|
1507 | 1602 | } |
@@ -1511,7 +1606,8 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1511 | 1606 | (U32)(zds->fParams.windowSize >>10), |
|
1512 | 1607 | (U32)(zds->maxWindowSize >> 10) ); |
|
1513 | 1608 | zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN); |
|
1514 |
|
|
|
1609 | RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize, | |
|
1610 | frameParameter_windowTooLarge); | |
|
1515 | 1611 | |
|
1516 | 1612 | /* Adapt buffer sizes to frame header instructions */ |
|
1517 | 1613 | { size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */); |
@@ -1525,14 +1621,15 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1525 | 1621 | if (zds->staticSize) { /* static DCtx */ |
|
1526 | 1622 | DEBUGLOG(4, "staticSize : %u", (U32)zds->staticSize); |
|
1527 | 1623 | assert(zds->staticSize >= sizeof(ZSTD_DCtx)); /* controlled at init */ |
|
1528 | if (bufferSize > zds->staticSize - sizeof(ZSTD_DCtx)) | |
|
1529 | return ERROR(memory_allocation); | |
|
1624 | RETURN_ERROR_IF( | |
|
1625 | bufferSize > zds->staticSize - sizeof(ZSTD_DCtx), | |
|
1626 | memory_allocation); | |
|
1530 | 1627 | } else { |
|
1531 | 1628 | ZSTD_free(zds->inBuff, zds->customMem); |
|
1532 | 1629 | zds->inBuffSize = 0; |
|
1533 | 1630 | zds->outBuffSize = 0; |
|
1534 | 1631 | zds->inBuff = (char*)ZSTD_malloc(bufferSize, zds->customMem); |
|
1535 |
|
|
|
1632 | RETURN_ERROR_IF(zds->inBuff == NULL, memory_allocation); | |
|
1536 | 1633 | } |
|
1537 | 1634 | zds->inBuffSize = neededInBuffSize; |
|
1538 | 1635 | zds->outBuff = zds->inBuff + zds->inBuffSize; |
@@ -1574,7 +1671,9 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1574 | 1671 | if (isSkipFrame) { |
|
1575 | 1672 | loadedSize = MIN(toLoad, (size_t)(iend-ip)); |
|
1576 | 1673 | } else { |
|
1577 | if (toLoad > zds->inBuffSize - zds->inPos) return ERROR(corruption_detected); /* should never happen */ | |
|
1674 | RETURN_ERROR_IF(toLoad > zds->inBuffSize - zds->inPos, | |
|
1675 | corruption_detected, | |
|
1676 | "should never happen"); | |
|
1578 | 1677 | loadedSize = ZSTD_limitCopy(zds->inBuff + zds->inPos, toLoad, ip, iend-ip); |
|
1579 | 1678 | } |
|
1580 | 1679 | ip += loadedSize; |
@@ -1615,7 +1714,7 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1615 | 1714 | |
|
1616 | 1715 | default: |
|
1617 | 1716 | assert(0); /* impossible */ |
|
1618 |
|
|
|
1717 | RETURN_ERROR(GENERIC); /* some compiler require default to do something */ | |
|
1619 | 1718 | } } |
|
1620 | 1719 | |
|
1621 | 1720 | /* result */ |
@@ -1624,8 +1723,8 b' size_t ZSTD_decompressStream(ZSTD_DStrea' | |||
|
1624 | 1723 | if ((ip==istart) && (op==ostart)) { /* no forward progress */ |
|
1625 | 1724 | zds->noForwardProgress ++; |
|
1626 | 1725 | if (zds->noForwardProgress >= ZSTD_NO_FORWARD_PROGRESS_MAX) { |
|
1627 |
|
|
|
1628 |
|
|
|
1726 | RETURN_ERROR_IF(op==oend, dstSize_tooSmall); | |
|
1727 | RETURN_ERROR_IF(ip==iend, srcSize_wrong); | |
|
1629 | 1728 | assert(0); |
|
1630 | 1729 | } |
|
1631 | 1730 | } else { |
@@ -56,14 +56,15 b' static void ZSTD_copy4(void* dst, const ' | |||
|
56 | 56 | size_t ZSTD_getcBlockSize(const void* src, size_t srcSize, |
|
57 | 57 | blockProperties_t* bpPtr) |
|
58 | 58 | { |
|
59 |
|
|
|
59 | RETURN_ERROR_IF(srcSize < ZSTD_blockHeaderSize, srcSize_wrong); | |
|
60 | ||
|
60 | 61 | { U32 const cBlockHeader = MEM_readLE24(src); |
|
61 | 62 | U32 const cSize = cBlockHeader >> 3; |
|
62 | 63 | bpPtr->lastBlock = cBlockHeader & 1; |
|
63 | 64 | bpPtr->blockType = (blockType_e)((cBlockHeader >> 1) & 3); |
|
64 | 65 | bpPtr->origSize = cSize; /* only useful for RLE */ |
|
65 | 66 | if (bpPtr->blockType == bt_rle) return 1; |
|
66 |
|
|
|
67 | RETURN_ERROR_IF(bpPtr->blockType == bt_reserved, corruption_detected); | |
|
67 | 68 | return cSize; |
|
68 | 69 | } |
|
69 | 70 | } |
@@ -78,7 +79,8 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
78 | 79 | size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx, |
|
79 | 80 | const void* src, size_t srcSize) /* note : srcSize < BLOCKSIZE */ |
|
80 | 81 | { |
|
81 | if (srcSize < MIN_CBLOCK_SIZE) return ERROR(corruption_detected); | |
|
82 | DEBUGLOG(5, "ZSTD_decodeLiteralsBlock"); | |
|
83 | RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected); | |
|
82 | 84 | |
|
83 | 85 | { const BYTE* const istart = (const BYTE*) src; |
|
84 | 86 | symbolEncodingType_e const litEncType = (symbolEncodingType_e)(istart[0] & 3); |
@@ -86,11 +88,12 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
86 | 88 | switch(litEncType) |
|
87 | 89 | { |
|
88 | 90 | case set_repeat: |
|
89 | if (dctx->litEntropy==0) return ERROR(dictionary_corrupted); | |
|
91 | DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block"); | |
|
92 | RETURN_ERROR_IF(dctx->litEntropy==0, dictionary_corrupted); | |
|
90 | 93 | /* fall-through */ |
|
91 | 94 | |
|
92 | 95 | case set_compressed: |
|
93 |
|
|
|
96 | RETURN_ERROR_IF(srcSize < 5, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need up to 5 for case 3"); | |
|
94 | 97 | { size_t lhSize, litSize, litCSize; |
|
95 | 98 | U32 singleStream=0; |
|
96 | 99 | U32 const lhlCode = (istart[0] >> 2) & 3; |
@@ -115,11 +118,11 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
115 | 118 | /* 2 - 2 - 18 - 18 */ |
|
116 | 119 | lhSize = 5; |
|
117 | 120 | litSize = (lhc >> 4) & 0x3FFFF; |
|
118 | litCSize = (lhc >> 22) + (istart[4] << 10); | |
|
121 | litCSize = (lhc >> 22) + ((size_t)istart[4] << 10); | |
|
119 | 122 | break; |
|
120 | 123 | } |
|
121 |
|
|
|
122 |
|
|
|
124 | RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); | |
|
125 | RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected); | |
|
123 | 126 | |
|
124 | 127 | /* prefetch huffman table if cold */ |
|
125 | 128 | if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) { |
@@ -157,7 +160,7 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
157 | 160 | } |
|
158 | 161 | } |
|
159 | 162 | |
|
160 |
|
|
|
163 | RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected); | |
|
161 | 164 | |
|
162 | 165 | dctx->litPtr = dctx->litBuffer; |
|
163 | 166 | dctx->litSize = litSize; |
@@ -187,7 +190,7 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
187 | 190 | } |
|
188 | 191 | |
|
189 | 192 | if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) { /* risk reading beyond src buffer with wildcopy */ |
|
190 |
|
|
|
193 | RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected); | |
|
191 | 194 | memcpy(dctx->litBuffer, istart+lhSize, litSize); |
|
192 | 195 | dctx->litPtr = dctx->litBuffer; |
|
193 | 196 | dctx->litSize = litSize; |
@@ -216,17 +219,17 b' size_t ZSTD_decodeLiteralsBlock(ZSTD_DCt' | |||
|
216 | 219 | case 3: |
|
217 | 220 | lhSize = 3; |
|
218 | 221 | litSize = MEM_readLE24(istart) >> 4; |
|
219 |
|
|
|
222 | RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4"); | |
|
220 | 223 | break; |
|
221 | 224 | } |
|
222 |
|
|
|
225 | RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected); | |
|
223 | 226 | memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH); |
|
224 | 227 | dctx->litPtr = dctx->litBuffer; |
|
225 | 228 | dctx->litSize = litSize; |
|
226 | 229 | return lhSize+1; |
|
227 | 230 | } |
|
228 | 231 | default: |
|
229 |
|
|
|
232 | RETURN_ERROR(corruption_detected, "impossible"); | |
|
230 | 233 | } |
|
231 | 234 | } |
|
232 | 235 | } |
@@ -390,7 +393,8 b' ZSTD_buildFSETable(ZSTD_seqSymbol* dt,' | |||
|
390 | 393 | symbolNext[s] = 1; |
|
391 | 394 | } else { |
|
392 | 395 | if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0; |
|
393 |
|
|
|
396 | assert(normalizedCounter[s]>=0); | |
|
397 | symbolNext[s] = (U16)normalizedCounter[s]; | |
|
394 | 398 | } } } |
|
395 | 399 | memcpy(dt, &DTableH, sizeof(DTableH)); |
|
396 | 400 | } |
@@ -436,8 +440,8 b' static size_t ZSTD_buildSeqTable(ZSTD_se' | |||
|
436 | 440 | switch(type) |
|
437 | 441 | { |
|
438 | 442 | case set_rle : |
|
439 |
|
|
|
440 |
|
|
|
443 | RETURN_ERROR_IF(!srcSize, srcSize_wrong); | |
|
444 | RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected); | |
|
441 | 445 | { U32 const symbol = *(const BYTE*)src; |
|
442 | 446 | U32 const baseline = baseValue[symbol]; |
|
443 | 447 | U32 const nbBits = nbAdditionalBits[symbol]; |
@@ -449,7 +453,7 b' static size_t ZSTD_buildSeqTable(ZSTD_se' | |||
|
449 | 453 | *DTablePtr = defaultTable; |
|
450 | 454 | return 0; |
|
451 | 455 | case set_repeat: |
|
452 |
|
|
|
456 | RETURN_ERROR_IF(!flagRepeatTable, corruption_detected); | |
|
453 | 457 | /* prefetch FSE table if used */ |
|
454 | 458 | if (ddictIsCold && (nbSeq > 24 /* heuristic */)) { |
|
455 | 459 | const void* const pStart = *DTablePtr; |
@@ -461,15 +465,15 b' static size_t ZSTD_buildSeqTable(ZSTD_se' | |||
|
461 | 465 | { unsigned tableLog; |
|
462 | 466 | S16 norm[MaxSeq+1]; |
|
463 | 467 | size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize); |
|
464 |
|
|
|
465 |
|
|
|
468 | RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected); | |
|
469 | RETURN_ERROR_IF(tableLog > maxLog, corruption_detected); | |
|
466 | 470 | ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog); |
|
467 | 471 | *DTablePtr = DTableSpace; |
|
468 | 472 | return headerSize; |
|
469 | 473 | } |
|
470 | default : /* impossible */ | |
|
474 | default : | |
|
471 | 475 | assert(0); |
|
472 |
|
|
|
476 | RETURN_ERROR(GENERIC, "impossible"); | |
|
473 | 477 | } |
|
474 | 478 | } |
|
475 | 479 | |
@@ -483,28 +487,28 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||
|
483 | 487 | DEBUGLOG(5, "ZSTD_decodeSeqHeaders"); |
|
484 | 488 | |
|
485 | 489 | /* check */ |
|
486 |
|
|
|
490 | RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong); | |
|
487 | 491 | |
|
488 | 492 | /* SeqHead */ |
|
489 | 493 | nbSeq = *ip++; |
|
490 | 494 | if (!nbSeq) { |
|
491 | 495 | *nbSeqPtr=0; |
|
492 |
|
|
|
496 | RETURN_ERROR_IF(srcSize != 1, srcSize_wrong); | |
|
493 | 497 | return 1; |
|
494 | 498 | } |
|
495 | 499 | if (nbSeq > 0x7F) { |
|
496 | 500 | if (nbSeq == 0xFF) { |
|
497 |
|
|
|
501 | RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong); | |
|
498 | 502 | nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2; |
|
499 | 503 | } else { |
|
500 |
|
|
|
504 | RETURN_ERROR_IF(ip >= iend, srcSize_wrong); | |
|
501 | 505 | nbSeq = ((nbSeq-0x80)<<8) + *ip++; |
|
502 | 506 | } |
|
503 | 507 | } |
|
504 | 508 | *nbSeqPtr = nbSeq; |
|
505 | 509 | |
|
506 | 510 | /* FSE table descriptors */ |
|
507 |
|
|
|
511 | RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong); /* minimum possible size: 1 byte for symbol encoding types */ | |
|
508 | 512 | { symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6); |
|
509 | 513 | symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3); |
|
510 | 514 | symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3); |
@@ -517,7 +521,7 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||
|
517 | 521 | LL_base, LL_bits, |
|
518 | 522 | LL_defaultDTable, dctx->fseEntropy, |
|
519 | 523 | dctx->ddictIsCold, nbSeq); |
|
520 |
|
|
|
524 | RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected); | |
|
521 | 525 | ip += llhSize; |
|
522 | 526 | } |
|
523 | 527 | |
@@ -527,7 +531,7 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||
|
527 | 531 | OF_base, OF_bits, |
|
528 | 532 | OF_defaultDTable, dctx->fseEntropy, |
|
529 | 533 | dctx->ddictIsCold, nbSeq); |
|
530 |
|
|
|
534 | RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected); | |
|
531 | 535 | ip += ofhSize; |
|
532 | 536 | } |
|
533 | 537 | |
@@ -537,7 +541,7 b' size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* ' | |||
|
537 | 541 | ML_base, ML_bits, |
|
538 | 542 | ML_defaultDTable, dctx->fseEntropy, |
|
539 | 543 | dctx->ddictIsCold, nbSeq); |
|
540 |
|
|
|
544 | RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected); | |
|
541 | 545 | ip += mlhSize; |
|
542 | 546 | } |
|
543 | 547 | } |
@@ -590,8 +594,8 b' size_t ZSTD_execSequenceLast7(BYTE* op,' | |||
|
590 | 594 | const BYTE* match = oLitEnd - sequence.offset; |
|
591 | 595 | |
|
592 | 596 | /* check */ |
|
593 |
|
|
|
594 |
|
|
|
597 | RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must fit within dstBuffer"); | |
|
598 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "try to read beyond literal buffer"); | |
|
595 | 599 | |
|
596 | 600 | /* copy literals */ |
|
597 | 601 | while (op < oLitEnd) *op++ = *(*litPtr)++; |
@@ -599,7 +603,7 b' size_t ZSTD_execSequenceLast7(BYTE* op,' | |||
|
599 | 603 | /* copy Match */ |
|
600 | 604 | if (sequence.offset > (size_t)(oLitEnd - base)) { |
|
601 | 605 | /* offset beyond prefix */ |
|
602 |
|
|
|
606 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - vBase),corruption_detected); | |
|
603 | 607 | match = dictEnd - (base-match); |
|
604 | 608 | if (match + sequence.matchLength <= dictEnd) { |
|
605 | 609 | memmove(oLitEnd, match, sequence.matchLength); |
@@ -631,22 +635,22 b' size_t ZSTD_execSequence(BYTE* op,' | |||
|
631 | 635 | const BYTE* match = oLitEnd - sequence.offset; |
|
632 | 636 | |
|
633 | 637 | /* check */ |
|
634 |
|
|
|
635 |
|
|
|
638 | RETURN_ERROR_IF(oMatchEnd>oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); | |
|
639 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); | |
|
636 | 640 | if (oLitEnd>oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd); |
|
637 | 641 | |
|
638 | 642 | /* copy Literals */ |
|
643 | if (sequence.litLength > 8) | |
|
644 | ZSTD_wildcopy_16min(op, (*litPtr), sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
645 | else | |
|
639 | 646 | ZSTD_copy8(op, *litPtr); |
|
640 | if (sequence.litLength > 8) | |
|
641 | ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
642 | 647 | op = oLitEnd; |
|
643 | 648 | *litPtr = iLitEnd; /* update for next sequence */ |
|
644 | 649 | |
|
645 | 650 | /* copy Match */ |
|
646 | 651 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { |
|
647 | 652 | /* offset beyond prefix -> go into extDict */ |
|
648 |
|
|
|
649 | return ERROR(corruption_detected); | |
|
653 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected); | |
|
650 | 654 | match = dictEnd + (match - prefixStart); |
|
651 | 655 | if (match + sequence.matchLength <= dictEnd) { |
|
652 | 656 | memmove(oLitEnd, match, sequence.matchLength); |
@@ -686,13 +690,13 b' size_t ZSTD_execSequence(BYTE* op,' | |||
|
686 | 690 | |
|
687 | 691 | if (oMatchEnd > oend-(16-MINMATCH)) { |
|
688 | 692 | if (op < oend_w) { |
|
689 | ZSTD_wildcopy(op, match, oend_w - op); | |
|
693 | ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); | |
|
690 | 694 | match += oend_w - op; |
|
691 | 695 | op = oend_w; |
|
692 | 696 | } |
|
693 | 697 | while (op < oMatchEnd) *op++ = *match++; |
|
694 | 698 | } else { |
|
695 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ | |
|
699 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ | |
|
696 | 700 | } |
|
697 | 701 | return sequenceLength; |
|
698 | 702 | } |
@@ -712,21 +716,23 b' size_t ZSTD_execSequenceLong(BYTE* op,' | |||
|
712 | 716 | const BYTE* match = sequence.match; |
|
713 | 717 | |
|
714 | 718 | /* check */ |
|
715 |
|
|
|
716 |
|
|
|
719 | RETURN_ERROR_IF(oMatchEnd > oend, dstSize_tooSmall, "last match must start at a minimum distance of WILDCOPY_OVERLENGTH from oend"); | |
|
720 | RETURN_ERROR_IF(iLitEnd > litLimit, corruption_detected, "over-read beyond lit buffer"); | |
|
717 | 721 | if (oLitEnd > oend_w) return ZSTD_execSequenceLast7(op, oend, sequence, litPtr, litLimit, prefixStart, dictStart, dictEnd); |
|
718 | 722 | |
|
719 | 723 | /* copy Literals */ |
|
724 | if (sequence.litLength > 8) | |
|
725 | ZSTD_wildcopy_16min(op, *litPtr, sequence.litLength, ZSTD_no_overlap); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
726 | else | |
|
720 | 727 | ZSTD_copy8(op, *litPtr); /* note : op <= oLitEnd <= oend_w == oend - 8 */ |
|
721 | if (sequence.litLength > 8) | |
|
722 | ZSTD_wildcopy(op+8, (*litPtr)+8, sequence.litLength - 8); /* note : since oLitEnd <= oend-WILDCOPY_OVERLENGTH, no risk of overwrite beyond oend */ | |
|
728 | ||
|
723 | 729 | op = oLitEnd; |
|
724 | 730 | *litPtr = iLitEnd; /* update for next sequence */ |
|
725 | 731 | |
|
726 | 732 | /* copy Match */ |
|
727 | 733 | if (sequence.offset > (size_t)(oLitEnd - prefixStart)) { |
|
728 | 734 | /* offset beyond prefix */ |
|
729 |
|
|
|
735 | RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - dictStart), corruption_detected); | |
|
730 | 736 | if (match + sequence.matchLength <= dictEnd) { |
|
731 | 737 | memmove(oLitEnd, match, sequence.matchLength); |
|
732 | 738 | return sequenceLength; |
@@ -766,13 +772,13 b' size_t ZSTD_execSequenceLong(BYTE* op,' | |||
|
766 | 772 | |
|
767 | 773 | if (oMatchEnd > oend-(16-MINMATCH)) { |
|
768 | 774 | if (op < oend_w) { |
|
769 | ZSTD_wildcopy(op, match, oend_w - op); | |
|
775 | ZSTD_wildcopy(op, match, oend_w - op, ZSTD_overlap_src_before_dst); | |
|
770 | 776 | match += oend_w - op; |
|
771 | 777 | op = oend_w; |
|
772 | 778 | } |
|
773 | 779 | while (op < oMatchEnd) *op++ = *match++; |
|
774 | 780 | } else { |
|
775 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8); /* works even if matchLength < 8 */ | |
|
781 | ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst); /* works even if matchLength < 8 */ | |
|
776 | 782 | } |
|
777 | 783 | return sequenceLength; |
|
778 | 784 | } |
@@ -801,7 +807,7 b' ZSTD_updateFseState(ZSTD_fseState* DStat' | |||
|
801 | 807 | /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum |
|
802 | 808 | * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1) |
|
803 | 809 | * bits before reloading. This value is the maximum number of bytes we read |
|
804 | * after reloading when we are decoding long offets. | |
|
810 | * after reloading when we are decoding long offsets. | |
|
805 | 811 | */ |
|
806 | 812 | #define LONG_OFFSETS_MAX_EXTRA_BITS_32 \ |
|
807 | 813 | (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32 \ |
@@ -889,6 +895,7 b' ZSTD_decodeSequence(seqState_t* seqState' | |||
|
889 | 895 | } |
|
890 | 896 | |
|
891 | 897 | FORCE_INLINE_TEMPLATE size_t |
|
898 | DONT_VECTORIZE | |
|
892 | 899 | ZSTD_decompressSequences_body( ZSTD_DCtx* dctx, |
|
893 | 900 | void* dst, size_t maxDstSize, |
|
894 | 901 | const void* seqStart, size_t seqSize, int nbSeq, |
@@ -911,11 +918,18 b' ZSTD_decompressSequences_body( ZSTD_DCtx' | |||
|
911 | 918 | seqState_t seqState; |
|
912 | 919 | dctx->fseEntropy = 1; |
|
913 | 920 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; } |
|
914 | CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); | |
|
921 | RETURN_ERROR_IF( | |
|
922 | ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), | |
|
923 | corruption_detected); | |
|
915 | 924 | ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); |
|
916 | 925 | ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); |
|
917 | 926 | ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); |
|
918 | 927 | |
|
928 | ZSTD_STATIC_ASSERT( | |
|
929 | BIT_DStream_unfinished < BIT_DStream_completed && | |
|
930 | BIT_DStream_endOfBuffer < BIT_DStream_completed && | |
|
931 | BIT_DStream_completed < BIT_DStream_overflow); | |
|
932 | ||
|
919 | 933 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && nbSeq ; ) { |
|
920 | 934 | nbSeq--; |
|
921 | 935 | { seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset); |
@@ -927,14 +941,15 b' ZSTD_decompressSequences_body( ZSTD_DCtx' | |||
|
927 | 941 | |
|
928 | 942 | /* check if reached exact end */ |
|
929 | 943 | DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq); |
|
930 |
|
|
|
944 | RETURN_ERROR_IF(nbSeq, corruption_detected); | |
|
945 | RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected); | |
|
931 | 946 | /* save reps for next block */ |
|
932 | 947 | { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); } |
|
933 | 948 | } |
|
934 | 949 | |
|
935 | 950 | /* last literal segment */ |
|
936 | 951 | { size_t const lastLLSize = litEnd - litPtr; |
|
937 |
|
|
|
952 | RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); | |
|
938 | 953 | memcpy(op, litPtr, lastLLSize); |
|
939 | 954 | op += lastLLSize; |
|
940 | 955 | } |
@@ -1066,7 +1081,9 b' ZSTD_decompressSequencesLong_body(' | |||
|
1066 | 1081 | seqState.pos = (size_t)(op-prefixStart); |
|
1067 | 1082 | seqState.dictEnd = dictEnd; |
|
1068 | 1083 | assert(iend >= ip); |
|
1069 | CHECK_E(BIT_initDStream(&seqState.DStream, ip, iend-ip), corruption_detected); | |
|
1084 | RETURN_ERROR_IF( | |
|
1085 | ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)), | |
|
1086 | corruption_detected); | |
|
1070 | 1087 | ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr); |
|
1071 | 1088 | ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr); |
|
1072 | 1089 | ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr); |
@@ -1076,7 +1093,7 b' ZSTD_decompressSequencesLong_body(' | |||
|
1076 | 1093 | sequences[seqNb] = ZSTD_decodeSequenceLong(&seqState, isLongOffset); |
|
1077 | 1094 | PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */ |
|
1078 | 1095 | } |
|
1079 |
|
|
|
1096 | RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected); | |
|
1080 | 1097 | |
|
1081 | 1098 | /* decode and decompress */ |
|
1082 | 1099 | for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) { |
@@ -1087,7 +1104,7 b' ZSTD_decompressSequencesLong_body(' | |||
|
1087 | 1104 | sequences[seqNb & STORED_SEQS_MASK] = sequence; |
|
1088 | 1105 | op += oneSeqSize; |
|
1089 | 1106 | } |
|
1090 |
|
|
|
1107 | RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected); | |
|
1091 | 1108 | |
|
1092 | 1109 | /* finish queue */ |
|
1093 | 1110 | seqNb -= seqAdvance; |
@@ -1103,7 +1120,7 b' ZSTD_decompressSequencesLong_body(' | |||
|
1103 | 1120 | |
|
1104 | 1121 | /* last literal segment */ |
|
1105 | 1122 | { size_t const lastLLSize = litEnd - litPtr; |
|
1106 |
|
|
|
1123 | RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall); | |
|
1107 | 1124 | memcpy(op, litPtr, lastLLSize); |
|
1108 | 1125 | op += lastLLSize; |
|
1109 | 1126 | } |
@@ -1127,6 +1144,7 b' ZSTD_decompressSequencesLong_default(ZST' | |||
|
1127 | 1144 | |
|
1128 | 1145 | #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG |
|
1129 | 1146 | static TARGET_ATTRIBUTE("bmi2") size_t |
|
1147 | DONT_VECTORIZE | |
|
1130 | 1148 | ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx, |
|
1131 | 1149 | void* dst, size_t maxDstSize, |
|
1132 | 1150 | const void* seqStart, size_t seqSize, int nbSeq, |
@@ -1176,7 +1194,7 b' ZSTD_decompressSequences(ZSTD_DCtx* dctx' | |||
|
1176 | 1194 | /* ZSTD_decompressSequencesLong() : |
|
1177 | 1195 | * decompression function triggered when a minimum share of offsets is considered "long", |
|
1178 | 1196 | * aka out of cache. |
|
1179 |
* note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes mea |
|
|
1197 | * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance". | |
|
1180 | 1198 | * This function will try to mitigate main memory latency through the use of prefetching */ |
|
1181 | 1199 | static size_t |
|
1182 | 1200 | ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx, |
@@ -1240,7 +1258,7 b' ZSTD_decompressBlock_internal(ZSTD_DCtx*' | |||
|
1240 | 1258 | ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN)))); |
|
1241 | 1259 | DEBUGLOG(5, "ZSTD_decompressBlock_internal (size : %u)", (U32)srcSize); |
|
1242 | 1260 | |
|
1243 |
|
|
|
1261 | RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong); | |
|
1244 | 1262 | |
|
1245 | 1263 | /* Decode literals section */ |
|
1246 | 1264 | { size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize); |
@@ -89,6 +89,12 b' typedef enum { ZSTDds_getFrameHeaderSize' | |||
|
89 | 89 | typedef enum { zdss_init=0, zdss_loadHeader, |
|
90 | 90 | zdss_read, zdss_load, zdss_flush } ZSTD_dStreamStage; |
|
91 | 91 | |
|
/* Dictionary usage policy; ZSTD_DCtx_s keeps one in its `dictUses` field. */
typedef enum {
    /* Use the dictionary indefinitely */
    ZSTD_use_indefinitely = -1,
    /* Do not use the dictionary (if one exists free it) */
    ZSTD_dont_use = 0,
    /* Use the dictionary once and set to ZSTD_dont_use */
    ZSTD_use_once = 1
} ZSTD_dictUses_e;
|
97 | ||
|
92 | 98 | struct ZSTD_DCtx_s |
|
93 | 99 | { |
|
94 | 100 | const ZSTD_seqSymbol* LLTptr; |
@@ -123,6 +129,7 b' struct ZSTD_DCtx_s' | |||
|
123 | 129 | const ZSTD_DDict* ddict; /* set by ZSTD_initDStream_usingDDict(), or ZSTD_DCtx_refDDict() */ |
|
124 | 130 | U32 dictID; |
|
125 | 131 | int ddictIsCold; /* if == 1 : dictionary is "new" for working context, and presumed "cold" (not in cpu cache) */ |
|
132 | ZSTD_dictUses_e dictUses; | |
|
126 | 133 | |
|
127 | 134 | /* streaming */ |
|
128 | 135 | ZSTD_dStreamStage streamStage; |
@@ -391,7 +391,7 b' static void COVER_group(COVER_ctx_t *ctx' | |||
|
391 | 391 | * |
|
392 | 392 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) |
|
393 | 393 | * |
|
394 | * Once the dmer d is in the dictionay we set F(d) = 0. | |
|
394 | * Once the dmer d is in the dictionary we set F(d) = 0. | |
|
395 | 395 | */ |
|
396 | 396 | static COVER_segment_t COVER_selectSegment(const COVER_ctx_t *ctx, U32 *freqs, |
|
397 | 397 | COVER_map_t *activeDmers, U32 begin, |
@@ -435,7 +435,7 b' static COVER_segment_t COVER_selectSegme' | |||
|
435 | 435 | U32 *delDmerOcc = COVER_map_at(activeDmers, delDmer); |
|
436 | 436 | activeSegment.begin += 1; |
|
437 | 437 | *delDmerOcc -= 1; |
|
438 | /* If this is the last occurence of the dmer, subtract its score */ | |
|
438 | /* If this is the last occurrence of the dmer, subtract its score */ | |
|
439 | 439 | if (*delDmerOcc == 0) { |
|
440 | 440 | COVER_map_remove(activeDmers, delDmer); |
|
441 | 441 | activeSegment.score -= freqs[delDmer]; |
@@ -526,10 +526,10 b' static void COVER_ctx_destroy(COVER_ctx_' | |||
|
526 | 526 | * Prepare a context for dictionary building. |
|
527 | 527 | * The context is only dependent on the parameter `d` and can be used multiple
|
528 | 528 | * times. |
|
529 |
* Returns |
|
|
529 | * Returns 0 on success or error code on error. | |
|
530 | 530 | * The context must be destroyed with `COVER_ctx_destroy()`. |
|
531 | 531 | */ |
|
532 |
static |
|
|
532 | static size_t COVER_ctx_init(COVER_ctx_t *ctx, const void *samplesBuffer, | |
|
533 | 533 | const size_t *samplesSizes, unsigned nbSamples, |
|
534 | 534 | unsigned d, double splitPoint) { |
|
535 | 535 | const BYTE *const samples = (const BYTE *)samplesBuffer; |
@@ -544,17 +544,17 b' static int COVER_ctx_init(COVER_ctx_t *c' | |||
|
544 | 544 | totalSamplesSize >= (size_t)COVER_MAX_SAMPLES_SIZE) { |
|
545 | 545 | DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", |
|
546 | 546 | (unsigned)(totalSamplesSize>>20), (COVER_MAX_SAMPLES_SIZE >> 20)); |
|
547 | return 0; | |
|
547 | return ERROR(srcSize_wrong); | |
|
548 | 548 | } |
|
549 | 549 | /* Check if there are at least 5 training samples */ |
|
550 | 550 | if (nbTrainSamples < 5) { |
|
551 | 551 | DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid.", nbTrainSamples); |
|
552 | return 0; | |
|
552 | return ERROR(srcSize_wrong); | |
|
553 | 553 | } |
|
554 | 554 | /* Check if there's testing sample */ |
|
555 | 555 | if (nbTestSamples < 1) { |
|
556 | 556 | DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.", nbTestSamples); |
|
557 | return 0; | |
|
557 | return ERROR(srcSize_wrong); | |
|
558 | 558 | } |
|
559 | 559 | /* Zero the context */ |
|
560 | 560 | memset(ctx, 0, sizeof(*ctx)); |
@@ -577,7 +577,7 b' static int COVER_ctx_init(COVER_ctx_t *c' | |||
|
577 | 577 | if (!ctx->suffix || !ctx->dmerAt || !ctx->offsets) { |
|
578 | 578 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers\n"); |
|
579 | 579 | COVER_ctx_destroy(ctx); |
|
580 | return 0; | |
|
580 | return ERROR(memory_allocation); | |
|
581 | 581 | } |
|
582 | 582 | ctx->freqs = NULL; |
|
583 | 583 | ctx->d = d; |
@@ -624,7 +624,40 b' static int COVER_ctx_init(COVER_ctx_t *c' | |||
|
624 | 624 | (ctx->d <= 8 ? &COVER_cmp8 : &COVER_cmp), &COVER_group); |
|
625 | 625 | ctx->freqs = ctx->suffix; |
|
626 | 626 | ctx->suffix = NULL; |
|
627 |
return |
|
|
627 | return 0; | |
|
628 | } | |
|
629 | ||
|
630 | void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel) | |
|
631 | { | |
|
632 | const double ratio = (double)nbDmers / maxDictSize; | |
|
633 | if (ratio >= 10) { | |
|
634 | return; | |
|
635 | } | |
|
636 | LOCALDISPLAYLEVEL(displayLevel, 1, | |
|
637 | "WARNING: The maximum dictionary size %u is too large " | |
|
638 | "compared to the source size %u! " | |
|
639 | "size(source)/size(dictionary) = %f, but it should be >= " | |
|
640 | "10! This may lead to a subpar dictionary! We recommend " | |
|
641 | "training on sources at least 10x, and up to 100x the " | |
|
642 | "size of the dictionary!\n", (U32)maxDictSize, | |
|
643 | (U32)nbDmers, ratio); | |
|
644 | } | |
|
645 | ||
|
646 | COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, | |
|
647 | U32 nbDmers, U32 k, U32 passes) | |
|
648 | { | |
|
649 | const U32 minEpochSize = k * 10; | |
|
650 | COVER_epoch_info_t epochs; | |
|
651 | epochs.num = MAX(1, maxDictSize / k / passes); | |
|
652 | epochs.size = nbDmers / epochs.num; | |
|
653 | if (epochs.size >= minEpochSize) { | |
|
654 | assert(epochs.size * epochs.num <= nbDmers); | |
|
655 | return epochs; | |
|
656 | } | |
|
657 | epochs.size = MIN(minEpochSize, nbDmers); | |
|
658 | epochs.num = nbDmers / epochs.size; | |
|
659 | assert(epochs.size * epochs.num <= nbDmers); | |
|
660 | return epochs; | |
|
628 | 661 | } |
|
629 | 662 | |
|
630 | 663 | /** |
@@ -636,28 +669,34 b' static size_t COVER_buildDictionary(cons' | |||
|
636 | 669 | ZDICT_cover_params_t parameters) { |
|
637 | 670 | BYTE *const dict = (BYTE *)dictBuffer; |
|
638 | 671 | size_t tail = dictBufferCapacity; |
|
639 |
/* Divide the data |
|
|
640 | * We will select at least one segment from each epoch. | |
|
641 | */ | |
|
642 | const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k / 4)); | |
|
643 | const unsigned epochSize = (U32)(ctx->suffixSize / epochs); | |
|
672 | /* Divide the data into epochs. We will select one segment from each epoch. */ | |
|
673 | const COVER_epoch_info_t epochs = COVER_computeEpochs( | |
|
674 | (U32)dictBufferCapacity, (U32)ctx->suffixSize, parameters.k, 4); | |
|
675 | const size_t maxZeroScoreRun = MAX(10, MIN(100, epochs.num >> 3)); | |
|
676 | size_t zeroScoreRun = 0; | |
|
644 | 677 | size_t epoch; |
|
645 | 678 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", |
|
646 |
epochs, |
|
|
679 | (U32)epochs.num, (U32)epochs.size); | |
|
647 | 680 | /* Loop through the epochs until there are no more segments or the dictionary |
|
648 | 681 | * is full. |
|
649 | 682 | */ |
|
650 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { | |
|
651 |
const U32 epochBegin = (U32)(epoch * epoch |
|
|
652 |
const U32 epochEnd = epochBegin + epoch |
|
|
683 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { | |
|
684 | const U32 epochBegin = (U32)(epoch * epochs.size); | |
|
685 | const U32 epochEnd = epochBegin + epochs.size; | |
|
653 | 686 | size_t segmentSize; |
|
654 | 687 | /* Select a segment */ |
|
655 | 688 | COVER_segment_t segment = COVER_selectSegment( |
|
656 | 689 | ctx, freqs, activeDmers, epochBegin, epochEnd, parameters); |
|
657 |
/* If the segment covers no dmers, then we are out of content |
|
|
690 | /* If the segment covers no dmers, then we are out of content. | |
|
691 | * There may be new content in other epochs, so continue for some time. | |
|
692 | */ | |
|
658 | 693 | if (segment.score == 0) { |
|
694 | if (++zeroScoreRun >= maxZeroScoreRun) { | |
|
659 | 695 | break; |
|
660 | 696 | } |
|
697 | continue; | |
|
698 | } | |
|
699 | zeroScoreRun = 0; | |
|
661 | 700 | /* Trim the segment if necessary and if it is too small then we are done */ |
|
662 | 701 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); |
|
663 | 702 | if (segmentSize < parameters.d) { |
@@ -690,11 +729,11 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||
|
690 | 729 | /* Checks */ |
|
691 | 730 | if (!COVER_checkParameters(parameters, dictBufferCapacity)) { |
|
692 | 731 | DISPLAYLEVEL(1, "Cover parameters incorrect\n"); |
|
693 | return ERROR(GENERIC); | |
|
732 | return ERROR(parameter_outOfBound); | |
|
694 | 733 | } |
|
695 | 734 | if (nbSamples == 0) { |
|
696 | 735 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); |
|
697 |
return ERROR( |
|
|
736 | return ERROR(srcSize_wrong); | |
|
698 | 737 | } |
|
699 | 738 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
700 | 739 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
@@ -702,14 +741,18 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||
|
702 | 741 | return ERROR(dstSize_tooSmall); |
|
703 | 742 | } |
|
704 | 743 | /* Initialize context and activeDmers */ |
|
705 | if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | |
|
706 | parameters.d, parameters.splitPoint)) { | |
|
707 | return ERROR(GENERIC); | |
|
744 | { | |
|
745 | size_t const initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | |
|
746 | parameters.d, parameters.splitPoint); | |
|
747 | if (ZSTD_isError(initVal)) { | |
|
748 | return initVal; | |
|
708 | 749 | } |
|
750 | } | |
|
751 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, g_displayLevel); | |
|
709 | 752 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { |
|
710 | 753 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); |
|
711 | 754 | COVER_ctx_destroy(&ctx); |
|
712 |
return ERROR( |
|
|
755 | return ERROR(memory_allocation); | |
|
713 | 756 | } |
|
714 | 757 | |
|
715 | 758 | DISPLAYLEVEL(2, "Building dictionary\n"); |
@@ -770,7 +813,7 b' size_t COVER_checkTotalCompressedSize(co' | |||
|
770 | 813 | cctx, dst, dstCapacity, samples + offsets[i], |
|
771 | 814 | samplesSizes[i], cdict); |
|
772 | 815 | if (ZSTD_isError(size)) { |
|
773 |
totalCompressedSize = |
|
|
816 | totalCompressedSize = size; | |
|
774 | 817 | goto _compressCleanup; |
|
775 | 818 | } |
|
776 | 819 | totalCompressedSize += size; |
@@ -846,9 +889,11 b' void COVER_best_start(COVER_best_t *best' | |||
|
846 | 889 | * Decrements liveJobs and signals any waiting threads if liveJobs == 0. |
|
847 | 890 | * If this dictionary is the best so far save it and its parameters. |
|
848 | 891 | */ |
|
849 |
void COVER_best_finish(COVER_best_t *best, |
|
|
850 | ZDICT_cover_params_t parameters, void *dict, | |
|
851 | size_t dictSize) { | |
|
892 | void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, | |
|
893 | COVER_dictSelection_t selection) { | |
|
894 | void* dict = selection.dictContent; | |
|
895 | size_t compressedSize = selection.totalCompressedSize; | |
|
896 | size_t dictSize = selection.dictSize; | |
|
852 | 897 | if (!best) { |
|
853 | 898 | return; |
|
854 | 899 | } |
@@ -874,11 +919,13 b' void COVER_best_finish(COVER_best_t *bes' | |||
|
874 | 919 | } |
|
875 | 920 | } |
|
876 | 921 | /* Save the dictionary, parameters, and size */ |
|
922 | if (dict) { | |
|
877 | 923 | memcpy(best->dict, dict, dictSize); |
|
878 | 924 | best->dictSize = dictSize; |
|
879 | 925 | best->parameters = parameters; |
|
880 | 926 | best->compressedSize = compressedSize; |
|
881 | 927 | } |
|
928 | } | |
|
882 | 929 | if (liveJobs == 0) { |
|
883 | 930 | ZSTD_pthread_cond_broadcast(&best->cond); |
|
884 | 931 | } |
@@ -886,6 +933,111 b' void COVER_best_finish(COVER_best_t *bes' | |||
|
886 | 933 | } |
|
887 | 934 | } |
|
888 | 935 | |
|
936 | COVER_dictSelection_t COVER_dictSelectionError(size_t error) { | |
|
937 | COVER_dictSelection_t selection = { NULL, 0, error }; | |
|
938 | return selection; | |
|
939 | } | |
|
940 | ||
|
941 | unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection) { | |
|
942 | return (ZSTD_isError(selection.totalCompressedSize) || !selection.dictContent); | |
|
943 | } | |
|
944 | ||
|
945 | void COVER_dictSelectionFree(COVER_dictSelection_t selection){ | |
|
946 | free(selection.dictContent); | |
|
947 | } | |
|
948 | ||
|
949 | COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, | |
|
950 | size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, | |
|
951 | size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize) { | |
|
952 | ||
|
953 | size_t largestDict = 0; | |
|
954 | size_t largestCompressed = 0; | |
|
955 | BYTE* customDictContentEnd = customDictContent + dictContentSize; | |
|
956 | ||
|
957 | BYTE * largestDictbuffer = (BYTE *)malloc(dictContentSize); | |
|
958 | BYTE * candidateDictBuffer = (BYTE *)malloc(dictContentSize); | |
|
959 | double regressionTolerance = ((double)params.shrinkDictMaxRegression / 100.0) + 1.00; | |
|
960 | ||
|
961 | if (!largestDictbuffer || !candidateDictBuffer) { | |
|
962 | free(largestDictbuffer); | |
|
963 | free(candidateDictBuffer); | |
|
964 | return COVER_dictSelectionError(dictContentSize); | |
|
965 | } | |
|
966 | ||
|
967 | /* Initial dictionary size and compressed size */ | |
|
968 | memcpy(largestDictbuffer, customDictContent, dictContentSize); | |
|
969 | dictContentSize = ZDICT_finalizeDictionary( | |
|
970 | largestDictbuffer, dictContentSize, customDictContent, dictContentSize, | |
|
971 | samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); | |
|
972 | ||
|
973 | if (ZDICT_isError(dictContentSize)) { | |
|
974 | free(largestDictbuffer); | |
|
975 | free(candidateDictBuffer); | |
|
976 | return COVER_dictSelectionError(dictContentSize); | |
|
977 | } | |
|
978 | ||
|
979 | totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, | |
|
980 | samplesBuffer, offsets, | |
|
981 | nbCheckSamples, nbSamples, | |
|
982 | largestDictbuffer, dictContentSize); | |
|
983 | ||
|
984 | if (ZSTD_isError(totalCompressedSize)) { | |
|
985 | free(largestDictbuffer); | |
|
986 | free(candidateDictBuffer); | |
|
987 | return COVER_dictSelectionError(totalCompressedSize); | |
|
988 | } | |
|
989 | ||
|
990 | if (params.shrinkDict == 0) { | |
|
991 | COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; | |
|
992 | free(candidateDictBuffer); | |
|
993 | return selection; | |
|
994 | } | |
|
995 | ||
|
996 | largestDict = dictContentSize; | |
|
997 | largestCompressed = totalCompressedSize; | |
|
998 | dictContentSize = ZDICT_DICTSIZE_MIN; | |
|
999 | ||
|
1000 | /* Largest dict is initially at least ZDICT_DICTSIZE_MIN */ | |
|
1001 | while (dictContentSize < largestDict) { | |
|
1002 | memcpy(candidateDictBuffer, largestDictbuffer, largestDict); | |
|
1003 | dictContentSize = ZDICT_finalizeDictionary( | |
|
1004 | candidateDictBuffer, dictContentSize, customDictContentEnd - dictContentSize, dictContentSize, | |
|
1005 | samplesBuffer, samplesSizes, nbFinalizeSamples, params.zParams); | |
|
1006 | ||
|
1007 | if (ZDICT_isError(dictContentSize)) { | |
|
1008 | free(largestDictbuffer); | |
|
1009 | free(candidateDictBuffer); | |
|
1010 | return COVER_dictSelectionError(dictContentSize); | |
|
1011 | ||
|
1012 | } | |
|
1013 | ||
|
1014 | totalCompressedSize = COVER_checkTotalCompressedSize(params, samplesSizes, | |
|
1015 | samplesBuffer, offsets, | |
|
1016 | nbCheckSamples, nbSamples, | |
|
1017 | candidateDictBuffer, dictContentSize); | |
|
1018 | ||
|
1019 | if (ZSTD_isError(totalCompressedSize)) { | |
|
1020 | free(largestDictbuffer); | |
|
1021 | free(candidateDictBuffer); | |
|
1022 | return COVER_dictSelectionError(totalCompressedSize); | |
|
1023 | } | |
|
1024 | ||
|
1025 | if (totalCompressedSize <= largestCompressed * regressionTolerance) { | |
|
1026 | COVER_dictSelection_t selection = { candidateDictBuffer, dictContentSize, totalCompressedSize }; | |
|
1027 | free(largestDictbuffer); | |
|
1028 | return selection; | |
|
1029 | } | |
|
1030 | dictContentSize *= 2; | |
|
1031 | } | |
|
1032 | dictContentSize = largestDict; | |
|
1033 | totalCompressedSize = largestCompressed; | |
|
1034 | { | |
|
1035 | COVER_dictSelection_t selection = { largestDictbuffer, dictContentSize, totalCompressedSize }; | |
|
1036 | free(candidateDictBuffer); | |
|
1037 | return selection; | |
|
1038 | } | |
|
1039 | } | |
|
1040 | ||
|
889 | 1041 | /** |
|
890 | 1042 | * Parameters for COVER_tryParameters(). |
|
891 | 1043 | */ |
@@ -911,6 +1063,7 b' static void COVER_tryParameters(void *op' | |||
|
911 | 1063 | /* Allocate space for hash table, dict, and freqs */ |
|
912 | 1064 | COVER_map_t activeDmers; |
|
913 | 1065 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); |
|
1066 | COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); | |
|
914 | 1067 | U32 *freqs = (U32 *)malloc(ctx->suffixSize * sizeof(U32)); |
|
915 | 1068 | if (!COVER_map_init(&activeDmers, parameters.k - parameters.d + 1)) { |
|
916 | 1069 | DISPLAYLEVEL(1, "Failed to allocate dmer map: out of memory\n"); |
@@ -926,29 +1079,21 b' static void COVER_tryParameters(void *op' | |||
|
926 | 1079 | { |
|
927 | 1080 | const size_t tail = COVER_buildDictionary(ctx, freqs, &activeDmers, dict, |
|
928 | 1081 | dictBufferCapacity, parameters); |
|
929 | dictBufferCapacity = ZDICT_finalizeDictionary( | |
|
930 | dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | |
|
931 | ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, | |
|
932 | parameters.zParams); | |
|
933 | if (ZDICT_isError(dictBufferCapacity)) { | |
|
934 |
DISPLAYLEVEL(1, "Failed to |
|
|
1082 | selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, | |
|
1083 | ctx->samples, ctx->samplesSizes, (unsigned)ctx->nbTrainSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, | |
|
1084 | totalCompressedSize); | |
|
1085 | ||
|
1086 | if (COVER_dictSelectionIsError(selection)) { | |
|
1087 | DISPLAYLEVEL(1, "Failed to select dictionary\n"); | |
|
935 | 1088 | goto _cleanup; |
|
936 | 1089 | } |
|
937 | 1090 | } |
|
938 | /* Check total compressed size */ | |
|
939 | totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, | |
|
940 | ctx->samples, ctx->offsets, | |
|
941 | ctx->nbTrainSamples, ctx->nbSamples, | |
|
942 | dict, dictBufferCapacity); | |
|
943 | ||
|
944 | 1091 | _cleanup: |
|
945 | COVER_best_finish(data->best, totalCompressedSize, parameters, dict, | |
|
946 | dictBufferCapacity); | |
|
1092 | free(dict); | |
|
1093 | COVER_best_finish(data->best, parameters, selection); | |
|
947 | 1094 | free(data); |
|
948 | 1095 | COVER_map_destroy(&activeDmers); |
|
949 | if (dict) { | |
|
950 | free(dict); | |
|
951 | } | |
|
1096 | COVER_dictSelectionFree(selection); | |
|
952 | 1097 | if (freqs) { |
|
953 | 1098 | free(freqs); |
|
954 | 1099 | } |
@@ -970,6 +1115,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||
|
970 | 1115 | const unsigned kStepSize = MAX((kMaxK - kMinK) / kSteps, 1); |
|
971 | 1116 | const unsigned kIterations = |
|
972 | 1117 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); |
|
1118 | const unsigned shrinkDict = 0; | |
|
973 | 1119 | /* Local variables */ |
|
974 | 1120 | const int displayLevel = parameters->zParams.notificationLevel; |
|
975 | 1121 | unsigned iteration = 1; |
@@ -977,19 +1123,20 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||
|
977 | 1123 | unsigned k; |
|
978 | 1124 | COVER_best_t best; |
|
979 | 1125 | POOL_ctx *pool = NULL; |
|
1126 | int warned = 0; | |
|
980 | 1127 | |
|
981 | 1128 | /* Checks */ |
|
982 | 1129 | if (splitPoint <= 0 || splitPoint > 1) { |
|
983 | 1130 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); |
|
984 | return ERROR(GENERIC); | |
|
1131 | return ERROR(parameter_outOfBound); | |
|
985 | 1132 | } |
|
986 | 1133 | if (kMinK < kMaxD || kMaxK < kMinK) { |
|
987 | 1134 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect parameters\n"); |
|
988 | return ERROR(GENERIC); | |
|
1135 | return ERROR(parameter_outOfBound); | |
|
989 | 1136 | } |
|
990 | 1137 | if (nbSamples == 0) { |
|
991 | 1138 | DISPLAYLEVEL(1, "Cover must have at least one input file\n"); |
|
992 |
return ERROR( |
|
|
1139 | return ERROR(srcSize_wrong); | |
|
993 | 1140 | } |
|
994 | 1141 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
995 | 1142 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
@@ -1013,11 +1160,18 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||
|
1013 | 1160 | /* Initialize the context for this value of d */ |
|
1014 | 1161 | COVER_ctx_t ctx; |
|
1015 | 1162 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); |
|
1016 | if (!COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint)) { | |
|
1163 | { | |
|
1164 | const size_t initVal = COVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint); | |
|
1165 | if (ZSTD_isError(initVal)) { | |
|
1017 | 1166 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); |
|
1018 | 1167 | COVER_best_destroy(&best); |
|
1019 | 1168 | POOL_free(pool); |
|
1020 | return ERROR(GENERIC); | |
|
1169 | return initVal; | |
|
1170 | } | |
|
1171 | } | |
|
1172 | if (!warned) { | |
|
1173 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.suffixSize, displayLevel); | |
|
1174 | warned = 1; | |
|
1021 | 1175 | } |
|
1022 | 1176 | /* Loop through k reusing the same context */ |
|
1023 | 1177 | for (k = kMinK; k <= kMaxK; k += kStepSize) { |
@@ -1030,7 +1184,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||
|
1030 | 1184 | COVER_best_destroy(&best); |
|
1031 | 1185 | COVER_ctx_destroy(&ctx); |
|
1032 | 1186 | POOL_free(pool); |
|
1033 |
return ERROR( |
|
|
1187 | return ERROR(memory_allocation); | |
|
1034 | 1188 | } |
|
1035 | 1189 | data->ctx = &ctx; |
|
1036 | 1190 | data->best = &best; |
@@ -1040,6 +1194,7 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||
|
1040 | 1194 | data->parameters.d = d; |
|
1041 | 1195 | data->parameters.splitPoint = splitPoint; |
|
1042 | 1196 | data->parameters.steps = kSteps; |
|
1197 | data->parameters.shrinkDict = shrinkDict; | |
|
1043 | 1198 | data->parameters.zParams.notificationLevel = g_displayLevel; |
|
1044 | 1199 | /* Check the parameters */ |
|
1045 | 1200 | if (!COVER_checkParameters(data->parameters, dictBufferCapacity)) { |
@@ -39,6 +39,44 b' typedef struct {' | |||
|
39 | 39 | } COVER_segment_t; |
|
40 | 40 | |
|
41 | 41 | /** |
|
42 | *Number of epochs and size of each epoch. | |
|
43 | */ | |
|
44 | typedef struct { | |
|
45 | U32 num; | |
|
46 | U32 size; | |
|
47 | } COVER_epoch_info_t; | |
|
48 | ||
|
49 | /** | |
|
50 | * Struct used for the dictionary selection function. | |
|
51 | */ | |
|
52 | typedef struct COVER_dictSelection { | |
|
53 | BYTE* dictContent; | |
|
54 | size_t dictSize; | |
|
55 | size_t totalCompressedSize; | |
|
56 | } COVER_dictSelection_t; | |
|
57 | ||
|
58 | /** | |
|
59 | * Computes the number of epochs and the size of each epoch. | |
|
60 | * We will make sure that each epoch gets at least 10 * k bytes. | |
|
61 | * | |
|
62 | * The COVER algorithms divide the data up into epochs of equal size and | |
|
63 | * select one segment from each epoch. | |
|
64 | * | |
|
65 | * @param maxDictSize The maximum allowed dictionary size. | |
|
66 | * @param nbDmers The number of dmers we are training on. | |
|
67 | * @param k The parameter k (segment size). | |
|
68 | * @param passes The target number of passes over the dmer corpus. | |
|
69 | * More passes means a better dictionary. | |
|
70 | */ | |
|
71 | COVER_epoch_info_t COVER_computeEpochs(U32 maxDictSize, U32 nbDmers, | |
|
72 | U32 k, U32 passes); | |
|
73 | ||
|
74 | /** | |
|
75 | * Warns the user when their corpus is too small. | |
|
76 | */ | |
|
77 | void COVER_warnOnSmallCorpus(size_t maxDictSize, size_t nbDmers, int displayLevel); | |
|
78 | ||
|
79 | /** | |
|
42 | 80 | * Checks total compressed size of a dictionary |
|
43 | 81 | */ |
|
44 | 82 | size_t COVER_checkTotalCompressedSize(const ZDICT_cover_params_t parameters, |
@@ -78,6 +116,32 b' void COVER_best_start(COVER_best_t *best' | |||
|
78 | 116 | * Decrements liveJobs and signals any waiting threads if liveJobs == 0. |
|
79 | 117 | * If this dictionary is the best so far save it and its parameters. |
|
80 | 118 | */ |
|
81 |
void COVER_best_finish(COVER_best_t *best, |
|
|
82 | ZDICT_cover_params_t parameters, void *dict, | |
|
83 | size_t dictSize); | |
|
119 | void COVER_best_finish(COVER_best_t *best, ZDICT_cover_params_t parameters, | |
|
120 | COVER_dictSelection_t selection); | |
|
121 | /** | |
|
122 | * Error function for COVER_selectDict function. Checks if the return | |
|
123 | * value is an error. | |
|
124 | */ | |
|
125 | unsigned COVER_dictSelectionIsError(COVER_dictSelection_t selection); | |
|
126 | ||
|
127 | /** | |
|
128 | * Error function for COVER_selectDict function. Returns a struct where | |
|
129 | * return.totalCompressedSize is a ZSTD error. | |
|
130 | */ | |
|
131 | COVER_dictSelection_t COVER_dictSelectionError(size_t error); | |
|
132 | ||
|
133 | /** | |
|
134 | * Always call after selectDict is called to free up used memory from | |
|
135 | * newly created dictionary. | |
|
136 | */ | |
|
137 | void COVER_dictSelectionFree(COVER_dictSelection_t selection); | |
|
138 | ||
|
139 | /** | |
|
140 | * Called to finalize the dictionary and select one based on whether or not | |
|
141 | * the shrink-dict flag was enabled. If enabled the dictionary used is the | |
|
142 | * smallest dictionary within a specified regression of the compressed size | |
|
143 | * from the largest dictionary. | |
|
144 | */ | |
|
145 | COVER_dictSelection_t COVER_selectDict(BYTE* customDictContent, | |
|
146 | size_t dictContentSize, const BYTE* samplesBuffer, const size_t* samplesSizes, unsigned nbFinalizeSamples, | |
|
147 | size_t nbCheckSamples, size_t nbSamples, ZDICT_cover_params_t params, size_t* offsets, size_t totalCompressedSize); |
@@ -132,7 +132,7 b' typedef struct {' | |||
|
132 | 132 | * |
|
133 | 133 | * Score(S) = F(S_1) + F(S_2) + ... + F(S_{k-d+1}) |
|
134 | 134 | * |
|
135 | * Once the dmer with hash value d is in the dictionay we set F(d) = 0. | |
|
135 | * Once the dmer with hash value d is in the dictionary we set F(d) = 0. | |
|
136 | 136 | */ |
|
137 | 137 | static COVER_segment_t FASTCOVER_selectSegment(const FASTCOVER_ctx_t *ctx, |
|
138 | 138 | U32 *freqs, U32 begin, U32 end, |
@@ -161,7 +161,7 b' static COVER_segment_t FASTCOVER_selectS' | |||
|
161 | 161 | /* Get hash value of current dmer */ |
|
162 | 162 | const size_t idx = FASTCOVER_hashPtrToIndex(ctx->samples + activeSegment.end, f, d); |
|
163 | 163 | |
|
164 | /* Add frequency of this index to score if this is the first occurence of index in active segment */ | |
|
164 | /* Add frequency of this index to score if this is the first occurrence of index in active segment */ | |
|
165 | 165 | if (segmentFreqs[idx] == 0) { |
|
166 | 166 | activeSegment.score += freqs[idx]; |
|
167 | 167 | } |
@@ -287,10 +287,10 b' FASTCOVER_computeFrequency(U32* freqs, c' | |||
|
287 | 287 | * Prepare a context for dictionary building. |
|
288 | 288 | * The context is only dependent on the parameter `d` and can used multiple |
|
289 | 289 | * times. |
|
290 |
* Returns |
|
|
290 | * Returns 0 on success or error code on error. | |
|
291 | 291 | * The context must be destroyed with `FASTCOVER_ctx_destroy()`. |
|
292 | 292 | */ |
|
293 |
static |
|
|
293 | static size_t | |
|
294 | 294 | FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx, |
|
295 | 295 | const void* samplesBuffer, |
|
296 | 296 | const size_t* samplesSizes, unsigned nbSamples, |
@@ -310,19 +310,19 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,' | |||
|
310 | 310 | totalSamplesSize >= (size_t)FASTCOVER_MAX_SAMPLES_SIZE) { |
|
311 | 311 | DISPLAYLEVEL(1, "Total samples size is too large (%u MB), maximum size is %u MB\n", |
|
312 | 312 | (unsigned)(totalSamplesSize >> 20), (FASTCOVER_MAX_SAMPLES_SIZE >> 20)); |
|
313 |
return |
|
|
313 | return ERROR(srcSize_wrong); | |
|
314 | 314 | } |
|
315 | 315 | |
|
316 | 316 | /* Check if there are at least 5 training samples */ |
|
317 | 317 | if (nbTrainSamples < 5) { |
|
318 | 318 | DISPLAYLEVEL(1, "Total number of training samples is %u and is invalid\n", nbTrainSamples); |
|
319 |
return |
|
|
319 | return ERROR(srcSize_wrong); | |
|
320 | 320 | } |
|
321 | 321 | |
|
322 | 322 | /* Check if there's testing sample */ |
|
323 | 323 | if (nbTestSamples < 1) { |
|
324 | 324 | DISPLAYLEVEL(1, "Total number of testing samples is %u and is invalid.\n", nbTestSamples); |
|
325 |
return |
|
|
325 | return ERROR(srcSize_wrong); | |
|
326 | 326 | } |
|
327 | 327 | |
|
328 | 328 | /* Zero the context */ |
@@ -347,7 +347,7 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,' | |||
|
347 | 347 | if (ctx->offsets == NULL) { |
|
348 | 348 | DISPLAYLEVEL(1, "Failed to allocate scratch buffers \n"); |
|
349 | 349 | FASTCOVER_ctx_destroy(ctx); |
|
350 | return 0; | |
|
350 | return ERROR(memory_allocation); | |
|
351 | 351 | } |
|
352 | 352 | |
|
353 | 353 | /* Fill offsets from the samplesSizes */ |
@@ -364,13 +364,13 b' FASTCOVER_ctx_init(FASTCOVER_ctx_t* ctx,' | |||
|
364 | 364 | if (ctx->freqs == NULL) { |
|
365 | 365 | DISPLAYLEVEL(1, "Failed to allocate frequency table \n"); |
|
366 | 366 | FASTCOVER_ctx_destroy(ctx); |
|
367 | return 0; | |
|
367 | return ERROR(memory_allocation); | |
|
368 | 368 | } |
|
369 | 369 | |
|
370 | 370 | DISPLAYLEVEL(2, "Computing frequencies\n"); |
|
371 | 371 | FASTCOVER_computeFrequency(ctx->freqs, ctx); |
|
372 | 372 | |
|
373 |
return |
|
|
373 | return 0; | |
|
374 | 374 | } |
|
375 | 375 | |
|
376 | 376 | |
@@ -386,29 +386,35 b' FASTCOVER_buildDictionary(const FASTCOVE' | |||
|
386 | 386 | { |
|
387 | 387 | BYTE *const dict = (BYTE *)dictBuffer; |
|
388 | 388 | size_t tail = dictBufferCapacity; |
|
389 |
/* Divide the data |
|
|
390 | * We will select at least one segment from each epoch. | |
|
391 | */ | |
|
392 | const unsigned epochs = MAX(1, (U32)(dictBufferCapacity / parameters.k)); | |
|
393 | const unsigned epochSize = (U32)(ctx->nbDmers / epochs); | |
|
389 | /* Divide the data into epochs. We will select one segment from each epoch. */ | |
|
390 | const COVER_epoch_info_t epochs = COVER_computeEpochs( | |
|
391 | (U32)dictBufferCapacity, (U32)ctx->nbDmers, parameters.k, 1); | |
|
392 | const size_t maxZeroScoreRun = 10; | |
|
393 | size_t zeroScoreRun = 0; | |
|
394 | 394 | size_t epoch; |
|
395 | 395 | DISPLAYLEVEL(2, "Breaking content into %u epochs of size %u\n", |
|
396 |
epochs, |
|
|
396 | (U32)epochs.num, (U32)epochs.size); | |
|
397 | 397 | /* Loop through the epochs until there are no more segments or the dictionary |
|
398 | 398 | * is full. |
|
399 | 399 | */ |
|
400 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs) { | |
|
401 |
const U32 epochBegin = (U32)(epoch * epoch |
|
|
402 |
const U32 epochEnd = epochBegin + epoch |
|
|
400 | for (epoch = 0; tail > 0; epoch = (epoch + 1) % epochs.num) { | |
|
401 | const U32 epochBegin = (U32)(epoch * epochs.size); | |
|
402 | const U32 epochEnd = epochBegin + epochs.size; | |
|
403 | 403 | size_t segmentSize; |
|
404 | 404 | /* Select a segment */ |
|
405 | 405 | COVER_segment_t segment = FASTCOVER_selectSegment( |
|
406 | 406 | ctx, freqs, epochBegin, epochEnd, parameters, segmentFreqs); |
|
407 | 407 | |
|
408 |
/* If the segment covers no dmers, then we are out of content |
|
|
408 | /* If the segment covers no dmers, then we are out of content. | |
|
409 | * There may be new content in other epochs, for continue for some time. | |
|
410 | */ | |
|
409 | 411 | if (segment.score == 0) { |
|
412 | if (++zeroScoreRun >= maxZeroScoreRun) { | |
|
410 | 413 | break; |
|
411 | 414 | } |
|
415 | continue; | |
|
416 | } | |
|
417 | zeroScoreRun = 0; | |
|
412 | 418 | |
|
413 | 419 | /* Trim the segment if necessary and if it is too small then we are done */ |
|
414 | 420 | segmentSize = MIN(segment.end - segment.begin + parameters.d - 1, tail); |
@@ -429,7 +435,6 b' FASTCOVER_buildDictionary(const FASTCOVE' | |||
|
429 | 435 | return tail; |
|
430 | 436 | } |
|
431 | 437 | |
|
432 | ||
|
433 | 438 | /** |
|
434 | 439 | * Parameters for FASTCOVER_tryParameters(). |
|
435 | 440 | */ |
@@ -458,6 +463,7 b' static void FASTCOVER_tryParameters(void' | |||
|
458 | 463 | U16* segmentFreqs = (U16 *)calloc(((U64)1 << ctx->f), sizeof(U16)); |
|
459 | 464 | /* Allocate space for hash table, dict, and freqs */ |
|
460 | 465 | BYTE *const dict = (BYTE * const)malloc(dictBufferCapacity); |
|
466 | COVER_dictSelection_t selection = COVER_dictSelectionError(ERROR(GENERIC)); | |
|
461 | 467 | U32 *freqs = (U32*) malloc(((U64)1 << ctx->f) * sizeof(U32)); |
|
462 | 468 | if (!segmentFreqs || !dict || !freqs) { |
|
463 | 469 | DISPLAYLEVEL(1, "Failed to allocate buffers: out of memory\n"); |
@@ -468,26 +474,23 b' static void FASTCOVER_tryParameters(void' | |||
|
468 | 474 | /* Build the dictionary */ |
|
469 | 475 | { const size_t tail = FASTCOVER_buildDictionary(ctx, freqs, dict, dictBufferCapacity, |
|
470 | 476 | parameters, segmentFreqs); |
|
477 | ||
|
471 | 478 | const unsigned nbFinalizeSamples = (unsigned)(ctx->nbTrainSamples * ctx->accelParams.finalize / 100); |
|
472 | dictBufferCapacity = ZDICT_finalizeDictionary( | |
|
473 | dict, dictBufferCapacity, dict + tail, dictBufferCapacity - tail, | |
|
474 | ctx->samples, ctx->samplesSizes, nbFinalizeSamples, parameters.zParams); | |
|
475 | if (ZDICT_isError(dictBufferCapacity)) { | |
|
476 | DISPLAYLEVEL(1, "Failed to finalize dictionary\n"); | |
|
479 | selection = COVER_selectDict(dict + tail, dictBufferCapacity - tail, | |
|
480 | ctx->samples, ctx->samplesSizes, nbFinalizeSamples, ctx->nbTrainSamples, ctx->nbSamples, parameters, ctx->offsets, | |
|
481 | totalCompressedSize); | |
|
482 | ||
|
483 | if (COVER_dictSelectionIsError(selection)) { | |
|
484 | DISPLAYLEVEL(1, "Failed to select dictionary\n"); | |
|
477 | 485 | goto _cleanup; |
|
478 | 486 | } |
|
479 | 487 | } |
|
480 | /* Check total compressed size */ | |
|
481 | totalCompressedSize = COVER_checkTotalCompressedSize(parameters, ctx->samplesSizes, | |
|
482 | ctx->samples, ctx->offsets, | |
|
483 | ctx->nbTrainSamples, ctx->nbSamples, | |
|
484 | dict, dictBufferCapacity); | |
|
485 | 488 | _cleanup: |
|
486 | COVER_best_finish(data->best, totalCompressedSize, parameters, dict, | |
|
487 | dictBufferCapacity); | |
|
489 | free(dict); | |
|
490 | COVER_best_finish(data->best, parameters, selection); | |
|
488 | 491 | free(data); |
|
489 | 492 | free(segmentFreqs); |
|
490 | free(dict); | |
|
493 | COVER_dictSelectionFree(selection); | |
|
491 | 494 | free(freqs); |
|
492 | 495 | } |
|
493 | 496 | |
@@ -502,6 +505,7 b' FASTCOVER_convertToCoverParams(ZDICT_fas' | |||
|
502 | 505 | coverParams->nbThreads = fastCoverParams.nbThreads; |
|
503 | 506 | coverParams->splitPoint = fastCoverParams.splitPoint; |
|
504 | 507 | coverParams->zParams = fastCoverParams.zParams; |
|
508 | coverParams->shrinkDict = fastCoverParams.shrinkDict; | |
|
505 | 509 | } |
|
506 | 510 | |
|
507 | 511 | |
@@ -518,6 +522,7 b' FASTCOVER_convertToFastCoverParams(ZDICT' | |||
|
518 | 522 | fastCoverParams->f = f; |
|
519 | 523 | fastCoverParams->accel = accel; |
|
520 | 524 | fastCoverParams->zParams = coverParams.zParams; |
|
525 | fastCoverParams->shrinkDict = coverParams.shrinkDict; | |
|
521 | 526 | } |
|
522 | 527 | |
|
523 | 528 | |
@@ -544,11 +549,11 b' ZDICT_trainFromBuffer_fastCover(void* di' | |||
|
544 | 549 | if (!FASTCOVER_checkParameters(coverParams, dictBufferCapacity, parameters.f, |
|
545 | 550 | parameters.accel)) { |
|
546 | 551 | DISPLAYLEVEL(1, "FASTCOVER parameters incorrect\n"); |
|
547 |
return ERROR( |
|
|
552 | return ERROR(parameter_outOfBound); | |
|
548 | 553 | } |
|
549 | 554 | if (nbSamples == 0) { |
|
550 | 555 | DISPLAYLEVEL(1, "FASTCOVER must have at least one input file\n"); |
|
551 |
return ERROR( |
|
|
556 | return ERROR(srcSize_wrong); | |
|
552 | 557 | } |
|
553 | 558 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
554 | 559 | DISPLAYLEVEL(1, "dictBufferCapacity must be at least %u\n", |
@@ -558,12 +563,16 b' ZDICT_trainFromBuffer_fastCover(void* di' | |||
|
558 | 563 | /* Assign corresponding FASTCOVER_accel_t to accelParams*/ |
|
559 | 564 | accelParams = FASTCOVER_defaultAccelParameters[parameters.accel]; |
|
560 | 565 | /* Initialize context */ |
|
561 | if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | |
|
566 | { | |
|
567 | size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, | |
|
562 | 568 | coverParams.d, parameters.splitPoint, parameters.f, |
|
563 |
accelParams) |
|
|
569 | accelParams); | |
|
570 | if (ZSTD_isError(initVal)) { | |
|
564 | 571 | DISPLAYLEVEL(1, "Failed to initialize context\n"); |
|
565 | return ERROR(GENERIC); | |
|
572 | return initVal; | |
|
566 | 573 | } |
|
574 | } | |
|
575 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, g_displayLevel); | |
|
567 | 576 | /* Build the dictionary */ |
|
568 | 577 | DISPLAYLEVEL(2, "Building dictionary\n"); |
|
569 | 578 | { |
@@ -609,6 +618,7 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||
|
609 | 618 | (1 + (kMaxD - kMinD) / 2) * (1 + (kMaxK - kMinK) / kStepSize); |
|
610 | 619 | const unsigned f = parameters->f == 0 ? DEFAULT_F : parameters->f; |
|
611 | 620 | const unsigned accel = parameters->accel == 0 ? DEFAULT_ACCEL : parameters->accel; |
|
621 | const unsigned shrinkDict = 0; | |
|
612 | 622 | /* Local variables */ |
|
613 | 623 | const int displayLevel = parameters->zParams.notificationLevel; |
|
614 | 624 | unsigned iteration = 1; |
@@ -616,22 +626,23 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||
|
616 | 626 | unsigned k; |
|
617 | 627 | COVER_best_t best; |
|
618 | 628 | POOL_ctx *pool = NULL; |
|
629 | int warned = 0; | |
|
619 | 630 | /* Checks */ |
|
620 | 631 | if (splitPoint <= 0 || splitPoint > 1) { |
|
621 | 632 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect splitPoint\n"); |
|
622 |
return ERROR( |
|
|
633 | return ERROR(parameter_outOfBound); | |
|
623 | 634 | } |
|
624 | 635 | if (accel == 0 || accel > FASTCOVER_MAX_ACCEL) { |
|
625 | 636 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect accel\n"); |
|
626 |
return ERROR( |
|
|
637 | return ERROR(parameter_outOfBound); | |
|
627 | 638 | } |
|
628 | 639 | if (kMinK < kMaxD || kMaxK < kMinK) { |
|
629 | 640 | LOCALDISPLAYLEVEL(displayLevel, 1, "Incorrect k\n"); |
|
630 |
return ERROR( |
|
|
641 | return ERROR(parameter_outOfBound); | |
|
631 | 642 | } |
|
632 | 643 | if (nbSamples == 0) { |
|
633 | 644 | LOCALDISPLAYLEVEL(displayLevel, 1, "FASTCOVER must have at least one input file\n"); |
|
634 |
return ERROR( |
|
|
645 | return ERROR(srcSize_wrong); | |
|
635 | 646 | } |
|
636 | 647 | if (dictBufferCapacity < ZDICT_DICTSIZE_MIN) { |
|
637 | 648 | LOCALDISPLAYLEVEL(displayLevel, 1, "dictBufferCapacity must be at least %u\n", |
@@ -658,11 +669,18 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||
|
658 | 669 | /* Initialize the context for this value of d */ |
|
659 | 670 | FASTCOVER_ctx_t ctx; |
|
660 | 671 | LOCALDISPLAYLEVEL(displayLevel, 3, "d=%u\n", d); |
|
661 | if (!FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams)) { | |
|
672 | { | |
|
673 | size_t const initVal = FASTCOVER_ctx_init(&ctx, samplesBuffer, samplesSizes, nbSamples, d, splitPoint, f, accelParams); | |
|
674 | if (ZSTD_isError(initVal)) { | |
|
662 | 675 | LOCALDISPLAYLEVEL(displayLevel, 1, "Failed to initialize context\n"); |
|
663 | 676 | COVER_best_destroy(&best); |
|
664 | 677 | POOL_free(pool); |
|
665 | return ERROR(GENERIC); | |
|
678 | return initVal; | |
|
679 | } | |
|
680 | } | |
|
681 | if (!warned) { | |
|
682 | COVER_warnOnSmallCorpus(dictBufferCapacity, ctx.nbDmers, displayLevel); | |
|
683 | warned = 1; | |
|
666 | 684 | } |
|
667 | 685 | /* Loop through k reusing the same context */ |
|
668 | 686 | for (k = kMinK; k <= kMaxK; k += kStepSize) { |
@@ -675,7 +693,7 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||
|
675 | 693 | COVER_best_destroy(&best); |
|
676 | 694 | FASTCOVER_ctx_destroy(&ctx); |
|
677 | 695 | POOL_free(pool); |
|
678 |
return ERROR( |
|
|
696 | return ERROR(memory_allocation); | |
|
679 | 697 | } |
|
680 | 698 | data->ctx = &ctx; |
|
681 | 699 | data->best = &best; |
@@ -685,6 +703,7 b' ZDICT_optimizeTrainFromBuffer_fastCover(' | |||
|
685 | 703 | data->parameters.d = d; |
|
686 | 704 | data->parameters.splitPoint = splitPoint; |
|
687 | 705 | data->parameters.steps = kSteps; |
|
706 | data->parameters.shrinkDict = shrinkDict; | |
|
688 | 707 | data->parameters.zParams.notificationLevel = g_displayLevel; |
|
689 | 708 | /* Check the parameters */ |
|
690 | 709 | if (!FASTCOVER_checkParameters(data->parameters, dictBufferCapacity, |
@@ -741,7 +741,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
741 | 741 | /* analyze, build stats, starting with literals */ |
|
742 | 742 | { size_t maxNbBits = HUF_buildCTable (hufTable, countLit, 255, huffLog); |
|
743 | 743 | if (HUF_isError(maxNbBits)) { |
|
744 |
eSize = |
|
|
744 | eSize = maxNbBits; | |
|
745 | 745 | DISPLAYLEVEL(1, " HUF_buildCTable error \n"); |
|
746 | 746 | goto _cleanup; |
|
747 | 747 | } |
@@ -764,7 +764,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
764 | 764 | total=0; for (u=0; u<=offcodeMax; u++) total+=offcodeCount[u]; |
|
765 | 765 | errorCode = FSE_normalizeCount(offcodeNCount, Offlog, offcodeCount, total, offcodeMax); |
|
766 | 766 | if (FSE_isError(errorCode)) { |
|
767 |
eSize = |
|
|
767 | eSize = errorCode; | |
|
768 | 768 | DISPLAYLEVEL(1, "FSE_normalizeCount error with offcodeCount \n"); |
|
769 | 769 | goto _cleanup; |
|
770 | 770 | } |
@@ -773,7 +773,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
773 | 773 | total=0; for (u=0; u<=MaxML; u++) total+=matchLengthCount[u]; |
|
774 | 774 | errorCode = FSE_normalizeCount(matchLengthNCount, mlLog, matchLengthCount, total, MaxML); |
|
775 | 775 | if (FSE_isError(errorCode)) { |
|
776 |
eSize = |
|
|
776 | eSize = errorCode; | |
|
777 | 777 | DISPLAYLEVEL(1, "FSE_normalizeCount error with matchLengthCount \n"); |
|
778 | 778 | goto _cleanup; |
|
779 | 779 | } |
@@ -782,7 +782,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
782 | 782 | total=0; for (u=0; u<=MaxLL; u++) total+=litLengthCount[u]; |
|
783 | 783 | errorCode = FSE_normalizeCount(litLengthNCount, llLog, litLengthCount, total, MaxLL); |
|
784 | 784 | if (FSE_isError(errorCode)) { |
|
785 |
eSize = |
|
|
785 | eSize = errorCode; | |
|
786 | 786 | DISPLAYLEVEL(1, "FSE_normalizeCount error with litLengthCount \n"); |
|
787 | 787 | goto _cleanup; |
|
788 | 788 | } |
@@ -791,7 +791,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
791 | 791 | /* write result to buffer */ |
|
792 | 792 | { size_t const hhSize = HUF_writeCTable(dstPtr, maxDstSize, hufTable, 255, huffLog); |
|
793 | 793 | if (HUF_isError(hhSize)) { |
|
794 |
eSize = |
|
|
794 | eSize = hhSize; | |
|
795 | 795 | DISPLAYLEVEL(1, "HUF_writeCTable error \n"); |
|
796 | 796 | goto _cleanup; |
|
797 | 797 | } |
@@ -802,7 +802,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
802 | 802 | |
|
803 | 803 | { size_t const ohSize = FSE_writeNCount(dstPtr, maxDstSize, offcodeNCount, OFFCODE_MAX, Offlog); |
|
804 | 804 | if (FSE_isError(ohSize)) { |
|
805 |
eSize = |
|
|
805 | eSize = ohSize; | |
|
806 | 806 | DISPLAYLEVEL(1, "FSE_writeNCount error with offcodeNCount \n"); |
|
807 | 807 | goto _cleanup; |
|
808 | 808 | } |
@@ -813,7 +813,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
813 | 813 | |
|
814 | 814 | { size_t const mhSize = FSE_writeNCount(dstPtr, maxDstSize, matchLengthNCount, MaxML, mlLog); |
|
815 | 815 | if (FSE_isError(mhSize)) { |
|
816 |
eSize = |
|
|
816 | eSize = mhSize; | |
|
817 | 817 | DISPLAYLEVEL(1, "FSE_writeNCount error with matchLengthNCount \n"); |
|
818 | 818 | goto _cleanup; |
|
819 | 819 | } |
@@ -824,7 +824,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
824 | 824 | |
|
825 | 825 | { size_t const lhSize = FSE_writeNCount(dstPtr, maxDstSize, litLengthNCount, MaxLL, llLog); |
|
826 | 826 | if (FSE_isError(lhSize)) { |
|
827 |
eSize = |
|
|
827 | eSize = lhSize; | |
|
828 | 828 | DISPLAYLEVEL(1, "FSE_writeNCount error with litlengthNCount \n"); |
|
829 | 829 | goto _cleanup; |
|
830 | 830 | } |
@@ -834,7 +834,7 b' static size_t ZDICT_analyzeEntropy(void*' | |||
|
834 | 834 | } |
|
835 | 835 | |
|
836 | 836 | if (maxDstSize<12) { |
|
837 |
eSize = ERROR( |
|
|
837 | eSize = ERROR(dstSize_tooSmall); | |
|
838 | 838 | DISPLAYLEVEL(1, "not enough space to write RepOffsets \n"); |
|
839 | 839 | goto _cleanup; |
|
840 | 840 | } |
@@ -46,7 +46,12 b' extern "C" {' | |||
|
46 | 46 | * The resulting dictionary will be saved into `dictBuffer`. |
|
47 | 47 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
48 | 48 | * or an error code, which can be tested with ZDICT_isError(). |
|
49 | * Note: ZDICT_trainFromBuffer() requires about 9 bytes of memory for each input byte. | |
|
49 | * Note: Dictionary training will fail if there are not enough samples to construct a | |
|
50 | * dictionary, or if most of the samples are too small (< 8 bytes being the lower limit). | |
|
51 | * If dictionary training fails, you should use zstd without a dictionary, as the dictionary | |
|
52 | * would've been ineffective anyways. If you believe your samples would benefit from a dictionary | |
|
53 | * please open an issue with details, and we can look into it. | |
|
54 | * Note: ZDICT_trainFromBuffer()'s memory usage is about 6 MB. | |
|
50 | 55 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
51 | 56 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
|
52 | 57 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. |
@@ -89,6 +94,8 b' typedef struct {' | |||
|
89 | 94 | unsigned steps; /* Number of steps : Only used for optimization : 0 means default (40) : Higher means more parameters checked */ |
|
90 | 95 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ |
|
91 | 96 | double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (1.0), 1.0 when all samples are used for both training and testing */ |
|
97 | unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ | |
|
98 | unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ | |
|
92 | 99 | ZDICT_params_t zParams; |
|
93 | 100 | } ZDICT_cover_params_t; |
|
94 | 101 | |
@@ -100,6 +107,9 b' typedef struct {' | |||
|
100 | 107 | unsigned nbThreads; /* Number of threads : constraint: 0 < nbThreads : 1 means single-threaded : Only used for optimization : Ignored if ZSTD_MULTITHREAD is not defined */ |
|
101 | 108 | double splitPoint; /* Percentage of samples used for training: Only used for optimization : the first nbSamples * splitPoint samples will be used to training, the last nbSamples * (1 - splitPoint) samples will be used for testing, 0 means default (0.75), 1.0 when all samples are used for both training and testing */ |
|
102 | 109 | unsigned accel; /* Acceleration level: constraint: 0 < accel <= 10, higher means faster and less accurate, 0 means default(1) */ |
|
110 | unsigned shrinkDict; /* Train dictionaries to shrink in size starting from the minimum size and selects the smallest dictionary that is shrinkDictMaxRegression% worse than the largest dictionary. 0 means no shrinking and 1 means shrinking */ | |
|
111 | unsigned shrinkDictMaxRegression; /* Sets shrinkDictMaxRegression so that a smaller dictionary can be at worse shrinkDictMaxRegression% worse than the max dict size dictionary. */ | |
|
112 | ||
|
103 | 113 | ZDICT_params_t zParams; |
|
104 | 114 | } ZDICT_fastCover_params_t; |
|
105 | 115 | |
@@ -110,6 +120,7 b' typedef struct {' | |||
|
110 | 120 | * The resulting dictionary will be saved into `dictBuffer`. |
|
111 | 121 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
112 | 122 | * or an error code, which can be tested with ZDICT_isError(). |
|
123 | * See ZDICT_trainFromBuffer() for details on failure modes. | |
|
113 | 124 | * Note: ZDICT_trainFromBuffer_cover() requires about 9 bytes of memory for each input byte. |
|
114 | 125 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
115 | 126 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
@@ -135,6 +146,7 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||
|
135 | 146 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
136 | 147 |
* |
|
137 | 148 |
* |
|
149 | * See ZDICT_trainFromBuffer() for details on failure modes. | |
|
138 | 150 | * Note: ZDICT_optimizeTrainFromBuffer_cover() requires about 8 bytes of memory for each input byte and additionally another 5 bytes of memory for each byte of memory for each thread. |
|
139 | 151 | */ |
|
140 | 152 | ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_cover( |
@@ -151,7 +163,8 b' ZDICTLIB_API size_t ZDICT_optimizeTrainF' | |||
|
151 | 163 | * The resulting dictionary will be saved into `dictBuffer`. |
|
152 | 164 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
153 | 165 | * or an error code, which can be tested with ZDICT_isError(). |
|
154 | * Note: ZDICT_trainFromBuffer_fastCover() requires about 1 bytes of memory for each input byte and additionally another 6 * 2^f bytes of memory . | |
|
166 | * See ZDICT_trainFromBuffer() for details on failure modes. | |
|
167 | * Note: ZDICT_trainFromBuffer_fastCover() requires 6 * 2^f bytes of memory. | |
|
155 | 168 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
156 | 169 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
|
157 | 170 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. |
@@ -177,7 +190,8 b' ZDICTLIB_API size_t ZDICT_trainFromBuffe' | |||
|
177 | 190 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
178 | 191 |
* |
|
179 | 192 |
* |
|
180 | * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 1 byte of memory for each input byte and additionally another 6 * 2^f bytes of memory for each thread. | |
|
193 | * See ZDICT_trainFromBuffer() for details on failure modes. | |
|
194 | * Note: ZDICT_optimizeTrainFromBuffer_fastCover() requires about 6 * 2^f bytes of memory for each thread. | |
|
181 | 195 | */ |
|
182 | 196 | ZDICTLIB_API size_t ZDICT_optimizeTrainFromBuffer_fastCover(void* dictBuffer, |
|
183 | 197 | size_t dictBufferCapacity, const void* samplesBuffer, |
@@ -219,6 +233,7 b' typedef struct {' | |||
|
219 | 233 | * `parameters` is optional and can be provided with values set to 0 to mean "default". |
|
220 | 234 | * @return: size of dictionary stored into `dictBuffer` (<= `dictBufferCapacity`) |
|
221 | 235 | * or an error code, which can be tested with ZDICT_isError(). |
|
236 | * See ZDICT_trainFromBuffer() for details on failure modes. | |
|
222 | 237 | * Tips: In general, a reasonable dictionary has a size of ~ 100 KB. |
|
223 | 238 | * It's possible to select smaller or larger size, just by specifying `dictBufferCapacity`. |
|
224 | 239 | * In general, it's recommended to provide a few thousands samples, though this can vary a lot. |
This diff has been collapsed as it changes many lines, (1127 lines changed) Show them Hide them | |||
@@ -70,8 +70,8 b' extern "C" {' | |||
|
70 | 70 | |
|
71 | 71 | /*------ Version ------*/ |
|
72 | 72 | #define ZSTD_VERSION_MAJOR 1 |
|
73 |
#define ZSTD_VERSION_MINOR |
|
|
74 |
#define ZSTD_VERSION_RELEASE |
|
|
73 | #define ZSTD_VERSION_MINOR 4 | |
|
74 | #define ZSTD_VERSION_RELEASE 3 | |
|
75 | 75 | |
|
76 | 76 | #define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE) |
|
77 | 77 | ZSTDLIB_API unsigned ZSTD_versionNumber(void); /**< to check runtime library version */ |
@@ -82,13 +82,28 b' ZSTDLIB_API unsigned ZSTD_versionNumber(' | |||
|
82 | 82 | #define ZSTD_VERSION_STRING ZSTD_EXPAND_AND_QUOTE(ZSTD_LIB_VERSION) |
|
83 | 83 | ZSTDLIB_API const char* ZSTD_versionString(void); /* requires v1.3.0+ */ |
|
84 | 84 | |
|
85 |
/* |
|
|
85 | /* ************************************* | |
|
86 | 86 | * Default constant |
|
87 | 87 | ***************************************/ |
|
88 | 88 | #ifndef ZSTD_CLEVEL_DEFAULT |
|
89 | 89 | # define ZSTD_CLEVEL_DEFAULT 3 |
|
90 | 90 | #endif |
|
91 | 91 | |
|
92 | /* ************************************* | |
|
93 | * Constants | |
|
94 | ***************************************/ | |
|
95 | ||
|
96 | /* All magic numbers are supposed to be read/written to/from files/memory using little-endian convention */ | |
|
97 | #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ | |
|
98 | #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ | |
|
99 | #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ | |
|
100 | #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 | |
|
101 | ||
|
102 | #define ZSTD_BLOCKSIZELOG_MAX 17 | |
|
103 | #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) | |
|
104 | ||
|
105 | ||
|
106 | ||
|
92 | 107 | /*************************************** |
|
93 | 108 | * Simple API |
|
94 | 109 | ***************************************/ |
@@ -145,12 +160,21 b' ZSTDLIB_API unsigned long long ZSTD_getF' | |||
|
145 | 160 | * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */ |
|
146 | 161 | ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize); |
|
147 | 162 | |
|
163 | /*! ZSTD_findFrameCompressedSize() : | |
|
164 | * `src` should point to the start of a ZSTD frame or skippable frame. | |
|
165 | * `srcSize` must be >= first frame size | |
|
166 | * @return : the compressed size of the first frame starting at `src`, | |
|
167 | * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, | |
|
168 | * or an error code if input is invalid */ | |
|
169 | ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); | |
|
170 | ||
|
148 | 171 | |
|
149 | 172 | /*====== Helper functions ======*/ |
|
150 | 173 | #define ZSTD_COMPRESSBOUND(srcSize) ((srcSize) + ((srcSize)>>8) + (((srcSize) < (128<<10)) ? (((128<<10) - (srcSize)) >> 11) /* margin, from 64 to 0 */ : 0)) /* this formula ensures that bound(A) + bound(B) <= bound(A+B) as long as A and B >= 128 KB */ |
|
151 | 174 | ZSTDLIB_API size_t ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */ |
|
152 | 175 | ZSTDLIB_API unsigned ZSTD_isError(size_t code); /*!< tells if a `size_t` function result is an error code */ |
|
153 | 176 | ZSTDLIB_API const char* ZSTD_getErrorName(size_t code); /*!< provides readable string from an error code */ |
|
177 | ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ | |
|
154 | 178 | ZSTDLIB_API int ZSTD_maxCLevel(void); /*!< maximum compression level available */ |
|
155 | 179 | |
|
156 | 180 | |
@@ -159,9 +183,14 b' ZSTDLIB_API int ZSTD_maxCLevel(v' | |||
|
159 | 183 | ***************************************/ |
|
160 | 184 | /*= Compression context |
|
161 | 185 | * When compressing many times, |
|
162 |
* it is recommended to allocate a context just once, |
|
|
186 | * it is recommended to allocate a context just once, | |
|
187 | * and re-use it for each successive compression operation. | |
|
163 | 188 | * This will make workload friendlier for system's memory. |
|
164 | * Use one context per thread for parallel execution in multi-threaded environments. */ | |
|
189 | * Note : re-using context is just a speed / resource optimization. | |
|
190 | * It doesn't change the compression ratio, which remains identical. | |
|
191 | * Note 2 : In multi-threaded environments, | |
|
192 | * use one different context per thread for parallel execution. | |
|
193 | */ | |
|
165 | 194 | typedef struct ZSTD_CCtx_s ZSTD_CCtx; |
|
166 | 195 | ZSTDLIB_API ZSTD_CCtx* ZSTD_createCCtx(void); |
|
167 | 196 | ZSTDLIB_API size_t ZSTD_freeCCtx(ZSTD_CCtx* cctx); |
@@ -195,279 +224,6 b' ZSTDLIB_API size_t ZSTD_decompressDCtx(Z' | |||
|
195 | 224 | const void* src, size_t srcSize); |
|
196 | 225 | |
|
197 | 226 | |
|
198 | /************************** | |
|
199 | * Simple dictionary API | |
|
200 | ***************************/ | |
|
201 | /*! ZSTD_compress_usingDict() : | |
|
202 | * Compression at an explicit compression level using a Dictionary. | |
|
203 | * A dictionary can be any arbitrary data segment (also called a prefix), | |
|
204 | * or a buffer with specified information (see dictBuilder/zdict.h). | |
|
205 | * Note : This function loads the dictionary, resulting in significant startup delay. | |
|
206 | * It's intended for a dictionary used only once. | |
|
207 | * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |
|
208 | ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, | |
|
209 | void* dst, size_t dstCapacity, | |
|
210 | const void* src, size_t srcSize, | |
|
211 | const void* dict,size_t dictSize, | |
|
212 | int compressionLevel); | |
|
213 | ||
|
214 | /*! ZSTD_decompress_usingDict() : | |
|
215 | * Decompression using a known Dictionary. | |
|
216 | * Dictionary must be identical to the one used during compression. | |
|
217 | * Note : This function loads the dictionary, resulting in significant startup delay. | |
|
218 | * It's intended for a dictionary used only once. | |
|
219 | * Note : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |
|
220 | ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, | |
|
221 | void* dst, size_t dstCapacity, | |
|
222 | const void* src, size_t srcSize, | |
|
223 | const void* dict,size_t dictSize); | |
|
224 | ||
|
225 | ||
|
226 | /*********************************** | |
|
227 | * Bulk processing dictionary API | |
|
228 | **********************************/ | |
|
229 | typedef struct ZSTD_CDict_s ZSTD_CDict; | |
|
230 | ||
|
231 | /*! ZSTD_createCDict() : | |
|
232 | * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. | |
|
233 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. | |
|
234 | * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. | |
|
235 | * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. | |
|
236 | * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. | |
|
237 | * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ | |
|
238 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, | |
|
239 | int compressionLevel); | |
|
240 | ||
|
241 | /*! ZSTD_freeCDict() : | |
|
242 | * Function frees memory allocated by ZSTD_createCDict(). */ | |
|
243 | ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); | |
|
244 | ||
|
245 | /*! ZSTD_compress_usingCDict() : | |
|
246 | * Compression using a digested Dictionary. | |
|
247 | * Recommended when same dictionary is used multiple times. | |
|
248 | * Note : compression level is _decided at dictionary creation time_, | |
|
249 | * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ | |
|
250 | ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, | |
|
251 | void* dst, size_t dstCapacity, | |
|
252 | const void* src, size_t srcSize, | |
|
253 | const ZSTD_CDict* cdict); | |
|
254 | ||
|
255 | ||
|
256 | typedef struct ZSTD_DDict_s ZSTD_DDict; | |
|
257 | ||
|
258 | /*! ZSTD_createDDict() : | |
|
259 | * Create a digested dictionary, ready to start decompression operation without startup delay. | |
|
260 | * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ | |
|
261 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); | |
|
262 | ||
|
263 | /*! ZSTD_freeDDict() : | |
|
264 | * Function frees memory allocated with ZSTD_createDDict() */ | |
|
265 | ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); | |
|
266 | ||
|
267 | /*! ZSTD_decompress_usingDDict() : | |
|
268 | * Decompression using a digested Dictionary. | |
|
269 | * Recommended when same dictionary is used multiple times. */ | |
|
270 | ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, | |
|
271 | void* dst, size_t dstCapacity, | |
|
272 | const void* src, size_t srcSize, | |
|
273 | const ZSTD_DDict* ddict); | |
|
274 | ||
|
275 | ||
|
276 | /**************************** | |
|
277 | * Streaming | |
|
278 | ****************************/ | |
|
279 | ||
|
280 | typedef struct ZSTD_inBuffer_s { | |
|
281 | const void* src; /**< start of input buffer */ | |
|
282 | size_t size; /**< size of input buffer */ | |
|
283 | size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ | |
|
284 | } ZSTD_inBuffer; | |
|
285 | ||
|
286 | typedef struct ZSTD_outBuffer_s { | |
|
287 | void* dst; /**< start of output buffer */ | |
|
288 | size_t size; /**< size of output buffer */ | |
|
289 | size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ | |
|
290 | } ZSTD_outBuffer; | |
|
291 | ||
|
292 | ||
|
293 | ||
|
294 | /*-*********************************************************************** | |
|
295 | * Streaming compression - HowTo | |
|
296 | * | |
|
297 | * A ZSTD_CStream object is required to track streaming operation. | |
|
298 | * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. | |
|
299 | * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. | |
|
300 | * It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. | |
|
301 | * | |
|
302 | * For parallel execution, use one separate ZSTD_CStream per thread. | |
|
303 | * | |
|
304 | * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. | |
|
305 | * | |
|
306 | * Parameters are sticky : when starting a new compression on the same context, | |
|
307 | * it will re-use the same sticky parameters as previous compression session. | |
|
308 | * When in doubt, it's recommended to fully initialize the context before usage. | |
|
309 | * Use ZSTD_initCStream() to set the parameter to a selected compression level. | |
|
310 | * Use advanced API (ZSTD_CCtx_setParameter(), etc.) to set more specific parameters. | |
|
311 | * | |
|
312 | * Use ZSTD_compressStream() as many times as necessary to consume input stream. | |
|
313 | * The function will automatically update both `pos` fields within `input` and `output`. | |
|
314 | * Note that the function may not consume the entire input, | |
|
315 | * for example, because the output buffer is already full, | |
|
316 | * in which case `input.pos < input.size`. | |
|
317 | * The caller must check if input has been entirely consumed. | |
|
318 | * If not, the caller must make some room to receive more compressed data, | |
|
319 | * and then present again remaining input data. | |
|
320 | * @return : a size hint, preferred nb of bytes to use as input for next function call | |
|
321 | * or an error code, which can be tested using ZSTD_isError(). | |
|
322 | * Note 1 : it's just a hint, to help latency a little, any value will work fine. | |
|
323 | * Note 2 : size hint is guaranteed to be <= ZSTD_CStreamInSize() | |
|
324 | * | |
|
325 | * At any moment, it's possible to flush whatever data might remain stuck within internal buffer, | |
|
326 | * using ZSTD_flushStream(). `output->pos` will be updated. | |
|
327 | * Note that, if `output->size` is too small, a single invocation of ZSTD_flushStream() might not be enough (return code > 0). | |
|
328 | * In which case, make some room to receive more compressed data, and call again ZSTD_flushStream(). | |
|
329 | * @return : 0 if internal buffers are entirely flushed, | |
|
330 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), | |
|
331 | * or an error code, which can be tested using ZSTD_isError(). | |
|
332 | * | |
|
333 | * ZSTD_endStream() instructs to finish a frame. | |
|
334 | * It will perform a flush and write frame epilogue. | |
|
335 | * The epilogue is required for decoders to consider a frame completed. | |
|
336 | * flush() operation is the same, and follows same rules as ZSTD_flushStream(). | |
|
337 | * @return : 0 if frame fully completed and fully flushed, | |
|
338 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), | |
|
339 | * or an error code, which can be tested using ZSTD_isError(). | |
|
340 | * | |
|
341 | * *******************************************************************/ | |
|
342 | ||
|
343 | typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ | |
|
344 | /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ | |
|
345 | /*===== ZSTD_CStream management functions =====*/ | |
|
346 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); | |
|
347 | ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); | |
|
348 | ||
|
349 | /*===== Streaming compression functions =====*/ | |
|
350 | ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); | |
|
351 | ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |
|
352 | ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); | |
|
353 | ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); | |
|
354 | ||
|
355 | ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ | |
|
356 | ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block in all circumstances. */ | |
|
357 | ||
|
358 | ||
|
359 | ||
|
360 | /*-*************************************************************************** | |
|
361 | * Streaming decompression - HowTo | |
|
362 | * | |
|
363 | * A ZSTD_DStream object is required to track streaming operations. | |
|
364 | * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. | |
|
365 | * ZSTD_DStream objects can be re-used multiple times. | |
|
366 | * | |
|
367 | * Use ZSTD_initDStream() to start a new decompression operation. | |
|
368 | * @return : recommended first input size | |
|
369 | * Alternatively, use advanced API to set specific properties. | |
|
370 | * | |
|
371 | * Use ZSTD_decompressStream() repetitively to consume your input. | |
|
372 | * The function will update both `pos` fields. | |
|
373 | * If `input.pos < input.size`, some input has not been consumed. | |
|
374 | * It's up to the caller to present again remaining data. | |
|
375 | * The function tries to flush all data decoded immediately, respecting output buffer size. | |
|
376 | * If `output.pos < output.size`, decoder has flushed everything it could. | |
|
377 | * But if `output.pos == output.size`, there might be some data left within internal buffers., | |
|
378 | * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. | |
|
379 | * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. | |
|
380 | * @return : 0 when a frame is completely decoded and fully flushed, | |
|
381 | * or an error code, which can be tested using ZSTD_isError(), | |
|
382 | * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : | |
|
383 | * the return value is a suggested next input size (just a hint for better latency) | |
|
384 | * that will never request more than the remaining frame size. | |
|
385 | * *******************************************************************************/ | |
|
386 | ||
|
387 | typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ | |
|
388 | /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ | |
|
389 | /*===== ZSTD_DStream management functions =====*/ | |
|
390 | ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); | |
|
391 | ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); | |
|
392 | ||
|
393 | /*===== Streaming decompression functions =====*/ | |
|
394 | ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); | |
|
395 | ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |
|
396 | ||
|
397 | ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ | |
|
398 | ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ | |
|
399 | ||
|
400 | #endif /* ZSTD_H_235446 */ | |
|
401 | ||
|
402 | ||
|
403 | ||
|
404 | ||
|
405 | /**************************************************************************************** | |
|
406 | * ADVANCED AND EXPERIMENTAL FUNCTIONS | |
|
407 | **************************************************************************************** | |
|
408 | * The definitions in the following section are considered experimental. | |
|
409 | * They are provided for advanced scenarios. | |
|
410 | * They should never be used with a dynamic library, as prototypes may change in the future. | |
|
411 | * Use them only in association with static linking. | |
|
412 | * ***************************************************************************************/ | |
|
413 | ||
|
414 | #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) | |
|
415 | #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY | |
|
416 | ||
|
417 | ||
|
418 | /**************************************************************************************** | |
|
419 | * Candidate API for promotion to stable status | |
|
420 | **************************************************************************************** | |
|
421 | * The following symbols and constants form the "staging area" : | |
|
422 | * they are considered to join "stable API" by v1.4.0. | |
|
423 | * The proposal is written so that it can be made stable "as is", | |
|
424 | * though it's still possible to suggest improvements. | |
|
425 | * Staging is in fact last chance for changes, | |
|
426 | * the API is locked once reaching "stable" status. | |
|
427 | * ***************************************************************************************/ | |
|
428 | ||
|
429 | ||
|
430 | /* === Constants === */ | |
|
431 | ||
|
432 | /* all magic numbers are supposed read/written to/from files/memory using little-endian convention */ | |
|
433 | #define ZSTD_MAGICNUMBER 0xFD2FB528 /* valid since v0.8.0 */ | |
|
434 | #define ZSTD_MAGIC_DICTIONARY 0xEC30A437 /* valid since v0.7.0 */ | |
|
435 | #define ZSTD_MAGIC_SKIPPABLE_START 0x184D2A50 /* all 16 values, from 0x184D2A50 to 0x184D2A5F, signal the beginning of a skippable frame */ | |
|
436 | #define ZSTD_MAGIC_SKIPPABLE_MASK 0xFFFFFFF0 | |
|
437 | ||
|
438 | #define ZSTD_BLOCKSIZELOG_MAX 17 | |
|
439 | #define ZSTD_BLOCKSIZE_MAX (1<<ZSTD_BLOCKSIZELOG_MAX) | |
|
440 | ||
|
441 | ||
|
442 | /* === query limits === */ | |
|
443 | ||
|
444 | ZSTDLIB_API int ZSTD_minCLevel(void); /*!< minimum negative compression level allowed */ | |
|
445 | ||
|
446 | ||
|
447 | /* === frame size === */ | |
|
448 | ||
|
449 | /*! ZSTD_findFrameCompressedSize() : | |
|
450 | * `src` should point to the start of a ZSTD frame or skippable frame. | |
|
451 | * `srcSize` must be >= first frame size | |
|
452 | * @return : the compressed size of the first frame starting at `src`, | |
|
453 | * suitable to pass as `srcSize` to `ZSTD_decompress` or similar, | |
|
454 | * or an error code if input is invalid */ | |
|
455 | ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize); | |
|
456 | ||
|
457 | ||
|
458 | /* === Memory management === */ | |
|
459 | ||
|
460 | /*! ZSTD_sizeof_*() : | |
|
461 | * These functions give the _current_ memory usage of selected object. | |
|
462 | * Note that object memory usage can evolve (increase or decrease) over time. */ | |
|
463 | ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); | |
|
464 | ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); | |
|
465 | ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); | |
|
466 | ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); | |
|
467 | ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); | |
|
468 | ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); | |
|
469 | ||
|
470 | ||
|
471 | 227 | /*************************************** |
|
472 | 228 | * Advanced compression API |
|
473 | 229 | ***************************************/ |
@@ -503,7 +259,10 b' typedef enum { ZSTD_fast=1,' | |||
|
503 | 259 | |
|
504 | 260 | typedef enum { |
|
505 | 261 | |
|
506 |
/* compression parameters |
|
|
262 | /* compression parameters | |
|
263 | * Note: When compressing with a ZSTD_CDict these parameters are superseded | |
|
264 | * by the parameters used to construct the ZSTD_CDict. See ZSTD_CCtx_refCDict() | |
|
265 | * for more info (superseded-by-cdict). */ | |
|
507 | 266 | ZSTD_c_compressionLevel=100, /* Update all compression parameters according to pre-defined cLevel table |
|
508 | 267 | * Default level is ZSTD_CLEVEL_DEFAULT==3. |
|
509 | 268 | * Special: value 0 means default, which is controlled by ZSTD_CLEVEL_DEFAULT. |
@@ -625,6 +384,8 b' typedef enum {' | |||
|
625 | 384 | * ZSTD_c_format |
|
626 | 385 | * ZSTD_c_forceMaxWindow |
|
627 | 386 | * ZSTD_c_forceAttachDict |
|
387 | * ZSTD_c_literalCompressionMode | |
|
388 | * ZSTD_c_targetCBlockSize | |
|
628 | 389 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. |
|
629 | 390 | * note : never ever use experimentalParam? names directly; |
|
630 | 391 | * also, the enums values themselves are unstable and can still change. |
@@ -632,10 +393,11 b' typedef enum {' | |||
|
632 | 393 | ZSTD_c_experimentalParam1=500, |
|
633 | 394 | ZSTD_c_experimentalParam2=10, |
|
634 | 395 | ZSTD_c_experimentalParam3=1000, |
|
635 | ZSTD_c_experimentalParam4=1001 | |
|
396 | ZSTD_c_experimentalParam4=1001, | |
|
397 | ZSTD_c_experimentalParam5=1002, | |
|
398 | ZSTD_c_experimentalParam6=1003, | |
|
636 | 399 | } ZSTD_cParameter; |
|
637 | 400 | |
|
638 | ||
|
639 | 401 | typedef struct { |
|
640 | 402 | size_t error; |
|
641 | 403 | int lowerBound; |
@@ -677,10 +439,443 b' ZSTDLIB_API size_t ZSTD_CCtx_setParamete' | |||
|
677 | 439 | * Note 3 : Whenever all input data is provided and consumed in a single round, |
|
678 | 440 | * for example with ZSTD_compress2(), |
|
679 | 441 | * or invoking immediately ZSTD_compressStream2(,,,ZSTD_e_end), |
|
680 | * this value is automatically overriden by srcSize instead. | |
|
442 | * this value is automatically overridden by srcSize instead. | |
|
681 | 443 | */ |
|
682 | 444 | ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize); |
|
683 | 445 | |
|
446 | typedef enum { | |
|
447 | ZSTD_reset_session_only = 1, | |
|
448 | ZSTD_reset_parameters = 2, | |
|
449 | ZSTD_reset_session_and_parameters = 3 | |
|
450 | } ZSTD_ResetDirective; | |
|
451 | ||
|
452 | /*! ZSTD_CCtx_reset() : | |
|
453 | * There are 2 different things that can be reset, independently or jointly : | |
|
454 | * - The session : will stop compressing current frame, and make CCtx ready to start a new one. | |
|
455 | * Useful after an error, or to interrupt any ongoing compression. | |
|
456 | * Any internal data not yet flushed is cancelled. | |
|
457 | * Compression parameters and dictionary remain unchanged. | |
|
458 | * They will be used to compress next frame. | |
|
459 | * Resetting session never fails. | |
|
460 | * - The parameters : changes all parameters back to "default". | |
|
461 | * This removes any reference to any dictionary too. | |
|
462 | * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) | |
|
463 | * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) | |
|
464 | * - Both : similar to resetting the session, followed by resetting parameters. | |
|
465 | */ | |
|
466 | ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); | |
|
467 | ||
|
468 | /*! ZSTD_compress2() : | |
|
469 | * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. | |
|
470 | * ZSTD_compress2() always starts a new frame. | |
|
471 | * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. | |
|
472 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() | |
|
473 | * - The function is always blocking, returns when compression is completed. | |
|
474 | * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. | |
|
475 | * @return : compressed size written into `dst` (<= `dstCapacity`), | |
|
476 | * or an error code if it fails (which can be tested using ZSTD_isError()). | |
|
477 | */ | |
|
478 | ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, | |
|
479 | void* dst, size_t dstCapacity, | |
|
480 | const void* src, size_t srcSize); | |
|
481 | ||
|
482 | ||
|
483 | /*************************************** | |
|
484 | * Advanced decompression API | |
|
485 | ***************************************/ | |
|
486 | ||
|
487 | /* The advanced API pushes parameters one by one into an existing DCtx context. | |
|
488 | * Parameters are sticky, and remain valid for all following frames | |
|
489 | * using the same DCtx context. | |
|
490 | * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). | |
|
491 | * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). | |
|
492 | * Therefore, no new decompression function is necessary. | |
|
493 | */ | |
|
494 | ||
|
495 | typedef enum { | |
|
496 | ||
|
497 | ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which | |
|
498 | * the streaming API will refuse to allocate memory buffer | |
|
499 | * in order to protect the host from unreasonable memory requirements. | |
|
500 | * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. | |
|
501 | * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT). | |
|
502 | * Special: value 0 means "use default maximum windowLog". */ | |
|
503 | ||
|
504 | /* note : additional experimental parameters are also available | |
|
505 | * within the experimental section of the API. | |
|
506 | * At the time of this writing, they include : | |
|
507 | * ZSTD_d_format | |
|
508 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. | |
|
509 | * note : never ever use experimentalParam? names directly | |
|
510 | */ | |
|
511 | ZSTD_d_experimentalParam1=1000 | |
|
512 | ||
|
513 | } ZSTD_dParameter; | |
|
514 | ||
|
515 | /*! ZSTD_dParam_getBounds() : | |
|
516 | * All parameters must belong to an interval with lower and upper bounds, | |
|
517 | * otherwise they will either trigger an error or be automatically clamped. | |
|
518 | * @return : a structure, ZSTD_bounds, which contains | |
|
519 | * - an error status field, which must be tested using ZSTD_isError() | |
|
520 | * - both lower and upper bounds, inclusive | |
|
521 | */ | |
|
522 | ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); | |
|
523 | ||
|
524 | /*! ZSTD_DCtx_setParameter() : | |
|
525 | * Set one decompression parameter, selected by enum ZSTD_dParameter. | |
|
526 | * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). | |
|
527 | * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). | |
|
528 | * Setting a parameter is only possible during frame initialization (before starting decompression). | |
|
529 | * @return : 0, or an error code (which can be tested using ZSTD_isError()). | |
|
530 | */ | |
|
531 | ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); | |
|
532 | ||
|
533 | /*! ZSTD_DCtx_reset() : | |
|
534 | * Return a DCtx to clean state. | |
|
535 | * Session and parameters can be reset jointly or separately. | |
|
536 | * Parameters can only be reset when no active frame is being decompressed. | |
|
537 | * @return : 0, or an error code, which can be tested with ZSTD_isError() | |
|
538 | */ | |
|
539 | ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset); | |
|
540 | ||
|
541 | ||
|
542 | /**************************** | |
|
543 | * Streaming | |
|
544 | ****************************/ | |
|
545 | ||
|
546 | typedef struct ZSTD_inBuffer_s { | |
|
547 | const void* src; /**< start of input buffer */ | |
|
548 | size_t size; /**< size of input buffer */ | |
|
549 | size_t pos; /**< position where reading stopped. Will be updated. Necessarily 0 <= pos <= size */ | |
|
550 | } ZSTD_inBuffer; | |
|
551 | ||
|
552 | typedef struct ZSTD_outBuffer_s { | |
|
553 | void* dst; /**< start of output buffer */ | |
|
554 | size_t size; /**< size of output buffer */ | |
|
555 | size_t pos; /**< position where writing stopped. Will be updated. Necessarily 0 <= pos <= size */ | |
|
556 | } ZSTD_outBuffer; | |
|
557 | ||
|
558 | ||
|
559 | ||
|
560 | /*-*********************************************************************** | |
|
561 | * Streaming compression - HowTo | |
|
562 | * | |
|
563 | * A ZSTD_CStream object is required to track streaming operation. | |
|
564 | * Use ZSTD_createCStream() and ZSTD_freeCStream() to create/release resources. | |
|
565 | * ZSTD_CStream objects can be reused multiple times on consecutive compression operations. | |
|
566 | * It is recommended to re-use ZSTD_CStream since it will play nicer with system's memory, by re-using already allocated memory. | |
|
567 | * | |
|
568 | * For parallel execution, use one separate ZSTD_CStream per thread. | |
|
569 | * | |
|
570 | * note : since v1.3.0, ZSTD_CStream and ZSTD_CCtx are the same thing. | |
|
571 | * | |
|
572 | * Parameters are sticky : when starting a new compression on the same context, | |
|
573 | * it will re-use the same sticky parameters as previous compression session. | |
|
574 | * When in doubt, it's recommended to fully initialize the context before usage. | |
|
575 | * Use ZSTD_CCtx_reset() to reset the context and ZSTD_CCtx_setParameter(), | |
|
576 | * ZSTD_CCtx_setPledgedSrcSize(), or ZSTD_CCtx_loadDictionary() and friends to | |
|
577 | * set more specific parameters, the pledged source size, or load a dictionary. | |
|
578 | * | |
|
579 | * Use ZSTD_compressStream2() with ZSTD_e_continue as many times as necessary to | |
|
580 | * consume input stream. The function will automatically update both `pos` | |
|
581 | * fields within `input` and `output`. | |
|
582 | * Note that the function may not consume the entire input, for example, because | |
|
583 | * the output buffer is already full, in which case `input.pos < input.size`. | |
|
584 | * The caller must check if input has been entirely consumed. | |
|
585 | * If not, the caller must make some room to receive more compressed data, | |
|
586 | * and then present again remaining input data. | |
|
587 | * note: ZSTD_e_continue is guaranteed to make some forward progress when called, | |
|
588 | * but doesn't guarantee maximal forward progress. This is especially relevant | |
|
589 | * when compressing with multiple threads. The call won't block if it can | |
|
590 | * consume some input, but if it can't it will wait for some, but not all, | |
|
591 | * output to be flushed. | |
|
592 | * @return : provides a minimum amount of data remaining to be flushed from internal buffers | |
|
593 | * or an error code, which can be tested using ZSTD_isError(). | |
|
594 | * | |
|
595 | * At any moment, it's possible to flush whatever data might remain stuck within internal buffer, | |
|
596 | * using ZSTD_compressStream2() with ZSTD_e_flush. `output->pos` will be updated. | |
|
597 | * Note that, if `output->size` is too small, a single invocation with ZSTD_e_flush might not be enough (return code > 0). | |
|
598 | * In which case, make some room to receive more compressed data, and call again ZSTD_compressStream2() with ZSTD_e_flush. | |
|
599 | * You must continue calling ZSTD_compressStream2() with ZSTD_e_flush until it returns 0, at which point you can change the | |
|
600 | * operation. | |
|
601 | * note: ZSTD_e_flush will flush as much output as possible, meaning when compressing with multiple threads, it will | |
|
602 | * block until the flush is complete or the output buffer is full. | |
|
603 | * @return : 0 if internal buffers are entirely flushed, | |
|
604 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), | |
|
605 | * or an error code, which can be tested using ZSTD_isError(). | |
|
606 | * | |
|
607 | * Calling ZSTD_compressStream2() with ZSTD_e_end instructs to finish a frame. | |
|
608 | * It will perform a flush and write frame epilogue. | |
|
609 | * The epilogue is required for decoders to consider a frame completed. | |
|
610 | * flush operation is the same, and follows same rules as calling ZSTD_compressStream2() with ZSTD_e_flush. | |
|
611 | * You must continue calling ZSTD_compressStream2() with ZSTD_e_end until it returns 0, at which point you are free to | |
|
612 | * start a new frame. | |
|
613 | * note: ZSTD_e_end will flush as much output as possible, meaning when compressing with multiple threads, it will | |
|
614 | * block until the flush is complete or the output buffer is full. | |
|
615 | * @return : 0 if frame fully completed and fully flushed, | |
|
616 | * >0 if some data still present within internal buffer (the value is minimal estimation of remaining size), | |
|
617 | * or an error code, which can be tested using ZSTD_isError(). | |
|
618 | * | |
|
619 | * *******************************************************************/ | |
|
620 | ||
|
621 | typedef ZSTD_CCtx ZSTD_CStream; /**< CCtx and CStream are now effectively same object (>= v1.3.0) */ | |
|
622 | /* Continue to distinguish them for compatibility with older versions <= v1.2.0 */ | |
|
623 | /*===== ZSTD_CStream management functions =====*/ | |
|
624 | ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream(void); | |
|
625 | ZSTDLIB_API size_t ZSTD_freeCStream(ZSTD_CStream* zcs); | |
|
626 | ||
|
627 | /*===== Streaming compression functions =====*/ | |
|
628 | typedef enum { | |
|
629 | ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ | |
|
630 | ZSTD_e_flush=1, /* flush any data provided so far, | |
|
631 | * it creates (at least) one new block, that can be decoded immediately on reception; | |
|
632 | * frame will continue: any future data can still reference previously compressed data, improving compression. | |
|
633 | * note : multithreaded compression will block to flush as much output as possible. */ | |
|
634 | ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. | |
|
635 | * note that frame is only closed after compressed data is fully flushed (return value == 0). | |
|
636 | * After that point, any additional data starts a new frame. | |
|
637 | * note : each frame is independent (does not reference any content from previous frame). | |
|
638 | * note : multithreaded compression will block to flush as much output as possible. */ | |
|
639 | } ZSTD_EndDirective; | |
|
640 | ||
|
641 | /*! ZSTD_compressStream2() : | |
|
642 | * Behaves about the same as ZSTD_compressStream, with additional control on end directive. | |
|
643 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() | |
|
644 | * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) | |
|
645 | * - output->pos must be <= dstCapacity, input->pos must be <= srcSize | |
|
646 | * - output->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. | |
|
647 | * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. | |
|
648 | * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, | |
|
649 | * and then immediately returns, just indicating that there is some data remaining to be flushed. | |
|
650 | * The function nonetheless guarantees forward progress : it will return only after it reads or writes at least 1 byte. | |
|
651 | * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. | |
|
652 | * - @return provides a minimum amount of data remaining to be flushed from internal buffers | |
|
653 | * or an error code, which can be tested using ZSTD_isError(). | |
|
654 | * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. | |
|
655 | * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. | |
|
656 | * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. | |
|
657 | * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), | |
|
658 | * only ZSTD_e_end or ZSTD_e_flush operations are allowed. | |
|
659 | * Before starting a new compression job, or changing compression parameters, | |
|
660 | * it is required to fully flush internal buffers. | |
|
661 | */ | |
|
662 | ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, | |
|
663 | ZSTD_outBuffer* output, | |
|
664 | ZSTD_inBuffer* input, | |
|
665 | ZSTD_EndDirective endOp); | |
|
666 | ||
|
667 | ||
|
668 | /* These buffer sizes are softly recommended. | |
|
669 | * They are not required : ZSTD_compressStream*() happily accepts any buffer size, for both input and output. | |
|
670 | * Respecting the recommended size just makes it a bit easier for ZSTD_compressStream*(), | |
|
671 | * reducing the amount of memory shuffling and buffering, resulting in minor performance savings. | |
|
672 | * | |
|
673 | * However, note that these recommendations are from the perspective of a C caller program. | |
|
674 | * If the streaming interface is invoked from some other language, | |
|
675 | * especially managed ones such as Java or Go, through a foreign function interface such as jni or cgo, | |
|
676 | * a major performance rule is to reduce crossing such interface to an absolute minimum. | |
|
677 | * It's not rare that performance ends up being spent more in the interface, rather than compression itself. | |
|
678 | * In which cases, prefer using large buffers, as large as practical, | |
|
679 | * for both input and output, to reduce the nb of roundtrips. | |
|
680 | */ | |
|
681 | ZSTDLIB_API size_t ZSTD_CStreamInSize(void); /**< recommended size for input buffer */ | |
|
682 | ZSTDLIB_API size_t ZSTD_CStreamOutSize(void); /**< recommended size for output buffer. Guarantee to successfully flush at least one complete compressed block. */ | |
|
683 | ||
|
684 | ||
|
685 | /* ***************************************************************************** | |
|
686 | * The following is a legacy streaming API. | |
|
687 | * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2(). | |
|
688 | * It is redundant, but remains fully supported. | |
|
689 | * Advanced parameters and dictionary compression can only be used through the | |
|
690 | * new API. | |
|
691 | ******************************************************************************/ | |
|
692 | ||
|
693 | /*! | |
|
694 | * Equivalent to: | |
|
695 | * | |
|
696 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |
|
697 | * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) | |
|
698 | * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); | |
|
699 | */ | |
|
700 | ZSTDLIB_API size_t ZSTD_initCStream(ZSTD_CStream* zcs, int compressionLevel); | |
|
701 | /*! | |
|
702 | * Alternative for ZSTD_compressStream2(zcs, output, input, ZSTD_e_continue). | |
|
703 | * NOTE: The return value is different. ZSTD_compressStream() returns a hint for | |
|
704 | * the next read size (if non-zero and not an error). ZSTD_compressStream2() | |
|
705 | * returns the minimum nb of bytes left to flush (if non-zero and not an error). | |
|
706 | */ | |
|
707 | ZSTDLIB_API size_t ZSTD_compressStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |
|
708 | /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_flush). */ | |
|
709 | ZSTDLIB_API size_t ZSTD_flushStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); | |
|
710 | /*! Equivalent to ZSTD_compressStream2(zcs, output, &emptyInput, ZSTD_e_end). */ | |
|
711 | ZSTDLIB_API size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output); | |
|
712 | ||
|
713 | ||
|
714 | /*-*************************************************************************** | |
|
715 | * Streaming decompression - HowTo | |
|
716 | * | |
|
717 | * A ZSTD_DStream object is required to track streaming operations. | |
|
718 | * Use ZSTD_createDStream() and ZSTD_freeDStream() to create/release resources. | |
|
719 | * ZSTD_DStream objects can be re-used multiple times. | |
|
720 | * | |
|
721 | * Use ZSTD_initDStream() to start a new decompression operation. | |
|
722 | * @return : recommended first input size | |
|
723 | * Alternatively, use advanced API to set specific properties. | |
|
724 | * | |
|
725 | * Use ZSTD_decompressStream() repetitively to consume your input. | |
|
726 | * The function will update both `pos` fields. | |
|
727 | * If `input.pos < input.size`, some input has not been consumed. | |
|
728 | * It's up to the caller to present again remaining data. | |
|
729 | * The function tries to flush all data decoded immediately, respecting output buffer size. | |
|
730 | * If `output.pos < output.size`, decoder has flushed everything it could. | |
|
731 | * But if `output.pos == output.size`, there might be some data left within internal buffers. | |
|
732 | * In which case, call ZSTD_decompressStream() again to flush whatever remains in the buffer. | |
|
733 | * Note : with no additional input provided, amount of data flushed is necessarily <= ZSTD_BLOCKSIZE_MAX. | |
|
734 | * @return : 0 when a frame is completely decoded and fully flushed, | |
|
735 | * or an error code, which can be tested using ZSTD_isError(), | |
|
736 | * or any other value > 0, which means there is still some decoding or flushing to do to complete current frame : | |
|
737 | * the return value is a suggested next input size (just a hint for better latency) | |
|
738 | * that will never request more than the remaining frame size. | |
|
739 | * *******************************************************************************/ | |
|
740 | ||
|
741 | typedef ZSTD_DCtx ZSTD_DStream; /**< DCtx and DStream are now effectively same object (>= v1.3.0) */ | |
|
742 | /* For compatibility with versions <= v1.2.0, prefer differentiating them. */ | |
|
743 | /*===== ZSTD_DStream management functions =====*/ | |
|
744 | ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream(void); | |
|
745 | ZSTDLIB_API size_t ZSTD_freeDStream(ZSTD_DStream* zds); | |
|
746 | ||
|
747 | /*===== Streaming decompression functions =====*/ | |
|
748 | ||
|
749 | /* This function is redundant with the advanced API and equivalent to: | |
|
750 | * | |
|
751 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); | |
|
752 | * ZSTD_DCtx_refDDict(zds, NULL); | |
|
753 | */ | |
|
754 | ZSTDLIB_API size_t ZSTD_initDStream(ZSTD_DStream* zds); | |
|
755 | ||
|
756 | ZSTDLIB_API size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inBuffer* input); | |
|
757 | ||
|
758 | ZSTDLIB_API size_t ZSTD_DStreamInSize(void); /*!< recommended size for input buffer */ | |
|
759 | ZSTDLIB_API size_t ZSTD_DStreamOutSize(void); /*!< recommended size for output buffer. Guarantee to successfully flush at least one complete block in all circumstances. */ | |
|
760 | ||
|
761 | ||
|
762 | /************************** | |
|
763 | * Simple dictionary API | |
|
764 | ***************************/ | |
|
765 | /*! ZSTD_compress_usingDict() : | |
|
766 | * Compression at an explicit compression level using a Dictionary. | |
|
767 | * A dictionary can be any arbitrary data segment (also called a prefix), | |
|
768 | * or a buffer with specified information (see dictBuilder/zdict.h). | |
|
769 | * Note : This function loads the dictionary, resulting in significant startup delay. | |
|
770 | * It's intended for a dictionary used only once. | |
|
771 | * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |
|
772 | ZSTDLIB_API size_t ZSTD_compress_usingDict(ZSTD_CCtx* ctx, | |
|
773 | void* dst, size_t dstCapacity, | |
|
774 | const void* src, size_t srcSize, | |
|
775 | const void* dict,size_t dictSize, | |
|
776 | int compressionLevel); | |
|
777 | ||
|
778 | /*! ZSTD_decompress_usingDict() : | |
|
779 | * Decompression using a known Dictionary. | |
|
780 | * Dictionary must be identical to the one used during compression. | |
|
781 | * Note : This function loads the dictionary, resulting in significant startup delay. | |
|
782 | * It's intended for a dictionary used only once. | |
|
783 | * Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */ | |
|
784 | ZSTDLIB_API size_t ZSTD_decompress_usingDict(ZSTD_DCtx* dctx, | |
|
785 | void* dst, size_t dstCapacity, | |
|
786 | const void* src, size_t srcSize, | |
|
787 | const void* dict,size_t dictSize); | |
|
788 | ||
|
789 | ||
|
790 | /*********************************** | |
|
791 | * Bulk processing dictionary API | |
|
792 | **********************************/ | |
|
793 | typedef struct ZSTD_CDict_s ZSTD_CDict; | |
|
794 | ||
|
795 | /*! ZSTD_createCDict() : | |
|
796 | * When compressing multiple messages / blocks using the same dictionary, it's recommended to load it only once. | |
|
797 | * ZSTD_createCDict() will create a digested dictionary, ready to start future compression operations without startup cost. | |
|
798 | * ZSTD_CDict can be created once and shared by multiple threads concurrently, since its usage is read-only. | |
|
799 | * `dictBuffer` can be released after ZSTD_CDict creation, because its content is copied within CDict. | |
|
800 | * Consider experimental function `ZSTD_createCDict_byReference()` if you prefer to not duplicate `dictBuffer` content. | |
|
801 | * Note : A ZSTD_CDict can be created from an empty dictBuffer, but it is inefficient when used to compress small data. */ | |
|
802 | ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict(const void* dictBuffer, size_t dictSize, | |
|
803 | int compressionLevel); | |
|
804 | ||
|
805 | /*! ZSTD_freeCDict() : | |
|
806 | * Function frees memory allocated by ZSTD_createCDict(). */ | |
|
807 | ZSTDLIB_API size_t ZSTD_freeCDict(ZSTD_CDict* CDict); | |
|
808 | ||
|
809 | /*! ZSTD_compress_usingCDict() : | |
|
810 | * Compression using a digested Dictionary. | |
|
811 | * Recommended when same dictionary is used multiple times. | |
|
812 | * Note : compression level is _decided at dictionary creation time_, | |
|
813 | * and frame parameters are hardcoded (dictID=yes, contentSize=yes, checksum=no) */ | |
|
814 | ZSTDLIB_API size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx, | |
|
815 | void* dst, size_t dstCapacity, | |
|
816 | const void* src, size_t srcSize, | |
|
817 | const ZSTD_CDict* cdict); | |
|
818 | ||
|
819 | ||
|
820 | typedef struct ZSTD_DDict_s ZSTD_DDict; | |
|
821 | ||
|
822 | /*! ZSTD_createDDict() : | |
|
823 | * Create a digested dictionary, ready to start decompression operation without startup delay. | |
|
824 | * dictBuffer can be released after DDict creation, as its content is copied inside DDict. */ | |
|
825 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict(const void* dictBuffer, size_t dictSize); | |
|
826 | ||
|
827 | /*! ZSTD_freeDDict() : | |
|
828 | * Function frees memory allocated with ZSTD_createDDict() */ | |
|
829 | ZSTDLIB_API size_t ZSTD_freeDDict(ZSTD_DDict* ddict); | |
|
830 | ||
|
831 | /*! ZSTD_decompress_usingDDict() : | |
|
832 | * Decompression using a digested Dictionary. | |
|
833 | * Recommended when same dictionary is used multiple times. */ | |
|
834 | ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx, | |
|
835 | void* dst, size_t dstCapacity, | |
|
836 | const void* src, size_t srcSize, | |
|
837 | const ZSTD_DDict* ddict); | |
|
838 | ||
|
839 | ||
|
840 | /******************************** | |
|
841 | * Dictionary helper functions | |
|
842 | *******************************/ | |
|
843 | ||
|
844 | /*! ZSTD_getDictID_fromDict() : | |
|
845 | * Provides the dictID stored within dictionary. | |
|
846 | * if @return == 0, the dictionary is not conformant with Zstandard specification. | |
|
847 | * It can still be loaded, but as a content-only dictionary. */ | |
|
848 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); | |
|
849 | ||
|
850 | /*! ZSTD_getDictID_fromDDict() : | |
|
851 | * Provides the dictID of the dictionary loaded into `ddict`. | |
|
852 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | |
|
853 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | |
|
854 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); | |
|
855 | ||
|
856 | /*! ZSTD_getDictID_fromFrame() : | |
|
857 | * Provides the dictID required to decompress the frame stored within `src`. | |
|
858 | * If @return == 0, the dictID could not be decoded. | |
|
859 | * This could be for one of the following reasons : | |
|
860 | * - The frame does not require a dictionary to be decoded (most common case). | |
|
861 | * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. | |
|
862 | * Note : this use case also happens when using a non-conformant dictionary. | |
|
863 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). | |
|
864 | * - This is not a Zstandard frame. | |
|
865 | * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ | |
|
866 | ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); | |
|
867 | ||
|
868 | ||
|
869 | /******************************************************************************* | |
|
870 | * Advanced dictionary and prefix API | |
|
871 | * | |
|
872 | * This API allows dictionaries to be used with ZSTD_compress2(), | |
|
873 | * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and | |
|
874 | * only reset when the context is reset with ZSTD_reset_parameters or | |
|
875 | * ZSTD_reset_session_and_parameters. Prefixes are single-use. | |
|
876 | ******************************************************************************/ | |
|
877 | ||
|
878 | ||
|
684 | 879 | /*! ZSTD_CCtx_loadDictionary() : |
|
685 | 880 | * Create an internal CDict from `dict` buffer. |
|
686 | 881 | * Decompression will have to use same dictionary. |
@@ -703,7 +898,9 b' ZSTDLIB_API size_t ZSTD_CCtx_loadDiction' | |||
|
703 | 898 | /*! ZSTD_CCtx_refCDict() : |
|
704 | 899 | * Reference a prepared dictionary, to be used for all next compressed frames. |
|
705 | 900 | * Note that compression parameters are enforced from within CDict, |
|
706 |
* and super |
|
|
901 | * and supersede any compression parameter previously set within CCtx. | |
|
902 | * The parameters ignored are labeled as "superseded-by-cdict" in the ZSTD_cParameter enum docs. | |
|
903 | * The ignored parameters will be used again if the CCtx is returned to no-dictionary mode. | |
|
707 | 904 | * The dictionary will remain valid for future compressed frames using same CCtx. |
|
708 | 905 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). |
|
709 | 906 | * Special : Referencing a NULL CDict means "return to no-dictionary mode". |
@@ -733,136 +930,6 b' ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZS' | |||
|
733 | 930 | ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx, |
|
734 | 931 | const void* prefix, size_t prefixSize); |
|
735 | 932 | |
|
736 | ||
|
737 | typedef enum { | |
|
738 | ZSTD_reset_session_only = 1, | |
|
739 | ZSTD_reset_parameters = 2, | |
|
740 | ZSTD_reset_session_and_parameters = 3 | |
|
741 | } ZSTD_ResetDirective; | |
|
742 | ||
|
743 | /*! ZSTD_CCtx_reset() : | |
|
744 | * There are 2 different things that can be reset, independently or jointly : | |
|
745 | * - The session : will stop compressing current frame, and make CCtx ready to start a new one. | |
|
746 | * Useful after an error, or to interrupt any ongoing compression. | |
|
747 | * Any internal data not yet flushed is cancelled. | |
|
748 | * Compression parameters and dictionary remain unchanged. | |
|
749 | * They will be used to compress next frame. | |
|
750 | * Resetting session never fails. | |
|
751 | * - The parameters : changes all parameters back to "default". | |
|
752 | * This removes any reference to any dictionary too. | |
|
753 | * Parameters can only be changed between 2 sessions (i.e. no compression is currently ongoing) | |
|
754 | * otherwise the reset fails, and function returns an error value (which can be tested using ZSTD_isError()) | |
|
755 | * - Both : similar to resetting the session, followed by resetting parameters. | |
|
756 | */ | |
|
757 | ZSTDLIB_API size_t ZSTD_CCtx_reset(ZSTD_CCtx* cctx, ZSTD_ResetDirective reset); | |
|
758 | ||
|
759 | ||
|
760 | ||
|
761 | /*! ZSTD_compress2() : | |
|
762 | * Behave the same as ZSTD_compressCCtx(), but compression parameters are set using the advanced API. | |
|
763 | * ZSTD_compress2() always starts a new frame. | |
|
764 | * Should cctx hold data from a previously unfinished frame, everything about it is forgotten. | |
|
765 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() | |
|
766 | * - The function is always blocking, returns when compression is completed. | |
|
767 | * Hint : compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`. | |
|
768 | * @return : compressed size written into `dst` (<= `dstCapacity), | |
|
769 | * or an error code if it fails (which can be tested using ZSTD_isError()). | |
|
770 | */ | |
|
771 | ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx, | |
|
772 | void* dst, size_t dstCapacity, | |
|
773 | const void* src, size_t srcSize); | |
|
774 | ||
|
775 | typedef enum { | |
|
776 | ZSTD_e_continue=0, /* collect more data, encoder decides when to output compressed result, for optimal compression ratio */ | |
|
777 | ZSTD_e_flush=1, /* flush any data provided so far, | |
|
778 | * it creates (at least) one new block, that can be decoded immediately on reception; | |
|
779 | * frame will continue: any future data can still reference previously compressed data, improving compression. */ | |
|
780 | ZSTD_e_end=2 /* flush any remaining data _and_ close current frame. | |
|
781 | * note that frame is only closed after compressed data is fully flushed (return value == 0). | |
|
782 | * After that point, any additional data starts a new frame. | |
|
783 | * note : each frame is independent (does not reference any content from previous frame). */ | |
|
784 | } ZSTD_EndDirective; | |
|
785 | ||
|
786 | /*! ZSTD_compressStream2() : | |
|
787 | * Behaves about the same as ZSTD_compressStream, with additional control on end directive. | |
|
788 | * - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*() | |
|
789 | * - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode) | |
|
790 | * - outpot->pos must be <= dstCapacity, input->pos must be <= srcSize | |
|
791 | * - outpot->pos and input->pos will be updated. They are guaranteed to remain below their respective limit. | |
|
792 | * - When nbWorkers==0 (default), function is blocking : it completes its job before returning to caller. | |
|
793 | * - When nbWorkers>=1, function is non-blocking : it just acquires a copy of input, and distributes jobs to internal worker threads, flush whatever is available, | |
|
794 | * and then immediately returns, just indicating that there is some data remaining to be flushed. | |
|
795 | * The function nonetheless guarantees forward progress : it will return only after it reads or write at least 1+ byte. | |
|
796 | * - Exception : if the first call requests a ZSTD_e_end directive and provides enough dstCapacity, the function delegates to ZSTD_compress2() which is always blocking. | |
|
797 | * - @return provides a minimum amount of data remaining to be flushed from internal buffers | |
|
798 | * or an error code, which can be tested using ZSTD_isError(). | |
|
799 | * if @return != 0, flush is not fully completed, there is still some data left within internal buffers. | |
|
800 | * This is useful for ZSTD_e_flush, since in this case more flushes are necessary to empty all buffers. | |
|
801 | * For ZSTD_e_end, @return == 0 when internal buffers are fully flushed and frame is completed. | |
|
802 | * - after a ZSTD_e_end directive, if internal buffer is not fully flushed (@return != 0), | |
|
803 | * only ZSTD_e_end or ZSTD_e_flush operations are allowed. | |
|
804 | * Before starting a new compression job, or changing compression parameters, | |
|
805 | * it is required to fully flush internal buffers. | |
|
806 | */ | |
|
807 | ZSTDLIB_API size_t ZSTD_compressStream2( ZSTD_CCtx* cctx, | |
|
808 | ZSTD_outBuffer* output, | |
|
809 | ZSTD_inBuffer* input, | |
|
810 | ZSTD_EndDirective endOp); | |
|
811 | ||
|
812 | ||
|
813 | ||
|
814 | /* ============================== */ | |
|
815 | /* Advanced decompression API */ | |
|
816 | /* ============================== */ | |
|
817 | ||
|
818 | /* The advanced API pushes parameters one by one into an existing DCtx context. | |
|
819 | * Parameters are sticky, and remain valid for all following frames | |
|
820 | * using the same DCtx context. | |
|
821 | * It's possible to reset parameters to default values using ZSTD_DCtx_reset(). | |
|
822 | * Note : This API is compatible with existing ZSTD_decompressDCtx() and ZSTD_decompressStream(). | |
|
823 | * Therefore, no new decompression function is necessary. | |
|
824 | */ | |
|
825 | ||
|
826 | ||
|
827 | typedef enum { | |
|
828 | ||
|
829 | ZSTD_d_windowLogMax=100, /* Select a size limit (in power of 2) beyond which | |
|
830 | * the streaming API will refuse to allocate memory buffer | |
|
831 | * in order to protect the host from unreasonable memory requirements. | |
|
832 | * This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode. | |
|
833 | * By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT) */ | |
|
834 | ||
|
835 | /* note : additional experimental parameters are also available | |
|
836 | * within the experimental section of the API. | |
|
837 | * At the time of this writing, they include : | |
|
838 | * ZSTD_c_format | |
|
839 | * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them. | |
|
840 | * note : never ever use experimentalParam? names directly | |
|
841 | */ | |
|
842 | ZSTD_d_experimentalParam1=1000 | |
|
843 | ||
|
844 | } ZSTD_dParameter; | |
|
845 | ||
|
846 | ||
|
847 | /*! ZSTD_dParam_getBounds() : | |
|
848 | * All parameters must belong to an interval with lower and upper bounds, | |
|
849 | * otherwise they will either trigger an error or be automatically clamped. | |
|
850 | * @return : a structure, ZSTD_bounds, which contains | |
|
851 | * - an error status field, which must be tested using ZSTD_isError() | |
|
852 | * - both lower and upper bounds, inclusive | |
|
853 | */ | |
|
854 | ZSTDLIB_API ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam); | |
|
855 | ||
|
856 | /*! ZSTD_DCtx_setParameter() : | |
|
857 | * Set one compression parameter, selected by enum ZSTD_dParameter. | |
|
858 | * All parameters have valid bounds. Bounds can be queried using ZSTD_dParam_getBounds(). | |
|
859 | * Providing a value beyond bound will either clamp it, or trigger an error (depending on parameter). | |
|
860 | * Setting a parameter is only possible during frame initialization (before starting decompression). | |
|
861 | * @return : 0, or an error code (which can be tested using ZSTD_isError()). | |
|
862 | */ | |
|
863 | ZSTDLIB_API size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int value); | |
|
864 | ||
|
865 | ||
|
866 | 933 | /*! ZSTD_DCtx_loadDictionary() : |
|
867 | 934 | * Create an internal DDict from dict buffer, |
|
868 | 935 | * to be used to decompress next frames. |
@@ -910,15 +977,32 b' ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZS' | |||
|
910 | 977 | ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx, |
|
911 | 978 | const void* prefix, size_t prefixSize); |
|
912 | 979 | |
|
913 | /*! ZSTD_DCtx_reset() : | |
|
914 | * Return a DCtx to clean state. | |
|
915 | * Session and parameters can be reset jointly or separately. | |
|
916 | * Parameters can only be reset when no active frame is being decompressed. | |
|
917 | * @return : 0, or an error code, which can be tested with ZSTD_isError() | |
|
918 | */ | |
|
919 |
ZSTDLIB_API size_t ZSTD_DCtx |
|
|
980 | /* === Memory management === */ | |
|
981 | ||
|
982 | /*! ZSTD_sizeof_*() : | |
|
983 | * These functions give the _current_ memory usage of selected object. | |
|
984 | * Note that object memory usage can evolve (increase or decrease) over time. */ | |
|
985 | ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx); | |
|
986 | ZSTDLIB_API size_t ZSTD_sizeof_DCtx(const ZSTD_DCtx* dctx); | |
|
987 | ZSTDLIB_API size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs); | |
|
988 | ZSTDLIB_API size_t ZSTD_sizeof_DStream(const ZSTD_DStream* zds); | |
|
989 | ZSTDLIB_API size_t ZSTD_sizeof_CDict(const ZSTD_CDict* cdict); | |
|
990 | ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict); | |
|
991 | ||
|
992 | #endif /* ZSTD_H_235446 */ | |
|
920 | 993 | |
|
921 | 994 | |
|
995 | /* ************************************************************************************** | |
|
996 | * ADVANCED AND EXPERIMENTAL FUNCTIONS | |
|
997 | **************************************************************************************** | |
|
998 | * The definitions in the following section are considered experimental. | |
|
999 | * They are provided for advanced scenarios. | |
|
1000 | * They should never be used with a dynamic library, as prototypes may change in the future. | |
|
1001 | * Use them only in association with static linking. | |
|
1002 | * ***************************************************************************************/ | |
|
1003 | ||
|
1004 | #if defined(ZSTD_STATIC_LINKING_ONLY) && !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY) | |
|
1005 | #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY | |
|
922 | 1006 | |
|
923 | 1007 | /**************************************************************************************** |
|
924 | 1008 | * experimental API (static linking only) |
@@ -962,7 +1046,7 b' ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_' | |||
|
962 | 1046 | #define ZSTD_WINDOWLOG_LIMIT_DEFAULT 27 /* by default, the streaming decoder will refuse any frame |
|
963 | 1047 | * requiring larger than (1<<ZSTD_WINDOWLOG_LIMIT_DEFAULT) window size, |
|
964 | 1048 | * to preserve host's memory from unreasonable requirements. |
|
965 | * This limit can be overriden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,). | |
|
1049 | * This limit can be overridden using ZSTD_DCtx_setParameter(,ZSTD_d_windowLogMax,). | |
|
966 | 1050 | * The limit does not apply for one-pass decoders (such as ZSTD_decompress()), since no additional memory is allocated */ |
|
967 | 1051 | |
|
968 | 1052 | |
@@ -976,6 +1060,10 b' ZSTDLIB_API size_t ZSTD_DCtx_reset(ZSTD_' | |||
|
976 | 1060 | #define ZSTD_LDM_HASHRATELOG_MIN 0 |
|
977 | 1061 | #define ZSTD_LDM_HASHRATELOG_MAX (ZSTD_WINDOWLOG_MAX - ZSTD_HASHLOG_MIN) |
|
978 | 1062 | |
|
1063 | /* Advanced parameter bounds */ | |
|
1064 | #define ZSTD_TARGETCBLOCKSIZE_MIN 64 | |
|
1065 | #define ZSTD_TARGETCBLOCKSIZE_MAX ZSTD_BLOCKSIZE_MAX | |
|
1066 | ||
|
979 | 1067 | /* internal */ |
|
980 | 1068 | #define ZSTD_HASHLOG3_MAX 17 |
|
981 | 1069 | |
@@ -1064,15 +1152,24 b' typedef enum {' | |||
|
1064 | 1152 | ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */ |
|
1065 | 1153 | } ZSTD_dictAttachPref_e; |
|
1066 | 1154 | |
|
1155 | typedef enum { | |
|
1156 | ZSTD_lcm_auto = 0, /**< Automatically determine the compression mode based on the compression level. | |
|
1157 | * Negative compression levels will be uncompressed, and positive compression | |
|
1158 | * levels will be compressed. */ | |
|
1159 | ZSTD_lcm_huffman = 1, /**< Always attempt Huffman compression. Uncompressed literals will still be | |
|
1160 | * emitted if Huffman compression is not profitable. */ | |
|
1161 | ZSTD_lcm_uncompressed = 2, /**< Always emit uncompressed literals. */ | |
|
1162 | } ZSTD_literalCompressionMode_e; | |
|
1163 | ||
|
1067 | 1164 | |
|
1068 | 1165 | /*************************************** |
|
1069 | 1166 | * Frame size functions |
|
1070 | 1167 | ***************************************/ |
|
1071 | 1168 | |
|
1072 | 1169 | /*! ZSTD_findDecompressedSize() : |
|
1073 | * `src` should point the start of a series of ZSTD encoded and/or skippable frames | |
|
1170 | * `src` should point to the start of a series of ZSTD encoded and/or skippable frames | |
|
1074 | 1171 | * `srcSize` must be the _exact_ size of this series |
|
1075 |
* (i.e. there should be a frame boundary |
|
|
1172 | * (i.e. there should be a frame boundary at `src + srcSize`) | |
|
1076 | 1173 | * @return : - decompressed size of all data in all successive frames |
|
1077 | 1174 | * - if the decompressed size cannot be determined: ZSTD_CONTENTSIZE_UNKNOWN |
|
1078 | 1175 | * - if an error occurred: ZSTD_CONTENTSIZE_ERROR |
@@ -1092,6 +1189,21 b' typedef enum {' | |||
|
1092 | 1189 | * however it does mean that all frame data must be present and valid. */ |
|
1093 | 1190 | ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize); |
|
1094 | 1191 | |
|
1192 | /*! ZSTD_decompressBound() : | |
|
1193 | * `src` should point to the start of a series of ZSTD encoded and/or skippable frames | |
|
1194 | * `srcSize` must be the _exact_ size of this series | |
|
1195 | * (i.e. there should be a frame boundary at `src + srcSize`) | |
|
1196 | * @return : - upper-bound for the decompressed size of all data in all successive frames | |
|
1197 | * - if an error occured: ZSTD_CONTENTSIZE_ERROR | |
|
1198 | * | |
|
1199 | * note 1 : an error can occur if `src` contains an invalid or incorrectly formatted frame. | |
|
1200 | * note 2 : the upper-bound is exact when the decompressed size field is available in every ZSTD encoded frame of `src`. | |
|
1201 | * in this case, `ZSTD_findDecompressedSize` and `ZSTD_decompressBound` return the same value. | |
|
1202 | * note 3 : when the decompressed size field isn't available, the upper-bound for that frame is calculated by: | |
|
1203 | * upper-bound = # blocks * min(128 KB, Window_Size) | |
|
1204 | */ | |
|
1205 | ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize); | |
|
1206 | ||
|
1095 | 1207 | /*! ZSTD_frameHeaderSize() : |
|
1096 | 1208 | * srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX. |
|
1097 | 1209 | * @return : size of the Frame Header, |
@@ -1110,7 +1222,7 b' ZSTDLIB_API size_t ZSTD_frameHeaderSize(' | |||
|
1110 | 1222 | * It will also consider src size to be arbitrarily "large", which is worst case. |
|
1111 | 1223 | * If srcSize is known to always be small, ZSTD_estimateCCtxSize_usingCParams() can provide a tighter estimation. |
|
1112 | 1224 | * ZSTD_estimateCCtxSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. |
|
1113 | * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. | |
|
1225 | * ZSTD_estimateCCtxSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. | |
|
1114 | 1226 | * Note : CCtx size estimation is only correct for single-threaded compression. */ |
|
1115 | 1227 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel); |
|
1116 | 1228 | ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams); |
@@ -1122,7 +1234,7 b' ZSTDLIB_API size_t ZSTD_estimateDCtxSize' | |||
|
1122 | 1234 | * It will also consider src size to be arbitrarily "large", which is worst case. |
|
1123 | 1235 | * If srcSize is known to always be small, ZSTD_estimateCStreamSize_usingCParams() can provide a tighter estimation. |
|
1124 | 1236 | * ZSTD_estimateCStreamSize_usingCParams() can be used in tandem with ZSTD_getCParams() to create cParams from compressionLevel. |
|
1125 | * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParam_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. | |
|
1237 | * ZSTD_estimateCStreamSize_usingCCtxParams() can be used in tandem with ZSTD_CCtxParams_setParameter(). Only single-threaded compression is supported. This function will return an error code if ZSTD_c_nbWorkers is >= 1. | |
|
1126 | 1238 | * Note : CStream size estimation is only correct for single-threaded compression. |
|
1127 | 1239 | * ZSTD_DStream memory budget depends on window Size. |
|
1128 | 1240 | * This information can be passed manually, using ZSTD_estimateDStreamSize, |
@@ -1236,12 +1348,16 b' ZSTDLIB_API ZSTD_compressionParameters Z' | |||
|
1236 | 1348 | ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize); |
|
1237 | 1349 | |
|
1238 | 1350 | /*! ZSTD_checkCParams() : |
|
1239 |
|
|
|
1351 | * Ensure param values remain within authorized range. | |
|
1352 | * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */ | |
|
1240 | 1353 | ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params); |
|
1241 | 1354 | |
|
1242 | 1355 | /*! ZSTD_adjustCParams() : |
|
1243 | 1356 | * optimize params for a given `srcSize` and `dictSize`. |
|
1244 | * both values are optional, select `0` if unknown. */ | |
|
1357 | * `srcSize` can be unknown, in which case use ZSTD_CONTENTSIZE_UNKNOWN. | |
|
1358 | * `dictSize` must be `0` when there is no dictionary. | |
|
1359 | * cPar can be invalid : all parameters will be clamped within valid range in the @return struct. | |
|
1360 | * This function never fails (wide contract) */ | |
|
1245 | 1361 | ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize); |
|
1246 | 1362 | |
|
1247 | 1363 | /*! ZSTD_compress_advanced() : |
@@ -1314,6 +1430,17 b' ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_a' | |||
|
1314 | 1430 | * See the comments on that enum for an explanation of the feature. */ |
|
1315 | 1431 | #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4 |
|
1316 | 1432 | |
|
1433 | /* Controls how the literals are compressed (default is auto). | |
|
1434 | * The value must be of type ZSTD_literalCompressionMode_e. | |
|
1435 | * See ZSTD_literalCompressionMode_t enum definition for details. | |
|
1436 | */ | |
|
1437 | #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5 | |
|
1438 | ||
|
1439 | /* Tries to fit compressed block size to be around targetCBlockSize. | |
|
1440 | * No target when targetCBlockSize == 0. | |
|
1441 | * There is no guarantee on compressed block size (default:0) */ | |
|
1442 | #define ZSTD_c_targetCBlockSize ZSTD_c_experimentalParam6 | |
|
1443 | ||
|
1317 | 1444 | /*! ZSTD_CCtx_getParameter() : |
|
1318 | 1445 | * Get the requested compression parameter value, selected by enum ZSTD_cParameter, |
|
1319 | 1446 | * and store it into int* value. |
@@ -1325,7 +1452,7 b' ZSTDLIB_API size_t ZSTD_CCtx_getParamete' | |||
|
1325 | 1452 | /*! ZSTD_CCtx_params : |
|
1326 | 1453 | * Quick howto : |
|
1327 | 1454 | * - ZSTD_createCCtxParams() : Create a ZSTD_CCtx_params structure |
|
1328 | * - ZSTD_CCtxParam_setParameter() : Push parameters one by one into | |
|
1455 | * - ZSTD_CCtxParams_setParameter() : Push parameters one by one into | |
|
1329 | 1456 | * an existing ZSTD_CCtx_params structure. |
|
1330 | 1457 | * This is similar to |
|
1331 | 1458 | * ZSTD_CCtx_setParameter(). |
@@ -1359,20 +1486,20 b' ZSTDLIB_API size_t ZSTD_CCtxParams_init(' | |||
|
1359 | 1486 | */ |
|
1360 | 1487 | ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params); |
|
1361 | 1488 | |
|
1362 | /*! ZSTD_CCtxParam_setParameter() : | |
|
1489 | /*! ZSTD_CCtxParams_setParameter() : | |
|
1363 | 1490 | * Similar to ZSTD_CCtx_setParameter. |
|
1364 | 1491 | * Set one compression parameter, selected by enum ZSTD_cParameter. |
|
1365 | 1492 | * Parameters must be applied to a ZSTD_CCtx using ZSTD_CCtx_setParametersUsingCCtxParams(). |
|
1366 | 1493 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). |
|
1367 | 1494 | */ |
|
1368 | ZSTDLIB_API size_t ZSTD_CCtxParam_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); | |
|
1495 | ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value); | |
|
1369 | 1496 | |
|
1370 | /*! ZSTD_CCtxParam_getParameter() : | |
|
1497 | /*! ZSTD_CCtxParams_getParameter() : | |
|
1371 | 1498 | * Similar to ZSTD_CCtx_getParameter. |
|
1372 | 1499 | * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter. |
|
1373 | 1500 | * @result : 0, or an error code (which can be tested with ZSTD_isError()). |
|
1374 | 1501 | */ |
|
1375 | ZSTDLIB_API size_t ZSTD_CCtxParam_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); | |
|
1502 | ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value); | |
|
1376 | 1503 | |
|
1377 | 1504 | /*! ZSTD_CCtx_setParametersUsingCCtxParams() : |
|
1378 | 1505 | * Apply a set of ZSTD_CCtx_params to the compression context. |
@@ -1415,31 +1542,6 b' ZSTDLIB_API unsigned ZSTD_isFrame(const ' | |||
|
1415 | 1542 | * it must remain read accessible throughout the lifetime of DDict */ |
|
1416 | 1543 | ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize); |
|
1417 | 1544 | |
|
1418 | ||
|
1419 | /*! ZSTD_getDictID_fromDict() : | |
|
1420 | * Provides the dictID stored within dictionary. | |
|
1421 | * if @return == 0, the dictionary is not conformant with Zstandard specification. | |
|
1422 | * It can still be loaded, but as a content-only dictionary. */ | |
|
1423 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize); | |
|
1424 | ||
|
1425 | /*! ZSTD_getDictID_fromDDict() : | |
|
1426 | * Provides the dictID of the dictionary loaded into `ddict`. | |
|
1427 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. | |
|
1428 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ | |
|
1429 | ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict); | |
|
1430 | ||
|
1431 | /*! ZSTD_getDictID_fromFrame() : | |
|
1432 | * Provides the dictID required to decompressed the frame stored within `src`. | |
|
1433 | * If @return == 0, the dictID could not be decoded. | |
|
1434 | * This could for one of the following reasons : | |
|
1435 | * - The frame does not require a dictionary to be decoded (most common case). | |
|
1436 | * - The frame was built with dictID intentionally removed. Whatever dictionary is necessary is a hidden information. | |
|
1437 | * Note : this use case also happens when using a non-conformant dictionary. | |
|
1438 | * - `srcSize` is too small, and as a result, the frame header could not be decoded (only possible if `srcSize < ZSTD_FRAMEHEADERSIZE_MAX`). | |
|
1439 | * - This is not a Zstandard frame. | |
|
1440 | * When identifying the exact failure cause, it's possible to use ZSTD_getFrameHeader(), which will provide a more precise error code. */ | |
|
1441 | ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize); | |
|
1442 | ||
|
1443 | 1545 | /*! ZSTD_DCtx_loadDictionary_byReference() : |
|
1444 | 1546 | * Same as ZSTD_DCtx_loadDictionary(), |
|
1445 | 1547 | * but references `dict` content instead of copying it into `dctx`. |
@@ -1501,14 +1603,68 b' ZSTDLIB_API size_t ZSTD_decompressStream' | |||
|
1501 | 1603 | ********************************************************************/ |
|
1502 | 1604 | |
|
1503 | 1605 | /*===== Advanced Streaming compression functions =====*/ |
|
1504 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If it is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, "0" also disables frame content size field. It may be enabled in the future. */ | |
|
1505 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); /**< creates of an internal CDict (incompatible with static CCtx), except if dict == NULL or dictSize < 8, in which case no dict is used. Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.*/ | |
|
1606 | /**! ZSTD_initCStream_srcSize() : | |
|
1607 | * This function is deprecated, and equivalent to: | |
|
1608 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |
|
1609 | * ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any) | |
|
1610 | * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); | |
|
1611 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |
|
1612 | * | |
|
1613 | * pledgedSrcSize must be correct. If it is not known at init time, use | |
|
1614 | * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs, | |
|
1615 | * "0" also disables frame content size field. It may be enabled in the future. | |
|
1616 | */ | |
|
1617 | ZSTDLIB_API size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs, int compressionLevel, unsigned long long pledgedSrcSize); | |
|
1618 | /**! ZSTD_initCStream_usingDict() : | |
|
1619 | * This function is deprecated, and is equivalent to: | |
|
1620 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |
|
1621 | * ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel); | |
|
1622 | * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); | |
|
1623 | * | |
|
1624 | * Creates of an internal CDict (incompatible with static CCtx), except if | |
|
1625 | * dict == NULL or dictSize < 8, in which case no dict is used. | |
|
1626 | * Note: dict is loaded with ZSTD_dm_auto (treated as a full zstd dictionary if | |
|
1627 | * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy. | |
|
1628 | */ | |
|
1629 | ZSTDLIB_API size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs, const void* dict, size_t dictSize, int compressionLevel); | |
|
1630 | /**! ZSTD_initCStream_advanced() : | |
|
1631 | * This function is deprecated, and is approximately equivalent to: | |
|
1632 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |
|
1633 | * ZSTD_CCtx_setZstdParams(zcs, params); // Set the zstd params and leave the rest as-is | |
|
1634 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |
|
1635 | * ZSTD_CCtx_loadDictionary(zcs, dict, dictSize); | |
|
1636 | * | |
|
1637 | * pledgedSrcSize must be correct. If srcSize is not known at init time, use | |
|
1638 | * value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. | |
|
1639 | */ | |
|
1506 | 1640 | ZSTDLIB_API size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs, const void* dict, size_t dictSize, |
|
1507 | ZSTD_parameters params, unsigned long long pledgedSrcSize); /**< pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. dict is loaded with ZSTD_dm_auto and ZSTD_dlm_byCopy. */ | |
|
1508 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); /**< note : cdict will just be referenced, and must outlive compression session */ | |
|
1509 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); /**< same as ZSTD_initCStream_usingCDict(), with control over frame parameters. pledgedSrcSize must be correct. If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN. */ | |
|
1641 | ZSTD_parameters params, unsigned long long pledgedSrcSize); | |
|
1642 | /**! ZSTD_initCStream_usingCDict() : | |
|
1643 | * This function is deprecated, and equivalent to: | |
|
1644 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |
|
1645 | * ZSTD_CCtx_refCDict(zcs, cdict); | |
|
1646 | * | |
|
1647 | * note : cdict will just be referenced, and must outlive compression session | |
|
1648 | */ | |
|
1649 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict); | |
|
1650 | /**! ZSTD_initCStream_usingCDict_advanced() : | |
|
1651 | * This function is deprecated, and is approximately equivalent to: | |
|
1652 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |
|
1653 | * ZSTD_CCtx_setZstdFrameParams(zcs, fParams); // Set the zstd frame params and leave the rest as-is | |
|
1654 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |
|
1655 | * ZSTD_CCtx_refCDict(zcs, cdict); | |
|
1656 | * | |
|
1657 | * same as ZSTD_initCStream_usingCDict(), with control over frame parameters. | |
|
1658 | * pledgedSrcSize must be correct. If srcSize is not known at init time, use | |
|
1659 | * value ZSTD_CONTENTSIZE_UNKNOWN. | |
|
1660 | */ | |
|
1661 | ZSTDLIB_API size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs, const ZSTD_CDict* cdict, ZSTD_frameParameters fParams, unsigned long long pledgedSrcSize); | |
|
1510 | 1662 | |
|
1511 | 1663 | /*! ZSTD_resetCStream() : |
|
1664 | * This function is deprecated, and is equivalent to: | |
|
1665 | * ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only); | |
|
1666 | * ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize); | |
|
1667 | * | |
|
1512 | 1668 | * start a new frame, using same parameters from previous frame. |
|
1513 | 1669 | * This is typically useful to skip dictionary loading stage, since it will re-use it in-place. |
|
1514 | 1670 | * Note that zcs must be init at least once before using ZSTD_resetCStream(). |
@@ -1555,9 +1711,32 b' ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_' | |||
|
1555 | 1711 | |
|
1556 | 1712 | |
|
1557 | 1713 | /*===== Advanced Streaming decompression functions =====*/ |
|
1558 | ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); /**< note: no dictionary will be used if dict == NULL or dictSize < 8 */ | |
|
1559 | ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); /**< note : ddict is referenced, it must outlive decompression session */ | |
|
1560 | ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); /**< re-use decompression parameters from previous init; saves dictionary loading */ | |
|
1714 | /** | |
|
1715 | * This function is deprecated, and is equivalent to: | |
|
1716 | * | |
|
1717 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); | |
|
1718 | * ZSTD_DCtx_loadDictionary(zds, dict, dictSize); | |
|
1719 | * | |
|
1720 | * note: no dictionary will be used if dict == NULL or dictSize < 8 | |
|
1721 | */ | |
|
1722 | ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize); | |
|
1723 | /** | |
|
1724 | * This function is deprecated, and is equivalent to: | |
|
1725 | * | |
|
1726 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); | |
|
1727 | * ZSTD_DCtx_refDDict(zds, ddict); | |
|
1728 | * | |
|
1729 | * note : ddict is referenced, it must outlive decompression session | |
|
1730 | */ | |
|
1731 | ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict); | |
|
1732 | /** | |
|
1733 | * This function is deprecated, and is equivalent to: | |
|
1734 | * | |
|
1735 | * ZSTD_DCtx_reset(zds, ZSTD_reset_session_only); | |
|
1736 | * | |
|
1737 | * re-use decompression parameters from previous init; saves dictionary loading | |
|
1738 | */ | |
|
1739 | ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds); | |
|
1561 | 1740 | |
|
1562 | 1741 | |
|
1563 | 1742 | /********************************************************************* |
@@ -1696,7 +1875,7 b' typedef struct {' | |||
|
1696 | 1875 | unsigned checksumFlag; |
|
1697 | 1876 | } ZSTD_frameHeader; |
|
1698 | 1877 | |
|
1699 |
/* |
|
|
1878 | /*! ZSTD_getFrameHeader() : | |
|
1700 | 1879 | * decode Frame Header, or requires larger `srcSize`. |
|
1701 | 1880 | * @return : 0, `zfhPtr` is correctly filled, |
|
1702 | 1881 | * >0, `srcSize` is too small, value is wanted `srcSize` amount, |
@@ -1730,7 +1909,7 b' ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne' | |||
|
1730 | 1909 | /*! |
|
1731 | 1910 | Block functions produce and decode raw zstd blocks, without frame metadata. |
|
1732 | 1911 | Frame metadata cost is typically ~18 bytes, which can be non-negligible for very small blocks (< 100 bytes). |
|
1733 |
|
|
|
1912 | But users will have to handle the metadata needed to regenerate data, such as compressed and content sizes. | |
|
1734 | 1913 | |
|
1735 | 1914 | A few rules to respect : |
|
1736 | 1915 | - Compressing and decompressing require a context structure |
@@ -1741,12 +1920,14 b' ZSTDLIB_API ZSTD_nextInputType_e ZSTD_ne' | |||
|
1741 | 1920 | + copyCCtx() and copyDCtx() can be used too |
|
1742 | 1921 | - Block size is limited, it must be <= ZSTD_getBlockSize() <= ZSTD_BLOCKSIZE_MAX == 128 KB |
|
1743 | 1922 | + If input is larger than a block size, it's necessary to split input data into multiple blocks |
|
1744 |
+ For inputs larger than a single block, |
|
|
1745 | Frame metadata is not that costly, and quickly becomes negligible as source size grows larger. | |
|
1746 |
- When a block is considered not compressible enough, ZSTD_compressBlock() result will be |
|
|
1747 | In which case, nothing is produced into `dst` ! | |
|
1748 | + User must test for such outcome and deal directly with uncompressed data | |
|
1749 | + ZSTD_decompressBlock() doesn't accept uncompressed data as input !!! | |
|
1923 | + For inputs larger than a single block, consider using regular ZSTD_compress() instead. | |
|
1924 | Frame metadata is not that costly, and quickly becomes negligible as source size grows larger than a block. | |
|
1925 | - When a block is considered not compressible enough, ZSTD_compressBlock() result will be 0 (zero) ! | |
|
1926 | ===> In which case, nothing is produced into `dst` ! | |
|
1927 | + User __must__ test for such outcome and deal directly with uncompressed data | |
|
1928 | + A block cannot be declared incompressible if ZSTD_compressBlock() return value was != 0. | |
|
1929 | Doing so would mess up with statistics history, leading to potential data corruption. | |
|
1930 | + ZSTD_decompressBlock() _doesn't accept uncompressed data as input_ !! | |
|
1750 | 1931 | + In case of multiple successive blocks, should some of them be uncompressed, |
|
1751 | 1932 | decoder must be informed of their existence in order to follow proper history. |
|
1752 | 1933 | Use ZSTD_insertBlock() for such a case. |
@@ -169,7 +169,7 b' checking zstd options' | |||
|
169 | 169 | > done |
|
170 | 170 | |
|
171 | 171 | $ $RUNTESTDIR/f -s zstd-*/.hg/store/data/* |
|
172 |
zstd-level-1/.hg/store/data/a.i: size=4 |
|
|
172 | zstd-level-1/.hg/store/data/a.i: size=4114 | |
|
173 | 173 | zstd-level-22/.hg/store/data/a.i: size=4091 |
|
174 | 174 | zstd-level-default/\.hg/store/data/a\.i: size=(4094|4102) (re) |
|
175 | 175 |
General Comments 0
You need to be logged in to leave comments.
Login now